diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java index 86207b6795e..b73744279a4 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ -* $Date: 2004/02/07 01:01:12 $ -* $Revision: 1.19 $ +* $Date: 2004/02/12 08:23:19 $ +* $Revision: 1.20 $ * ******************************************************************************* */ @@ -1033,19 +1033,28 @@ public class WriteCharts implements UCD_Types { int[] starts = new int[names.length]; int[] ends = new int[names.length]; - UCD.BlockData blockData = new UCD.BlockData(); + Iterator blockIterator = Default.ucd().getBlockNames().iterator(); + + //UCD.BlockData blockData = new UCD.BlockData(); int counter = 0; - int blockId = 0; - while (Default.ucd().getBlockData(blockId++, blockData)) { - names[counter] = blockData.name; - starts[counter] = blockData.start; - ends[counter] = blockData.end; + String currentName; + //int blockId = 0; + while (blockIterator.hasNext()) { + //while (Default.ucd().getBlockData(blockId++, blockData)) { + names[counter] = currentName = (String) blockIterator.next(); + if (currentName.equals("No_Block")) continue; + UnicodeSet s = Default.ucd().getBlockSet(currentName, null); + if (s.getRangeCount() != 1) { + throw new IllegalArgumentException("Failure with block set: " + currentName); + } + starts[counter] = s.getRangeStart(0); + ends[counter] = s.getRangeEnd(0); //System.out.println(names[counter] + ", " + values[counter]); ++counter; // HACK - if (blockData.name.equals("Tags")) { + if (currentName.equals("Tags")) { names[counter] = "reserved default ignorable"; starts[counter] = 0xE0080; ends[counter] = 0xE0FFF; diff --git a/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt new file mode 100644 index 00000000000..f933abd0596 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt @@ -0,0 +1,7 @@ +# Correlated with Unicode 4.0 +# Note: The casing of block names is not normative. +# For example, "Basic Latin" and "BASIC LATIN" are equivalent. +# +# Code points not explicitly listed in this file are given the value No_Block. +# +# Start Code..End Code; Block Name diff --git a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java index bad7874be49..e2e7c0cb1e3 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java +++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java @@ -4,9 +4,13 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -20,7 +24,6 @@ import com.ibm.text.utility.Utility; public class CheckICU { static final BagFormatter bf = new BagFormatter(); - static final BagFormatter bf2 = new BagFormatter(); public static void main(String[] args) throws IOException { System.out.println("Start"); @@ -45,25 +48,24 @@ public class CheckICU { return p.getMaxWidth(v); } } - + + public static void test() throws IOException { + //generateFile("4.0.0", "DerivedCombiningClass"); + //generateFile("4.0.0", "DerivedCoreProperties"); + if (true) return; checkUCD(); itemFailures = new UnicodeSet(); icuFactory = ICUPropertyFactory.make(); toolFactory = ToolUnicodePropertySource.make("4.0.0"); String[] quickList = { - "Block", + "Math", // "Script", "Bidi_Mirroring_Glyph", "Case_Folding", //"Numeric_Value" }; for (int i = 0; i < quickList.length; ++i) { - //testProperty(quickList[i], -1); - bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i]))) - .setLabelSource(null) - .setNameSource(null) - .setShowCount(false); - bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF)); + testProperty(quickList[i], -1); } if (quickList.length > 0) return; @@ -97,11 +99,16 @@ public class CheckICU { if (nfc.isLeading(i)) leading.add(i); } PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt"); - bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter)); - bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter)); - bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter)); - bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading)); - bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading)); + pw.println("+Trailing+Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(starter)); + pw.println("+Trailing-Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter)); + pw.println("-Trailing-Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter)); + pw.println("+Trailing+Leading"); + bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading)); + pw.println("+Trailing-Leading"); + bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading)); pw.close(); } /* diff --git a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java index adfe2f46ebc..598636482de 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java +++ b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $ -* $Date: 2004/02/07 01:01:16 $ -* $Revision: 1.4 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -92,30 +92,26 @@ public class CompareProperties implements UCD_Types { public final static class UnicodeSetComparator implements Comparator { /** * Compares two UnicodeSets, producing a transitive ordering. - * @return -1 if first is smaller (in size) than second, - * 1 if first is greater (in size) than second, - * Otherwise (since they are equal in size) - * returns a comparison based on the first range that differs. + * The ordering is based on the first codepoint that differs between them. + * @return -1 if first set contains the first different code point + * 1 if the second set does. + * 0 if there is no difference. * If compareTo were added to UnicodeSet, this can be optimized to use list[i]. * @author Davis * */ public int compare(Object o1, Object o2) { - UnicodeSet bs1 = (UnicodeSet) o1; - UnicodeSet bs2 = (UnicodeSet) o2; - if (bs1.size() < bs2.size()) return -1; - if (bs1.size() > bs2.size()) return 1; - UnicodeSetIterator it1 = new UnicodeSetIterator(bs1); - UnicodeSetIterator it2 = new UnicodeSetIterator(bs2); - // Note: because they are the same size, and we stop if any ranges - // are different, it is safe to test for both at the same time - while (it1.nextRange() && it2.nextRange()) { - if (it1.codepoint < it2.codepoint) return -1; + UnicodeSetIterator it1 = new UnicodeSetIterator((UnicodeSet) o1); + UnicodeSetIterator it2 = new UnicodeSetIterator((UnicodeSet) o2); + while (it1.nextRange()) { + if (!it2.nextRange()) return -1; // first has range while second exhausted + if (it1.codepoint < it2.codepoint) return -1; // first has code point not in second if (it1.codepoint > it2.codepoint) return 1; - if (it1.codepointEnd < it2.codepointEnd) return -1; - if (it1.codepointEnd > it2.codepointEnd) return 1; + if (it1.codepointEnd < it2.codepointEnd) return 1; // second has codepoint not in first + if (it1.codepointEnd > it2.codepointEnd) return -1; } - return 0; + if (it2.nextRange()) return 1; // second has range while first is exhausted + return 0; // otherwise we ran out in both of them, so equal } } @@ -210,7 +206,7 @@ public class CompareProperties implements UCD_Types { public void printPartition() throws IOException { System.out.println("Set Size: " + map.size()); PrintWriter output = Utility.openPrintWriter("Partition" - + GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS); + + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS); Iterator it = map.keySet().iterator(); while (it.hasNext()) { @@ -234,7 +230,7 @@ public class CompareProperties implements UCD_Types { public void printStatistics() throws IOException { System.out.println("Set Size: " + map.size()); PrintWriter output = Utility.openPrintWriter("Statistics" - + GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS); + + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS); System.out.println("Finding disjoints/contains"); for (int i = 0; i < count; ++i) { @@ -383,10 +379,10 @@ public class CompareProperties implements UCD_Types { public static void listDifferences() throws IOException { - PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet"); output.println("# Generated for " + Default.ucd().getVersion()); - output.println(GenerateData.generateDateLine()); + output.println(UnicodeDataFile.generateDateLine()); output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)"); diff --git a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java index adc03b0eff9..1a93de08d3d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $ -* $Date: 2004/02/06 18:30:23 $ -* $Revision: 1.13 $ +* $Date: 2004/02/12 08:23:17 $ +* $Revision: 1.14 $ * ******************************************************************************* */ @@ -834,9 +834,9 @@ public final class ConvertUCD implements UCD_Types { //UCD_Names.BP_OLD } else if (fieldName.equals("gc")) { - uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GC, true); + uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true); } else if (fieldName.equals("bc")) { - uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BC, true); + uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true); } else if (fieldName.equals("dt")) { if (major < 2) { if (fieldValue.equals("no-break")) fieldValue = "noBreak"; @@ -847,17 +847,17 @@ public final class ConvertUCD implements UCD_Types { else if (fieldValue.equals("no-join")) fieldValue = "compat"; else if (fieldValue.equals("join")) fieldValue = "compat"; } - uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.DT, true); + uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true); } else if (fieldName.equals("nt")) { - uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true); + uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true); } else if (fieldName.equals("ea")) { - uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true); + uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true); } else if (fieldName.equals("lb")) { - uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true); + uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true); } else if (fieldName.equals("sn")) { - uData.script = Utility.lookup(fieldValue, UCD_Names.SCRIPT, true); + uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true); } else if (fieldName.equals("jt")) { uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true); diff --git a/tools/unicodetools/com/ibm/text/UCD/Default.java b/tools/unicodetools/com/ibm/text/UCD/Default.java index 178b1d3fb5d..ad5365c6ac7 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Default.java +++ b/tools/unicodetools/com/ibm/text/UCD/Default.java @@ -21,13 +21,19 @@ public final class Default implements UCD_Types { setUCD(); } + private static boolean inRecursiveCall = false; private static void setUCD() { - ucd = UCD.make(ucdVersion()); + if (inRecursiveCall) { + throw new IllegalArgumentException("Recursive call to setUCD"); + } + inRecursiveCall = true; + ucd = UCD.make(ucdVersion); nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion()); nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion()); nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion()); nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion()); System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate()))); + inRecursiveCall = false; } static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'"); @@ -40,32 +46,32 @@ public final class Default implements UCD_Types { } public static String ucdVersion() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return ucdVersion; } public static UCD ucd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return ucd; } public static Normalizer nfc() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfc; } public static Normalizer nfd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfd; } public static Normalizer nfkc() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfkc; } public static Normalizer nfkd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfkd; } public static Normalizer nf(int index) { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nf[index]; } diff --git a/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt new file mode 100644 index 00000000000..1c786cf31c8 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt @@ -0,0 +1,16 @@ +# +# Unicode Character Database: Derived Property Data +# This file shows when various code points were designated in Unicode +# Notes: +# - The term 'designated' means that a previously reserved code point was specified +# to be a noncharacter or surrogate, or assigned as a character, +# control or format code. +# - Versions are only tracked from 1.1 onwards, since version 1.0 +# predated changes required by the ISO 10646 merger. +# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing. +# - The supplementary private use code points and the non-character code points +# were designated in version 2.0, but not specifically listed in the UCD +# until versions 3.0 and 3.1 respectively. +# +# For details on the contents of each version, see +# http://www.unicode.org/versions/enumeratedversions.html. diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java index 3cf31ed2fd8..34523ce14c2 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $ -* $Date: 2004/02/07 01:01:16 $ -* $Revision: 1.9 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.10 $ * ******************************************************************************* */ @@ -28,8 +28,8 @@ abstract public class GenerateBreakTest implements UCD_Types { Normalizer nfd; Normalizer nfkd; - UnicodeMap sampleMap = null; - UnicodeMap map = new UnicodeMap(); + OldUnicodeMap sampleMap = null; + OldUnicodeMap map = new OldUnicodeMap(); // ====================== Main =========================== diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java index f3f4a36239c..6e2d6382efe 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $ -* $Date: 2004/02/07 01:01:15 $ -* $Revision: 1.15 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.16 $ * ******************************************************************************* */ @@ -15,6 +15,7 @@ package com.ibm.text.UCD; import java.util.*; import java.io.*; + import com.ibm.icu.text.UTF16; import com.ibm.text.utility.*; @@ -37,11 +38,12 @@ public class GenerateCaseFolding implements UCD_Types { static PrintWriter log; + public static void makeCaseFold(boolean normalized) throws java.io.IOException { PICK_SHORT = NF_CLOSURE = normalized; - log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); - System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true)); + log = Utility.openPrintWriter("CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); + System.out.println("Writing Log: " + "CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true)); System.out.println("Making Full Data"); Map fullData = getCaseFolding(true, NF_CLOSURE, ""); @@ -64,15 +66,8 @@ public class GenerateCaseFolding implements UCD_Types { String filename = "CaseFolding"; if (normalized) filename += "-Normalized"; String directory = "DerivedData/"; - String newFile = directory + filename + GenerateData.getFileSuffix(true); - PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - String[] batName = {""}; - String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName); - - out.println("# CaseFolding" + GenerateData.getFileSuffix(false)); - out.println(GenerateData.generateDateLine()); - out.println("#"); - Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out); + UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, filename); + PrintWriter out = fc.out; /* PrintWriter out = new PrintWriter( @@ -124,9 +119,8 @@ public class GenerateCaseFolding implements UCD_Types { drawLine(out, ch, "t", rSimpleTurkish); } } - out.close(); + fc.close(); log.close(); - Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); } /* Goal is following (with no entries for 0131 or 0069) @@ -470,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types { if (normalize) suffix2 = "-Normalized"; PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions" - + suffix2 + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); + + suffix2 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); for (int ch = 0; ch <= 0x10FFFF; ++ch) { Utility.dot(ch); @@ -580,12 +574,12 @@ public class GenerateCaseFolding implements UCD_Types { log.close(); System.out.println("Writing"); - String newFile = "DerivedData/SpecialCasing" + suffix2 + GenerateData.getFileSuffix(true); + String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true); PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = GenerateData.generateBat("DerivedData/", "SpecialCasing", suffix2 + GenerateData.getFileSuffix(true), batName); - out.println("# SpecialCasing" + GenerateData.getFileSuffix(false)); - out.println(GenerateData.generateDateLine()); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "SpecialCasing", suffix2 + UnicodeDataFile.getFileSuffix(true), batName); + out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false)); + out.println(UnicodeDataFile.generateDateLine()); out.println("#"); Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out); diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index bec40627b4c..8286f8335b0 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2004/02/07 01:01:15 $ -* $Revision: 1.32 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.33 $ * ******************************************************************************* */ @@ -95,24 +95,11 @@ public class GenerateData implements UCD_Types { } - //Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names - - public static String fixFile(String s) { - int len = s.length(); - if (!s.endsWith(".txt")) return s; - if (s.charAt(len-6) != 'd') return s; - char c = s.charAt(len-5); - if (c != 'X' && (c < '0' || '9' < c)) return s; - s = s.substring(0,len-6) + s.substring(len-4); - System.out.println("Fixing File Name: " + s); - return s; - } - static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2; public static void doHeader(String fileName, PrintWriter output, int headerChoice) { output.println("# " + fileName); - output.println(generateDateLine()); + output.println(UnicodeDataFile.generateDateLine()); output.println("#"); if (headerChoice == HEADER_SCRIPTS) { } else if (headerChoice == HEADER_EXTEND) { @@ -128,18 +115,6 @@ public class GenerateData implements UCD_Types { output.println(); } - public static String getFileSuffix(boolean withDVersion) { - return "-" + Default.ucd().getVersion() - + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") - + ".txt"; - } - - public static String getHTMLFileSuffix(boolean withDVersion) { - return "-" + Default.ucd().getVersion() - + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") - + ".html"; - } - public static void checkDifferences (String targetVersion) throws IOException { System.out.println("Checking Differences"); UCD target = UCD.make(targetVersion); @@ -176,14 +151,14 @@ public class GenerateData implements UCD_Types { public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException { - String newFile = directory + fileName + getFileSuffix(true); + String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true); System.out.println("New File: " + newFile); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName); System.out.println("Most recent: " + mostRecent); - doHeader(fileName + getFileSuffix(false), output, headerChoice); + doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice); for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) { UCDProperty up = DerivedProperty.make(i, Default.ucd()); if (up == null) continue; @@ -227,13 +202,13 @@ public class GenerateData implements UCD_Types { public static void generateCompExclusions() throws IOException { - String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true); + String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName); - output.println("# CompositionExclusions" + getFileSuffix(false)); - output.println(generateDateLine()); + output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false)); + output.println(UnicodeDataFile.generateDateLine()); output.println("#"); output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table."); output.println("#"); @@ -289,10 +264,6 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); } - static String generateDateLine() { - return "# Date: " + Default.getDate() + " [MD]"; - } - static class CompLister extends PropertyLister { UCD oldUCD; int type; @@ -509,21 +480,27 @@ public class GenerateData implements UCD_Types { } } + Iterator blockIterator = Default.ucd().getBlockNames().iterator(); + while (blockIterator.hasNext()) { + addLine(sorted, "blk", "n/a", (String)blockIterator.next()); + } + /* UCD.BlockData blockData = new UCD.BlockData(); int blockId = 0; while (Default.ucd().getBlockData(blockId++, blockData)) { addLine(sorted, "blk", "n/a", blockData.name); } + */ String filename = "PropertyAliases"; - String newFile = "DerivedData/" + filename + getFileSuffix(true); + String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName); - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log); log.println(HORIZONTAL_LINE); @@ -538,12 +515,12 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); filename = "PropertyValueAliases"; - newFile = "DerivedData/" + filename + getFileSuffix(true); + newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true); log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName); + mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName); - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log); log.println(HORIZONTAL_LINE); @@ -554,9 +531,9 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); filename = "PropertyAliasSummary"; - newFile = "OtherData/" + filename + getFileSuffix(true); + newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true); log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - mostRecent = generateBat("OtherData/", filename, getFileSuffix(true), batName); + mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName); log.println(); log.println(HORIZONTAL_LINE); @@ -682,66 +659,16 @@ public class GenerateData implements UCD_Types { } } - /* - static String skeleton(String source) { - StringBuffer result = new StringBuffer(); - source = source.toLowerCase(); - for (int i = 0; i < source.length(); ++i) { - char c = source.charAt(i); - if (c == ' ' || c == '_' || c == '-') continue; - result.append(c); - } - return result.toString(); - } - */ - // static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1; - - public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException { - String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true); - if (mostRecent != null) { - batName[0] = generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix, - mostRecent, directory + fileRoot + suffix); - } else { - System.out.println("No previous version of: " + fileRoot + ".txt"); - return null; - } - - String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true); - if (lessRecent != null && !mostRecent.equals(lessRecent)) { - generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix, - lessRecent, directory + fileRoot + suffix); - } - return mostRecent; - } - - public static String generateBatAux(String batName, String oldName, String newName) throws IOException { - String fullBatName = batName + ".bat"; - PrintWriter output = Utility.openPrintWriter(batName + ".bat", Utility.LATIN1_UNIX); - - newName = Utility.getOutputName(newName); - System.out.println("Writing BAT to compare " + oldName + " and " + newName); - - File newFile = new File(newName); - File oldFile = new File(oldName); - output.println("\"C:\\Program Files\\wincmp.exe\" " - + oldFile.getCanonicalFile() - + " " - + newFile.getCanonicalFile()); - output.close(); - return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString(); - } - - public static void generateVerticalSlice(int startEnum, int endEnum, int headerChoice, String directory, String file) throws IOException { - String newFile = directory + file + getFileSuffix(true); + String newFile = directory + file + UnicodeDataFile.getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, file, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName); - doHeader(file + getFileSuffix(false), output, headerChoice); + doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice); int last = -1; for (int i = startEnum; i < endEnum; ++i) { UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd()); @@ -810,15 +737,15 @@ public class GenerateData implements UCD_Types { static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException { - String newFile = directory + fileName + getFileSuffix(true); + String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName); String[] example = new String[256]; - log.println("# " + fileName + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); log.println("# Normalization Test Suite"); log.println("# Format:"); @@ -1012,10 +939,10 @@ public class GenerateData implements UCD_Types { static final void backwardsCompat(String directory, String filename, int[] list) throws IOException { - String newFile = directory + filename + getFileSuffix(true); + String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); DiffPropertyLister dpl; UnicodeSet cummulative = new UnicodeSet(); @@ -1095,13 +1022,13 @@ public class GenerateData implements UCD_Types { static final void generateAge(String directory, String filename) throws IOException { - String newFile = directory + filename + getFileSuffix(true); + String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); try { - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); log.println("# Unicode Character Database: Derived Property Data"); log.println("# This file shows when various code points were designated in Unicode"); @@ -1195,7 +1122,7 @@ public class GenerateData implements UCD_Types { public static void listCombiningAccents() throws IOException { - PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter log = Utility.openPrintWriter("ListAccents" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); @@ -1232,7 +1159,7 @@ public class GenerateData implements UCD_Types { public static void listGreekVowels() throws IOException { - PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java index ad43b6b0e9b..3ba8181c18f 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $ -* $Date: 2004/02/07 01:01:14 $ -* $Revision: 1.4 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -97,10 +97,10 @@ public final class GenerateStandardizedVariants implements UCD_Types { // now write out the results String directory = "DerivedData/"; - String filename = directory + "StandardizedVariants" + GenerateData.getHTMLFileSuffix(true); + String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true); PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); String version = Default.ucd().getVersion(); int lastDot = version.lastIndexOf('.'); diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java new file mode 100644 index 00000000000..f3f1952f404 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java @@ -0,0 +1,280 @@ +package com.ibm.text.UCD; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; +import java.util.TreeSet; + +import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.UnicodeProperty; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.text.utility.UnicodeDataFile; + +public class MakeUnicodeFiles { + + static boolean DEBUG = true; + + public static void main() throws IOException { + generateFile("Scripts","z"); + } + + static class OrderedMap { + HashMap map = new HashMap(); + ArrayList keys = new ArrayList(); + void put(Object o, Object t) { + map.put(o,t); + keys.add(o); + } + List keyset() { + return keys; + } + } + + static class PrintStyle { + boolean longForm = false; + boolean noLabel = false; + boolean makeUppercase = false; + boolean makeFirstLetterLowercase = false; + String skipValue = null; + String skipUnassigned = null; + boolean orderByRangeStart = false; + boolean valueList = false; + + PrintStyle setLongForm(boolean value) { + longForm = value; + return this; + } + PrintStyle setSkipUnassigned(String value) { + skipUnassigned = value; + return this; + } + PrintStyle setNoLabel(boolean value) { + noLabel = value; + return this; + } + PrintStyle setMakeUppercase(boolean value) { + makeUppercase = value; + return this; + } + PrintStyle setMakeFirstLetterLowercase(boolean value) { + makeFirstLetterLowercase = value; + return this; + } + PrintStyle setSkipValue(String value) { + skipValue = value; + return this; + } + PrintStyle setOrderByRangeStart(boolean value) { + orderByRangeStart = value; + return this; + } + PrintStyle setValueList(boolean value) { + valueList = value; + return this; + } + } + static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle(); + static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator(); + static Map printStyles = new TreeMap(/*skeletonComparator*/); + static { + printStyles.put("Script", new PrintStyle().setLongForm(true) + .setMakeUppercase(true).setSkipUnassigned("Common")); + printStyles.put("Age", new PrintStyle().setNoLabel(true)); + printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true) + .setMakeFirstLetterLowercase(true).setSkipUnassigned("none")); + printStyles.put("General_Category", new PrintStyle().setNoLabel(true) + //.setSkipUnassigned(true) + ); + printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown")); + printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining")); + printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group") + .setMakeUppercase(true)); + printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral")); + printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true) + .setSkipValue("None").setMakeFirstLetterLowercase(true)); + printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right")); + printStyles.put("Block", new PrintStyle().setNoLabel(true) + .setValueList(true)); + printStyles.put("Age", new PrintStyle().setSkipValue("unassigned")); + printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0")); + printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA")); + + } + //PropertyAliases + //PropertyValueAliases + //CompositionExclusions + //SpecialCasing + //NormalizationTest + //add("CaseFolding", new String[] {"CaseFolding"}); + static Map contents = new TreeMap(); + static void add(String name, String[] properties) { + contents.put(name, properties); + } + static { + add("Blocks", new String[] {"Block"}); + add("DerivedAge", new String[] {"Age"}); + add("Scripts", new String[] {"Script"}); + add("HangulSyllableType", new String[] {"HangulSyllableType"}); + if (false) add("DerivedNormalizationProps", new String[] { + "FNC", "Full_Composition_Exclusion", + "NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck", + "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"}); + + add("DerivedBidiClass", new String[] {"BidiClass"}); + add("DerivedBinaryProperties", new String[] {"BidiMirrored"}); + add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"}); + add("DerivedDecompositionType", new String[] {"DecompositionType"}); + add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"}); + add("DerivedGeneralCategory", new String[] {"GeneralCategory"}); + add("DerivedJoiningGroup", new String[] {"JoiningGroup"}); + add("DerivedJoiningType", new String[] {"JoiningType"}); + add("DerivedLineBreak", new String[] {"LineBreak"}); + add("DerivedNumericType", new String[] {"NumericType"}); + add("DerivedNumericValues", new String[] {"NumericValue"}); + add("PropList", new String[] { + "White_Space", "Bidi_Control", "Join_Control", + "Dash", "Hyphen", "Quotation_Mark", + "Terminal_Punctuation", "Other_Math", + "Hex_Digit", "ASCII_Hex_Digit", + "Other_Alphabetic", + "Ideographic", + "Diacritic", "Extender", + "Other_Lowercase", "Other_Uppercase", + "Noncharacter_Code_Point", + "Other_Grapheme_Extend", + "Grapheme_Link", + "IDS_Binary_Operator", "IDS_Trinary_Operator", + "Radical", "Unified_Ideograph", + "Other_Default_Ignorable_Code_Point", + "Deprecated", "Soft_Dotted", + "Logical_Order_Exception", + "Other_ID_Start" + }); + add("DerivedCoreProperties", new String[] { + "Math", "Alphabetic", "Lowercase", "Uppercase", + "ID_Start", "ID_Continue", + "XID_Start", "XID_Continue", + "Default_Ignorable_Code_Point", + "Grapheme_Extend", "Grapheme_Base" + }); + } + + public static void generateFile(String atOrAfter, String atOrBefore) throws IOException { + Iterator it = contents.keySet().iterator(); + while (it.hasNext()) { + String propname = (String) it.next(); + if (propname.compareTo(atOrAfter) < 0) continue; + if (propname.compareTo(atOrBefore) > 0) continue; + generateFile(propname); + } + } + + public static void generateFile(String filename) throws IOException { + String[] propList = (String[]) contents.get(filename); + UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename); + PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt"); + UnicodeProperty.Factory toolFactory + = ToolUnicodePropertySource.make(Default.ucdVersion()); + BagFormatter bf2 = new BagFormatter(toolFactory); + UnicodeSet unassigned = toolFactory.getSet("gc=cn") + .addAll(toolFactory.getSet("gc=cs")); + //System.out.println(unassigned.toPattern(true)); + // .removeAll(toolFactory.getSet("noncharactercodepoint=true")); + String separator = bf2.getLineSeparator() + + "# ================================================" + + bf2.getLineSeparator() + bf2.getLineSeparator(); + + for (int i = 0; i < propList.length; ++i) { + UnicodeProperty prop = toolFactory.getProperty(propList[i]); + System.out.println(prop.getName()); + pw.print(separator); + PrintStyle ps = (PrintStyle) printStyles.get(prop.getName()); + if (ps == null) { + ps = DEFAULT_PRINT_STYLE; + System.out.println("Using default style!"); + } + if (ps.noLabel) bf2.setLabelSource(null); + + if (ps.valueList) { + bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter())) + .setNameSource(null) + .setShowCount(false) + .showSetNames(pw,new UnicodeSet(0,0x10FFFF)); + } else if (prop.getType() <= prop.EXTENDED_BINARY) { + UnicodeSet s = prop.getSet("True"); + bf2.setValueSource(prop.getName()); + bf2.showSetNames(pw, s); + } else { + bf2.setValueSource(prop); + Collection aliases = prop.getAvailableValueAliases(); + if (ps.orderByRangeStart) { + System.out.println("Reordering"); + TreeSet temp2 = new TreeSet(new RangeStartComparator(prop)); + temp2.addAll(aliases); + aliases = temp2; + } + Iterator it = aliases.iterator(); + while (it.hasNext()) { + String value = (String)it.next(); + UnicodeSet s = prop.getSet(value); + + System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue); + System.out.println(s.toPattern(true)); + + if (skeletonComparator.compare(value, ps.skipValue) == 0) continue; + if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) { + s.removeAll(unassigned); + } + + if (s.size() == 0) continue; + //if (unassigned.containsAll(s)) continue; // skip if all unassigned + //if (s.contains(0xD0000)) continue; // skip unassigned + pw.print(separator); + if (!ps.longForm) value = prop.getShortestValueAlias(value); + if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH); + if (ps.makeFirstLetterLowercase) { + // NOTE: this is ok since we are only working in ASCII + value = value.substring(0,1).toLowerCase(Locale.ENGLISH) + + value.substring(1); + } + bf2.setValueSource(value); + bf2.showSetNames(pw, s); + } + } + } + udf.close(); + } + static class RangeStartComparator implements Comparator { + UnicodeProperty prop; + CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator(); + RangeStartComparator(UnicodeProperty prop) { + this.prop = prop; + } + public int compare(Object o1, Object o2) { + UnicodeSet s1 = prop.getSet((String)o1); + UnicodeSet s2 = prop.getSet((String)o2); + if (true) System.out.println("comparing " + o1 + ", " + o2 + + s1.toPattern(true) + "?" + s2.toPattern(true) + + ", " + comp.compare(s1, s2)); + return comp.compare(s1, s2); + } + + } + + public static class ReplaceFilter extends UnicodeProperty.StringFilter { + public String remap(String original) { + return original.replace('_',' '); + } + } + + + +} \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java index fa9f12cd880..7eccfeabb0c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $ -* $Date: 2004/02/07 01:01:13 $ -* $Revision: 1.7 $ +* $Date: 2004/02/12 08:23:16 $ +* $Revision: 1.8 $ * ******************************************************************************* */ @@ -237,9 +237,9 @@ public final class TestNormalization { UnicodeSet t = (UnicodeSet) it.next(); UnicodeSet l = (UnicodeSet) map.get(t); System.out.println("" - + bf.showSetNames("",l) + + bf.showSetNames(l) + "" - + bf.showSetNames("",t) + + bf.showSetNames(t) + ""); } } diff --git a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java index b2d9b10a2be..6511beebc05 100644 --- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java +++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java @@ -1,11 +1,13 @@ package com.ibm.text.UCD; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.Locale; import java.util.TreeSet; +import com.ibm.icu.dev.test.util.UnicodeMap; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.lang.UCharacter; import com.ibm.text.utility.Utility; @@ -15,62 +17,74 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { private static boolean needAgeCache = true; private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE]; - private static HashMap cache = new HashMap(); + private static HashMap factoryCache = new HashMap(); public static synchronized ToolUnicodePropertySource make(String version) { - ToolUnicodePropertySource result = (ToolUnicodePropertySource)cache.get(version); + ToolUnicodePropertySource result = (ToolUnicodePropertySource)factoryCache.get(version); if (result != null) return result; result = new ToolUnicodePropertySource(version); - cache.put(version, result); + factoryCache.put(version, result); return result; } private ToolUnicodePropertySource(String version) { ucd = UCD.make(version); + version = ucd.getVersion(); TreeSet names = new TreeSet(); UnifiedProperty.getAvailablePropertiesAliases(names,ucd); Iterator it = names.iterator(); while (it.hasNext()) { String name = (String) it.next(); + //System.out.println("Name: " + name); add(new ToolUnicodeProperty(name)); } + add(new UnicodeProperty.SimpleProperty() { - {set("Name", "na", UnicodeProperty.STRING, "");} - public String getValue(int codepoint) { + public String _getValue(int codepoint) { if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getName(codepoint); } - }); + }.setMain("Name", "na", UnicodeProperty.STRING, version) + .setValues("")); + add(new UnicodeProperty.SimpleProperty() { - {set("Block", "blk", UnicodeProperty.ENUMERATED, - ucd.getBlockNames(null));} - public String getValue(int codepoint) { + public String _getValue(int codepoint) { + if (codepoint == 0x1D100) { + System.out.println("here"); + } //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getBlock(codepoint); } - }); + protected UnicodeMap _getUnicodeMap() { + return ucd.blockData; + } + }.setMain("Block", "blk", UnicodeProperty.ENUMERATED, version) + .setValues(ucd.getBlockNames(null))); + add(new UnicodeProperty.SimpleProperty() { - {set("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, "");} - public String getValue(int codepoint) { + public String _getValue(int codepoint) { //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getBidiMirror(codepoint); } - }); + }.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version) + .setValues("")); + add(new UnicodeProperty.SimpleProperty() { - {set("Case_Folding", "cf", UnicodeProperty.STRING, "");} - public String getValue(int codepoint) { + public String _getValue(int codepoint) { //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null; return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD); } - }); + }.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version) + .setValues("")); + add(new UnicodeProperty.SimpleProperty() { - {set("Numeric_Value", "nv", UnicodeProperty.NUMERIC, "");} - public String getValue(int codepoint) { + public String _getValue(int codepoint) { double num = ucd.getNumericValue(codepoint); if (Double.isNaN(num)) return null; return Double.toString(num); } - }); + }.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version) + .setValues("")); } /* "Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment", @@ -109,7 +123,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { setName(propertyAlias); } - public Collection getAvailableValueAliases(Collection result) { + public Collection _getAvailableValueAliases(Collection result) { + if (result == null) result = new ArrayList(); int type = getType() & ~EXTENDED_BIT; if (type == STRING) result.add(""); else if (type == NUMERIC) result.add(""); @@ -149,34 +164,80 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { } if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase)); } - if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none"); - if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining"); - if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None"); + //if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none"); + //if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining"); + //if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None"); } return result; } - public Collection getAliases(Collection result) { - String longName = up.getName(UCD_Types.LONG); - addUnique(Utility.getUnskeleton(longName, true), result); - String shortName = up.getName(UCD_Types.SHORT); - addUnique(Utility.getUnskeleton(shortName, false), result); - return result; - } - - public Collection getValueAliases(String valueAlias, Collection result) { - // TODO Auto-generated method stub + public Collection _getAliases(Collection result) { + if (result == null) result = new ArrayList(); + String longName = up.getName(UCD_Types.LONG); + addUnique(Utility.getUnskeleton(longName, true), result); + String shortName = up.getName(UCD_Types.SHORT); + addUnique(Utility.getUnskeleton(shortName, false), result); return result; } + + public Collection _getValueAliases(String valueAlias, Collection result) { + if (result == null) result = new ArrayList(); + int type = getType() & ~EXTENDED_BIT; + if (type == STRING) return result; + else if (type == NUMERIC) return result; + else if (type == BINARY) { + UnicodeProperty.addUnique(valueAlias, result); + return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result); + } else if (type == ENUMERATED) { + byte style = UCD_Types.LONG; + int prop = propMask>>8; + boolean titlecase = false; + for (int i = 0; i < 256; ++i) { + try { + switch (prop) { + case UCD_Types.CATEGORY>>8: + return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result); + case UCD_Types.COMBINING_CLASS>>8: + addUnique(""+i, result); + return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result); + case UCD_Types.BIDI_CLASS>>8: + return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result); + case UCD_Types.DECOMPOSITION_TYPE>>8: + return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, result); + case UCD_Types.NUMERIC_TYPE>>8: + return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, result); + case UCD_Types.EAST_ASIAN_WIDTH>>8: + return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result); + case UCD_Types.LINE_BREAK>>8: + return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result); + case UCD_Types.JOINING_TYPE>>8: + return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result); + case UCD_Types.JOINING_GROUP>>8: + return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, result); + case UCD_Types.SCRIPT>>8: + return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, result); + case UCD_Types.AGE>>8: + return lookup(valueAlias, UCD_Names.AGE, null, result); + case UCD_Types.HANGUL_SYLLABLE_TYPE>>8: + return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, result); + default: throw new IllegalArgumentException("Internal Error: " + prop); + } + } catch (ArrayIndexOutOfBoundsException e) { + continue; + } + } + } + throw new ArrayIndexOutOfBoundsException("not supported yet"); + } - public String getValue(int codepoint) { + public String _getValue(int codepoint) { byte style = UCD_Types.LONG; String temp = null; boolean titlecase = false; switch (propMask>>8) { case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), style)); break; case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), style)); - if (temp.startsWith("Fixed_")) temp = temp.substring(6); + //if (temp.startsWith("Fixed_")) temp = temp.substring(6); break; case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), style)); break; case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), style)); @@ -226,7 +287,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { private int getPropertyTypeInternal() { int result = 0; String name = up.getName(UCD_Types.LONG); - if ("Age".equals(name)) return STRING; + if ("Age".equals(name)) return ENUMERATED; switch (up.getValueType()) { case UCD_Types.NUMERIC_PROP: result = NUMERIC; break; case UCD_Types.STRING_PROP: result = STRING; break; @@ -243,5 +304,18 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { return result; } + public String _getVersion() { + return up.ucd.getVersion(); + } + } + static Collection lookup(String valueAlias, String[] main, String[] aux, Collection result) { + //System.out.println(valueAlias + "=>"); + int pos = 0xFF & Utility.lookup(valueAlias, main, true); + //System.out.println("=>" + aux[pos]); + UnicodeProperty.addUnique(valueAlias, result); + if (aux == null) return result; + return UnicodeProperty.addUnique(aux[pos], result); + } + } diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java index f5a3218a67c..10bfba5f78a 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $ -* $Date: 2004/02/07 01:01:13 $ -* $Revision: 1.30 $ +* $Date: 2004/02/12 08:23:16 $ +* $Revision: 1.31 $ * ******************************************************************************* */ @@ -29,6 +29,7 @@ import java.io.BufferedReader; import com.ibm.text.utility.*; import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.UnicodeMap; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.text.UnicodeSet; @@ -364,9 +365,19 @@ public final class UCD implements UCD_Types { BIDI_R_SET = new UnicodeSet(); BIDI_AL_SET = new UnicodeSet(); - + + blockData.getSet("Hebrew",BIDI_R_SET); + blockData.getSet("Cypriot_Syllabary",BIDI_R_SET); + + blockData.getSet("Arabic",BIDI_AL_SET); + blockData.getSet("Syriac",BIDI_AL_SET); + blockData.getSet("Thaana",BIDI_AL_SET); + blockData.getSet("Arabic_Presentation_Forms-A",BIDI_AL_SET); + blockData.getSet("Arabic_Presentation_Forms-B",BIDI_AL_SET); + /* int blockId = 0; BlockData blockData = new BlockData(); + UnicodeSet s = blockData.get while (getBlockData(blockId++, blockData)) { if (blockData.name.equals("Hebrew") || blockData.name.equals("Cypriot_Syllabary") @@ -391,6 +402,7 @@ public final class UCD implements UCD_Types { + ".." + Utility.hex(blockData.end)); } } + */ System.out.println("BIDI_R_SET: " + BIDI_R_SET); System.out.println("BIDI_AL_SET: " + BIDI_AL_SET); @@ -835,8 +847,8 @@ public final class UCD implements UCD_Types { } public static String getCategoryID_fromIndex(byte prop, byte style) { - return prop < 0 || prop >= UCD_Names.GC.length ? null - : (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop]; + return prop < 0 || prop >= UCD_Names.GENERAL_CATEGORY.length ? null + : (style != LONG) ? UCD_Names.GENERAL_CATEGORY[prop] : UCD_Names.LONG_GENERAL_CATEGORY[prop]; } @@ -898,11 +910,11 @@ public final class UCD implements UCD_Types { public static String getBidiClassID_fromIndex(byte prop, byte style) { return prop < 0 - || prop >= UCD_Names.BC.length + || prop >= UCD_Names.BIDI_CLASS.length ? null : style == SHORT - ? UCD_Names.BC[prop] - : UCD_Names.LONG_BC[prop]; + ? UCD_Names.BIDI_CLASS[prop] + : UCD_Names.LONG_BIDI_CLASS[prop]; } public String getDecompositionTypeID(int codePoint) { @@ -913,8 +925,8 @@ public final class UCD implements UCD_Types { return getDecompositionTypeID_fromIndex(prop, NORMAL); } public static String getDecompositionTypeID_fromIndex(byte prop, byte style) { - return prop < 0 || prop >= UCD_Names.DT.length ? null - : style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop]; + return prop < 0 || prop >= UCD_Names.LONG_DECOMPOSITION_TYPE.length ? null + : style == SHORT ? UCD_Names.DECOMPOSITION_TYPE[prop] : UCD_Names.LONG_DECOMPOSITION_TYPE[prop]; } public String getNumericTypeID(int codePoint) { @@ -926,8 +938,8 @@ public final class UCD implements UCD_Types { } public static String getNumericTypeID_fromIndex(byte prop, byte style) { - return prop < 0 || prop >= UCD_Names.NT.length ? null - : style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop]; + return prop < 0 || prop >= UCD_Names.LONG_NUMERIC_TYPE.length ? null + : style == SHORT ? UCD_Names.NUMERIC_TYPE[prop] : UCD_Names.LONG_NUMERIC_TYPE[prop]; } public String getEastAsianWidthID(int codePoint) { @@ -939,8 +951,8 @@ public final class UCD implements UCD_Types { } public static String getEastAsianWidthID_fromIndex(byte prop, byte style) { - return prop < 0 || prop >= UCD_Names.EA.length ? null - : style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop]; + return prop < 0 || prop >= UCD_Names.LONG_EAST_ASIAN_WIDTH.length ? null + : style != LONG ? UCD_Names.EAST_ASIAN_WIDTH[prop] : UCD_Names.LONG_EAST_ASIAN_WIDTH[prop]; } public String getLineBreakID(int codePoint) { @@ -952,8 +964,8 @@ public final class UCD implements UCD_Types { } public static String getLineBreakID_fromIndex(byte prop, byte style) { - return prop < 0 || prop >= UCD_Names.LB.length ? null - : style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop]; + return prop < 0 || prop >= UCD_Names.LINE_BREAK.length ? null + : style != LONG ? UCD_Names.LINE_BREAK[prop] : UCD_Names.LONG_LINE_BREAK[prop]; } public String getJoiningTypeID(int codePoint) { @@ -993,7 +1005,7 @@ public final class UCD implements UCD_Types { public static String getScriptID_fromIndex(byte prop, byte length) { return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null - : (length == SHORT) ? UCD_Names.ABB_SCRIPT[prop] : UCD_Names.SCRIPT[prop]; + : (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop]; } public String getAgeID(int codePoint) { @@ -1553,6 +1565,54 @@ to guarantee identifier closure. } } + UnicodeMap blockData; + public String getBlock(int codePoint) { + if (blockData == null) loadBlocks(); + return (String)blockData.getValue(codePoint); + } + public Collection getBlockNames() { + return getBlockNames(null); + } + public Collection getBlockNames(Collection result) { + if (result == null) result = new ArrayList(); + if (blockData == null) loadBlocks(); + return blockData.getAvailableValues(result); + } + public UnicodeSet getBlockSet(String value, UnicodeSet result) { + if (result == null) result = new UnicodeSet(); + if (blockData == null) loadBlocks(); + return blockData.getSet(value, result); + } + + private void loadBlocks() { + blockData = new UnicodeMap(); + try { + BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1); + try { + while (true) { + // 0000..007F; Basic Latin + String line = Utility.readDataLine(in); + if (line == null) break; + if (line.length() == 0) continue; + int pos1 = line.indexOf('.'); + int pos2 = line.indexOf(';', pos1); + + //lastBlock = new BlockData(); + int start = Integer.parseInt(line.substring(0, pos1), 16); + int end = Integer.parseInt(line.substring(pos1+2, pos2), 16); + String name = line.substring(pos2+1).trim().replace(' ', '_'); + blockData.putAll(start,end, name); + } + blockData.setMissing("No_Block"); + } finally { + in.close(); + } + } catch (IOException e) { + throw new IllegalArgumentException("Can't read block file"); + } + } + + /* public static class BlockData { public int start; public int end; @@ -1560,13 +1620,17 @@ to guarantee identifier closure. } public String NOBLOCK = Utility.getUnskeleton("no block", true); + private BlockData lastBlock; public String getBlock(int codePoint) { if (blocks == null) loadBlocks(); + if (codePoint >= lastBlock.start && codePoint <= lastBlock.end) return lastBlock.name; Iterator it = blocks.iterator(); while (it.hasNext()) { - BlockData data = (BlockData) it.next(); - if (codePoint >= data.start && codePoint <= data.end) return data.name; + lastBlock = (BlockData) it.next(); + if (codePoint < lastBlock.start) continue; + if (codePoint > lastBlock.end) break; + return lastBlock.name; } return NOBLOCK; } @@ -1612,11 +1676,11 @@ to guarantee identifier closure. int pos1 = line.indexOf('.'); int pos2 = line.indexOf(';', pos1); - BlockData blockData = new BlockData(); - blockData.start = Integer.parseInt(line.substring(0, pos1), 16); - blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16); - blockData.name = line.substring(pos2+1).trim().replace(' ', '_'); - blocks.add(blockData); + lastBlock = new BlockData(); + lastBlock.start = Integer.parseInt(line.substring(0, pos1), 16); + lastBlock.end = Integer.parseInt(line.substring(pos1+2, pos2), 16); + lastBlock.name = line.substring(pos2+1).trim().replace(' ', '_'); + blocks.add(lastBlock); } } finally { in.close(); @@ -1625,6 +1689,7 @@ to guarantee identifier closure. throw new IllegalArgumentException("Can't read block file"); } } + */ /** * @return */ diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java index 20a01018b66..04f4cbd786f 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java @@ -5,14 +5,16 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $ -* $Date: 2004/02/06 18:30:19 $ -* $Revision: 1.24 $ +* $Date: 2004/02/12 08:23:17 $ +* $Revision: 1.25 $ * ******************************************************************************* */ package com.ibm.text.UCD; +import java.util.Locale; + import com.ibm.text.utility.*; @@ -234,15 +236,15 @@ final class UCD_Names implements UCD_Types { static final String[] YN_TABLE = {"F", "T"}; static final String[] YN_TABLE_LONG = {"False", "True"}; - static String[] SHORT_EA = { + static String[] EAST_ASIAN_WIDTH = { "N", "A", "H", "W", "F", "Na" }; - static String[] EA = { + static String[] LONG_EAST_ASIAN_WIDTH = { "Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow" }; - static final String[] LB = { + static final String[] LINE_BREAK = { "XX", "OP", "CL", "QU", "GL", "NS", "EX", "SY", "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY", "CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB", @@ -255,7 +257,7 @@ final class UCD_Names implements UCD_Types { }; - static final String[] LONG_LB = { + static final String[] LONG_LINE_BREAK = { "Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation", "Glue", "Nonstarter", "Exclamation", "BreakSymbols", "InfixNumeric", "PrefixNumeric", "PostfixNumeric", @@ -270,7 +272,7 @@ final class UCD_Names implements UCD_Types { //"Trailing_Jamo", }; - public static final String[] SCRIPT = { + public static final String[] LONG_SCRIPT = { "COMMON", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924 "LATIN", // LATIN "GREEK", // GREEK @@ -328,7 +330,7 @@ final class UCD_Names implements UCD_Types { }; - public static final String[] ABB_SCRIPT = { + public static final String[] SCRIPT = { "Zyyy", // COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924 "Latn", // LATIN "Grek", // GREEK @@ -398,15 +400,18 @@ final class UCD_Names implements UCD_Types { static final String[] AGE = { - "UNSPECIFIED", + "unassigned", "1.1", - "2.0", "2.1", - "3.0", "3.1", "3.2", + "2.0", + "2.1", + "3.0", + "3.1", + "3.2", "4.0" }; - static final String[] GC = { + static final String[] GENERAL_CATEGORY = { "Cn", // = Other, Not Assigned 0 "Lu", // = Letter, Uppercase 1 @@ -449,7 +454,7 @@ final class UCD_Names implements UCD_Types { "Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe dependingon usage) }; - static final String[] LONG_GC = { + static final String[] LONG_GENERAL_CATEGORY = { "Unassigned", // = Other, Not Assigned 0 "UppercaseLetter", // = Letter, Uppercase 1 @@ -505,7 +510,7 @@ final class UCD_Names implements UCD_Types { - static final String[] BC = { + static final String[] BIDI_CLASS = { "L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs) "R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts "EN", // European Number @@ -520,7 +525,7 @@ final class UCD_Names implements UCD_Types { "", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF" }; - static String[] LONG_BC = { + static String[] LONG_BIDI_CLASS = { "LeftToRight", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs) "RightToLeft", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts "EuropeanNumber", // European Number @@ -543,8 +548,8 @@ final class UCD_Names implements UCD_Types { "LOWER", "TITLE", "UPPER", "UNCASED" }; - static String[] DT = { - "", // NONE + static String[] LONG_DECOMPOSITION_TYPE = { + "none", // NONE "canonical", // CANONICAL "compat", // Otherwise unspecified compatibility character. "font", // A font variant (e.g. a blackletter form). @@ -563,9 +568,8 @@ final class UCD_Names implements UCD_Types { "square", // A CJK squared font variant. "fraction", // A vulgar fraction form. }; - - static String[] SHORT_DT = { - "", // NONE + static String[] DECOMPOSITION_TYPE = { + "none", // NONE "can", // CANONICAL "com", // Otherwise unspecified compatibility character. "font", // A font variant (e.g. a blackletter form). @@ -584,14 +588,19 @@ final class UCD_Names implements UCD_Types { "sqr", // A CJK squared font variant. "fra", // A vulgar fraction form. }; + static { + fixArray(LONG_DECOMPOSITION_TYPE); + fixArray(DECOMPOSITION_TYPE); + } + static private String[] MIRRORED_TABLE = { "N", "Y" }; - static String[] NT = { - "", + static String[] LONG_NUMERIC_TYPE = { + "none", "numeric", "digit", "decimal", @@ -602,8 +611,8 @@ final class UCD_Names implements UCD_Types { */ }; - static String[] SHORT_NT = { - "", + static String[] NUMERIC_TYPE = { + "none", "nu", "di", "de", @@ -613,18 +622,66 @@ final class UCD_Names implements UCD_Types { "ho" */ }; - static { - if (LIMIT_CATEGORY != GC.length || LIMIT_CATEGORY != LONG_GC.length) { + fixArray(LONG_NUMERIC_TYPE); + fixArray(NUMERIC_TYPE); + } + + static String[] COMBINING_CLASS = new String[256]; + static String[] LONG_COMBINING_CLASS = new String[256]; + // TODO clean this up, just a quick copy of code + static { + for (int style = SHORT; style <= LONG; ++style) + for (int index = 0; index < 256; ++index) { + String s = null; + switch (index) { + case 0: s = style < LONG ? "NR" : "NotReordered"; break; + case 1: s = style < LONG ? "OV" : "Overlay"; break; + case 7: s = style < LONG ? "NK" : "Nukta"; break; + case 8: s = style < LONG ? "KV" : "KanaVoicing"; break; + case 9: s = style < LONG ? "VR" : "Virama"; break; + case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break; + case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break; + case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break; + case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break; + case 210: s = style < LONG ? "ATR" : "AttachedRight"; break; + case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break; + case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break; + case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break; + case 218: s = style < LONG ? "BL" : "BelowLeft"; break; + case 220: s = style < LONG ? "B" : "Below"; break; + case 222: s = style < LONG ? "BR" : "BelowRight"; break; + case 224: s = style < LONG ? "L" : "Left"; break; + case 226: s = style < LONG ? "R" : "Right"; break; + case 228: s = style < LONG ? "AL" : "AboveLeft"; break; + case 230: s = style < LONG ? "A" : "Above"; break; + case 232: s = style < LONG ? "AR" : "AboveRight"; break; + case 233: s = style < LONG ? "DB" : "DoubleBelow"; break; + case 234: s = style < LONG ? "DA" : "DoubleAbove"; break; + case 240: s = style < LONG ? "IS" : "IotaSubscript"; break; + default: s = style < LONG ? "" + index : "Fixed_" + index; + } + if (style < LONG) COMBINING_CLASS[index] = s; + else LONG_COMBINING_CLASS[index] = s; + } + if (false) for (int i = 0; i < 256; ++i) { + System.out.println(i + + "\t" + COMBINING_CLASS[i] + + "\t" + LONG_COMBINING_CLASS[i]); + } + } + + static { + if (LIMIT_CATEGORY != GENERAL_CATEGORY.length || LIMIT_CATEGORY != LONG_GENERAL_CATEGORY.length) { System.err.println("!! ERROR !! Enums and Names out of sync: category"); } - if (LIMIT_BIDI_CLASS != BC.length) { + if (LIMIT_BIDI_CLASS != BIDI_CLASS.length) { System.err.println("!! ERROR !! Enums and Names out of sync: bidi"); } - if (LIMIT_LINE_BREAK != LB.length || LIMIT_LINE_BREAK != LONG_LB.length) { + if (LIMIT_LINE_BREAK != LINE_BREAK.length || LIMIT_LINE_BREAK != LONG_LINE_BREAK.length) { System.err.println("!! ERROR !! Enums and Names out of sync: linebreak"); } - if (LIMIT_DECOMPOSITION_TYPE != DT.length || LIMIT_DECOMPOSITION_TYPE != SHORT_DT.length) { + if (LIMIT_DECOMPOSITION_TYPE != LONG_DECOMPOSITION_TYPE.length || LIMIT_DECOMPOSITION_TYPE != DECOMPOSITION_TYPE.length) { System.err.println("!! ERROR !! Enums and Names out of sync: decomp type"); } if (LIMIT_MIRRORED != MIRRORED_TABLE.length) { @@ -633,16 +690,16 @@ final class UCD_Names implements UCD_Types { if (LIMIT_CASE != CASE_TABLE.length) { System.err.println("!! ERROR !! Enums and Names out of sync: case"); } - if (LIMIT_NUMERIC_TYPE != NT.length) { + if (LIMIT_NUMERIC_TYPE != LONG_NUMERIC_TYPE.length) { System.err.println("!! ERROR !! Enums and Names out of sync: numeric type"); } - if (LIMIT_EAST_ASIAN_WIDTH != EA.length) { + if (LIMIT_EAST_ASIAN_WIDTH != LONG_EAST_ASIAN_WIDTH.length) { System.err.println("!! ERROR !! Enums and Names out of sync: east Asian Width"); } if (LIMIT_BINARY_PROPERTIES != BP.length) { System.err.println("!! ERROR !! Enums and Names out of sync: binary properties"); } - if (LIMIT_SCRIPT != SCRIPT.length) { + if (LIMIT_SCRIPT != LONG_SCRIPT.length) { System.err.println("!! ERROR !! Enums and Names out of sync: script"); } if (LIMIT_AGE != AGE.length) { @@ -650,7 +707,7 @@ final class UCD_Names implements UCD_Types { } } - public static byte ON = Utility.lookup("ON", BC, true); + public static byte ON = Utility.lookup("ON", BIDI_CLASS, true); public static String[] HANGUL_SYLLABLE_TYPE = { "NA", @@ -744,6 +801,16 @@ final class UCD_Names implements UCD_Types { "KHAPH", "FE", }; + static { + fixArray(JOINING_GROUP); + } + static void fixArray (String[] array) { + for (int i = 0; i < array.length; ++i) { + array[i] = Utility.getUnskeleton( + array[i].toLowerCase(Locale.ENGLISH), + true); + } + } public static String[] OLD_JOINING_GROUP = { "", diff --git a/tools/unicodetools/com/ibm/text/UCD/UData.java b/tools/unicodetools/com/ibm/text/UCD/UData.java index d37abc87444..dbe897bc8e7 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UData.java +++ b/tools/unicodetools/com/ibm/text/UCD/UData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $ -* $Date: 2004/02/07 01:01:13 $ -* $Revision: 1.10 $ +* $Date: 2004/02/12 08:23:16 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -201,21 +201,21 @@ class UData implements UCD_Types { int lastPos = result.length(); - if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GC[generalCategory]).append('\''); + if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GENERAL_CATEGORY[generalCategory]).append('\''); if (full || combiningClass != 0) result.append(" cc='").append(combiningClass & 0xFF).append('\''); - if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.DT[decompositionType]).append('\''); + if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.LONG_DECOMPOSITION_TYPE[decompositionType]).append('\''); if (full || !s.equals(decompositionMapping)) result.append(" dm='").append(Utility.quoteXML(decompositionMapping)).append('\''); - if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.NT[numericType]).append('\''); + if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.LONG_NUMERIC_TYPE[numericType]).append('\''); if (full || !Double.isNaN(numericValue)) result.append(" nv='").append(numericValue).append('\''); - if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.EA[eastAsianWidth]).append('\''); - if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LB[lineBreak]).append('\''); + if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.LONG_EAST_ASIAN_WIDTH[eastAsianWidth]).append('\''); + if (full || lineBreak != LB_AL) result.append(" lb='").append(UCD_Names.LINE_BREAK[lineBreak]).append('\''); if (joiningType != -1 && (full || joiningType != JT_U)) result.append(" jt='").append(UCD_Names.JOINING_TYPE[joiningType]).append('\''); if (full || joiningGroup != NO_SHAPING) result.append(" jg='").append(UCD_Names.JOINING_GROUP[joiningGroup]).append('\''); if (full || age != 0) result.append(" ag='").append(UCD_Names.AGE[age]).append('\''); - if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BC[bidiClass]).append('\''); + if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BIDI_CLASS[bidiClass]).append('\''); if (full || !bidiMirror.equals(s)) result.append(" bmg='").append(Utility.quoteXML(bidiMirror)).append('\''); if (lastPos != result.length()) { diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeMap.java b/tools/unicodetools/com/ibm/text/UCD/UnicodeMap.java deleted file mode 100644 index 1154897188d..00000000000 --- a/tools/unicodetools/com/ibm/text/UCD/UnicodeMap.java +++ /dev/null @@ -1,109 +0,0 @@ -/** -******************************************************************************* -* Copyright (C) 1996-2001, International Business Machines Corporation and * -* others. All Rights Reserved. * -******************************************************************************* -* -* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Attic/UnicodeMap.java,v $ -* $Date: 2003/04/02 05:16:44 $ -* $Revision: 1.2 $ -* -******************************************************************************* -*/ - -package com.ibm.text.UCD; - -import java.util.*; -import java.io.*; - -import com.ibm.text.utility.*; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; - -/** - * Class that maps from codepoints to an index, and optionally a label. - */ -public class UnicodeMap { - UnicodeSet[] sets = new UnicodeSet[50]; - String[] labels = new String[50]; - int count = 0; - - public int add(String label, UnicodeSet set) { - return add(label, set, false, true); - } - - /** - * Add set - *@param removeOld true: remove any collisions from sets already in the map - * if false, remove any collisions from this set - *@param signal: print a warning when collisions occur - */ - public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) { - // remove from any preceding!! - for (int i = 0; i < count; ++i) { - if (!set.containsSome(sets[i])) continue; - if (signal) showOverlap(label, set, i); - if (removeOld) { - sets[i] = sets[i].removeAll(set); - } else { - set = set.removeAll(sets[i]); - } - } - sets[count] = set; - labels[count++] = label; - return (short)(count - 1); - } - - public void showOverlap(String label, UnicodeSet set, int i) { - UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]); - System.out.println("Warning! Overlap with " + label + " and " + labels[i] - + ": " + delta); - } - - public int getIndex(int codepoint) { - for (int i = count - 1; i >= 0; --i) { - if (sets[i].contains(codepoint)) return i; - } - return -1; - } - - public int getIndexFromLabel(String label) { - for (int i = count - 1; i >= 0; --i) { - if (labels[i].equalsIgnoreCase(label)) return i; - } - return -1; - } - - public String getLabel(int codepoint) { - return getLabelFromIndex(getIndex(codepoint)); - } - - public String getLabelFromIndex(int index) { - if (index < 0 || index >= count) return null; - return labels[index]; - } - - public UnicodeSet getSetFromIndex(int index) { - if (index < 0 || index >= count) return null; - return new UnicodeSet(sets[index]); // protect from changes - } - - public int size() { - return count; - } - - public int setLabel(int index, String label) { - labels[index] = label; - return index; - } - - public int put(int codepoint, int index) { - if (sets[index] == null) { - sets[index] = new UnicodeSet(); - if (index >= count) count = index + 1; - } - sets[index].add(codepoint); - return index; - } - -} diff --git a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java index f54a6f19e92..708d42ed273 100644 --- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $ -* $Date: 2004/02/07 01:01:12 $ -* $Revision: 1.24 $ +* $Date: 2004/02/12 08:23:16 $ +* $Revision: 1.25 $ * ******************************************************************************* */ @@ -2239,11 +2239,11 @@ E0020-E007F; [TAGGING CHARACTERS] int j = UTF32.char32At(s, 0); try { if (q == 0) { - check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GC, "GeneralCategory"); + check(i, Default.ucd().getCategory(i), Default.ucd().getCategory(j), UCD_Names.GENERAL_CATEGORY, "GeneralCategory"); check(i, Default.ucd().getCombiningClass(i), Default.ucd().getCombiningClass(j), "CanonicalClass"); - check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BC, "BidiClass"); + check(i, Default.ucd().getBidiClass(i), Default.ucd().getBidiClass(j), UCD_Names.BIDI_CLASS, "BidiClass"); check(i, Default.ucd().getNumericValue(i), Default.ucd().getNumericValue(j), "NumericValue"); - check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.NT, "NumericType"); + check(i, Default.ucd().getNumericType(i), Default.ucd().getNumericType(j), UCD_Names.LONG_NUMERIC_TYPE, "NumericType"); if (false) { for (byte k = LOWER; k < LIMIT_CASE; ++k) { diff --git a/tools/unicodetools/com/ibm/text/utility/CallArgs.java b/tools/unicodetools/com/ibm/text/utility/CallArgs.java index 74e712003a3..623f6c39d9a 100644 --- a/tools/unicodetools/com/ibm/text/utility/CallArgs.java +++ b/tools/unicodetools/com/ibm/text/utility/CallArgs.java @@ -28,18 +28,22 @@ public class CallArgs { } int pos = arg.indexOf('.'); Method method = null; + String className = "Main"; + String methodName = ""; if (pos >= 0) { - String className = prefix + arg.substring(0,pos); - String methodName = arg.substring(pos+1); + className = prefix + arg.substring(0,pos); + methodName = arg.substring(pos+1); method = tryMethod(className, methodName, methodArgs); } else { - method = tryMethod("Main", arg, methodArgs); + method = tryMethod(className, arg, methodArgs); if (method == null) { - method = tryMethod(arg, "main", methodArgs); + className = arg; + methodName = "main"; + method = tryMethod(className, methodName, methodArgs); } } - if (method == null) throw new IllegalArgumentException("Bad parameter: " + arg); + if (method == null) throw new IllegalArgumentException("Bad parameter: " + className + ", " + methodName); System.out.println(method.getName() + "\t" + bf.join(methodArgs)); method.invoke(null,methodArgs); } diff --git a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java new file mode 100644 index 00000000000..388f7235738 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java @@ -0,0 +1,127 @@ +package com.ibm.text.utility; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; + +import com.ibm.text.UCD.Default; +import com.ibm.text.UCD.GenerateData; +import com.ibm.text.UCD.UCD_Types; + +public class UnicodeDataFile { + public PrintWriter out; + private String newFile; + private String batName; + private String mostRecent; + private UnicodeDataFile(){}; + + public static UnicodeDataFile openAndWriteHeader(String directory, String filename) throws IOException { + UnicodeDataFile result = new UnicodeDataFile(); + result.newFile = directory + filename + UnicodeDataFile.getFileSuffix(true); + result.out = Utility.openPrintWriter(result.newFile, Utility.LATIN1_UNIX); + String[] batName = {""}; + result.mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); + result.batName = batName[0]; + + result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + result.out.println(generateDateLine()); + result.out.println("#"); + try { + Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out); + } catch (FileNotFoundException e) { + result.out.println("# Unicode Character Database: Derived Property Data"); + result.out.println("# Generated algorithmically from the Unicode Character Database"); + result.out.println("# For documentation, see UCD.html"); + result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted"); + result.out.println("# if they have default property values."); + result.out.println("# ================================================"); + } + return result; + } + + public void close() throws IOException { + Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName); + out.close(); + } + + public static String generateDateLine() { + return "# Date: " + Default.getDate() + " [MD]"; + } + + public static String getHTMLFileSuffix(boolean withDVersion) { + return "-" + Default.ucd().getVersion() + + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "") + + ".html"; + } + + public static String getFileSuffix(boolean withDVersion) { + return "-" + Default.ucd().getVersion() + + ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "") + + ".txt"; + } + + //Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names + + public static String fixFile(String s) { + int len = s.length(); + if (!s.endsWith(".txt")) return s; + if (s.charAt(len-6) != 'd') return s; + char c = s.charAt(len-5); + if (c != 'X' && (c < '0' || '9' < c)) return s; + s = s.substring(0,len-6) + s.substring(len-4); + System.out.println("Fixing File Name: " + s); + return s; + } + + private static String generateBatAux(String batName, String oldName, String newName) throws IOException { + String fullBatName = batName + ".bat"; + PrintWriter output = Utility.openPrintWriter(batName + ".bat", Utility.LATIN1_UNIX); + + newName = Utility.getOutputName(newName); + System.out.println("Writing BAT to compare " + oldName + " and " + newName); + + File newFile = new File(newName); + File oldFile = new File(oldName); + output.println("\"C:\\Program Files\\Compare It!\\wincmp3.exe\" " + // "\"C:\\Program Files\\wincmp.exe\" " + + oldFile.getCanonicalFile() + + " " + + newFile.getCanonicalFile()); + output.close(); + return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString(); + } + + /* + static String skeleton(String source) { + StringBuffer result = new StringBuffer(); + source = source.toLowerCase(); + for (int i = 0; i < source.length(); ++i) { + char c = source.charAt(i); + if (c == ' ' || c == '_' || c == '-') continue; + result.append(c); + } + return result.toString(); + } + */ + // static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1; + + public static String generateBat(String directory, String fileRoot, String suffix, String[] outputBatName) throws IOException { + String mostRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), true, true); + if (mostRecent != null) { + outputBatName[0] = UnicodeDataFile.generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix, + mostRecent, directory + fileRoot + suffix); + } else { + System.out.println("No previous version of: " + fileRoot + ".txt"); + return null; + } + + String lessRecent = Utility.getMostRecentUnicodeDataFile(UnicodeDataFile.fixFile(fileRoot), Default.ucd().getVersion(), false, true); + if (lessRecent != null && !mostRecent.equals(lessRecent)) { + UnicodeDataFile.generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix, + lessRecent, directory + fileRoot + suffix); + } + return mostRecent; + } +} + diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 25f717338bb..3952fef6fb1 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2004/02/07 01:01:17 $ -* $Revision: 1.38 $ +* $Date: 2004/02/12 08:23:14 $ +* $Revision: 1.39 $ * ******************************************************************************* */ @@ -16,6 +16,7 @@ package com.ibm.text.utility; import java.util.*; import java.text.*; import java.io.*; + import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.Replaceable; @@ -717,7 +718,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException { File file = new File(directory + filename); Utility.fixDot(); - System.out.println("Creating File: " + file.getCanonicalPath()); + System.out.print("Creating File: " + file); + System.out.println("\t" + file.getCanonicalPath()); File parent = new File(file.getParent()); //System.out.println("Creating File: "+ parent); parent.mkdirs(); @@ -1095,7 +1097,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES } public static void showSetDifferences(PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2, - boolean separateLines, boolean withChar, UnicodeMap names, UCD ucd) { + boolean separateLines, boolean withChar, OldUnicodeMap names, UCD ucd) { UnicodeSet temp = new UnicodeSet(set1).removeAll(set2); pw.println(); @@ -1135,7 +1137,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES static java.text.NumberFormat nf = java.text.NumberFormat.getInstance(); public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, - boolean withChar, UnicodeMap names, UCD ucd) { + boolean withChar, OldUnicodeMap names, UCD ucd) { if (set.size() == 0) { pw.println(prefix + ""); pw.flush(); @@ -1196,4 +1198,5 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES private static boolean isSeparateLineIDN(int start, int end, UCD ucd) { return (isSeparateLineIDN(start, ucd) || isSeparateLineIDN(end, ucd)); } + } \ No newline at end of file