diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java index 86207b6795e..b73744279a4 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ -* $Date: 2004/02/07 01:01:12 $ -* $Revision: 1.19 $ +* $Date: 2004/02/12 08:23:19 $ +* $Revision: 1.20 $ * ******************************************************************************* */ @@ -1033,19 +1033,28 @@ public class WriteCharts implements UCD_Types { int[] starts = new int[names.length]; int[] ends = new int[names.length]; - UCD.BlockData blockData = new UCD.BlockData(); + Iterator blockIterator = Default.ucd().getBlockNames().iterator(); + + //UCD.BlockData blockData = new UCD.BlockData(); int counter = 0; - int blockId = 0; - while (Default.ucd().getBlockData(blockId++, blockData)) { - names[counter] = blockData.name; - starts[counter] = blockData.start; - ends[counter] = blockData.end; + String currentName; + //int blockId = 0; + while (blockIterator.hasNext()) { + //while (Default.ucd().getBlockData(blockId++, blockData)) { + names[counter] = currentName = (String) blockIterator.next(); + if (currentName.equals("No_Block")) continue; + UnicodeSet s = Default.ucd().getBlockSet(currentName, null); + if (s.getRangeCount() != 1) { + throw new IllegalArgumentException("Failure with block set: " + currentName); + } + starts[counter] = s.getRangeStart(0); + ends[counter] = s.getRangeEnd(0); //System.out.println(names[counter] + ", " + values[counter]); ++counter; // HACK - if (blockData.name.equals("Tags")) { + if (currentName.equals("Tags")) { names[counter] = "reserved default ignorable"; starts[counter] = 0xE0080; ends[counter] = 0xE0FFF; diff --git a/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt new file mode 100644 index 00000000000..f933abd0596 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt @@ -0,0 +1,7 @@ +# Correlated with Unicode 4.0 +# Note: The casing of block names is not normative. +# For example, "Basic Latin" and "BASIC LATIN" are equivalent. +# +# Code points not explicitly listed in this file are given the value No_Block. +# +# Start Code..End Code; Block Name diff --git a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java index bad7874be49..e2e7c0cb1e3 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java +++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java @@ -4,9 +4,13 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -20,7 +24,6 @@ import com.ibm.text.utility.Utility; public class CheckICU { static final BagFormatter bf = new BagFormatter(); - static final BagFormatter bf2 = new BagFormatter(); public static void main(String[] args) throws IOException { System.out.println("Start"); @@ -45,25 +48,24 @@ public class CheckICU { return p.getMaxWidth(v); } } - + + public static void test() throws IOException { + //generateFile("4.0.0", "DerivedCombiningClass"); + //generateFile("4.0.0", "DerivedCoreProperties"); + if (true) return; checkUCD(); itemFailures = new UnicodeSet(); icuFactory = ICUPropertyFactory.make(); toolFactory = ToolUnicodePropertySource.make("4.0.0"); String[] quickList = { - "Block", + "Math", // "Script", "Bidi_Mirroring_Glyph", "Case_Folding", //"Numeric_Value" }; for (int i = 0; i < quickList.length; ++i) { - //testProperty(quickList[i], -1); - bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i]))) - .setLabelSource(null) - .setNameSource(null) - .setShowCount(false); - bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF)); + testProperty(quickList[i], -1); } if (quickList.length > 0) return; @@ -97,11 +99,16 @@ public class CheckICU { if (nfc.isLeading(i)) leading.add(i); } PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt"); - bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter)); - bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter)); - bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter)); - bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading)); - bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading)); + pw.println("+Trailing+Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(starter)); + pw.println("+Trailing-Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter)); + pw.println("-Trailing-Starter"); + bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter)); + pw.println("+Trailing+Leading"); + bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading)); + pw.println("+Trailing-Leading"); + bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading)); pw.close(); } /* diff --git a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java index adfe2f46ebc..598636482de 100644 --- a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java +++ b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $ -* $Date: 2004/02/07 01:01:16 $ -* $Revision: 1.4 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -92,30 +92,26 @@ public class CompareProperties implements UCD_Types { public final static class UnicodeSetComparator implements Comparator { /** * Compares two UnicodeSets, producing a transitive ordering. - * @return -1 if first is smaller (in size) than second, - * 1 if first is greater (in size) than second, - * Otherwise (since they are equal in size) - * returns a comparison based on the first range that differs. + * The ordering is based on the first codepoint that differs between them. + * @return -1 if first set contains the first different code point + * 1 if the second set does. + * 0 if there is no difference. * If compareTo were added to UnicodeSet, this can be optimized to use list[i]. * @author Davis * */ public int compare(Object o1, Object o2) { - UnicodeSet bs1 = (UnicodeSet) o1; - UnicodeSet bs2 = (UnicodeSet) o2; - if (bs1.size() < bs2.size()) return -1; - if (bs1.size() > bs2.size()) return 1; - UnicodeSetIterator it1 = new UnicodeSetIterator(bs1); - UnicodeSetIterator it2 = new UnicodeSetIterator(bs2); - // Note: because they are the same size, and we stop if any ranges - // are different, it is safe to test for both at the same time - while (it1.nextRange() && it2.nextRange()) { - if (it1.codepoint < it2.codepoint) return -1; + UnicodeSetIterator it1 = new UnicodeSetIterator((UnicodeSet) o1); + UnicodeSetIterator it2 = new UnicodeSetIterator((UnicodeSet) o2); + while (it1.nextRange()) { + if (!it2.nextRange()) return -1; // first has range while second exhausted + if (it1.codepoint < it2.codepoint) return -1; // first has code point not in second if (it1.codepoint > it2.codepoint) return 1; - if (it1.codepointEnd < it2.codepointEnd) return -1; - if (it1.codepointEnd > it2.codepointEnd) return 1; + if (it1.codepointEnd < it2.codepointEnd) return 1; // second has codepoint not in first + if (it1.codepointEnd > it2.codepointEnd) return -1; } - return 0; + if (it2.nextRange()) return 1; // second has range while first is exhausted + return 0; // otherwise we ran out in both of them, so equal } } @@ -210,7 +206,7 @@ public class CompareProperties implements UCD_Types { public void printPartition() throws IOException { System.out.println("Set Size: " + map.size()); PrintWriter output = Utility.openPrintWriter("Partition" - + GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS); + + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS); Iterator it = map.keySet().iterator(); while (it.hasNext()) { @@ -234,7 +230,7 @@ public class CompareProperties implements UCD_Types { public void printStatistics() throws IOException { System.out.println("Set Size: " + map.size()); PrintWriter output = Utility.openPrintWriter("Statistics" - + GenerateData.getFileSuffix(true), Utility.LATIN1_WINDOWS); + + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS); System.out.println("Finding disjoints/contains"); for (int i = 0; i < count; ++i) { @@ -383,10 +379,10 @@ public class CompareProperties implements UCD_Types { public static void listDifferences() throws IOException { - PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet"); output.println("# Generated for " + Default.ucd().getVersion()); - output.println(GenerateData.generateDateLine()); + output.println(UnicodeDataFile.generateDateLine()); output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)"); diff --git a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java index adc03b0eff9..1a93de08d3d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $ -* $Date: 2004/02/06 18:30:23 $ -* $Revision: 1.13 $ +* $Date: 2004/02/12 08:23:17 $ +* $Revision: 1.14 $ * ******************************************************************************* */ @@ -834,9 +834,9 @@ public final class ConvertUCD implements UCD_Types { //UCD_Names.BP_OLD } else if (fieldName.equals("gc")) { - uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GC, true); + uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true); } else if (fieldName.equals("bc")) { - uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BC, true); + uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true); } else if (fieldName.equals("dt")) { if (major < 2) { if (fieldValue.equals("no-break")) fieldValue = "noBreak"; @@ -847,17 +847,17 @@ public final class ConvertUCD implements UCD_Types { else if (fieldValue.equals("no-join")) fieldValue = "compat"; else if (fieldValue.equals("join")) fieldValue = "compat"; } - uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.DT, true); + uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true); } else if (fieldName.equals("nt")) { - uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true); + uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true); } else if (fieldName.equals("ea")) { - uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true); + uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true); } else if (fieldName.equals("lb")) { - uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true); + uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true); } else if (fieldName.equals("sn")) { - uData.script = Utility.lookup(fieldValue, UCD_Names.SCRIPT, true); + uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true); } else if (fieldName.equals("jt")) { uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true); diff --git a/tools/unicodetools/com/ibm/text/UCD/Default.java b/tools/unicodetools/com/ibm/text/UCD/Default.java index 178b1d3fb5d..ad5365c6ac7 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Default.java +++ b/tools/unicodetools/com/ibm/text/UCD/Default.java @@ -21,13 +21,19 @@ public final class Default implements UCD_Types { setUCD(); } + private static boolean inRecursiveCall = false; private static void setUCD() { - ucd = UCD.make(ucdVersion()); + if (inRecursiveCall) { + throw new IllegalArgumentException("Recursive call to setUCD"); + } + inRecursiveCall = true; + ucd = UCD.make(ucdVersion); nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion()); nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion()); nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion()); nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion()); System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate()))); + inRecursiveCall = false; } static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'"); @@ -40,32 +46,32 @@ public final class Default implements UCD_Types { } public static String ucdVersion() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return ucdVersion; } public static UCD ucd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return ucd; } public static Normalizer nfc() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfc; } public static Normalizer nfd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfd; } public static Normalizer nfkc() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfkc; } public static Normalizer nfkd() { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nfkd; } public static Normalizer nf(int index) { - if (ucd() == null) setUCD(); + if (ucd == null) setUCD(); return nf[index]; } diff --git a/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt new file mode 100644 index 00000000000..1c786cf31c8 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt @@ -0,0 +1,16 @@ +# +# Unicode Character Database: Derived Property Data +# This file shows when various code points were designated in Unicode +# Notes: +# - The term 'designated' means that a previously reserved code point was specified +# to be a noncharacter or surrogate, or assigned as a character, +# control or format code. +# - Versions are only tracked from 1.1 onwards, since version 1.0 +# predated changes required by the ISO 10646 merger. +# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing. +# - The supplementary private use code points and the non-character code points +# were designated in version 2.0, but not specifically listed in the UCD +# until versions 3.0 and 3.1 respectively. +# +# For details on the contents of each version, see +# http://www.unicode.org/versions/enumeratedversions.html. diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java index 3cf31ed2fd8..34523ce14c2 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $ -* $Date: 2004/02/07 01:01:16 $ -* $Revision: 1.9 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.10 $ * ******************************************************************************* */ @@ -28,8 +28,8 @@ abstract public class GenerateBreakTest implements UCD_Types { Normalizer nfd; Normalizer nfkd; - UnicodeMap sampleMap = null; - UnicodeMap map = new UnicodeMap(); + OldUnicodeMap sampleMap = null; + OldUnicodeMap map = new OldUnicodeMap(); // ====================== Main =========================== diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java index f3f4a36239c..6e2d6382efe 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $ -* $Date: 2004/02/07 01:01:15 $ -* $Revision: 1.15 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.16 $ * ******************************************************************************* */ @@ -15,6 +15,7 @@ package com.ibm.text.UCD; import java.util.*; import java.io.*; + import com.ibm.icu.text.UTF16; import com.ibm.text.utility.*; @@ -37,11 +38,12 @@ public class GenerateCaseFolding implements UCD_Types { static PrintWriter log; + public static void makeCaseFold(boolean normalized) throws java.io.IOException { PICK_SHORT = NF_CLOSURE = normalized; - log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); - System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true)); + log = Utility.openPrintWriter("CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); + System.out.println("Writing Log: " + "CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true)); System.out.println("Making Full Data"); Map fullData = getCaseFolding(true, NF_CLOSURE, ""); @@ -64,15 +66,8 @@ public class GenerateCaseFolding implements UCD_Types { String filename = "CaseFolding"; if (normalized) filename += "-Normalized"; String directory = "DerivedData/"; - String newFile = directory + filename + GenerateData.getFileSuffix(true); - PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - String[] batName = {""}; - String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName); - - out.println("# CaseFolding" + GenerateData.getFileSuffix(false)); - out.println(GenerateData.generateDateLine()); - out.println("#"); - Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out); + UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, filename); + PrintWriter out = fc.out; /* PrintWriter out = new PrintWriter( @@ -124,9 +119,8 @@ public class GenerateCaseFolding implements UCD_Types { drawLine(out, ch, "t", rSimpleTurkish); } } - out.close(); + fc.close(); log.close(); - Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); } /* Goal is following (with no entries for 0131 or 0069) @@ -470,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types { if (normalize) suffix2 = "-Normalized"; PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions" - + suffix2 + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX); + + suffix2 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); for (int ch = 0; ch <= 0x10FFFF; ++ch) { Utility.dot(ch); @@ -580,12 +574,12 @@ public class GenerateCaseFolding implements UCD_Types { log.close(); System.out.println("Writing"); - String newFile = "DerivedData/SpecialCasing" + suffix2 + GenerateData.getFileSuffix(true); + String newFile = "DerivedData/SpecialCasing" + suffix2 + UnicodeDataFile.getFileSuffix(true); PrintWriter out = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = GenerateData.generateBat("DerivedData/", "SpecialCasing", suffix2 + GenerateData.getFileSuffix(true), batName); - out.println("# SpecialCasing" + GenerateData.getFileSuffix(false)); - out.println(GenerateData.generateDateLine()); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "SpecialCasing", suffix2 + UnicodeDataFile.getFileSuffix(true), batName); + out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false)); + out.println(UnicodeDataFile.generateDateLine()); out.println("#"); Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out); diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index bec40627b4c..8286f8335b0 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2004/02/07 01:01:15 $ -* $Revision: 1.32 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.33 $ * ******************************************************************************* */ @@ -95,24 +95,11 @@ public class GenerateData implements UCD_Types { } - //Remove "d1" from DerivedJoiningGroup-3.1.0d1.txt type names - - public static String fixFile(String s) { - int len = s.length(); - if (!s.endsWith(".txt")) return s; - if (s.charAt(len-6) != 'd') return s; - char c = s.charAt(len-5); - if (c != 'X' && (c < '0' || '9' < c)) return s; - s = s.substring(0,len-6) + s.substring(len-4); - System.out.println("Fixing File Name: " + s); - return s; - } - static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2; public static void doHeader(String fileName, PrintWriter output, int headerChoice) { output.println("# " + fileName); - output.println(generateDateLine()); + output.println(UnicodeDataFile.generateDateLine()); output.println("#"); if (headerChoice == HEADER_SCRIPTS) { } else if (headerChoice == HEADER_EXTEND) { @@ -128,18 +115,6 @@ public class GenerateData implements UCD_Types { output.println(); } - public static String getFileSuffix(boolean withDVersion) { - return "-" + Default.ucd().getVersion() - + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") - + ".txt"; - } - - public static String getHTMLFileSuffix(boolean withDVersion) { - return "-" + Default.ucd().getVersion() - + ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "") - + ".html"; - } - public static void checkDifferences (String targetVersion) throws IOException { System.out.println("Checking Differences"); UCD target = UCD.make(targetVersion); @@ -176,14 +151,14 @@ public class GenerateData implements UCD_Types { public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException { - String newFile = directory + fileName + getFileSuffix(true); + String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true); System.out.println("New File: " + newFile); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName); System.out.println("Most recent: " + mostRecent); - doHeader(fileName + getFileSuffix(false), output, headerChoice); + doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice); for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) { UCDProperty up = DerivedProperty.make(i, Default.ucd()); if (up == null) continue; @@ -227,13 +202,13 @@ public class GenerateData implements UCD_Types { public static void generateCompExclusions() throws IOException { - String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true); + String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName); - output.println("# CompositionExclusions" + getFileSuffix(false)); - output.println(generateDateLine()); + output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false)); + output.println(UnicodeDataFile.generateDateLine()); output.println("#"); output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table."); output.println("#"); @@ -289,10 +264,6 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); } - static String generateDateLine() { - return "# Date: " + Default.getDate() + " [MD]"; - } - static class CompLister extends PropertyLister { UCD oldUCD; int type; @@ -509,21 +480,27 @@ public class GenerateData implements UCD_Types { } } + Iterator blockIterator = Default.ucd().getBlockNames().iterator(); + while (blockIterator.hasNext()) { + addLine(sorted, "blk", "n/a", (String)blockIterator.next()); + } + /* UCD.BlockData blockData = new UCD.BlockData(); int blockId = 0; while (Default.ucd().getBlockData(blockId++, blockData)) { addLine(sorted, "blk", "n/a", blockData.name); } + */ String filename = "PropertyAliases"; - String newFile = "DerivedData/" + filename + getFileSuffix(true); + String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName); - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log); log.println(HORIZONTAL_LINE); @@ -538,12 +515,12 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); filename = "PropertyValueAliases"; - newFile = "DerivedData/" + filename + getFileSuffix(true); + newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true); log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - mostRecent = generateBat("DerivedData/", filename, getFileSuffix(true), batName); + mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName); - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log); log.println(HORIZONTAL_LINE); @@ -554,9 +531,9 @@ public class GenerateData implements UCD_Types { Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]); filename = "PropertyAliasSummary"; - newFile = "OtherData/" + filename + getFileSuffix(true); + newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true); log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); - mostRecent = generateBat("OtherData/", filename, getFileSuffix(true), batName); + mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName); log.println(); log.println(HORIZONTAL_LINE); @@ -682,66 +659,16 @@ public class GenerateData implements UCD_Types { } } - /* - static String skeleton(String source) { - StringBuffer result = new StringBuffer(); - source = source.toLowerCase(); - for (int i = 0; i < source.length(); ++i) { - char c = source.charAt(i); - if (c == ' ' || c == '_' || c == '-') continue; - result.append(c); - } - return result.toString(); - } - */ - // static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1; - - public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException { - String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true); - if (mostRecent != null) { - batName[0] = generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix, - mostRecent, directory + fileRoot + suffix); - } else { - System.out.println("No previous version of: " + fileRoot + ".txt"); - return null; - } - - String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true); - if (lessRecent != null && !mostRecent.equals(lessRecent)) { - generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix, - lessRecent, directory + fileRoot + suffix); - } - return mostRecent; - } - - public static String generateBatAux(String batName, String oldName, String newName) throws IOException { - String fullBatName = batName + ".bat"; - PrintWriter output = Utility.openPrintWriter(batName + ".bat", Utility.LATIN1_UNIX); - - newName = Utility.getOutputName(newName); - System.out.println("Writing BAT to compare " + oldName + " and " + newName); - - File newFile = new File(newName); - File oldFile = new File(oldName); - output.println("\"C:\\Program Files\\wincmp.exe\" " - + oldFile.getCanonicalFile() - + " " - + newFile.getCanonicalFile()); - output.close(); - return new File(Utility.getOutputName(fullBatName)).getCanonicalFile().toString(); - } - - public static void generateVerticalSlice(int startEnum, int endEnum, int headerChoice, String directory, String file) throws IOException { - String newFile = directory + file + getFileSuffix(true); + String newFile = directory + file + UnicodeDataFile.getFileSuffix(true); PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, file, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName); - doHeader(file + getFileSuffix(false), output, headerChoice); + doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice); int last = -1; for (int i = startEnum; i < endEnum; ++i) { UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd()); @@ -810,15 +737,15 @@ public class GenerateData implements UCD_Types { static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException { - String newFile = directory + fileName + getFileSuffix(true); + String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, fileName, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName); String[] example = new String[256]; - log.println("# " + fileName + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); log.println("# Normalization Test Suite"); log.println("# Format:"); @@ -1012,10 +939,10 @@ public class GenerateData implements UCD_Types { static final void backwardsCompat(String directory, String filename, int[] list) throws IOException { - String newFile = directory + filename + getFileSuffix(true); + String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); DiffPropertyLister dpl; UnicodeSet cummulative = new UnicodeSet(); @@ -1095,13 +1022,13 @@ public class GenerateData implements UCD_Types { static final void generateAge(String directory, String filename) throws IOException { - String newFile = directory + filename + getFileSuffix(true); + String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true); PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = generateBat(directory, filename, getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); try { - log.println("# " + filename + getFileSuffix(false)); - log.println(generateDateLine()); + log.println("# " + filename + UnicodeDataFile.getFileSuffix(false)); + log.println(UnicodeDataFile.generateDateLine()); log.println("#"); log.println("# Unicode Character Database: Derived Property Data"); log.println("# This file shows when various code points were designated in Unicode"); @@ -1195,7 +1122,7 @@ public class GenerateData implements UCD_Types { public static void listCombiningAccents() throws IOException { - PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter log = Utility.openPrintWriter("ListAccents" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); @@ -1232,7 +1159,7 @@ public class GenerateData implements UCD_Types { public static void listGreekVowels() throws IOException { - PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX); + PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX); Set set = new TreeSet(); Set set2 = new TreeSet(); diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java index ad43b6b0e9b..3ba8181c18f 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $ -* $Date: 2004/02/07 01:01:14 $ -* $Revision: 1.4 $ +* $Date: 2004/02/12 08:23:15 $ +* $Revision: 1.5 $ * ******************************************************************************* */ @@ -97,10 +97,10 @@ public final class GenerateStandardizedVariants implements UCD_Types { // now write out the results String directory = "DerivedData/"; - String filename = directory + "StandardizedVariants" + GenerateData.getHTMLFileSuffix(true); + String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true); PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX); String[] batName = {""}; - String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName); + String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName); String version = Default.ucd().getVersion(); int lastDot = version.lastIndexOf('.'); diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java new file mode 100644 index 00000000000..f3f1952f404 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java @@ -0,0 +1,280 @@ +package com.ibm.text.UCD; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; +import java.util.TreeSet; + +import com.ibm.icu.dev.test.util.BagFormatter; +import com.ibm.icu.dev.test.util.UnicodeProperty; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.text.utility.UnicodeDataFile; + +public class MakeUnicodeFiles { + + static boolean DEBUG = true; + + public static void main() throws IOException { + generateFile("Scripts","z"); + } + + static class OrderedMap { + HashMap map = new HashMap(); + ArrayList keys = new ArrayList(); + void put(Object o, Object t) { + map.put(o,t); + keys.add(o); + } + List keyset() { + return keys; + } + } + + static class PrintStyle { + boolean longForm = false; + boolean noLabel = false; + boolean makeUppercase = false; + boolean makeFirstLetterLowercase = false; + String skipValue = null; + String skipUnassigned = null; + boolean orderByRangeStart = false; + boolean valueList = false; + + PrintStyle setLongForm(boolean value) { + longForm = value; + return this; + } + PrintStyle setSkipUnassigned(String value) { + skipUnassigned = value; + return this; + } + PrintStyle setNoLabel(boolean value) { + noLabel = value; + return this; + } + PrintStyle setMakeUppercase(boolean value) { + makeUppercase = value; + return this; + } + PrintStyle setMakeFirstLetterLowercase(boolean value) { + makeFirstLetterLowercase = value; + return this; + } + PrintStyle setSkipValue(String value) { + skipValue = value; + return this; + } + PrintStyle setOrderByRangeStart(boolean value) { + orderByRangeStart = value; + return this; + } + PrintStyle setValueList(boolean value) { + valueList = value; + return this; + } + } + static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle(); + static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator(); + static Map printStyles = new TreeMap(/*skeletonComparator*/); + static { + printStyles.put("Script", new PrintStyle().setLongForm(true) + .setMakeUppercase(true).setSkipUnassigned("Common")); + printStyles.put("Age", new PrintStyle().setNoLabel(true)); + printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true) + .setMakeFirstLetterLowercase(true).setSkipUnassigned("none")); + printStyles.put("General_Category", new PrintStyle().setNoLabel(true) + //.setSkipUnassigned(true) + ); + printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown")); + printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining")); + printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group") + .setMakeUppercase(true)); + printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral")); + printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true) + .setSkipValue("None").setMakeFirstLetterLowercase(true)); + printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right")); + printStyles.put("Block", new PrintStyle().setNoLabel(true) + .setValueList(true)); + printStyles.put("Age", new PrintStyle().setSkipValue("unassigned")); + printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0")); + printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA")); + + } + //PropertyAliases + //PropertyValueAliases + //CompositionExclusions + //SpecialCasing + //NormalizationTest + //add("CaseFolding", new String[] {"CaseFolding"}); + static Map contents = new TreeMap(); + static void add(String name, String[] properties) { + contents.put(name, properties); + } + static { + add("Blocks", new String[] {"Block"}); + add("DerivedAge", new String[] {"Age"}); + add("Scripts", new String[] {"Script"}); + add("HangulSyllableType", new String[] {"HangulSyllableType"}); + if (false) add("DerivedNormalizationProps", new String[] { + "FNC", "Full_Composition_Exclusion", + "NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck", + "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"}); + + add("DerivedBidiClass", new String[] {"BidiClass"}); + add("DerivedBinaryProperties", new String[] {"BidiMirrored"}); + add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"}); + add("DerivedDecompositionType", new String[] {"DecompositionType"}); + add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"}); + add("DerivedGeneralCategory", new String[] {"GeneralCategory"}); + add("DerivedJoiningGroup", new String[] {"JoiningGroup"}); + add("DerivedJoiningType", new String[] {"JoiningType"}); + add("DerivedLineBreak", new String[] {"LineBreak"}); + add("DerivedNumericType", new String[] {"NumericType"}); + add("DerivedNumericValues", new String[] {"NumericValue"}); + add("PropList", new String[] { + "White_Space", "Bidi_Control", "Join_Control", + "Dash", "Hyphen", "Quotation_Mark", + "Terminal_Punctuation", "Other_Math", + "Hex_Digit", "ASCII_Hex_Digit", + "Other_Alphabetic", + "Ideographic", + "Diacritic", "Extender", + "Other_Lowercase", "Other_Uppercase", + "Noncharacter_Code_Point", + "Other_Grapheme_Extend", + "Grapheme_Link", + "IDS_Binary_Operator", "IDS_Trinary_Operator", + "Radical", "Unified_Ideograph", + "Other_Default_Ignorable_Code_Point", + "Deprecated", "Soft_Dotted", + "Logical_Order_Exception", + "Other_ID_Start" + }); + add("DerivedCoreProperties", new String[] { + "Math", "Alphabetic", "Lowercase", "Uppercase", + "ID_Start", "ID_Continue", + "XID_Start", "XID_Continue", + "Default_Ignorable_Code_Point", + "Grapheme_Extend", "Grapheme_Base" + }); + } + + public static void generateFile(String atOrAfter, String atOrBefore) throws IOException { + Iterator it = contents.keySet().iterator(); + while (it.hasNext()) { + String propname = (String) it.next(); + if (propname.compareTo(atOrAfter) < 0) continue; + if (propname.compareTo(atOrBefore) > 0) continue; + generateFile(propname); + } + } + + public static void generateFile(String filename) throws IOException { + String[] propList = (String[]) contents.get(filename); + UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename); + PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt"); + UnicodeProperty.Factory toolFactory + = ToolUnicodePropertySource.make(Default.ucdVersion()); + BagFormatter bf2 = new BagFormatter(toolFactory); + UnicodeSet unassigned = toolFactory.getSet("gc=cn") + .addAll(toolFactory.getSet("gc=cs")); + //System.out.println(unassigned.toPattern(true)); + // .removeAll(toolFactory.getSet("noncharactercodepoint=true")); + String separator = bf2.getLineSeparator() + + "# ================================================" + + bf2.getLineSeparator() + bf2.getLineSeparator(); + + for (int i = 0; i < propList.length; ++i) { + UnicodeProperty prop = toolFactory.getProperty(propList[i]); + System.out.println(prop.getName()); + pw.print(separator); + PrintStyle ps = (PrintStyle) printStyles.get(prop.getName()); + if (ps == null) { + ps = DEFAULT_PRINT_STYLE; + System.out.println("Using default style!"); + } + if (ps.noLabel) bf2.setLabelSource(null); + + if (ps.valueList) { + bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter())) + .setNameSource(null) + .setShowCount(false) + .showSetNames(pw,new UnicodeSet(0,0x10FFFF)); + } else if (prop.getType() <= prop.EXTENDED_BINARY) { + UnicodeSet s = prop.getSet("True"); + bf2.setValueSource(prop.getName()); + bf2.showSetNames(pw, s); + } else { + bf2.setValueSource(prop); + Collection aliases = prop.getAvailableValueAliases(); + if (ps.orderByRangeStart) { + System.out.println("Reordering"); + TreeSet temp2 = new TreeSet(new RangeStartComparator(prop)); + temp2.addAll(aliases); + aliases = temp2; + } + Iterator it = aliases.iterator(); + while (it.hasNext()) { + String value = (String)it.next(); + UnicodeSet s = prop.getSet(value); + + System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue); + System.out.println(s.toPattern(true)); + + if (skeletonComparator.compare(value, ps.skipValue) == 0) continue; + if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) { + s.removeAll(unassigned); + } + + if (s.size() == 0) continue; + //if (unassigned.containsAll(s)) continue; // skip if all unassigned + //if (s.contains(0xD0000)) continue; // skip unassigned + pw.print(separator); + if (!ps.longForm) value = prop.getShortestValueAlias(value); + if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH); + if (ps.makeFirstLetterLowercase) { + // NOTE: this is ok since we are only working in ASCII + value = value.substring(0,1).toLowerCase(Locale.ENGLISH) + + value.substring(1); + } + bf2.setValueSource(value); + bf2.showSetNames(pw, s); + } + } + } + udf.close(); + } + static class RangeStartComparator implements Comparator { + UnicodeProperty prop; + CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator(); + RangeStartComparator(UnicodeProperty prop) { + this.prop = prop; + } + public int compare(Object o1, Object o2) { + UnicodeSet s1 = prop.getSet((String)o1); + UnicodeSet s2 = prop.getSet((String)o2); + if (true) System.out.println("comparing " + o1 + ", " + o2 + + s1.toPattern(true) + "?" + s2.toPattern(true) + + ", " + comp.compare(s1, s2)); + return comp.compare(s1, s2); + } + + } + + public static class ReplaceFilter extends UnicodeProperty.StringFilter { + public String remap(String original) { + return original.replace('_',' '); + } + } + + + +} \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java index fa9f12cd880..7eccfeabb0c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java +++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $ -* $Date: 2004/02/07 01:01:13 $ -* $Revision: 1.7 $ +* $Date: 2004/02/12 08:23:16 $ +* $Revision: 1.8 $ * ******************************************************************************* */ @@ -237,9 +237,9 @@ public final class TestNormalization { UnicodeSet t = (UnicodeSet) it.next(); UnicodeSet l = (UnicodeSet) map.get(t); System.out.println("