diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java index e1bbb9cca26..e4ef20518e9 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java @@ -5,13 +5,13 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ -* $Date: 2002/06/15 02:47:12 $ -* $Revision: 1.8 $ +* $Date: 2005/03/04 02:50:25 $ +* $Revision: 1.9 $ * ******************************************************************************* */ -WARNING: OLD FILE. DON"T COMPILE. +//WARNING: OLD FILE. DON"T COMPILE. package com.ibm.text.UCA; @@ -24,7 +24,7 @@ import com.ibm.text.utility.*; public class WriteHTMLCollation implements UCD_Types { - public static final String copyright = +/* public static final String copyright = "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved."; static final boolean EXCLUDE_UNSUPPORTED = true; @@ -64,7 +64,7 @@ public class WriteHTMLCollation implements UCD_Types { //UInfo.init(); ucd = UCD.make(""); - /* + Normalizer foo = new Normalizer(Normalizer.NFKD); char x = '\u1EE2'; System.out.println(UCA.hex(x) + " " + ucd.getName(x)); @@ -74,7 +74,7 @@ public class WriteHTMLCollation implements UCD_Types { System.out.println(ucd.getCanonicalClass(c)); } System.out.println(UCA.hex(nx, " ") + " " + ucd.getName(nx)); - */ + // DO FOLLOWING //writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE); @@ -119,7 +119,7 @@ public class WriteHTMLCollation implements UCD_Types { String e = nfkc.normalize(d); if (!e.equals(c)) { System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a)); - /* + System.out.println(Utility.hex(a) + ", " + Utility.hex(b, " ") + ", " + Utility.hex(c, " ") @@ -131,7 +131,7 @@ public class WriteHTMLCollation implements UCD_Types { + ", " + ucd.getName(c) + ", " + ucd.getName(d) + ", " + ucd.getName(e)); - */ + } String f = Case.fold(e); String g = nfkc.normalize(f); @@ -181,7 +181,7 @@ public class WriteHTMLCollation implements UCD_Types { } return result.toString(); } - /* + static void writeConformance(String filename, byte option) throws IOException { PrintWriter log = Utility.openPrintWriter(filename); @@ -252,7 +252,7 @@ public class WriteHTMLCollation implements UCD_Types { sortedD.clear(); System.out.println("Done"); } - */ + static void addStringX(int x, byte option) { addStringX(String.valueOf((char)x), option); @@ -274,9 +274,9 @@ public class WriteHTMLCollation implements UCD_Types { sortedD.put(colDbase, s); } - /** + *//** * Check that the primaries are the same as the compatibility decomposition. - */ + *//* static void checkBadDecomps(int strength, boolean decomposition) { int oldStrength = collator.getStrength(); collator.setStrength(strength); @@ -319,7 +319,7 @@ public class WriteHTMLCollation implements UCD_Types { return buf.toString(); } - /* + log = new PrintWriter(new FileOutputStream("Frequencies.html")); log.println(""); MessageFormat mf = new MessageFormat("{0}{1}{2}{3}"); @@ -344,16 +344,16 @@ public class WriteHTMLCollation implements UCD_Types { log.println(""); log.println(""); log.close(); - */ + static int[] compactSecondary; - /*static void checkEquivalents() { + static void checkEquivalents() { Normalizer nfkd = new Normalizer(Normalizer.NFKC); Normalizer nfd = new Normalizer(Normalizer.NFKD); for (char c = 0; c < 0xFFFF; ++c) { - }*/ + } static void testCompatibilityCharacters() throws IOException { log = new PrintWriter(new BufferedWriter(new OutputStreamWriter( @@ -774,9 +774,9 @@ public class WriteHTMLCollation implements UCD_Types { for (int secondary = 0; secondary < compactSecondary.length; ++secondary) { if (secondarySet.get(secondary)) { compactSecondary[secondary] = subtotal++; - /*System.out.println("compact[" + Utility.hex(secondary) + System.out.println("compact[" + Utility.hex(secondary) + "]=" + Utility.hex(compactSecondary[secondary]) - + ", " + Utility.hex(fixSecondary(secondary)));*/ + + ", " + Utility.hex(fixSecondary(secondary))); } } System.out.println(); @@ -980,14 +980,14 @@ public class WriteHTMLCollation implements UCD_Types { } summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0))); - /* + String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3"; for (int i = 0; i < sample.length(); ++i) { char ch = sample.charAt(i); log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch)) + " " + ucd.getName(ch)); } - */ + summary.println(); summary.println("# First Implicit: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0))); summary.println("# Last Implicit: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0x10FFFF))); @@ -1160,7 +1160,7 @@ public class WriteHTMLCollation implements UCD_Types { return s; } - /* + 30F5;KATAKANA LETTER SMALL KA;Lo;0;L;;;;;N;;;;; 30AB;KATAKANA LETTER KA;Lo;0;L;;;;;N;;;;; 30F6;KATAKANA LETTER SMALL KE;Lo;0;L;;;;;N;;;;; @@ -1209,7 +1209,7 @@ public class WriteHTMLCollation implements UCD_Types { 308E;HIRAGANA LETTER SMALL WA;Lo;0;L;;;;;N;;;;; 308F;HIRAGANA LETTER WA;Lo;0;L;;;;;N;;;;; -*/ + static final int secondaryDoubleStart = 0xD0; @@ -1248,13 +1248,13 @@ public class WriteHTMLCollation implements UCD_Types { return (top << 8) | bottom; } -/* + # 0153: (EE3D) 20E3 [0000.0153.0002] COMBINING ENCLOSING KEYCAP # 0154: (EE41) 0153 [0997.0154.0004][08B1.0020.0004] LATIN SMALL LIGATURE OE # 0155: (EE45) 017F [09F3.0155.0004] LATIN SMALL LETTER LONG S # 0157: (EE49) 16C6 [1656.0157.0004] RUNIC LETTER SHORT-TWIG-AR A # 0158: (EE4D) 2776 [0858.0158.0006] DINGBAT NEGATIVE CIRCLED DIGIT ONE -*/ + static int fixTertiary(int x) { if (x == 0) return x; @@ -1304,12 +1304,12 @@ public class WriteHTMLCollation implements UCD_Types { return (x & 1) == 0; } - /* static String ceToString(int primary, int secondary, int tertiary) { + static String ceToString(int primary, int secondary, int tertiary) { return "[" + hexBytes(primary) + ", " + hexBytes(secondary) + ", " + hexBytes(tertiary) + "]"; } - */ + static String hexBytes(long x) { StringBuffer temp = new StringBuffer(); @@ -1397,13 +1397,13 @@ public class WriteHTMLCollation implements UCD_Types { setSingle(k, ces); } } - /*setSingle('\u0300', ces); + setSingle('\u0300', ces); setSingle('\u0301', ces); setSingle('\u0302', ces); setSingle('\u0303', ces); setSingle('\u0308', ces); setSingle('\u030C', ces); - */ + bumps.set(0x089A); // lowest non-variable bumps.set(0x4E00); // lowest Kangxi @@ -1498,13 +1498,13 @@ public class WriteHTMLCollation implements UCD_Types { } log.println(UCA.toString(sortKey) + "
"); - /*if (source.equals(lastSource)) { + if (source.equals(lastSource)) { it.remove(); --duplicateCount; } //lastSortKey = sortKey; lastSource = lastSource; - */ + } System.out.println("Total: " + sortedD.size()); } @@ -1571,9 +1571,9 @@ public class WriteHTMLCollation implements UCD_Types { case 1: color = 0x6666FF; break; case 0: color = 0x3333FF; break; } - /*if (mark == MARK2) { + if (mark == MARK2) { color = color & 0xFF00FF; - }*/ + } if (color != 0xFFFFFF) out.print(" bgcolor='#" + Integer.toString(color,16) + "'"); //if (firstRow) out.print(" width='6%'"); out.print(">"); @@ -1600,7 +1600,7 @@ public class WriteHTMLCollation implements UCD_Types { System.out.println("Done"); } -/* + 3400;;Lo;0;L;;;;;N;;;;; 4DB5;;Lo;0;L;;;;;N;;;;; 4E00;;Lo;0;L;;;;;N;;;;; @@ -1611,7 +1611,7 @@ A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; A4C4;YI RADICAL ZZIET;So;0;ON;;;;;N;;;;; A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; -*/ + static final char[][] extraRanges = {{0x3400, 0x4DB5}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0xA000, 0xA48C}}; @@ -1644,10 +1644,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String colNbase = collator.getSortKey(ch, option, false); String colCbase = collator.getSortKey(nfc.normalize(ch), option, false); if (!colNbase.equals(colCbase)) { - /*System.out.println(Utility.hex(ch)); + System.out.println(Utility.hex(ch)); + System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase)); - System.out.println(printableKey(colNbase));*/ MismatchedN.put(ch,colNbase); MismatchedC.put(ch,colCbase); MismatchedD.put(ch,colDbase); @@ -1659,7 +1659,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; backD.put(ch, colD); sortedN.put(colN, ch); backN.put(ch, colN); - /* + if (strength > 4) { duplicateCount++; duplicates.put(ch+MARK1, col); @@ -1668,7 +1668,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; sorted.put(col2 + MARK2, ch); } unique += 2; - */ + } static void removeAdjacentDuplicates() { @@ -1910,18 +1910,18 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; return sortKey; } - /* + LINKS
CONTENTS - */ + static void writeTail(PrintWriter out, int counter, String title, String other, boolean show) throws IOException { copyFile(out, "HTML-Part2.txt"); - /* + out.println(""); out.println(""); - */ + out.close(); } @@ -1940,7 +1940,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; "UTF8"), 4*1024)); copyFile(out, "HTML-Part1.txt"); - /* + out.println(""); out.println(""); out.println("" + HTMLString(title) + ""); @@ -1974,7 +1974,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;

Show Key -*/ + // index @@ -2091,10 +2091,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; || ch >= '\uD800' && ch <= '\uDFFF' || ch >= '\uFFFE') { result.append('\uFFFD'); - /*result.append("#x"); + result.append("#x"); result.append(cpName(ch)); result.append(";"); - */ + } else if (quoteApos && ch == '\'') { result.append("'"); } else if (ch == '\"') { @@ -2133,5 +2133,5 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; return 0x100000; } - +*/ } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java index c89f330b005..2dea69bc75d 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $ -* $Date: 2002/10/05 01:28:58 $ -* $Revision: 1.1 $ +* $Date: 2005/03/04 02:50:26 $ +* $Revision: 1.2 $ * ******************************************************************************* */ @@ -14,7 +14,7 @@ package com.ibm.text.UCD; import java.io.*; import com.ibm.text.utility.*; -import com.ibm.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSet; import java.util.*; public class GenerateThaiBreaks { diff --git a/tools/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java b/tools/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java new file mode 100644 index 00000000000..6bbc581bd14 --- /dev/null +++ b/tools/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java @@ -0,0 +1,109 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2001, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java,v $ +* $Date: 2005/03/04 02:50:26 $ +* $Revision: 1.1 $ +* +******************************************************************************* +*/ + +package com.ibm.text.UCD; + +import java.util.*; +import java.io.*; + +import com.ibm.text.utility.*; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * Class that maps from codepoints to an index, and optionally a label. + */ +public class OldUnicodeMap { + UnicodeSet[] sets = new UnicodeSet[50]; + String[] labels = new String[50]; + int count = 0; + + public int add(String label, UnicodeSet set) { + return add(label, set, false, true); + } + + /** + * Add set + *@param removeOld true: remove any collisions from sets already in the map + * if false, remove any collisions from this set + *@param signal: print a warning when collisions occur + */ + public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) { + // remove from any preceding!! + for (int i = 0; i < count; ++i) { + if (!set.containsSome(sets[i])) continue; + if (signal) showOverlap(label, set, i); + if (removeOld) { + sets[i] = sets[i].removeAll(set); + } else { + set = set.removeAll(sets[i]); + } + } + sets[count] = set; + labels[count++] = label; + return (short)(count - 1); + } + + public void showOverlap(String label, UnicodeSet set, int i) { + UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]); + System.out.println("Warning! Overlap with " + label + " and " + labels[i] + + ": " + delta); + } + + public int getIndex(int codepoint) { + for (int i = count - 1; i >= 0; --i) { + if (sets[i].contains(codepoint)) return i; + } + return -1; + } + + public int getIndexFromLabel(String label) { + for (int i = count - 1; i >= 0; --i) { + if (labels[i].equalsIgnoreCase(label)) return i; + } + return -1; + } + + public String getLabel(int codepoint) { + return getLabelFromIndex(getIndex(codepoint)); + } + + public String getLabelFromIndex(int index) { + if (index < 0 || index >= count) return null; + return labels[index]; + } + + public UnicodeSet getSetFromIndex(int index) { + if (index < 0 || index >= count) return null; + return new UnicodeSet(sets[index]); // protect from changes + } + + public int size() { + return count; + } + + public int setLabel(int index, String label) { + labels[index] = label; + return index; + } + + public int put(int codepoint, int index) { + if (sets[index] == null) { + sets[index] = new UnicodeSet(); + if (index >= count) count = index + 1; + } + sets[index].add(codepoint); + return index; + } + +} diff --git a/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java b/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java index f6b4cc6127a..83594101714 100644 --- a/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java +++ b/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ProcessUnihan.java,v $ -* $Date: 2003/04/25 01:39:15 $ -* $Revision: 1.2 $ +* $Date: 2005/03/04 02:50:26 $ +* $Revision: 1.3 $ * ******************************************************************************* */ @@ -20,6 +20,7 @@ import java.util.*; // stub file, ignore public final class ProcessUnihan { + /* static final boolean TESTING = false; static int type; @@ -50,7 +51,7 @@ public final class ProcessUnihan { //out = Utility.openPrintWriter("Transliterate_Han_English.txt"); //err = Utility.openPrintWriter("Transliterate_Han_English.log.txt"); - BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", Utility.UTF8); + BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true, Utility.UTF8); while (true) { Utility.dot(++lineCounter); @@ -63,12 +64,13 @@ public final class ProcessUnihan { int count = Utility.split(line, '#', parts); int code = Integer.parseInt(parts[0].substring(2), 16); - Byte itag = tags.get("a"); + Byte itag = (Byte) tags.get("a"); if (itag == null) {} String tag = parts[1]; String value = parts[2]; if (tags.containsKey(tag)) {} } } + */ } diff --git a/tools/unicodetools/com/ibm/text/utility/TestUtility.java b/tools/unicodetools/com/ibm/text/utility/TestUtility.java index 38624624c5e..256b2c4f2d7 100644 --- a/tools/unicodetools/com/ibm/text/utility/TestUtility.java +++ b/tools/unicodetools/com/ibm/text/utility/TestUtility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $ -* $Date: 2002/07/14 22:04:49 $ -* $Revision: 1.2 $ +* $Date: 2005/03/04 02:50:26 $ +* $Revision: 1.3 $ * ******************************************************************************* */ @@ -18,7 +18,7 @@ import java.text.*; import java.io.*; public class TestUtility { - + /* static public class MyEnum extends EnumBase { public static MyEnum ZEROED = (MyEnum) makeNext(myEnum.getClass()), @@ -38,5 +38,5 @@ public class TestUtility { System.out.println(i.getValue()); } } - + */ } \ No newline at end of file diff --git a/tools/unicodetools/com/ibm/text/utility/UnicodeMapInt.java b/tools/unicodetools/com/ibm/text/utility/UnicodeMapInt.java index 3e19063677b..045004f5d37 100644 --- a/tools/unicodetools/com/ibm/text/utility/UnicodeMapInt.java +++ b/tools/unicodetools/com/ibm/text/utility/UnicodeMapInt.java @@ -2,15 +2,15 @@ package com.ibm.text.utility; import com.ibm.icu.text.UnicodeSet; final class UnicodeMapInt { - private int [] index = new int[1]; +/* private int [] index = new int[1]; private int [] data = new int[1]; private int len = 1; - /* index array is a set of inflection points; it and the data are always of the form + index array is a set of inflection points; it and the data are always of the form index: {MIN_VALUE, x, y, ..., q, MAX_VALUE} data: {value for ..x-1, value for x..y-1, value for y..z-1, ..., value for q..} AND no adjacent values are identical! - */ + public int put (int cp, int value) { @@ -63,9 +63,9 @@ final class UnicodeMapInt { return data[findIndex(cp) - 1]; } - /** + *//** * Returns the set of all characters that have the given value - */ + *//* public UnicodeSet getMatch(int value) { UnicodeSet result = new UnicodeSet(); for (int i = 0; i < len; ++i) { @@ -74,7 +74,7 @@ final class UnicodeMapInt { return result; } - /** Finds the least index with a value greater than cp */ + *//** Finds the least index with a value greater than cp *//* private int findIndex( int cp) { if (cp > 0x10FFFF) throw new ArrayIndexOutOfBoundsException("Code point too large: " + cp); // out of bounds! int i = -1; @@ -84,7 +84,7 @@ final class UnicodeMapInt { } - /* + public UnicodeSetIterator iterator() { } */ diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 7f3e1c2c785..5698ea04e24 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2004/11/13 23:10:32 $ -* $Revision: 1.46 $ +* $Date: 2005/03/04 02:50:26 $ +* $Revision: 1.47 $ * ******************************************************************************* */ @@ -23,6 +23,7 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.text.Replaceable; import com.ibm.icu.text.ReplaceableString; import com.ibm.icu.text.UnicodeMatcher; +import com.ibm.icu.dev.test.util.UnicodeMap; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.text.UCD.*; @@ -1131,7 +1132,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES } public static void showSetDifferences(PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2, - boolean separateLines, boolean withChar, OldUnicodeMap names, UCD ucd) { + boolean separateLines, boolean withChar, UnicodeMap names, UCD ucd) { UnicodeSet temp = new UnicodeSet(set1).removeAll(set2); pw.println(); @@ -1171,7 +1172,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES static java.text.NumberFormat nf = java.text.NumberFormat.getInstance(); public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, - boolean withChar, OldUnicodeMap names, UCD ucd) { + boolean withChar, UnicodeMap names, UCD ucd) { if (set.size() == 0) { pw.println(prefix + ""); pw.flush(); @@ -1188,7 +1189,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES + "\t# " + (useHTML ? "(" + getUnicodeImage(cp) + ") " : "") + (withChar && (cp >= 0x20) ? "(" + UTF16.valueOf(cp) + ") " : "") - + (names != null ? names.getLabel(cp) + " " : "") + + (names != null ? names.getValue(cp) + " " : "") + ucd.getName(cp) + (useHTML ? "
" : "")); else {