diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java index d405b0e0da7..fb4a1e4d7ea 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $ -* $Date: 2002/04/24 02:38:52 $ -* $Revision: 1.7 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.8 $ * ******************************************************************************* */ @@ -25,6 +25,15 @@ public class WriteCharts implements UCD_Types { static boolean HACK_KANA = false; + static public void special() { + Default.setUCD(); + for (int i = 0xE000; i < 0x10000; ++i) { + if (!Default.ucd.isRepresented(i)) continue; + if (Default.nfkc.normalizationDiffers(i)) continue; + System.out.println(Default.ucd.getCodeAndName(i)); + } + } + static public void collationChart(UCA uca) throws IOException { Default.setUCD(uca.getUCDVersion()); HACK_KANA = true; @@ -408,6 +417,131 @@ public class WriteCharts implements UCD_Types { closeIndexFile(indexFile, "", CASE); } + static public void addMapChar(Map m, Set stoplist, String key, String ch) { + if (stoplist.contains(key)) return; + for (int i = 0; i < key.length(); ++i) { + char c = key.charAt(i); + if ('0' <= c && c <= '9') return; + } + Set result = (Set)m.get(key); + if (result == null) { + result = new TreeSet(); + m.put(key, result); + } + result.add(ch); + } + + static public void indexChart() throws IOException { + Default.setUCD(); + HACK_KANA = false; + + Map map = new TreeMap(); + Set stoplist = new TreeSet(); + + String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"}; + stoplist.addAll(Arrays.asList(stops)); + System.out.println("Stop-list: " + stoplist); + + for (int i = 0; i < LIMIT_SCRIPT; ++i) { + 
stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i)); + } + System.out.println("Stop-list: " + stoplist); + + for (int i = 0; i <= 0x10FFFF; ++i) { + if (!Default.ucd.isRepresented(i)) continue; + if (0xAC00 <= i && i <= 0xD7A3) continue; + if (Default.ucd.hasComputableName(i)) continue; + + String s = Default.ucd.getName(i); + if (s == null) continue; + + if (s.startsWith("<")) { + System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i)); + } + String ch = UTF16.valueOf(i); + int last = -1; + int j; + for (j = 0; j < s.length(); ++j) { + char c = s.charAt(j); + if ('A' <= c && c <= 'Z' || '0' <= c && c <= '9') { + if (last == -1) last = j; + } else { + if (last != -1) { + String word = s.substring(last, j); + addMapChar(map, stoplist, word, ch); + last = -1; + } + } + } + if (last != -1) { + String word = s.substring(last, j); + addMapChar(map, stoplist, word, ch); + } + } + + PrintWriter output = null; + + Iterator it = map.keySet().iterator(); + + int oldScript = -127; + + int counter = 0; + String[] replacement = new String[] {"%%%", "Name Charts"}; + String folder = "charts\\name\\"; + + Utility.copyTextFile("index.html", true, folder + "index.html", replacement); + Utility.copyTextFile("charts.css", false, folder + "charts.css"); + Utility.copyTextFile("name_help.html", true, folder + "help.html"); + + indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false); + Utility.appendFile("index_header.html", true, indexFile, replacement); + + int columnCount = 0; + char lastInitial = 0; + + while (it.hasNext()) { + Utility.dot(counter); + + String key = (String) it.next(); + + Set chars = (Set) map.get(key); + + char initial = key.charAt(0); + + if (initial != lastInitial) { + closeFile(output); + output = null; + lastInitial = initial; + } + + if (output == null) { + output = openFile2(0, folder, String.valueOf(initial)); + } + + output.println("" + key + ""); + columnCount = 1; + + Iterator sublist = chars.iterator(); + 
while (sublist.hasNext()) { + + String ch = (String) sublist.next(); + if (columnCount > 10) { + output.println(""); + columnCount = 1; + } + showCell(output, ch, ""); + + } + + closeFile(output); + closeIndexFile(indexFile, "", CASE); + } + static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) { String name = Default.ucd.getName(s); String comp = Default.nfc.normalize(s); @@ -481,6 +615,21 @@ public class WriteCharts implements UCD_Types { return output; } + static PrintWriter openFile2(int count, String directory, String name) throws IOException { + String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html"; + PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false); + Utility.fixDot(); + System.out.println("Writing: " + name); + indexFile.println(" " + name + ""); + String title = name; + output.println(""); + output.println("" + title + ""); + output.println(""); + output.println(""); + output.println(""); + return output; + } + static final int NULL_ORDER = -3, IGNORABLE_ORDER = -2, diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index 89c87fe4e4a..4d0c7e8e7c5 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ -* $Date: 2002/04/23 22:45:40 $ -* $Revision: 1.10 $ +* $Date: 2002/05/29 02:00:59 $ +* $Revision: 1.11 $ * ******************************************************************************* */ @@ -73,6 +73,8 @@ public class WriteCollationData implements UCD_Types { else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(collator); else if (arg.equalsIgnoreCase("normalizationChart")) 
WriteCharts.normalizationChart(); else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart(); + else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart(); + else if (arg.equalsIgnoreCase("special")) WriteCharts.special(); else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator); diff --git a/tools/unicodetools/com/ibm/text/UCA/case_help.html b/tools/unicodetools/com/ibm/text/UCA/case_help.html index f51602ac8e8..6cc84490c94 100644 --- a/tools/unicodetools/com/ibm/text/UCA/case_help.html +++ b/tools/unicodetools/com/ibm/text/UCA/case_help.html @@ -36,7 +36,7 @@ at least one of their case forms (lower, title, upper, or fold).

listed in NoCaseMapping. -
  • Within each chart page, the code points are sorted by lowercased NFKD, +
  • Within each chart page, the code points are sorted by lowercased NFKD, to place related characters next to one another.
  • To help pick out cells visually, the more interesting ones have a light blue background. The other cells have grayed-out text. @@ -53,7 +53,7 @@ at least one of their case forms (lower, title, upper, or fold).

  • If your browser supports tool-tips, then hovering your mouse over cells will show the names of the characters.
  • -
  • For more information, see UAX +
  • For more information, see UAX #21: Case Mappings.
  • diff --git a/tools/unicodetools/com/ibm/text/UCA/charts.css b/tools/unicodetools/com/ibm/text/UCA/charts.css index cf283e5c76f..4d2d3a36541 100644 --- a/tools/unicodetools/com/ibm/text/UCA/charts.css +++ b/tools/unicodetools/com/ibm/text/UCA/charts.css @@ -14,4 +14,5 @@ tt { font-size: 50% } td.g { font-size: 120%; text-align: Center; width: 72px; color: #808080; } td.n { font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; } -td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; } +td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; } +td.h { font-size: 120%; text-align: Left; color: #000000; background-color: #EEEEEE; } diff --git a/tools/unicodetools/com/ibm/text/UCA/help.html b/tools/unicodetools/com/ibm/text/UCA/help.html index 1c8176d2147..cb97f1fe795 100644 --- a/tools/unicodetools/com/ibm/text/UCA/help.html +++ b/tools/unicodetools/com/ibm/text/UCA/help.html @@ -116,7 +116,7 @@ character in the chart, as follows.

    Private Use Area, etc. are represented by a sampling.
  • Some unassigned code points, noncharacters and other edge cases are also added to the list for comparison.
  • -
  • For more information, see UTS +
  • For more information, see UTS #10: Unicode Collation Algorithm.
  • diff --git a/tools/unicodetools/com/ibm/text/UCA/norm_help.html b/tools/unicodetools/com/ibm/text/UCA/norm_help.html index a00af2c35ab..86c76e17442 100644 --- a/tools/unicodetools/com/ibm/text/UCA/norm_help.html +++ b/tools/unicodetools/com/ibm/text/UCA/norm_help.html @@ -33,7 +33,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).

  • By general category, in the latter two cases
  • -
  • Within each chart page, the code points are sorted by lowercased NFKD, +
  • Within each chart page, the code points are sorted by folded NFKD, to place related characters next to one another.
  • To keep the size of the Hangul chart manageable, characters U+AD00..U+D6FF (관..훿) are omitted.
  • @@ -52,7 +52,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).

  • If your browser supports tool-tips, then hovering your mouse over cells will show the names of the characters.
  • -
  • For more information, see UAX +
  • For more information, see UAX #15: Unicode Normalization Forms.
  • diff --git a/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java index c9a1148b420..78905ef0801 100644 --- a/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java +++ b/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $ -* $Date: 2001/12/06 00:05:53 $ -* $Revision: 1.5 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -16,14 +16,24 @@ import java.io.*; class DiffPropertyLister extends PropertyLister { private UCD oldUCD; + private static final int NOPROPERTY = -1; - public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) { + public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output, int property) { this.output = output; this.ucdData = UCD.make(newUCDName); - if (oldUCDName != null) this.oldUCD = UCD.make(oldUCDName); - breakByCategory = false; + if (property != NOPROPERTY) newProp = DerivedProperty.make(property, ucdData); + + if (oldUCDName != null) { + this.oldUCD = UCD.make(oldUCDName); + if (property != NOPROPERTY) oldProp = DerivedProperty.make(property, oldUCD); + } + breakByCategory = property != NOPROPERTY; useKenName = false; } + + public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) { + this(oldUCDName, newUCDName, output, NOPROPERTY); + } public String valueName(int cp) { return major_minor_only(ucdData.getVersion()); @@ -39,13 +49,38 @@ class DiffPropertyLister extends PropertyLister { } */ + UnicodeProperty newProp = null; + UnicodeProperty oldProp = null; + String value = ""; + + public String optionalComment(int cp) { + String normal = super.optionalComment(cp); + return oldUCD.getModCatID_fromIndex( + 
oldUCD.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0)) + + "/" + normal; + } + + public byte status(int cp) { + if (newProp == null) { + return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE; + } + + // just look at property differences among allocated characters + + if (!ucdData.isAllocated(cp)) return EXCLUDE; + if (!oldUCD.isAllocated(cp)) return EXCLUDE; + + String val = newProp.getValue(cp); + String oldVal = oldProp.getValue(cp); + if (!oldVal.equals(val)) return INCLUDE; + return EXCLUDE; + /*if (cp == 0xFFFF) { System.out.println("# " + Utility.hex(cp)); } */ - return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE; } public String headerString() { @@ -91,6 +126,8 @@ class DiffPropertyLister extends PropertyLister { */ private String major_minor_only(String s) { + if (newProp != null) return s; + return s.substring(0, s.lastIndexOf('.')); } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index e96848b7c63..5a41a4c82e9 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2002/04/23 01:59:14 $ -* $Revision: 1.17 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.18 $ * ******************************************************************************* */ @@ -1077,6 +1077,55 @@ public class GenerateData implements UCD_Types { }; + static final void backwardsCompat(String directory, String filename, int[] list) throws IOException { + + Default.setUCD(); + String newFile = directory + filename + getFileSuffix(true); + PrintWriter log = Utility.openPrintWriter(newFile); + String mostRecent = generateBat(directory, filename, getFileSuffix(true)); 
+ try { + for (int i = 0; i < list.length; ++i) { + int prop = list[i]; + log.println(); + log.println(HORIZONTAL_LINE); + log.println("###### " + DerivedProperty.make(prop, Default.ucd).getName()); + //log.println(); + //log.println(HORIZONTAL_LINE); + //new DiffPropertyLister("3.2.0", "1.1.0", log, prop).print(); + log.println(); + log.println(HORIZONTAL_LINE); + new DiffPropertyLister("3.2.0", "2.0.0", log, prop).print(); + log.println(); + log.println(HORIZONTAL_LINE); + new DiffPropertyLister("3.2.0", "2.1.2", log, prop).print(); + log.println(); + log.println(HORIZONTAL_LINE); + new DiffPropertyLister("3.2.0", "2.1.5", log, prop).print(); + log.println(); + log.println(HORIZONTAL_LINE); + new DiffPropertyLister("3.2.0", "2.1.8", log, prop).print(); + log.println(); + log.println(HORIZONTAL_LINE); + new DiffPropertyLister("3.2.0", "3.0.0", log, prop).print(); + log.println(HORIZONTAL_LINE); + log.println(); + new DiffPropertyLister("3.2.0", "3.0.1", log, prop).print(); + log.println(HORIZONTAL_LINE); + log.println(); + new DiffPropertyLister("3.2.0", "3.1.0", log, prop).print(); + log.println(HORIZONTAL_LINE); + log.println(); + new DiffPropertyLister("3.2.0", "3.1.1", log, prop).print(); + log.println(HORIZONTAL_LINE); + } + } finally { + if (log != null) { + log.close(); + Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile)); + } + } + } + static final void generateAge(String directory, String filename) throws IOException { Default.setUCD(); String newFile = directory + filename + getFileSuffix(true); @@ -1102,7 +1151,7 @@ public class GenerateData implements UCD_Types { log.println("# For details on the contents of each version, see"); log.println("# http://www.unicode.org/versions/enumeratedversions.html."); - http://www.unicode.org/versions/enumeratedversions.html + // http://www.unicode.org/versions/enumeratedversions.html log.println(HORIZONTAL_LINE); log.println(); diff --git 
a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java index 4e99edd3a29..9029035e309 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $ -* $Date: 2002/03/15 01:57:01 $ -* $Revision: 1.3 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.4 $ * ******************************************************************************* */ @@ -23,9 +23,10 @@ public final class GenerateHanTransliterator { static final boolean TESTING = false; static int type; - public static void main() { + public static void main(int typeIn) { + type = typeIn; + Default.setUCD(); try { - type = 0; System.out.println("Starting"); generate(); } catch (Exception e) { @@ -43,23 +44,27 @@ public final class GenerateHanTransliterator { String name = "$Han$English"; String key = "kDefinition"; // kMandarin, kKorean, kJapaneseKun, kJapaneseOn String filter = "kJis0"; + String filename = "Han_English"; switch (type) { default: break; case 1: name = "$Han$OnRomaji"; key = "kJapaneseOn"; filter = "kJis0"; + filename = "Han_Romaji"; break; case 2: name = "$Han$Pinyin"; key = "kMandarin"; + filename = "Han_Pinyin"; filter = null; break; } - out = Utility.openPrintWriter("Transliterate_Han_English.txt"); - err = Utility.openPrintWriter("Transliterate_Han_English.log.txt"); + out = Utility.openPrintWriter("Transliterate_" + filename + ".txt", false, false); + err = Utility.openPrintWriter("Transliterate_" + filename + "_log.txt", false, false); - BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true); + BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true); + int totalCount = 0; int count = 0; String 
oldCode = ""; String oldLine = ""; @@ -76,11 +81,15 @@ public final class GenerateHanTransliterator { if (line == null) break; if (line.length() < 6) continue; if (line.charAt(0) == '#') continue; - String code = line.substring(2,6); + int tabPos = line.indexOf(' '); + String code = line.substring(2, tabPos); + /* if (code.compareTo("9FA0") >= 0) { System.out.println("? " + line); }*/ if (!code.equals(oldCode)) { + totalCount++; + if (foundKey && foundFilter) { count++; /*if (true) { //*/ @@ -106,6 +115,8 @@ public final class GenerateHanTransliterator { } if (foundKey && foundFilter) printDef(out, oldCode, oldLine, oldStart); + System.out.println("Total: " + totalCount); + System.out.println("Defined Count: " + count); in.close(); out.close(); err.close(); @@ -137,6 +148,7 @@ public final class GenerateHanTransliterator { String definition = line.substring(start,end); if (type == 2) definition = handlePinyin(definition, line); definition.trim(); + definition = definition.toLowerCase(); String cp = UTF16.valueOf(Integer.parseInt(code, 16)); String key = (String) definitionMap.get(definition); if (key == null) { diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java index d51cd68957c..2cf3d8ea9f2 100644 --- a/tools/unicodetools/com/ibm/text/UCD/Main.java +++ b/tools/unicodetools/com/ibm/text/UCD/Main.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $ -* $Date: 2002/04/23 01:59:14 $ -* $Revision: 1.11 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -65,8 +65,13 @@ public final class Main implements UCD_Types { else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed(); else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability(); - else if 
(arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main(); + else if (arg.equalsIgnoreCase("hanTransliterator")) GenerateHanTransliterator.main(0); + else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1); + else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2); else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry(); + + else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI(); + else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test(); else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase(); @@ -182,6 +187,10 @@ public final class Main implements UCD_Types { } else if (arg.equalsIgnoreCase("DerivedAge")) { GenerateData.generateAge("DerivedData/", "DerivedAge"); + } else if (arg.equalsIgnoreCase("backwardsCompat")) { + GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START", + new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf}); + } else if (arg.equalsIgnoreCase("DerivedCoreProperties")) { GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties"); diff --git a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java index a95a2478862..bbe0b366251 100644 --- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java +++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $ -* $Date: 2002/03/15 01:57:01 $ -* $Revision: 1.8 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.9 $ * ******************************************************************************* */ @@ -58,10 +58,7 @@ abstract public class PropertyLister implements UCD_Types { public String 
optionalComment(int cp) { if (!usePropertyComment || !breakByCategory) return ""; - byte cat = getModCat(cp); - if (cat == FAKELC) return "L&"; - if (cat == FAKENC) return "NC"; - return ucdData.getCategoryID_fromIndex(cat); + return ucdData.getModCatID_fromIndex(getModCat(cp)); } public int minPropertyWidth() { @@ -144,6 +141,10 @@ abstract public class PropertyLister implements UCD_Types { } return result; } + + byte getModCat(int cp) { + return ucdData.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0); + } /** @@ -168,23 +169,6 @@ abstract public class PropertyLister implements UCD_Types { return lastSpace; } - private static final byte FAKERC = 63; // fake category for comparison - private static final byte FAKELC = 63; // fake category for comparison - private static final byte FAKENC = 64; // fake category for comparison - - private byte getModCat(int cp) { - byte cat = ucdData.getCategory(cp); - if (cat == UNASSIGNED && ucdData.isNoncharacter(cp)) cat = FAKENC; - else if (breakByCategory) { - if (cat == Lt || cat == Ll || cat == Lu) cat = FAKELC; - } else { - // MASH almost everything together - if (cat != CONTROL && cat != FORMAT && cat != SURROGATE - && cat != PRIVATE_USE && cat != UNASSIGNED) cat = FAKERC; - } - return cat; - } - public int print() { set.clear(); int count = 0; diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java index df07b4c1c71..d45cc996090 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $ -* $Date: 2002/04/23 01:59:14 $ -* $Revision: 1.11 $ +* $Date: 2002/05/29 02:01:00 $ +* $Revision: 1.12 $ * ******************************************************************************* */ @@ -194,6 +194,79 @@ public final class UCD implements UCD_Types { public byte 
getCategory(int codePoint) { return get(codePoint, false).generalCategory; } + + private static final byte FAKE_SYMBOL = 57; // fake category for comparison + private static final byte FAKE_PUNCTUATION = 58; // fake category for comparison + private static final byte FAKE_SEPERATOR = 59; // fake category for comparison + private static final byte FAKE_NUMBER = 60; // fake category for comparison + private static final byte FAKE_MARK = 61; // fake category for comparison + private static final byte FAKE_LETTER = 62; // fake category for comparison + private static final byte FAKE_OTHER = 63; // fake category for comparison + private static final byte FAKENC = 31; // fake category for comparison + + public byte getModCat(int cp, int collapseBits) { + byte cat = getCategory(cp); + if (cat == UNASSIGNED && isNoncharacter(cp)) cat = FAKENC; + if (((1<