diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java index 14d9de32d9f..caab14ed69b 100644 --- a/tools/unicodetools/com/ibm/text/UCA/UCA.java +++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ -* $Date: 2001/10/26 23:32:03 $ -* $Revision: 1.6 $ +* $Date: 2001/10/31 00:01:28 $ +* $Revision: 1.7 $ * ******************************************************************************* */ @@ -236,7 +236,7 @@ final public class UCA implements Comparator { // add weights char w = getPrimary(ce); - if (DEBUG) System.out.println("\tCE: " + hex(ce)); + if (DEBUG) System.out.println("\tCE: " + Utility.hex(ce)); if (w != 0) primaries.append(w); w = getSecondary(ce); @@ -490,7 +490,10 @@ final public class UCA implements Comparator { /** * Return the type of the CE */ - public byte getCEType(char ch) { + public byte getCEType(int ch) { + + if (ch > 0xFFFF) ch = UTF16.getLeadSurrogate(ch); // first if expands + int ce = collationElements[ch]; if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return NORMAL_CE; if (ce == UNSUPPORTED) { @@ -586,7 +589,7 @@ final public class UCA implements Comparator { result.append("|"); needSep = true; } else { - result.append(hex(ch)); + result.append(Utility.hex(ch)); needSep = true; } } @@ -598,9 +601,9 @@ final public class UCA implements Comparator { * Produces a human-readable string for a collation element */ static public String ceToString(int ce) { - return "[" + hex(getPrimary(ce)) + "." - + hex(getSecondary(ce)) + "." - + hex(getTertiary(ce)) + "]"; + return "[" + Utility.hex(getPrimary(ce)) + "." + + Utility.hex(getSecondary(ce)) + "." + + Utility.hex(getTertiary(ce)) + "]"; } /** @@ -631,32 +634,36 @@ final public class UCA implements Comparator { /** * Supplies a zero-padded hex representation of an integer (without 0x) */ + /* static public String hex(int i) { String result = Long.toString(i & 0xFFFFFFFFL, 16).toUpperCase(); return "00000000".substring(result.length(),8) + result; } - + */ /** * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u) */ + /* static public String hex(char i) { String result = Integer.toString(i, 16).toUpperCase(); return "0000".substring(result.length(),4) + result; } - + */ /** * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u) */ + /* static public String hex(byte b) { int i = b & 0xFF; String result = Integer.toString(i, 16).toUpperCase(); return "00".substring(result.length(),2) + result; } - + */ /** * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u) *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062" */ + /* static public String hex(String s, String sep) { StringBuffer result = new StringBuffer(); for (int i = 0; i < s.length(); ++i) { @@ -665,11 +672,12 @@ final public class UCA implements Comparator { } return result.toString(); } - + */ /** * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u) *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062" */ + /* static public String hex(StringBuffer s, String sep) { StringBuffer result = new StringBuffer(); for (int i = 0; i < s.length(); ++i) { @@ -678,6 +686,7 @@ final public class UCA implements Comparator { } return result.toString(); } + */ // ============================================================= // Privates @@ -1161,6 +1170,7 @@ final public class UCA implements Comparator { public class UCAContents { int current = -1; Normalizer skipDecomps = new Normalizer(Normalizer.NFD); + Normalizer nfd = skipDecomps; Iterator enum = null; byte ceLimit; int currentRange = Integer.MAX_VALUE; // set to ZERO to enable @@ -1191,11 +1201,15 @@ final public class UCA implements Comparator { String result = null; // null if done // normal case - while (current++ <= 0xFFFF) { - char ch = (char)current; - if (getCEType(ch) >= ceLimit) continue; - if (skipDecomps != null && skipDecomps.hasDecomposition(ch)) continue; - result = String.valueOf(ch); + while (current++ < 0x10FFFF) { + //char ch = (char)current; + byte type = getCEType(current); + + if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) { + if (type >= ceLimit) continue; + if (skipDecomps != null && skipDecomps.hasDecomposition(current)) continue; + } + result = UTF16.valueOf(current); return result; } @@ -1502,19 +1516,19 @@ final public class UCA implements Comparator { hangulHackBottom = collationElements[0x1100] & 0xFFFF0000; // remove secondaries & tertiaries hangulHackTop = collationElements[0x11F9] | 0xFFFF; // bump up secondaries and tertiaries - if (SHOW_STATS) System.out.println("\tHangul Hack: " + hex(hangulHackBottom) + ", " + hex(hangulHackTop)); + if (SHOW_STATS) System.out.println("\tHangul Hack: " + Utility.hex(hangulHackBottom) + ", " + Utility.hex(hangulHackTop)); // show some statistics if (SHOW_STATS) System.out.println("\tcount1: " + count1); if (SHOW_STATS) System.out.println("\tcount2: " + max2); if (SHOW_STATS) System.out.println("\tcount3: " + max3); - if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + hex(MIN1) + "/" + hex(MAX1)); - if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + hex(MIN2) + "/" + hex(MAX2)); - if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + hex(MIN3) + "/" + hex(MAX3)); + if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + Utility.hex(MIN1) + "/" + Utility.hex(MAX1)); + if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + Utility.hex(MIN2) + "/" + Utility.hex(MAX2)); + if (SHOW_STATS) System.out.println("\tMIN3/MAX3: " + Utility.hex(MIN3) + "/" + Utility.hex(MAX3)); - if (SHOW_STATS) System.out.println("\tVar Min/Max: " + hex(variableLow) + "/" + hex(variableHigh)); - if (SHOW_STATS) System.out.println("\tNon-Var Min: " + hex(nonVariableLow)); + if (SHOW_STATS) System.out.println("\tVar Min/Max: " + Utility.hex(variableLow) + "/" + Utility.hex(variableHigh)); + if (SHOW_STATS) System.out.println("\tNon-Var Min: " + Utility.hex(nonVariableLow)); if (SHOW_STATS) System.out.println("\trenumberedVariable: " + renumberedVariable); } @@ -1565,7 +1579,7 @@ final public class UCA implements Comparator { if (strength > 1) { if (weights.get(i)) { count++; - p.println(mf.format(new Object[] {hex((char)i), new Integer(stCounts[strength][i])})); + p.println(mf.format(new Object[] {Utility.hex((char)i), new Integer(stCounts[strength][i])})); } continue; } @@ -1575,8 +1589,8 @@ final public class UCA implements Comparator { int last = i-1; int diff = last - first + 1; count += diff; - String lastStr = last == first ? "" : hex((char)last); - p.println(mf.format(new Object[] {hex((char)first),lastStr,new Integer(diff), new Integer(count)})); + String lastStr = last == first ? "" : Utility.hex((char)last); + p.println(mf.format(new Object[] {Utility.hex((char)first),lastStr,new Integer(diff), new Integer(count)})); first = -1; } } @@ -1623,17 +1637,17 @@ final public class UCA implements Comparator { variable = false; // FIX DATA FILE } if (key2 > 0x1FF) { - throw new IllegalArgumentException("Weight2 doesn't fit: " + hex(key2) + "," + line); + throw new IllegalArgumentException("Weight2 doesn't fit: " + Utility.hex(key2) + "," + line); } if (key3 > 0x7F) { - throw new IllegalArgumentException("Weight3 doesn't fit: " + hex(key3) + "," + line); + throw new IllegalArgumentException("Weight3 doesn't fit: " + Utility.hex(key3) + "," + line); } // adjust variable bounds, if needed if (variable) { if (key1 > nonVariableLow) { if (!haveVariableWarning) { System.out.println("\tBAD DATA: Variable overlap, nonvariable low: " - + hex(nonVariableLow) + ", line: \"" + line + "\""); + + Utility.hex(nonVariableLow) + ", line: \"" + line + "\""); haveVariableWarning = true; } } else { @@ -1644,7 +1658,7 @@ final public class UCA implements Comparator { if (key1 < variableHigh) { if (!haveVariableWarning) { System.out.println("\tBAD DATA: Variable overlap, variable high: " - + hex(variableHigh) + ", line: \"" + line + "\""); + + Utility.hex(variableHigh) + ", line: \"" + line + "\""); haveVariableWarning = true; } } else { @@ -1717,8 +1731,8 @@ final public class UCA implements Comparator { Object ceObj = new Long(((long)result << 16) | fourth); Object probe = uniqueTable.get(ceObj); if (probe != null) { - System.out.println("\tCE(" + hex(value) - + ")=CE(" + hex(((Character)probe).charValue()) + "); " + line); + System.out.println("\tCE(" + Utility.hex(value) + + ")=CE(" + Utility.hex(((Character)probe).charValue()) + "); " + line); } else { uniqueTable.put(ceObj, new Character(value)); diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java index 9d33a11590d..2b135f318ac 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ -* $Date: 2001/10/26 23:32:03 $ -* $Revision: 1.6 $ +* $Date: 2001/10/31 00:01:28 $ +* $Revision: 1.7 $ * ******************************************************************************* */ @@ -14,6 +14,7 @@ package com.ibm.text.UCA; import java.util.*; +import com.ibm.text.UTF16; import java.io.*; //import java.text.*; @@ -106,13 +107,13 @@ public class WriteCollationData implements UCD_Types { Normalizer foo = new Normalizer(Normalizer.NFKD); char x = '\u1EE2'; - System.out.println(UCA.hex(x) + " " + ucd.getName(x)); + System.out.println(Utility.hex(x) + " " + ucd.getName(x)); String nx = foo.normalize(x); for (int i = 0; i < nx.length(); ++i) { char c = nx.charAt(i); System.out.println(ucd.getCanonicalClass(c)); } - System.out.println(UCA.hex(nx, " ") + " " + ucd.getName(nx)); + System.out.println(Utility.hex(nx, " ") + " " + ucd.getName(nx)); */ } @@ -251,7 +252,7 @@ public class WriteCollationData implements UCD_Types { CompactShortArray csa = new CompactShortArray((short)0); for (char c = 0; c < 0xFFFF; ++c) { - if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c)); + if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); if (0xAC00 <= c && c <= 0xD7A3) continue; if (normKD.hasDecomposition(c)) { ++count; @@ -260,7 +261,7 @@ public class WriteCollationData implements UCD_Types { if (max < decomp.length()) max = decomp.length(); if (decomp.length() > 7) ++over7; csa.setElementAt(c, (short)count); - log.println("\t KD[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';"); + log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';"); } } csa.compact(); @@ -279,7 +280,7 @@ public class WriteCollationData implements UCD_Types { csa = new CompactShortArray((short)0); for (char c = 0; c < 0xFFFF; ++c) { - if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c)); + if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); if (0xAC00 <= c && c <= 0xD7A3) continue; if (normD.hasDecomposition(c)) { ++count; @@ -287,7 +288,7 @@ public class WriteCollationData implements UCD_Types { datasize += decomp.length(); if (max < decomp.length()) max = decomp.length(); csa.setElementAt(c, (short)count); - log.println("\t D[0x" + UCA.hex(c) + "]='\\u" + UCA.hex(decomp,"\\u") + "';"); + log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';"); } } csa.compact(); @@ -304,12 +305,12 @@ public class WriteCollationData implements UCD_Types { CompactByteArray cba = new CompactByteArray(); for (char c = 0; c < 0xFFFF; ++c) { - if ((c & 0xFFF) == 0) System.err.println(UCA.hex(c)); + if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c)); int canClass = normKD.getCanonicalClass(c); if (canClass != 0) { ++count; - log.println("\t CC[0x" + UCA.hex(c) + "]=" + canClass + ";"); + log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";"); } } cba.compact(); @@ -332,7 +333,7 @@ public class WriteCollationData implements UCD_Types { char val = (char) enum.value(); if (0xAC00 <= val && val <= 0xD7A3) continue; ++count; - log.println("\tC[0x" + UCA.hex(key) + "]=0x" + UCA.hex(val) + ";"); + log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";"); } log.println("// " + count + " composition mappings total"); log.println(); @@ -480,7 +481,7 @@ public class WriteCollationData implements UCD_Types { decompSortKey = remove(decompSortKey, '\u0020'); } if (!sortKey.equals(decompSortKey)) { - log.println("" + UCA.hex(ch) + log.println("" + Utility.hex(ch) + "" + UCA.toString(sortKey) + "" + UCA.toString(decompSortKey) + "" + ucd.getName(ch) @@ -649,9 +650,11 @@ public class WriteCollationData implements UCD_Types { static final byte getDecompType(int cp) { byte result = ucd.getDecompositionType(cp); if (result == ucd.CANONICAL) { - String d = NFD.normalize((char)cp); // TODO - for (int i = 0; i < d.length(); ++i) { - byte t = ucd.getDecompositionType(d.charAt(i)); + String d = NFD.normalize(cp); // TODO + int cp1; + for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) { + cp1 = UTF16.charAt(d, i); + byte t = ucd.getDecompositionType(cp1); if (t > ucd.CANONICAL) return t; } } @@ -707,7 +710,7 @@ public class WriteCollationData implements UCD_Types { static int[] markCes = new int[50]; static int fixCompatibilityCE(String s, boolean decompose, int[] output, boolean compress) { - byte type = getDecompType(s.charAt(0)); + byte type = getDecompType(UTF16.charAt(s, 0)); char ch = s.charAt(0); String decomp = NFKD.normalize(s); @@ -1654,6 +1657,7 @@ public class WriteCollationData implements UCD_Types { static final int COMMON = 5; static int gapForA = 0; + static int[] primaryDelta; static void writeFractionalUCA(String filename) throws IOException { @@ -1672,9 +1676,9 @@ public class WriteCollationData implements UCD_Types { for (int secondary = 0; secondary < compactSecondary.length; ++secondary) { if (secondarySet.get(secondary)) { compactSecondary[secondary] = subtotal++; - /*System.out.println("compact[" + UCA.hex(secondary) - + "]=" + UCA.hex(compactSecondary[secondary]) - + ", " + UCA.hex(fixSecondary(secondary)));*/ + /*System.out.println("compact[" + Utility.hex(secondary) + + "]=" + Utility.hex(compactSecondary[secondary]) + + ", " + Utility.hex(fixSecondary(secondary)));*/ } } System.out.println(); @@ -1687,7 +1691,9 @@ public class WriteCollationData implements UCD_Types { System.out.println("Fixing Primaries"); BitSet primarySet = collator.getWeightUsage(1); - int[] primaryDelta = new int[65536]; + + primaryDelta = new int[65536]; + // start at 1 so zero stays zero. for (int primary = 1; primary < 0xFFFF; ++primary) { if (primarySet.get(primary)) primaryDelta[primary] = 2; @@ -1749,7 +1755,7 @@ public class WriteCollationData implements UCD_Types { lastValue = primaryDelta[primary] = CE >>> 8; } - //if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary])); + //if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary])); } @@ -1757,19 +1763,37 @@ public class WriteCollationData implements UCD_Types { System.out.println("Sorting"); Map ordered = new TreeMap(); - - for (char ch = 0; ch < 0xFFFF; ++ch) { - byte type = collator.getCEType(ch); - if (type >= UCA.FIXED_CE) continue; - String s = String.valueOf(ch); + UCA.UCAContents ucac = collator.getContents(UCA.FIXED_CE, null); + int ccounter = 0; + while (true) { + Utility.dot(ccounter++); + String s = ucac.next(); + if (s == null) break; ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s); } + + + /* + + for (int ch = 0; ch < 0x10FFFF; ++ch) { + Utility.dot(ch); + byte type = collator.getCEType(ch); + if (type >= UCA.FIXED_CE && !nfd.hasDecomposition(ch)) + continue; + } + String s = com.ibm.text.UTF16.valueOf(ch); + ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s); + } + Hashtable multiTable = collator.getContracting(); Enumeration enum = multiTable.keys(); + int ecount = 0; while (enum.hasMoreElements()) { + Utility.dot(ecount++); String s = (String)enum.nextElement(); ordered.put(collator.getSortKey(s, UCA.NON_IGNORABLE) + '\u0000' + s, s); } + */ // JUST FOR TESTING if (false) { String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3"; @@ -1779,6 +1803,7 @@ public class WriteCollationData implements UCD_Types { } } + Utility.fixDot(); System.out.println("Writing"); PrintWriter shortLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + ".txt"), 32*1024)); PrintWriter longLog = new PrintWriter(new BufferedWriter(new FileWriter(GEN_DIR + filename + "_long.txt"), 32*1024)); @@ -1821,6 +1846,8 @@ public class WriteCollationData implements UCD_Types { String lastChr = ""; int lastNp = 0; boolean doVariable = false; + char[] codeUnits = new char[100]; + while (it.hasNext()) { Object sortKey = it.next(); @@ -1846,8 +1873,12 @@ public class WriteCollationData implements UCD_Types { wasVariable = isVariable; } oldStr.setLength(0); - log.print(UCA.hex(chr, " ") + "; "); + chr.getChars(0, chr.length(), codeUnits, 0); + + log.print(Utility.hex(codeUnits, 0, chr.length(), " ") + "; "); boolean nonePrinted = true; + boolean isFirst = true; + for (int q = 0; q < len; ++q) { nonePrinted = false; newPrimary.setLength(0); @@ -1856,7 +1887,32 @@ public class WriteCollationData implements UCD_Types { int pri = UCA.getPrimary(ces[q]); int sec = UCA.getSecondary(ces[q]); - int ter = UCA.getTertiary(ces[q]); + int ter = UCA.getTertiary(ces[q]); + + oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16); + + // special hack for unsupported! + + if (pri >= UCA.UNSUPPORTED_BASE) { + ++q; + oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16); + + int pri2 = UCA.getPrimary(ces[q]); + // get old code point + // pri = UNSUPPORTED_BASE + (bigChar >>> 15) + // pri2 = (bigChar & 0x7FFF) | 0x8000 + pri -= UCA.UNSUPPORTED_BASE; + pri <<= 15; + pri2 &= 0x7FFF; + pri += pri2; + System.out.println("Unsupported: " + + Utility.hex(UCA.getPrimary(ces[q-1])) + + ", " + Utility.hex(UCA.getPrimary(ces[q])) + + ", " + Utility.hex(pri) + + ", " + Utility.hex(fixPrimary(pri) & 0xFFFFFFFFL) + ); + + } if (sec != 0x20) { boolean changed = secEq.add(new Integer(sec), new Integer(pri)); @@ -1866,28 +1922,26 @@ public class WriteCollationData implements UCD_Types { } if (sampleEq[sec] == null) sampleEq[sec] = chr; if (sampleEq[ter] == null) sampleEq[ter] = chr; - oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16); - int oldPrimaryValue = UCA.getPrimary(ces[q]); - int np = primaryDelta[oldPrimaryValue]; - if (oldPrimaryValue > 0x3400) { - System.out.println(Utility.hex(oldPrimaryValue) + " => " + Utility.hex(np)); - } + + // int oldPrimaryValue = UCA.getPrimary(ces[q]); + int np = fixPrimary(pri); hexBytes(np, newPrimary); - hexBytes(fixSecondary(UCA.getSecondary(ces[q])), newSecondary); - hexBytes(fixTertiary(UCA.getTertiary(ces[q])), newTertiary); - if (q == 0) { + hexBytes(fixSecondary(sec), newSecondary); + hexBytes(fixTertiary(ter), newTertiary); + if (isFirst) { if (!sameTopByte(np, lastNp)) { - summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0))); + summary.println("Last: " + Utility.hex(lastNp & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(lastChr,0))); summary.println(); if (doVariable) { doVariable = false; summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!"); summary.println(); } - summary.println("First: " + Utility.hex(np) + " " + ucd.getName(chr.charAt(0))); + summary.println("First: " + Utility.hex(np & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(chr,0))); } lastNp = np; + isFirst = false; } log.print("[" + newPrimary + ", " + newSecondary @@ -1898,17 +1952,17 @@ public class WriteCollationData implements UCD_Types { log.print("[,,]"); oldStr.append(UCA.ceToString(0)); } - longLog.print(" # " + oldStr + " # " + ucd.getName(chr.charAt(0))); + longLog.print(" # " + oldStr + " # " + ucd.getName(UTF16.charAt(chr, 0))); log.println(); lastChr = chr; } - summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(lastChr.charAt(0))); + summary.println("Last: " + Utility.hex(lastNp) + " " + ucd.getName(UTF16.charAt(lastChr, 0))); /* String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3"; for (int i = 0; i < sample.length(); ++i) { char ch = sample.charAt(i); - log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch)) + log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch)) + " " + ucd.getName(ch)); } */ @@ -1981,8 +2035,24 @@ public class WriteCollationData implements UCD_Types { static boolean isFixedIdeograph(int cp) { - return (0x3400 <= cp && cp <= 0x4DB5 || 0x4E00 <= cp && cp <= 0x9FA5 || 0xF900 <= cp && cp <= 0xFA2D); + return (0x3400 <= cp && cp <= 0x4DB5 + || 0x4E00 <= cp && cp <= 0x9FA5 + || 0xF900 <= cp && cp <= 0xFA2D // compat: most of these decompose anyway + || 0x20000 <= cp && cp <= 0x2A6D6 + || 0x2F800 <= cp && cp <= 0x2FA1D // compat: most of these decompose anyway + ); } +/* +3400;;Lo;0;L;;;;;N;;;;; +4DB5;;Lo;0;L;;;;;N;;;;; +4E00;;Lo;0;L;;;;;N;;;;; +9FA5;;Lo;0;L;;;;;N;;;;; +20000;;Lo;0;L;;;;;N;;;;; +2A6D6;;Lo;0;L;;;;;N;;;;; +2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; +... +2FA1D;CJK COMPATIBILITY IDEOGRAPH-2FA1D;Lo;0;L;2A600;;;;N;;;;; +*/ static int remapUCA_CompatibilityIdeographToCp(int cp) { switch (cp) { @@ -2175,6 +2245,18 @@ public class WriteCollationData implements UCD_Types { static final int secondaryDoubleStart = 0xD0; + static int fixPrimary(int x) { + int result = 0; + if (x <= 0xFFFF) result = primaryDelta[x]; + else result = getImplicitPrimary(x); + + /*if (x > 0x3400) { + System.out.println(Utility.hex(x) + " => " + Utility.hex(result)); + } + */ + return result; + } + static int fixSecondary(int x) { x = compactSecondary[x]; return fixSecondary2(x, compactSecondary[0x153], compactSecondary[0x157]); @@ -2301,7 +2383,7 @@ public class WriteCollationData implements UCD_Types { byte b = (byte)(x >>> shift); if (b != 0) { if (result.length() != 0) result.append(" "); - result.append(UCA.hex(b)); + result.append(Utility.hex(b)); //if (lastb == 0) System.err.println(" bad zero byte: " + result); } lastb = b; @@ -2352,7 +2434,7 @@ public class WriteCollationData implements UCD_Types { if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) { scripts[script] = primary; scriptChar[script] = ch; - if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch)); + if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch)); } } // get representative char for primary @@ -2469,7 +2551,7 @@ public class WriteCollationData implements UCD_Types { source = source.substring(0,source.length()-1); if (endMark == MARK1) { log.println("
"); - log.println("Mismatch: " + UCA.hex(source, " ") + log.println("Mismatch: " + Utility.hex(source, " ") + ", " + ucd.getName(source) + "
"); log.print(" NFD:"); } else { @@ -2557,11 +2639,11 @@ public class WriteCollationData implements UCD_Types { //if (firstRow) out.print(" width='6%'"); out.print(">"); - //log.println(UCA.hex(ch2.charAt(0))); + //log.println(Utility.hex(ch2.charAt(0))); boolean ignorable = col2.charAt(0) == 0; out.print(HTMLString(ch2) + "
" + (ignorable ? "" : "") - + UCA.hex(ch2, " ") + + Utility.hex(ch2, " ") + (ignorable ? "" : "") ); if (SHOW_CE) out.print("
" + UCA.toString(col2) + ""); @@ -2633,7 +2715,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String colNbase = collator.getSortKey(ch, option, false); String colCbase = collator.getSortKey(toC.normalize(ch), option, false); if (!colNbase.equals(colCbase)) { - /*System.out.println(UCA.hex(ch)); + /*System.out.println(Utility.hex(ch)); System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase));*/ @@ -2747,10 +2829,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String ch = (String)sortedD.get(col); String colN = (String)backN.get(ch); if (colN == null || colN.length() < 1) { - System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN)); + System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN)); } if (col == null || col.length() < 1) { - System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col)); + System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col)); } if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) { @@ -2758,14 +2840,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; } else { if (true && count < 200) { System.out.println(); - System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " ")); - System.out.println(" col: " + UCA.hex(col, " ")); + System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " ")); + System.out.println(" col: " + Utility.hex(col, " ")); System.out.println(compareMinusLast(col, lastCol)); - System.out.println(" lastCol: " + UCA.hex(lastCol, " ")); + System.out.println(" lastCol: " + Utility.hex(lastCol, " ")); System.out.println(); - System.out.println(" colN: " + UCA.hex(colN, " ")); + System.out.println(" colN: " + Utility.hex(colN, " ")); System.out.println(compareMinusLast(colN, lastColN)); - System.out.println(" lastColN: " + UCA.hex(lastColN, " ")); + System.out.println(" lastColN: " + Utility.hex(lastColN, " ")); } if (!showedLast) { log.println(""); @@ -2791,9 +2873,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; static void showLine(int count, String ch, String keyD, String keyN) { String decomp = toD.normalize(ch); - if (decomp.equals(ch)) decomp = ""; else decomp = "
<" + UCA.hex(decomp, " ") + "> "; + if (decomp.equals(ch)) decomp = ""; else decomp = "
<" + Utility.hex(decomp, " ") + "> "; log.println("" + count + "" - + UCA.hex(ch, " ") + + Utility.hex(ch, " ") + " " + ucd.getName(ch) + decomp + ""); @@ -2863,12 +2945,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; if (showName) { if (ch.equals(decomp)) { log.println(//title + counter + " " - UCA.hex(ch, " ") + Utility.hex(ch, " ") + " " + ucd.getName(ch) ); } else { log.println(//title + counter + " " - "" + UCA.hex(ch, " ") + "" + Utility.hex(ch, " ") + " " + ucd.getName(ch) + "" ); } @@ -2877,11 +2959,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String keyN = printableKey(backN.get(chobj)); if (keyD.equals(keyN)) { log.println(//title + counter + " " - UCA.hex(ch, " ") + " " + keyN); + Utility.hex(ch, " ") + " " + keyN); } else { log.println(//title + counter + " " - "" + UCA.hex(ch, " ") + " " + keyN - + "
" + UCA.hex(decomp, " ") + " " + keyD + "" + "" + Utility.hex(ch, " ") + " " + keyN + + "
" + Utility.hex(decomp, " ") + " " + keyD + "" ); } } diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java index a79556bbfe3..c9b8a4aa538 100644 --- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java +++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ -* $Date: 2001/10/26 23:32:03 $ -* $Revision: 1.3 $ +* $Date: 2001/10/31 00:01:28 $ +* $Revision: 1.4 $ * ******************************************************************************* */ @@ -285,7 +285,7 @@ public class WriteHTMLCollation implements UCD_Types { decompSortKey = remove(decompSortKey, '\u0020'); } if (!sortKey.equals(decompSortKey)) { - log.println("" + UCA.hex(ch) + log.println("" + Utility.hex(ch) + "" + UCA.toString(sortKey) + "" + UCA.toString(decompSortKey) + "" + ucd.getName(ch) @@ -762,9 +762,9 @@ public class WriteHTMLCollation implements UCD_Types { for (int secondary = 0; secondary < compactSecondary.length; ++secondary) { if (secondarySet.get(secondary)) { compactSecondary[secondary] = subtotal++; - /*System.out.println("compact[" + UCA.hex(secondary) - + "]=" + UCA.hex(compactSecondary[secondary]) - + ", " + UCA.hex(fixSecondary(secondary)));*/ + /*System.out.println("compact[" + Utility.hex(secondary) + + "]=" + Utility.hex(compactSecondary[secondary]) + + ", " + Utility.hex(fixSecondary(secondary)));*/ } } System.out.println(); @@ -822,7 +822,7 @@ public class WriteHTMLCollation implements UCD_Types { primaryDelta[primary] = CE >>> 8; } - if ((primary & 0xFF) == 0) System.out.println(UCA.hex(primary) + " => " + hexBytes(primaryDelta[primary])); + if ((primary & 0xFF) == 0) System.out.println(Utility.hex(primary) + " => " + hexBytes(primaryDelta[primary])); } @@ -916,7 +916,7 @@ public class WriteHTMLCollation implements UCD_Types { wasVariable = isVariable; } oldStr.setLength(0); - log.print(UCA.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; "); + log.print(Utility.hex(chr, " ") + "; " + (needsCaseBit(chr) ? '1' : '0') + "; "); boolean nonePrinted = true; for (int q = 0; q < len; ++q) { nonePrinted = false; @@ -972,7 +972,7 @@ public class WriteHTMLCollation implements UCD_Types { String sample = "\u3400\u3401\u4DB4\u4DB5\u4E00\u4E01\u9FA4\u9FA5\uAC00\uAC01\uD7A2\uD7A3"; for (int i = 0; i < sample.length(); ++i) { char ch = sample.charAt(i); - log.println(UCA.hex(ch) + " => " + UCA.hex(fixHan(ch)) + log.println(Utility.hex(ch) + " => " + Utility.hex(fixHan(ch)) + " " + ucd.getName(ch)); } */ @@ -1311,7 +1311,7 @@ public class WriteHTMLCollation implements UCD_Types { byte b = (byte)(x >>> shift); if (b != 0) { if (result.length() != 0) result.append(" "); - result.append(UCA.hex(b)); + result.append(Utility.hex(b)); //if (lastb == 0) System.err.println(" bad zero byte: " + result); } lastb = b; @@ -1360,7 +1360,7 @@ public class WriteHTMLCollation implements UCD_Types { if (cat <= ucd.OTHER_LETTER && cat != ucd.Lm) { scripts[script] = primary; scriptChar[script] = ch; - if (script == ucd.GREEK_SCRIPT) System.out.println("*" + UCA.hex(primary) + ucd.getName(ch)); + if (script == ucd.GREEK_SCRIPT) System.out.println("*" + Utility.hex(primary) + ucd.getName(ch)); } } // get representative char for primary @@ -1478,7 +1478,7 @@ public class WriteHTMLCollation implements UCD_Types { source = source.substring(0,source.length()-1); if (endMark == MARK1) { log.println("
"); - log.println("Mismatch: " + UCA.hex(source, " ") + log.println("Mismatch: " + Utility.hex(source, " ") + ", " + ucd.getName(source) + "
"); log.print(" NFD:"); } else { @@ -1566,11 +1566,11 @@ public class WriteHTMLCollation implements UCD_Types { //if (firstRow) out.print(" width='6%'"); out.print(">"); - //log.println(UCA.hex(ch2.charAt(0))); + //log.println(Utility.hex(ch2.charAt(0))); boolean ignorable = col2.charAt(0) == 0; out.print(HTMLString(ch2) + "
" + (ignorable ? "" : "") - + UCA.hex(ch2, " ") + + Utility.hex(ch2, " ") + (ignorable ? "" : "") ); if (SHOW_CE) out.print("
" + UCA.toString(col2) + ""); @@ -1632,7 +1632,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String colNbase = collator.getSortKey(ch, option, false); String colCbase = collator.getSortKey(toC.normalize(ch), option, false); if (!colNbase.equals(colCbase)) { - /*System.out.println(UCA.hex(ch)); + /*System.out.println(Utility.hex(ch)); System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase)); System.out.println(printableKey(colNbase));*/ @@ -1746,10 +1746,10 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String ch = (String)sortedD.get(col); String colN = (String)backN.get(ch); if (colN == null || colN.length() < 1) { - System.out.println("Missing colN value for " + UCA.hex(ch, " ") + ": " + printableKey(colN)); + System.out.println("Missing colN value for " + Utility.hex(ch, " ") + ": " + printableKey(colN)); } if (col == null || col.length() < 1) { - System.out.println("Missing col value for " + UCA.hex(ch, " ") + ": " + printableKey(col)); + System.out.println("Missing col value for " + Utility.hex(ch, " ") + ": " + printableKey(col)); } if (compareMinusLast(col, lastCol) == compareMinusLast(colN, lastColN)) { @@ -1757,14 +1757,14 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; } else { if (true && count < 200) { System.out.println(); - System.out.println(UCA.hex(ch, " ") + ", " + UCA.hex(lastCh, " ")); - System.out.println(" col: " + UCA.hex(col, " ")); + System.out.println(Utility.hex(ch, " ") + ", " + Utility.hex(lastCh, " ")); + System.out.println(" col: " + Utility.hex(col, " ")); System.out.println(compareMinusLast(col, lastCol)); - System.out.println(" lastCol: " + UCA.hex(lastCol, " ")); + System.out.println(" lastCol: " + Utility.hex(lastCol, " ")); System.out.println(); - System.out.println(" colN: " + UCA.hex(colN, " ")); + System.out.println(" colN: " + Utility.hex(colN, " ")); System.out.println(compareMinusLast(colN, lastColN)); - System.out.println(" lastColN: " + UCA.hex(lastColN, " ")); + System.out.println(" lastColN: " + Utility.hex(lastColN, " ")); } if (!showedLast) { log.println(""); @@ -1790,9 +1790,9 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; static void showLine(int count, String ch, String keyD, String keyN) { String decomp = toD.normalize(ch); - if (decomp.equals(ch)) decomp = ""; else decomp = "
<" + UCA.hex(decomp, " ") + "> "; + if (decomp.equals(ch)) decomp = ""; else decomp = "
<" + Utility.hex(decomp, " ") + "> "; log.println("" + count + "" - + UCA.hex(ch, " ") + + Utility.hex(ch, " ") + " " + ucd.getName(ch) + decomp + ""); @@ -1862,12 +1862,12 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; if (showName) { if (ch.equals(decomp)) { log.println(//title + counter + " " - UCA.hex(ch, " ") + Utility.hex(ch, " ") + " " + ucd.getName(ch) ); } else { log.println(//title + counter + " " - "" + UCA.hex(ch, " ") + "" + Utility.hex(ch, " ") + " " + ucd.getName(ch) + "" ); } @@ -1876,11 +1876,11 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;; String keyN = printableKey(backN.get(chobj)); if (keyD.equals(keyN)) { log.println(//title + counter + " " - UCA.hex(ch, " ") + " " + keyN); + Utility.hex(ch, " ") + " " + keyN); } else { log.println(//title + counter + " " - "" + UCA.hex(ch, " ") + " " + keyN - + "
" + UCA.hex(decomp, " ") + " " + keyD + "" + "" + Utility.hex(ch, " ") + " " + keyN + + "
" + Utility.hex(decomp, " ") + " " + keyD + "" ); } } diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java index 417a8c1f95d..637f35c53a4 100644 --- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java +++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $ -* $Date: 2001/10/26 23:33:07 $ -* $Revision: 1.8 $ +* $Date: 2001/10/31 00:02:27 $ +* $Revision: 1.9 $ * ******************************************************************************* */ @@ -414,15 +414,36 @@ public class GenerateData implements UCD_Types { public static void listProperties() throws IOException { - String propAbb = ""; String prop = ""; + String propAbb = ""; + String value = ""; + String valueAbb = ""; Map duplicates = new TreeMap(); Set sorted = new TreeSet(java.text.Collator.getInstance()); - Map accumulation = new TreeMap(); + Set accumulation = new TreeSet(java.text.Collator.getInstance()); String spacing; - for(int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) { + BufferedReader blocks = Utility.openUnicodeFile("Blocks", ucd.getVersion()); + String[] parts = new String[10]; + while (true) { + String line = blocks.readLine(); + if (line == null) break; + int commentPos = line.indexOf('#'); + if (commentPos >= 0) line = line.substring(0,commentPos); + line = line.trim(); + if (line.length() == 0) continue; + int count = Utility.split(line,';',parts); + if (count != 2) System.out.println("Whow!"); + value = fixGaps(parts[1].trim(), true); + valueAbb = "n/a"; + spacing = Utility.repeat(" ", 10-valueAbb.length()); + sorted.add("blk; " + valueAbb + spacing + "; " + value); + checkDuplicate(duplicates, accumulation, value, "Block=" + value); + } + blocks.close(); + + for (int k = 0; k < UCD_Names.NON_ENUMERATED.length; ++k) { propAbb = fixGaps(UCD_Names.NON_ENUMERATED[k][0], false); prop = fixGaps(UCD_Names.NON_ENUMERATED[k][1], true); spacing = Utility.repeat(" ", 10-propAbb.length()); @@ -430,6 +451,15 @@ public class GenerateData implements UCD_Types { checkDuplicate(duplicates, accumulation, propAbb, prop); if (!prop.equals(propAbb)) checkDuplicate(duplicates, accumulation, prop, prop); } + + for (int k = 0; k < UCD_Names.SUPER_CATEGORIES.length; ++k) { + valueAbb = fixGaps(UCD_Names.SUPER_CATEGORIES[k][0], false); + value = fixGaps(UCD_Names.SUPER_CATEGORIES[k][1], true); + spacing = Utility.repeat(" ", 10-valueAbb.length()); + sorted.add("gc; " + valueAbb + spacing + "; " + value); + checkDuplicate(duplicates, accumulation, value, "General_Category=" + value); + if (!value.equals(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, "General_Category=" + value); + } sorted.add("xx; T ; True"); checkDuplicate(duplicates, accumulation, "T", "xx=True"); @@ -460,7 +490,7 @@ public class GenerateData implements UCD_Types { if (!ubp.isDefined(i)) continue; if (ubp.isTest(i)) continue; - String value = ubp.getID(i, LONG); + value = ubp.getID(i, LONG); if (value.length() == 0) value = "none"; else if (value.equals("")) continue; value = fixGaps(value, true); @@ -469,9 +499,9 @@ public class GenerateData implements UCD_Types { value = ucd.getCase(value, FULL, TITLE); } - String abbvalue = ubp.getID(i, SHORT); - if (abbvalue.length() == 0) abbvalue = "no"; - abbvalue = fixGaps(abbvalue, false); + valueAbb = ubp.getID(i, SHORT); + if (valueAbb.length() == 0) valueAbb = "no"; + valueAbb = fixGaps(valueAbb, false); if (type == COMBINING_CLASS) { if (value.startsWith("Fixed_")) { continue; } @@ -480,13 +510,13 @@ public class GenerateData implements UCD_Types { /* String elide = ""; if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{" - + abbvalue + + valueAbb + "}"; String abb = ""; if (type != BINARY_PROPERTIES) abb = "\\p{" + UCD_Names.ABB_UNIFIED_PROPERTIES[i>>8] + "=" - + abbvalue + + valueAbb + "}"; String norm = ""; if (type != BINARY_PROPERTIES) norm = "\\p{" @@ -497,18 +527,18 @@ public class GenerateData implements UCD_Types { System.out.println("" + elide + "" + abb + "" + norm + ""); */ - spacing = Utility.repeat(" ", 10-abbvalue.length()); + spacing = Utility.repeat(" ", 10-valueAbb.length()); if (type == BINARY_PROPERTIES || type == DERIVED) { - sorted.add("ZZ; " + abbvalue + spacing + "; " + value); + sorted.add("ZZ; " + valueAbb + spacing + "; " + value); checkDuplicate(duplicates, accumulation, value, value); - if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, value); + if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value); continue; } - sorted.add(propAbb + "; " + abbvalue + spacing + "; " + value); + sorted.add(propAbb + "; " + valueAbb + spacing + "; " + value); checkDuplicate(duplicates, accumulation, value, prop + "=" + value); - if (!value.equalsIgnoreCase(abbvalue)) checkDuplicate(duplicates, accumulation, abbvalue, prop + "=" + value); + if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, prop + "=" + value); } PrintWriter log = Utility.openPrintWriter("PropertyAliases-" + ucd.getVersion() + "dX.txt"); @@ -525,7 +555,7 @@ public class GenerateData implements UCD_Types { log.println("# Note: no two property names can be the same,"); log.println("# nor can two property value names for the same property be the same."); log.println(); - Utility.print(log, accumulation.values(), "\r\n", new MyBreaker()); + Utility.print(log, accumulation, "\r\n", new MyBreaker()); log.println(); log.close(); } @@ -542,7 +572,7 @@ public class GenerateData implements UCD_Types { } } - static void checkDuplicate(Map m, Map accumulation, String toCheck, String originalComment) { + static void checkDuplicate(Map m, Set accumulation, String toCheck, String originalComment) { toCheck = skeleton(toCheck); String comment = "{" + originalComment + "}"; @@ -575,14 +605,15 @@ public class GenerateData implements UCD_Types { } // accumulate differences + /* String acc = (String)accumulation.get(toCheck); - /*if (acc == null) { + if (acc == null) { acc = "# \"" + toCheck + "\":\t" + originalComment; } acc += ";\t" + result; */ result.add(comment); - accumulation.put(toCheck, "# \"" + toCheck + "\":\t" + result); + accumulation.add("# " + result.toString() + ":\t" + toCheck); } else { result = new TreeSet(); result.add(comment); diff --git a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt index 0060965364f..526f7967e3c 100644 --- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt +++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt @@ -12,8 +12,10 @@ # # FORMAT # -# Each line has three fields. Where the first field is AA, BB, or ZZ, then -# the line describes a property name. +# Each line has three fields, separated by semicolons. +# +# First Field: Where the first field is AA, BB, or ZZ, then the line describes a property name: +# # AA - non-enumerated properties # BB - enumerated, non-binary properties # ZZ - binary properties and quick-check properties @@ -28,10 +30,20 @@ # xx stands for any binary property # qc stands for any quick-check property # -# With loose matching of property names, case distinctions, whitespace, +# Second Field: The second field is an abbreviated name. +# If there is no abbreviated name available, the field is marked with "n/a". +# +# Third Field: The third field is a long name. +# +# With loose matching of property names, the case distinctions, whitespace, # and '_' are ignored. # -# NOTE: the property value names are NOT unique across properties, especially +# NOTE: Currently there is at most one abbreviated name and one long name for +# each property and property value. However, in the future additional aliases +# may be added. In such a case, the first line for the property or property value +# would have the preferred alias for output. +# +# NOTE: The property value names are NOT unique across properties, especially # with loose matches. For example, # AL means Arabic Letter for the Bidi_Class property, and # AL means Alpha_Left for the Combining_Class property, and @@ -41,5 +53,7 @@ # cc means Combining_Class property, and # cc means the General_Category property value Control (cc) # +# Comments at the end of the file show cases of non-unique names. +# # The combination of property value and property name is, however, unique. # For more information, see UTR #24: Regular Expression Guidelines diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java index dff4f19fec5..e826d722135 100644 --- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java +++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $ -* $Date: 2001/10/26 23:33:07 $ -* $Revision: 1.5 $ +* $Date: 2001/10/31 00:02:27 $ +* $Revision: 1.6 $ * ******************************************************************************* */ @@ -31,7 +31,8 @@ final class UCD_Names implements UCD_Types { {"suc", "Simple_Uppercase_Mapping"}, {"stc", "Simple_Titlecase_Mapping"}, {"sfc", "Simple_Case_Folding"}, - {"scc", "Special_Case_Condition"} + {"scc", "Special_Case_Condition"}, + {"blk", "Block"} }; static final String[] UNIFIED_PROPERTIES = { @@ -404,9 +405,19 @@ final class UCD_Names implements UCD_Types { "FinalPunctuation" // = Punctuation, Final quote 30 (may behave like Ps or Pe dependingon usage) }; + static final String[][] SUPER_CATEGORIES = { + {"L", "Letter"}, + {"M", "Mark"}, + {"N", "Number"}, + {"Z", "Separator"}, + {"C", "Other"}, + {"S", "Symbol"}, + {"P", "Punctuation"}, + }; - static String[] BC = { + + static final String[] BC = { "L", // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs) "R", // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts "EN", // European Number @@ -752,8 +763,6 @@ final class UCD_Names implements UCD_Types { "H", // U+11C2; H; HANGUL JONGSEONG HIEUH }; - - /* static { UNASSIGNED_INFO.code = '\uFFFF'; diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java index 65ab2ae3357..97851e139ec 100644 --- a/tools/unicodetools/com/ibm/text/utility/Utility.java +++ b/tools/unicodetools/com/ibm/text/utility/Utility.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $ -* $Date: 2001/10/26 23:33:48 $ -* $Revision: 1.6 $ +* $Date: 2001/10/31 00:02:54 $ +* $Revision: 1.7 $ * ******************************************************************************* */ @@ -122,6 +122,14 @@ public final class Utility { // COMMON UTILITIES return hex(ch,4); } + public static String hex(byte ch) { + return hex(ch & 0xFF,2); + } + + public static String hex(char ch) { + return hex(ch & 0xFFFF,4); + } + public static String hex(Object s) { return hex(s, 4, " "); } @@ -149,21 +157,21 @@ public final class Utility { // COMMON UTILITIES return result.toString(); } - public static String hex(byte[] o, int start, int end) { + public static String hex(byte[] o, int start, int end, String separator) { StringBuffer result = new StringBuffer(); //int ch; for (int i = start; i < end; ++i) { - if (i != 0) result.append(' '); - result.append(hex(o[i] & 0xFF, 2)); + if (i != 0) result.append(separator); + result.append(hex(o[i])); } return result.toString(); } - public static String hex(char[] o, int start, int end) { + public static String hex(char[] o, int start, int end, String separator) { StringBuffer result = new StringBuffer(); for (int i = start; i < end; ++i) { - if (i != 0) result.append(' '); - result.append(hex(o[i], 4)); + if (i != 0) result.append(separator); + result.append(hex(o[i])); } return result.toString(); }