diff --git a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
index 42a64314b40..97278a46bb9 100644
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
-* $Date: 2001/09/19 23:32:21 $
-* $Revision: 1.4 $
+* $Date: 2001/10/25 20:35:42 $
+* $Revision: 1.5 $
*
*******************************************************************************
*/
@@ -30,6 +30,33 @@ public class GenOverlap implements UCD_Types {
static Normalizer nfd;
static Normalizer nfkd;
+    // Checks every represented code point whose decomposition type is >= UCD.COMPATIBILITY:
+    // prints the code point and both CE lists whenever the CEs of its NFKD form (tertiaries
+    // remapped for the decomposition type) differ from the CEs fetched with the decomp flag off.
+    public static void validateUCA(UCA collatorIn) throws Exception {
+        collator = collatorIn;
+        ucd = UCD.make();
+        nfd = new Normalizer(Normalizer.NFD);
+        nfkd = new Normalizer(Normalizer.NFKD);
+
+        for (int cp = 0x0; cp <= 0x10FFFF; ++cp) {
+            Utility.dot(cp);
+            if (!ucd.isRepresented(cp)) continue;
+            final byte decompType = ucd.getDecompositionType(cp);
+            if (decompType < UCD.COMPATIBILITY) continue;
+
+            final String nfkdForm = nfkd.normalize(cp);
+            final CEList fromDecomp = getCEList(cp, nfkdForm, true, decompType);
+            final CEList direct = getCEList(UTF16.valueOf(cp), false);
+            if (direct.equals(fromDecomp)) continue;
+            Utility.fixDot();
+            System.out.println();
+            System.out.println(ucd.getCodeAndName(cp));
+            System.out.println(direct);
+            System.out.println(fromDecomp);
+        }
+    }
+
public static void test(UCA collatorIn) throws Exception {
collator = collatorIn;
@@ -68,7 +95,7 @@ public class GenOverlap implements UCD_Types {
byte decompType = ucd.getDecompositionType(cp);
if (decompType >= UCD.COMPATIBILITY) {
String decomp = nfkd.normalize(cp);
- CEList celist = getCEList(cp, decomp, decompType);
+ CEList celist = getCEList(cp, decomp, true, decompType);
addString(decomp, celist);
System.out.println("Adding: " + ucd.getCodeAndName(cp) + "\t" + celist);
}
@@ -182,16 +209,22 @@ public class GenOverlap implements UCD_Types {
}
static private CEList getCEList(String s) {
- int len = collator.getCEs(s, true, ces);
+ return getCEList(s, true); // convenience overload: delegates with the decomp flag on
+ }
+ // Fills the class-level ces array via collator.getCEs(s, decomp, ces) and wraps the first len entries.
+ static private CEList getCEList(String s, boolean decomp) {
+ int len = collator.getCEs(s, decomp, ces);
return new CEList(ces, 0, len);
}
- static private CEList getCEList(int originalChar, String s, byte type) {
- int len = collator.getCEs(s, true, ces);
- for (int i = 0; i < len; ++i) {
- ces[i] = UCA.makeKey(UCA.getPrimary(ces[i]),
- UCA.getSecondary(ces[i]),
- CEList.remap(originalChar, type, UCA.getTertiary(ces[i])));
+ static private CEList getCEList(int originalChar, String s, boolean decomp, byte type) {
+ int len = collator.getCEs(s, decomp, ces); // decomp is forwarded unchanged to the collator
+ if (decomp) { // tertiary remapping is applied only when the decomp flag is set
+ for (int i = 0; i < len; ++i) {
+ ces[i] = UCA.makeKey(UCA.getPrimary(ces[i]), // primary and secondary are kept as-is
+ UCA.getSecondary(ces[i]),
+ CEList.remap(originalChar, type, UCA.getTertiary(ces[i]))); // tertiary rewritten from originalChar and type
+ }
}
return new CEList(ces, 0, len);
}
@@ -290,7 +323,7 @@ public class GenOverlap implements UCD_Types {
}
public static void generateRevision (UCA collatorIn) throws Exception {
- generateRevision(collatorIn, false);
+ //generateRevision(collatorIn, false); // disabled: only the run with the second argument true remains
generateRevision(collatorIn, true);
}
@@ -336,7 +369,7 @@ public class GenOverlap implements UCD_Types {
int cp;
for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(str, i);
- if (0xFF67 <= cp && cp <= 0xFF6F) {
+ if (0xFF3F == cp) {
System.out.println("debug");
}
boolean mashLast = false;
@@ -351,7 +384,7 @@ public class GenOverlap implements UCD_Types {
int s = UCA.getSecondary(ces[j]);
boolean needsFix = (s != 0x20 && p != 0);
if (needsFix) ++len;
- int t = (doMax && len > 1 && j == len-1 ? 0x1F : CEList.remap(cp, type, UCA.getTertiary(ces[j])));
+ int t = (doMax && j > 0 ? 0x1F : CEList.remap(cp, type, UCA.getTertiary(ces[j])));
if (needsFix) {
ces[j++] = UCA.makeKey(p, 0x20, t); // Set Extra
System.arraycopy(ces, j, ces, j+1, len - j); // Insert HOLE!
@@ -413,7 +446,7 @@ public class GenOverlap implements UCD_Types {
newKeys.removeAll(joint);
oldKeys.removeAll(joint);
- PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"));
+ PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false);
Iterator it = list.iterator();
int last = -1;
while (it.hasNext()) {
@@ -657,4 +690,51 @@ public class GenOverlap implements UCD_Types {
+ "
" + nf.format(sd)
+ " | ");
}
+
+    // Writes ListCyrillic.txt: every represented Cyrillic character below U+FFFF, NFD-decomposed
+    // with its Mn characters stripped, ordered by collatorIn; decompositions that lost
+    // marks are listed afterwards with a "###" prefix.
+    public static void listCyrillic(UCA collatorIn) throws IOException {
+        PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false);
+        Set set = new TreeSet(collatorIn);   // mark-stripped forms
+        Set set2 = new TreeSet(collatorIn);  // full decompositions that contained marks
+        ucd = UCD.make();
+        nfd = new Normalizer(Normalizer.NFD);
+
+        for (char i = 0; i < 0xFFFF; ++i) {
+            Utility.dot(i);
+            if (!ucd.isRepresented(i)) continue;
+            if (ucd.getScript(i) != CYRILLIC_SCRIPT) continue;
+            String decomp = nfd.normalize(String.valueOf(i));
+            String oldDecomp = decomp;
+            // Walk backwards so deleting a mark never shifts a not-yet-visited index;
+            // a forward walk skips the character that follows each removed mark.
+            for (int j = decomp.length() - 1; j >= 0; --j) {
+                if (ucd.getCategory(decomp.charAt(j)) == Mn) {
+                    decomp = decomp.substring(0,j) + decomp.substring(j+1);
+                }
+            }
+            if (decomp.length() == 0) continue;
+            set.add(decomp);
+            if (!decomp.equals(oldDecomp)) set2.add(oldDecomp);
+        }
+
+        Iterator it = set.iterator();
+        while (it.hasNext()) {
+            String s = (String) it.next();
+            // Strings are immutable: the result of replace must be kept, not discarded.
+            String name = Utility.replace(ucd.getName(s.charAt(0)), "CYRILLIC ", "");
+            log.println("# " + s + " <> XXX ; # " + name);
+        }
+
+        it = set2.iterator();
+        while (it.hasNext()) {
+            String s = (String) it.next();
+            String name = Utility.replace(ucd.getName(s.charAt(0)), "CYRILLIC ", "");
+            log.println("### " + s + " <> XXX ; # " + name);
+        }
+        log.close();
+    }
+
+
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java
index 3855c3cf0c9..22055cba399 100644
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
-* $Date: 2001/09/19 23:32:21 $
-* $Revision: 1.4 $
+* $Date: 2001/10/25 20:35:41 $
+* $Revision: 1.5 $
*
*******************************************************************************
*/
@@ -61,9 +61,13 @@ This is because of shared
characters between scripts with different directions, like French with Arabic or Greek.
*/
-final public class UCA {
+final public class UCA implements Comparator {
public static final String copyright =
"Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
+
+ public int compare(Object a, Object b) { // Comparator method: both arguments are cast to String
+ return getSortKey((String) a).compareTo(getSortKey((String) b)); // order by comparing the two sort keys
+ }
/**
* Version of the UCA tables to use
diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
index f61ddac467a..293a8dd3d7a 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
-* $Date: 2001/09/19 23:31:50 $
-* $Revision: 1.1 $
+* $Date: 2001/10/25 20:35:41 $
+* $Revision: 1.2 $
*
*******************************************************************************
*/
@@ -30,6 +30,7 @@ public class WriteCharts implements UCD_Types {
ucd = UCD.make();
Normalizer nfd = new Normalizer(Normalizer.NFD);
+ Normalizer nfc = new Normalizer(Normalizer.NFC);
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
@@ -53,18 +54,23 @@ public class WriteCharts implements UCD_Types {
int lastPrimary = -1;
- String lastSortKey = null;
+ String lastSortKey = "\u0000";
int high = uca.getSortKey("a").charAt(0);
int variable = UCA.getPrimary(uca.getVariableHigh());
int columnCount = 0;
+ Utility.copyTextFile("index.html", true, "CollationCharts\\index.html");
+ Utility.copyTextFile("charts.css", false, "CollationCharts\\charts.css");
+ Utility.copyTextFile("help.html", true, "CollationCharts\\help.html");
+
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html");
indexFile.println("");
indexFile.println("UCA Default Collation Table");
indexFile.println("");
+ indexFile.println("");
indexFile.println("UCA Default Collation Table
");
indexFile.println("Help");
@@ -102,19 +108,31 @@ public class WriteCharts implements UCD_Types {
oldScript = script;
}
- int strength = 6;
- if (lastSortKey != null && sortKey.charAt(0) == lastSortKey.charAt(0)) {
- strength = uca.strengthDifference(sortKey, lastSortKey);
- if (strength < 0) strength = -strength;
- }
+ boolean firstPrimaryEquals = sortKey.charAt(0) == lastSortKey.charAt(0);
+
+ int strength = uca.strengthDifference(sortKey, lastSortKey);
+ if (strength < 0) strength = -strength;
lastSortKey = sortKey;
+
+ // find out if this is an expansion: more than one primary weight
+
+ int primaryCount = 0;
+ for (int i = 0; i < sortKey.length(); ++i) {
+ char w = sortKey.charAt(i);
+ if (w == 0) break;
+ ++ primaryCount;
+ }
+
String breaker = "";
- if (columnCount > 10 || strength > 5) {
- if (strength <= 5) breaker = "
| ";
- else breaker = "
";
+ if (columnCount > 10 || !firstPrimaryEquals) {
+ if (!firstPrimaryEquals) breaker = "
";
+ else breaker = "
| "; // indent 1 cell
columnCount = 0;
}
- output.println(breaker + CLASSNAME[strength] + s
+
+ String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
+
+ output.println(breaker + classname + nfc.normalize(s)
+ "
" + Utility.hex(s)
//+ "
" + script
//+ "
" + UCA.toString(sortKey)
@@ -133,8 +151,15 @@ public class WriteCharts implements UCD_Types {
"",
" | ",
" | ",
- " | ",
- " | "};
+ " | "};
+
+ static final String[] XCLASSNAME = {
+ " | ",
+ " | ",
+ " | ",
+ " | ",
+ " | ",
+ " | "};
static PrintWriter indexFile;
diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
index aa04e472ab6..da2345bd325 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
-* $Date: 2001/09/19 23:32:21 $
-* $Revision: 1.4 $
+* $Date: 2001/10/25 20:35:41 $
+* $Revision: 1.5 $
*
*******************************************************************************
*/
@@ -63,9 +63,13 @@ public class WriteCollationData implements UCD_Types {
String arg = args[i];
if (arg.equalsIgnoreCase("WriteRulesWithNames")) writeRules(WITH_NAMES);
else if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(collator);
+ else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(collator);
+ else if (arg.equalsIgnoreCase("writeNonspacingDifference")) writeNonspacingDifference();
+
else if (arg.equalsIgnoreCase("WriteCharts")) WriteCharts.test(collator);
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(collator);
+ else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(collator);
else if (arg.equalsIgnoreCase("WriteRules")) writeRules(WITHOUT_NAMES);
else if (arg.equalsIgnoreCase("WriteRulesXML")) writeRules(IN_XML);
@@ -748,6 +752,47 @@ public class WriteCollationData implements UCD_Types {
return len;
}
+    // Writes GEN_DIR + "UCA_Nonspacing.txt" ("UTF8" writer, leading U+FEFF): one sorted line for
+    // every represented code point whose nonspacing status (category Mn or Me) disagrees
+    // with whether all of its collation elements have a zero primary weight.
+    static void writeNonspacingDifference() throws IOException {
+        PrintWriter diLog = new PrintWriter(
+            new BufferedWriter(
+                new OutputStreamWriter(
+                    new FileOutputStream(GEN_DIR + "UCA_Nonspacing.txt"),
+                    "UTF8"),
+                32*1024));
+        diLog.write('\uFEFF'); // byte order mark
+
+        Set sorted = new TreeSet();
+
+        // Inclusive upper bound so U+10FFFF is examined too, matching GenOverlap.validateUCA.
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+            Utility.dot(i);
+            if (!ucd.isRepresented(i)) continue;
+            byte cat = ucd.getCategory(i);
+            boolean isNonSpacing = cat == Mn || cat == Me;
+            CEList celist = collator.getCEList(UTF32.valueOf32(i), true);
+            boolean isPrimaryIgnorable = true;
+            for (int j = 0; j < celist.length(); ++j) {
+                if (UCA.getPrimary(celist.at(j)) != 0) {
+                    isPrimaryIgnorable = false;
+                    break;
+                }
+            }
+
+            if (isNonSpacing != isPrimaryIgnorable) {
+                sorted.add(ucd.getCategoryID(i)
+                    + "\t" + celist
+                    + "\t" + ucd.getCodeAndName(i));
+            }
+        }
+
+        Utility.print(diLog, sorted, "\r\n");
+
+        diLog.close();
+    }
+
static void writeContractions() throws IOException {
PrintWriter diLog = new PrintWriter(
new BufferedWriter(
|