diff --git a/icu4j/src/com/ibm/icu/dev/demo/chart/UnicodeChart.java b/icu4j/src/com/ibm/icu/dev/demo/chart/UnicodeChart.java
index aed90038533..a3ee1d377ee 100644
--- a/icu4j/src/com/ibm/icu/dev/demo/chart/UnicodeChart.java
+++ b/icu4j/src/com/ibm/icu/dev/demo/chart/UnicodeChart.java
@@ -7,19 +7,33 @@
package com.ibm.icu.dev.demo.chart;
import java.io.*;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
import com.ibm.icu.dev.test.util.*;
+import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.lang.*;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.VersionInfo;
public class UnicodeChart {
+ static int surrogateType = UCharacter.getType('\ud800');
+ static int privateUseType = UCharacter.getType('\ue000');
+
public static void main(String[] args) throws IOException {
//int rowWidth = 256;
- PrintWriter pw = BagFormatter.openUTF8Writer("", "UnicodeChart.html");
+ VersionInfo vi = UCharacter.getUnicodeVersion();
+ String version = vi.getMajor() + "." + vi.getMinor() + "." + vi.getMilli();
+ PrintWriter pw = BagFormatter.openUTF8Writer("C:\\DATA\\GEN\\", "UnicodeChart.html");
pw.println("
Unicode 4.0 Chart
");
+ pw.println("Unicode " + version + " Chart
");
/*pw.println(" | ");
for (int j = 0; j < rowWidth; ++j) {
@@ -29,58 +43,118 @@ public class UnicodeChart {
*/
// TODO: fix Utility to take ints
- int surrogateType = UCharacter.getType('\ud800');
- int privateUseType = UCharacter.getType('\ue000');
- System.out.println("Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
- System.out.println("Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
+ System.out.println("//Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
+ System.out.println("//Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
//boolean gotOne = true;
int columns = 0;
- int limit = 0x10FFFF;
+ int limit = 0x110000/16;
char lastType = 'x';
int lastCount = 0;
pw.println("
---|
");
pw.println("Key");
pw.println("X | Graphic characters |
");
pw.println("\u00A0 | Whitespace |
");
- pw.println(" | Other Default Ignorable |
");
- pw.println(" | Undefined, Private Use, or Surrogates |
");
- pw.println(" | Noncharacter |
");
+ pw.println("\u00A0 | Other Default Ignorable |
");
+ pw.println("\u00A0 | Undefined, Private Use, or Surrogates |
");
+ pw.println("\u00A0 | Noncharacter |
");
pw.println("
");
pw.println("Copyright \u00A9 2003, Mark Davis. All Rights Reserved.");
pw.close();
- System.out.println("columns: " + columns);
+ System.out.println("//columns: " + columns);
}
+
+ /**
+  * Classifies a code point into one of the chart's single-letter cell types.
+  * The checks run in priority order and the first match wins:
+  * 'n' noncharacter; 'u' unassigned, surrogate, or private use;
+  * 'w' UCharacter.isUWhiteSpace; 'i' default ignorable; 'v' anything else.
+  *
+  * @param i the code point to classify
+  * @return the type letter for the code point
+  */
+ private static char getType(int i) {
+     final int category = UCharacter.getType(i);
+     if (UCharacter.hasBinaryProperty(i, UProperty.NONCHARACTER_CODE_POINT)) {
+         return 'n';
+     }
+     final boolean unusable = category == Character.UNASSIGNED
+             || category == surrogateType
+             || category == privateUseType;
+     if (unusable) {
+         return 'u';
+     }
+     if (UCharacter.isUWhiteSpace(i)) {
+         return 'w';
+     }
+     return UCharacter.hasBinaryProperty(i, UProperty.DEFAULT_IGNORABLE_CODE_POINT) ? 'i' : 'v';
+ }
static String hex(int i, int padTo) {
String result = Integer.toHexString(i).toUpperCase(java.util.Locale.ENGLISH);
diff --git a/icu4j/src/com/ibm/icu/dev/test/util/CollectionUtilities.java b/icu4j/src/com/ibm/icu/dev/test/util/CollectionUtilities.java
index bd41b1a04b8..9b449dcb369 100644
--- a/icu4j/src/com/ibm/icu/dev/test/util/CollectionUtilities.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/CollectionUtilities.java
@@ -206,29 +206,32 @@ public final class CollectionUtilities {
}
/**
- * Returns an int with bits set.
- * Bit 4: a - b != {}
- * Bit 2: a * b != {} // * is intersects
- * Bit 1: b - a != {}
- * Thus the bits can be used to get the following relations, plus
- * for A_SUPERSET_B, use (x & NOT_A_SUPERSET_B) == 0
- * for A_SUBSET_B, use (x & NOT_A_SUBSET_B) == 0
- * for A_EQUALS_B, use (x & A_PROPER_DISJOINT_B) == 0
- * for A_DISJOINT_B, use (x & NOT_A_DISJOINT_B) == 0
- * for A_OVERLAPS_B, use (x & NOT_A_DISJOINT_B) == 1
+ * Used for results of getContainmentRelation
*/
- static final int
- // ContainmentRelation
+ public static final int
ALL_EMPTY = 0,
NOT_A_SUPERSET_B = 1,
NOT_A_DISJOINT_B = 2,
NOT_A_SUBSET_B = 4,
+ NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B,
- A_PROPER_DISJOINT_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B,
A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B;
- public static int getContainmentRelation(Collection a, Collection b) {
+ /**
+ * Assesses all the possible containment relations between collections A and B with one call.
+ * Returns an int with bits set, according to a "Venn Diagram" view of A vs B.
+ * NOT_A_SUPERSET_B: b - a != {}
+ * NOT_A_DISJOINT_B: a * b != {} // * is intersection
+ * NOT_A_SUBSET_B: a - b != {}
+ * Thus the bits can be used to get the following relations:
+ * for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0
+ * for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0
+ * for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0
+ * for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0
+ * for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0
+ */
+ public static int getContainmentRelation(Collection a, Collection b) {
if (a.size() == 0) {
return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B;
} else if (b.size() == 0) {
diff --git a/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java b/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
index 29901b4cd01..1c1ccb6af65 100644
--- a/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
@@ -185,7 +185,7 @@ public class TestUtilities extends TestFmwk {
case CollectionUtilities.A_PROPER_SUBSET_OF_B:
checkContainment(b.containsAll(a) && !a.equals(b), a, relation, b);
break;
- case CollectionUtilities.A_PROPER_DISJOINT_B:
+ case CollectionUtilities.NOT_A_EQUALS_B:
checkContainment(!CollectionUtilities.containsSome(a, b) && a.size() != 0 && b.size() != 0, a, relation, b);
break;
case CollectionUtilities.A_PROPER_SUPERSET_B:
diff --git a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java
index a33c1682e13..5723e85a57c 100644
--- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java
@@ -318,8 +318,23 @@ Name: Unicode_1_Name
* @return the unicode map
*/
public UnicodeMap getUnicodeMap() {
- return (UnicodeMap) getUnicodeMap_internal().clone();
+ return getUnicodeMap(false);
}
+
+ /**
+  * Returns a map from code points to this property's values.
+  *
+  * @param getShortest if false, a clone of the internal map is returned;
+  *        if true, a new map is built by calling getValue(cp, true) for
+  *        every code point from 0 through 0x10FFFF (presumably yielding the
+  *        shortest value alias -- confirm against getValue)
+  * @return either a clone of the internal map or a newly built map
+  */
+ public UnicodeMap getUnicodeMap(boolean getShortest) {
+     if (getShortest) {
+         UnicodeMap shortMap = new UnicodeMap();
+         int cp = 0;
+         while (cp <= 0x10FFFF) {
+             shortMap.put(cp, getValue(cp, true));
+             ++cp;
+         }
+         return shortMap;
+     }
+     return (UnicodeMap) getUnicodeMap_internal().clone();
+ }
+
/**
* @return the unicode map
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
index 4019fec01fa..dd863002008 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2005/03/26 05:40:04 $
-* $Revision: 1.38 $
+* $Date: 2005/10/11 19:39:15 $
+* $Revision: 1.39 $
*
*******************************************************************************
*/
@@ -24,7 +24,7 @@ import com.ibm.icu.text.UnicodeSet;
public class GenerateData implements UCD_Types {
- static final boolean DEBUG = false;
+ /* static final boolean DEBUG = false;
static final String HORIZONTAL_LINE = "# ================================================";
@@ -156,8 +156,7 @@ public class GenerateData implements UCD_Types {
System.out.println("New File: " + newFile);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
- System.out.println("Most recent: " + mostRecent);
+ org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
@@ -180,7 +179,7 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
- /*
+
public static void listStrings(String file, int type, int subtype) throws IOException {
Default.ucd = UCD.make("3.1.0");
UCD ucd30 = UCD.make("3.0.0");
@@ -199,14 +198,14 @@ public class GenerateData implements UCD_Types {
}
output.close();
}
- */
+
public static void generateCompExclusions() throws IOException {
String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
+ String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false));
output.println(UnicodeDataFile.generateDateLine());
@@ -280,11 +279,11 @@ public class GenerateData implements UCD_Types {
}
public String optionalComment(int cp) { return ""; }
- /*
+
public String valueName(int cp) {
return UTF32.length32(ucdData.getDecompositionMapping(cp)) + "";
}
- */
+
public byte status(int cp) {
if (getType(cp) == type) return INCLUDE;
return EXCLUDE;
@@ -356,12 +355,12 @@ public class GenerateData implements UCD_Types {
if (extra != null) checkDuplicate(duplicates, accumulation, extra, "General_Category=" + value);
}
- /*
+
addLine(sorted, "xx; T ; True");
checkDuplicate(duplicates, accumulation, "T", "xx=True");
addLine(sorted, "xx; F ; False");
checkDuplicate(duplicates, accumulation, "F", "xx=False");
- */
+
addLine(sorted, "qc", UCD_Names.YN_TABLE[1], UCD_Names.YN_TABLE_LONG[1], null);
checkDuplicate(duplicates, accumulation, UCD_Names.YN_TABLE[1], "qc=" + UCD_Names.YN_TABLE_LONG[1]);
addLine(sorted, "qc", UCD_Names.YN_TABLE[0], UCD_Names.YN_TABLE_LONG[0], null);
@@ -440,7 +439,7 @@ public class GenerateData implements UCD_Types {
valueAbb = "n/a";
}
- /*
+
String elide = "";
if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
+ valueAbb
@@ -458,9 +457,9 @@ public class GenerateData implements UCD_Types {
+ value
+ "}";
System.out.println("
" + elide + " | " + abb + " | " + norm + " |
");
- */
- /*
+
+
if (type == BINARY_PROPERTIES || type == DERIVED) {
//if (value.equals(YN_TABLE_LONG[1])) continue;
addLine(sorted, PROP_TYPE_NAMES[BINARY][1], valueAbb, value);
@@ -468,7 +467,7 @@ public class GenerateData implements UCD_Types {
if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value);
continue;
}
- */
+
if (type == COMBINING_CLASS) {
String num = up.getValue(NUMBER);
@@ -487,20 +486,20 @@ public class GenerateData implements UCD_Types {
while (blockIterator.hasNext()) {
addLine(sorted, "blk", "n/a", (String)blockIterator.next(), null);
}
- /*
+
UCD.BlockData blockData = new UCD.BlockData();
int blockId = 0;
while (Default.ucd().getBlockData(blockId++, blockData)) {
addLine(sorted, "blk", "n/a", blockData.name);
}
- */
+
String filename = "PropertyAliases";
String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
+ String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@@ -520,7 +519,7 @@ public class GenerateData implements UCD_Types {
filename = "PropertyValueAliases";
newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
- mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
+ mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@@ -536,7 +535,7 @@ public class GenerateData implements UCD_Types {
filename = "PropertyAliasSummary";
newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
- mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
+ mostRecent = org.unicode.cldr.util.Utility.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println();
log.println(HORIZONTAL_LINE);
@@ -650,13 +649,13 @@ public class GenerateData implements UCD_Types {
}
// accumulate differences
- /*
+
String acc = (String)accumulation.get(toCheck);
if (acc == null) {
acc = "# \"" + toCheck + "\":\t" + originalComment;
}
acc += ";\t" + result;
- */
+
result.add(comment);
accumulation.add("# " + result.toString() + ":\t" + toCheck);
} else {
@@ -673,7 +672,7 @@ public class GenerateData implements UCD_Types {
String newFile = directory + file + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
+ String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
int last = -1;
@@ -682,7 +681,7 @@ public class GenerateData implements UCD_Types {
if (up == null) continue;
if (up.skipInDerivedListing()) continue;
- /*
+
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|| i == (BINARY_PROPERTIES | Non_break)
|| i == (BINARY_PROPERTIES | CaseFoldTurkishI)
@@ -690,11 +689,11 @@ public class GenerateData implements UCD_Types {
|| i == (JOINING_TYPE | JT_U)
|| i == (JOINING_GROUP | NO_SHAPING)
) continue; // skip zero case
- */
- /*if (skipSpecial == SKIP_SPECIAL
+
+ if (skipSpecial == SKIP_SPECIAL
&& i >= (BINARY_PROPERTIES | CompositionExclusion)
&& i < (AGE + NEXT_ENUM)) continue;
- */
+
if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
output.println();
output.println(HORIZONTAL_LINE);
@@ -741,7 +740,8 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
System.out.println();
}
-
+
+ */
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, fileName);
@@ -750,13 +750,13 @@ public class GenerateData implements UCD_Types {
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
//PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
//String[] batName = {""};
- //String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
+ //String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
String[] example = new String[256];
//log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
//log.println(UnicodeDataFile.generateDateLine());
- /*log.println("#");
+ log.println("#");
log.println("# Normalization Test Suite");
log.println("# Format:");
log.println("#");
@@ -790,7 +790,7 @@ public class GenerateData implements UCD_Types {
log.println("#");
log.println("@Part0 # Specific cases");
- log.println("#");*/
+ log.println("#");
for (int j = 0; j < testSuiteCases.length; ++j) {
writeLine(testSuiteCases[j], log, false);
@@ -897,6 +897,7 @@ public class GenerateData implements UCD_Types {
fc.close();
//Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
+ /*
static void handleIdentical() throws IOException {
DirectoryIterator target = new DirectoryIterator(GEN_DIR + File.separator + "DerivedData");
@@ -916,6 +917,7 @@ public class GenerateData implements UCD_Types {
}
}
+*/
static void writeLine(String cc, PrintWriter log, boolean check) {
String c = Default.nfc().normalize(cc);
String d = Default.nfd().normalize(cc);
@@ -982,14 +984,14 @@ public class GenerateData implements UCD_Types {
"\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"
};
-
+ /*
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
+ String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
DiffPropertyLister dpl;
UnicodeSet cummulative = new UnicodeSet();
@@ -1072,7 +1074,7 @@ public class GenerateData implements UCD_Types {
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
+ String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
try {
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@@ -1116,7 +1118,7 @@ public class GenerateData implements UCD_Types {
log.println(HORIZONTAL_LINE);
log.println();
new DiffPropertyLister("3.2.0", "4.0.0", log).print();
- /*
+
printDiff("110", "200");
UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
@@ -1157,7 +1159,7 @@ public class GenerateData implements UCD_Types {
+ n.format(u31m.count()));
log.println();
u31m.print(log, false, false, "3.1");
- */
+
} finally {
if (log != null) {
log.close();
@@ -1326,5 +1328,5 @@ public class GenerateData implements UCD_Types {
+ (start != end ? ".." + Default.ucd().getName(end) : ""));
}
System.out.println("TrailingZero count: " + result.size());
- }
+ }*/
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
index 3ba8181c18f..496d8042cc4 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
-* $Date: 2004/02/12 08:23:15 $
-* $Revision: 1.5 $
+* $Date: 2005/10/11 19:39:15 $
+* $Revision: 1.6 $
*
*******************************************************************************
*/
@@ -99,8 +99,8 @@ public final class GenerateStandardizedVariants implements UCD_Types {
String directory = "DerivedData/";
String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
- String[] batName = {""};
- String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
+ //String[] batName = {""};
+ //String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
String version = Default.ucd().getVersion();
int lastDot = version.lastIndexOf('.');
@@ -118,6 +118,6 @@ public final class GenerateStandardizedVariants implements UCD_Types {
Utility.appendFile("StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
out.close();
- Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
+ //Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
}
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java
index dfaad4d993a..c059d8d1c74 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2004/10/14 17:54:56 $
-* $Revision: 1.35 $
+* $Date: 2005/10/11 19:39:15 $
+* $Revision: 1.36 $
*
*******************************************************************************
*/
@@ -143,7 +143,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
- else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
+ //else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
@@ -157,7 +157,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
- else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
+ //else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
@@ -168,9 +168,9 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
- else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
+ //else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.checkNameList();
- else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
+ //else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
@@ -182,7 +182,7 @@ public final class Main implements UCD_Types {
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
// EXTRACTED PROPERTIES
-
+ /*
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");
@@ -230,8 +230,9 @@ public final class Main implements UCD_Types {
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericValues" );
-
- } else if (arg.equalsIgnoreCase("StandardizedVariants")) {
+ }
+ */
+ else if (arg.equalsIgnoreCase("StandardizedVariants")) {
GenerateStandardizedVariants.generate();
// OTHER STANDARD PROPERTIES
@@ -244,7 +245,7 @@ public final class Main implements UCD_Types {
GenerateCaseFolding.generateSpecialCasing(true);
GenerateCaseFolding.generateSpecialCasing(false);
- } else if (arg.equalsIgnoreCase("CompositionExclusions")) {
+ /* } else if (arg.equalsIgnoreCase("CompositionExclusions")) {
GenerateData.generateCompExclusions();
} else if (arg.equalsIgnoreCase("DerivedAge")) {
@@ -305,7 +306,7 @@ public final class Main implements UCD_Types {
} else if (arg.equalsIgnoreCase("listKatakana")) {
GenerateData.listKatakana();
-
+*/
/*
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
diff --git a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
index 48eba3058aa..fbb4bab907b 100644
--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
-* $Date: 2005/06/24 23:51:52 $
-* $Revision: 1.6 $
+* $Date: 2005/10/11 19:39:15 $
+* $Revision: 1.7 $
*
*******************************************************************************
*/
@@ -17,6 +17,11 @@ import java.util.*;
import java.io.*;
import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.UnicodeMap;
+import com.ibm.icu.dev.test.util.UnicodeProperty;
+import com.ibm.icu.dev.test.util.UnicodePropertySource;
+import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
+import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
@@ -24,7 +29,77 @@ import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
+ /**
+  * Entry point. Runs the bidi-mirroring report and then stops; the
+  * normalization length statistics that follow are deliberately
+  * short-circuited but kept compilable.
+  *
+  * @param args ignored
+  * @throws IOException declared for the length statistics calls
+  */
+ public static void main(String[] args) throws IOException {
+     getBidiMirrored();
+     // Guarding the return with an 'if' (instead of a bare return) keeps the
+     // statements below legal Java even though they never run.
+     final boolean bidiReportOnly = true;
+     if (bidiReportOnly) {
+         return;
+     }
+     getLengths("NFC", Default.nfc());
+     getLengths("NFD", Default.nfd());
+     getLengths("NFKC", Default.nfkc());
+     getLengths("NFKD", Default.nfkd());
+     System.out.println("Done");
+ }
+
+
+ /**
+  * Builds a status map of characters relevant to bidi mirroring and prints it.
+  * Labels are applied in this order (later labels presumably replace earlier
+  * ones -- confirm against UnicodeMap.putAll/put):
+  * "*open/close*" for General_Category Ps, Pe, Pi and Pf; "*core*" for
+  * bidimirrored=true; "no bidi mirroring" for Bidi_Mirrored characters whose
+  * bidimirroringglyph value is the character itself; "math" for
+  * General_Category Sm; and "*special*" for Sm characters whose NFKD form
+  * contains '<' or '>'. A final pass gives still-unlabelled characters the
+  * label of their single-code-point NFKC form, and the result is passed to
+  * showStatus together with the Bidi_Mirrored set.
+  */
+ private static void getBidiMirrored() {
+     // Last valid code point; both full scans below must include it.
+     final int maxCodePoint = 0x10FFFF;
+
+     ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
+     UnicodeMap status = new UnicodeMap();
+     status.putAll(foo.getSet("generalcategory=ps"), "*open/close*");
+     status.putAll(foo.getSet("generalcategory=pe"), "*open/close*");
+     status.putAll(foo.getSet("generalcategory=pi"), "*open/close*");
+     status.putAll(foo.getSet("generalcategory=pf"), "*open/close*");
+
+     UnicodeSet bidiMirroredSet = foo.getSet("bidimirrored=true");
+     status.putAll(bidiMirroredSet, "*core*");
+
+     // Collect every code point whose mirroring glyph differs from itself.
+     UnicodeSet bidiMirroringSet = new UnicodeSet();
+     UnicodeProperty x = foo.getProperty("bidimirroringglyph");
+     for (int i = 0; i <= maxCodePoint; ++i) {
+         String s = x.getValue(i);
+         if (!s.equals(UTF16.valueOf(i))) bidiMirroringSet.add(i);
+     }
+     // Work on a copy so bidiMirroredSet itself stays intact for showStatus.
+     status.putAll(new UnicodeSet(bidiMirroredSet).removeAll(bidiMirroringSet), "no bidi mirroring");
+     UnicodeSet mathSet = foo.getSet("generalcategory=sm");
+     status.putAll(mathSet, "math");
+
+     UnicodeSet special = new UnicodeSet("[<>]");
+     for (UnicodeSetIterator it = new UnicodeSetIterator(mathSet); it.next();) {
+         String s = Default.nfkd().normalize(it.codepoint);
+         if (special.containsSome(s)) status.put(it.codepoint, "*special*");
+     }
+     //showStatus(status);
+     // Close under NFKC: an unlabelled character whose NFKC form is a single
+     // labelled code point inherits that label. The "nfc-closure-" prefix is
+     // kept as-is because it is part of the printed output.
+     for (int i = 0; i <= maxCodePoint; ++i) {
+         if (!Default.ucd().isAssigned(i)) continue;
+         // NOTE(review): this skips everything that is NOT private use, which
+         // looks inverted for a closure pass -- confirm the intent of isPUA here.
+         if (!Default.ucd().isPUA(i)) continue;
+         if (Default.nfkc().isNormalized(i)) continue;
+         String oldValue = (String) status.getValue(i);
+         if (oldValue != null) continue;
+         String s = Default.nfkc().normalize(i);
+         if (UTF16.countCodePoint(s) != 1) continue;
+         int cp = UTF16.charAt(s, 0);
+         String value = (String)status.getValue(cp);
+         if (value != null) status.put(i, "nfc-closure-" + value);
+     }
+     showStatus(status, bidiMirroredSet);
+ }
+
+ static BagFormatter bf = new BagFormatter();
+ /**
+  * Prints one line per code point in the status map, grouped by status
+  * value in the TreeSet's sorted order. Each line holds the hex code point,
+  * the status value, "O" when the code point is contained in x (empty
+  * otherwise), and the character name, separated by ";\t".
+  *
+  * @param status map from code points to status labels
+  * @param x set whose members are flagged with "O" in the third column
+  */
+ private static void showStatus(UnicodeMap status, UnicodeSet x) {
+     Collection sortedLabels = new TreeSet(status.getAvailableValues());
+     Iterator labelIt = sortedLabels.iterator();
+     while (labelIt.hasNext()) {
+         String label = (String) labelIt.next();
+         if (label == null) {
+             continue;
+         }
+         UnicodeSetIterator cpIt = new UnicodeSetIterator(status.getSet(label));
+         while (cpIt.next()) {
+             System.out.println(Utility.hex(cpIt.codepoint)
+                     + ";\t" + label
+                     + ";\t" + (x.contains(cpIt.codepoint) ? "O" : "")
+                     + ";\t" + Default.ucd().getName(cpIt.codepoint));
+         }
+     }
+ }
+
+
public static class Length {
String title;
int bytesPerCodeUnit;
@@ -50,14 +125,6 @@ public class QuickTest implements UCD_Types {
}
}
- public static void main(String[] args) throws IOException {
- getLengths("NFC", Default.nfc());
- getLengths("NFD", Default.nfd());
- getLengths("NFKC", Default.nfkc());
- getLengths("NFKD", Default.nfkd());
- System.out.println("Done");
- }
-
static final int skip = (1< 2) return false;
+ if (len == 1) return i == value.charAt(0);
+ if (i <= 0xFFFF) return false;
+ return i == UTF16.charAt(value,0);
+ }
+
+    /**
+     * Looks up the Han property map for pname, prints it, sets its missing
+     * value to "na", prints the property name and type, and then runs the
+     * serialization round-trip test on the map.
+     *
+     * The total returned by testUnicodeMapSerialization is not handed back to
+     * the caller: this method is void and total is a by-value int parameter.
+     *
+     * @param iterations number of timed read passes forwarded to the round-trip test
+     * @param total running size total forwarded to the round-trip test
+     * @param pname name of the Han property to test
+     * @param type label printed next to the property name
+     */
+    private static void testHanProp(int iterations, int total, String pname, String type) throws IOException, ClassNotFoundException {
+        System.out.println();
+        final UnicodeMap hanValues = Default.ucd().getHanValue(pname);
+        System.out.println(hanValues);
+        hanValues.setMissing("na");
+        System.out.print("Name:\t" + pname + "\tType:\t" + type);
+        testUnicodeMapSerialization(iterations, total, pname, hanValues);
+    }
+
+ // Common path prefix of the serialized-property directories; a version string is appended directly to it.
+ static String outdircore = "C:\\DATA\\bin\\UCD_Data";
+ // Directory for the .bin files written by testUnicodeMapSerialization: the prefix above plus "4.1.0\".
+ static String outdir = outdircore + "4.1.0\\";
+    /**
+     * Serializes umap with Java object serialization wrapped in gzip, either to
+     * the file outdir + pname + ".bin" or to an in-memory buffer depending on
+     * USE_FILE, then times reading it back and checks that the map read back
+     * equals the original. Prints the value count, the serialized size, any
+     * round-trip mismatches per code point, and the read timing.
+     *
+     * @param iterations number of timed read passes; when it is not positive
+     *        nothing is read back and the round trip is reported as unverified
+     * @param total running byte total accumulated by earlier calls
+     * @param pname property name, used to build the file name
+     * @param umap the map to serialize
+     * @return total plus the serialized size of umap
+     * @throws IOException if writing or reading the serialized form fails
+     * @throws ClassNotFoundException if deserialization cannot resolve a class
+     */
+    private static int testUnicodeMapSerialization(int iterations, int total, String pname, UnicodeMap umap) throws IOException, ClassNotFoundException {
+        System.out.print("\tValue Count:\t" + umap.getAvailableValues().size());
+
+        String filename = outdir + pname + ".bin";
+        OutputStream out;
+        ByteArrayOutputStream baout = null;
+        if (USE_FILE) {
+            out = new FileOutputStream(filename);
+        } else {
+            out = baout = new ByteArrayOutputStream();
+        }
+        ObjectOutputStream oos = null;
+        try {
+            oos = new ObjectOutputStream(new GZIPOutputStream(out));
+            oos.writeObject(umap);
+        } finally {
+            // Closing the outermost stream finishes the gzip data and closes the sink;
+            // fall back to the raw sink if the wrappers could not be created.
+            if (oos != null) {
+                oos.close();
+            } else {
+                out.close();
+            }
+        }
+
+        long size;
+        byte[] buffer = null; // only used for the in-memory case
+        if (USE_FILE) {
+            size = new File(filename).length();
+        } else {
+            size = baout.size();
+            buffer = baout.toByteArray();
+            if (DEBUG) System.out.println(showBuffer(buffer, size));
+        }
+        System.out.print("\t"+"Size:\t" + size);
+
+        // only measure read time
+        UnicodeMap reverseMap = null;
+        long start = System.currentTimeMillis();
+        for (int i = iterations; i > 0; --i) {
+            InputStream in;
+            if (USE_FILE) {
+                in = new FileInputStream(filename);
+            } else {
+                in = new ByteArrayInputStream(buffer, 0, (int)size);
+            }
+            ObjectInputStream ois = null;
+            try {
+                ois = new ObjectInputStream(new GZIPInputStream(in));
+                reverseMap = (UnicodeMap) ois.readObject();
+            } catch (java.io.OptionalDataException e1) {
+                System.out.println(e1.eof + "\t" + e1.length);
+                e1.printStackTrace();
+                // Do not let a map from an earlier pass hide this failed read.
+                reverseMap = null;
+            } finally {
+                if (ois != null) {
+                    ois.close();
+                } else {
+                    in.close();
+                }
+            }
+        }
+        long end = System.currentTimeMillis();
+
+        if (reverseMap == null) {
+            // No read pass ran, or the last one failed; there is nothing to compare.
+            System.out.println("Roundtrip not verified: no map was read back");
+        } else if (!reverseMap.equals(umap)) {
+            System.out.println("Failed roundtrip");
+            for (int i = 0; i <= 0x10FFFF; ++i) {
+                String main = (String) umap.getValue(i);
+                String rev = (String) reverseMap.getValue(i);
+                if (UnicodeMap.areEqual(main, rev))
+                    continue;
+                System.out.println(Utility.hex(i) + "\t'" + main + "',\t'"
+                        + rev + "'");
+            }
+        }
+        total += size;
+        System.out.print("\tTime:\t" + (end - start) / (iterations * 1.0)
+                + "\tmsecs (raw:\t" + ((end - start) / 1000.0) + "\tsecs)");
+        /* with Vanilla Serialization
+         * Size: 24131
+         * Time: 1.9488 msecs (raw: 9.744 secs)
+         * With my serialization
+         * Size: 19353
+         * Time: 0.8652 msecs (raw: 4.326 secs)
+         * With my serialization, and compression of ints
+         * Size: 8602
+         * Time: 2.784 msecs (raw: 1.392 secs)
+         * With delta encoding
+         * Size: 5226
+         * Time: 1.924 msecs (raw: 0.962 secs)
+         * Name:
+         * Size: 776926
+         * Time: 180.3 msecs (raw: 1.803 secs)
+         */
+        return total;
+    }
+
+    /**
+     * Builds a single-space-separated dump of the first size bytes of buffer,
+     * each byte masked to 0..255 and formatted by Utility.hex with a width
+     * argument of 2.
+     *
+     * @param buffer bytes to dump
+     * @param size number of leading bytes to include
+     * @return the dump, empty when size is not positive
+     */
+    private static String showBuffer(byte[] buffer, long size) {
+        StringBuffer hexDump = new StringBuffer();
+        String separator = "";
+        for (int index = 0; index < size; index++) {
+            hexDump.append(separator).append(Utility.hex(buffer[index] & 0xFF, 2));
+            separator = " ";
+        }
+        return hexDump.toString();
+    }
+
+    /**
+     * Round-trips a fixed list of strings and integral values through
+     * DataOutputCompressor and DataInputCompressor, layered over object streams
+     * on a byte array. For each value it prints the encoded bytes, then a line
+     * with the test index, the source value, the decoded value and either
+     * "Success" or "Bitter Failure". Numbers are written and read as longs and
+     * are additionally shown in hex.
+     *
+     * @throws IOException if encoding or decoding a value fails
+     */
+    private static void testStreamCompressor() throws IOException {
+        Object[] tests = {
+            UTF16.valueOf(0x10FFFF),"\u1234", "abc",
+            new Long(-3), new Long(12345),
+            new Short(Short.MAX_VALUE), new Short(Short.MIN_VALUE),
+            new Integer(Integer.MAX_VALUE), new Integer(Integer.MIN_VALUE),
+            new Long(Long.MIN_VALUE), new Long(Long.MAX_VALUE)};
+
+        for (int i = 0; i < tests.length; ++i) {
+            Object source = tests[i];
+            boolean isString = source instanceof String;
+
+            // Encode.
+            ByteArrayOutputStream out = new ByteArrayOutputStream(100);
+            ObjectOutputStream out2 = new ObjectOutputStream(out);
+            long y = 0;
+            try {
+                DataOutputCompressor sc = new DataOutputCompressor(out2);
+                if (isString) {
+                    sc.writeUTF((String)source);
+                } else {
+                    y = ((Number)source).longValue();
+                    sc.writeLong(y);
+                }
+            } finally {
+                out2.close();
+            }
+            byte[] buffer = out.toByteArray();
+            showBytes(buffer, out.size());
+            System.out.println();
+
+            // Decode and compare.
+            ObjectInputStream in2 = new ObjectInputStream(
+                    new ByteArrayInputStream(buffer, 0, out.size()));
+            try {
+                DataInputCompressor isc = new DataInputCompressor(in2);
+                if (isString) {
+                    Object result = isc.readUTF();
+                    System.out.println(i + "\t" + source
+                            + "\t" + result
+                            + (source.equals(result) ? "\tSuccess" : "\tBitter Failure"));
+                } else {
+                    long x = isc.readLong();
+                    // Tab between y and x: without it the two numbers ran together.
+                    System.out.println(i + "\t" + y
+                            + "\t" + x
+                            + "\t" + Utility.hex(y)
+                            + "\t" + Utility.hex(x)
+                            + (x == y ? "\tSuccess" : "\tBitter Failure"));
+                }
+            } finally {
+                in2.close();
+            }
+        }
+    }
+
+    /**
+     * Prints the first len bytes of buffer to System.out without a trailing
+     * newline. Each byte is masked to 0..255, formatted by Utility.hex with a
+     * width argument of 2, and followed by one space.
+     *
+     * @param buffer bytes to print
+     * @param len number of leading bytes to print
+     */
+    private static void showBytes(byte[] buffer, int len) {
+        int index = 0;
+        while (index < len) {
+            int unsigned = buffer[index] & 0xFF;
+            System.out.print(Utility.hex(unsigned, 2) + " ");
+            index++;
+        }
+    }
+
+    /**
+     * Builds a new map from umap in which algorithmically named characters
+     * share one placeholder value: names starting with
+     * "CJK UNIFIED IDEOGRAPH-" become "*", "CJK COMPATIBILITY IDEOGRAPH-"
+     * become "#", and "HANGUL SYLLABLE " become "@". Other names are copied
+     * unchanged; code points with a null value are left out.
+     *
+     * Each resulting name is also split with bk, and every piece is tallied
+     * in a Counter with weight max(0, length - 2). The tally is only used by
+     * the debug dump below, which is currently disabled.
+     *
+     * @param bk break iterator used to split names into pieces
+     * @param umap map from code points to character names
+     * @return the new map; umap itself is not modified here
+     */
+    private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) {
+        UnicodeMap temp = new UnicodeMap();
+        Counter counter = new Counter();
+        // Inclusive upper bound: U+10FFFF is the last code point.
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+            String name = (String) umap.getValue(i);
+            if (name == null)
+                continue;
+            if (name.startsWith("CJK UNIFIED IDEOGRAPH-"))
+                name = "*";
+            else if (name.startsWith("CJK COMPATIBILITY IDEOGRAPH-"))
+                name = "#";
+            else if (name.startsWith("HANGUL SYLLABLE ")) name = "@";
+            bk.setText(name);
+            int start = 0;
+            for (int end = bk.next(); end != BreakIterator.DONE; end = bk.next()) {
+                String word = name.substring(start, end);
+                counter.add(word, Math.max(0, word.length() - 2));
+                start = end;
+            }
+            temp.put(i, name);
+        }
+        if (false) { // debug dump of the word tally and the resulting ranges
+            Map m = counter.getSortedByCount();
+            int count = 0;
+            int running = 0;
+            for (Iterator it = m.keySet().iterator(); it.hasNext();) {
+                Counter.RWInteger c = (Counter.RWInteger) it.next();
+                String value = (String) m.get(c);
+                running += c.value;
+                System.out.println(count++ + "\t" + c + "\t" + running
+                        + "\t" + value);
+            }
+            for (UnicodeMap.MapIterator it2 = new UnicodeMap.MapIterator(
+                    temp); it2.nextRange();) {
+                System.out.println(Utility.hex(it2.codepoint) + "\t"
+                        + Utility.hex(it2.codepointEnd) + "\t"
+                        + it2.value);
+            }
+        }
+        return temp;
+    }
+
+    /**
+     * Smoke test for FileUnicodeProperty: builds the factory for version
+     * "4.1.0", prints the available property names, then prints the maps of
+     * the "White_Space" and "kRSUnicode" properties with a blank line between.
+     */
+    private static void tryFileUnicodeProperty() {
+        final UnicodeProperty.Factory props = FileUnicodeProperty.Factory.make("4.1.0");
+        System.out.println(props.getAvailableNames());
+
+        UnicodeProperty whiteSpace = props.getProperty("White_Space");
+        System.out.println(whiteSpace.getUnicodeMap());
+
+        UnicodeProperty radicalStroke = props.getProperty("kRSUnicode");
+        System.out.println();
+        System.out.println(radicalStroke.getUnicodeMap());
+    }
+
+    /**
+     * A UnicodeProperty whose values come from a serialized UnicodeMap stored
+     * in a file. The property name is the file name without its ".bin"
+     * suffix, and the map is loaded on first use.
+     */
+    public static class FileUnicodeProperty extends UnicodeProperty {
+        /** Suffix of the serialized property files. */
+        private static final String SUFFIX = ".bin";
+
+        private File file;
+        private String version;
+        private UnicodeMap map; // loaded on demand by make()
+
+        private FileUnicodeProperty(File file, String version) {
+            this.file = file;
+            this.version = version;
+            String base = file.getName();
+            // Strip the suffix only when it is present, so other file names are
+            // neither truncated nor rejected with an index error.
+            if (base.endsWith(SUFFIX)) {
+                base = base.substring(0, base.length() - SUFFIX.length());
+            }
+            setName(base);
+        }
+
+        /**
+         * Factory holding one FileUnicodeProperty per file found in the
+         * directory outdircore + version.
+         */
+        public static class Factory extends UnicodeProperty.Factory {
+            private Factory() {}
+
+            /**
+             * Creates a factory for the given version.
+             *
+             * @param version version string appended to outdircore to form the directory
+             * @return a factory with one property per file in that directory
+             * @throws IllegalArgumentException if the directory cannot be listed
+             */
+            public static Factory make(String version) {
+                Factory result = new Factory();
+                File f = new File(outdircore + version + "\\");
+                File[] files = f.listFiles();
+                if (files == null) {
+                    // listFiles() returns null for a missing directory or an I/O error.
+                    throw new IllegalArgumentException("Can't list property files in " + f);
+                }
+                for (int i = 0; i < files.length; ++i) {
+                    result.add(new FileUnicodeProperty(files[i], version));
+                }
+                return result;
+            }
+        }
+
+        protected List _getAvailableValues(List result) {
+            if (map == null) make();
+            return (List) map.getAvailableValues(result);
+        }
+
+        protected String _getVersion() {
+            return version;
+        }
+
+        /* (non-Javadoc)
+         * @see com.ibm.icu.dev.test.util.UnicodeProperty#_getValue(int)
+         */
+        protected String _getValue(int codepoint) {
+            if (map == null) make();
+            return (String)map.getValue(codepoint);
+        }
+
+        /**
+         * Loads the serialized map from the file. The writer in this file
+         * gzips the object stream, so a leading gzip signature is detected and
+         * unwrapped; a plain object stream is still accepted.
+         *
+         * @throws InternalError wrapping the cause if the map cannot be read
+         */
+        private void make() {
+            try {
+                InputStream in = new java.io.BufferedInputStream(
+                        new FileInputStream(file.getCanonicalPath()));
+                try {
+                    // Peek at the first two bytes for the gzip signature (0x1F 0x8B).
+                    in.mark(2);
+                    int first = in.read();
+                    int second = in.read();
+                    in.reset();
+                    if (first == 0x1F && second == 0x8B) {
+                        in = new GZIPInputStream(in);
+                    }
+                    ObjectInputStream ois = new ObjectInputStream(in);
+                    map = (UnicodeMap) ois.readObject();
+                } finally {
+                    // Closes the outermost stream created so far, and with it the file.
+                    in.close();
+                }
+            } catch (Exception e) {
+                throw (InternalError)new InternalError("Can't create property").initCause(e);
+            }
+        }
+
+        /** The only name alias is the property name itself. */
+        protected List _getNameAliases(List result) {
+            result.add(getName());
+            return result;
+        }
+
+        /** Adds no value aliases; returns result unchanged. */
+        protected List _getValueAliases(String valueAlias, List result) {
+            return result;
+        }
+    }
- */
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
index f2fe443767e..881197e45f0 100644
--- a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
+++ b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
@@ -35,7 +35,7 @@ public class UnicodeDataFile {
String[] batName2 = {""};
mostRecent = UnicodeDataFile.generateBat(directory, filename, newSuffix, fileType, batName2);
batName = batName2[0];
- filename = filename;
+ this.filename = filename;
if (!isHTML) {
out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));