diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
index 1637d1180e7..ceecc154718 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
-* $Date: 2002/10/03 22:58:17 $
-* $Revision: 1.13 $
+* $Date: 2002/10/05 01:28:56 $
+* $Revision: 1.14 $
*
*******************************************************************************
*/
@@ -921,14 +921,35 @@ public class WriteCharts implements UCD_Types {
+ "
" + Utility.hex(comp) + "";
}
+
public static void writeAllocation() throws IOException {
Default.setUCD();
+ String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger
+ int[] starts = new int[names.length];
+ int[] ends = new int[names.length];
+
+ UCD.BlockData blockData = new UCD.BlockData();
+
int counter = 0;
- UnicodeSet[] values = new UnicodeSet[500];
- String[] names = new String[values.length];
- int[] starts = new int[values.length];
- int[] ends = new int[values.length];
+ int blockId = 0;
+ while (Default.ucd.getBlockData(blockId++, blockData)) {
+ names[counter] = blockData.name;
+ starts[counter] = blockData.start;
+ ends[counter] = blockData.end;
+ //System.out.println(names[counter] + ", " + values[counter]);
+ ++counter;
+
+ // HACK
+ if (blockData.name.equals("Tags")) {
+ names[counter] = "reserved default ignorable";
+ starts[counter] = 0xE0080;
+ ends[counter] = 0xE0FFF;
+ ++counter;
+ }
+ }
+
+ /*
BufferedReader in = Utility.openUnicodeFile("Blocks", "", true, false);
try {
while (true) {
@@ -947,42 +968,79 @@ public class WriteCharts implements UCD_Types {
ends[counter] = end;
//System.out.println(names[counter] + ", " + values[counter]);
++counter;
+
+ // HACK
+ if (name.equals("Tags")) {
+ names[counter] = "reserved default ignorable";
+ values[counter] = new UnicodeSet(0xE0080, 0xE0FFF);
+ starts[counter] = 0xE0080;
+ ends[counter] = 0xE0FFF;
+ ++counter;
+ }
}
} finally {
in.close();
}
+ */
- PrintWriter out = Utility.openPrintWriter("Allocation.html", Utility.LATIN1_WINDOWS);
+
+ /*
+ Graphic
+ Format
+ Control
+ Private Use
+ Surrogate
+ Noncharacter
+ Reserved (default ignorable)
+ Reserved (other)
+ */
+
+ PrintWriter out = Utility.openPrintWriter("allocation.html", Utility.LATIN1_WINDOWS);
try {
out.println("
");
out.println("Unicode Allocation");
out.println("");
- out.println("");
- out.println("Start | Block Name | Size |
");
- UnicodeSetIterator it = new UnicodeSetIterator();
- int lastEnd = -1;
- for (int i = 0; i < counter; ++i) {
- if (starts[i] != lastEnd + 1) {
- drawAllocation(out, lastEnd + 1, "reserved", starts[i] - lastEnd + 1, 0);
+ for (int textOnly = 0; textOnly < 2; ++textOnly) {
+ out.println(""); // width='100%'
+ if (textOnly == 0) {
+ out.println("Start | Block Name | Size |
");
+ } else {
+ out.println("Block Name | Start | Total | Assigned |
");
}
- int total = values[i].size();
- int alloc = 0;
- it.reset(values[i]);
- while (it.nextRange()) {
- for (int j = it.codepoint; j <= it.codepointEnd; ++j) {
+ int lastEnd = -1;
+ for (int i = 0; i < counter; ++i) {
+ if (starts[i] != lastEnd + 1) {
+ drawAllocation(out, lastEnd + 1, "reserved", starts[i] - lastEnd + 1, 0, "#000000", "#000000", textOnly);
+ }
+ int total = ends[i] - starts[i] + 1;
+ int alloc = 0;
+ for (int j = starts[i]; j <= ends[i]; ++j) {
if (Default.ucd.isAllocated(j)) ++alloc;
}
+ //System.out.println(names[i] + "\t" + alloc + "\t" + total);
+ String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000"
+ : names[i].indexOf("Private") >= 0 ? "#0000FF"
+ : "#00FF00";
+ String colorReserved = names[i].indexOf("reserved default ignorable") >= 0 ? "#CCCCCC"
+ : "#000000";
+ drawAllocation(out, starts[i], names[i], total, alloc, color, colorReserved, textOnly);
+ lastEnd = ends[i];
}
- System.out.println(names[i] + "\t" + alloc + "\t" + total);
- drawAllocation(out, starts[i], names[i], total, alloc);
- lastEnd = ends[i];
+ out.println("
");
}
- out.println("
");
- out.println("This chart lists all the Unicode blocks and their starting code points. "
- + "The area of each bar is proportional to the total number of code points in each block, "
- + "with green for the proportion of assigned code points. "
+ out.println("
Key
This chart lists all the Unicode blocks and their starting code points. "
+ + "The area of each bar is proportional to the total number of code points in each block. "
+ + "The colors have the following significance:
"
+ + "
"
+ + "Green | Graphic, Control, Format, Noncharacter* code points |
"
+ + "Red | Surrogate code points |
"
+ + "Blue | Private Use code points |
"
+ + "Gray | Reserved (default ignorable) code points |
"
+ + "Black | Reserved (other) code points |
"
+ + "
"
+ + "* Control, Format, and Noncharacter are not distinguished from Graphic characters by color, since they are mixed into other blocks. "
+ "Tooltips on the bars show the total number of code points and the number assigned. "
+ "(Remember that assigned code points are not necessarily assigned characters.)"
+ "");
@@ -997,23 +1055,27 @@ public class WriteCharts implements UCD_Types {
static NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
static {nf.setMaximumFractionDigits(0);}
- static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc) {
- int unalloc = total - alloc;
-
- double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock));
- double allocWidth = alloc * totalWidth / total;
- double unallocWidth = totalWidth - allocWidth;
-
- out.println("" + Utility.hex(start)
- + " | " + title
- + " | ");
-
- if (alloc != 0) out.println(" | ");
- if (unalloc != 0) out.println(" | ");
- out.println("
|
");
+ static void drawAllocation(PrintWriter out, int start, String title, int total, int alloc, String color, String colorReserved, int textOnly) { // Prints one table row for a block to out: a bar row when textOnly == 0, otherwise a plain-text row. NOTE(review): color and colorReserved are not referenced in the visible text of this block; the markup that used them appears to have been stripped - confirm.
+ if (textOnly == 0) { // graphical row
+ int unalloc = total - alloc; // code points of the block that are not allocated
+
+ double totalWidth = longestBar*(Math.sqrt(total) / Math.sqrt(longestBlock)); // bar length scales with the square root of the block size, relative to longestBlock
+ double allocWidth = alloc * totalWidth / total; // share of the bar proportional to alloc/total
+ double unallocWidth = totalWidth - allocWidth; // remainder of the bar
+
+ out.println("" + Utility.hex(start)
+ + " | " + title
+ + " | ");
+
+ if (alloc != 0) out.println(" | "); // allocated segment is printed only when non-empty
+ if (unalloc != 0) out.println(" | "); // unallocated segment is printed only when non-empty
+ out.println("
|
");
+ } else { // text row: title, start, total, alloc. NOTE(review): start is printed in decimal here but via Utility.hex in the graphical row - confirm intended.
+ out.println("" + title + " | " + start + " | " + total + " | " + alloc + " |
");
+ }
}
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt b/tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
new file mode 100644
index 00000000000..1a80bf523aa
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
@@ -0,0 +1,47 @@
+#
+# This file is used to test (1) case conversion, (2) case detection,
+# and (3) case-insensitive matching.
+# (1) is represented below by function names such as toLower(),
+# (2) is represented below by function names such as isLower().
+# (3) is represented below by the function name equalsCaseInsensitive().
+# (The actual function names will vary depending on software language and/or library.)
+#
+# The test cases also check whether canonical equivalence is preserved
+# by these functions.
+#
+# Format:
+# <src> ; <lower> ; <upper> ; <title> ; <fold> (# <comment>)?
+#
+# Test:
+#
+# A. For each line:
+# 1. Verify the following equalities:
+# lower == toLower(src)
+# upper == toUpper(src)
+# title == toTitle(src)
+# fold == toFold(src)
+# 2. Verify that all of the following are true:
+# isLower(toLower(lower))
+# isUpper(toUpper(upper))
+# isTitle(toTitle(title))
+# isFold(toFold(fold))
+# 3. Verify that all of the following are true:
+# equalsCaseInsensitive(src, lower)
+# equalsCaseInsensitive(src, upper)
+# equalsCaseInsensitive(src, title)
+# equalsCaseInsensitive(src, fold)
+#
+# B. For each code point that is NOT listed as a src:
+# 1. Verify the following equalities:
+# src == toLower(src) == toUpper(src) == toTitle(src) == toFold(src)
+# 2. Verify that all of the following are true:
+# isLower(toLower(lower))
+# isUpper(toUpper(upper))
+# isTitle(toTitle(title))
+# isFold(toFold(fold))
+# 3. Verify that all of the following are true:
+# equalsCaseInsensitive(src, lower)
+# equalsCaseInsensitive(src, upper)
+# equalsCaseInsensitive(src, title)
+# equalsCaseInsensitive(src, fold)
+#
diff --git a/tools/unicodetools/com/ibm/text/UCD/Charts.java b/tools/unicodetools/com/ibm/text/UCD/Charts.java
new file mode 100644
index 00000000000..991351f79e5
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/Charts.java
@@ -0,0 +1,25 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Charts.java,v $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCD;
+import com.ibm.icu.text.UnicodeSet;
+import java.io.*;
+
+import java.util.*;
+import com.ibm.icu.text.UTF16;
+
+import com.ibm.text.utility.*;
+
+
+public class Charts { // Empty placeholder class: it declares no fields or methods yet.
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java b/tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
new file mode 100644
index 00000000000..2e69616a807
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
@@ -0,0 +1,106 @@
+package com.ibm.text.UCD;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.text.utility.*;
+import java.util.*;
+
+/**
+ * Abstract description of a property of Unicode code points.
+ *
+ * Enumerated properties will be IntCodePointProperty; the string values they
+ * return will be the property value names. Binary properties are enumerated
+ * properties that return 0 or 1.
+ *
+ * The static lookup and registration methods below are stubs at this point:
+ * they return null or do nothing.
+ */
+public abstract class CodePointProperty {
+
+    /** Style selector for names and string values: short form. */
+    static final byte SHORT = 0;
+    /** Style selector for names and string values: default form. */
+    static final byte DEFAULT = 1;
+    /** Style selector for names and string values: long form. */
+    static final byte LONG = 2;
+    /** Limit of the normal styles; returned by the base getStyleLimit. */
+    static final byte NORMAL_LIMIT = 3;
+
+    /** Gets the property name in the requested style. */
+    public abstract String getName(byte style);
+
+    /**
+     * Gets the value for a code point as a string. The value may also be
+     * numeric, etc., but this returns the string equivalent.
+     */
+    public abstract String getValue(int codePoint, byte style);
+
+    /**
+     * Returns true if the code point has the value.
+     * Works with any style that getValue takes.
+     */
+    public abstract boolean hasValue(int codePoint, String value);
+
+    /**
+     * Returns the set of all code points with that value. Same effect as
+     * using hasValue one by one, but with a faster internal implementation.
+     */
+    public abstract UnicodeSet getSet(String value);
+
+    /**
+     * Intended to return a list of all possible values: logically the same as
+     * looping from 0..10FFFF with getValue and getStyleLimit and throwing out
+     * duplicates, but much faster. Currently a stub that returns null.
+     */
+    static Iterator getAllValues(byte style) {
+        return null;
+    }
+
+    /** Gets the top value style available for this property. */
+    public byte getStyleLimit(byte style) {
+        return NORMAL_LIMIT;
+    }
+
+    /**
+     * Returns true if the value is known to be uniform over a type.
+     * This is used for various optimizations, especially for Cn and Co.
+     * The base implementation always answers false.
+     */
+    public boolean isUniformOverCategory(byte generalCategory) {
+        return false;
+    }
+
+    // ---- subclasses ----
+
+    /** A property whose values are also available as int. */
+    public abstract static class IntCodePointProperty extends CodePointProperty {
+        abstract int getNumericValue(int codePoint);
+        abstract int getMaxValue();
+        abstract int getMinValue();
+
+        /** Stub; currently returns null. */
+        static Iterator getAllNumericValues() {
+            return null;
+        }
+    }
+
+    /** A property whose values are also available as double. */
+    public abstract static class DoubleCodePointProperty extends CodePointProperty {
+        abstract double getNumericValue(int codePoint);
+        abstract double getMaxValue();
+        abstract double getMinValue();
+
+        /** Stub; currently returns null. */
+        static Iterator getAllNumericValues() {
+            return null;
+        }
+    }
+
+    // ---- registration and lookup ----
+
+    /** Registers a new property. Not implemented yet: the body is empty. */
+    static void register(CodePointProperty newProp) {
+        //...
+    }
+
+    /** Finds a registered property by name. Stub; currently returns null. */
+    static CodePointProperty getInstance(String name) {
+        return null;
+    }
+
+    /** Returns a list of all registered properties. Stub; currently returns null. */
+    static Iterator getAllRegistered() {
+        return null;
+    }
+
+    // UnicodeSet would use these internally to handle properties. That is,
+    // when it encountered ... [:name=value:] ... it would do:
+    //     CodePointProperty x = getInstance(name);
+    //     if (x == null) doError(name, value);
+    //     UnicodeSet s = x.getSet(value);
+    // and then use s.
+
+    // Open issue: we could have a property like contains("dot").
+    // In that case we would register "contains" as the 'base' name,
+    // but allow lookup with string parameters ("dot").
+    // Maybe just adding the three methods below would do.
+
+    /** Returns whether this property takes parameters; the base class says no. */
+    public boolean hasParameters() {
+        return false;
+    }
+
+    /** Sets the parameters; the base implementation ignores them. */
+    public void setParameters(String parameters) {}
+
+    /** Gets the parameters; the base implementation returns null. */
+    public String getParameters() {
+        return null;
+    }
+
+    // That way we could have [[:letter:]&[:contains(dot):]]
+
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
index f4c00f7277f..1f6e3657148 100644
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.8 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.9 $
*
*******************************************************************************
*/
@@ -331,7 +331,7 @@ public final class ConvertUCD implements UCD_Types {
static void readBlocks() throws Exception {
System.out.println("Reading 'Blocks'");
- BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, false);
+ BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
String line = "";
try {
String[] parts = new String[20];
@@ -376,7 +376,7 @@ public final class ConvertUCD implements UCD_Types {
}
String tempVersion = version;
if (version.equals(UCD.latestVersion)) tempVersion = "";
- BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, false);
+ BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, Utility.LATIN1);
if (input == null) {
System.out.println("COULDN'T OPEN: " + labels[0]);
return;
@@ -834,7 +834,7 @@ public final class ConvertUCD implements UCD_Types {
uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT, true);
} else if (fieldName.equals("ea")) {
- uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EA, true);
+ uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.SHORT_EA, true);
} else if (fieldName.equals("lb")) {
uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB, true);
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
index 832bad46f83..cb7c6fe5734 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2002/07/30 09:56:41 $
-* $Revision: 1.11 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.12 $
*
*******************************************************************************
*/
@@ -63,7 +63,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
out.println("#");
- Utility.appendFile("CaseFoldingHeader.txt", false, out);
+ Utility.appendFile("CaseFoldingHeader.txt", Utility.LATIN1, out);
/*
PrintWriter out = new PrintWriter(
@@ -561,7 +561,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println("# SpecialCasing" + GenerateData.getFileSuffix(false));
out.println(GenerateData.generateDateLine());
out.println("#");
- Utility.appendFile("SpecialCasingHeader.txt", true, out);
+ Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
Iterator it = sorted.keySet().iterator();
int lastOrder = -1;
@@ -584,7 +584,7 @@ public class GenerateCaseFolding implements UCD_Types {
case 3: out.println("# Ligatures"); break;
case 4: skipLine = true; break;
case 5: out.println("# No corresponding uppercase precomposed character"); break;
- case 6: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
+ case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
case 8: skipLine = true; break;
}
@@ -592,7 +592,7 @@ public class GenerateCaseFolding implements UCD_Types {
}
out.println(line);
}
- Utility.appendFile("SpecialCasingFooter.txt", true, out);
+ Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
out.close();
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile));
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
new file mode 100644
index 00000000000..ee6686e071f
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
@@ -0,0 +1,94 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+
+import com.ibm.text.utility.*;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Generates CaseTest.txt: one line per test string giving the string and its
+ * full lowercase, uppercase, titlecase and case-folded forms (see write()).
+ */
+abstract public class GenerateCaseTest implements UCD_Types {
+
+    /** Character appended to each source character to build a second test string. */
+    static final char testChar = '\u0316';
+
+    /** Number of data lines written so far by write(). */
+    static int counter = 0;
+
+    /**
+     * Writes CaseTest.txt: header, the data lines, and a trailing line count.
+     *
+     * For every code point that is allocated, is not a Hangul syllable, is not
+     * private use, and whose four full case mappings are not all equal to one
+     * another, a line is written for the character itself. A second line is
+     * written for the character followed by testChar when case-mapping that
+     * combination (via NFD, then NFC) differs from appending testChar to the
+     * single-character mappings.
+     *
+     * @param args unused
+     * @throws IOException if the output or header file cannot be processed
+     */
+    public static void main(String[] args) throws IOException {
+        System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
+        Default.setUCD();
+
+        PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS);
+        try {
+            out.println("# CaseTest");
+            out.println("# Generated: " + Default.getDate() + ", MED");
+            Utility.appendFile("CaseTestHeader.txt", Utility.LATIN1, out);
+
+            // Inclusive upper bound: 0x10FFFF is itself a code point.
+            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+                Utility.dot(cp);
+                if (!Default.ucd.isAllocated(cp)) continue;
+                if (Default.ucd.isHangulSyllable(cp)) continue;
+                // PRIVATE_USE is a general-category value, so it must be
+                // compared with the category, not with the code point.
+                byte cat = Default.ucd.getCategory(cp);
+                if (cat == PRIVATE_USE) continue;
+
+                String lower = Default.ucd.getCase(cp, FULL, LOWER);
+                String upper = Default.ucd.getCase(cp, FULL, UPPER);
+                String title = Default.ucd.getCase(cp, FULL, TITLE);
+                String fold = Default.ucd.getCase(cp, FULL, FOLD);
+                // Nothing to test when all four mappings coincide.
+                if (lower.equals(upper)
+                        && lower.equals(title)
+                        && lower.equals(fold)) continue;
+
+                String s = UTF16.valueOf(cp);
+                write(out, s, true);
+
+                // if (cp == '\u0345') continue; // don't add combining for this special case
+
+                s = s + testChar;
+
+                String s2 = Default.nfd.normalize(s);
+
+                String lower1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, LOWER));
+                String upper1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, UPPER));
+                String title1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, TITLE));
+                String fold1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, FOLD));
+
+                // Skip the combined form when it adds no new information.
+                if (lower1.equals(Default.nfc.normalize(lower + testChar))
+                        && upper1.equals(Default.nfc.normalize(upper + testChar))
+                        && title1.equals(Default.nfc.normalize(title + testChar))
+                        && fold1.equals(Default.nfc.normalize(fold + testChar))
+                ) continue;
+
+                write(out, s, true);
+            }
+            out.println("# total lines: " + counter);
+        } finally {
+            // Close the file even if generation fails part-way.
+            out.close();
+        }
+    }
+
+    /**
+     * Writes one data line and increments counter. The line holds the hex of
+     * ss followed by the hex of its lower, upper, title and fold forms, each
+     * computed on the NFD form of ss and then NFC-normalized.
+     *
+     * @param out       destination writer
+     * @param ss        source string for the line
+     * @param doComment if true, appends a tab and "# " plus the name of ss
+     */
+    static void write(PrintWriter out, String ss, boolean doComment) {
+        String s = Default.nfd.normalize(ss);
+        String lower = Default.nfc.normalize(Default.ucd.getCase(s, FULL, LOWER));
+        String upper = Default.nfc.normalize(Default.ucd.getCase(s, FULL, UPPER));
+        String title = Default.nfc.normalize(Default.ucd.getCase(s, FULL, TITLE));
+        String fold = Default.nfc.normalize(Default.ucd.getCase(s, FULL, FOLD));
+        out.println(Utility.hex(ss) + "; "
+            + Utility.hex(lower) + "; "
+            + Utility.hex(upper) + "; "
+            + Utility.hex(title) + "; "
+            + Utility.hex(fold)
+            + (doComment ? "\t# " + Default.ucd.getName(ss) : "")
+        );
+        counter++;
+    }
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
index eeb0a11cebd..c39a0c8055c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2002/07/30 09:56:41 $
-* $Revision: 1.22 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.23 $
*
*******************************************************************************
*/
@@ -545,6 +545,10 @@ public class GenerateData implements UCD_Types {
if (i == (BINARY_PROPERTIES | CaseFoldTurkishI)) continue;
if (i == (BINARY_PROPERTIES | Non_break)) continue;
+ if (type == NUMERIC_TYPE) {
+ //System.out.println("debug");
+ }
+
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
@@ -587,8 +591,9 @@ public class GenerateData implements UCD_Types {
}
valueAbb = up.getValue(SHORT);
- if (valueAbb.length() == 0) valueAbb = "n/a";
valueAbb = Utility.getUnskeleton(valueAbb, false);
+ if (valueAbb.length() == 0) valueAbb = "n/a";
+ //else if (valueAbb.equals(value)) valueAbb = "n/a";
if (type == COMBINING_CLASS) {
@@ -643,6 +648,13 @@ public class GenerateData implements UCD_Types {
}
}
+ UCD.BlockData blockData = new UCD.BlockData();
+
+ int blockId = 0;
+ while (Default.ucd.getBlockData(blockId++, blockData)) {
+ addLine(sorted, "blk", "n/a", blockData.name);
+ }
+
String filename = "PropertyAliases";
String newFile = "DerivedData/" + filename + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
@@ -651,7 +663,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("#");
- Utility.appendFile("PropertyAliasHeader.txt", false, log);
+ Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
Utility.print(log, sorted, "\r\n", new MyBreaker(true));
@@ -667,7 +679,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + getFileSuffix(false));
log.println(generateDateLine());
log.println("#");
- Utility.appendFile("PropertyValueAliasHeader.txt", false, log);
+ Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
Utility.print(log, sorted, "\r\n", new MyBreaker(false));
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
index 54195ea7625..f6af4ac20ef 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
-* $Date: 2002/08/04 21:38:45 $
-* $Revision: 1.9 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.10 $
*
*******************************************************************************
*/
@@ -45,7 +45,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
log = Utility.openPrintWriter("Unihan_log.html", Utility.UTF8_WINDOWS);
log.println("");
- BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
+ BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
Map properties = new TreeMap();
@@ -502,7 +502,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == CHINESE) {
System.out.println("Reading chinese_frequency.txt");
- br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", true);
+ br = Utility.openReadFile(BASE_DIR + "dict\\chinese_frequency.txt", Utility.UTF8);
counter = 0;
while (true) {
line = Utility.readDataLine(br);
@@ -521,7 +521,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == JAPANESE) {
System.out.println("Reading japanese_frequency.txt");
- br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", true);
+ br = Utility.openReadFile( BASE_DIR + "dict\\japanese_frequency.txt", Utility.UTF8);
Map japaneseMap = new HashMap();
while (true) {
line = Utility.readDataLine(br);
@@ -704,7 +704,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
if (type == JAPANESE) fname = "edict.txt";
System.out.println("Reading " + fname);
- BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
+ BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@@ -751,7 +751,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
String fname = "Chinese_override.txt";
System.out.println("Reading " + fname);
- BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, true);
+ BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\" + fname, Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@@ -997,7 +997,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
static void readCDICT() throws IOException {
System.out.println("Reading cdict.txt");
- BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", true);
+ BufferedReader br = Utility.openReadFile(BASE_DIR + "dict\\cdict.txt", Utility.UTF8);
int counter = 0;
String[] pieces = new String[50];
String line = "";
@@ -1075,7 +1075,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
static void readUnihanData(String key) throws java.io.IOException {
- BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, true);
+ BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
int count = 0;
int lineCounter = 0;
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java
new file mode 100644
index 00000000000..c89f330b005
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java
@@ -0,0 +1,74 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCD;
+import java.io.*;
+import com.ibm.text.utility.*;
+import com.ibm.text.UnicodeSet;
+import java.util.*;
+
+/**
+ * Reads a word list and sorts its characters into three sets by the position
+ * in which they occur within a line: initial, final and medial. The sets are
+ * then printed with Utility.showSetNames.
+ */
+public class GenerateThaiBreaks {
+    /**
+     * Processes \icu4j\src\data\thai6.ucs, read as "UnicodeLittle" text, one
+     * entry per line. Characters in the ignorable set are skipped when
+     * deciding which character is first or last on a line.
+     *
+     * @param args unused
+     * @throws IOException if the input file cannot be opened or read
+     */
+    public static void main(String [] args) throws IOException {
+
+        BufferedReader br = new BufferedReader(
+            new InputStreamReader(
+                new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle"));
+        try {
+            Main.setUCD();
+            UnicodeSet ignorables = new UnicodeSet("[:M:]");
+            ignorables.retain(0x0E00, 0x0E7F); // just Thai block
+            ignorables.add(0x0E40, 0x0E44); // add logical order exception
+            ignorables.add(0, ' '); // add controls
+            ignorables.add('.');
+
+            UnicodeSet initials = new UnicodeSet();
+            UnicodeSet finals = new UnicodeSet();
+            UnicodeSet medials = new UnicodeSet();
+            while (true) {
+                String line = br.readLine();
+                if (line == null) break;
+
+                // Find the final consonant: the last non-ignorable character.
+                // The scan is bounded so that an empty line, or one made up
+                // only of ignorables, is skipped instead of running off the
+                // front of the string.
+                int end = line.length() - 1;
+                while (end >= 0 && ignorables.contains(line.charAt(end))) {
+                    --end;
+                }
+                if (end < 0) continue;
+                finals.add(line.charAt(end));
+
+                // Everything before the final: the first non-ignorable is an
+                // initial, all later non-ignorables are medials.
+                boolean haveFirst = false;
+                for (int i = 0; i < end; ++i) {
+                    char c = line.charAt(i);
+                    if (ignorables.contains(c)) continue;
+                    if (!haveFirst) {
+                        initials.add(c);
+                        haveFirst = true;
+                    } else {
+                        medials.add(c);
+                    }
+                }
+            }
+
+            // A character seen medially anywhere is reported only as medial.
+            initials.removeAll(medials);
+            finals.removeAll(medials);
+            Utility.showSetNames("initials: ", initials, false, Main.ucd);
+            Utility.showSetNames("finals: ", finals, false, Main.ucd);
+            Utility.showSetNames("medials: ", medials, false, Main.ucd);
+        } finally {
+            br.close();
+        }
+    }
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/IANANames.java b/tools/unicodetools/com/ibm/text/UCD/IANANames.java
index 6abfb651bc3..383b362cc78 100644
--- a/tools/unicodetools/com/ibm/text/UCD/IANANames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/IANANames.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
-* $Date: 2002/08/08 15:38:16 $
-* $Revision: 1.1 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.2 $
*
*******************************************************************************
*/
@@ -65,7 +65,7 @@ public class IANANames implements UCD_Types {
}
public IANANames() throws IOException {
- BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", false);
+ BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1);
try {
boolean atStart = true;
String lastName = "";
diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java
index 6a863f6c7ec..00d9e005056 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/10/01 01:19:16 $
-* $Revision: 1.24 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.25 $
*
*******************************************************************************
*/
@@ -73,6 +73,8 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
+ else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
+
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
diff --git a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
index 73858efd205..80f1db25521 100644
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
@@ -34,4 +34,4 @@
# In addition, some property names may be the same as some property value names.
#
# The combination of property value and property name is, however, unique.
-# For more information, see UTR #24: Regular Expression Guidelines
+# For more information, see UTR #18: Regular Expression Guidelines
diff --git a/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt b/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt
index 92c7d7ca715..1b1c0b44866 100644
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt
@@ -29,7 +29,7 @@
# and '_' are ignored.
#
# NOTE: The Block property values are in Blocks.txt, and not repeated here.
-# For more information on the use of blocks, see UTR #24: Regular Expression Guidelines
+# For more information on the use of blocks, see UTR #18: Regular Expression Guidelines
#
# NOTE: Currently there is at most one abbreviated name and one long name for
# property value. However, in the future additional aliases
diff --git a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
new file mode 100644
index 00000000000..69f82ca8f28
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@@ -0,0 +1,103 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+import com.ibm.text.utility.*;
+
+public class QuickTest implements UCD_Types {
+ static final void test() { // ad-hoc report: prints how the NameStartChar/NameChar sets below relate to several Unicode property sets
+ Default.setUCD();
+/*
+ [4] NameStartChar := ":" | [A-Z] | "_" | [a-z] |
+ [#xC0 - #x2FF] | [#x370 - #x37D] | [#x37F - #x1FFF] |
+ [#x200C - #x200D] | [#x2070 - #x218F] | [#x2C00 - #x2FEF] |
+ [#x3001 - #xD7FF] | [#xF900 - #xF9FF] | [#x10000 - #xDFFFF]
+
+ [4a] NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F]
+*/
+ UnicodeSet nameStartChar = new UnicodeSet("[\\: A-Z \\_ a-z"
+ + "\\u00c0-\\u02FF \\u0370-\\u037D \\u037F-\\u1FFF"
+ + "\\u200C-\\u200D \\u2070-\\u218F \\u2C00-\\u2FEF"
+ + "\\u3001-\\uD7FF \\uF900-\\uF9FF \\U00010000-\\U000DFFFF]");
+
+ UnicodeSet nameChar = new UnicodeSet("[\\- \\. 0-9 \\u00B7 \\u0300-\\u036F]")
+ .addAll(nameStartChar);
+
+ showSet("NameStartChar", nameStartChar);
+ showDiffs("NameChar", nameChar, "NameStartChar", nameStartChar);
+
+
+ UnicodeSet defaultIgnorable = UnifiedBinaryProperty.make(DERIVED | DefaultIgnorable).getSet();
+ UnicodeSet whitespace = UnifiedBinaryProperty.make(BINARY_PROPERTIES | White_space).getSet();
+
+ UnicodeSet notNFKC = new UnicodeSet();
+ UnicodeSet privateUse = new UnicodeSet();
+ UnicodeSet noncharacter = new UnicodeSet();
+ UnicodeSet format = new UnicodeSet("[:Cf:]");
+
+ for (int i = 0; i <= 0x10FFFF; ++i) { // classify every allocated code point into the working sets above
+ if (!Default.ucd.isAllocated(i)) continue;
+ if (!Default.nfkc.isNormalized(i)) notNFKC.add(i);
+ if (Default.ucd.isNoncharacter(i)) noncharacter.add(i);
+ if (Default.ucd.getCategory(i) == PRIVATE_USE) privateUse.add(i);
+ }
+
+ showSet("notNFKC in NameChar", new UnicodeSet(notNFKC).retainAll(nameChar));
+ showSet("notNFKC outside of NameChar", new UnicodeSet(notNFKC).removeAll(nameChar));
+
+ showSet("Whitespace in NameChar", new UnicodeSet(nameChar).retainAll(whitespace));
+ showSet("Whitespace not in NameChar", new UnicodeSet(whitespace).removeAll(nameChar));
+
+
+ showSet("Noncharacters in NameChar", new UnicodeSet(noncharacter).retainAll(nameChar)); // intersect with nameChar so the report matches its title
+ showSet("Noncharacters outside of NameChar", new UnicodeSet(noncharacter).removeAll(nameChar));
+
+ showSet("Format in NameChar", new UnicodeSet(nameChar).retainAll(format));
+ showSet("Other Default_Ignorables in NameChar", new UnicodeSet(defaultIgnorable).removeAll(format).retainAll(nameChar));
+ showSet("PrivateUse in NameChar", new UnicodeSet(nameChar).retainAll(privateUse)); // start from nameChar: the title asks for private-use code points inside NameChar
+
+ UnicodeSet CID_Start = new UnicodeSet("[:ID_Start:]").removeAll(notNFKC);
+ UnicodeSet CID_Continue = new UnicodeSet("[:ID_Continue:]")
+ .removeAll(notNFKC).removeAll(format);
+
+ UnicodeSet CID_Continue_extras = new UnicodeSet(CID_Continue).removeAll(CID_Start);
+
+ showDiffs("NoK_ID_Start", CID_Start, "NameStartChar", nameStartChar);
+ showDiffs("NoK_ID_Continue_Extras", CID_Continue_extras, "NameChar", nameChar);
+
+ System.out.println("Removing canonical singletons");
+ }
+
+ static void showDiffs(String title1, UnicodeSet set1, String title2, UnicodeSet set2) {
+ showSet(title1 + " - " + title2, new UnicodeSet(set1).removeAll(set2));
+ }
+
+ static void showSet(String title1, UnicodeSet set1) {
+ System.out.println();
+ System.out.println(title1);
+ if (set1.size() == 0) {
+ System.out.println("\tNONE");
+ return;
+ }
+ System.out.println("\tCount:" + set1.size());
+ System.out.println("\tSet:" + set1.toPattern(true));
+ System.out.println("\tDetails:");
+ Utility.showSetNames("", set1, false, Default.ucd);
+ }
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java b/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
new file mode 100644
index 00000000000..9525ebf63e9
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
@@ -0,0 +1,566 @@
+package com.ibm.text.UCD;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.text.utility.*;
+import java.util.*;
+import java.io.*;
+
+// Enumerated properties will be IntCodePointProperty.
+// The string values they return will be the property value names.
+// Binary properties are Enumerated properties. They return 0 or 1
+
+public final class TernaryStore {
+
+ static final int DONE = Integer.MIN_VALUE;
+ static final int NOT_FOUND = Integer.MIN_VALUE+1;
+
+ // for testing
+ static DepthPrinter dp;
+
+ static void test() throws java.io.IOException {
+ Default.setUCD();
+
+ PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS);
+ try {
+ dp = new DepthPrinter(pw);
+
+ String[] tests = {"the", "quick", "fish", "fisherman", "fishes",
+ "brown", "brow", "bracket", "bright", "brat",
+ "brough", "dogs", "upper", "zebra",
+ "fisher"};
+ test("Simple: ", tests, tests.length);
+
+
+ tests = new String[300000];
+ int counter = 0;
+ int i;
+ for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) {
+ if (Default.ucd.hasComputableName(i)) continue;
+
+ String temp = UCharacter.getName(i);
+ if (temp != null) tests[counter++] = temp.trim();
+ }
+ System.out.println("max-cp: " + Utility.hex(i));
+ test("Unicode Names: ", tests, counter);
+
+ //if (true) return;
+
+ BufferedReader br = Utility.openReadFile(UCD_Types.BASE_DIR + "dict\\DiploFreq.txt", Utility.LATIN1);
+ String line;
+ counter = 0;
+ while (counter < tests.length) {
+ line = Utility.readDataLine(br);
+ if (line == null) break;
+ if (line.length() == 0) continue;
+ Utility.dot(counter);
+ int tabPos = line.indexOf('\t');
+ if (tabPos < 0) {
+ System.out.println("???" + line);
+ continue;
+ }
+ tests[counter++] = line.substring(tabPos+1);
+ }
+ test("French: ", tests, counter);
+ } finally {
+ pw.close();
+ }
+ }
+
+ static void test(String title, String[] tests, int len) {
+ System.out.println();
+ System.out.println(title);
+ dp.println();
+ dp.print(title, 0);
+ dp.println();
+ TernaryStore.Builder builder = new TernaryStore.Builder();
+ int charCount = 0;
+ for (int i = 0; i < len; ++i) {
+ builder.add(tests[i], i);
+ charCount += tests[i].length();
+ }
+ System.out.println("charCount: " + charCount);
+ TernaryStore store = builder.build();
+ store.showNodes();
+ store.checkNodes();
+
+ dp.println("Storage");
+ dp.println(store.stringStore.toString());
+ System.out.println("StorageSize: " + store.stringStore.toString().length());
+
+ Matcher matcher = store.getMatcher();
+ for (int i = 0; i < len; ++i) {
+ int check = test(tests[i], matcher);
+ if (check != i) {
+ System.out.println("\tFail, result: " + tests[i] + ", " + check);
+ }
+ }
+ }
+
+ static int test(String s, Matcher matcher) {
+ matcher.reset(s, 0);
+ int lastResult = -1;
+ for (int result = matcher.next(); result != DONE; result = matcher.next()) {
+ lastResult = result;
+ }
+ return lastResult;
+ }
+
+ static final class Node { // one trie node: a string fragment plus less/greater siblings and a continuation
+ String getString(StringStore stringStore) { // fragment text: tempString until a store code has been assigned
+ if (stringCode < 0) return tempString;
+ return stringStore.get(stringCode);
+ }
+ void setString(String s) {
+ tempString = s;
+ }
+ String tempString; // literal fragment, used while stringCode is negative
+ int stringCode = -1; // code handed to StringStore.get(int); negative means "use tempString"
+ Node less; // followed when the probed char sorts below this node's first char
+ Node greater; // followed when the probed char sorts above this node's first char
+ Node next; // followed after this node's fragment has matched
+ int result = NOT_FOUND; // value of the key ending at this node, or NOT_FOUND
+
+ public String toString(StringStore store) { // renders this node and its 'next' chain; siblings are not included
+ return getString(store)
+ + (result != NOT_FOUND ? "(" + result + ")" : "")
+ + (next != null ? next.toString(store) : ""); // pass the store along; the no-arg toString() is Object's
+ }
+ }
+
+ Node base;
+ StringStore stringStore = new StringStore();
+
+ final static class Matcher {
+ TernaryStore store;
+ String s;
+ int position;
+ Node lastNode;
+
+ void reset(String s, int position) {
+ this.s = s;
+ this.position = position;
+ this.lastNode = store.base;
+ }
+
+ // returns the next result
+ // or DONE when done
+ // sets position to point after end of found string
+
+ int next() { // advances through s from 'position'; returns the next stored result on the path, or DONE
+ while (lastNode != null && position < s.length()) {
+ char ch = s.charAt(position++);
+ do {
+ String nodeString = lastNode.getString(store.stringStore);
+ char first = nodeString.charAt(0);
+ if (ch == first) {
+ // now check the rest of the string
+ for (int i = 1; i < nodeString.length(); ++i) {
+ char other = nodeString.charAt(i);
+ if (position >= s.length() || other != s.charAt(position++)) { // input ending inside this fragment is a non-match, not an index error
+ return DONE;
+ }
+ }
+
+ // if we succeed, return result if there is one
+ int result = lastNode.result;
+ lastNode = lastNode.next;
+ if (result != NOT_FOUND) return result;
+ break; // get next char
+ }
+ // otherwise branch sideways, keeping same char
+ if (ch > first) {
+ lastNode = lastNode.greater;
+ } else {
+ lastNode = lastNode.less;
+ }
+ } while (lastNode != null);
+ }
+ return DONE;
+ }
+ }
+
+ public Matcher getMatcher() {
+ Matcher result = new Matcher();
+ result.store = this;
+ return result;
+ }
+
+ public void showNodes() {
+ showNodes2(base, "", 5);
+ }
+
+ public void showNodes2(Node n, String path, int depth) {
+ if (n.less != null) {
+ showNodes2(n.less, path+"-", depth);
+ }
+ dp.print("", depth);
+ if (false) dp.print(path);
+ dp.print(n.getString(stringStore));
+ if (n.result != NOT_FOUND) dp.print("/" + n.result);
+ dp.println();
+ if (n.next != null) {
+ showNodes2(n.next, path+".", depth+n.getString(stringStore).length());
+ }
+ if (n.greater != null) {
+ showNodes2(n.greater, path+"+", depth);
+ }
+ }
+
+ static class NodeInfo {
+ int nodeCount;
+ int resultCount;
+ int nullLessCount;
+ int nullGreaterCount;
+ int nullSimpleCount;
+ int nullNextCount;
+ }
+
+ public void checkNodes() {
+ NodeInfo nodeInfo = new NodeInfo();
+ checkNodes(base, nodeInfo);
+ System.out.println("Nodes: " + nodeInfo.nodeCount);
+ System.out.println("nullLessCount: " + nodeInfo.nullLessCount);
+ System.out.println("nullGreaterCount: " + nodeInfo.nullGreaterCount);
+ System.out.println("nullNextCount: " + nodeInfo.nullNextCount);
+ System.out.println("resultCount: " + nodeInfo.resultCount);
+ System.out.println("nullSimpleCount: " + nodeInfo.nullSimpleCount);
+ }
+
+ public void checkNodes(Node n, NodeInfo nodeInfo) {
+ nodeInfo.nodeCount++;
+ if (n.result != NOT_FOUND) nodeInfo.resultCount++;
+ if (n.less != null) {
+ checkNodes(n.less, nodeInfo);
+ } else {
+ nodeInfo.nullLessCount++;
+ if (n.greater == null && n.result == NOT_FOUND) nodeInfo.nullSimpleCount++;
+ }
+ if (n.next != null) {
+ checkNodes(n.next, nodeInfo);
+ } else {
+ nodeInfo.nullNextCount++;
+ }
+ if (n.greater != null) {
+ checkNodes(n.greater, nodeInfo);
+ } else {
+ nodeInfo.nullGreaterCount++;
+ }
+ }
+
+ final static class DepthPrinter {
+ private PrintWriter pw;
+ private int currentDepth = 0;
+ private String leader = ".";
+
+ DepthPrinter(PrintWriter pw) {
+ this.pw = pw;
+ }
+
+ void print(char ch) {
+ print(ch, 0);
+ }
+
+ void print(String s) {
+ print(s, 0);
+ }
+
+ void print(char ch, int depth) {
+ print(String.valueOf(ch), depth);
+ }
+
+ void print(String s, int depth) {
+ int delta = depth - currentDepth;
+ if (delta > 0) {
+ pw.print(Utility.repeat(leader, delta - 1));
+ currentDepth = depth;
+ }
+ pw.print(s);
+ currentDepth += s.length();
+ }
+
+ void println() {
+ pw.println();
+ currentDepth = 0;
+ }
+
+ void println(String s) {
+ pw.print(s);
+ pw.println();
+ currentDepth = 0;
+ }
+ }
+
+ final static class StringStore {
+ // initially, there is a simple strategy
+
+ private String buffer = "";
+ private static final char TERMINATOR = '\u007E';
+ private static final int PIECE_LENGTH = 5;
+ private String[] pieces = new String[50]; // HACK: fixed-size scratch array for Utility.split; per instance, like buffer
+ private Set strings = new HashSet(); // strings queued by add() for this store only, so separately built stores do not accumulate each other's entries
+
+ public void add(String s) {
+ strings.add(s);
+ }
+
+ public void compact() {
+ System.out.println("Adding Pieces");
+ // add all the pieces
+ Iterator it = strings.iterator();
+ Set additions = new HashSet();
+ while (it.hasNext()) {
+ String s = (String)it.next();
+ int len = Utility.split(s, ' ', pieces);
+ for (int i = 0; i < len; ++i) {
+ additions.add(pieces[i]);
+ }
+ }
+
+ store(additions);
+ store(strings);
+ }
+
+ private void store(Set stuff) {
+ System.out.println("Sorting");
+ // sort them by length, longest first
+ Set ordered = new TreeSet();
+ Iterator it = stuff.iterator();
+ while (it.hasNext()) {
+ String s = (String)it.next();
+ ordered.add(new Pair(new Integer(-s.length()), s));
+ }
+ System.out.println("Storing");
+ // add them
+ it = ordered.iterator();
+ while (it.hasNext()) {
+ String s = (String)(((Pair)it.next()).second);
+ get(s);
+ }
+ }
+
+ private int get(String s) {
+ System.out.println("Adding: \'" + s + "\'");
+ int index;
+ if (s.indexOf(' ') < 0) {
+ index = addNoSplit(s);
+ System.out.println("\tReturning: " + index);
+ return index;
+ }
+ int len = Utility.split(s, ' ', pieces);
+ StringBuffer itemCodes = new StringBuffer();
+ for (int i = 0; i < len; ++i) {
+ String piece = pieces[i];
+ itemCodes.append((char)addNoSplit(piece));
+ /*for (int j = 0; j < piece.length(); j += PIECE_LENGTH) {
+ int maxLen = j + PIECE_LENGTH;
+ if (maxLen > piece.length()) maxLen = piece.length();
+ itemCodes.append((char)addNoSplit(piece.substring(j, maxLen)));
+ }*/
+ }
+ index = 0x8000 | addNoSplit(itemCodes.toString()); // mark it as composite
+ System.out.println("\tReturning: " + index);
+ return index;
+ }
+
+ private int addNoSplit(String s) {
+ System.out.println("\tAdding2: \'" + s + "\'");
+ String sTerm = s + TERMINATOR;
+ int index = buffer.indexOf(sTerm);
+ if (index >= 0) return index;
+
+ index = buffer.length();
+ buffer += sTerm;
+ System.out.println("\t\tReturning2: " + index);
+ return index;
+ }
+
+ public String get(int index) {
+ String result;
+ System.out.println("Fetching: " + index);
+
+ if ((index & 0x8000) == 0) {
+ int end = buffer.indexOf(TERMINATOR, index);
+ result = buffer.substring(index, end);
+ System.out.println("\tReturning: '" + result + "'");
+ return result;
+ }
+ index &= ~0x8000; // remove 1 bit
+
+ int end = buffer.indexOf(TERMINATOR, index);
+ result = "";
+ for (int i = index; i < end; ++i) {
+ if (result.length() != 0) result += " ";
+ result += get(buffer.charAt(i));
+ }
+ System.out.println("\tReturning: '" + result + "'");
+ return result;
+ }
+
+ public String toString() {
+ return buffer;
+ }
+
+ }
+
+ final static class Builder {
+ Map map = new TreeMap();
+ String[] names;
+ TernaryStore store;
+ Set set = new TreeSet();
+
+ public void add(String name, int result) {
+ map.put(name, new Integer(result));
+ }
+
+ public TernaryStore build() {
+ // flatten strings into array
+ names = new String[map.size()];
+ Iterator it = map.keySet().iterator();
+ int count = 0;
+ while (it.hasNext()) {
+ names[count++] = (String) it.next();
+ if (false) {
+ dp.print((count-1) + " " + names[count-1]);
+ dp.println();
+ }
+ }
+
+ // build nodes
+ store = new TernaryStore();
+ addNode(0, names.length);
+
+ // free storage
+ names = null;
+ map.clear();
+
+ System.out.println("compacting");
+ compactStore(store.base);
+ store.stringStore.compact();
+
+ //compactStrings(store);
+ //set.clear(); // free more storage
+
+ replaceStrings(store.base);
+ //map.clear(); // free storage
+
+ // free storage
+ TernaryStore result = store;
+ store = null;
+
+ return result;
+ }
+
+ /*
+ void compactStrings(TernaryStore t) {
+ // we have a set of Pairs, first is length, second is string
+ // compact them, word by word
+ Iterator it = set.iterator();
+ while (it.hasNext()) {
+ String string = ((String)((Pair)it.next()).second);
+ int index = t.stringStore.add(string);
+ if (true) {
+ System.out.println("Checking: " + index);
+ String reverse = t.stringStore.get(index);
+ if (!reverse.equals(string)) {
+ System.out.println("source: \'" + string + "\'");
+ System.out.println("reverse: \'" + reverse + "\'");
+ throw new IllegalArgumentException("Failed roundtrip");
+ }
+ }
+
+ map.put(string, new Integer(index));
+ }
+ }
+ */
+
+ public void replaceStrings(Node n) {
+ n.stringCode = store.stringStore.get(n.getString(store.stringStore));
+ n.setString(null);
+ if (n.less != null) replaceStrings(n.less);
+ if (n.next != null) replaceStrings(n.next);
+ if (n.greater != null) replaceStrings(n.greater);
+ }
+
+ public void compactStore(Node n) {
+ Node nextNode = n.next;
+ if (false) dp.println(n.toString());
+ while (n.result == NOT_FOUND && nextNode != null && nextNode.greater == null
+ && nextNode.less == null) {
+ n.setString(n.getString(store.stringStore) + nextNode.getString(store.stringStore));
+ n.result = nextNode.result;
+ n.next = nextNode = nextNode.next; // remove old node
+ }
+ // add strings sorted by length, longest first
+ store.stringStore.add(n.getString(store.stringStore));
+
+ if (n.less != null) compactStore(n.less);
+ if (n.next != null) compactStore(n.next);
+ if (n.greater != null) compactStore(n.greater);
+ }
+
+ private void addNode(int start, int limit) {
+ if (start >= limit) return;
+ int mid = (start + limit) / 2;
+ //System.out.println("start: " + start + ", mid: " + mid + ", limit: " + limit);
+ //System.out.println("adding: " + names[mid]);
+ addNode(names[mid], ((Integer)map.get(names[mid])).intValue());
+ addNode(start, mid);
+ addNode(mid+1, limit);
+ }
+
+ private void addNode(String s, int result) {
+ if (store.base == null) {
+ store.base = addRest(s, 0, result);
+ return;
+ }
+ Node n = store.base;
+ Node lastNode = n;
+
+ for (int i = 0; i < s.length(); ++i) {
+ char ch = s.charAt(i);
+ while (true) {
+ char first = n.getString(store.stringStore).charAt(0);
+ if (ch == first) {
+ if (n.next == null) {
+ n.next = addRest(s, i+1, result);
+ return;
+ }
+ lastNode = n;
+ n = n.next;
+ break; // get next char
+ }
+ // otherwise branch sideways, keeping same char
+ if (ch > first) {
+ if (n.greater == null) {
+ n.greater = addRest(s, i, result);
+ return;
+ }
+ n = n.greater;
+ } else {
+ if (n.less == null) {
+ n.less = addRest(s, i, result);
+ return;
+ }
+ n = n.less;
+ }
+ }
+ }
+ lastNode.result = result;
+ }
+
+ private Node addRest(String s, int position, int result) {
+ Node lastNode = null;
+ for (int i = s.length() - 1; i >= position; --i) {
+ Node n = new Node();
+ n.setString(s.substring(i, i+1)); // + "" to force a new string
+ if (lastNode == null) {
+ n.result = result;
+ }
+ n.next = lastNode;
+ lastNode = n;
+ }
+ return lastNode;
+ }
+ }
+}
+
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java
index 6ce3c43055a..223f7d271ac 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@@ -5,21 +5,25 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2002/09/25 06:40:13 $
-* $Revision: 1.18 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.19 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
+import java.util.List;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.BitSet;
import java.util.Map;
+
import java.io.IOException;
import java.io.DataInputStream;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
+import java.io.BufferedReader;
import com.ibm.text.utility.*;
@@ -31,7 +35,7 @@ public final class UCD implements UCD_Types {
/**
* Used for the default version.
*/
- public static final String latestVersion = "3.2.0";
+ public static final String latestVersion = "3.2.1";
/**
* Create singleton instance for default (latest) version
@@ -651,7 +655,7 @@ public final class UCD implements UCD_Types {
}
public static String getCategoryID_fromIndex(byte prop) {
- return UCD_Names.GC[prop];
+ return getCategoryID_fromIndex(prop, NORMAL);
}
public static String getCategoryID_fromIndex(byte prop, byte style) {
@@ -660,7 +664,7 @@ public final class UCD implements UCD_Types {
public String getCombiningClassID(int codePoint) {
- return getCombiningClassID_fromIndex(getCombiningClass(codePoint), NORMAL);
+ return getCombiningClassID(codePoint, NORMAL);
}
public String getCombiningClassID(int codePoint, byte style) {
@@ -681,9 +685,9 @@ public final class UCD implements UCD_Types {
case 7: s = style < LONG ? "NK" : "Nukta"; break;
case 8: s = style < LONG ? "KV" : "KanaVoicing"; break;
case 9: s = style < LONG ? "VR" : "Virama"; break;
- case 202: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
- case 204: s = style < LONG ? "ATB" : "AttachedBelow"; break;
- case 206: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
+ case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
+ case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
+ case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
@@ -734,7 +738,7 @@ public final class UCD implements UCD_Types {
}
public static String getNumericTypeID_fromIndex(byte prop) {
- return UCD_Names.NT[prop];
+ return getNumericTypeID_fromIndex(prop, NORMAL);
}
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
@@ -746,7 +750,7 @@ public final class UCD implements UCD_Types {
}
public static String getEastAsianWidthID_fromIndex(byte prop) {
- return UCD_Names.EA[prop];
+ return getEastAsianWidthID_fromIndex(prop, NORMAL);
}
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
@@ -758,7 +762,7 @@ public final class UCD implements UCD_Types {
}
public static String getLineBreakID_fromIndex(byte prop) {
- return UCD_Names.LB[prop];
+ return getLineBreakID_fromIndex(prop, NORMAL);
}
public static String getLineBreakID_fromIndex(byte prop, byte style) {
@@ -770,7 +774,7 @@ public final class UCD implements UCD_Types {
}
public static String getJoiningTypeID_fromIndex(byte prop) {
- return UCD_Names.JOINING_TYPE[prop];
+ return getJoiningTypeID_fromIndex(prop, NORMAL);
}
public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
@@ -782,7 +786,7 @@ public final class UCD implements UCD_Types {
}
public static String getJoiningGroupID_fromIndex(byte prop) {
- return UCD_Names.JOINING_GROUP[prop];
+ return getJoiningGroupID_fromIndex(prop, NORMAL);
}
public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
@@ -795,7 +799,7 @@ public final class UCD implements UCD_Types {
}
public static String getScriptID_fromIndex(byte prop) {
- return UCD_Names.SCRIPT[prop];
+ return getScriptID_fromIndex(prop, NORMAL);
}
public static String getScriptID_fromIndex(byte prop, byte length) {
@@ -808,7 +812,7 @@ public final class UCD implements UCD_Types {
}
public static String getAgeID_fromIndex(byte prop) {
- return UCD_Names.AGE[prop];
+ return getAgeID_fromIndex(prop, NORMAL);
}
public static String getAgeID_fromIndex(byte prop, byte style) {
@@ -1306,4 +1310,53 @@ to guarantee identifier closure.
}
}
}
+
+ public static class BlockData { // plain holder for one block entry; filled in by getBlockData
+ public int start; // first code point of the range (hex field before ".." in the Blocks file)
+ public int end; // second code point of the range (hex field after "..")
+ public String name; // block name as stored by loadBlocks: trimmed, spaces replaced by '_'
+ }
+
+ public boolean getBlockData(int blockId, BlockData output) { // copies entry number blockId into output; false when blockId is out of range
+ if (blocks == null) loadBlocks(); // load the block list on first use
+ BlockData temp;
+ try {
+ temp = (BlockData) blocks.get(blockId);
+ } catch (IndexOutOfBoundsException e) { // an out-of-range index is how callers detect the end of the list
+ return false;
+ }
+ output.name = temp.name; // copy field by field so the caller never holds the cached instance
+ output.start = temp.start;
+ output.end = temp.end;
+ return true;
+ }
+
+ private List blocks = null;
+
+ private void loadBlocks() { // reads the "Blocks" data file for this.version into the 'blocks' list
+ blocks = new ArrayList();
+ try {
+ BufferedReader in = Utility.openUnicodeFile("Blocks", version, true, Utility.LATIN1);
+ try {
+ while (true) {
+ // 0000..007F; Basic Latin
+ String line = Utility.readDataLine(in);
+ if (line == null) break;
+ if (line.length() == 0) continue;
+ int pos1 = line.indexOf('.'); // first dot of the ".." range separator
+ int pos2 = line.indexOf(';', pos1); // separator between the range and the name
+
+ BlockData blockData = new BlockData();
+ blockData.start = Integer.parseInt(line.substring(0, pos1), 16);
+ blockData.end = Integer.parseInt(line.substring(pos1+2, pos2), 16); // pos1+2 skips both dots
+ blockData.name = line.substring(pos2+1).trim().replace(' ', '_');
+ blocks.add(blockData);
+ }
+ } finally {
+ in.close();
+ }
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Can't read block file: " + e); // keep the underlying I/O error visible in the message
+ }
+ }
}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
index ecca42fc2b9..ec9b7797b6c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.14 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.15 $
*
*******************************************************************************
*/
@@ -201,11 +201,11 @@ final class UCD_Names implements UCD_Types {
static final String[] YN_TABLE = {"N", "Y"};
- static String[] EA = {
+ static String[] SHORT_EA = {
"N", "A", "H", "W", "F", "Na"
};
- static String[] SHORT_EA = {
+ static String[] EA = {
"Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
};
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
index 74173aa16f4..94f82407c77 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2002/08/04 21:38:45 $
-* $Revision: 1.15 $
+* $Date: 2002/10/05 01:28:58 $
+* $Revision: 1.16 $
*
*******************************************************************************
*/
@@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
- public static final int dVersion = 9; // change to fix the generated file D version. If less than zero, no "d"
+ public static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java
index 57f4c1eda86..9633b3d1575 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
-* $Date: 2002/08/04 21:38:44 $
-* $Revision: 1.9 $
+* $Date: 2002/10/05 01:28:57 $
+* $Revision: 1.10 $
*
*******************************************************************************
*/
@@ -299,13 +299,13 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
case COMBINING_CLASS>>8: return ucd.getCombiningClassID_fromIndex((byte)propValue, style);
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex((byte)propValue, style);
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex((byte)propValue, style);
- case NUMERIC_TYPE>>8: ucd.getNumericTypeID_fromIndex((byte)propValue, style);
- case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue);
+ case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex((byte)propValue, style);
+ case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex((byte)propValue, style);
case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex((byte)propValue, style);
- case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue);
- case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue);
+ case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex((byte)propValue, style);
+ case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex((byte)propValue, style);
case BINARY_PROPERTIES>>8: return ucd.getBinaryPropertiesID_fromIndex((byte)propValue, style);
- case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue);
+ case SCRIPT>>8: return ucd.getScriptID_fromIndex((byte)propValue, style);
case AGE>>8: return ucd.getAgeID_fromIndex((byte)propValue);
/*
case DERIVED>>8:
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java
index 88e0a2ac53e..92868108aa7 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedProperty.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
-* $Date: 2002/08/08 15:38:16 $
-* $Revision: 1.1 $
+* $Date: 2002/10/05 01:28:57 $
+* $Revision: 1.2 $
*
*******************************************************************************
*/
@@ -175,12 +175,12 @@ public final class UnifiedProperty extends UnicodeProperty {
case BIDI_CLASS>>8: return ucd.getBidiClassID_fromIndex(ucd.getBidiClass(cp), style);
case DECOMPOSITION_TYPE>>8: return ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(cp), style);
case NUMERIC_TYPE>>8: return ucd.getNumericTypeID_fromIndex(ucd.getNumericType(cp), style);
- case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp));
- case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp));
- case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp));
- case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp));
- case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp));
- case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp));
+ case EAST_ASIAN_WIDTH>>8: return ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(cp), style);
+ case LINE_BREAK>>8: return ucd.getLineBreakID_fromIndex(ucd.getLineBreak(cp), style);
+ case JOINING_TYPE>>8: return ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(cp), style);
+ case JOINING_GROUP>>8: return ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(cp), style);
+ case SCRIPT>>8: return ucd.getScriptID_fromIndex(ucd.getScript(cp), style);
+ case AGE>>8: return ucd.getAgeID_fromIndex(ucd.getAge(cp), style);
default: throw new IllegalArgumentException("Internal Error");
}
}
diff --git a/tools/unicodetools/com/ibm/text/utility/PoorMansEnum.java b/tools/unicodetools/com/ibm/text/utility/PoorMansEnum.java
new file mode 100644
index 00000000000..caae2cd4131
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/utility/PoorMansEnum.java
@@ -0,0 +1,99 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/PoorMansEnum.java,v $
+* $Date: 2002/10/05 01:28:57 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+/* Goal for enum is:
+ * Easy to use
+ * ID <-> int
+ * ID <-> string name
+ */
+package com.ibm.text.utility;
+
+import java.util.*;
+
+/** Base for type-safe enum classes: each value carries an int id and a name. */
+public class PoorMansEnum {
+    protected int value;         // index assigned by EnumStore.add
+    protected String name;       // primary name assigned by EnumStore.add
+    protected PoorMansEnum next; // value added right after this one, or null
+
+    public int toInt() {
+        return value;
+    }
+
+    public String toString() {
+        return name;
+    }
+
+    /** For subclassers only. */
+    protected PoorMansEnum() {}
+
+    /** Registry a subclass uses to map ids to ints and names (and back). */
+    protected static class EnumStore {
+        private List int2Id = new ArrayList();
+        private Map string2Id = new HashMap();
+        private PoorMansEnum last = null;
+
+        /** Registers a new id under a new name; throws if either is already stored. */
+        public PoorMansEnum add(PoorMansEnum id, String name) {
+            if (int2Id.indexOf(id) >= 0) {
+                throw new IllegalArgumentException("ID already stored for \"" + name + '"');
+            } else if (string2Id.containsKey(name)) {
+                throw new IllegalArgumentException('"' + name + "\" already stored for ID " + string2Id.get(name));
+            }
+            id.value = int2Id.size();
+            id.name = name;
+            if (last != null) {
+                last.next = id; // chain values in insertion order
+            }
+            int2Id.add(id);
+            string2Id.put(name, id);
+            last = id;
+            return id;
+        }
+
+        /** Adds an extra name for an id that is already stored. */
+        public PoorMansEnum addAlias(PoorMansEnum id, String name) {
+            if (int2Id.indexOf(id) < 0) {
+                throw new IllegalArgumentException("ID must already be stored for \"" + name + '"');
+            } else if (string2Id.containsKey(name)) {
+                throw new IllegalArgumentException('"' + name + "\" already stored for ID " + string2Id.get(name));
+            }
+            string2Id.put(name, id);
+            return id;
+        }
+
+        /** Adds to output every stored name of id except its primary name. */
+        public Collection getAliases(PoorMansEnum id, Collection output) {
+            Iterator it = string2Id.keySet().iterator();
+            while (it.hasNext()) {
+                Object s = it.next();
+                // compare by value, not identity; null-safe because HashMap permits a null key
+                if (s == id.name || (s != null && s.equals(id.name))) continue;
+                if (id == string2Id.get(s)) output.add(s);
+            }
+            return output;
+        }
+
+        public int getMax() { // number of stored ids, i.e. one past the largest int value
+            return int2Id.size();
+        }
+
+        public PoorMansEnum get(int value) {
+            return (PoorMansEnum) int2Id.get(value);
+        }
+
+        public PoorMansEnum get(String name) { // primary name or alias; null if unknown
+            return (PoorMansEnum) string2Id.get(name);
+        }
+    }
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/utility/SampleEnum.java b/tools/unicodetools/com/ibm/text/utility/SampleEnum.java
new file mode 100644
index 00000000000..39049b16117
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/utility/SampleEnum.java
@@ -0,0 +1,76 @@
+/*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/SampleEnum.java,v $
+* $Date: 2002/10/05 01:28:56 $
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.text.utility;
+
+import java.util.*;
+
+/** Sample Poor-Man's Enum.
+ * To use as a template, copy and
+ *
+ * - replace all instances of "SampleEnum" by your enum's name
+ * - change the enum values to your values
+ * - set any aliases (or remove that section)
+ *
+ */
+public final class SampleEnum extends PoorMansEnum {
+    // declared first: the constants below register themselves in it during class init
+    private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
+
+    public static final SampleEnum ALPHA = add("The");
+    public static final SampleEnum BETA = add("Quick");
+    public static final SampleEnum GAMMA = add("Brown");
+    public static final SampleEnum FIRST = ALPHA;
+
+    static {
+        store.addAlias(ALPHA, "A");
+    }
+
+    /* Boilerplate: typed wrappers around the shared store */
+    public SampleEnum next() { return (SampleEnum) next; }
+    public void getAliases(Collection output) { store.getAliases(this, output); }
+    public static SampleEnum get(String s) { return (SampleEnum) store.get(s); }
+    public static SampleEnum get(int v) { return (SampleEnum) store.get(v); }
+    public static int getMax() { return store.getMax(); }
+
+    private SampleEnum() {}
+    private static SampleEnum add(String name) { return (SampleEnum) store.add(new SampleEnum(), name); }
+
+    /** Just for testing: prints int -> name, name -> int, then each value with its aliases. */
+    public static void test() {
+        // pass 1: int to string, collecting the names as we go
+        Set names = new TreeSet();
+        int index = 0;
+        while (index < SampleEnum.getMax()) {
+            String label = SampleEnum.get(index).toString();
+            System.out.println(index + ", " + label);
+            names.add(label);
+            ++index;
+        }
+
+        // pass 2: string to int
+        for (Iterator walker = names.iterator(); walker.hasNext(); ) {
+            String label = (String) walker.next();
+            System.out.println(label + ", " + SampleEnum.get(label).toInt());
+        }
+
+        // pass 3: follow the next() chain from FIRST, listing aliases
+        SampleEnum current = FIRST;
+        while (current != null) {
+            names.clear();
+            current.getAliases(names);
+            System.out.println(current.toInt() + ", " + current + ", " + names);
+            current = current.next();
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/tools/unicodetools/com/ibm/text/utility/Utility.java b/tools/unicodetools/com/ibm/text/utility/Utility.java
index 57c8a170b2d..149f0e22529 100644
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2002/09/25 06:40:14 $
-* $Revision: 1.25 $
+* $Date: 2002/10/05 01:28:56 $
+* $Revision: 1.26 $
*
*******************************************************************************
*/
@@ -26,7 +26,7 @@ import com.ibm.text.UCD.*;
public final class Utility implements UCD_Types { // COMMON UTILITIES
- static final boolean UTF8 = true; // TODO -- make argument
+ // static final boolean UTF8 = true; // TODO -- make argument
public static final char BOM = '\uFEFF';
public static String[] append(String[] array1, String[] array2) {
@@ -521,7 +521,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return "";
}
- if (c <= 0x7E || UTF8) {
+ if (c <= 0x7E) {
return UTF32.valueOf32(c);
}
@@ -634,17 +634,45 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
*/
- static final byte WINDOWS_MASK = 1, UTF8_MASK = 2;
- public static final byte
- LATIN1_UNIX = 0,
- LATIN1_WINDOWS = WINDOWS_MASK,
- UTF8_UNIX = UTF8_MASK,
- UTF8_WINDOWS = UTF8_MASK | WINDOWS_MASK;
+ public static final class Encoding extends PoorMansEnum {
+     private static PoorMansEnum.EnumStore store = new PoorMansEnum.EnumStore();
+     /* Construction: values are created and registered only through add() */
+     private Encoding() {}
+     private static Encoding add(String name) { return (Encoding) store.add(new Encoding(), name); }
+
+     /* Typed lookups and iteration over the shared store */
+     public static int getMax() { return store.getMax(); }
+     public static Encoding get(int v) { return (Encoding) store.get(v); }
+     public static Encoding get(String s) { return (Encoding) store.get(s); }
+     public Encoding next() { return (Encoding) next; }
+     public void getAliases(Collection output) { store.getAliases(this, output); }
+ }
+ // One declaration per constant; the order of the add() calls is kept exactly as before.
+ public static final Encoding LATIN1_UNIX = Encoding.add("LATIN1_UNIX");
+ public static final Encoding LATIN1_WINDOWS = Encoding.add("LATIN1_WINDOWS");
+ public static final Encoding UTF8_UNIX = Encoding.add("UTF8_UNIX");
+ public static final Encoding UTF8_WINDOWS = Encoding.add("UTF8_WINDOWS");
+
+ public static final Encoding UTF8 = Encoding.add("UTF8"); // for read-only
+ public static final Encoding LATIN1 = Encoding.add("LATIN1"); // for read-only
+
+ public static final Encoding FIRST = LATIN1_UNIX;
+
+
+ /*
+ public static final Encoding
+ LATIN1_UNIX = Encoding.LATIN1_UNIX,
+ LATIN1_WINDOWS = Encoding.LATIN1_WINDOWS,
+ UTF8_UNIX = Encoding.UTF8_UNIX,
+ UTF8_WINDOWS = Encoding.UTF8_WINDOWS;
+ */
+
+
// Normally use false, false.
// But for UCD files use true, true
// Or if they are UTF8, use true, false
- public static PrintWriter openPrintWriter(String filename, byte options) throws IOException {
+ public static PrintWriter openPrintWriter(String filename, Encoding options) throws IOException {
File file = new File(getOutputName(filename));
Utility.fixDot();
System.out.println("Creating File: " + file.getCanonicalPath());
@@ -655,7 +683,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
new UTF8StreamWriter(
new FileOutputStream(file),
32*1024,
- (options & WINDOWS_MASK) == 0, (options & UTF8_MASK) == 0));
+ options == LATIN1_UNIX || options == UTF8_UNIX,
+ options == LATIN1_UNIX || options == LATIN1_WINDOWS));
}
public static String getOutputName(String filename) {
@@ -714,13 +743,9 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
}
- public static void appendFile(String filename, boolean utf8, PrintWriter output) throws IOException {
- appendFile(filename, utf8, output, null);
- }
-
- public static BufferedReader openReadFile(String filename, boolean UTF8) throws FileNotFoundException, UnsupportedEncodingException {
+ public static BufferedReader openReadFile(String filename, Encoding encoding) throws FileNotFoundException, UnsupportedEncodingException { // opens filename for buffered reading, decoded per encoding
FileInputStream fis = new FileInputStream(filename);
- InputStreamReader isr = UTF8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
+ InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS || encoding == UTF8) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis); // UTF8 is the read-only constant and must decode as UTF-8 too; any other value uses the platform default charset
BufferedReader br = new BufferedReader(isr, 32*1024);
return br;
}
@@ -769,10 +794,17 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return line;
}
- public static void appendFile(String filename, boolean utf8, PrintWriter output, String[] replacementList) throws IOException {
+ public static void appendFile(String filename, Encoding encoding, PrintWriter output) throws IOException {
+ appendFile(filename, encoding, output, null); // convenience overload: delegates with a null replacementList
+ }
+
+ public static void appendFile(String filename, Encoding encoding, PrintWriter output, String[] replacementList) throws IOException {
+ BufferedReader br = openReadFile(filename, encoding);
+ /*
FileInputStream fis = new FileInputStream(filename);
- InputStreamReader isr = utf8 ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
+ InputStreamReader isr = (encoding == UTF8_UNIX || encoding == UTF8_WINDOWS) ? new InputStreamReader(fis, "UTF8") : new InputStreamReader(fis);
BufferedReader br = new BufferedReader(isr, 32*1024);
+ */
while (true) {
String line = br.readLine();
if (line == null) break;
@@ -861,20 +893,20 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return -1;
}
- public static void copyTextFile(String filename, boolean utf8, String newName, String[] replacementList) throws IOException {
+ public static void copyTextFile(String filename, Encoding encoding, String newName, String[] replacementList) throws IOException { // encoding applies to the input file only; the output is always opened as UTF8_WINDOWS
PrintWriter out = Utility.openPrintWriter(newName, UTF8_WINDOWS);
- appendFile(filename, utf8, out, replacementList);
- out.close();
+ try { appendFile(filename, encoding, out, replacementList); }
+ finally { out.close(); } // release the output file even when appendFile throws
}
- public static void copyTextFile(String filename, boolean utf8, String newName) throws IOException {
- copyTextFile(filename, utf8, newName, null);
+ public static void copyTextFile(String filename, Encoding encoding, String newName) throws IOException {
+ copyTextFile(filename, encoding, newName, null); // convenience overload: delegates with a null replacementList
}
- public static BufferedReader openUnicodeFile(String filename, String version, boolean show, boolean UTF8) throws IOException {
+ public static BufferedReader openUnicodeFile(String filename, String version, boolean show, Encoding encoding) throws IOException { // resolves the data file name, then opens it via openReadFile
String name = getMostRecentUnicodeDataFile(filename, version, true, show);
if (name == null) return null;
- return openReadFile(name, UTF8); // new BufferedReader(new FileReader(name),32*1024);
+ return openReadFile(name, encoding); // reached only when a file name was resolved; a null name returns null above
}
public static String getMostRecentUnicodeDataFile(String filename, String version,