diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java
index 420fec19a78..fe2287ff58e 100644
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
-* $Date: 2002/07/03 02:15:47 $
-* $Revision: 1.17 $
+* $Date: 2003/03/15 02:36:49 $
+* $Revision: 1.18 $
*
*******************************************************************************
*/
@@ -1109,7 +1109,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
int itemInRange = startOfRange;
int skip = 1;
boolean doSamples = false;
- UnicodeSetIterator usi = new UnicodeSetIterator();
+ AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
/**
* use FIXED_CE as the limit
@@ -1120,8 +1120,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
this.nfkd = new Normalizer(Normalizer.NFKD, unicodeVersion);
this.skipDecomps = skipDecomps;
currentRange = 0;
- usi.reset(unspecified);
- usi.setAbbreviated(true);
+ usi.reset(unspecified, true);
+ //usi.setAbbreviated(true);
// FIX SAMPLES
if (SAMPLE_RANGES[0][0] == 0) {
@@ -1204,8 +1204,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
}
}
unspecified = temp;
- usi.reset(unspecified);
- usi.setAbbreviated(true);
+ usi.reset(unspecified, true);
+ //usi.setAbbreviated(true);
if (DEBUG) System.out.println("Unspecified = " + unspecified.toPattern(true));
haveUnspecified = true;
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt b/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
index f924ba6ee9c..ca8da1ac349 100644
--- a/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
@@ -11,6 +11,8 @@
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
+# All code points not listed in this file map to themselves.
+#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, see
diff --git a/tools/unicodetools/com/ibm/text/UCD/Default.java b/tools/unicodetools/com/ibm/text/UCD/Default.java
index 00672f35fe0..60f04992cb2 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Default.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Default.java
@@ -16,6 +16,10 @@ public final class Default implements UCD_Types {
public static Normalizer nfkd;
public static Normalizer[] nf = new Normalizer[4];
+ /**
+  * Calls the no-argument setUCD() only when the static ucd field is still
+  * null; does nothing when a UCD is already set.
+  */
+ public static void ensureUCD() {
+     if (ucd != null) {
+         return;
+     }
+     setUCD();
+ }
+
public static void setUCD(String version) {
ucdVersion = version;
setUCD();
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
index b71986a1d99..a1344dd7002 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.25 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.26 $
*
*******************************************************************************
*/
@@ -141,6 +141,39 @@ public class GenerateData implements UCD_Types {
+ ".html";
}
+ /**
+  * Compares the UCD identified by targetVersion with the currently loaded
+  * Default.ucd, one code point at a time over 0..0x10FFFF, skipping code
+  * points that are not allocated in the target version. For every code point
+  * whose UData records differ, the target record is written to "Log1.xml" and
+  * the current record to "Log2.xml" (both in ABBREVIATED form), so the two
+  * files line up for a side-by-side comparison.
+  *
+  * @param targetVersion version string handed to UCD.make()
+  * @throws IOException declared by the original signature; presumably raised
+  *         by Utility.openPrintWriter when a log file cannot be created
+  */
+ public static void checkDifferences (String targetVersion) throws IOException {
+     System.out.println("Checking Differences");
+     // Default.ucd is dereferenced below; make sure it has been set up even
+     // when this method is not reached through Main.
+     Default.ensureUCD();
+     UCD target = UCD.make(targetVersion);
+
+     // Each writer is closed in a finally block so that neither file handle
+     // leaks if a later step throws.
+     PrintWriter log1 = Utility.openPrintWriter("Log1.xml", Utility.LATIN1_UNIX);
+     try {
+         log1.println("");
+
+         PrintWriter log2 = Utility.openPrintWriter("Log2.xml", Utility.LATIN1_UNIX);
+         try {
+             log2.println("");
+
+             for (int i = 0; i <= 0x10FFFF; ++i) {
+                 if (!target.isAllocated(i)) continue;
+                 Utility.dot(i);
+                 UData t = target.get(i, true);
+                 UData current = Default.ucd.get(i, true);
+                 // Spot-check: echo the decomposition data of U+005E from both versions.
+                 if (i == 0x5E) {
+                     System.out.println(target.getDecompositionTypeID(i)
+                         + ", " + Utility.hex(target.getDecompositionMapping(i)));
+                     System.out.println(Default.ucd.getDecompositionTypeID(i)
+                         + ", " + Utility.hex(Default.ucd.getDecompositionMapping(i)));
+                 }
+                 if (t.equals(current)) continue;
+
+                 // print both for comparison
+                 log1.println(t.toString(target, UData.ABBREVIATED));
+                 log2.println(current.toString(Default.ucd, UData.ABBREVIATED));
+             }
+             log1.println("");
+             log2.println("");
+         } finally {
+             log2.close();
+         }
+     } finally {
+         log1.close();
+     }
+ }
+
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
Default.setUCD();
diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java
index 5c3719852d0..b8b0d2e32a6 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.27 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.28 $
*
*******************************************************************************
*/
@@ -47,11 +47,31 @@ public final class Main implements UCD_Types {
public static void main (String[] args) throws Exception {
for (int i = 0; i < args.length; ++i) {
+
+ long mask = 0;
+
String arg = args[i];
if (arg.charAt(0) == '#') return; // skip rest of line
Utility.fixDot();
System.out.println("Argument: " + args[i]);
+
+ // Expand string arguments
+
+ if (arg.equalsIgnoreCase("All")) {
+ args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
+ continue;
+ }
+
+ // make sure the UCD is set up
+
+ if (arg.equalsIgnoreCase("version")) {
+ Default.setUCD(args[++i]);
+ continue;
+ }
+ Default.ensureUCD();
+
+ // Now handle other options
if (arg.equalsIgnoreCase("verify")) {
VerifyUCD.verify();
@@ -60,7 +80,6 @@ public final class Main implements UCD_Types {
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
- else if (arg.equalsIgnoreCase("version")) Default.setUCD(args[++i]);
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
@@ -123,6 +142,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
+ else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
@@ -130,36 +150,9 @@ public final class Main implements UCD_Types {
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
- else extras(new String[] {arg});
- }
- }
-
- public static void extras (String[] args) throws Exception {
- //ubp = new UnifiedBinaryProperty(ucd);
-
- boolean expanding = false;
-
- for (int i = 0; i < args.length; ++i) {
- String arg = args[i];
- if (arg.charAt(0) == '#') return; // skip rest of line
- long mask = 0;
-
- Utility.fixDot();
- if (expanding) System.out.println("Argument: " + args[i]);
-
- if (arg.equalsIgnoreCase("All")) {
- // Append all args at end
- /*
- String[] temp = new String[args.length + ALL_FILES.length];
- System.arraycopy(args, 0, temp, 0, args.length);
- System.arraycopy(ALL_FILES, 0, temp, args.length, ALL_FILES.length);
- */
- args = Utility.append(args, ALL_FILES);
- expanding = true;
-
// EXTRACTED PROPERTIES
- } else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
+ else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");
diff --git a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
index c9ad2c62f1d..ca35c1df91c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
@@ -6,6 +6,8 @@
# characters where they are 1-1, and does not have locale-specific mappings.)
# For more information, see the discussion of Case Mappings in the Unicode Standard.
#
+# All code points not listed in this file that do not have simple case mappings
+# in UnicodeData.txt map to themselves.
# ================================================================================
# Format
# ================================================================================
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java
index 48d13d793dc..6bced563b63 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.21 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.22 $
*
*******************************************************************************
*/
@@ -123,7 +123,7 @@ public final class UCD implements UCD_Types {
* Return XML version of the data associated with the code point.
*/
public String toString(int codePoint) {
- return get(codePoint, true).toString(FULL);
+ // Fetch the record first, then render it against this UCD in FULL style.
+ UData data = get(codePoint, true);
+ return data.toString(this, FULL);
}
/**
@@ -1389,6 +1389,7 @@ to guarantee identifier closure.
size = uDataFileCount = dataIn.readInt();
boolean didJoiningHack = false;
+ System.out.println("Loading UCD " + foundVersion);
// records
@@ -1396,7 +1397,7 @@ to guarantee identifier closure.
UData uData = new UData();
uData.readBytes(dataIn);
- if (uData.codePoint == 0x0221) {
+ if (uData.codePoint == 0x5E) {
System.out.println("SPOT-CHECK: " + uData);
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
index f8956d7a666..a389cd385d2 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.17 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.18 $
*
*******************************************************************************
*/
@@ -51,7 +51,9 @@ final class UCD_Names implements UCD_Types {
+ "#\tAll code points not listed here have the type U",
"Joining Group (listing ArabicShaping.txt, field 2)",
"BidiMirrored (listing UnicodeData.txt, field 9: see UnicodeData.html)",
- "Script",
+ "Script\r\n"
+ + "#\tThe value for all code points not explicitly listed in this file is COMMON."
+ ,
"Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)",
"Hangul Syllable Type\r\n# All codepoints not explicitly listed here have the value NA",
"Derived"
@@ -219,11 +221,11 @@ final class UCD_Names implements UCD_Types {
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
"SA", "AI", "B2", "SG", "ZW",
- "JL",
- "JV",
- "JT",
"NL",
"WJ",
+ //"JL",
+ //"JV",
+ //"JT",
};
@@ -235,11 +237,11 @@ final class UCD_Names implements UCD_Types {
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
- "Leading_Jamo",
- "Vowel_Jamo",
- "Trailing_Jamo",
"Next_Line",
"Word_Joiner"
+ //"Leading_Jamo",
+ //"Vowel_Jamo",
+ //"Trailing_Jamo",
};
public static final String[] SCRIPT = {
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
index 4728431fe6d..c4a614271f4 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.18 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.19 $
*
*******************************************************************************
*/
@@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
- public static final int dVersion = 15; // change to fix the generated file D version. If less than zero, no "d"
+ public static final int dVersion = 18; // change to fix the generated file D version. If less than zero, no "d"
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";
@@ -34,7 +34,7 @@ public interface UCD_Types {
CJK_B_BASE = 0x20000,
CJK_B_LIMIT = 0x2A6DF+1;
- static final byte BINARY_FORMAT = 7; // bumped if binary format of UCD changes
+ static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes
// Unicode Property Types
static final byte
@@ -240,12 +240,12 @@ public interface UCD_Types {
LB_IS = 8, LB_PR = 9, LB_PO = 10, LB_NU = 11, LB_AL = 12, LB_ID = 13, LB_IN = 14, LB_HY = 15,
LB_CM = 16, LB_BB = 17, LB_BA = 18, LB_SP = 19, LB_BK = 20, LB_CR = 21, LB_LF = 22, LB_CB = 23,
LB_SA = 24, LB_AI = 25, LB_B2 = 26, LB_SG = 27, LB_ZW = 28,
- LB_JL = 29,
- LB_JV = 30,
- LB_JT = 31,
- LB_NL = 32,
- LB_WJ = 33,
- LIMIT_LINE_BREAK = 34,
+ LB_NL = 29,
+ LB_WJ = 30,
+ //LB_JL = 29,
+ //LB_JV = 30,
+ //LB_JT = 31,
+ LIMIT_LINE_BREAK = 31,
LB_LIMIT = LIMIT_LINE_BREAK;
// east asian width
diff --git a/tools/unicodetools/com/ibm/text/UCD/UData.java b/tools/unicodetools/com/ibm/text/UCD/UData.java
index 1e408d88280..1176cf7fd8c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UData.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
-* $Date: 2003/03/12 16:01:26 $
-* $Revision: 1.7 $
+* $Date: 2003/03/15 02:36:48 $
+* $Revision: 1.8 $
*
*******************************************************************************
*/
@@ -78,6 +78,9 @@ class UData implements UCD_Types {
public boolean equals(Object that) {
+ // Object.equals contract: null or a non-UData argument is simply "not equal"
+ // (the unconditional cast below would otherwise throw).
+ if (!(that instanceof UData)) return false;
UData other = (UData) that;
+
+ // use equals for objects
+
if (!name.equals(other.name)) return false;
if (!decompositionMapping.equals(other.decompositionMapping)) return false;
if (!simpleUppercase.equals(other.simpleUppercase)) return false;
@@ -90,8 +93,12 @@ class UData implements UCD_Types {
if (!fullCaseFolding.equals(other.fullCaseFolding)) return false;
if (!specialCasing.equals(other.specialCasing)) return false;
if (!bidiMirror.equals(other.bidiMirror)) return false;
+
+ // == for primitives
+ // Warning: doubles need a special comparison because NaN != NaN. The two
+ // numericValues match when they are == or when both are NaN. A plain
+ // "<" / ">" test is not enough: it is false whenever either side is NaN,
+ // so it would treat NaN as equal to every real number.
+
if (codePoint != other.codePoint) return false;
- if (numericValue != other.numericValue) return false;
+ if (numericValue != other.numericValue
+         && !(Double.isNaN(numericValue) && Double.isNaN(other.numericValue))) return false;
if (binaryProperties != other.binaryProperties) return false;
if (generalCategory != other.generalCategory) return false;
if (combiningClass != other.combiningClass) return false;
@@ -104,6 +111,7 @@ class UData implements UCD_Types {
if (joiningGroup != other.joiningGroup) return false;
if (script != other.script) return false;
if (age != other.age) return false;
+
return true;
}
@@ -178,17 +186,17 @@ class UData implements UCD_Types {
static final byte ABBREVIATED = 0, FULL = 1;
public String toString() {
- return toString(FULL);
+ // Delegate to the two-argument form, passing Default.ucd and the FULL style.
+ final byte style = FULL;
+ return toString(Default.ucd, style);
}
- public String toString(byte style) {
+ public String toString(UCD ucd, byte style) {
boolean full = style == FULL;
StringBuffer result = new StringBuffer();
String s = UTF32.valueOf32(codePoint);
- result.append("