more updates

X-SVN-Rev: 11332
This commit is contained in:
Mark Davis 2003-03-15 02:36:49 +00:00
parent c31d7e59cd
commit 53394d58a2
11 changed files with 127 additions and 72 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
* $Date: 2002/07/03 02:15:47 $
* $Revision: 1.17 $
* $Date: 2003/03/15 02:36:49 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -1109,7 +1109,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
int itemInRange = startOfRange;
int skip = 1;
boolean doSamples = false;
UnicodeSetIterator usi = new UnicodeSetIterator();
AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
/**
* use FIXED_CE as the limit
@ -1120,8 +1120,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
this.nfkd = new Normalizer(Normalizer.NFKD, unicodeVersion);
this.skipDecomps = skipDecomps;
currentRange = 0;
usi.reset(unspecified);
usi.setAbbreviated(true);
usi.reset(unspecified, true);
//usi.setAbbreviated(true);
// FIX SAMPLES
if (SAMPLE_RANGES[0][0] == 0) {
@ -1204,8 +1204,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
}
}
unspecified = temp;
usi.reset(unspecified);
usi.setAbbreviated(true);
usi.reset(unspecified, true);
//usi.setAbbreviated(true);
if (DEBUG) System.out.println("Unspecified = " + unspecified.toPattern(true));
haveUnspecified = true;
}

View file

@ -11,6 +11,8 @@
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
#
# All code points not listed in this file map to themselves.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, see

View file

@ -16,6 +16,10 @@ public final class Default implements UCD_Types {
public static Normalizer nfkd;
public static Normalizer[] nf = new Normalizer[4];
/**
 * Makes sure the shared static {@code ucd} field has been set up.
 * When {@code ucd} is still {@code null} this calls the no-argument
 * {@code setUCD()}; when it is already non-null this method does nothing.
 */
public static void ensureUCD() {
    if (ucd != null) {
        return; // already set up; nothing to do
    }
    setUCD();
}
public static void setUCD(String version) {
ucdVersion = version;
setUCD();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.25 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.26 $
*
*******************************************************************************
*/
@ -141,6 +141,39 @@ public class GenerateData implements UCD_Types {
+ ".html";
}
/**
 * Compares the UCD of {@code targetVersion} against the current default UCD
 * ({@code Default.ucd}) and writes the records that differ to two log files.
 * <p>
 * Every code point from 0 to 0x10FFFF that is allocated in the target version
 * is examined. When the target record and the current record are not equal,
 * the target record is appended to {@code Log1.xml} and the current record to
 * {@code Log2.xml}, both in the {@code UData.ABBREVIATED} style, so the two
 * files can be diffed side by side. Each file is wrapped in a
 * {@code <diff version='...'>} element naming the version it describes.
 * <p>
 * Both writers are closed in a {@code finally} clause so that the files are
 * released even when opening the second log or reading a record fails.
 * <p>
 * NOTE(review): this reads {@code Default.ucd} directly and therefore assumes
 * it has already been initialized by the caller - confirm for new call sites.
 *
 * @param targetVersion version string passed to {@code UCD.make} to obtain
 *                      the UCD that the current one is compared against
 * @throws IOException declared by the original signature; presumably raised by
 *                     {@code Utility.openPrintWriter} when a log cannot be opened
 */
public static void checkDifferences (String targetVersion) throws IOException {
    System.out.println("Checking Differences");
    UCD target = UCD.make(targetVersion);

    PrintWriter log1 = null;
    PrintWriter log2 = null;
    try {
        log1 = Utility.openPrintWriter("Log1.xml", Utility.LATIN1_UNIX);
        log1.println("<diff version='" + target.getVersion() + "'>");
        log2 = Utility.openPrintWriter("Log2.xml", Utility.LATIN1_UNIX);
        log2.println("<diff version='" + Default.ucd.getVersion() + "'>");

        for (int i = 0; i <= 0x10FFFF; ++i) {
            // only code points allocated in the target version are compared
            if (!target.isAllocated(i)) continue;
            Utility.dot(i); // presumably a console progress indicator - see Utility
            UData t = target.get(i, true);
            UData current = Default.ucd.get(i, true);

            // spot-check: dump the decomposition data of U+005E from both versions
            if (i == 0x5E) {
                System.out.println(target.getDecompositionTypeID(i)
                    + ", " + Utility.hex(target.getDecompositionMapping(i)));
                System.out.println(Default.ucd.getDecompositionTypeID(i)
                    + ", " + Utility.hex(Default.ucd.getDecompositionMapping(i)));
            }

            if (t.equals(current)) continue;

            // print both for comparison
            log1.println(t.toString(target, UData.ABBREVIATED));
            log2.println(current.toString(Default.ucd, UData.ABBREVIATED));
        }

        log1.println("</diff>");
        log2.println("</diff>");
    } finally {
        // release the log files on every exit path, including exceptions
        if (log1 != null) log1.close();
        if (log2 != null) log2.close();
    }
}
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
Default.setUCD();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.27 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.28 $
*
*******************************************************************************
*/
@ -47,11 +47,31 @@ public final class Main implements UCD_Types {
public static void main (String[] args) throws Exception {
for (int i = 0; i < args.length; ++i) {
long mask = 0;
String arg = args[i];
if (arg.charAt(0) == '#') return; // skip rest of line
Utility.fixDot();
System.out.println("Argument: " + args[i]);
// Expand string arguments
if (arg.equalsIgnoreCase("All")) {
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
continue;
}
// make sure the UCD is set up
if (arg.equalsIgnoreCase("version")) {
Default.setUCD(args[++i]);
continue;
}
Default.ensureUCD();
// Now handle other options
if (arg.equalsIgnoreCase("verify")) {
VerifyUCD.verify();
@ -60,7 +80,6 @@ public final class Main implements UCD_Types {
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
else if (arg.equalsIgnoreCase("version")) Default.setUCD(args[++i]);
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
@ -123,6 +142,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
@ -130,36 +150,9 @@ public final class Main implements UCD_Types {
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
else extras(new String[] {arg});
}
}
public static void extras (String[] args) throws Exception {
//ubp = new UnifiedBinaryProperty(ucd);
boolean expanding = false;
for (int i = 0; i < args.length; ++i) {
String arg = args[i];
if (arg.charAt(0) == '#') return; // skip rest of line
long mask = 0;
Utility.fixDot();
if (expanding) System.out.println("Argument: " + args[i]);
if (arg.equalsIgnoreCase("All")) {
// Append all args at end
/*
String[] temp = new String[args.length + ALL_FILES.length];
System.arraycopy(args, 0, temp, 0, args.length);
System.arraycopy(ALL_FILES, 0, temp, args.length, ALL_FILES.length);
*/
args = Utility.append(args, ALL_FILES);
expanding = true;
// EXTRACTED PROPERTIES
} else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");

View file

@ -6,6 +6,8 @@
# characters where they are 1-1, and does not have locale-specific mappings.)
# For more information, see the discussion of Case Mappings in the Unicode Standard.
#
# All code points not listed in this file that do not have simple case mappings
# in UnicodeData.txt map to themselves.
# ================================================================================
# Format
# ================================================================================

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.21 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.22 $
*
*******************************************************************************
*/
@ -123,7 +123,7 @@ public final class UCD implements UCD_Types {
* Return XML version of the data associated with the code point.
*/
public String toString(int codePoint) {
return get(codePoint, true).toString(FULL);
return get(codePoint, true).toString(this,FULL);
}
/**
@ -1389,6 +1389,7 @@ to guarantee identifier closure.
size = uDataFileCount = dataIn.readInt();
boolean didJoiningHack = false;
System.out.println("Loading UCD " + foundVersion);
// records
@ -1396,7 +1397,7 @@ to guarantee identifier closure.
UData uData = new UData();
uData.readBytes(dataIn);
if (uData.codePoint == 0x0221) {
if (uData.codePoint == 0x5E) {
System.out.println("SPOT-CHECK: " + uData);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.17 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -51,7 +51,9 @@ final class UCD_Names implements UCD_Types {
+ "#\tAll code points not listed here have the type U",
"Joining Group (listing ArabicShaping.txt, field 2)",
"BidiMirrored (listing UnicodeData.txt, field 9: see UnicodeData.html)",
"Script",
"Script\r\n"
+ "#\tThe value for all code points not explicitly listed in this file is COMMON."
,
"Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)",
"Hangul Syllable Type\r\n# All codepoints not explicitly listed here have the value NA",
"Derived"
@ -219,11 +221,11 @@ final class UCD_Names implements UCD_Types {
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
"SA", "AI", "B2", "SG", "ZW",
"JL",
"JV",
"JT",
"NL",
"WJ",
//"JL",
//"JV",
//"JT",
};
@ -235,11 +237,11 @@ final class UCD_Names implements UCD_Types {
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
"Leading_Jamo",
"Vowel_Jamo",
"Trailing_Jamo",
"Next_Line",
"Word_Joiner"
//"Leading_Jamo",
//"Vowel_Jamo",
//"Trailing_Jamo",
};
public static final String[] SCRIPT = {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.18 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.19 $
*
*******************************************************************************
*/
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
public static final int dVersion = 15; // change to fix the generated file D version. If less than zero, no "d"
public static final int dVersion = 18; // change to fix the generated file D version. If less than zero, no "d"
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -34,7 +34,7 @@ public interface UCD_Types {
CJK_B_BASE = 0x20000,
CJK_B_LIMIT = 0x2A6DF+1;
static final byte BINARY_FORMAT = 7; // bumped if binary format of UCD changes
static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes
// Unicode Property Types
static final byte
@ -240,12 +240,12 @@ public interface UCD_Types {
LB_IS = 8, LB_PR = 9, LB_PO = 10, LB_NU = 11, LB_AL = 12, LB_ID = 13, LB_IN = 14, LB_HY = 15,
LB_CM = 16, LB_BB = 17, LB_BA = 18, LB_SP = 19, LB_BK = 20, LB_CR = 21, LB_LF = 22, LB_CB = 23,
LB_SA = 24, LB_AI = 25, LB_B2 = 26, LB_SG = 27, LB_ZW = 28,
LB_JL = 29,
LB_JV = 30,
LB_JT = 31,
LB_NL = 32,
LB_WJ = 33,
LIMIT_LINE_BREAK = 34,
LB_NL = 29,
LB_WJ = 30,
//LB_JL = 29,
//LB_JV = 30,
//LB_JT = 31,
LIMIT_LINE_BREAK = 31,
LB_LIMIT = LIMIT_LINE_BREAK;
// east asian width

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.7 $
* $Date: 2003/03/15 02:36:48 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -78,6 +78,9 @@ class UData implements UCD_Types {
public boolean equals(Object that) {
UData other = (UData) that;
// use equals for objects
if (!name.equals(other.name)) return false;
if (!decompositionMapping.equals(other.decompositionMapping)) return false;
if (!simpleUppercase.equals(other.simpleUppercase)) return false;
@ -90,8 +93,12 @@ class UData implements UCD_Types {
if (!fullCaseFolding.equals(other.fullCaseFolding)) return false;
if (!specialCasing.equals(other.specialCasing)) return false;
if (!bidiMirror.equals(other.bidiMirror)) return false;
// == for primitives
// Warning: doubles have to use special comparison, because of NaN
if (codePoint != other.codePoint) return false;
if (numericValue != other.numericValue) return false;
if (numericValue < other.numericValue || numericValue > other.numericValue) return false;
if (binaryProperties != other.binaryProperties) return false;
if (generalCategory != other.generalCategory) return false;
if (combiningClass != other.combiningClass) return false;
@ -104,6 +111,7 @@ class UData implements UCD_Types {
if (joiningGroup != other.joiningGroup) return false;
if (script != other.script) return false;
if (age != other.age) return false;
return true;
}
@ -178,17 +186,17 @@ class UData implements UCD_Types {
static final byte ABBREVIATED = 0, FULL = 1;
public String toString() {
return toString(FULL);
return toString(Default.ucd, FULL);
}
public String toString(byte style) {
public String toString(UCD ucd, byte style) {
boolean full = style == FULL;
StringBuffer result = new StringBuffer();
String s = UTF32.valueOf32(codePoint);
result.append("<e c='").append(Utility.quoteXML(codePoint)).append('\'');
result.append("<e cp='").append(Utility.quoteXML(codePoint)).append('\'');
result.append(" hx='").append(Utility.hex(codePoint)).append('\'');
if (full || script != COMMON_SCRIPT) result.append(" sn='").append(UCD_Names.SCRIPT[script]).append('\'');
if (full || script != COMMON_SCRIPT) result.append(" sn='").append(ucd.getScriptID_fromIndex(script,SHORT)).append('\'');
result.append(" n='").append(Utility.quoteXML(name)).append("'\r\n");
int lastPos = result.length();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.28 $
* $Date: 2003/03/15 02:36:47 $
* $Revision: 1.29 $
*
*******************************************************************************
*/
@ -36,6 +36,16 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return temp;
}
/**
 * Copies the elements of {@code array1} from index {@code start} (inclusive)
 * up to index {@code limit} (exclusive) into a newly allocated array.
 *
 * @param array1 source array; it is only read, never modified
 * @param start  index of the first element to copy
 * @param limit  index just past the last element to copy
 * @return a new array of length {@code limit - start} holding the copied elements
 */
public static String[] subarray(String[] array1, int start, int limit) {
    final int count = limit - start;
    final String[] slice = new String[count];
    System.arraycopy(array1, start, slice, 0, count);
    return slice;
}
/**
 * Copies the tail of {@code array1}, from index {@code start} (inclusive)
 * through the last element, into a newly allocated array. Delegates to the
 * three-argument overload with {@code array1.length} as the limit.
 *
 * @param array1 source array
 * @param start  index of the first element to copy
 * @return the array returned by {@code subarray(array1, start, array1.length)}
 */
public static String[] subarray(String[] array1, int start) {
    final int limit = array1.length;
    return subarray(array1, start, limit);
}
public static String getName(int i, String[] names) {
try {
return names[i];