mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
more updates
X-SVN-Rev: 11332
This commit is contained in:
parent
c31d7e59cd
commit
53394d58a2
11 changed files with 127 additions and 72 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2002/07/03 02:15:47 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2003/03/15 02:36:49 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1109,7 +1109,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
int itemInRange = startOfRange;
|
||||
int skip = 1;
|
||||
boolean doSamples = false;
|
||||
UnicodeSetIterator usi = new UnicodeSetIterator();
|
||||
AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
|
||||
|
||||
/**
|
||||
* use FIXED_CE as the limit
|
||||
|
@ -1120,8 +1120,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
this.nfkd = new Normalizer(Normalizer.NFKD, unicodeVersion);
|
||||
this.skipDecomps = skipDecomps;
|
||||
currentRange = 0;
|
||||
usi.reset(unspecified);
|
||||
usi.setAbbreviated(true);
|
||||
usi.reset(unspecified, true);
|
||||
//usi.setAbbreviated(true);
|
||||
|
||||
// FIX SAMPLES
|
||||
if (SAMPLE_RANGES[0][0] == 0) {
|
||||
|
@ -1204,8 +1204,8 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
}
|
||||
}
|
||||
unspecified = temp;
|
||||
usi.reset(unspecified);
|
||||
usi.setAbbreviated(true);
|
||||
usi.reset(unspecified, true);
|
||||
//usi.setAbbreviated(true);
|
||||
if (DEBUG) System.out.println("Unspecified = " + unspecified.toPattern(true));
|
||||
haveUnspecified = true;
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
# (where string lengths may grow). Note that where they can be supported, the
|
||||
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
|
||||
#
|
||||
# All code points not listed in this file map to themselves.
|
||||
#
|
||||
# NOTE: case folding does not preserve normalization forms!
|
||||
#
|
||||
# For information on case folding, see
|
||||
|
|
|
@ -16,6 +16,10 @@ public final class Default implements UCD_Types {
|
|||
public static Normalizer nfkd;
|
||||
public static Normalizer[] nf = new Normalizer[4];
|
||||
|
||||
public static void ensureUCD() {
|
||||
if (ucd == null) setUCD();
|
||||
}
|
||||
|
||||
public static void setUCD(String version) {
|
||||
ucdVersion = version;
|
||||
setUCD();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -141,6 +141,39 @@ public class GenerateData implements UCD_Types {
|
|||
+ ".html";
|
||||
}
|
||||
|
||||
public static void checkDifferences (String targetVersion) throws IOException {
|
||||
System.out.println("Checking Differences");
|
||||
UCD target = UCD.make(targetVersion);
|
||||
|
||||
PrintWriter log1 = Utility.openPrintWriter("Log1.xml", Utility.LATIN1_UNIX);
|
||||
log1.println("<diff version='" + target.getVersion() + "'>");
|
||||
|
||||
PrintWriter log2 = Utility.openPrintWriter("Log2.xml", Utility.LATIN1_UNIX);
|
||||
log2.println("<diff version='" + Default.ucd.getVersion() + "'>");
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!target.isAllocated(i)) continue;
|
||||
Utility.dot(i);
|
||||
UData t = target.get(i, true);
|
||||
UData current = Default.ucd.get(i, true);
|
||||
if (i == 0x5E) {
|
||||
System.out.println(target.getDecompositionTypeID(i)
|
||||
+ ", " + Utility.hex(target.getDecompositionMapping(i)));
|
||||
System.out.println(Default.ucd.getDecompositionTypeID(i)
|
||||
+ ", " + Utility.hex(Default.ucd.getDecompositionMapping(i)));
|
||||
}
|
||||
if (t.equals(current)) continue;
|
||||
|
||||
// print both for comparison
|
||||
log1.println(t.toString(target, UData.ABBREVIATED));
|
||||
log2.println(current.toString(Default.ucd, UData.ABBREVIATED));
|
||||
}
|
||||
log1.println("</diff>");
|
||||
log2.println("</diff>");
|
||||
log1.close();
|
||||
log2.close();
|
||||
}
|
||||
|
||||
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
|
||||
|
||||
Default.setUCD();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.27 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.28 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -47,11 +47,31 @@ public final class Main implements UCD_Types {
|
|||
public static void main (String[] args) throws Exception {
|
||||
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
|
||||
long mask = 0;
|
||||
|
||||
String arg = args[i];
|
||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||
|
||||
Utility.fixDot();
|
||||
System.out.println("Argument: " + args[i]);
|
||||
|
||||
// Expand string arguments
|
||||
|
||||
if (arg.equalsIgnoreCase("All")) {
|
||||
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
|
||||
continue;
|
||||
}
|
||||
|
||||
// make sure the UCD is set up
|
||||
|
||||
if (arg.equalsIgnoreCase("version")) {
|
||||
Default.setUCD(args[++i]);
|
||||
continue;
|
||||
}
|
||||
Default.ensureUCD();
|
||||
|
||||
// Now handle other options
|
||||
|
||||
if (arg.equalsIgnoreCase("verify")) {
|
||||
VerifyUCD.verify();
|
||||
|
@ -60,7 +80,6 @@ public final class Main implements UCD_Types {
|
|||
VerifyUCD.checkAgainstUInfo();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
|
||||
else if (arg.equalsIgnoreCase("version")) Default.setUCD(args[++i]);
|
||||
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
|
||||
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
|
||||
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
|
||||
|
@ -123,6 +142,7 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
|
||||
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
|
||||
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
|
||||
|
||||
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
|
||||
|
||||
|
@ -130,36 +150,9 @@ public final class Main implements UCD_Types {
|
|||
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
*/
|
||||
else extras(new String[] {arg});
|
||||
}
|
||||
}
|
||||
|
||||
public static void extras (String[] args) throws Exception {
|
||||
//ubp = new UnifiedBinaryProperty(ucd);
|
||||
|
||||
boolean expanding = false;
|
||||
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||
long mask = 0;
|
||||
|
||||
Utility.fixDot();
|
||||
if (expanding) System.out.println("Argument: " + args[i]);
|
||||
|
||||
if (arg.equalsIgnoreCase("All")) {
|
||||
// Append all args at end
|
||||
/*
|
||||
String[] temp = new String[args.length + ALL_FILES.length];
|
||||
System.arraycopy(args, 0, temp, 0, args.length);
|
||||
System.arraycopy(ALL_FILES, 0, temp, args.length, ALL_FILES.length);
|
||||
*/
|
||||
args = Utility.append(args, ALL_FILES);
|
||||
expanding = true;
|
||||
|
||||
// EXTRACTED PROPERTIES
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBidiClass");
|
||||
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
# characters where they are 1-1, and does not have locale-specific mappings.)
|
||||
# For more information, see the discussion of Case Mappings in the Unicode Standard.
|
||||
#
|
||||
# All code points not listed in this file that do not have simple case mappings
|
||||
# in UnicodeData.txt map to themselves.
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -123,7 +123,7 @@ public final class UCD implements UCD_Types {
|
|||
* Return XML version of the data associated with the code point.
|
||||
*/
|
||||
public String toString(int codePoint) {
|
||||
return get(codePoint, true).toString(FULL);
|
||||
return get(codePoint, true).toString(this,FULL);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1389,6 +1389,7 @@ to guarantee identifier closure.
|
|||
size = uDataFileCount = dataIn.readInt();
|
||||
|
||||
boolean didJoiningHack = false;
|
||||
System.out.println("Loading UCD " + foundVersion);
|
||||
|
||||
|
||||
// records
|
||||
|
@ -1396,7 +1397,7 @@ to guarantee identifier closure.
|
|||
UData uData = new UData();
|
||||
uData.readBytes(dataIn);
|
||||
|
||||
if (uData.codePoint == 0x0221) {
|
||||
if (uData.codePoint == 0x5E) {
|
||||
System.out.println("SPOT-CHECK: " + uData);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -51,7 +51,9 @@ final class UCD_Names implements UCD_Types {
|
|||
+ "#\tAll code points not listed here have the type U",
|
||||
"Joining Group (listing ArabicShaping.txt, field 2)",
|
||||
"BidiMirrored (listing UnicodeData.txt, field 9: see UnicodeData.html)",
|
||||
"Script",
|
||||
"Script\r\n"
|
||||
+ "#\tThe value for all code points not explicitly listed in this file is COMMON."
|
||||
,
|
||||
"Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)",
|
||||
"Hangul Syllable Type\r\n# All codepoints not explicitly listed here have the value NA",
|
||||
"Derived"
|
||||
|
@ -219,11 +221,11 @@ final class UCD_Names implements UCD_Types {
|
|||
"IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
|
||||
"CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
|
||||
"SA", "AI", "B2", "SG", "ZW",
|
||||
"JL",
|
||||
"JV",
|
||||
"JT",
|
||||
"NL",
|
||||
"WJ",
|
||||
//"JL",
|
||||
//"JV",
|
||||
//"JT",
|
||||
|
||||
};
|
||||
|
||||
|
@ -235,11 +237,11 @@ final class UCD_Names implements UCD_Types {
|
|||
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
|
||||
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
|
||||
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
|
||||
"Leading_Jamo",
|
||||
"Vowel_Jamo",
|
||||
"Trailing_Jamo",
|
||||
"Next_Line",
|
||||
"Word_Joiner"
|
||||
//"Leading_Jamo",
|
||||
//"Vowel_Jamo",
|
||||
//"Trailing_Jamo",
|
||||
};
|
||||
|
||||
public static final String[] SCRIPT = {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
public interface UCD_Types {
|
||||
|
||||
public static final int dVersion = 15; // change to fix the generated file D version. If less than zero, no "d"
|
||||
public static final int dVersion = 18; // change to fix the generated file D version. If less than zero, no "d"
|
||||
|
||||
public static final String BASE_DIR = "C:\\DATA\\";
|
||||
public static final String UCD_DIR = BASE_DIR + "UCD\\";
|
||||
|
@ -34,7 +34,7 @@ public interface UCD_Types {
|
|||
CJK_B_BASE = 0x20000,
|
||||
CJK_B_LIMIT = 0x2A6DF+1;
|
||||
|
||||
static final byte BINARY_FORMAT = 7; // bumped if binary format of UCD changes
|
||||
static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes
|
||||
|
||||
// Unicode Property Types
|
||||
static final byte
|
||||
|
@ -240,12 +240,12 @@ public interface UCD_Types {
|
|||
LB_IS = 8, LB_PR = 9, LB_PO = 10, LB_NU = 11, LB_AL = 12, LB_ID = 13, LB_IN = 14, LB_HY = 15,
|
||||
LB_CM = 16, LB_BB = 17, LB_BA = 18, LB_SP = 19, LB_BK = 20, LB_CR = 21, LB_LF = 22, LB_CB = 23,
|
||||
LB_SA = 24, LB_AI = 25, LB_B2 = 26, LB_SG = 27, LB_ZW = 28,
|
||||
LB_JL = 29,
|
||||
LB_JV = 30,
|
||||
LB_JT = 31,
|
||||
LB_NL = 32,
|
||||
LB_WJ = 33,
|
||||
LIMIT_LINE_BREAK = 34,
|
||||
LB_NL = 29,
|
||||
LB_WJ = 30,
|
||||
//LB_JL = 29,
|
||||
//LB_JV = 30,
|
||||
//LB_JT = 31,
|
||||
LIMIT_LINE_BREAK = 31,
|
||||
LB_LIMIT = LIMIT_LINE_BREAK;
|
||||
|
||||
// east asian width
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2003/03/15 02:36:48 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -78,6 +78,9 @@ class UData implements UCD_Types {
|
|||
|
||||
public boolean equals(Object that) {
|
||||
UData other = (UData) that;
|
||||
|
||||
// use equals for objects
|
||||
|
||||
if (!name.equals(other.name)) return false;
|
||||
if (!decompositionMapping.equals(other.decompositionMapping)) return false;
|
||||
if (!simpleUppercase.equals(other.simpleUppercase)) return false;
|
||||
|
@ -90,8 +93,12 @@ class UData implements UCD_Types {
|
|||
if (!fullCaseFolding.equals(other.fullCaseFolding)) return false;
|
||||
if (!specialCasing.equals(other.specialCasing)) return false;
|
||||
if (!bidiMirror.equals(other.bidiMirror)) return false;
|
||||
|
||||
// == for primitives
|
||||
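// Warning: floating-point values need a special comparison, because NaN != NaN.
// NOTE(review): the </> test used below for numericValue also treats NaN as equal to any other value.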
|
||||
|
||||
if (codePoint != other.codePoint) return false;
|
||||
if (numericValue != other.numericValue) return false;
|
||||
if (numericValue < other.numericValue || numericValue > other.numericValue) return false;
|
||||
if (binaryProperties != other.binaryProperties) return false;
|
||||
if (generalCategory != other.generalCategory) return false;
|
||||
if (combiningClass != other.combiningClass) return false;
|
||||
|
@ -104,6 +111,7 @@ class UData implements UCD_Types {
|
|||
if (joiningGroup != other.joiningGroup) return false;
|
||||
if (script != other.script) return false;
|
||||
if (age != other.age) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -178,17 +186,17 @@ class UData implements UCD_Types {
|
|||
static final byte ABBREVIATED = 0, FULL = 1;
|
||||
|
||||
public String toString() {
|
||||
return toString(FULL);
|
||||
return toString(Default.ucd, FULL);
|
||||
}
|
||||
|
||||
public String toString(byte style) {
|
||||
public String toString(UCD ucd, byte style) {
|
||||
boolean full = style == FULL;
|
||||
StringBuffer result = new StringBuffer();
|
||||
String s = UTF32.valueOf32(codePoint);
|
||||
|
||||
result.append("<e c='").append(Utility.quoteXML(codePoint)).append('\'');
|
||||
result.append("<e cp='").append(Utility.quoteXML(codePoint)).append('\'');
|
||||
result.append(" hx='").append(Utility.hex(codePoint)).append('\'');
|
||||
if (full || script != COMMON_SCRIPT) result.append(" sn='").append(UCD_Names.SCRIPT[script]).append('\'');
|
||||
if (full || script != COMMON_SCRIPT) result.append(" sn='").append(ucd.getScriptID_fromIndex(script,SHORT)).append('\'');
|
||||
result.append(" n='").append(Utility.quoteXML(name)).append("'\r\n");
|
||||
|
||||
int lastPos = result.length();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.28 $
|
||||
* $Date: 2003/03/15 02:36:47 $
|
||||
* $Revision: 1.29 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -36,6 +36,16 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
return temp;
|
||||
}
|
||||
|
||||
public static String[] subarray(String[] array1, int start, int limit) {
|
||||
String[] temp = new String[limit - start];
|
||||
System.arraycopy(array1, start, temp, 0, limit - start);
|
||||
return temp;
|
||||
}
|
||||
|
||||
public static String[] subarray(String[] array1, int start) {
|
||||
return subarray(array1, start, array1.length);
|
||||
}
|
||||
|
||||
public static String getName(int i, String[] names) {
|
||||
try {
|
||||
return names[i];
|
||||
|
|
Loading…
Add table
Reference in a new issue