mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
changed parameterization, fixed gaps, produced raw version, everything now in one file.
X-SVN-Rev: 14324
This commit is contained in:
parent
fcf9869ec8
commit
39e1d2518d
3 changed files with 246 additions and 161 deletions
|
@ -1,5 +1,6 @@
|
|||
package com.ibm.text.UCA;
|
||||
|
||||
import com.ibm.text.UCD.UCD_Types;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
/**
|
||||
|
@ -16,7 +17,7 @@ import com.ibm.text.utility.Utility;
|
|||
# Last CJK_A: E0DE3100
|
||||
|
||||
*/
|
||||
public class Implicit {
|
||||
public class Implicit implements UCD_Types {
|
||||
|
||||
/**
|
||||
* constants
|
||||
|
@ -27,7 +28,7 @@ public class Implicit {
|
|||
static final long bottomByte = 0xFFL;
|
||||
static final long fourBytes = 0xFFFFFFFFL;
|
||||
|
||||
static final int MAX_INPUT = 0x21FFFF;
|
||||
static final int MAX_INPUT = 0x220000; // 2 * Unicode range + 1
|
||||
|
||||
/**
|
||||
* Testing function
|
||||
|
@ -37,6 +38,10 @@ public class Implicit {
|
|||
System.out.println("Start");
|
||||
try {
|
||||
Implicit foo = new Implicit(0xE0, 0xE4);
|
||||
|
||||
//int x = foo.getRawImplicit(0xF810);
|
||||
foo.getFromRawImplicit(0xE20303E7);
|
||||
|
||||
int gap4 = foo.getGap4();
|
||||
int gap3 = foo.getGap3();
|
||||
int minTrail = foo.getMinTrail();
|
||||
|
@ -44,13 +49,30 @@ public class Implicit {
|
|||
long last = 0;
|
||||
long current;
|
||||
for (int i = 0; i <= MAX_INPUT; ++i) {
|
||||
current = foo.getImplicit(i) & fourBytes;
|
||||
current = foo.getRawImplicit(i) & fourBytes;
|
||||
|
||||
// check that it round-trips AND that all intervening ones are illegal
|
||||
int roundtrip = foo.getFromRawImplicit((int)current);
|
||||
if (roundtrip != i) {
|
||||
foo.throwError("No roundtrip", i);
|
||||
}
|
||||
if (last != 0) {
|
||||
for (long j = last + 1; j < current; ++j) {
|
||||
roundtrip = foo.getFromRawImplicit((int)j);
|
||||
// raise an error if it *doesn't* find an error
|
||||
if (roundtrip != -1) {
|
||||
foo.throwError("Fails to recognize illegal", j);
|
||||
}
|
||||
}
|
||||
}
|
||||
// now do other consistency checks
|
||||
long lastBottom = last & bottomByte;
|
||||
long currentBottom = current & bottomByte;
|
||||
long lastTop = last & topByte;
|
||||
long currentTop = current & topByte;
|
||||
|
||||
// do some consistency checks
|
||||
/*
|
||||
long gap = current - last;
|
||||
if (currentBottom != 0) { // if we are a 4-byte
|
||||
// gap has to be at least gap4
|
||||
|
@ -65,6 +87,7 @@ public class Implicit {
|
|||
if (current3Bottom < minTrail + gap3) foo.throwError("Failed gap3 before", i);
|
||||
if (current3Bottom > maxTrail - gap3) foo.throwError("Failed gap3 after", i);
|
||||
}
|
||||
*/
|
||||
// print out some values for spot-checking
|
||||
if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
|
||||
foo.show(i-3);
|
||||
|
@ -94,13 +117,17 @@ public class Implicit {
|
|||
}
|
||||
}
|
||||
|
||||
private void throwError(String title, int i) {
|
||||
throw new IllegalArgumentException(title + "\t" + Utility.hex(i) + "\t" + Utility.hex(getImplicit(i) & fourBytes));
|
||||
private void throwError(String title, int cp) {
|
||||
throw new IllegalArgumentException(title + "\t" + Utility.hex(cp) + "\t" + Utility.hex(getRawImplicit(cp) & fourBytes));
|
||||
}
|
||||
|
||||
private void throwError(String title, long ce) {
|
||||
throw new IllegalArgumentException(title + "\t" + Utility.hex(ce & fourBytes));
|
||||
}
|
||||
|
||||
private void show(int i) {
|
||||
if (i >= 0 && i <= MAX_INPUT) {
|
||||
System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicit(i) & fourBytes));
|
||||
System.out.println(Utility.hex(i) + "\t" + Utility.hex(getRawImplicit(i) & fourBytes));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,6 +144,8 @@ public class Implicit {
|
|||
int max4Primary;
|
||||
int minTrail;
|
||||
int maxTrail;
|
||||
int max3Trail;
|
||||
int max4Trail;
|
||||
int min4Boundary;
|
||||
|
||||
public int getGap4() {
|
||||
|
@ -140,7 +169,7 @@ public class Implicit {
|
|||
*/
|
||||
public Implicit(int minPrimary, int maxPrimary) {
|
||||
// 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
|
||||
this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 15);
|
||||
this(minPrimary, maxPrimary, 0x03, 0xFE, 1, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -152,54 +181,54 @@ public class Implicit {
|
|||
* @param gap3 the gap we leave for tailoring for 3-byte forms
|
||||
* @param gap4 the gap we leave for tailoring for 4-byte forms
|
||||
*/
|
||||
public Implicit(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int gap4) {
|
||||
public Implicit(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) {
|
||||
// some simple parameter checks
|
||||
if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) throw new IllegalArgumentException("bad lead bytes");
|
||||
if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) throw new IllegalArgumentException("bad trail bytes");
|
||||
if (gap3 < 1 || gap4 < 1) throw new IllegalArgumentException("must have larger gaps");
|
||||
if (primaries3count < 1) throw new IllegalArgumentException("bad gap");
|
||||
|
||||
this.minTrail = minTrail;
|
||||
this.maxTrail = maxTrail;
|
||||
|
||||
final3Multiplier = gap3 + 1;
|
||||
final4Multiplier = gap4 + 1;
|
||||
min3Primary = minPrimary;
|
||||
max4Primary = maxPrimary;
|
||||
// compute constants for use later.
|
||||
// number of values we can use in trailing bytes
|
||||
// leave room for empty values below, between, AND above, so
|
||||
// gap = 2:
|
||||
// range 3..7 => (3,4) 5 (6,7): so 1 value
|
||||
// range 3..8 => (3,4) 5 (6,7,8): so 1 value
|
||||
// range 3..9 => (3,4) 5 (6,7,8,9): so 1 value
|
||||
// range 3..10 => (3,4) 5 (6,7) 8 (9, 10): so 2 values
|
||||
final3Count = 1 + (maxTrail - minTrail - 1) / final3Multiplier;
|
||||
final4Count = 1 + (maxTrail - minTrail - 1) / final4Multiplier;
|
||||
// leave room for empty values between AND above, e.g. if gap = 2
|
||||
// range 3..7 => +3 -4 -5 -6 -7: so 1 value
|
||||
// range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
|
||||
// range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
|
||||
final3Multiplier = gap3 + 1;
|
||||
final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
|
||||
max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
|
||||
|
||||
// medials can use full range
|
||||
medialCount = (maxTrail - minTrail + 1);
|
||||
// find out how many values fit in each form
|
||||
int fourByteCount = medialCount * medialCount * final4Count;
|
||||
int threeByteCount = medialCount * final3Count;
|
||||
// now determine where the 3/4 boundary is.
|
||||
// we use 3 bytes below the boundary, and 4 above
|
||||
int primariesAvailable = maxPrimary - minPrimary + 1;
|
||||
int min4BytesNeeded = divideAndRoundUp(MAX_INPUT, fourByteCount);
|
||||
int min3BytesNeeded = primariesAvailable - min4BytesNeeded;
|
||||
if (min3BytesNeeded < 1) throw new IllegalArgumentException("Too few 3-byte implicits available.");
|
||||
int min3ByteCoverage = min3BytesNeeded * threeByteCount;
|
||||
min4Primary = minPrimary + min3BytesNeeded;
|
||||
int primaries4count = primariesAvailable - primaries3count;
|
||||
//int min3BytesNeeded = primariesAvailable - min4BytesNeeded;
|
||||
|
||||
|
||||
int min3ByteCoverage = primaries3count * threeByteCount;
|
||||
min4Primary = minPrimary + primaries3count;
|
||||
min4Boundary = min3ByteCoverage;
|
||||
// Now expand out the multiplier for the 4 bytes, and redo.
|
||||
|
||||
int totalNeeded = MAX_INPUT - min4Boundary;
|
||||
int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, min4BytesNeeded);
|
||||
int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
|
||||
if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
|
||||
int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
|
||||
if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
|
||||
int expandedGap = (maxTrail - minTrail - 1) / (neededPerFinalByte + 1) - 1;
|
||||
if (DEBUG) System.out.println("expandedGap: " + expandedGap);
|
||||
if (expandedGap < gap4) throw new IllegalArgumentException("must have larger gaps");
|
||||
final4Multiplier = expandedGap + 1;
|
||||
int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
|
||||
if (DEBUG) System.out.println("expandedGap: " + gap4);
|
||||
if (gap4 < 1) throw new IllegalArgumentException("must have larger gap4s");
|
||||
final4Multiplier = gap4 + 1;
|
||||
final4Count = neededPerFinalByte;
|
||||
max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
|
||||
if (DEBUG) {
|
||||
System.out.println("final4Count: " + final4Count);
|
||||
for (int counter = 0; counter <= final4Count; ++counter) {
|
||||
|
@ -212,13 +241,58 @@ public class Implicit {
|
|||
static public int divideAndRoundUp(int a, int b) {
|
||||
return 1 + (a-1)/b;
|
||||
}
|
||||
/**
|
||||
* Converts implicit CE into raw integer ("code point")
|
||||
* @param implicit
|
||||
* @return -1 if illegal format
|
||||
*/
|
||||
public int getFromRawImplicit(int implicit) {
|
||||
int result;
|
||||
int b3 = implicit & 0xFF;
|
||||
implicit >>= 8;
|
||||
int b2 = implicit & 0xFF;
|
||||
implicit >>= 8;
|
||||
int b1 = implicit & 0xFF;
|
||||
implicit >>= 8;
|
||||
int b0 = implicit & 0xFF;
|
||||
|
||||
// simple parameter checks
|
||||
if (b0 < min3Primary || b0 > max4Primary
|
||||
|| b1 < minTrail || b1 > maxTrail) return -1;
|
||||
// normal offsets
|
||||
b1 -= minTrail;
|
||||
|
||||
// take care of the final values, and compose
|
||||
if (b0 < min4Primary) {
|
||||
if (b2 < minTrail || b2 > max3Trail || b3 != 0) return -1;
|
||||
b2 -= minTrail;
|
||||
int remainder = b2 % final3Multiplier;
|
||||
if (remainder != 0) return -1;
|
||||
b0 -= min3Primary;
|
||||
b2 /= final3Multiplier;
|
||||
result = ((b0 * medialCount) + b1) * final3Count + b2;
|
||||
} else {
|
||||
if (b2 < minTrail || b2 > maxTrail
|
||||
|| b3 < minTrail || b3 > max4Trail) return -1;
|
||||
b2 -= minTrail;
|
||||
b3 -= minTrail;
|
||||
int remainder = b3 % final4Multiplier;
|
||||
if (remainder != 0) return -1;
|
||||
b3 /= final4Multiplier;
|
||||
b0 -= min4Primary;
|
||||
result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
|
||||
}
|
||||
// final check
|
||||
if (result < 0 || result > MAX_INPUT) return -1;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the implicit CE, left shifted to put the first byte at the top of an int.
|
||||
* @param cp code point
|
||||
* @return
|
||||
*/
|
||||
public int getImplicit(int cp) {
|
||||
public int getRawImplicit(int cp) {
|
||||
if (cp < 0 || cp > MAX_INPUT) {
|
||||
throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
|
||||
}
|
||||
|
@ -230,7 +304,7 @@ public class Implicit {
|
|||
int last2 = last1 / medialCount;
|
||||
last1 %= medialCount;
|
||||
|
||||
last0 = minTrail + (last0 + 1)*final3Multiplier - 1; // spread out, leaving gap at start
|
||||
last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
|
||||
last1 = minTrail + last1; // offset
|
||||
last2 = min3Primary + last2; // offset
|
||||
|
||||
|
@ -249,7 +323,7 @@ public class Implicit {
|
|||
int last3 = last2 / medialCount;
|
||||
last2 %= medialCount;
|
||||
|
||||
last0 = minTrail + (last0 + 1)*final4Multiplier - 1; // spread out, leaving gap at start
|
||||
last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
|
||||
last1 = minTrail + last1; // offset
|
||||
last2 = minTrail + last2; // offset
|
||||
last3 = min4Primary + last3; // offset
|
||||
|
@ -261,6 +335,71 @@ public class Implicit {
|
|||
return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
|
||||
}
|
||||
}
|
||||
|
||||
public int getSwappedImplicit(int cp) {
|
||||
if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
|
||||
|
||||
cp = Implicit.swapCJK(cp);
|
||||
// we now have a range of numbers from 0 to 21FFFF.
|
||||
|
||||
if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
|
||||
|
||||
return getRawImplicit(cp);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Function used to:
|
||||
* a) collapse the 2 different Han ranges from UCA into one (in the right order), and
|
||||
* b) bump any non-CJK characters by 0x110000 (NON_CJK_OFFSET).
|
||||
* The relevant blocks are:
|
||||
* A: 4E00..9FFF; CJK Unified Ideographs
|
||||
* F900..FAFF; CJK Compatibility Ideographs
|
||||
* B: 3400..4DBF; CJK Unified Ideographs Extension A
|
||||
* 20000..XX; CJK Unified Ideographs Extension B (and others later on)
|
||||
* As long as
|
||||
* no new B characters are allocated between 4E00 and FAFF, and
|
||||
* no new A characters are outside of this range,
|
||||
* (very high probability) this simple code will work.
|
||||
* The reordered blocks are:
|
||||
* Block1 is CJK
|
||||
* Block2 is CJK_COMPAT_USED
|
||||
* Block3 is CJK_A
|
||||
* (all contiguous)
|
||||
* Any other CJK gets its normal code point
|
||||
* Any non-CJK gets +0x110000 (NON_CJK_OFFSET)
|
||||
* When we reorder Block1, we make sure that it is at the very start,
|
||||
* so that it will use a 3-byte form.
|
||||
* Warning: we only pick up the compatibility characters that are
|
||||
* NOT decomposed, so that block is smaller!
|
||||
*/
|
||||
|
||||
static int NON_CJK_OFFSET = 0x110000;
|
||||
|
||||
static int swapCJK(int i) {
|
||||
|
||||
if (i >= CJK_BASE) {
|
||||
if (i < CJK_LIMIT) return i - CJK_BASE;
|
||||
|
||||
if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE
|
||||
+ (CJK_LIMIT - CJK_BASE);
|
||||
if (i < CJK_B_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_B_LIMIT) return i; // non-BMP-CJK
|
||||
|
||||
return i + NON_CJK_OFFSET; // non-CJK
|
||||
}
|
||||
if (i < CJK_A_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_A_LIMIT) return i - CJK_A_BASE
|
||||
+ (CJK_LIMIT - CJK_BASE)
|
||||
+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
|
||||
return i + NON_CJK_OFFSET; // non-CJK
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2004/01/13 18:32:12 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2004/01/15 01:08:30 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -33,6 +33,7 @@ public class Main {
|
|||
// A few changes would need to be made to the code to do older versions.
|
||||
try {
|
||||
System.out.println("Building UCA");
|
||||
Default.setUCD(UCDVersion);
|
||||
WriteCollationData.collator = new UCA(null, UCDVersion);
|
||||
System.out.println("Built version " + WriteCollationData.collator.getDataVersion()
|
||||
+ "/ucd: " + WriteCollationData.collator.getUCDVersion());
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2004/01/13 18:32:11 $
|
||||
* $Revision: 1.36 $
|
||||
* $Date: 2004/01/15 01:08:30 $
|
||||
* $Revision: 1.37 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -109,15 +109,15 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
|
||||
static public void writeCaseExceptions() {
|
||||
System.err.println("Writing Case Exceptions");
|
||||
Normalizer NFKC = new Normalizer(Normalizer.NFKC, UNICODE_VERSION);
|
||||
//Normalizer NFKC = new Normalizer(Normalizer.NFKC, UNICODE_VERSION);
|
||||
for (char a = 0; a < 0xFFFF; ++a) {
|
||||
if (!ucd.isRepresented(a)) continue;
|
||||
//if (0xA000 <= a && a <= 0xA48F) continue; // skip YI
|
||||
|
||||
String b = Case.fold(a);
|
||||
String c = NFKC.normalize(b);
|
||||
String c = Default.nfkc.normalize(b);
|
||||
String d = Case.fold(c);
|
||||
String e = NFKC.normalize(d);
|
||||
String e = Default.nfkc.normalize(d);
|
||||
if (!e.equals(c)) {
|
||||
System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
|
||||
/*
|
||||
|
@ -135,7 +135,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
*/
|
||||
}
|
||||
String f = Case.fold(e);
|
||||
String g = NFKC.normalize(f);
|
||||
String g = Default.nfkc.normalize(f);
|
||||
if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
|
||||
}
|
||||
}
|
||||
|
@ -187,8 +187,8 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
|
||||
static public void writeJavascriptInfo() throws IOException {
|
||||
System.err.println("Writing Javascript data");
|
||||
Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer normKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
//Normalizer normD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
|
||||
log = Utility.openPrintWriter(UCA_GEN_DIR, "Normalization_data.js", Utility.LATIN1_WINDOWS);
|
||||
|
||||
|
@ -204,9 +204,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (!normKD.isNormalized(c)) {
|
||||
if (!Default.nfkd.isNormalized(c)) {
|
||||
++count;
|
||||
String decomp = normKD.normalize(c);
|
||||
String decomp = Default.nfkd.normalize(c);
|
||||
datasize += decomp.length();
|
||||
if (max < decomp.length()) max = decomp.length();
|
||||
if (decomp.length() > 7) ++over7;
|
||||
|
@ -232,9 +232,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (!normD.isNormalized(c)) {
|
||||
if (!Default.nfd.isNormalized(c)) {
|
||||
++count;
|
||||
String decomp = normD.normalize(c);
|
||||
String decomp = Default.nfd.normalize(c);
|
||||
datasize += decomp.length();
|
||||
if (max < decomp.length()) max = decomp.length();
|
||||
csa.setElementAt(c, (short)count);
|
||||
|
@ -256,7 +256,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
|
||||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
int canClass = normKD.getCanonicalClass(c);
|
||||
int canClass = Default.nfkd.getCanonicalClass(c);
|
||||
if (canClass != 0) {
|
||||
++count;
|
||||
|
||||
|
@ -277,7 +277,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
|
||||
/*
|
||||
|
||||
IntHashtable.IntEnumeration enum = normKD.getComposition();
|
||||
IntHashtable.IntEnumeration enum = Default.nfkd.getComposition();
|
||||
while (enum.hasNext()) {
|
||||
int key = enum.next();
|
||||
char val = (char) enum.value();
|
||||
|
@ -530,8 +530,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
}
|
||||
int oldStrength = collator.getStrength();
|
||||
collator.setStrength(strength);
|
||||
Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
Normalizer nfc = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
|
||||
//Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
//Normalizer nfc = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
|
||||
switch (strength) {
|
||||
case 1: log.println("<h2>3. Primaries Incompatible with Decompositions</h2>"); break;
|
||||
case 2: log.println("<h2>4. Secondaries Incompatible with Decompositions</h2>"); break;
|
||||
|
@ -550,12 +550,12 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
|
||||
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
||||
if (!ucd_uca_base.isAllocated(ch)) continue;
|
||||
if (nfkd.isNormalized(ch)) continue;
|
||||
if (Default.nfkd.isNormalized(ch)) continue;
|
||||
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
|
||||
if (alreadySeen.contains(ch)) continue;
|
||||
Utility.dot(ch);
|
||||
|
||||
String decomp = nfkd.normalize(ch);
|
||||
String decomp = Default.nfkd.normalize(ch);
|
||||
if (ch != ' ' && decomp.charAt(0) == ' ') {
|
||||
skipSet.add(ch);
|
||||
continue; // skip weird decomps
|
||||
|
@ -608,10 +608,10 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
}
|
||||
|
||||
static String remapSortKey(int cp, boolean decomposition) {
|
||||
if (toD.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
|
||||
if (Default.nfd.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
|
||||
|
||||
// we know that it is not NFKD.
|
||||
String canDecomp = toD.normalize(cp);
|
||||
String canDecomp = Default.nfd.normalize(cp);
|
||||
String result = "";
|
||||
int ch;
|
||||
for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) {
|
||||
|
@ -799,9 +799,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
log.println("compressed: " + comp);
|
||||
}
|
||||
log.println("Ken's : " + kenStr);
|
||||
String nfkd = NFKD.normalize(s);
|
||||
String nfkd = Default.nfkd.normalize(s);
|
||||
log.println("NFKD : " + ucd.getCodeAndName(nfkd));
|
||||
String nfd = NFD.normalize(s);
|
||||
String nfd = Default.nfd.normalize(s);
|
||||
if (!nfd.equals(nfkd)) {
|
||||
log.println("NFD : " + ucd.getCodeAndName(nfd));
|
||||
}
|
||||
|
@ -824,7 +824,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
static final byte getDecompType(int cp) {
|
||||
byte result = ucd.getDecompositionType(cp);
|
||||
if (result == ucd.CANONICAL) {
|
||||
String d = NFD.normalize(cp); // TODO
|
||||
String d = Default.nfd.normalize(cp); // TODO
|
||||
int cp1;
|
||||
for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
|
||||
cp1 = UTF16.charAt(d, i);
|
||||
|
@ -887,7 +887,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
byte type = getDecompType(UTF16.charAt(s, 0));
|
||||
char ch = s.charAt(0);
|
||||
|
||||
String decomp = NFKD.normalize(s);
|
||||
String decomp = Default.nfkd.normalize(s);
|
||||
int len = 0;
|
||||
int markLen = collator.getCEs(decomp, true, markCes);
|
||||
if (compress) markLen = kenCompress(markCes, markLen);
|
||||
|
@ -957,7 +957,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
log.println("<h2>8. Checking against categories</h2>");
|
||||
log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
|
||||
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
Set sorted = new TreeSet();
|
||||
|
||||
|
@ -994,14 +994,14 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
|
||||
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
|
||||
Map map = new TreeMap();
|
||||
|
||||
while (true) {
|
||||
String s = cc.next();
|
||||
if (s == null) break;
|
||||
if (!toD.isNormalized(s)) continue; // only unnormalized stuff
|
||||
if (!Default.nfd.isNormalized(s)) continue; // only normalized stuff
|
||||
if (UTF16.countCodePoint(s) == 1) {
|
||||
int cat = ucd.getCategory(UTF16.charAt(s,0));
|
||||
if (cat == Cn || cat == Cc || cat == Cs) continue;
|
||||
|
@ -1033,7 +1033,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
|
||||
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, toD);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
|
||||
Map map = new TreeMap();
|
||||
Map tails = new TreeMap();
|
||||
|
@ -1045,7 +1045,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
String s = cc.next();
|
||||
if (s == null) break;
|
||||
Utility.dot(counter++);
|
||||
if (!toD.isNormalized(s)) continue; // only normalized stuff
|
||||
if (!Default.nfd.isNormalized(s)) continue; // only normalized stuff
|
||||
CEList celist = collator.getCEList(s, true);
|
||||
map.put(celist, s);
|
||||
}
|
||||
|
@ -1212,11 +1212,11 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
|
||||
diLog.write('\uFEFF');
|
||||
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
int[] ces = new int[50];
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
diLog.println("# Contractions");
|
||||
|
@ -1261,7 +1261,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
|
||||
//diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"));
|
||||
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
int[] ces = new int[50];
|
||||
int[] secondariesZP = new int[400];
|
||||
|
@ -1287,7 +1287,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
String s = String.valueOf(ch);
|
||||
int len = collator.getCEs(s, true, ces);
|
||||
*/
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set sortedCodes = new TreeSet();
|
||||
|
@ -1432,7 +1432,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
|
||||
//diLog = new PrintWriter(new FileOutputStream(UCA_GEN_DIR + "DisjointIgnorables.txt"));
|
||||
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
int[] ces = new int[50];
|
||||
int[] secondariesZP = new int[400];
|
||||
|
@ -1458,7 +1458,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
String s = String.valueOf(ch);
|
||||
int len = collator.getCEs(s, true, ces);
|
||||
*/
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set sortedCodes = new TreeSet();
|
||||
|
@ -1628,9 +1628,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
}
|
||||
}
|
||||
|
||||
static Normalizer nfdNew = new Normalizer(Normalizer.NFD, "");
|
||||
static Normalizer NFC = new Normalizer(Normalizer.NFC, "");
|
||||
static Normalizer nfkdNew = new Normalizer(Normalizer.NFKD, "");
|
||||
//static Normalizer nfdNew = new Normalizer(Normalizer.NFD, "");
|
||||
//static Normalizer NFC = new Normalizer(Normalizer.NFC, "");
|
||||
//static Normalizer nfkdNew = new Normalizer(Normalizer.NFKD, "");
|
||||
|
||||
static int getFirstCELen(int[] ces, int len) {
|
||||
if (len < 2) return len;
|
||||
|
@ -1653,8 +1653,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
//if (true) return;
|
||||
|
||||
int[] ces = new int[50];
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfkd = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
|
||||
if (false) {
|
||||
int len2 = collator.getCEs("\u2474", true, ces);
|
||||
|
@ -1671,7 +1671,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
Map ordered = new TreeMap(cm);
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE,
|
||||
SKIP_CANONICAL_DECOMPOSIBLES ? nfd : null);
|
||||
SKIP_CANONICAL_DECOMPOSIBLES ? Default.nfd : null);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
Set alreadyDone = new HashSet();
|
||||
|
@ -1737,7 +1737,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
UnicodeSet composites = new UnicodeSet();
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!ucd.isAllocated(i)) continue;
|
||||
if (nfd.isNormalized(i)) continue;
|
||||
if (Default.nfd.isNormalized(i)) continue;
|
||||
composites.add(i);
|
||||
}
|
||||
UnicodeSet CJKcomposites = new UnicodeSet(CJK).retainAll(composites);
|
||||
|
@ -1774,9 +1774,9 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
System.out.println("Adding Kanji");
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!ucd.isAllocated(i)) continue;
|
||||
if (nfkd.isNormalized(i)) continue;
|
||||
if (Default.nfkd.isNormalized(i)) continue;
|
||||
Utility.dot(i);
|
||||
String decomp = nfkd.normalize(i);
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
int cp;
|
||||
for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(decomp, j);
|
||||
|
@ -2438,7 +2438,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
System.out.println("Fix Homeless! No back map for " + CEList.toString(ces[i])
|
||||
+ " from " + CEList.toString(ces, len));
|
||||
System.out.println("\t" + ucd.getCodeAndName(chr)
|
||||
+ " => " + ucd.getCodeAndName(nfkdNew.normalize(chr))
|
||||
+ " => " + ucd.getCodeAndName(Default.nfkd.normalize(chr))
|
||||
);
|
||||
s = "[" + Utility.hex(ces[i]) + "]";
|
||||
} while (false); // exactly one time, just for breaking
|
||||
|
@ -2528,7 +2528,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
"[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]");
|
||||
// needsQuoting.remove();
|
||||
}
|
||||
s = NFC.normalize(s);
|
||||
s = Default.nfc.normalize(s);
|
||||
quoteOperandBuffer.setLength(0);
|
||||
boolean noQuotes = true;
|
||||
boolean inQuote = false;
|
||||
|
@ -2618,8 +2618,8 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|| primary > oldJamo5 && primary <= oldJamo6;
|
||||
}
|
||||
|
||||
static Normalizer NFKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
static Normalizer NFD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//static Normalizer NFKD = new Normalizer(Normalizer.NFKD, UNICODE_VERSION);
|
||||
//static Normalizer NFD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
static int variableHigh = 0;
|
||||
static final int COMMON = 5;
|
||||
|
@ -2760,7 +2760,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!ucd.isNoncharacter(i)) {
|
||||
if (!ucd.isAllocated(i)) continue;
|
||||
if (NFD.isNormalized(i)) continue;
|
||||
if (Default.nfd.isNormalized(i)) continue;
|
||||
if (ucd.isHangulSyllable(i)) continue;
|
||||
//if (collator.getCEType(i) >= UCA.FIXED_CE) continue;
|
||||
}
|
||||
|
@ -2795,7 +2795,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|
||||
|
||||
// Skip anything that is not FCD.
|
||||
if (!NFD.isFCD(s)) continue;
|
||||
if (!Default.nfd.isFCD(s)) continue;
|
||||
|
||||
// We ONLY add if the sort key would be different
|
||||
// Than what we would get if we didn't decompose!!
|
||||
|
@ -3462,59 +3462,13 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Function used to:
|
||||
* a) collapse the 2 different Han ranges from UCA into one (in the right order), and
|
||||
* b) bump any non-CJK characters by 0x110000 (NON_CJK_OFFSET).
|
||||
* The relevant blocks are:
|
||||
* A: 4E00..9FFF; CJK Unified Ideographs
|
||||
* F900..FAFF; CJK Compatibility Ideographs
|
||||
* B: 3400..4DBF; CJK Unified Ideographs Extension A
|
||||
* 20000..XX; CJK Unified Ideographs Extension B (and others later on)
|
||||
* As long as
|
||||
* no new B characters are allocated between 4E00 and FAFF, and
|
||||
* no new A characters are outside of this range,
|
||||
* (very high probability) this simple code will work.
|
||||
* The reordered blocks are:
|
||||
* Block1 is CJK
|
||||
* Block2 is CJK_COMPAT_USED
|
||||
* Block3 is CJK_A
|
||||
* Any other CJK gets its normal code point
|
||||
* Any non-CJK gets +0x110000 (NON_CJK_OFFSET)
|
||||
* When we reorder Block1, we make sure that it is at the very start,
|
||||
* so that it will use a 3-byte form.
|
||||
*/
|
||||
static int swapCJK(int i) {
|
||||
|
||||
if (i >= CJK_BASE) {
|
||||
if (i < CJK_LIMIT) return i - CJK_BASE;
|
||||
|
||||
if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE
|
||||
+ (CJK_LIMIT - CJK_BASE);
|
||||
if (i < CJK_B_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_B_LIMIT) return i; // non-BMP-CJK
|
||||
|
||||
return i + NON_CJK_OFFSET; // non-CJK
|
||||
}
|
||||
if (i < CJK_A_BASE) return i + NON_CJK_OFFSET;
|
||||
|
||||
if (i < CJK_A_LIMIT) return i - CJK_A_BASE
|
||||
+ (CJK_LIMIT - CJK_BASE)
|
||||
+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
|
||||
return i + NON_CJK_OFFSET; // non-CJK
|
||||
}
|
||||
|
||||
// Fractional UCA Generation Constants
|
||||
|
||||
static final int
|
||||
TOP = 0xA0,
|
||||
SPECIAL_BASE = 0xF0,
|
||||
|
||||
NON_CJK_OFFSET = 0x110000,
|
||||
BYTES_TO_AVOID = 3,
|
||||
BYTES_TO_AVOID = 3,
|
||||
OTHER_COUNT = 256 - BYTES_TO_AVOID,
|
||||
LAST_COUNT = OTHER_COUNT / 2,
|
||||
LAST_COUNT2 = OTHER_COUNT / 21, // room for intervening, without expanding to 5 bytes
|
||||
|
@ -3533,23 +3487,14 @@ static int swapCJK(int i) {
|
|||
// GET IMPLICIT PRIMARY WEIGHTS
|
||||
// Return value is left justified primary key
|
||||
|
||||
static int getImplicitPrimary(int cp) {
|
||||
|
||||
if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
|
||||
|
||||
cp = swapCJK(cp);
|
||||
|
||||
if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
|
||||
|
||||
// we now have a range of numbers from 0 to 21FFFF.
|
||||
|
||||
return getImplicitPrimaryFromSwapped(cp);
|
||||
}
|
||||
|
||||
static Implicit implicit = new Implicit(IMPLICIT_BASE_BYTE, IMPLICIT_MAX_BYTE);
|
||||
|
||||
static int getImplicitPrimary(int cp) {
|
||||
return implicit.getSwappedImplicit(cp);
|
||||
}
|
||||
|
||||
static int getImplicitPrimaryFromSwapped(int cp) {
|
||||
return implicit.getImplicit(cp);
|
||||
return implicit.getRawImplicit(cp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3563,7 +3508,7 @@ static int swapCJK(int i) {
|
|||
|
||||
static void showImplicit2(String title, int cp) {
|
||||
System.out.println(title + ":\t" + Utility.hex(cp)
|
||||
+ " => " + Utility.hex(swapCJK(cp))
|
||||
+ " => " + Utility.hex(Implicit.swapCJK(cp))
|
||||
+ " => " + Utility.hex(INT_MASK & getImplicitPrimary(cp)));
|
||||
}
|
||||
|
||||
|
@ -3627,7 +3572,7 @@ static int swapCJK(int i) {
|
|||
|
||||
// test swapping
|
||||
|
||||
int currSwap = swapCJK(i);
|
||||
int currSwap = Implicit.swapCJK(i);
|
||||
if (currSwap < oldSwap) {
|
||||
throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
|
||||
+ Utility.hex(oldChar) + " (" + Utility.hex(oldSwap) + ")"
|
||||
|
@ -3686,7 +3631,7 @@ static int swapCJK(int i) {
|
|||
// b. toSmallKana(NFKD(x)) != x.
|
||||
|
||||
static final boolean needsCaseBit(String x) {
|
||||
String s = NFKD.normalize(x);
|
||||
String s = Default.nfkd.normalize(x);
|
||||
if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
|
||||
if (!toSmallKana(s).equals(s)) return true;
|
||||
return false;
|
||||
|
@ -4175,7 +4120,7 @@ static int swapCJK(int i) {
|
|||
continue;
|
||||
}
|
||||
canIt.setSource(key);
|
||||
String nfdKey = toD.normalize(key);
|
||||
String nfdKey = Default.nfd.normalize(key);
|
||||
|
||||
boolean first = true;
|
||||
while (true) {
|
||||
|
@ -4187,7 +4132,7 @@ static int swapCJK(int i) {
|
|||
|
||||
|
||||
// Skip anything that is not FCD.
|
||||
if (!NFD.isFCD(s)) continue;
|
||||
if (!Default.nfd.isFCD(s)) continue;
|
||||
|
||||
// We ONLY add if the sort key would be different
|
||||
// Than what we would get if we didn't decompose!!
|
||||
|
@ -4235,11 +4180,11 @@ static int swapCJK(int i) {
|
|||
errorCount++;
|
||||
}
|
||||
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
|
||||
int[] ces = new int[50];
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
int minps = Integer.MAX_VALUE;
|
||||
|
@ -4275,7 +4220,7 @@ static int swapCJK(int i) {
|
|||
}
|
||||
}
|
||||
|
||||
cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
|
||||
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
|
||||
int lastPrimary = 0;
|
||||
|
||||
|
@ -4410,8 +4355,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
static final char MARK2 = '\u0002';
|
||||
//Normalizer normalizer = new Normalizer(Normalizer.NFC, true);
|
||||
|
||||
static Normalizer toC = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
|
||||
static Normalizer toD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
//static Normalizer toC = new Normalizer(Normalizer.NFC, UNICODE_VERSION);
|
||||
//static Normalizer toD = new Normalizer(Normalizer.NFD, UNICODE_VERSION);
|
||||
static TreeMap MismatchedC = new TreeMap();
|
||||
static TreeMap MismatchedN = new TreeMap();
|
||||
static TreeMap MismatchedD = new TreeMap();
|
||||
|
@ -4425,7 +4370,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
static void addString(String ch, byte option) {
|
||||
String colDbase = collator.getSortKey(ch, option, true);
|
||||
String colNbase = collator.getSortKey(ch, option, false);
|
||||
String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
|
||||
String colCbase = collator.getSortKey(Default.nfc.normalize(ch), option, false);
|
||||
if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) {
|
||||
/*System.out.println(Utility.hex(ch));
|
||||
System.out.println(printableKey(colNbase));
|
||||
|
@ -4595,7 +4540,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
}
|
||||
|
||||
static void showLine(int count, String ch, String keyD, String keyN) {
|
||||
String decomp = toD.normalize(ch);
|
||||
String decomp = Default.nfd.normalize(ch);
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
|
||||
log.println("<tr><td>" + count + "</td><td>"
|
||||
+ Utility.hex(ch, " ")
|
||||
|
@ -4631,8 +4576,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
String MN = (String)MismatchedN.get(ch);
|
||||
String MC = (String)MismatchedC.get(ch);
|
||||
String MD = (String)MismatchedD.get(ch);
|
||||
String chInC = toC.normalize(ch);
|
||||
String chInD = toD.normalize(ch);
|
||||
String chInC = Default.nfc.normalize(ch);
|
||||
String chInD = Default.nfd.normalize(ch);
|
||||
|
||||
log.println("<tr><td rowSpan='3' class='bottom'>" + Utility.replace(ucd.getName(ch), ", ", ",<br>")
|
||||
+ "</td><td>NFD</td><td>" + Utility.hex(chInD)
|
||||
|
@ -4665,7 +4610,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
|
||||
static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
|
||||
String ch = chobj.toString();
|
||||
String decomp = toD.normalize(ch);
|
||||
String decomp = Default.nfd.normalize(ch);
|
||||
if (showName) {
|
||||
if (ch.equals(decomp)) {
|
||||
log.println(//title + counter + " "
|
||||
|
|
Loading…
Add table
Reference in a new issue