mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
Added table of information to fractional UCA, changed XML, regenerated collation test
X-SVN-Rev: 8921
This commit is contained in:
parent
b5b02ebdd8
commit
4482f497ce
13 changed files with 686 additions and 76 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2002/06/15 03:15:55 $
|
||||
* $Revision: 1.20 $
|
||||
* $Date: 2002/06/22 01:21:08 $
|
||||
* $Revision: 1.21 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1411,7 +1411,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
int lastLen = -1;
|
||||
int[] lastCes = new int[50];
|
||||
|
||||
long variableTop = collator.getVariableHigh() & 0xFFFFFFFFL;
|
||||
long variableTop = collator.getVariableHigh() & INT_MASK;
|
||||
|
||||
// for debugging ordering
|
||||
String lastSortKey = "";
|
||||
|
@ -1683,7 +1683,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|
||||
// static final String[] RELATION_NAMES = {" <", " <<", " <<<", " ="};
|
||||
static final String[] RELATION_NAMES = {" <\t", " <<\t", " <<<\t", " =\t"};
|
||||
static final String[] XML_RELATION_NAMES = {"g1", "g2", "g3", "eq"};
|
||||
static final String[] XML_RELATION_NAMES = {"p", "s", "t", "eq"};
|
||||
|
||||
static class ArrayWrapper {
|
||||
int[] array;
|
||||
|
@ -2278,17 +2278,27 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
log.println("# - Differs from previous version in that MAX value was introduced at 1F.");
|
||||
log.println("# All tertiary values are shifted down by 1, filling the gap at 7!");
|
||||
|
||||
int firstImplicit = getImplicitPrimary(CJK_BASE) >>> 24;
|
||||
int lastImplicit = getImplicitPrimary(0x10FFFF) >>> 24;
|
||||
log.println("[FIRST_IMPLICIT= " + Utility.hex(firstImplicit) + "]");
|
||||
log.println("[LAST_IMPLICIT= " + Utility.hex(lastImplicit) + "]");
|
||||
|
||||
String lastChr = "";
|
||||
int lastNp = 0;
|
||||
boolean doVariable = false;
|
||||
char[] codeUnits = new char[100];
|
||||
|
||||
|
||||
FCE firstSecondaryIgnorable = new FCE(false);
|
||||
FCE lastSecondaryIgnorable = new FCE(true);
|
||||
|
||||
FCE firstPrimaryIgnorable = new FCE(false);
|
||||
FCE lastPrimaryIgnorable = new FCE(true);
|
||||
|
||||
FCE firstVariable = new FCE(false);
|
||||
FCE lastVariable = new FCE(true);
|
||||
|
||||
FCE firstNonIgnorable = new FCE(false);
|
||||
FCE lastNonIgnorable = new FCE(true);
|
||||
|
||||
FCE firstTrailing = new FCE(false);
|
||||
FCE lastTrailing = new FCE(true);
|
||||
|
||||
while (it.hasNext()) {
|
||||
Object sortKey = it.next();
|
||||
String chr = (String)ordered.get(sortKey);
|
||||
|
@ -2334,7 +2344,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
// special treatment for unsupported!
|
||||
|
||||
if (UCA.isImplicitLeadPrimary(pri)) {
|
||||
System.out.println("DEBUG: " + CEList.toString(ces, len)
|
||||
if (DEBUG) System.out.println("DEBUG: " + CEList.toString(ces, len)
|
||||
+ ", Current: " + q + ", " + ucd.getCodeAndName(chr));
|
||||
++q;
|
||||
oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
|
||||
|
@ -2359,7 +2369,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
+ " => " + Utility.hex(cp)
|
||||
+ " => " + Utility.hex(testImplicit[0])
|
||||
+ ", " + Utility.hex(testImplicit[1])
|
||||
// + ", " + Utility.hex(fixPrimary(pri) & 0xFFFFFFFFL)
|
||||
// + ", " + Utility.hex(fixPrimary(pri) & INT_MASK)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -2377,24 +2387,26 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|
||||
// int oldPrimaryValue = UCA.getPrimary(ces[q]);
|
||||
int np = fixPrimary(pri);
|
||||
int ns = fixSecondary(sec);
|
||||
int nt = fixTertiary(ter);
|
||||
|
||||
try {
|
||||
hexBytes(np, newPrimary);
|
||||
hexBytes(fixSecondary(sec), newSecondary);
|
||||
hexBytes(fixTertiary(ter), newTertiary);
|
||||
hexBytes(ns, newSecondary);
|
||||
hexBytes(nt, newTertiary);
|
||||
} catch (Exception e) {
|
||||
throw new ChainException("Character is {0}", new String[] {Utility.hex(chr)}, e);
|
||||
}
|
||||
if (isFirst) {
|
||||
if (!sameTopByte(np, lastNp)) {
|
||||
summary.println("Last: " + Utility.hex(lastNp & 0xFFFFFFFFL) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
|
||||
summary.println("Last: " + Utility.hex(lastNp & INT_MASK) + " " + ucd.getName(UTF16.charAt(lastChr,0)));
|
||||
summary.println();
|
||||
if (doVariable) {
|
||||
doVariable = false;
|
||||
summary.println("[variable top = " + Utility.hex(primaryDelta[firstPrimary]) + "] # END OF VARIABLE SECTION!!!");
|
||||
summary.println();
|
||||
}
|
||||
summary.println("First: " + Utility.hex(np & 0xFFFFFFFFL) + ", " + ucd.getCodeAndName(UTF16.charAt(chr,0)));
|
||||
summary.println("First: " + Utility.hex(np & INT_MASK) + ", " + ucd.getCodeAndName(UTF16.charAt(chr,0)));
|
||||
}
|
||||
lastNp = np;
|
||||
isFirst = false;
|
||||
|
@ -2403,6 +2415,27 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
+ ", " + newSecondary
|
||||
+ ", " + newTertiary
|
||||
+ "]");
|
||||
|
||||
// RECORD STATS
|
||||
|
||||
if (np == 0 && ns == 0) {
|
||||
firstSecondaryIgnorable.setValue(np, ns, nt);
|
||||
lastSecondaryIgnorable.setValue(np, ns, nt);
|
||||
} else if (np == 0) {
|
||||
firstPrimaryIgnorable.setValue(np, ns, nt);
|
||||
lastPrimaryIgnorable.setValue(np, ns, nt);
|
||||
} else if (collator.isVariable(ces[q])) {
|
||||
firstVariable.setValue(np, ns, nt);
|
||||
lastVariable.setValue(np, ns, nt);
|
||||
} else if (UCA.getPrimary(ces[q]) > UNSUPPORTED_LIMIT) { // Trailing (none currently)
|
||||
System.out.println("Trailing: " + CEList.toString(ces[q])
|
||||
+ ", " + Utility.hex(pri) + ", " + Utility.hex(UNSUPPORTED_LIMIT));
|
||||
firstTrailing.setValue(np, ns, nt);
|
||||
lastTrailing.setValue(np, ns, nt);
|
||||
} else if ((pri & MARK_CODE_POINT) == 0) { // skip implicits
|
||||
firstNonIgnorable.setValue(np, ns, nt);
|
||||
lastNonIgnorable.setValue(np, ns, nt);
|
||||
}
|
||||
}
|
||||
if (nonePrinted) {
|
||||
log.print("[,,]");
|
||||
|
@ -2412,6 +2445,61 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
log.println();
|
||||
lastChr = chr;
|
||||
}
|
||||
|
||||
int firstImplicit = getImplicitPrimary(CJK_BASE);
|
||||
int lastImplicit = getImplicitPrimary(0x10FFFF);
|
||||
|
||||
log.println("# VALUES BASED ON UCA");
|
||||
|
||||
log.println("[first tertiary ignorable " + new FCE(false,0,0, 0).formatFCE() + "]");
|
||||
log.println("[last tertiary ignorable " + new FCE(true,0,0, 0).formatFCE() + "]");
|
||||
|
||||
// Since the UCA doesn't have secondary ignorables, fake them.
|
||||
|
||||
if (firstSecondaryIgnorable.isUnset()) {
|
||||
System.out.println("No first/last secondary ignorable: resetting");
|
||||
firstSecondaryIgnorable = new FCE(false, 0, 0, COMMON<<24);
|
||||
lastSecondaryIgnorable = new FCE(true, 0, 0, COMMON<<24);
|
||||
System.out.println(firstSecondaryIgnorable.formatFCE());
|
||||
}
|
||||
|
||||
log.println("[first secondary ignorable " + firstSecondaryIgnorable.formatFCE() + "]");
|
||||
log.println("[last secondary ignorable " + lastSecondaryIgnorable.formatFCE() + "]");
|
||||
|
||||
log.println("[first primary ignorable " + firstPrimaryIgnorable.formatFCE() + "]");
|
||||
log.println("[last primary ignorable " + lastPrimaryIgnorable.formatFCE() + "]");
|
||||
|
||||
log.println("[first variable " + firstVariable.formatFCE() + "]");
|
||||
log.println("[last variable " + lastVariable.formatFCE() + "]");
|
||||
|
||||
log.println("[first non-ignorable " + firstNonIgnorable.formatFCE() + "]");
|
||||
log.println("[last non-ignorable " + lastNonIgnorable.formatFCE() + "]");
|
||||
|
||||
|
||||
log.println("[first implicit " + (new FCE(false,firstImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
|
||||
log.println("[last implicit " + (new FCE(false,lastImplicit, COMMON<<24, COMMON<<24)).formatFCE() + "]");
|
||||
|
||||
if (firstTrailing.isUnset()) {
|
||||
System.out.println("No first/last trailing: resetting");
|
||||
firstTrailing = new FCE(false, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
|
||||
lastTrailing = new FCE(true, (IMPLICIT_LIMIT_BYTE+1)<<24, COMMON<<24, COMMON<<24);
|
||||
System.out.println(firstTrailing.formatFCE());
|
||||
}
|
||||
|
||||
log.println("[first trailing " + firstTrailing.formatFCE() + "]");
|
||||
log.println("[last trailing " + lastTrailing.formatFCE() + "]");
|
||||
|
||||
log.println("# FIXED VALUES");
|
||||
|
||||
log.println("[top " + Utility.hex(0xA0,2) + "]");
|
||||
log.println("[first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
|
||||
log.println("[last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
|
||||
log.println("[first trail byte" + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
|
||||
log.println("[last implicit byte" + Utility.hex(SPECIAL_BASE-1,2) + "]");
|
||||
log.println("[first special byte" + Utility.hex(SPECIAL_BASE,2) + "]");
|
||||
log.println("[last special byte" + Utility.hex(0xFF,2) + "]");
|
||||
|
||||
|
||||
summary.println("Last: " + Utility.hex(lastNp) + ", " + ucd.getCodeAndName(UTF16.charAt(lastChr, 0)));
|
||||
|
||||
/*
|
||||
|
@ -2423,19 +2511,19 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
}
|
||||
*/
|
||||
summary.println();
|
||||
summary.println("# First Implicit: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0)));
|
||||
summary.println("# Last Implicit: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0x10FFFF)));
|
||||
summary.println("# First CJK: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0x4E00)));
|
||||
summary.println("# Last CJK: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0xFA2F)));
|
||||
summary.println("# First CJK_A: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0x3400)));
|
||||
summary.println("# Last CJK_A: " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(0x4DBF)));
|
||||
summary.println("# First Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0)));
|
||||
summary.println("# Last Implicit: " + Utility.hex(INT_MASK & getImplicitPrimary(0x10FFFF)));
|
||||
summary.println("# First CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4E00)));
|
||||
summary.println("# Last CJK: " + Utility.hex(INT_MASK & getImplicitPrimary(0xFA2F)));
|
||||
summary.println("# First CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x3400)));
|
||||
summary.println("# Last CJK_A: " + Utility.hex(INT_MASK & getImplicitPrimary(0x4DBF)));
|
||||
|
||||
boolean lastOne = false;
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
boolean thisOne = ucd.isCJK_BASE(i) || ucd.isCJK_AB(i);
|
||||
if (thisOne != lastOne) {
|
||||
summary.println("# Implicit Cusp: CJK=" + lastOne + ": " + Utility.hex(i-1) + " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(i-1)));
|
||||
summary.println("# Implicit Cusp: CJK=" + thisOne + ": " + Utility.hex(i) + " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(i)));
|
||||
summary.println("# Implicit Cusp: CJK=" + lastOne + ": " + Utility.hex(i-1) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i-1)));
|
||||
summary.println("# Implicit Cusp: CJK=" + thisOne + ": " + Utility.hex(i) + " => " + Utility.hex(INT_MASK & getImplicitPrimary(i)));
|
||||
lastOne = thisOne;
|
||||
}
|
||||
}
|
||||
|
@ -2479,6 +2567,104 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
summary.close();
|
||||
}
|
||||
|
||||
static final long INT_MASK = 0xFFFFFFFFL;
|
||||
|
||||
static class FCE {
|
||||
static final long UNDEFINED_MAX = Long.MAX_VALUE;
|
||||
static final long UNDEFINED_MIN = Long.MIN_VALUE;
|
||||
long[] key;
|
||||
boolean max;
|
||||
boolean debugShow = false;
|
||||
|
||||
FCE (boolean max) {
|
||||
this.max = max;
|
||||
if (max) key = new long[] {UNDEFINED_MIN, UNDEFINED_MIN, UNDEFINED_MIN}; // make small!
|
||||
else key = new long[] {UNDEFINED_MAX, UNDEFINED_MAX, UNDEFINED_MAX};
|
||||
}
|
||||
|
||||
FCE (boolean max, int primary, int secondary, int tertiary) {
|
||||
this(max);
|
||||
key[0] = primary & INT_MASK;
|
||||
key[1] = secondary & INT_MASK;
|
||||
key[2] = tertiary & INT_MASK;
|
||||
}
|
||||
|
||||
FCE (boolean max, int primary) {
|
||||
this(max);
|
||||
key[0] = primary & INT_MASK;
|
||||
}
|
||||
|
||||
boolean isUnset() {
|
||||
return key[0] == UNDEFINED_MIN || key[0] == UNDEFINED_MAX;
|
||||
}
|
||||
|
||||
String formatFCE() {
|
||||
String b0 = getBuffer(key[0], false);
|
||||
boolean key0Defined = key[0] != UNDEFINED_MIN && key[0] != UNDEFINED_MAX;
|
||||
|
||||
String b1 = getBuffer(key[1], key0Defined);
|
||||
boolean key1Defined = key[1] != UNDEFINED_MIN && key[1] != UNDEFINED_MAX;
|
||||
if (b1.length() != 0) b1 = " " + b1;
|
||||
|
||||
String b2 = getBuffer(key[2], key0Defined || key1Defined);
|
||||
if (b2.length() != 0) b2 = " " + b2;
|
||||
return "[" + b0 + "," + b1 + "," + b2 + "]";
|
||||
}
|
||||
|
||||
String getBuffer(long val, boolean haveHigher) {
|
||||
if (val == UNDEFINED_MIN) return "?";
|
||||
if (val == UNDEFINED_MAX) if (haveHigher) val = COMMON << 24; else return "?";
|
||||
StringBuffer result = new StringBuffer();
|
||||
hexBytes(val, result);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
void setValue(int npInt, int nsInt, int ntInt) {
|
||||
if (debugShow) System.out.println("Setting FCE: "
|
||||
+ Utility.hex(npInt) + ", " + Utility.hex(nsInt) + ", " + Utility.hex(ntInt));
|
||||
// to get the sign right!
|
||||
long np = npInt & INT_MASK;
|
||||
long ns = nsInt & INT_MASK;
|
||||
long nt = ntInt & INT_MASK;
|
||||
if (max) {
|
||||
if (np < key[0]) return;
|
||||
if (np > key[0]) {
|
||||
key[0] = np;
|
||||
key[1] = ns;
|
||||
key[2] = nt;
|
||||
return;
|
||||
}
|
||||
if (ns < key[1]) return;
|
||||
if (ns > key[1]) {
|
||||
key[1] = ns;
|
||||
key[2] = nt;
|
||||
return;
|
||||
}
|
||||
if (nt > key[2]) {
|
||||
key[2] = nt;
|
||||
}
|
||||
} else {
|
||||
if (np > key[0]) return;
|
||||
if (np < key[0]) {
|
||||
key[0] = np;
|
||||
key[1] = ns;
|
||||
key[2] = nt;
|
||||
return;
|
||||
}
|
||||
if (ns > key[1]) return;
|
||||
if (ns < key[1]) {
|
||||
key[1] = ns;
|
||||
key[2] = nt;
|
||||
return;
|
||||
}
|
||||
if (nt > key[2]) {
|
||||
key[2] = nt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
static boolean isFixedIdeograph(int cp) {
|
||||
return (0x3400 <= cp && cp <= 0x4DB5
|
||||
|
@ -2566,9 +2752,12 @@ static int swapCJK(int i) {
|
|||
return i + NON_CJK_OFFSET; // non-CJK
|
||||
}
|
||||
|
||||
// CONSTANTS
|
||||
// Fractional UCA Generation Constants
|
||||
|
||||
static final int
|
||||
TOP = 0xA0,
|
||||
SPECIAL_BASE = 0xF0,
|
||||
|
||||
NON_CJK_OFFSET = 0x110000,
|
||||
BYTES_TO_AVOID = 3,
|
||||
OTHER_COUNT = 256 - BYTES_TO_AVOID,
|
||||
|
@ -2659,12 +2848,12 @@ static int swapCJK(int i) {
|
|||
static void showImplicit2(String title, int cp) {
|
||||
System.out.println(title + ":\t" + Utility.hex(cp)
|
||||
+ " => " + Utility.hex(swapCJK(cp))
|
||||
+ " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(cp)));
|
||||
+ " => " + Utility.hex(INT_MASK & getImplicitPrimary(cp)));
|
||||
}
|
||||
|
||||
static void showImplicit3(String title, int cp) {
|
||||
System.out.println("*" + title + ":\t" + Utility.hex(cp)
|
||||
+ " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimaryFromSwapped(cp)));
|
||||
+ " => " + Utility.hex(INT_MASK & getImplicitPrimaryFromSwapped(cp)));
|
||||
}
|
||||
|
||||
// TEST PROGRAM
|
||||
|
@ -2679,7 +2868,7 @@ static int swapCJK(int i) {
|
|||
// test monotonically increasing
|
||||
|
||||
for (int i = 0; i < 0x21FFFF; ++i) {
|
||||
long newPrimary = 0xFFFFFFFFL & getImplicitPrimaryFromSwapped(i);
|
||||
long newPrimary = INT_MASK & getImplicitPrimaryFromSwapped(i);
|
||||
if (newPrimary < oldPrimary) {
|
||||
throw new IllegalArgumentException(Utility.hex(i) + ": overlap: "
|
||||
+ Utility.hex(oldChar) + " (" + Utility.hex(oldPrimary) + ")"
|
||||
|
@ -2730,7 +2919,7 @@ static int swapCJK(int i) {
|
|||
}
|
||||
|
||||
|
||||
long newPrimary = 0xFFFFFFFFL & getImplicitPrimary(i);
|
||||
long newPrimary = INT_MASK & getImplicitPrimary(i);
|
||||
|
||||
// test correct values
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2002/05/31 01:41:04 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -22,6 +22,10 @@ public final class DerivedProperty implements UCD_Types {
|
|||
|
||||
// ADD CONSTANT to UCD_TYPES
|
||||
|
||||
static public UnicodeProperty make(int derivedPropertyID) {
|
||||
return make(derivedPropertyID, Default.ucd);
|
||||
}
|
||||
|
||||
static public UnicodeProperty make(int derivedPropertyID, UCD ucd) {
|
||||
if (derivedPropertyID < 0 || derivedPropertyID >= DERIVED_PROPERTY_LIMIT) return null;
|
||||
DerivedProperty dp = getCached(ucd);
|
||||
|
|
|
@ -5,17 +5,19 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import java.io.*;
|
||||
|
||||
class DiffPropertyLister extends PropertyLister {
|
||||
private UCD oldUCD;
|
||||
private UnicodeSet set = new UnicodeSet();
|
||||
private static final int NOPROPERTY = -1;
|
||||
|
||||
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output, int property) {
|
||||
|
@ -34,6 +36,10 @@ class DiffPropertyLister extends PropertyLister {
|
|||
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
|
||||
this(oldUCDName, newUCDName, output, NOPROPERTY);
|
||||
}
|
||||
|
||||
public UnicodeSet getSet() {
|
||||
return set;
|
||||
}
|
||||
|
||||
public String valueName(int cp) {
|
||||
return major_minor_only(ucdData.getVersion());
|
||||
|
@ -64,7 +70,13 @@ class DiffPropertyLister extends PropertyLister {
|
|||
|
||||
public byte status(int cp) {
|
||||
if (newProp == null) {
|
||||
return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
|
||||
if (ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp))) {
|
||||
set.add(cp);
|
||||
return INCLUDE;
|
||||
}
|
||||
else {
|
||||
return EXCLUDE;
|
||||
}
|
||||
}
|
||||
|
||||
// just look at property differences among allocated characters
|
||||
|
@ -74,7 +86,10 @@ class DiffPropertyLister extends PropertyLister {
|
|||
|
||||
String val = newProp.getValue(cp);
|
||||
String oldVal = oldProp.getValue(cp);
|
||||
if (!oldVal.equals(val)) return INCLUDE;
|
||||
if (!oldVal.equals(val)) {
|
||||
set.add(cp);
|
||||
return INCLUDE;
|
||||
}
|
||||
return EXCLUDE;
|
||||
|
||||
/*if (cp == 0xFFFF) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2002/05/31 01:41:04 $
|
||||
* $Revision: 1.19 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.20 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1083,6 +1083,9 @@ public class GenerateData implements UCD_Types {
|
|||
String newFile = directory + filename + getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile);
|
||||
String mostRecent = generateBat(directory, filename, getFileSuffix(true));
|
||||
DiffPropertyLister dpl;
|
||||
UnicodeSet cummulative = new UnicodeSet();
|
||||
|
||||
try {
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
int prop = list[i];
|
||||
|
@ -1094,29 +1097,60 @@ public class GenerateData implements UCD_Types {
|
|||
//new DiffPropertyLister("3.2.0", "1.1.0", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.0.0", log, prop).print();
|
||||
|
||||
log.println();
|
||||
dpl = new DiffPropertyLister("3.2.0", "2.0.0", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.2", log, prop).print();
|
||||
|
||||
log.println();
|
||||
dpl = new DiffPropertyLister("3.2.0", "2.1.2", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.5", log, prop).print();
|
||||
|
||||
log.println();
|
||||
dpl = new DiffPropertyLister("3.2.0", "2.1.5", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.8", log, prop).print();
|
||||
|
||||
log.println();
|
||||
dpl = new DiffPropertyLister("3.2.0", "2.1.8", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "3.0.0", log, prop).print();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.0.1", log, prop).print();
|
||||
dpl = new DiffPropertyLister("3.2.0", "3.0.0", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.1.0", log, prop).print();
|
||||
dpl = new DiffPropertyLister("3.2.0", "3.0.1", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.1.1", log, prop).print();
|
||||
dpl = new DiffPropertyLister("3.2.0", "3.1.0", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
||||
log.println();
|
||||
dpl = new DiffPropertyLister("3.2.0", "3.1.1", log, prop);
|
||||
dpl.print();
|
||||
cummulative.addAll(dpl.getSet());
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
||||
log.println();
|
||||
log.println("Cummulative differences");
|
||||
UnicodeProperty up = DerivedProperty.make(prop, Default.ucd);
|
||||
UnicodeSet newProp = up.getSet();
|
||||
Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd);
|
||||
}
|
||||
} finally {
|
||||
if (log != null) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/06/15 02:47:14 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -83,7 +83,8 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
|
||||
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
|
||||
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
|
||||
else if (arg.equalsIgnoreCase("idn")) VerifyUCD.VerifyIDN();
|
||||
else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
|
||||
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
|
||||
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
|
||||
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
|
||||
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2002/06/15 02:47:13 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1013,7 +1013,7 @@ to guarantee identifier closure.
|
|||
if (fixStrings) {
|
||||
if (result.name == null) {
|
||||
result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
|
||||
System.out.println("Warning: fixing name for " + result.name);
|
||||
// System.out.println("Warning: fixing name for " + result.name);
|
||||
}
|
||||
if (result.shortName == null) {
|
||||
result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -23,6 +23,10 @@ final class UnifiedBinaryProperty extends UnicodeProperty {
|
|||
int propValue;
|
||||
// DerivedProperty dp;
|
||||
|
||||
public static UnicodeProperty make(int propMask) {
|
||||
return make(propMask, Default.ucd);
|
||||
}
|
||||
|
||||
public static UnicodeProperty make(int propMask, UCD ucd) {
|
||||
if ((propMask & 0xFF00) == DERIVED) {
|
||||
return DerivedProperty.make(propMask & 0xFF, ucd);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2002/06/15 02:47:12 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -22,11 +22,14 @@ import java.io.*;
|
|||
//import java.text.Un;
|
||||
import com.ibm.icu.text.CanonicalIterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.text.utility.*;
|
||||
import java.text.NumberFormat;
|
||||
|
||||
public class VerifyUCD implements UCD_Types {
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
static void oneTime() {
|
||||
Default.setUCD();
|
||||
|
@ -1099,7 +1102,288 @@ can help you narrow these down.
|
|||
}
|
||||
probe.put(new Integer(cp), Default.ucd.getCodeAndName(cp) + " (" + Default.ucd.getCategoryID(cp) + ")" + option);
|
||||
}
|
||||
|
||||
static void showDifferences(PrintWriter log, UnicodeSet s1, String name1, UnicodeSet s2, String name2, boolean both) {
|
||||
if (!s1.equals(s2)) {
|
||||
log.println();
|
||||
log.println("In " + name1 + ", but NOT " + name2);
|
||||
Utility.showSetNames(log," ", new UnicodeSet(s1).removeAll(s2), false, false, Default.ucd);
|
||||
log.println();
|
||||
log.println("NOT in " + name1 + ", but in " + name2);
|
||||
Utility.showSetNames(log," ", new UnicodeSet(s2).removeAll(s1), false, false, Default.ucd);
|
||||
log.println();
|
||||
if (both) {
|
||||
log.println("In both " + name1 + " AND " + name2);
|
||||
Utility.showSetNames(log," ", new UnicodeSet(s2).retainAll(s1), false, false, Default.ucd);
|
||||
log.println();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void genIDN() throws IOException {
|
||||
PrintWriter out = new PrintWriter(System.out);
|
||||
Default.setUCD();
|
||||
PrintWriter log = Utility.openPrintWriter("IDN-tables.txt");
|
||||
|
||||
/*UnicodeSet y = UnifiedBinaryProperty.make(CATEGORY + FORMAT).getSet();
|
||||
UnicodeSet x = new UnicodeSet(0xE0001,0xE007F).retainAll(y);
|
||||
|
||||
System.out.println("y: " + y.toPattern(true));
|
||||
System.out.println("x: " + x.toPattern(true));
|
||||
Utility.showSetNames(out, "* ", x, false, true, Default.ucd);
|
||||
out.flush();
|
||||
*/
|
||||
|
||||
|
||||
// table1
|
||||
System.out.println("Getting Basics");
|
||||
UnicodeSet unassigned = UnifiedBinaryProperty.make(CATEGORY + UNASSIGNED).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet lineSeparators = UnifiedBinaryProperty.make(CATEGORY+LINE_SEPARATOR).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet paraSeparators = UnifiedBinaryProperty.make(CATEGORY+PARAGRAPH_SEPARATOR).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet spaceSeparators = UnifiedBinaryProperty.make(CATEGORY+SPACE_SEPARATOR).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet deprecated = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Deprecated).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet format = UnifiedBinaryProperty.make(CATEGORY + FORMAT).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet bidi_control = UnifiedBinaryProperty.make(BINARY_PROPERTIES+Bidi_Control).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet binary_IDS = UnifiedBinaryProperty.make(BINARY_PROPERTIES+IDS_BinaryOperator).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet trinary_IDS = UnifiedBinaryProperty.make(BINARY_PROPERTIES+IDS_TrinaryOperator).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet whitespace = UnifiedBinaryProperty.make(BINARY_PROPERTIES+White_space).getSet();
|
||||
whitespace.addAll(spaceSeparators); // bug.
|
||||
System.out.print(".");
|
||||
|
||||
UnicodeSet defaultIgnorable = UnifiedBinaryProperty.make(DERIVED + DefaultIgnorable).getSet();
|
||||
System.out.print(".");
|
||||
|
||||
UnicodeSet privateUse = UnifiedBinaryProperty.make(CATEGORY+PRIVATE_USE).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet control = UnifiedBinaryProperty.make(CATEGORY+Cc).getSet();
|
||||
System.out.print(".");
|
||||
UnicodeSet surrogate = UnifiedBinaryProperty.make(CATEGORY+SURROGATE).getSet();
|
||||
|
||||
System.out.println("Building Sets");
|
||||
// small test:
|
||||
|
||||
if (DEBUG) {
|
||||
showDifferences(log, whitespace, "White_Space",
|
||||
new UnicodeSet(spaceSeparators).addAll(lineSeparators).addAll(paraSeparators), "Separators", true);
|
||||
|
||||
showDifferences(log, UnifiedBinaryProperty.make(DERIVED + ID_Start).getSet(), "ID_Start",
|
||||
UnifiedBinaryProperty.make(DERIVED + Mod_ID_Start).getSet(), "XID_Start", false);
|
||||
|
||||
showDifferences(log, UnifiedBinaryProperty.make(DERIVED + ID_Continue_NO_Cf).getSet(), "ID_Continue",
|
||||
UnifiedBinaryProperty.make(DERIVED + Mod_ID_Continue_NO_Cf).getSet(), "XID_Continue", false);
|
||||
|
||||
System.out.println("Done with Test");
|
||||
}
|
||||
|
||||
UnicodeSet A1 = new UnicodeSet(unassigned).removeAll(noncharacters);
|
||||
|
||||
// special code for B1
|
||||
|
||||
/*
|
||||
B1, old
|
||||
00AD; SOFT HYPHEN
|
||||
1806; MONGOLIAN TODO SOFT HYPHEN
|
||||
180B; MONGOLIAN FREE VARIATION SELECTOR ONE
|
||||
180C; MONGOLIAN FREE VARIATION SELECTOR TWO
|
||||
180D; MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
200B; ZERO WIDTH SPACE
|
||||
200C; ZERO WIDTH NON-JOINER
|
||||
200D; ZERO WIDTH JOINER
|
||||
FEFF; ZERO WIDTH NO-BREAK SPACE
|
||||
*/
|
||||
|
||||
UnicodeSet B1 = new UnicodeSet().add(0xAD).add(0x1806).add(0x034F); // START WITH soft hyphen, mongolian soft hyphen, grapheme joiner
|
||||
// THEN ADD default ignorables or format characters that are *variation* or *zero width*
|
||||
UnicodeSet temp = new UnicodeSet(defaultIgnorable).addAll(format).addAll(spaceSeparators)
|
||||
.removeAll(surrogate).removeAll(control); // remove some just to avoid clutter when debugging.
|
||||
UnicodeSetIterator it = new UnicodeSetIterator(temp);
|
||||
while(it.next()) {
|
||||
if (!Default.ucd.isAssigned(it.codepoint)) continue;
|
||||
String name = Default.ucd.getName(it.codepoint);
|
||||
System.out.print(Default.ucd.getCodeAndName(it.codepoint));
|
||||
|
||||
if (name.indexOf("VARIATION") >= 0 || name.indexOf("ZERO") >= 0
|
||||
|| name.indexOf("WORD JOINER") >= 0) {
|
||||
B1.add(it.codepoint);
|
||||
System.out.print("*");
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
UnicodeSet C1 = new UnicodeSet(whitespace).removeAll(control).removeAll(lineSeparators)
|
||||
.removeAll(paraSeparators);
|
||||
|
||||
UnicodeSet C2 = new UnicodeSet(defaultIgnorable).removeAll(unassigned).removeAll(surrogate)
|
||||
.addAll(control).addAll(format).addAll(lineSeparators).addAll(paraSeparators);
|
||||
|
||||
UnicodeSet C3 = new UnicodeSet(privateUse);
|
||||
|
||||
UnicodeSet C4 = new UnicodeSet(noncharacters);
|
||||
|
||||
UnicodeSet C5 = new UnicodeSet(surrogate);
|
||||
|
||||
UnicodeSet C6 = new UnicodeSet(0xFFF9, 0xFFFC).add(0xFFFD);
|
||||
|
||||
UnicodeSet C7 = new UnicodeSet(binary_IDS).addAll(trinary_IDS);
|
||||
|
||||
UnicodeSet C8 = new UnicodeSet(deprecated).addAll(bidi_control);
|
||||
|
||||
UnicodeSet C9 = new UnicodeSet(0xE0001,0xE007F).retainAll(format);
|
||||
//Utility.showSetNames(out, "\t&&& ", C9, false, true, Default.ucd);
|
||||
//out.flush();
|
||||
|
||||
|
||||
// FIX UP SETS!!
|
||||
B1.removeAll(C6);
|
||||
B1.removeAll(C8);
|
||||
B1.removeAll(C9);
|
||||
|
||||
C1.removeAll(B1);
|
||||
|
||||
C2.removeAll(B1);
|
||||
C2.removeAll(C6);
|
||||
C2.removeAll(C8);
|
||||
C2.removeAll(C9);
|
||||
|
||||
System.out.println("Check that A1, B1, C1..9 are disjoint");
|
||||
|
||||
UnicodeSet[] test = {A1, B1, C1, C2, C3, C4, C5, C6, C7, C8, C9};
|
||||
String[] testNames = {"A1", "B1", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"};
|
||||
UnicodeSet union = new UnicodeSet();
|
||||
|
||||
for (int i = 0; i < test.length; ++i) {
|
||||
union.addAll(test[i]);
|
||||
for (int j = i + 1; j < test.length; ++j) {
|
||||
if (test[i].containsNone(test[j])) continue;
|
||||
log.println(testNames[i] + " and " + testNames[j] + " intersect!");
|
||||
UnicodeSet intersection = new UnicodeSet(test[i]).retainAll(test[j]);
|
||||
Utility.showSetNames(log," ", intersection, false, true, Default.ucd);
|
||||
log.println();
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("Check that union works");
|
||||
|
||||
UnicodeSet[] badChars = {unassigned, noncharacters, deprecated, format,
|
||||
control, surrogate, privateUse, binary_IDS, trinary_IDS, whitespace, defaultIgnorable,
|
||||
lineSeparators, paraSeparators, spaceSeparators};
|
||||
UnicodeSet badCharUnion = new UnicodeSet();
|
||||
for (int i = 0; i < badChars.length; ++i) {
|
||||
badCharUnion.addAll(badChars[i]);
|
||||
}
|
||||
|
||||
showDifferences(log, union, "(A1+B1+C1-C9)",
|
||||
badCharUnion,
|
||||
"(Whitespace+Deprecated+DefaultIgnorable+Separator+Other (cont/format/surr/priv/unass))", false);
|
||||
|
||||
System.out.println("Generating B2, B3");
|
||||
|
||||
log.println("Generating B2, B3");
|
||||
Map B2 = new TreeMap();
|
||||
Map B3 = new TreeMap();
|
||||
Integer tempInteger = null;
|
||||
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
int cat = Default.ucd.getCategory(i);
|
||||
if (!Default.ucd.isAssigned(i)) continue;
|
||||
//if (cat == Cc || cat == Cf || cat == Co || cat == Cn) continue; // we can skip these
|
||||
//if (Default.ucd.hasComputableName(i)) continue;
|
||||
tempInteger = null;
|
||||
|
||||
String original = UTF16.valueOf(i);
|
||||
String caseFold = Default.ucd.getCase(i, FULL, FOLD);
|
||||
if (!original.equals(caseFold)) {
|
||||
tempInteger = new Integer(i);
|
||||
B2.put(tempInteger, caseFold);
|
||||
B3.put(tempInteger, caseFold);
|
||||
}
|
||||
|
||||
String b = Default.nfkc.normalize(caseFold);
|
||||
String c = Default.nfkc.normalize(Default.ucd.getCase(b, FULL, FOLD));
|
||||
|
||||
if (!c.equals(b)) {
|
||||
if (tempInteger != null) {
|
||||
if (DEBUG) {
|
||||
log.println("Possible Conflict");
|
||||
log.println(" " + Default.ucd.getCodeAndName(i));
|
||||
log.println(" => " + Default.ucd.getCodeAndName(caseFold));
|
||||
log.println(" => " + Default.ucd.getCodeAndName(c));
|
||||
}
|
||||
} else {
|
||||
tempInteger = new Integer(i);
|
||||
if (DEBUG) {
|
||||
log.println(" " + Default.ucd.getCodeAndName(i));
|
||||
log.println(" => " + Default.ucd.getCodeAndName(c));
|
||||
}
|
||||
}
|
||||
if (DEBUG) log.println();
|
||||
B2.put(tempInteger, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// PRINTOUT
|
||||
|
||||
printIDN_Table(log, "A.1", "Unassigned code points in Unicode " + Default.ucd.getVersion(), A1);
|
||||
printIDN_Table(log, "B.1", "Commonly mapped to nothing", B1);
|
||||
|
||||
printIDN_Map(log, "B.2", "Mapping for lowercase used with NFKC", B2, B3);
|
||||
|
||||
printIDN_Map(log, "B.3", "Mapping for lowercase used with no normalization", B3, B2);
|
||||
|
||||
printIDN_Table(log, "C.1", "Space characters", C1);
|
||||
printIDN_Table(log, "C.2", "Control characters", C2);
|
||||
printIDN_Table(log, "C.3", "Private use", C3);
|
||||
printIDN_Table(log, "C.4", "Non-character code points", C4);
|
||||
printIDN_Table(log, "C.5", "Surrogate codes", C5);
|
||||
printIDN_Table(log, "C.6", "Inappropriate for plain text", C6);
|
||||
printIDN_Table(log, "C.7", "Inappropriate for canonical representation", C7);
|
||||
printIDN_Table(log, "C.8", "Change display properties (or deprecated)", C8);
|
||||
printIDN_Table(log, "C.9", "Tagging characters", C9);
|
||||
|
||||
System.out.println("Done");
|
||||
log.close();
|
||||
}
|
||||
|
||||
public static void printIDN_Map(PrintWriter log, String tableNumber, String description, Map map, Map other) {
|
||||
System.out.println(tableNumber+ " " + description);
|
||||
log.println("");
|
||||
log.println(tableNumber+ " " + description);
|
||||
log.println("");
|
||||
log.println("----- Start Table " + tableNumber + " -----");
|
||||
Iterator it = map.keySet().iterator();
|
||||
while(it.hasNext()) {
|
||||
Integer key = (Integer) it.next();
|
||||
String value = (String) map.get(key);
|
||||
int cp = key.intValue();
|
||||
log.println(Utility.hex(cp, 4) + "; " + Utility.hex(value, 4) + "; "
|
||||
+ (!value.equals(other.get(key))? "***" : "")
|
||||
+ Default.ucd.getName(cp));
|
||||
}
|
||||
log.println("----- End Table " + tableNumber + " -----");
|
||||
}
|
||||
|
||||
public static void printIDN_Table(PrintWriter log, String tableNumber, String description, UnicodeSet set) {
|
||||
System.out.println(tableNumber+ " " + description);
|
||||
log.println("");
|
||||
log.println(tableNumber+ " " + description);
|
||||
log.println("");
|
||||
log.println("----- Start Table " + tableNumber + " -----");
|
||||
Utility.showSetNames(log, "", set, false, true, Default.ucd);
|
||||
log.println("----- End Table " + tableNumber + " -----");
|
||||
}
|
||||
|
||||
public static BitSet guessIDN() {
|
||||
BitSet result = new BitSet();
|
||||
|
@ -1330,9 +1614,11 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||
}
|
||||
|
||||
if (line.length() == 0) continue;
|
||||
if (line.charAt(0) == '-') continue;
|
||||
|
||||
int count = Utility.split(line,';',parts);
|
||||
if (count != 3) throw new ChainException("Incorrect # of fields in IDN folding", null);
|
||||
if (count != 3) throw new ChainException("Incorrect # of fields in IDN folding, line = {0}",
|
||||
new String[] {line});
|
||||
|
||||
String key = Utility.fromHex(parts[0]);
|
||||
if (UTF32.length32(key) != 1) throw new ChainException("First IDN field not single character: " + line, null);
|
||||
|
@ -1393,8 +1679,12 @@ E0020-E007F; [TAGGING CHARACTERS]
|
|||
Utility.fixDot();
|
||||
System.out.println("//" + lineNumber + ": '" + line + "'");
|
||||
}
|
||||
|
||||
|
||||
int commentPos = line.indexOf(';');
|
||||
if (commentPos >= 0) line = line.substring(0,commentPos);
|
||||
line = line.trim();
|
||||
if (line.length() == 0) continue;
|
||||
if (line.charAt(0) == '-') continue;
|
||||
|
||||
int count = Utility.split(line,'-',parts);
|
||||
if (count > 2) throw new ChainException("Incorrect # of fields in IDN list", null);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<html><body>
|
||||
<h1
|
||||
>1. Mismatches when NFD is OFF</h1><h2
|
||||
>Date:Fri Jun 14 20:11:26 PDT 2002</h2><h2
|
||||
>Date:Fri Jun 21 16:56:03 PDT 2002</h2><h2
|
||||
>File Version:-3.1.1d1</h2><p
|
||||
>Alternate Handling = NON_IGNORABLE</p><table border="1"
|
||||
><caption
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Fractional UCA Table, generated from standard UCA
|
||||
# M. Davis, Fri Jun 14 20:11:34 PDT 2002
|
||||
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
|
||||
# VERSION: UCA=3.1.1d1, UCD=3.2.0
|
||||
|
||||
# Generated processed version, as described in ICU design document.
|
||||
|
@ -16,8 +16,6 @@
|
|||
# WARNING
|
||||
# - Differs from previous version in that MAX value was introduced at 1F.
|
||||
# All tertiary values are shifted down by 1, filling the gap at 7!
|
||||
[FIRST_IMPLICIT= 00E0]
|
||||
[LAST_IMPLICIT= 00E3]
|
||||
|
||||
0000; [,,]
|
||||
0001; [,,]
|
||||
|
@ -17047,3 +17045,26 @@ D87E DE13; [E1 31 58 57, 05, 05]
|
|||
D87E DE14; [E1 31 63 42, 05, 05]
|
||||
D87E DC8F; [E1 31 78 AB, 05, 05]
|
||||
D87E DE1D; [E1 31 AC 81, 05, 05]
|
||||
# VALUES BASED ON UCA
|
||||
[first tertiary ignorable [,,]]
|
||||
[last tertiary ignorable [,,]]
|
||||
[first secondary ignorable [,, 05]]
|
||||
[last secondary ignorable [,, 05]]
|
||||
[first primary ignorable [, 87, 05]]
|
||||
[last primary ignorable [, E1 B1, 05]]
|
||||
[first variable [05 07, 05, 05]]
|
||||
[last variable [17 9B, 05, 05]]
|
||||
[first non-ignorable [1A 20, 05, 05]]
|
||||
[last non-ignorable [78 AA B2, 05, 05]]
|
||||
[first implicit [E0 03 03, 05, 05]]
|
||||
[last implicit [E3 DC 70 C0, 05, 05]]
|
||||
[first trailing [E5, 05, 05]]
|
||||
[last trailing [E5, 05, 05]]
|
||||
# FIXED VALUES
|
||||
[top A0]
|
||||
[first implicit byte E0]
|
||||
[last implicit byte E4]
|
||||
[first trail byteE5]
|
||||
[last implicit byteEF]
|
||||
[first special byteF0]
|
||||
[last special byteFF]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Fractional UCA Table, generated from standard UCA
|
||||
# M. Davis, Fri Jun 14 20:11:34 PDT 2002
|
||||
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
|
||||
# VERSION: UCA=3.1.1d1, UCD=3.2.0
|
||||
|
||||
# Generated processed version, as described in ICU design document.
|
||||
|
@ -16,8 +16,6 @@
|
|||
# WARNING
|
||||
# - Differs from previous version in that MAX value was introduced at 1F.
|
||||
# All tertiary values are shifted down by 1, filling the gap at 7!
|
||||
[FIRST_IMPLICIT= 00E0]
|
||||
[LAST_IMPLICIT= 00E3]
|
||||
|
||||
0000; [,,] # [0000.0000.0000] # <NULL>
|
||||
0001; [,,] # [0000.0000.0000] # <START OF HEADING>
|
||||
|
@ -17047,3 +17045,26 @@ D87E DE13; [E1 31 58 57, 05, 05] # [FF85.0020.0002][A20E.0020.0002] # CJK COM
|
|||
D87E DE14; [E1 31 63 42, 05, 05] # [FF85.0020.0002][A291.0020.0002] # CJK COMPATIBILITY IDEOGRAPH-2FA14
|
||||
D87E DC8F; [E1 31 78 AB, 05, 05] # [FF85.0020.0002][A392.0020.0002] # CJK COMPATIBILITY IDEOGRAPH-2F88F
|
||||
D87E DE1D; [E1 31 AC 81, 05, 05] # [FF85.0020.0002][A600.0020.0002] # CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
# VALUES BASED ON UCA
|
||||
[first tertiary ignorable [,,]]
|
||||
[last tertiary ignorable [,,]]
|
||||
[first secondary ignorable [,, 05]]
|
||||
[last secondary ignorable [,, 05]]
|
||||
[first primary ignorable [, 87, 05]]
|
||||
[last primary ignorable [, E1 B1, 05]]
|
||||
[first variable [05 07, 05, 05]]
|
||||
[last variable [17 9B, 05, 05]]
|
||||
[first non-ignorable [1A 20, 05, 05]]
|
||||
[last non-ignorable [78 AA B2, 05, 05]]
|
||||
[first implicit [E0 03 03, 05, 05]]
|
||||
[last implicit [E3 DC 70 C0, 05, 05]]
|
||||
[first trailing [E5, 05, 05]]
|
||||
[last trailing [E5, 05, 05]]
|
||||
# FIXED VALUES
|
||||
[top A0]
|
||||
[first implicit byte E0]
|
||||
[last implicit byte E4]
|
||||
[first trail byteE5]
|
||||
[last implicit byteEF]
|
||||
[first special byteF0]
|
||||
[last special byteFF]
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2002/06/13 21:14:05 $
|
||||
* $Revision: 1.18 $
|
||||
* $Date: 2002/06/22 01:21:11 $
|
||||
* $Revision: 1.19 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -825,23 +825,54 @@ public final class Utility { // COMMON UTILITIES
|
|||
return "Showing Stack with fake " + sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean all, UCD ucd) {
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
|
||||
PrintWriter temp = new PrintWriter(System.out);
|
||||
showSetNames(temp, prefix, set, separateLines, false, ucd);
|
||||
temp.close();
|
||||
}
|
||||
|
||||
public static void showSetNames(PrintWriter pw, String prefix, UnicodeSet set, boolean separateLines, boolean IDN, UCD ucd) {
|
||||
int count = set.getRangeCount();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int start = set.getRangeStart(i);
|
||||
int end = set.getRangeEnd(i);
|
||||
if (all) {
|
||||
if (separateLines || (IDN && isSeparateLineIDN(start,end,ucd))) {
|
||||
for (int cp = start; cp <= end; ++cp) {
|
||||
if (!set.contains(cp)) continue;
|
||||
System.out.println(prefix + ucd.getCodeAndName(cp));
|
||||
if (!IDN) pw.println(prefix + ucd.getCodeAndName(cp));
|
||||
else {
|
||||
pw.println(prefix + Utility.hex(cp,4) + "; " + ucd.getName(cp));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
System.out.println(prefix + ucd.getCode(start)
|
||||
+ ((start != end) ? (".." + ucd.getCode(end)) : "")
|
||||
+ "\t# " + ucd.getName(start)
|
||||
+ ((start != end) ? (".." + ucd.getName(end)) : "")
|
||||
);
|
||||
if (!IDN) {
|
||||
pw.println(prefix + ucd.getCode(start)
|
||||
+ ((start != end) ? (".." + ucd.getCode(end)) : "")
|
||||
+ "\t# " + ucd.getName(start) + ((start != end) ? (".." + ucd.getName(end)) : "")
|
||||
);
|
||||
} else {
|
||||
|
||||
pw.println(prefix + Utility.hex(start,4)
|
||||
+ ((start != end) ? ("-" + Utility.hex(end,4)) : "")
|
||||
+ (ucd.isAssigned(start)
|
||||
? "; " + ucd.getName(start) + ((start != end)
|
||||
? ("-" + ucd.getName(end))
|
||||
: "")
|
||||
: "")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isSeparateLineIDN(int cp, UCD ucd) {
|
||||
if (ucd.hasComputableName(cp)) return false;
|
||||
int cat = ucd.getCategory(cp);
|
||||
if (cat == UCD_Types.Cn) return false;
|
||||
if (ucd.getCategory(cp) == UCD_Types.Cc && !ucd.getBinaryProperty(cp, UCD_Types.White_space)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isSeparateLineIDN(int start, int end, UCD ucd) {
|
||||
return (isSeparateLineIDN(start, ucd) || isSeparateLineIDN(end, ucd));
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue