mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
chart fixes
X-SVN-Rev: 8499
This commit is contained in:
parent
d85d2f804e
commit
3b30dd9146
17 changed files with 1237 additions and 689 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -34,8 +34,8 @@ public class GenOverlap implements UCD_Types {
|
|||
collator = collatorIn;
|
||||
ucd = UCD.make();
|
||||
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
|
||||
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
|
||||
|
||||
for (int cp = 0x0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
|
@ -67,8 +67,8 @@ public class GenOverlap implements UCD_Types {
|
|||
|
||||
ucd = UCD.make();
|
||||
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
|
||||
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
|
||||
|
@ -339,8 +339,8 @@ public class GenOverlap implements UCD_Types {
|
|||
|
||||
ucd = UCD.make();
|
||||
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
|
||||
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
|
||||
|
@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types {
|
|||
newKeys.removeAll(joint);
|
||||
oldKeys.removeAll(joint);
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false);
|
||||
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false, false);
|
||||
Iterator it = list.iterator();
|
||||
int last = -1;
|
||||
while (it.hasNext()) {
|
||||
|
@ -541,8 +541,8 @@ public class GenOverlap implements UCD_Types {
|
|||
//nfkd = new Normalizer(Normalizer.NFKD);
|
||||
|
||||
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfkd = new Normalizer(Normalizer.NFKD);
|
||||
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
|
||||
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
|
||||
|
||||
|
||||
int tableLength = 257;
|
||||
|
@ -694,12 +694,12 @@ public class GenOverlap implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void listCyrillic(UCA collatorIn) throws IOException {
|
||||
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false);
|
||||
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false, false);
|
||||
Set set = new TreeSet(collatorIn);
|
||||
Set set2 = new TreeSet(collatorIn);
|
||||
ucd = UCD.make();
|
||||
|
||||
nfd = new Normalizer(Normalizer.NFD);
|
||||
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
|
||||
|
||||
for (char i = 0; i < 0xFFFF; ++i) {
|
||||
Utility.dot(i);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.9 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.10 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -483,9 +483,10 @@ final public class UCA implements Comparator {
|
|||
/**
|
||||
* Returns the char associated with a FIXED value
|
||||
*/
|
||||
public char charFromFixed(int ce) {
|
||||
/*public char charFromFixed(int ce) {
|
||||
return getPrimary(ce);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return the type of the CE
|
||||
|
@ -716,7 +717,7 @@ final public class UCA implements Comparator {
|
|||
/**
|
||||
* Records the dataversion
|
||||
*/
|
||||
private String dataVersion = "?";
|
||||
private String dataVersion = "3.1d1";
|
||||
|
||||
/**
|
||||
* Records the dataversion
|
||||
|
@ -791,7 +792,7 @@ final public class UCA implements Comparator {
|
|||
* A special bit combination in a CE is used to reserve exception cases. This has the effect
|
||||
* of removing 32 primary key values out of the 65536 possible.
|
||||
*/
|
||||
static final int EXCEPTION_CE_MASK = 0xFFC00000;
|
||||
static final int EXCEPTION_CE_MASK = 0xFF000000;
|
||||
|
||||
/**
|
||||
* Used to compose Hangul and Han characters
|
||||
|
@ -807,7 +808,8 @@ final public class UCA implements Comparator {
|
|||
* There are at least 34 values, so that we can use a range for surrogates
|
||||
* However, we do add to the first weight if we have surrogate pairs!
|
||||
*/
|
||||
public static final int UNSUPPORTED_BASE = 0xFFC2;
|
||||
public static final int UNSUPPORTED_BASE = 0xFF40;
|
||||
public static final int UNSUPPORTED_TOP = 0xFFFF;
|
||||
static final int UNSUPPORTED = makeKey(UNSUPPORTED_BASE, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
|
||||
|
||||
// was 0xFFC20101;
|
||||
|
@ -819,7 +821,7 @@ final public class UCA implements Comparator {
|
|||
* to be looked up (with following characters) in the contractingTable.<br>
|
||||
* This isn't a MASK since there is exactly one value.
|
||||
*/
|
||||
static final int CONTRACTING = 0xFFC10000;
|
||||
static final int CONTRACTING = 0xFF310000;
|
||||
|
||||
/**
|
||||
* Expanding characters are marked with an exception bit combination
|
||||
|
@ -827,7 +829,7 @@ final public class UCA implements Comparator {
|
|||
* This means that they map to more than one CE, which is looked up in
|
||||
* the expansionTable by index. See EXCEPTION_INDEX_MASK
|
||||
*/
|
||||
static final int EXPANDING_MASK = 0xFFC00000; // marks expanding range start
|
||||
static final int EXPANDING_MASK = 0xFF300000; // marks expanding range start
|
||||
|
||||
/**
|
||||
* This mask is used to get the index from an EXPANDING exception.
|
||||
|
@ -1165,12 +1167,12 @@ final public class UCA implements Comparator {
|
|||
}
|
||||
|
||||
public UCAContents getContents(byte ceLimit, Normalizer skipDecomps) {
|
||||
return new UCAContents(ceLimit, skipDecomps);
|
||||
return new UCAContents(ceLimit, skipDecomps, ucdVersion);
|
||||
}
|
||||
|
||||
public class UCAContents {
|
||||
int current = -1;
|
||||
Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
|
||||
Normalizer skipDecomps;
|
||||
Normalizer nfd = skipDecomps;
|
||||
Iterator enum = null;
|
||||
byte ceLimit;
|
||||
|
@ -1183,8 +1185,9 @@ final public class UCA implements Comparator {
|
|||
/**
|
||||
* use FIXED_CE as the limit
|
||||
*/
|
||||
UCAContents(byte ceLimit, Normalizer skipDecomps, String unicodeVersion) {
    // Limit on the CE type; stored as given.
    this.ceLimit = ceLimit;
    // The NFD normalizer is built for the requested Unicode version rather than
    // aliasing skipDecomps, so it is usable even when skipDecomps is null.
    this.nfd = new Normalizer(Normalizer.NFD, unicodeVersion);
    // May be null (callers pass null when decomposable characters are wanted).
    this.skipDecomps = skipDecomps;
}
|
||||
|
||||
|
@ -1208,7 +1211,7 @@ final public class UCA implements Comparator {
|
|||
|
||||
if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) {
|
||||
if (type >= ceLimit) continue;
|
||||
if (skipDecomps != null && skipDecomps.hasDecomposition(current)) continue;
|
||||
if (skipDecomps != null && skipDecomps.normalizationDiffers(current)) continue;
|
||||
}
|
||||
result = UTF16.valueOf(current);
|
||||
return result;
|
||||
|
@ -1363,9 +1366,13 @@ final public class UCA implements Comparator {
|
|||
boolean record = true;
|
||||
/* if (multiChars.length() > 0) record = false;
|
||||
else */
|
||||
if (toD.hasDecomposition(value)) record = false;
|
||||
if (toD.normalizationDiffers(value)) record = false;
|
||||
|
||||
// collect CEs
|
||||
if (value == 0x2F00) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
int ce = getCEFromLine(value, line, position, record);
|
||||
int ce2 = getCEFromLine(value, line, position, record);
|
||||
if (CHECK_UNIQUE && (ce2 == TERMINATOR || CHECK_UNIQUE_EXPANSIONS)) {
|
||||
|
@ -1765,7 +1772,7 @@ final public class UCA implements Comparator {
|
|||
* Used for checking data file integrity
|
||||
*/
|
||||
private void checkUnique(char value, int result, int fourth, String line) {
|
||||
if (toD.hasDecomposition(value)) return; // don't check decomposables.
|
||||
if (toD.normalizationDiffers(value)) return; // don't check decomposables.
|
||||
Object ceObj = new Long(((long)result << 16) | fourth);
|
||||
Object probe = uniqueTable.get(ceObj);
|
||||
if (probe != null) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2002/04/23 01:59:16 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -19,20 +19,21 @@ import java.io.*;
|
|||
import com.ibm.text.UCD.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
public class WriteCharts implements UCD_Types {
|
||||
|
||||
static UCD ucd;
|
||||
|
||||
static final byte UNSUPPORTED = 120;
|
||||
static boolean HACK_KANA = false;
|
||||
|
||||
static public void test(UCA uca) throws IOException {
|
||||
|
||||
Default.setUCD(uca.getUCDVersion());
|
||||
HACK_KANA = true;
|
||||
|
||||
uca.setAlternate(UCA.NON_IGNORABLE);
|
||||
|
||||
ucd = UCD.make();
|
||||
Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
Normalizer nfc = new Normalizer(Normalizer.NFC);
|
||||
//Normalizer nfd = new Normalizer(Normalizer.NFD);
|
||||
//Normalizer nfc = new Normalizer(Normalizer.NFC);
|
||||
|
||||
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
|
||||
cc.enableSamples();
|
||||
|
@ -42,6 +43,10 @@ public class WriteCharts implements UCD_Types {
|
|||
while (true) {
|
||||
String x = cc.next();
|
||||
if (x == null) break;
|
||||
if (x.equals("\u2F00")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
set.add(new Pair(uca.getSortKey(x), x));
|
||||
}
|
||||
|
||||
|
@ -68,7 +73,7 @@ public class WriteCharts implements UCD_Types {
|
|||
Utility.copyTextFile("charts.css", false, "CollationCharts\\charts.css");
|
||||
Utility.copyTextFile("help.html", true, "CollationCharts\\help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html");
|
||||
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html", false, false);
|
||||
Utility.appendFile("index_header.html", true, indexFile);
|
||||
|
||||
/*
|
||||
|
@ -88,15 +93,18 @@ public class WriteCharts implements UCD_Types {
|
|||
String s = (String) p.second;
|
||||
|
||||
int cp = UTF16.charAt(s,0);
|
||||
byte script = ucd.getScript(cp);
|
||||
|
||||
byte script = Default.ucd.getScript(cp);
|
||||
|
||||
// get first non-zero primary
|
||||
int primary = sortKey.charAt(0);
|
||||
int currentPrimary = getFirstPrimary(sortKey);
|
||||
int primary = currentPrimary >>> 16;
|
||||
|
||||
if (sortKey.length() < 4) script = -3;
|
||||
else if (primary == 0) script = -2;
|
||||
else if (primary < variable) script = -1;
|
||||
else if (primary < high) script = COMMON_SCRIPT;
|
||||
else if (primary >= UCA.UNSUPPORTED_BASE) script = UNSUPPORTED;
|
||||
else if (primary >= UCA.UNSUPPORTED_BASE && primary <= UCA.UNSUPPORTED_TOP) script = UNSUPPORTED;
|
||||
|
||||
if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
|
||||
else if ((script == INHERITED_SCRIPT || script == COMMON_SCRIPT) && oldScript >= 0) script = oldScript;
|
||||
|
@ -113,12 +121,12 @@ public class WriteCharts implements UCD_Types {
|
|||
++scriptCount[script+3];
|
||||
if (scriptCount[script+3] > 1) {
|
||||
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
|
||||
getChunkName(script) + ", " + ucd.getCodeAndName(s));
|
||||
getChunkName(script) + ", " + Default.ucd.getCodeAndName(s));
|
||||
}
|
||||
output = openFile(scriptCount[script+3], script);
|
||||
output = openFile(scriptCount[script+3], "CollationCharts\\", script);
|
||||
}
|
||||
|
||||
boolean firstPrimaryEquals = primary == lastSortKey.charAt(0);
|
||||
boolean firstPrimaryEquals = currentPrimary == getFirstPrimary(lastSortKey);
|
||||
|
||||
int strength = uca.strengthDifference(sortKey, lastSortKey);
|
||||
if (strength < 0) strength = -strength;
|
||||
|
@ -130,36 +138,63 @@ public class WriteCharts implements UCD_Types {
|
|||
for (int i = 0; i < sortKey.length(); ++i) {
|
||||
char w = sortKey.charAt(i);
|
||||
if (w == 0) break;
|
||||
if (w >= UCA.UNSUPPORTED_BASE && w <= UCA.UNSUPPORTED_TOP) {
|
||||
++i; // skip next
|
||||
}
|
||||
++ primaryCount;
|
||||
}
|
||||
|
||||
String breaker = "";
|
||||
if (columnCount > 10 || !firstPrimaryEquals) {
|
||||
if (!firstPrimaryEquals || script == UNSUPPORTED) breaker = "</tr><tr>";
|
||||
else breaker = "</tr><tr><td></td>"; // indent 1 cell
|
||||
columnCount = 0;
|
||||
if (!firstPrimaryEquals || script == UNSUPPORTED) breaker = "</tr><tr>";
|
||||
else {
|
||||
breaker = "</tr><tr><td></td>"; // indent 1 cell
|
||||
++columnCount;
|
||||
}
|
||||
}
|
||||
|
||||
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
|
||||
|
||||
output.println(breaker + classname
|
||||
+ " title='" + UCA.toString(sortKey) + "'>"
|
||||
+ nfc.normalize(s)
|
||||
String name = Default.ucd.getName(s);
|
||||
|
||||
|
||||
if (s.equals("\u1eaf")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
||||
String outline = breaker + classname
|
||||
+ " title='" + Utility.quoteXML(name) + ": " + UCA.toString(sortKey) + "'>"
|
||||
+ Utility.quoteXML(comp)
|
||||
+ "<br><tt>"
|
||||
+ Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
+ "</tt></td>");
|
||||
+ "</tt></td>";
|
||||
|
||||
output.println(outline);
|
||||
++columnCount;
|
||||
}
|
||||
|
||||
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
|
||||
|
||||
closeFile(output);
|
||||
indexFile.println("<hr><p>Last Modified: " + new Date());
|
||||
indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
|
||||
indexFile.println("<br>UCA Version: " + uca.getDataVersion());
|
||||
indexFile.println("<br>UCD Version: " + ucd.getVersion());
|
||||
indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
|
||||
indexFile.println("</p></body></html>");
|
||||
indexFile.close();
|
||||
}
|
||||
|
||||
static int getFirstPrimary(String sortKey) {
|
||||
int result = sortKey.charAt(0);
|
||||
if (result >= UCA.UNSUPPORTED_BASE && result <= UCA.UNSUPPORTED_TOP) {
|
||||
return (result << 16) | sortKey.charAt(1);
|
||||
}
|
||||
return (result << 16);
|
||||
}
|
||||
|
||||
static final String[] CLASSNAME = {
|
||||
"<td class='q'",
|
||||
"<td class='q'",
|
||||
|
@ -179,16 +214,16 @@ public class WriteCharts implements UCD_Types {
|
|||
|
||||
static PrintWriter indexFile;
|
||||
|
||||
static PrintWriter openFile(int count, byte script) throws IOException {
|
||||
static PrintWriter openFile(int count, String directory, int script) throws IOException {
|
||||
String scriptName = getChunkName(script);
|
||||
scriptName = ucd.getCase(scriptName, FULL, TITLE);
|
||||
if (script < 128) scriptName = Default.ucd.getCase(scriptName, FULL, TITLE);
|
||||
|
||||
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
|
||||
PrintWriter output = Utility.openPrintWriter("CollationCharts\\" + fileName);
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing: " + scriptName);
|
||||
|
||||
indexFile.println(" | <a href = '" + fileName + "'>" + scriptName + "</a>");
|
||||
indexFile.println(" <a href = '" + fileName + "'>" + scriptName + "</a>");
|
||||
String title = "UCA: " + scriptName;
|
||||
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
output.println("<title>" + title + "</title>");
|
||||
|
@ -198,20 +233,277 @@ public class WriteCharts implements UCD_Types {
|
|||
return output;
|
||||
}
|
||||
|
||||
static String getChunkName(byte script) {
|
||||
if (script == -3) return "NULL";
|
||||
static String getChunkName(int script) {
|
||||
if (script >= 128) return Default.ucd.getCategoryID_fromIndex((byte)(script - 128), LONG);
|
||||
else if (script == -4) return "NoMapping";
|
||||
else if (script == -3) return "NULL";
|
||||
else if (script == -2) return "IGNORABLE";
|
||||
else if (script == -1) return "VARIABLE";
|
||||
else if (script == HIRAGANA_SCRIPT) return "KATAKANA-HIRAGANA";
|
||||
else if (script == HIRAGANA_SCRIPT && HACK_KANA) return "KATAKANA-HIRAGANA";
|
||||
else if (script == UNSUPPORTED) return "UNSUPPORTED";
|
||||
else return ucd.getScriptID_fromIndex(script);
|
||||
else return Default.ucd.getScriptID_fromIndex((byte)script);
|
||||
}
|
||||
|
||||
static void closeFile(PrintWriter output) {
|
||||
if (output == null) return;
|
||||
output.println("</body></table></html>");
|
||||
output.println("</table></body></html>");
|
||||
output.close();
|
||||
}
|
||||
|
||||
|
||||
static public void normalizationChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cs || cat == Co) continue;
|
||||
|
||||
if (!Default.nfkd.normalizationDiffers(i)) continue;
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
|
||||
byte script = getBestScript(decomp);
|
||||
|
||||
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + 128 : script),
|
||||
new Pair(decomp,
|
||||
new Integer(i))));
|
||||
}
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
int[] scriptCount = new int[128];
|
||||
|
||||
int counter = 0;
|
||||
|
||||
int lastPrimary = -1;
|
||||
|
||||
String lastSortKey = "\u0000";
|
||||
|
||||
Utility.copyTextFile("index.html", true, "NormalizationCharts\\index.html");
|
||||
Utility.copyTextFile("charts.css", false, "NormalizationCharts\\charts.css");
|
||||
Utility.copyTextFile("norm_help.html", true, "NormalizationCharts\\help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter("NormalizationCharts\\index_list.html", false, false);
|
||||
Utility.appendFile("norm_index_header.html", true, indexFile);
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
indexFile.println("<base target='main'>");
|
||||
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
int script = ((Integer) p.first).intValue();
|
||||
int cp = ((Integer)((Pair) p.second).second).intValue();
|
||||
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
if (output == null) {
|
||||
output = openFile(0, "NormalizationCharts\\", script);
|
||||
output.println("<tr><td class='z'>Code</td><td class='z'>C</td><td class='z'>D</td><td class='z'>KC</td><td class='z'>KD</td></tr>");
|
||||
|
||||
}
|
||||
|
||||
output.println("<tr>");
|
||||
|
||||
String prefix;
|
||||
String code = UTF16.valueOf(cp);
|
||||
String c = Default.nfc.normalize(cp);
|
||||
String d = Default.nfd.normalize(cp);
|
||||
String kc = Default.nfkc.normalize(cp);
|
||||
String kd = Default.nfkd.normalize(cp);
|
||||
|
||||
showCell(output, code, "<td class='z' ", "");
|
||||
|
||||
prefix = c.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, c, prefix, "");
|
||||
|
||||
prefix = d.equals(c) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, d, prefix, "");
|
||||
|
||||
prefix = kc.equals(c) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, kc, prefix, "");
|
||||
|
||||
prefix = (kd.equals(d) || kd.equals(kc)) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, kd, prefix, "");
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
}
|
||||
|
||||
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
|
||||
|
||||
closeFile(output);
|
||||
indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
|
||||
indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
|
||||
indexFile.println("</p></body></html>");
|
||||
indexFile.close();
|
||||
}
|
||||
|
||||
static void showCell(PrintWriter output, String s, String prefix, String extra) {
|
||||
String name = Default.ucd.getName(s);
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
||||
String outline = prefix
|
||||
+ " title='" + Utility.quoteXML(name) + extra + "'>"
|
||||
+ Utility.quoteXML(comp)
|
||||
+ "<br><tt>"
|
||||
+ Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
+ "</tt></td>";
|
||||
|
||||
output.println(outline);
|
||||
}
|
||||
|
||||
static byte getBestScript(String s) {
|
||||
int cp;
|
||||
byte result = COMMON_SCRIPT;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
result = Default.ucd.getScript(cp);
|
||||
if (result != COMMON_SCRIPT && result != INHERITED_SCRIPT) return result;
|
||||
}
|
||||
return COMMON_SCRIPT;
|
||||
}
|
||||
|
||||
static public void caseChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
Set set = new TreeSet();
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
byte cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cs || cat == Co) continue;
|
||||
|
||||
String code = UTF16.valueOf(i);
|
||||
String lower = Default.ucd.getCase(i, FULL, LOWER);
|
||||
String title = Default.ucd.getCase(i, FULL, TITLE);
|
||||
String upper = Default.ucd.getCase(i, FULL, UPPER);
|
||||
String fold = Default.ucd.getCase(i, FULL, FOLD);
|
||||
|
||||
String decomp = Default.nfkd.normalize(i);
|
||||
byte script = 0;
|
||||
if (lower.equals(code) && upper.equals(code) && fold.equals(code)) {
|
||||
if (decomp contains Lu, Lo, Lt, or Lowercase or Uppercase) script = -4;
|
||||
else continue;
|
||||
}
|
||||
|
||||
|
||||
if (script == 0) script = getBestScript(decomp);
|
||||
|
||||
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + 128 : script),
|
||||
new Pair(decomp,
|
||||
new Integer(i))));
|
||||
}
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
Iterator it = set.iterator();
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
int[] scriptCount = new int[128];
|
||||
|
||||
int counter = 0;
|
||||
|
||||
int lastPrimary = -1;
|
||||
|
||||
String lastSortKey = "\u0000";
|
||||
|
||||
Utility.copyTextFile("index.html", true, "CaseCharts\\index.html");
|
||||
Utility.copyTextFile("charts.css", false, "CaseCharts\\charts.css");
|
||||
Utility.copyTextFile("norm_help.html", true, "CaseCharts\\help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter("CaseCharts\\index_list.html", false, false);
|
||||
Utility.appendFile("norm_index_header.html", true, indexFile);
|
||||
|
||||
/*
|
||||
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
indexFile.println("<title>UCA Default Collation Table</title>");
|
||||
indexFile.println("<base target='main'>");
|
||||
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
|
||||
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
|
||||
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
|
||||
*/
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
Pair p = (Pair) it.next();
|
||||
int script = ((Integer) p.first).intValue();
|
||||
int cp = ((Integer)((Pair) p.second).second).intValue();
|
||||
|
||||
if (script != oldScript
|
||||
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
|
||||
) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
oldScript = script;
|
||||
}
|
||||
|
||||
if (output == null) {
|
||||
output = openFile(0, "CaseCharts\\", script);
|
||||
output.println("<tr><td class='z'>Code</td><td class='z'>Lower</td><td class='z'>Title</td><td class='z'>Upper</td><td class='z'>Fold</td></tr>");
|
||||
|
||||
}
|
||||
|
||||
output.println("<tr>");
|
||||
|
||||
String prefix;
|
||||
String code = UTF16.valueOf(cp);
|
||||
String lower = Default.ucd.getCase(cp, FULL, LOWER);
|
||||
String title = Default.ucd.getCase(cp, FULL, TITLE);
|
||||
String upper = Default.ucd.getCase(cp, FULL, UPPER);
|
||||
String fold = Default.ucd.getCase(cp, FULL, FOLD);
|
||||
|
||||
showCell(output, code, "<td class='z' ", "");
|
||||
|
||||
prefix = lower.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, lower, prefix, "");
|
||||
|
||||
prefix = title.equals(upper) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, title, prefix, "");
|
||||
|
||||
prefix = upper.equals(code) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, upper, prefix, "");
|
||||
|
||||
prefix = (fold.equals(lower)) ? "<td class='g' " : "<td class='n' ";
|
||||
showCell(output, fold, prefix, "");
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
}
|
||||
|
||||
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
|
||||
|
||||
closeFile(output);
|
||||
indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
|
||||
indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
|
||||
indexFile.println("</p></body></html>");
|
||||
indexFile.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2002/04/23 01:59:16 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -71,6 +71,10 @@ public class WriteCollationData implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("writeNonspacingDifference")) writeNonspacingDifference();
|
||||
|
||||
else if (arg.equalsIgnoreCase("WriteCharts")) WriteCharts.test(collator);
|
||||
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
|
||||
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
|
||||
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(collator);
|
||||
else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(collator);
|
||||
|
@ -257,7 +261,7 @@ public class WriteCollationData implements UCD_Types {
|
|||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (normKD.hasDecomposition(c)) {
|
||||
if (normKD.normalizationDiffers(c)) {
|
||||
++count;
|
||||
String decomp = normKD.normalize(c);
|
||||
datasize += decomp.length();
|
||||
|
@ -285,7 +289,7 @@ public class WriteCollationData implements UCD_Types {
|
|||
for (char c = 0; c < 0xFFFF; ++c) {
|
||||
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
|
||||
if (0xAC00 <= c && c <= 0xD7A3) continue;
|
||||
if (normD.hasDecomposition(c)) {
|
||||
if (normD.normalizationDiffers(c)) {
|
||||
++count;
|
||||
String decomp = normD.normalize(c);
|
||||
datasize += decomp.length();
|
||||
|
@ -475,7 +479,7 @@ public class WriteCollationData implements UCD_Types {
|
|||
}
|
||||
log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
|
||||
for (char ch = 0; ch < 0xFFFF; ++ch) {
|
||||
if (!nfkd.hasDecomposition(ch)) continue;
|
||||
if (!nfkd.normalizationDiffers(ch)) continue;
|
||||
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
|
||||
String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
|
||||
String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2002/04/23 01:59:16 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -19,7 +19,6 @@ import java.io.*;
|
|||
//import com.ibm.text.unicode.*;
|
||||
import com.ibm.text.UCD.*;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
|
||||
public class WriteHTMLCollation implements UCD_Types {
|
||||
public static final String copyright =
|
||||
|
@ -42,14 +41,21 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
static PrintWriter log;
|
||||
|
||||
static UCD ucd;
|
||||
static Normalizer nfc, nfd, nfkd, nfkc;
|
||||
|
||||
public static void main(String args[]) throws IOException {
|
||||
|
||||
checkImplicit();
|
||||
checkFixes();
|
||||
|
||||
String unicodeVersion = "";
|
||||
|
||||
System.out.println("Building UCA");
|
||||
collator = new UCA(null, "");
|
||||
collator = new UCA(null, unicodeVersion);
|
||||
nfc = new Normalizer(NFC, unicodeVersion);
|
||||
nfkc = new Normalizer(NFKC, unicodeVersion);
|
||||
nfd = new Normalizer(NFD, unicodeVersion);
|
||||
nfkd = new Normalizer(NFKD, unicodeVersion);
|
||||
|
||||
System.out.println("Building UCD data (old)");
|
||||
//UInfo.init();
|
||||
|
@ -68,13 +74,13 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
*/
|
||||
|
||||
// DO FOLLOWING
|
||||
writeFractionalUCA("FractionalUCA.txt");
|
||||
writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
|
||||
writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
|
||||
|
||||
// SKIP BELOW
|
||||
if (true) return;
|
||||
|
||||
writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
|
||||
writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
|
||||
writeFractionalUCA("FractionalUCA.txt");
|
||||
writeRules(WITH_NAMES);
|
||||
writeRules(WITHOUT_NAMES);
|
||||
|
||||
|
@ -99,15 +105,15 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
|
||||
static public void writeCaseExceptions() {
|
||||
System.err.println("Writing Case Exceptions");
|
||||
Normalizer NFKC = new Normalizer(Normalizer.NFKC);
|
||||
//Normalizer NFKC = new Normalizer(Normalizer.NFKC);
|
||||
for (char a = 0; a < 0xFFFF; ++a) {
|
||||
if (!ucd.isRepresented(a)) continue;
|
||||
//if (0xA000 <= a && a <= 0xA48F) continue; // skip YI
|
||||
|
||||
String b = Case.fold(a);
|
||||
String c = NFKC.normalize(b);
|
||||
String c = nfkc.normalize(b);
|
||||
String d = Case.fold(c);
|
||||
String e = NFKC.normalize(d);
|
||||
String e = nfkc.normalize(d);
|
||||
if (!e.equals(c)) {
|
||||
System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
|
||||
/*
|
||||
|
@ -125,7 +131,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
*/
|
||||
}
|
||||
String f = Case.fold(e);
|
||||
String g = NFKC.normalize(f);
|
||||
String g = nfkc.normalize(f);
|
||||
if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
|
||||
}
|
||||
}
|
||||
|
@ -269,7 +275,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
static void checkBadDecomps(int strength, boolean decomposition) {
|
||||
int oldStrength = collator.getStrength();
|
||||
collator.setStrength(strength);
|
||||
Normalizer nfkd = new Normalizer(Normalizer.NFKD);
|
||||
//Normalizer nfkd = new Normalizer(Normalizer.NFKD);
|
||||
if (strength == 1) {
|
||||
log.println("<h2>3. Primaries Incompatible with Decompositions</h2><table border='1'>");
|
||||
} else {
|
||||
|
@ -277,7 +283,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
}
|
||||
log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
|
||||
for (char ch = 0; ch < 0xFFFF; ++ch) {
|
||||
if (!nfkd.hasDecomposition(ch)) continue;
|
||||
if (!nfkd.normalizationDiffers(ch)) continue;
|
||||
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
|
||||
String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
|
||||
String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);
|
||||
|
@ -431,11 +437,11 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
log.println("compressed: " + comp);
|
||||
}
|
||||
log.println("Ken's : " + kenStr);
|
||||
String nfkd = NFKD.normalize(s);
|
||||
log.println("NFKD : " + ucd.getCodeAndName(nfkd));
|
||||
String nfd = NFD.normalize(s);
|
||||
if (!nfd.equals(nfkd)) {
|
||||
log.println("NFD : " + ucd.getCodeAndName(nfd));
|
||||
String nfkdstr = nfkd.normalize(s);
|
||||
log.println("NFKD : " + ucd.getCodeAndName(nfkdstr));
|
||||
String nfdstr = nfd.normalize(s);
|
||||
if (!nfdstr.equals(nfkdstr)) {
|
||||
log.println("NFD : " + ucd.getCodeAndName(nfdstr));
|
||||
}
|
||||
//kenCLen = collator.getCEs(decomp, true, kenComp);
|
||||
//log.println("decomp ce: " + collator.ceToString(kenComp, kenCLen));
|
||||
|
@ -456,7 +462,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
static final byte getDecompType(int cp) {
|
||||
byte result = ucd.getDecompositionType(cp);
|
||||
if (result == ucd.CANONICAL) {
|
||||
String d = NFD.normalize((char)cp); // TODO
|
||||
String d = nfd.normalize((char)cp); // TODO
|
||||
for (int i = 0; i < d.length(); ++i) {
|
||||
byte t = ucd.getDecompositionType(d.charAt(i));
|
||||
if (t > ucd.CANONICAL) return t;
|
||||
|
@ -517,7 +523,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
int type = getDecompType(s.charAt(0));
|
||||
char ch = s.charAt(0);
|
||||
|
||||
String decomp = NFKD.normalize(s);
|
||||
String decomp = nfkd.normalize(s);
|
||||
int len = 0;
|
||||
int markLen = collator.getCEs(decomp, true, markCes);
|
||||
if (compress) markLen = kenCompress(markCes, markLen);
|
||||
|
@ -741,8 +747,8 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
return result.toString();
|
||||
}
|
||||
|
||||
static Normalizer NFKD = new Normalizer(Normalizer.NFKD);
|
||||
static Normalizer NFD = new Normalizer(Normalizer.NFD);
|
||||
//static Normalizer NFKD = new Normalizer(Normalizer.NFKD);
|
||||
//static Normalizer NFD = new Normalizer(Normalizer.NFD);
|
||||
|
||||
static int variableHigh = 0;
|
||||
static final int COMMON = 5;
|
||||
|
@ -1112,7 +1118,7 @@ public class WriteHTMLCollation implements UCD_Types {
|
|||
// b. toSmallKana(NFKD(x)) != x.
|
||||
|
||||
static final boolean needsCaseBit(String x) {
|
||||
String s = NFKD.normalize(x);
|
||||
String s = nfkd.normalize(x);
|
||||
if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
|
||||
if (!toSmallKana(s).equals(s)) return true;
|
||||
return false;
|
||||
|
@ -1616,8 +1622,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
static final char MARK2 = '\u0002';
|
||||
//Normalizer normalizer = new Normalizer(Normalizer.NFC, true);
|
||||
|
||||
static Normalizer toC = new Normalizer(Normalizer.NFC);
|
||||
static Normalizer toD = new Normalizer(Normalizer.NFD);
|
||||
//static Normalizer toC = new Normalizer(Normalizer.NFC);
|
||||
//static Normalizer toD = new Normalizer(Normalizer.NFD);
|
||||
static TreeMap MismatchedC = new TreeMap();
|
||||
static TreeMap MismatchedN = new TreeMap();
|
||||
static TreeMap MismatchedD = new TreeMap();
|
||||
|
@ -1631,7 +1637,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
static void addString(String ch, byte option) {
|
||||
String colDbase = collator.getSortKey(ch, option, true);
|
||||
String colNbase = collator.getSortKey(ch, option, false);
|
||||
String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
|
||||
String colCbase = collator.getSortKey(nfc.normalize(ch), option, false);
|
||||
if (!colNbase.equals(colCbase)) {
|
||||
/*System.out.println(Utility.hex(ch));
|
||||
System.out.println(printableKey(colNbase));
|
||||
|
@ -1790,7 +1796,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
}
|
||||
|
||||
static void showLine(int count, String ch, String keyD, String keyN) {
|
||||
String decomp = toD.normalize(ch);
|
||||
String decomp = nfd.normalize(ch);
|
||||
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
|
||||
log.println("<tr><td>" + count + "</td><td>"
|
||||
+ Utility.hex(ch, " ")
|
||||
|
@ -1831,7 +1837,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
String ch = (String)it.next();
|
||||
String MN = (String)MismatchedN.get(ch);
|
||||
String MC = (String)MismatchedC.get(ch);
|
||||
String chInC = toC.normalize(ch);
|
||||
String chInC = nfc.normalize(ch);
|
||||
out.el("tr");
|
||||
out.el("th").at("rowSpan",2).at("align","right").tx16(ch).tx(' ').tx(ucd.getName(ch));
|
||||
out.el("br").cl().tx("NFC=").tx16(chInC).cl();
|
||||
|
@ -1859,7 +1865,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
|
|||
|
||||
static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
|
||||
String ch = chobj.toString();
|
||||
String decomp = toD.normalize(ch);
|
||||
String decomp = nfd.normalize(ch);
|
||||
if (showName) {
|
||||
if (ch.equals(decomp)) {
|
||||
log.println(//title + counter + " "
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
td { border: 1 solid #0000FF; color: #000000; background-color: #FFFFFF; font-size: 120%; text-align: Center; vertical-align: top; width: 48px }
|
||||
td { border: 1 solid #0000FF; color: #000000; background-color: #FFFFFF;
|
||||
font-size: 120%; text-align: Center; vertical-align: top; width: 48px }
|
||||
td.p { color: #000000; background-color: #7777FF }
|
||||
td.s { color: #000000; background-color: #BBBBFF }
|
||||
td.t { color: #000000; background-color: #DDDDFF }
|
||||
|
@ -9,4 +10,8 @@ td.et { color: #000000; background-color: #FF9999 }
|
|||
td.eq { color: #000000; background-color: #FFBBBB }
|
||||
th { vertical-align: top; font-weight: bold }
|
||||
th.x { vertical-align: top; font-weight: regular; text-align: Left }
|
||||
tt { font-size: 50% }
|
||||
tt { font-size: 50% }
|
||||
|
||||
td.g { font-size: 120%; text-align: Center; width: 72px; color: #808080; }
|
||||
td.n { font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; }
|
||||
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
<meta name="ProgId" content="FrontPage.Editor.Document">
|
||||
</head>
|
||||
|
||||
<frameset rows="192,*">
|
||||
<frameset cols="192,*">
|
||||
<frame name="header" src="index_list.html" target="main" scrolling="auto">
|
||||
<frame name="main" src="help.html" target="main" scrolling="auto">
|
||||
<noframes>
|
||||
|
|
|
@ -3,54 +3,33 @@
|
|||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<meta http-equiv="Content-Language" content="en-us">
|
||||
<meta name="VI60_defaultClientScript" content="JavaScript">
|
||||
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
|
||||
<meta name="keywords" content="Basic">
|
||||
<meta name="ProgId" content="FrontPage.Editor.Document">
|
||||
<title>UCA Default Collation Table</title>
|
||||
<base target="main">
|
||||
<style><!-- p { font-size: 90% }
|
||||
<title>UCA Chart</title>
|
||||
<style><!--
|
||||
p { font-size: 90% }
|
||||
--></style>
|
||||
<base target="main">
|
||||
<link rel="stylesheet" type="text/css"
|
||||
href="http://www.unicode.org/webscripts/standard_styles.css">
|
||||
<script language="Javascript" type="text/javascript"
|
||||
src="http://www.unicode.org/webscripts/commonHeader.js"></script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<table cellspacing="0" cellpadding="0" width="100%" border="0">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<table cellspacing="0" cellpadding="0" width="100%" border="0">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="icon"><a href="http://www.unicode.org/"><img
|
||||
alt="[Unicode]"
|
||||
src="http://www.unicode.org/webscripts/logo60s2.gif"
|
||||
align="middle" border="0" width="34" height="33"></a> <a
|
||||
class="bar" href="http://www.unicode.org/charts"
|
||||
target="_parent"><font size="3">Charts</font></a></td>
|
||||
<td class="bar"><a class="bar" href="http://www.unicode.org"
|
||||
target="_parent">Home</a> | <a class="bar"
|
||||
href="http://www.unicode.org/sitemap/" target="_parent">Site Map</a>
|
||||
| <a class="bar" href="http://www.unicode.org/search"
|
||||
target="_parent">Search </a><script
|
||||
language="Javascript"
|
||||
src="http://www.unicode.org/webscripts/commonSearch.js"
|
||||
type="text/javascript"></script>
|
||||
<NOSCRIPT>
|
||||
<a href="http://www.unicode.org/webscripts/quick_links.html"
|
||||
class="bar" target="_blank">Goto</a></NOSCRIPT>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<table width="100%" cellpadding="0" cellspacing="0" border="0">
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<table width="100%" border="0" cellpadding="0" cellspacing="0">
|
||||
<tr>
|
||||
<td class="icon"><a href="http://www.unicode.org/"><img border="0"
|
||||
src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle"
|
||||
alt="[Unicode]" width="34" height="33"></a> <a class="bar"
|
||||
href="http://www.unicode.org/unicode/faq/"><font size="3">Charts</font></a>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<div class="body">
|
||||
<!-- BEGIN CONTENTS -->
|
||||
<h2 align="center">UCA Default Collation Table</h2>
|
||||
<h2 align="center">UCA Chart</h2>
|
||||
<p align="center"><a href="help.html">Help</a>
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
|
||||
* $Date: 2001/12/13 23:35:54 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/04/23 01:59:12 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -28,7 +28,7 @@ public class BuildNames implements UCD_Types {
|
|||
|
||||
public static void main(String[] args) throws IOException {
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
collectWords();
|
||||
}
|
||||
|
||||
|
@ -82,8 +82,8 @@ public class BuildNames implements UCD_Types {
|
|||
int used = 0;
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (Main.ucd.hasComputableName(i)) continue;
|
||||
String name = transform(Main.ucd.getName(i));
|
||||
if (Default.ucd.hasComputableName(i)) continue;
|
||||
String name = transform(Default.ucd.getName(i));
|
||||
|
||||
|
||||
sum += name.length();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2002/03/20 00:21:43 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2002/04/23 01:59:13 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -97,7 +97,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
Normalizer nfx;
|
||||
ExDProp(int i) {
|
||||
type = DERIVED_NORMALIZATION;
|
||||
nfx = Main.nf[i];
|
||||
nfx = Default.nf[i];
|
||||
name = "Expands_On_" + nfx.getName();
|
||||
shortName = "XO_" + nfx.getName();
|
||||
header = "# Derived Property: " + name
|
||||
|
@ -121,7 +121,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
NF_UnsafeStartProp(int i) {
|
||||
isStandard = false;
|
||||
type = DERIVED_NORMALIZATION;
|
||||
nfx = Main.nf[i];
|
||||
nfx = Default.nf[i];
|
||||
name = nfx.getName() + "_UnsafeStart";
|
||||
shortName = nfx.getName() + "_SS";
|
||||
header = "# Derived Property: " + name
|
||||
|
@ -157,7 +157,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
case NFC_TrailingNonZero: bitsets[1] = bitset = new BitSet(); break;
|
||||
}
|
||||
filter = bitsets[1] != null;
|
||||
Main.nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
|
||||
Default.nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
|
||||
|
||||
name = Names[i-NFC_Leading];
|
||||
shortName = SNames[i-NFC_Leading];
|
||||
|
@ -193,17 +193,17 @@ public final class DerivedProperty implements UCD_Types {
|
|||
isStandard = false;
|
||||
setValueType(NON_ENUMERATED);
|
||||
type = DERIVED_NORMALIZATION;
|
||||
nfx = Main.nf[i];
|
||||
nfx = Default.nf[i];
|
||||
name = nfx.getName();
|
||||
String compName = "the character itself";
|
||||
|
||||
if (i == NFKC || i == NFD) {
|
||||
name += "-NFC";
|
||||
nfComp = Main.nfc;
|
||||
nfComp = Default.nfc;
|
||||
compName = "NFC for the character";
|
||||
} else if (i == NFKD) {
|
||||
name += "-NFD";
|
||||
nfComp = Main.nfd;
|
||||
nfComp = Default.nfd;
|
||||
compName = "NFD for the character";
|
||||
}
|
||||
header = "# Derived Property: " + name
|
||||
|
@ -269,7 +269,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
QuickDProp (int i) {
|
||||
setValueType((i == NFC || i == NFKC) ? ENUMERATED : BINARY);
|
||||
type = DERIVED_NORMALIZATION;
|
||||
nfx = Main.nf[i];
|
||||
nfx = Default.nf[i];
|
||||
NO = nfx.getName() + "_NO";
|
||||
MAYBE = nfx.getName() + "_MAYBE";
|
||||
name = nfx.getName() + "_QuickCheck";
|
||||
|
@ -507,8 +507,8 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
public String getValue(int cp, byte style) {
|
||||
if (!ucdData.isRepresented(cp)) return "";
|
||||
String b = Main.nfkc.normalize(fold(cp));
|
||||
String c = Main.nfkc.normalize(fold(b));
|
||||
String b = Default.nfkc.normalize(fold(cp));
|
||||
String c = Default.nfkc.normalize(fold(b));
|
||||
if (c.equals(b)) return "";
|
||||
return "FNC; " + Utility.hex(c);
|
||||
} // default
|
||||
|
@ -529,8 +529,8 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
public String getValue(int cp, byte style) {
|
||||
if (!ucdData.isRepresented(cp)) return "";
|
||||
String b = Main.nfc.normalize(fold(cp));
|
||||
String c = Main.nfc.normalize(fold(b));
|
||||
String b = Default.nfc.normalize(fold(cp));
|
||||
String c = Default.nfc.normalize(fold(b));
|
||||
if (c.equals(b)) return "";
|
||||
return "FN; " + Utility.hex(c);
|
||||
} // default
|
||||
|
@ -598,8 +598,8 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
boolean hasValue(int cp) {
|
||||
if (hasSoftDot(cp)) return true;
|
||||
if (!Main.nfkd.normalizationDiffers(cp)) return false;
|
||||
String decomp = Main.nfd.normalize(cp);
|
||||
if (!Default.nfkd.normalizationDiffers(cp)) return false;
|
||||
String decomp = Default.nfd.normalize(cp);
|
||||
boolean ok = false;
|
||||
for (int i = decomp.length()-1; i >= 0; --i) {
|
||||
int ch = UTF16.charAt(decomp, i);
|
||||
|
@ -698,11 +698,11 @@ of characters, the first of which has a non-zero combining class.
|
|||
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
|
||||
if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
|
||||
|
||||
// if (true) throw new IllegalArgumentException("FIX Main.nf[2]");
|
||||
// if (true) throw new IllegalArgumentException("FIX Default.nf[2]");
|
||||
|
||||
if (!Main.nf[NFKD].normalizationDiffers(cp)) return Lo;
|
||||
if (!Default.nf[NFKD].normalizationDiffers(cp)) return Lo;
|
||||
|
||||
String norm = Main.nf[NFKD].normalize(cp);
|
||||
String norm = Default.nf[NFKD].normalize(cp);
|
||||
int cp2;
|
||||
boolean gotUpper = false;
|
||||
boolean gotLower = false;
|
||||
|
@ -740,8 +740,8 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
|
||||
public static void test() {
|
||||
Main.setUCD();
|
||||
DerivedProperty dprop = new DerivedProperty(Main.ucd);
|
||||
Default.setUCD();
|
||||
DerivedProperty dprop = new DerivedProperty(Default.ucd);
|
||||
/*
|
||||
for (int j = 0; j < LIMIT; ++j) {
|
||||
System.out.println();
|
||||
|
@ -752,9 +752,9 @@ of characters, the first of which has a non-zero combining class.
|
|||
|
||||
for (int cp = 0xA0; cp < 0xFF; ++cp) {
|
||||
System.out.println();
|
||||
System.out.println(Main.ucd.getCodeAndName(cp));
|
||||
System.out.println(Default.ucd.getCodeAndName(cp));
|
||||
for (int j = 0; j < DERIVED_PROPERTY_LIMIT; ++j) {
|
||||
String prop = make(j, Main.ucd).getValue(cp);
|
||||
String prop = make(j, Default.ucd).getValue(cp);
|
||||
if (prop.length() != 0) System.out.println("\t" + prop);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2002/03/22 22:08:53 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -40,7 +40,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
public static void makeCaseFold(boolean normalized) throws java.io.IOException {
|
||||
PICK_SHORT = NF_CLOSURE = normalized;
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true));
|
||||
System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
|
||||
|
||||
|
@ -123,15 +123,15 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
static void drawLine(PrintWriter out, int ch, String type, String result) {
|
||||
String comment = "";
|
||||
if (COMMENT_DIFFS) {
|
||||
String lower = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
|
||||
String lower = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
|
||||
if (!lower.equals(result)) {
|
||||
String upper = Main.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
|
||||
String lower2 = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
|
||||
String upper = Default.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
|
||||
String lower2 = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
|
||||
if (lower.equals(lower2)) {
|
||||
comment = "[Diff " + Utility.hex(lower, " ") + "] ";
|
||||
} else {
|
||||
Utility.fixDot();
|
||||
System.out.println("PROBLEM WITH: " + Main.ucd.getCodeAndName(ch));
|
||||
System.out.println("PROBLEM WITH: " + Default.ucd.getCodeAndName(ch));
|
||||
comment = "[DIFF " + Utility.hex(lower, " ") + ", " + Utility.hex(lower2, " ") + "] ";
|
||||
}
|
||||
}
|
||||
|
@ -140,7 +140,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
out.println(Utility.hex(ch)
|
||||
+ "; " + type
|
||||
+ "; " + Utility.hex(result, " ")
|
||||
+ "; # " + comment + Main.ucd.getName(ch));
|
||||
+ "; # " + comment + Default.ucd.getName(ch));
|
||||
}
|
||||
|
||||
static int probeCh = 0x01f0;
|
||||
|
@ -156,7 +156,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
//if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
|
||||
if (!Main.ucd.isRepresented(ch)) continue;
|
||||
if (!Default.ucd.isRepresented(ch)) continue;
|
||||
getClosure(ch, data, full, nfClose);
|
||||
}
|
||||
|
||||
|
@ -202,7 +202,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
}
|
||||
Utility.fixDot();
|
||||
log.println("Non-Optimal Representative " + message);
|
||||
log.println(" Rep:\t" + Main.ucd.getCodeAndName(rep));
|
||||
log.println(" Rep:\t" + Default.ucd.getCodeAndName(rep));
|
||||
log.println(" Set:\t" + toString(set,true, true));
|
||||
}
|
||||
|
||||
|
@ -234,13 +234,13 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
if (!full) result <<= 8;
|
||||
String low = lower(upper(s, full), full);
|
||||
if (s.equals(low)) result |= ISLOWER;
|
||||
else if (PICK_SHORT && Main.nfd.normalize(s).equals(Main.nfd.normalize(low))) result |= ISLOWER;
|
||||
else if (PICK_SHORT && Default.nfd.normalize(s).equals(Default.nfd.normalize(low))) result |= ISLOWER;
|
||||
|
||||
if (s.equals(Main.nfc.normalize(s))) result |= NFC_FORMAT;
|
||||
if (s.equals(Default.nfc.normalize(s))) result |= NFC_FORMAT;
|
||||
|
||||
if (show) {
|
||||
Utility.fixDot();
|
||||
System.out.println(Utility.hex(result) + ", " + Main.ucd.getCodeAndName(s));
|
||||
System.out.println(Utility.hex(result) + ", " + Default.ucd.getCodeAndName(s));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -269,28 +269,28 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
|
||||
/*
|
||||
String
|
||||
String lower1 = Main.ucd.getLowercase(ch);
|
||||
String lower2 = Main.ucd.toLowercase(ch,option);
|
||||
String lower1 = Default.ucd.getLowercase(ch);
|
||||
String lower2 = Default.ucd.toLowercase(ch,option);
|
||||
|
||||
char ch2 = Main.ucd.getLowercase(Main.ucd.getUppercase(ch).charAt(0)).charAt(0);
|
||||
//String lower1 = String.valueOf(Main.ucd.getLowercase(ch));
|
||||
//String lower = Main.ucd.toLowercase(ch2,option);
|
||||
String upper = Main.ucd.toUppercase(ch2,option);
|
||||
String lowerUpper = Main.ucd.toLowercase(upper,option);
|
||||
//String title = Main.ucd.toTitlecase(ch2,option);
|
||||
//String lowerTitle = Main.ucd.toLowercase(upper,option);
|
||||
char ch2 = Default.ucd.getLowercase(Default.ucd.getUppercase(ch).charAt(0)).charAt(0);
|
||||
//String lower1 = String.valueOf(Default.ucd.getLowercase(ch));
|
||||
//String lower = Default.ucd.toLowercase(ch2,option);
|
||||
String upper = Default.ucd.toUppercase(ch2,option);
|
||||
String lowerUpper = Default.ucd.toLowercase(upper,option);
|
||||
//String title = Default.ucd.toTitlecase(ch2,option);
|
||||
//String lowerTitle = Default.ucd.toLowercase(upper,option);
|
||||
|
||||
if (ch != ch2 || lowerUpper.length() != 1 || ch != lowerUpper.charAt(0)) { //
|
||||
output.println(Utility.hex(ch)
|
||||
+ "; " + (lowerUpper.equals(lower1) ? "L" : lowerUpper.equals(lower2) ? "S" : "E")
|
||||
+ "; " + Utility.hex(lowerUpper," ")
|
||||
+ ";\t#" + Main.ucd.getName(ch)
|
||||
+ ";\t#" + Default.ucd.getName(ch)
|
||||
);
|
||||
//if (!lowerUpper.equals(lower)) {
|
||||
// output.println("Warning1: " + Utility.hex(lower) + " " + Main.ucd.getName(lower));
|
||||
// output.println("Warning1: " + Utility.hex(lower) + " " + Default.ucd.getName(lower));
|
||||
//}
|
||||
//if (!lowerUpper.equals(lowerTitle)) {
|
||||
// output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Main.ucd.getName(lowerTitle));
|
||||
// output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Default.ucd.getName(lowerTitle));
|
||||
//}
|
||||
}
|
||||
*/
|
||||
|
@ -322,10 +322,10 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
// do funny stuff since we can't modify set while iterating
|
||||
// We don't do this because if the source is not normalized, we don't want to normalize
|
||||
if (nfClose) {
|
||||
if (add(set, Main.nfd.normalize(s), data)) continue main;
|
||||
if (add(set, Main.nfc.normalize(s), data)) continue main;
|
||||
if (add(set, Main.nfkd.normalize(s), data)) continue main;
|
||||
if (add(set, Main.nfkc.normalize(s), data)) continue main;
|
||||
if (add(set, Default.nfd.normalize(s), data)) continue main;
|
||||
if (add(set, Default.nfc.normalize(s), data)) continue main;
|
||||
if (add(set, Default.nfkd.normalize(s), data)) continue main;
|
||||
if (add(set, Default.nfkc.normalize(s), data)) continue main;
|
||||
}
|
||||
if (add(set, lower(s, full), data)) continue main;
|
||||
if (add(set, title(s, full), data)) continue main;
|
||||
|
@ -340,34 +340,34 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
return result.replace('\u03C2', '\u03C3'); // HACK for lower
|
||||
}
|
||||
|
||||
// These functions are no longer necessary, since Main.ucd is parameterized,
|
||||
// These functions are no longer necessary, since Default.ucd is parameterized,
|
||||
// but it's not worth changing
|
||||
|
||||
static String lower2(String s, boolean full) {
|
||||
/*if (!full) {
|
||||
if (s.length() != 1) return s;
|
||||
return Main.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
|
||||
return Default.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
|
||||
}
|
||||
*/
|
||||
return Main.ucd.getCase(s, full ? FULL : SIMPLE, LOWER);
|
||||
return Default.ucd.getCase(s, full ? FULL : SIMPLE, LOWER);
|
||||
}
|
||||
|
||||
static String upper(String s, boolean full) {
|
||||
/* if (!full) {
|
||||
if (s.length() != 1) return s;
|
||||
return Main.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
|
||||
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
|
||||
}
|
||||
*/
|
||||
return Main.ucd.getCase(s, full ? FULL : SIMPLE, UPPER);
|
||||
return Default.ucd.getCase(s, full ? FULL : SIMPLE, UPPER);
|
||||
}
|
||||
|
||||
static String title(String s, boolean full) {
|
||||
/*if (!full) {
|
||||
if (s.length() != 1) return s;
|
||||
return Main.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
|
||||
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
|
||||
}
|
||||
*/
|
||||
return Main.ucd.getCase(s, full ? FULL : SIMPLE, TITLE);
|
||||
return Default.ucd.getCase(s, full ? FULL : SIMPLE, TITLE);
|
||||
}
|
||||
|
||||
static boolean add(Set set, String s, Map data) {
|
||||
|
@ -406,7 +406,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
}
|
||||
first = false;
|
||||
if (name) {
|
||||
result += Main.ucd.getCodeAndName(s2);
|
||||
result += Default.ucd.getCodeAndName(s2);
|
||||
} else {
|
||||
result += Utility.hex(s2, " ");
|
||||
}
|
||||
|
@ -416,12 +416,12 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
|
||||
static boolean specialNormalizationDiffers(int ch) {
|
||||
if (ch == 0x00DF) return true; // es-zed
|
||||
return Main.nfkd.normalizationDiffers(ch);
|
||||
return Default.nfkd.normalizationDiffers(ch);
|
||||
}
|
||||
|
||||
static String specialNormalization(String s) {
|
||||
if (s.equals("\u00DF")) return "ss";
|
||||
return Main.nfkd.normalize(s);
|
||||
return Default.nfkd.normalize(s);
|
||||
}
|
||||
|
||||
static boolean isExcluded(int ch) {
|
||||
|
@ -431,14 +431,14 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A..
|
||||
if (0x20A8 <= ch && ch <= 0x217B) return true; // skip Rupee..
|
||||
|
||||
byte type = Main.ucd.getDecompositionType(ch);
|
||||
byte type = Default.ucd.getDecompositionType(ch);
|
||||
if (type == COMPAT_SQUARE) return true;
|
||||
//if (type == COMPAT_UNSPECIFIED) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void generateSpecialCasing(boolean normalize) throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
Map sorted = new TreeMap();
|
||||
|
||||
String suffix2 = "";
|
||||
|
@ -448,19 +448,19 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
|
||||
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
if (!Main.ucd.isRepresented(ch)) continue;
|
||||
if (!Default.ucd.isRepresented(ch)) continue;
|
||||
if (!specialNormalizationDiffers(ch)) continue;
|
||||
|
||||
String lower = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, LOWER));
|
||||
String upper = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, UPPER));
|
||||
String title = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, TITLE));
|
||||
String lower = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, LOWER));
|
||||
String upper = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, UPPER));
|
||||
String title = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, TITLE));
|
||||
|
||||
String chstr = UTF16.valueOf(ch);
|
||||
|
||||
String decomp = specialNormalization(chstr);
|
||||
String flower = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, LOWER));
|
||||
String fupper = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, UPPER));
|
||||
String ftitle = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, TITLE));
|
||||
String flower = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, LOWER));
|
||||
String fupper = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, UPPER));
|
||||
String ftitle = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, TITLE));
|
||||
|
||||
String base = decomp;
|
||||
String blower = specialNormalization(lower);
|
||||
|
@ -468,42 +468,42 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
String btitle = specialNormalization(title);
|
||||
|
||||
if (true) {
|
||||
flower = Main.nfc.normalize(flower);
|
||||
fupper = Main.nfc.normalize(fupper);
|
||||
ftitle = Main.nfc.normalize(ftitle);
|
||||
base = Main.nfc.normalize(base);
|
||||
blower = Main.nfc.normalize(blower);
|
||||
bupper = Main.nfc.normalize(bupper);
|
||||
btitle = Main.nfc.normalize(btitle);
|
||||
flower = Default.nfc.normalize(flower);
|
||||
fupper = Default.nfc.normalize(fupper);
|
||||
ftitle = Default.nfc.normalize(ftitle);
|
||||
base = Default.nfc.normalize(base);
|
||||
blower = Default.nfc.normalize(blower);
|
||||
bupper = Default.nfc.normalize(bupper);
|
||||
btitle = Default.nfc.normalize(btitle);
|
||||
}
|
||||
|
||||
if (ch == CHECK_CHAR) {
|
||||
System.out.println("Code: " + Main.ucd.getCodeAndName(ch));
|
||||
System.out.println("Decomp: " + Main.ucd.getCodeAndName(decomp));
|
||||
System.out.println("Base: " + Main.ucd.getCodeAndName(base));
|
||||
System.out.println("SLower: " + Main.ucd.getCodeAndName(lower));
|
||||
System.out.println("FLower: " + Main.ucd.getCodeAndName(flower));
|
||||
System.out.println("BLower: " + Main.ucd.getCodeAndName(blower));
|
||||
System.out.println("STitle: " + Main.ucd.getCodeAndName(title));
|
||||
System.out.println("FTitle: " + Main.ucd.getCodeAndName(ftitle));
|
||||
System.out.println("BTitle: " + Main.ucd.getCodeAndName(btitle));
|
||||
System.out.println("SUpper: " + Main.ucd.getCodeAndName(upper));
|
||||
System.out.println("FUpper: " + Main.ucd.getCodeAndName(fupper));
|
||||
System.out.println("BUpper: " + Main.ucd.getCodeAndName(bupper));
|
||||
System.out.println("Code: " + Default.ucd.getCodeAndName(ch));
|
||||
System.out.println("Decomp: " + Default.ucd.getCodeAndName(decomp));
|
||||
System.out.println("Base: " + Default.ucd.getCodeAndName(base));
|
||||
System.out.println("SLower: " + Default.ucd.getCodeAndName(lower));
|
||||
System.out.println("FLower: " + Default.ucd.getCodeAndName(flower));
|
||||
System.out.println("BLower: " + Default.ucd.getCodeAndName(blower));
|
||||
System.out.println("STitle: " + Default.ucd.getCodeAndName(title));
|
||||
System.out.println("FTitle: " + Default.ucd.getCodeAndName(ftitle));
|
||||
System.out.println("BTitle: " + Default.ucd.getCodeAndName(btitle));
|
||||
System.out.println("SUpper: " + Default.ucd.getCodeAndName(upper));
|
||||
System.out.println("FUpper: " + Default.ucd.getCodeAndName(fupper));
|
||||
System.out.println("BUpper: " + Default.ucd.getCodeAndName(bupper));
|
||||
}
|
||||
|
||||
// presumably if there is a single code point, it would already be in the simple mappings
|
||||
|
||||
if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1
|
||||
&& UTF16.countCodePoint(title) == 1) {
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Main.ucd.getCodeAndName(ch));
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd.getCodeAndName(ch));
|
||||
continue;
|
||||
}
|
||||
|
||||
// if there is no change from the base, skip
|
||||
|
||||
if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Main.ucd.getCodeAndName(ch));
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd.getCodeAndName(ch));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -516,11 +516,11 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
// if there are no changes from the original, or the expanded original, skip
|
||||
|
||||
if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Main.ucd.getCodeAndName(ch));
|
||||
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd.getCodeAndName(ch));
|
||||
continue;
|
||||
}
|
||||
|
||||
String name = Main.ucd.getName(ch);
|
||||
String name = Default.ucd.getName(ch);
|
||||
|
||||
int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
|
||||
: ch == 0x130 ? 2
|
||||
|
@ -531,16 +531,16 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
: UTF16.countCodePoint(fupper) == 2 ? 7
|
||||
: 8;
|
||||
|
||||
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Main.ucd.getCodeAndName(ch));
|
||||
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd.getCodeAndName(ch));
|
||||
|
||||
// HACK
|
||||
boolean denormalize = !normalize && order != 6 && order != 7;
|
||||
|
||||
String mapping = Utility.hex(ch)
|
||||
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Main.nfd.normalize(flower) : flower)
|
||||
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Main.nfd.normalize(ftitle) : ftitle)
|
||||
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Main.nfd.normalize(fupper) : fupper)
|
||||
+ "; # " + Main.ucd.getName(ch);
|
||||
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd.normalize(flower) : flower)
|
||||
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd.normalize(ftitle) : ftitle)
|
||||
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd.normalize(fupper) : fupper)
|
||||
+ "; # " + Default.ucd.getName(ch);
|
||||
|
||||
// special exclusions
|
||||
if (isExcluded(ch)) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2002/03/20 00:21:42 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -32,7 +32,7 @@ public class GenerateData implements UCD_Types {
|
|||
//static UnifiedBinaryProperty ubp
|
||||
|
||||
public static void checkHoffman(String test) {
|
||||
String result = Main.nfkc.normalize(test);
|
||||
String result = Default.nfkc.normalize(test);
|
||||
System.out.println(Utility.hex(test) + " => " + Utility.hex(result));
|
||||
System.out.println();
|
||||
show(test, 0);
|
||||
|
@ -44,10 +44,10 @@ public class GenerateData implements UCD_Types {
|
|||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
|
||||
cp = UTF32.char32At(s, i);
|
||||
String cc = " " + Main.ucd.getCombiningClass(cp);
|
||||
String cc = " " + Default.ucd.getCombiningClass(cp);
|
||||
cc = Utility.repeat(" ", 4 - cc.length()) + cc;
|
||||
System.out.println(Utility.repeat(" ", indent) + Main.ucd.getCode(cp) + cc + " " + Main.ucd.getName(cp));
|
||||
String decomp = Main.nfkc.normalize(cp);
|
||||
System.out.println(Utility.repeat(" ", indent) + Default.ucd.getCode(cp) + cc + " " + Default.ucd.getName(cp));
|
||||
String decomp = Default.nfkc.normalize(cp);
|
||||
if (!decomp.equals(UTF32.valueOf32(cp))) {
|
||||
show(decomp, indent + 4);
|
||||
}
|
||||
|
@ -98,14 +98,14 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getFileSuffix(boolean withDVersion) {
|
||||
return "-" + Main.ucd.getVersion()
|
||||
return "-" + Default.ucd.getVersion()
|
||||
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
|
||||
+ ".txt";
|
||||
}
|
||||
|
||||
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String newFile = directory + fileName + getFileSuffix(true);
|
||||
System.out.println("New File: " + newFile);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile);
|
||||
|
@ -114,7 +114,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
doHeader(fileName + getFileSuffix(false), output, headerChoice);
|
||||
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
|
||||
UnicodeProperty up = DerivedProperty.make(i, Main.ucd);
|
||||
UnicodeProperty up = DerivedProperty.make(i, Default.ucd);
|
||||
boolean keepGoing = true;
|
||||
if (!up.isStandard()) keepGoing = false;
|
||||
if ((up.getType() & type) == 0) keepGoing = false;
|
||||
|
@ -125,7 +125,7 @@ public class GenerateData implements UCD_Types {
|
|||
System.out.print('.');
|
||||
output.println(HORIZONTAL_LINE);
|
||||
output.println();
|
||||
new DerivedPropertyLister(Main.ucd, i, output).print();
|
||||
new DerivedPropertyLister(Default.ucd, i, output).print();
|
||||
output.flush();
|
||||
}
|
||||
output.close();
|
||||
|
@ -134,27 +134,27 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
/*
|
||||
public static void listStrings(String file, int type, int subtype) throws IOException {
|
||||
Main.ucd = UCD.make("3.1.0");
|
||||
Default.ucd = UCD.make("3.1.0");
|
||||
UCD ucd30 = UCD.make("3.0.0");
|
||||
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
|
||||
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if ((i & 0xFFF) == 0) System.out.println("# " + i);
|
||||
if (!Main.ucd.isRepresented(i)) continue;
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (ucd30.isRepresented(i)) continue;
|
||||
String string = "";
|
||||
switch(type) {
|
||||
case 0: string = Main.ucd.getSimpleLowercase(i);
|
||||
case 0: string = Default.ucd.getSimpleLowercase(i);
|
||||
}
|
||||
if (UTF32.length32(string) == 1 && UTF32.char32At(string,0) == i) continue;
|
||||
output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + Main.ucd.getName(i));
|
||||
output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + Default.ucd.getName(i));
|
||||
}
|
||||
output.close();
|
||||
}
|
||||
*/
|
||||
|
||||
public static void generateCompExclusions() throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile);
|
||||
String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true));
|
||||
|
@ -164,7 +164,7 @@ public class GenerateData implements UCD_Types {
|
|||
output.println("#");
|
||||
output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table.");
|
||||
output.println("#");
|
||||
if (Main.ucd.getVersion().equals("3.2.0")) {
|
||||
if (Default.ucd.getVersion().equals("3.2.0")) {
|
||||
output.println("# The format of the comments in this file has been updated since the last version,");
|
||||
output.println("# CompositionExclusions-3.txt. The only substantive change to this file between that");
|
||||
output.println("# version and this one is the addition of U+2ADC FORKING.");
|
||||
|
@ -226,7 +226,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public CompLister(PrintWriter output, int type) {
|
||||
this.output = output;
|
||||
ucdData = Main.ucd;
|
||||
ucdData = Default.ucd;
|
||||
oldUCD = UCD.make("3.0.0");
|
||||
// showOnConsole = true;
|
||||
alwaysBreaks = type <= 2; // CHANGE LATER
|
||||
|
@ -269,13 +269,13 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
// find properties
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
int count = 0;
|
||||
UnicodeProperty[] props = new UnicodeProperty[500];
|
||||
for (int i = 1; i < LIMIT_ENUM; ++i) { // || iType == SCRIPT
|
||||
int iType = i & 0xFF00;
|
||||
if (iType == JOINING_GROUP || iType == AGE || iType == COMBINING_CLASS) continue;
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) {
|
||||
System.out.println("Skipping " + up.getName() + "; not standard");
|
||||
|
@ -312,9 +312,9 @@ public class GenerateData implements UCD_Types {
|
|||
int total = 0;
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
int cat = Main.ucd.getCategory(cp);
|
||||
int cat = Default.ucd.getCategory(cp);
|
||||
if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
|
||||
if (!Main.ucd.isAllocated(cp)) continue;
|
||||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
UnicodeProperty up = props[i];
|
||||
|
@ -326,7 +326,7 @@ public class GenerateData implements UCD_Types {
|
|||
if (!map.containsKey(probe)) {
|
||||
map.put(probe.clone(), new Integer(cp));
|
||||
Utility.fixDot();
|
||||
// System.out.println("Set Size: " + map.size() + ", total: " + total + ", " + Main.ucd.getCodeAndName(cp));
|
||||
// System.out.println("Set Size: " + map.size() + ", total: " + total + ", " + Default.ucd.getCodeAndName(cp));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,7 +338,7 @@ public class GenerateData implements UCD_Types {
|
|||
while (it.hasNext()) {
|
||||
BitSet probe2 = (BitSet) it.next();
|
||||
int ch = ((Integer) map.get(probe2)).intValue();
|
||||
output.println(Main.ucd.getCodeAndName(ch));
|
||||
output.println(Default.ucd.getCodeAndName(ch));
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (!probe2.get(i)) continue;
|
||||
output.print(" " + props[i].getFullName(SHORT));
|
||||
|
@ -350,10 +350,10 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public static void listDifferences() throws IOException {
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + getFileSuffix(true));
|
||||
output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
|
||||
output.println("# Generated for " + Main.ucd.getVersion());
|
||||
output.println("# Generated for " + Default.ucd.getVersion());
|
||||
output.println(generateDateLine());
|
||||
output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)");
|
||||
|
||||
|
@ -361,7 +361,7 @@ public class GenerateData implements UCD_Types {
|
|||
for (int i = 1; i < LIMIT_ENUM; ++i) {
|
||||
int iType = i & 0xFF00;
|
||||
if (iType == JOINING_GROUP || iType == AGE || iType == COMBINING_CLASS || iType == SCRIPT) continue;
|
||||
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Main.ucd);
|
||||
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (upi == null) continue;
|
||||
if (!upi.isStandard()) {
|
||||
System.out.println("Skipping " + upi.getName() + "; not standard");
|
||||
|
@ -385,7 +385,7 @@ public class GenerateData implements UCD_Types {
|
|||
int jType = j & 0xFF00;
|
||||
if (jType == JOINING_GROUP || jType == AGE || jType == COMBINING_CLASS || jType == SCRIPT
|
||||
|| (jType == iType && jType != BINARY_PROPERTIES)) continue;
|
||||
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Main.ucd);
|
||||
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
|
||||
if (upj == null) continue;
|
||||
if (!upj.isStandard()) continue;
|
||||
if (upj.getValueType() < BINARY) continue;
|
||||
|
@ -405,9 +405,9 @@ public class GenerateData implements UCD_Types {
|
|||
int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0;
|
||||
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
int cat = Main.ucd.getCategory(cp);
|
||||
int cat = Default.ucd.getCategory(cp);
|
||||
if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
|
||||
if (!Main.ucd.isAllocated(cp)) continue;
|
||||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
|
||||
boolean iProp = upi.hasValue(cp);
|
||||
boolean jProp = upj.hasValue(cp);
|
||||
|
@ -444,7 +444,7 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void generatePropertyAliases() throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String prop = "";
|
||||
String propAbb = "";
|
||||
String value = "";
|
||||
|
@ -455,7 +455,7 @@ public class GenerateData implements UCD_Types {
|
|||
Set accumulation = new TreeSet(java.text.Collator.getInstance());
|
||||
|
||||
/*
|
||||
BufferedReader blocks = Utility.openUnicodeFile("Blocks", Main.ucd.getVersion());
|
||||
BufferedReader blocks = Utility.openUnicodeFile("Blocks", Default.ucd.getVersion());
|
||||
String[] parts = new String[10];
|
||||
while (true) {
|
||||
String line = blocks.readLine();
|
||||
|
@ -509,7 +509,7 @@ public class GenerateData implements UCD_Types {
|
|||
if (type == AGE) continue;
|
||||
if (i == (BINARY_PROPERTIES | CaseFoldTurkishI)) continue;
|
||||
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
|
||||
|
@ -547,7 +547,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
|
||||
if (type == SCRIPT) {
|
||||
value = Main.ucd.getCase(value, FULL, TITLE);
|
||||
value = Default.ucd.getCase(value, FULL, TITLE);
|
||||
}
|
||||
|
||||
valueAbb = up.getValue(SHORT);
|
||||
|
@ -774,7 +774,7 @@ public class GenerateData implements UCD_Types {
|
|||
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
|
||||
|
||||
public static String generateBat(String directory, String fileRoot, String suffix) throws IOException {
|
||||
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Main.ucd.getVersion(), true, true);
|
||||
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), true, true);
|
||||
if (mostRecent != null) {
|
||||
generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
|
||||
mostRecent, directory + fileRoot + suffix);
|
||||
|
@ -783,7 +783,7 @@ public class GenerateData implements UCD_Types {
|
|||
return null;
|
||||
}
|
||||
|
||||
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Main.ucd.getVersion(), false, true);
|
||||
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), false, true);
|
||||
if (lessRecent != null && !mostRecent.equals(lessRecent)) {
|
||||
generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix + "-OLDER",
|
||||
lessRecent, directory + fileRoot + suffix);
|
||||
|
@ -809,7 +809,7 @@ public class GenerateData implements UCD_Types {
|
|||
public static void generateVerticalSlice(int startEnum, int endEnum,
|
||||
int headerChoice, String directory, String file) throws IOException {
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String newFile = directory + file + getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile);
|
||||
String mostRecent = generateBat(directory, file, getFileSuffix(true));
|
||||
|
@ -817,7 +817,7 @@ public class GenerateData implements UCD_Types {
|
|||
doHeader(file + getFileSuffix(false), output, headerChoice);
|
||||
int last = -1;
|
||||
for (int i = startEnum; i < endEnum; ++i) {
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
|
||||
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|
||||
|
@ -845,7 +845,7 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
System.out.print(".");
|
||||
if (DEBUG) System.out.println(i);
|
||||
new MyPropertyLister(Main.ucd, i, output).print();
|
||||
new MyPropertyLister(Default.ucd, i, output).print();
|
||||
output.flush();
|
||||
}
|
||||
if (endEnum == LIMIT_ENUM) {
|
||||
|
@ -859,13 +859,13 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
Set floatSet = new TreeSet();
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
float nv = Main.ucd.getNumericValue(i);
|
||||
float nv = Default.ucd.getNumericValue(i);
|
||||
if (Float.isNaN(nv)) continue;
|
||||
floatSet.add(new Float(nv));
|
||||
}
|
||||
Iterator it = floatSet.iterator();
|
||||
while(it.hasNext()) {
|
||||
new MyFloatLister(Main.ucd, ((Float)it.next()).floatValue(), output).print();
|
||||
new MyFloatLister(Default.ucd, ((Float)it.next()).floatValue(), output).print();
|
||||
output.println();
|
||||
System.out.print(".");
|
||||
}
|
||||
|
@ -878,7 +878,7 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String newFile = directory + fileName + getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, true, false);
|
||||
String mostRecent = generateBat(directory, fileName, getFileSuffix(true));
|
||||
|
@ -936,8 +936,8 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
if (!Main.ucd.isAssigned(ch)) continue;
|
||||
if (Main.ucd.isPUA(ch)) continue;
|
||||
if (!Default.ucd.isAssigned(ch)) continue;
|
||||
if (Default.ucd.isPUA(ch)) continue;
|
||||
String cc = UTF32.valueOf32(ch);
|
||||
writeLine(cc,log, true);
|
||||
}
|
||||
|
@ -947,9 +947,9 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
||||
Utility.dot(ch);
|
||||
if (!Main.ucd.isAssigned(ch)) continue;
|
||||
if (Main.ucd.isPUA(ch)) continue;
|
||||
int cc = Main.ucd.getCombiningClass(ch);
|
||||
if (!Default.ucd.isAssigned(ch)) continue;
|
||||
if (Default.ucd.isPUA(ch)) continue;
|
||||
int cc = Default.ucd.getCombiningClass(ch);
|
||||
if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
|
||||
}
|
||||
|
||||
|
@ -963,9 +963,9 @@ public class GenerateData implements UCD_Types {
|
|||
for (int ch = 0; ch < 0x10FFFF; ++ch) {
|
||||
|
||||
Utility.dot(ch);
|
||||
if (!Main.ucd.isAssigned(ch)) continue;
|
||||
if (Main.ucd.isPUA(ch)) continue;
|
||||
short c = Main.ucd.getCombiningClass(ch);
|
||||
if (!Default.ucd.isAssigned(ch)) continue;
|
||||
if (Default.ucd.isPUA(ch)) continue;
|
||||
short c = Default.ucd.getCombiningClass(ch);
|
||||
if (c == 0) continue;
|
||||
|
||||
// add character with higher class, same class, lower class
|
||||
|
@ -1012,19 +1012,19 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
static void writeLine(String cc, PrintWriter log, boolean check) {
|
||||
String c = Main.nfc.normalize(cc);
|
||||
String d = Main.nfd.normalize(cc);
|
||||
String kc = Main.nfkc.normalize(cc);
|
||||
String kd = Main.nfkd.normalize(cc);
|
||||
String c = Default.nfc.normalize(cc);
|
||||
String d = Default.nfd.normalize(cc);
|
||||
String kc = Default.nfkc.normalize(cc);
|
||||
String kd = Default.nfkd.normalize(cc);
|
||||
if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return;
|
||||
|
||||
// consistency check
|
||||
String dc = Main.nfd.normalize(c);
|
||||
String dkc = Main.nfd.normalize(kc);
|
||||
String dc = Default.nfd.normalize(c);
|
||||
String dkc = Default.nfd.normalize(kc);
|
||||
if (!dc.equals(d) || !dkc.equals(kd)) {
|
||||
System.out.println("Danger Will Robinson!");
|
||||
Normalizer.SHOW_PROGRESS = true;
|
||||
d = Main.nfd.normalize(cc);
|
||||
d = Default.nfd.normalize(cc);
|
||||
}
|
||||
|
||||
// printout
|
||||
|
@ -1033,7 +1033,7 @@ public class GenerateData implements UCD_Types {
|
|||
+ Utility.hex(kc," ") + ";" + Utility.hex(kd," ")
|
||||
+ "; # ("
|
||||
+ comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; "
|
||||
+ ") " + Main.ucd.getName(cc));
|
||||
+ ") " + Default.ucd.getName(cc));
|
||||
}
|
||||
|
||||
static StringBuffer commaResult = new StringBuffer();
|
||||
|
@ -1044,7 +1044,7 @@ public class GenerateData implements UCD_Types {
|
|||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF32.count16(i)) {
|
||||
cp = UTF32.char32At(s, i);
|
||||
if (Main.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
|
||||
if (Default.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
|
||||
UTF32.append32(commaResult, cp);
|
||||
}
|
||||
return commaResult.toString();
|
||||
|
@ -1078,7 +1078,7 @@ public class GenerateData implements UCD_Types {
|
|||
};
|
||||
|
||||
static final void generateAge(String directory, String filename) throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
String newFile = directory + filename + getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile);
|
||||
String mostRecent = generateBat(directory, filename, getFileSuffix(true));
|
||||
|
@ -1174,32 +1174,32 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void listCombiningAccents() throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true));
|
||||
Set set = new TreeSet();
|
||||
Set set2 = new TreeSet();
|
||||
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
Utility.dot(i);
|
||||
if (!Main.ucd.isRepresented(i)) continue;
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
|
||||
if (!Main.nfd.normalizationDiffers(i)) {
|
||||
if (Main.ucd.getScript(i) == LATIN_SCRIPT) {
|
||||
if (!Default.nfd.normalizationDiffers(i)) {
|
||||
if (Default.ucd.getScript(i) == LATIN_SCRIPT) {
|
||||
int cp = i;
|
||||
String hex = "u" + Utility.hex(cp, 4);
|
||||
set.add("# yyy $x <> \\" + hex + " ; # " + Main.ucd.getName(cp));
|
||||
set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
String decomp = Main.nfd.normalize(i);
|
||||
String decomp = Default.nfd.normalize(i);
|
||||
int j;
|
||||
for (j = 0; j < decomp.length(); j += UTF16.getCharCount(i)) {
|
||||
int cp = UTF16.charAt(decomp, j);
|
||||
byte cat = Main.ucd.getCategory(cp);
|
||||
byte cat = Default.ucd.getCategory(cp);
|
||||
if (cat != Mn) continue;
|
||||
String hex = "u" + Utility.hex(cp, 4);
|
||||
set.add("# xxx $x <> \\" + hex + " ; # " + Main.ucd.getName(cp));
|
||||
set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1211,7 +1211,7 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void listGreekVowels() throws IOException {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true));
|
||||
Set set = new TreeSet();
|
||||
Set set2 = new TreeSet();
|
||||
|
@ -1224,14 +1224,14 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
for (char i = 0; i < 0xFFFF; ++i) {
|
||||
Utility.dot(i);
|
||||
if (!Main.ucd.isRepresented(i)) continue;
|
||||
if (Main.ucd.getScript(i) != GREEK_SCRIPT) continue;
|
||||
String decomp = Main.nfd.normalize(i);
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (Default.ucd.getScript(i) != GREEK_SCRIPT) continue;
|
||||
String decomp = Default.nfd.normalize(i);
|
||||
|
||||
if (decomp.indexOf('\u0306') >= 0) continue; // skip breve
|
||||
if (decomp.indexOf('\u0304') >= 0) continue; // skip macron
|
||||
|
||||
String comp = Main.nfc.normalize(decomp);
|
||||
String comp = Default.nfc.normalize(decomp);
|
||||
if (!comp.equals(String.valueOf(i))) continue; // skip compats
|
||||
|
||||
char first = decomp.charAt(0);
|
||||
|
@ -1245,7 +1245,7 @@ public class GenerateData implements UCD_Types {
|
|||
for (int j = 0; j < diphthongStart.length(); ++j) {
|
||||
String v = diphthongStart.substring(j, j+1);
|
||||
char vc = v.charAt(0);
|
||||
if (Main.ucd.getCategory(vc) == Ll && Main.ucd.getCategory(first) == Lu) continue;
|
||||
if (Default.ucd.getCategory(vc) == Ll && Default.ucd.getCategory(first) == Lu) continue;
|
||||
if (etas.indexOf(vc) >= 0 && iotas.indexOf(first) >= 0) continue;
|
||||
set.add(new Pair(h + v + first, new Pair(v + decomp, v + i)));
|
||||
}
|
||||
|
@ -1271,7 +1271,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
public static void listKatakana() throws IOException {
|
||||
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
for (char i = 'a'; i <= 'z'; ++i) {
|
||||
doKana(String.valueOf(i));
|
||||
if (i == 'c') doKana("ch");
|
||||
|
@ -1304,18 +1304,18 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void genTrailingZeros() {
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if ((i & 0xFFF) == 0) System.out.println("# " + i);
|
||||
if (!Main.ucd.isAssigned(i)) continue;
|
||||
if (!Main.nfd.normalizationDiffers(i)) continue;
|
||||
String decomp = Main.nfd.normalize(i);
|
||||
if (!Default.ucd.isAssigned(i)) continue;
|
||||
if (!Default.nfd.normalizationDiffers(i)) continue;
|
||||
String decomp = Default.nfd.normalize(i);
|
||||
int cp;
|
||||
for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(decomp,j);
|
||||
if (j == 0) continue; // skip first
|
||||
if (Main.ucd.getCombiningClass(cp) == 0) {
|
||||
if (Default.ucd.getCombiningClass(cp) == 0) {
|
||||
result.add(cp);
|
||||
}
|
||||
}
|
||||
|
@ -1328,8 +1328,8 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.hex(start)
|
||||
+ (start != end ? ".." + Utility.hex(end) : "")
|
||||
+ "; "
|
||||
+ Main.ucd.getName(start)
|
||||
+ (start != end ? ".." + Main.ucd.getName(end) : ""));
|
||||
+ Default.ucd.getName(start)
|
||||
+ (start != end ? ".." + Default.ucd.getName(end) : ""));
|
||||
}
|
||||
System.out.println("TrailingZero count: " + result.size());
|
||||
}
|
||||
|
|
|
@ -5,35 +5,17 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/03/20 00:21:42 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import com.ibm.text.utility.*;
|
||||
import java.util.Date;
|
||||
|
||||
public final class Main implements UCD_Types {
|
||||
|
||||
static String ucdVersion = UCD.latestVersion;
|
||||
static UCD ucd;
|
||||
static Normalizer nfc;
|
||||
static Normalizer nfd;
|
||||
static Normalizer nfkc;
|
||||
static Normalizer nfkd;
|
||||
static Normalizer[] nf = new Normalizer[4];
|
||||
|
||||
static void setUCD() {
|
||||
ucd = UCD.make(Main.ucdVersion);
|
||||
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, Main.ucdVersion);
|
||||
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, Main.ucdVersion);
|
||||
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, Main.ucdVersion);
|
||||
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, Main.ucdVersion);
|
||||
System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(Main.ucd.getDate())));
|
||||
}
|
||||
|
||||
static final String[] ALL_FILES = {
|
||||
"CaseFolding",
|
||||
"CompositionExclusions",
|
||||
|
@ -74,8 +56,9 @@ public final class Main implements UCD_Types {
|
|||
VerifyUCD.CheckCaseFold();
|
||||
VerifyUCD.checkAgainstUInfo();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{ucdVersion});
|
||||
else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
|
||||
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
|
||||
else if (arg.equalsIgnoreCase("version")) Default.setUCD(args[++i]);
|
||||
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
|
||||
else if (arg.equalsIgnoreCase("testskippable")) NFSkippable.main(null);
|
||||
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
|
||||
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
|
||||
|
@ -105,6 +88,10 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
|
||||
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
|
||||
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
|
||||
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
|
||||
|
||||
|
||||
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
*/
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
|
||||
* $Date: 2001/12/13 23:35:57 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -33,14 +33,14 @@ public final class TestNormalization {
|
|||
|
||||
public static void main(String[] args) throws java.io.IOException {
|
||||
System.out.println("Creating Normalizers");
|
||||
Main.setUCD();
|
||||
Default.setUCD();
|
||||
|
||||
|
||||
String x = UTF32.valueOf32(0x10000);
|
||||
check("NFC", Main.nfc, x);
|
||||
check("NFD", Main.nfd, x);
|
||||
check("NFKC", Main.nfkc, x);
|
||||
check("NFKD", Main.nfkd, x);
|
||||
check("NFC", Default.nfc, x);
|
||||
check("NFD", Default.nfd, x);
|
||||
check("NFKC", Default.nfkc, x);
|
||||
check("NFKD", Default.nfkd, x);
|
||||
|
||||
|
||||
out = new PrintWriter(
|
||||
|
@ -87,36 +87,36 @@ public final class TestNormalization {
|
|||
}
|
||||
|
||||
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
||||
errorCount += check("NFCa", Main.nfc, parts[1], parts[0]);
|
||||
errorCount += check("NFCb", Main.nfc, parts[1], parts[1]);
|
||||
errorCount += check("NFCc", Main.nfc, parts[1], parts[2]);
|
||||
errorCount += check("NFCa", Default.nfc, parts[1], parts[0]);
|
||||
errorCount += check("NFCb", Default.nfc, parts[1], parts[1]);
|
||||
errorCount += check("NFCc", Default.nfc, parts[1], parts[2]);
|
||||
|
||||
// c4 == NFC(c4) == NFC(c5)
|
||||
errorCount += check("NFCd", Main.nfc, parts[3], parts[3]);
|
||||
errorCount += check("NFCe", Main.nfc, parts[3], parts[4]);
|
||||
errorCount += check("NFCd", Default.nfc, parts[3], parts[3]);
|
||||
errorCount += check("NFCe", Default.nfc, parts[3], parts[4]);
|
||||
|
||||
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
|
||||
errorCount += check("NFDa", Main.nfd, parts[2], parts[0]);
|
||||
errorCount += check("NFDb", Main.nfd, parts[2], parts[1]);
|
||||
errorCount += check("NFDc", Main.nfd, parts[2], parts[2]);
|
||||
errorCount += check("NFDa", Default.nfd, parts[2], parts[0]);
|
||||
errorCount += check("NFDb", Default.nfd, parts[2], parts[1]);
|
||||
errorCount += check("NFDc", Default.nfd, parts[2], parts[2]);
|
||||
|
||||
// c5 == NFD(c4) == NFD(c5)
|
||||
errorCount += check("NFDd", Main.nfd, parts[4], parts[3]);
|
||||
errorCount += check("NFDe", Main.nfd, parts[4], parts[4]);
|
||||
errorCount += check("NFDd", Default.nfd, parts[4], parts[3]);
|
||||
errorCount += check("NFDe", Default.nfd, parts[4], parts[4]);
|
||||
|
||||
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
|
||||
errorCount += check("NFKCa", Main.nfkc, parts[3], parts[0]);
|
||||
errorCount += check("NFKCb", Main.nfkc, parts[3], parts[1]);
|
||||
errorCount += check("NFKCc", Main.nfkc, parts[3], parts[2]);
|
||||
errorCount += check("NFKCd", Main.nfkc, parts[3], parts[3]);
|
||||
errorCount += check("NFKCe", Main.nfkc, parts[3], parts[4]);
|
||||
errorCount += check("NFKCa", Default.nfkc, parts[3], parts[0]);
|
||||
errorCount += check("NFKCb", Default.nfkc, parts[3], parts[1]);
|
||||
errorCount += check("NFKCc", Default.nfkc, parts[3], parts[2]);
|
||||
errorCount += check("NFKCd", Default.nfkc, parts[3], parts[3]);
|
||||
errorCount += check("NFKCe", Default.nfkc, parts[3], parts[4]);
|
||||
|
||||
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
|
||||
errorCount += check("NFKDa", Main.nfkd, parts[4], parts[0]);
|
||||
errorCount += check("NFKDb", Main.nfkd, parts[4], parts[1]);
|
||||
errorCount += check("NFKDc", Main.nfkd, parts[4], parts[2]);
|
||||
errorCount += check("NFKDd", Main.nfkd, parts[4], parts[3]);
|
||||
errorCount += check("NFKDe", Main.nfkd, parts[4], parts[4]);
|
||||
errorCount += check("NFKDa", Default.nfkd, parts[4], parts[0]);
|
||||
errorCount += check("NFKDb", Default.nfkd, parts[4], parts[1]);
|
||||
errorCount += check("NFKDc", Default.nfkd, parts[4], parts[2]);
|
||||
errorCount += check("NFKDd", Default.nfkd, parts[4], parts[3]);
|
||||
errorCount += check("NFKDe", Default.nfkd, parts[4], parts[4]);
|
||||
}
|
||||
System.out.println("Total errors in file: " + errorCount
|
||||
+ ", lines: " + lineErrorCount);
|
||||
|
@ -150,21 +150,21 @@ public final class TestNormalization {
|
|||
}
|
||||
String otherList = "";
|
||||
if (!base.equals(other)) {
|
||||
otherList = "(" + Main.ucd.getCodeAndName(other) + ")";
|
||||
otherList = "(" + Default.ucd.getCodeAndName(other) + ")";
|
||||
}
|
||||
out.println("DIFF " + type + ": "
|
||||
+ Main.ucd.getCodeAndName(base) + " != "
|
||||
+ Default.ucd.getCodeAndName(base) + " != "
|
||||
+ type
|
||||
+ otherList
|
||||
+ " == " + Main.ucd.getCodeAndName(trans)
|
||||
+ " == " + Default.ucd.getCodeAndName(trans)
|
||||
+ temp
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ChainException("DIFF " + type + ": "
|
||||
+ Main.ucd.getCodeAndName(base) + " != "
|
||||
+ type + "(" + Main.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
|
||||
+ Default.ucd.getCodeAndName(base) + " != "
|
||||
+ type + "(" + Default.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -178,10 +178,10 @@ public final class TestNormalization {
|
|||
if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
|
||||
if (charsListed.get(missing)) continue;
|
||||
String x = UTF32.valueOf32(missing);
|
||||
errorCount += check("NFC", Main.nfc, x);
|
||||
errorCount += check("NFD", Main.nfd, x);
|
||||
errorCount += check("NFKC", Main.nfkc, x);
|
||||
errorCount += check("NFKD", Main.nfkd, x);
|
||||
errorCount += check("NFC", Default.nfc, x);
|
||||
errorCount += check("NFD", Default.nfd, x);
|
||||
errorCount += check("NFKC", Default.nfkc, x);
|
||||
errorCount += check("NFKD", Default.nfkd, x);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2002/03/20 00:21:42 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -31,7 +31,7 @@ public final class UCD implements UCD_Types {
|
|||
/**
|
||||
* Used for the default version.
|
||||
*/
|
||||
public static final String latestVersion = "3.1.1";
|
||||
public static final String latestVersion = "3.2.0";
|
||||
|
||||
/**
|
||||
* Create singleton instance for default (latest) version
|
||||
|
@ -675,6 +675,11 @@ public final class UCD implements UCD_Types {
|
|||
return UCD_Names.SCRIPT[prop];
|
||||
}
|
||||
|
||||
public static String getScriptID_fromIndex(byte prop, byte length) {
|
||||
if (length == SHORT) return UCD_Names.ABB_SCRIPT[prop];
|
||||
return UCD_Names.SCRIPT[prop];
|
||||
}
|
||||
|
||||
public String getAgeID(int codePoint) {
|
||||
return getAgeID_fromIndex(getAge(codePoint));
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2002/04/23 01:59:16 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -422,12 +422,12 @@ public final class Utility { // COMMON UTILITIES
|
|||
// fix noncharacters, since XML can't handle
|
||||
case 0xFFFE: case 0xFFFF:
|
||||
|
||||
return "#x" + hex(c,1) + ";";
|
||||
return "#" + hex(c,1);
|
||||
}
|
||||
|
||||
// fix surrogates, since XML can't handle
|
||||
if (UTF32.isSurrogate(c)) {
|
||||
return "#x" + hex(c,1) + ";";
|
||||
return "#" + hex(c,1);
|
||||
}
|
||||
|
||||
if (c <= 0x7E || UTF8) {
|
||||
|
@ -519,9 +519,14 @@ public final class Utility { // COMMON UTILITIES
|
|||
}
|
||||
|
||||
public static PrintWriter openPrintWriter(String filename, boolean removeCR, boolean latin1) throws IOException {
|
||||
File file = new File(getOutputName(filename));
|
||||
System.out.println("Creating File: " + file);
|
||||
File parent = new File(file.getParent());
|
||||
//System.out.println("Creating File: "+ parent);
|
||||
parent.mkdirs();
|
||||
return new PrintWriter(
|
||||
new UTF8StreamWriter(
|
||||
new FileOutputStream(getOutputName(filename)),
|
||||
new FileOutputStream(file),
|
||||
32*1024,
|
||||
removeCR, latin1));
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue