chart fixes

X-SVN-Rev: 8499
This commit is contained in:
Mark Davis 2002-04-23 01:59:16 +00:00
parent d85d2f804e
commit 3b30dd9146
17 changed files with 1237 additions and 689 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.7 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -34,8 +34,8 @@ public class GenOverlap implements UCD_Types {
collator = collatorIn;
ucd = UCD.make();
nfd = new Normalizer(Normalizer.NFD);
nfkd = new Normalizer(Normalizer.NFKD);
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
for (int cp = 0x0; cp <= 0x10FFFF; ++cp) {
Utility.dot(cp);
@ -67,8 +67,8 @@ public class GenOverlap implements UCD_Types {
ucd = UCD.make();
nfd = new Normalizer(Normalizer.NFD);
nfkd = new Normalizer(Normalizer.NFKD);
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
@ -339,8 +339,8 @@ public class GenOverlap implements UCD_Types {
ucd = UCD.make();
nfd = new Normalizer(Normalizer.NFD);
nfkd = new Normalizer(Normalizer.NFKD);
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
@ -448,7 +448,7 @@ public class GenOverlap implements UCD_Types {
newKeys.removeAll(joint);
oldKeys.removeAll(joint);
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false);
PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), false, false);
Iterator it = list.iterator();
int last = -1;
while (it.hasNext()) {
@ -541,8 +541,8 @@ public class GenOverlap implements UCD_Types {
//nfkd = new Normalizer(Normalizer.NFKD);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
nfd = new Normalizer(Normalizer.NFD);
nfkd = new Normalizer(Normalizer.NFKD);
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
int tableLength = 257;
@ -694,12 +694,12 @@ public class GenOverlap implements UCD_Types {
}
public static void listCyrillic(UCA collatorIn) throws IOException {
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false);
PrintWriter log = Utility.openPrintWriter("ListCyrillic.txt", false, false);
Set set = new TreeSet(collatorIn);
Set set2 = new TreeSet(collatorIn);
ucd = UCD.make();
nfd = new Normalizer(Normalizer.NFD);
nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
for (char i = 0; i < 0xFFFF; ++i) {
Utility.dot(i);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.9 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -483,9 +483,10 @@ final public class UCA implements Comparator {
/**
* Returns the char associated with a FIXED value
*/
public char charFromFixed(int ce) {
/*public char charFromFixed(int ce) {
return getPrimary(ce);
}
*/
/**
* Return the type of the CE
@ -716,7 +717,7 @@ final public class UCA implements Comparator {
/**
* Records the dataversion
*/
private String dataVersion = "?";
private String dataVersion = "3.1d1";
/**
* Records the dataversion
@ -791,7 +792,7 @@ final public class UCA implements Comparator {
* A special bit combination in a CE is used to reserve exception cases. This has the effect
* of removing 32 primary key values out of the 65536 possible.
*/
static final int EXCEPTION_CE_MASK = 0xFFC00000;
static final int EXCEPTION_CE_MASK = 0xFF000000;
/**
* Used to compose Hangul and Han characters
@ -807,7 +808,8 @@ final public class UCA implements Comparator {
* There are at least 34 values, so that we can use a range for surrogates
* However, we do add to the first weight if we have surrogate pairs!
*/
public static final int UNSUPPORTED_BASE = 0xFFC2;
public static final int UNSUPPORTED_BASE = 0xFF40;
public static final int UNSUPPORTED_TOP = 0xFFFF;
static final int UNSUPPORTED = makeKey(UNSUPPORTED_BASE, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
// was 0xFFC20101;
@ -819,7 +821,7 @@ final public class UCA implements Comparator {
* to be looked up (with following characters) in the contractingTable.<br>
* This isn't a MASK since there is exactly one value.
*/
static final int CONTRACTING = 0xFFC10000;
static final int CONTRACTING = 0xFF310000;
/**
* Expanding characters are marked with an exception bit combination
@ -827,7 +829,7 @@ final public class UCA implements Comparator {
* This means that they map to more than one CE, which is looked up in
* the expansionTable by index. See EXCEPTION_INDEX_MASK
*/
static final int EXPANDING_MASK = 0xFFC00000; // marks expanding range start
static final int EXPANDING_MASK = 0xFF300000; // marks expanding range start
/**
* This mask is used to get the index from an EXPANDING exception.
@ -1165,12 +1167,12 @@ final public class UCA implements Comparator {
}
public UCAContents getContents(byte ceLimit, Normalizer skipDecomps) {
return new UCAContents(ceLimit, skipDecomps);
return new UCAContents(ceLimit, skipDecomps, ucdVersion);
}
public class UCAContents {
int current = -1;
Normalizer skipDecomps = new Normalizer(Normalizer.NFD);
Normalizer skipDecomps;
Normalizer nfd = skipDecomps;
Iterator enum = null;
byte ceLimit;
@ -1183,8 +1185,9 @@ final public class UCA implements Comparator {
/**
* use FIXED_CE as the limit
*/
UCAContents(byte ceLimit, Normalizer skipDecomps) {
UCAContents(byte ceLimit, Normalizer skipDecomps, String unicodeVersion) {
this.ceLimit = ceLimit;
this.nfd = new Normalizer(Normalizer.NFD, unicodeVersion);
this.skipDecomps = skipDecomps;
}
@ -1208,7 +1211,7 @@ final public class UCA implements Comparator {
if (!nfd.normalizationDiffers(current) || type == HANGUL_CE) {
if (type >= ceLimit) continue;
if (skipDecomps != null && skipDecomps.hasDecomposition(current)) continue;
if (skipDecomps != null && skipDecomps.normalizationDiffers(current)) continue;
}
result = UTF16.valueOf(current);
return result;
@ -1363,9 +1366,13 @@ final public class UCA implements Comparator {
boolean record = true;
/* if (multiChars.length() > 0) record = false;
else */
if (toD.hasDecomposition(value)) record = false;
if (toD.normalizationDiffers(value)) record = false;
// collect CEs
if (value == 0x2F00) {
System.out.println("debug");
}
int ce = getCEFromLine(value, line, position, record);
int ce2 = getCEFromLine(value, line, position, record);
if (CHECK_UNIQUE && (ce2 == TERMINATOR || CHECK_UNIQUE_EXPANSIONS)) {
@ -1765,7 +1772,7 @@ final public class UCA implements Comparator {
* Used for checking data file integrity
*/
private void checkUnique(char value, int result, int fourth, String line) {
if (toD.hasDecomposition(value)) return; // don't check decomposables.
if (toD.normalizationDiffers(value)) return; // don't check decomposables.
Object ceObj = new Long(((long)result << 16) | fourth);
Object probe = uniqueTable.get(ceObj);
if (probe != null) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.4 $
* $Date: 2002/04/23 01:59:16 $
* $Revision: 1.5 $
*
*******************************************************************************
*/
@ -19,20 +19,21 @@ import java.io.*;
import com.ibm.text.UCD.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UTF16;
import java.text.SimpleDateFormat;
public class WriteCharts implements UCD_Types {
static UCD ucd;
static final byte UNSUPPORTED = 120;
static boolean HACK_KANA = false;
static public void test(UCA uca) throws IOException {
Default.setUCD(uca.getUCDVersion());
HACK_KANA = true;
uca.setAlternate(UCA.NON_IGNORABLE);
ucd = UCD.make();
Normalizer nfd = new Normalizer(Normalizer.NFD);
Normalizer nfc = new Normalizer(Normalizer.NFC);
//Normalizer nfd = new Normalizer(Normalizer.NFD);
//Normalizer nfc = new Normalizer(Normalizer.NFC);
UCA.UCAContents cc = uca.getContents(UCA.FIXED_CE, null); // nfd instead of null if skipping decomps
cc.enableSamples();
@ -42,6 +43,10 @@ public class WriteCharts implements UCD_Types {
while (true) {
String x = cc.next();
if (x == null) break;
if (x.equals("\u2F00")) {
System.out.println("debug");
}
set.add(new Pair(uca.getSortKey(x), x));
}
@ -68,7 +73,7 @@ public class WriteCharts implements UCD_Types {
Utility.copyTextFile("charts.css", false, "CollationCharts\\charts.css");
Utility.copyTextFile("help.html", true, "CollationCharts\\help.html");
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html");
indexFile = Utility.openPrintWriter("CollationCharts\\index_list.html", false, false);
Utility.appendFile("index_header.html", true, indexFile);
/*
@ -88,15 +93,18 @@ public class WriteCharts implements UCD_Types {
String s = (String) p.second;
int cp = UTF16.charAt(s,0);
byte script = ucd.getScript(cp);
byte script = Default.ucd.getScript(cp);
// get first non-zero primary
int primary = sortKey.charAt(0);
int currentPrimary = getFirstPrimary(sortKey);
int primary = currentPrimary >>> 16;
if (sortKey.length() < 4) script = -3;
else if (primary == 0) script = -2;
else if (primary < variable) script = -1;
else if (primary < high) script = COMMON_SCRIPT;
else if (primary >= UCA.UNSUPPORTED_BASE) script = UNSUPPORTED;
else if (primary >= UCA.UNSUPPORTED_BASE && primary <= UCA.UNSUPPORTED_TOP) script = UNSUPPORTED;
if (script == KATAKANA_SCRIPT) script = HIRAGANA_SCRIPT;
else if ((script == INHERITED_SCRIPT || script == COMMON_SCRIPT) && oldScript >= 0) script = oldScript;
@ -113,12 +121,12 @@ public class WriteCharts implements UCD_Types {
++scriptCount[script+3];
if (scriptCount[script+3] > 1) {
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
getChunkName(script) + ", " + ucd.getCodeAndName(s));
getChunkName(script) + ", " + Default.ucd.getCodeAndName(s));
}
output = openFile(scriptCount[script+3], script);
output = openFile(scriptCount[script+3], "CollationCharts\\", script);
}
boolean firstPrimaryEquals = primary == lastSortKey.charAt(0);
boolean firstPrimaryEquals = currentPrimary == getFirstPrimary(lastSortKey);
int strength = uca.strengthDifference(sortKey, lastSortKey);
if (strength < 0) strength = -strength;
@ -130,36 +138,63 @@ public class WriteCharts implements UCD_Types {
for (int i = 0; i < sortKey.length(); ++i) {
char w = sortKey.charAt(i);
if (w == 0) break;
if (w >= UCA.UNSUPPORTED_BASE && w <= UCA.UNSUPPORTED_TOP) {
++i; // skip next
}
++ primaryCount;
}
String breaker = "";
if (columnCount > 10 || !firstPrimaryEquals) {
if (!firstPrimaryEquals || script == UNSUPPORTED) breaker = "</tr><tr>";
else breaker = "</tr><tr><td></td>"; // indent 1 cell
columnCount = 0;
if (!firstPrimaryEquals || script == UNSUPPORTED) breaker = "</tr><tr>";
else {
breaker = "</tr><tr><td></td>"; // indent 1 cell
++columnCount;
}
}
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
output.println(breaker + classname
+ " title='" + UCA.toString(sortKey) + "'>"
+ nfc.normalize(s)
String name = Default.ucd.getName(s);
if (s.equals("\u1eaf")) {
System.out.println("debug");
}
String comp = Default.nfc.normalize(s);
String outline = breaker + classname
+ " title='" + Utility.quoteXML(name) + ": " + UCA.toString(sortKey) + "'>"
+ Utility.quoteXML(comp)
+ "<br><tt>"
+ Utility.hex(s)
//+ "<br>" + script
+ "</tt></td>");
+ "</tt></td>";
output.println(outline);
++columnCount;
}
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
closeFile(output);
indexFile.println("<hr><p>Last Modified: " + new Date());
indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
indexFile.println("<br>UCA Version: " + uca.getDataVersion());
indexFile.println("<br>UCD Version: " + ucd.getVersion());
indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
indexFile.println("</p></body></html>");
indexFile.close();
}
/**
 * Packs the leading primary weight of a sort key into one int.
 *
 * The first char of the key is placed in the upper 16 bits. When that char
 * lies in the range UCA.UNSUPPORTED_BASE..UCA.UNSUPPORTED_TOP (inclusive),
 * the second char of the key is placed in the lower 16 bits; otherwise the
 * lower 16 bits are zero.
 *
 * @param sortKey sort key to inspect; must have at least one char, and at
 *                least two when the first char is in the unsupported range
 * @return the packed first primary
 */
static int getFirstPrimary(String sortKey) {
    final int lead = sortKey.charAt(0);
    final boolean hasTrail = lead >= UCA.UNSUPPORTED_BASE && lead <= UCA.UNSUPPORTED_TOP;
    // The second char is only read when the lead is in the unsupported range.
    final int trail = hasTrail ? sortKey.charAt(1) : 0;
    return (lead << 16) | trail;
}
static final String[] CLASSNAME = {
"<td class='q'",
"<td class='q'",
@ -179,16 +214,16 @@ public class WriteCharts implements UCD_Types {
static PrintWriter indexFile;
static PrintWriter openFile(int count, byte script) throws IOException {
static PrintWriter openFile(int count, String directory, int script) throws IOException {
String scriptName = getChunkName(script);
scriptName = ucd.getCase(scriptName, FULL, TITLE);
if (script < 128) scriptName = Default.ucd.getCase(scriptName, FULL, TITLE);
String fileName = "chart_" + scriptName + (count > 1 ? count + "" : "") + ".html";
PrintWriter output = Utility.openPrintWriter("CollationCharts\\" + fileName);
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
Utility.fixDot();
System.out.println("Writing: " + scriptName);
indexFile.println(" | <a href = '" + fileName + "'>" + scriptName + "</a>");
indexFile.println(" <a href = '" + fileName + "'>" + scriptName + "</a>");
String title = "UCA: " + scriptName;
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
output.println("<title>" + title + "</title>");
@ -198,20 +233,277 @@ public class WriteCharts implements UCD_Types {
return output;
}
static String getChunkName(byte script) {
if (script == -3) return "NULL";
static String getChunkName(int script) {
if (script >= 128) return Default.ucd.getCategoryID_fromIndex((byte)(script - 128), LONG);
else if (script == -4) return "NoMapping";
else if (script == -3) return "NULL";
else if (script == -2) return "IGNORABLE";
else if (script == -1) return "VARIABLE";
else if (script == HIRAGANA_SCRIPT) return "KATAKANA-HIRAGANA";
else if (script == HIRAGANA_SCRIPT && HACK_KANA) return "KATAKANA-HIRAGANA";
else if (script == UNSUPPORTED) return "UNSUPPORTED";
else return ucd.getScriptID_fromIndex(script);
else return Default.ucd.getScriptID_fromIndex((byte)script);
}
static void closeFile(PrintWriter output) {
if (output == null) return;
output.println("</body></table></html>");
output.println("</table></body></html>");
output.close();
}
/**
 * Writes the normalization charts into the NormalizationCharts directory.
 *
 * Every represented code point (excluding categories Cs and Co) whose NFKD
 * form differs from the code point itself is collected and grouped by the
 * best script of its NFKD form; when that script is COMMON_SCRIPT the
 * general category plus 128 is used as the group key instead. One HTML file
 * is opened per group via openFile, and each code point becomes one table
 * row with the cells Code, C, D, KC and KD. A cell gets class 'g' when it
 * equals the form it is compared against and class 'n' otherwise.
 *
 * The index writer and the current chart writer are closed even when an
 * exception interrupts the run.
 *
 * @throws IOException if one of the template or output files cannot be
 *                     read or written
 */
static public void normalizationChart() throws IOException {
    Default.setUCD();
    HACK_KANA = false;

    // Sorted set of Pair(groupKey, Pair(nfkdForm, codePoint)); the ordering
    // comes from Pair's own comparison.
    Set set = new TreeSet();
    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (!Default.ucd.isRepresented(i)) continue;
        byte cat = Default.ucd.getCategory(i);
        if (cat == Cs || cat == Co) continue;
        if (!Default.nfkd.normalizationDiffers(i)) continue;
        String decomp = Default.nfkd.normalize(i);
        byte script = getBestScript(decomp);
        set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + 128 : script),
            new Pair(decomp, new Integer(i))));
    }

    Utility.copyTextFile("index.html", true, "NormalizationCharts\\index.html");
    Utility.copyTextFile("charts.css", false, "NormalizationCharts\\charts.css");
    Utility.copyTextFile("norm_help.html", true, "NormalizationCharts\\help.html");

    indexFile = Utility.openPrintWriter("NormalizationCharts\\index_list.html", false, false);
    PrintWriter output = null;
    try {
        Utility.appendFile("norm_index_header.html", true, indexFile);

        int oldScript = -127;   // no group key has this value, so the first row opens a file
        int counter = 0;
        Iterator it = set.iterator();
        while (it.hasNext()) {
            // Advance the counter so the progress indicator receives a changing
            // value, as it does at the other Utility.dot call sites.
            Utility.dot(counter++);

            Pair p = (Pair) it.next();
            int script = ((Integer) p.first).intValue();
            int cp = ((Integer) ((Pair) p.second).second).intValue();

            // A new group key ends the current chart file.
            if (script != oldScript) {
                closeFile(output);
                output = null;
                oldScript = script;
            }

            if (output == null) {
                output = openFile(0, "NormalizationCharts\\", script);
                output.println("<tr><td class='z'>Code</td><td class='z'>C</td><td class='z'>D</td><td class='z'>KC</td><td class='z'>KD</td></tr>");
            }

            output.println("<tr>");

            String prefix;
            String code = UTF16.valueOf(cp);
            String c = Default.nfc.normalize(cp);
            String d = Default.nfd.normalize(cp);
            String kc = Default.nfkc.normalize(cp);
            String kd = Default.nfkd.normalize(cp);

            showCell(output, code, "<td class='z' ", "");

            prefix = c.equals(code) ? "<td class='g' " : "<td class='n' ";
            showCell(output, c, prefix, "");

            prefix = d.equals(c) ? "<td class='g' " : "<td class='n' ";
            showCell(output, d, prefix, "");

            prefix = kc.equals(c) ? "<td class='g' " : "<td class='n' ";
            showCell(output, kc, prefix, "");

            prefix = (kd.equals(d) || kd.equals(kc)) ? "<td class='g' " : "<td class='n' ";
            showCell(output, kd, prefix, "");

            output.println("</tr>");
        }

        closeFile(output);
        output = null;

        SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
        indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
        indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
        indexFile.println("</p></body></html>");
    } finally {
        // Only reached with a non-null output when the run was interrupted.
        if (output != null) output.close();
        indexFile.close();
    }
}
/**
 * Prints one table cell for the string s.
 *
 * The cell consists of the caller-supplied opening fragment, a title
 * attribute holding the XML-quoted name of s followed by extra, the
 * XML-quoted NFC form of s, a line break, and the hex form of s in a
 * tt element.
 *
 * @param output writer that receives the cell
 * @param s      string shown in the cell
 * @param prefix opening part of the td tag, without the closing '>'
 * @param extra  text appended verbatim after the name inside the title attribute
 */
static void showCell(PrintWriter output, String s, String prefix, String extra) {
    String name = Default.ucd.getName(s);
    String comp = Default.nfc.normalize(s);

    String titleAttribute = " title='" + Utility.quoteXML(name) + extra + "'>";
    String content = Utility.quoteXML(comp) + "<br><tt>" + Utility.hex(s) + "</tt></td>";

    output.println(prefix + titleAttribute + content);
}
/**
 * Finds the first specific script used in a string.
 *
 * Walks s code point by code point and returns the script of the first one
 * whose script is neither COMMON_SCRIPT nor INHERITED_SCRIPT.
 *
 * @param s string to scan
 * @return the first such script, or COMMON_SCRIPT when there is none
 *         (including when s is empty)
 */
static byte getBestScript(String s) {
    int index = 0;
    while (index < s.length()) {
        final int codePoint = UTF16.charAt(s, index);
        final byte script = Default.ucd.getScript(codePoint);
        final boolean generic = script == COMMON_SCRIPT || script == INHERITED_SCRIPT;
        if (!generic) {
            return script;
        }
        // Step by the number of UTF-16 units this code point occupies.
        index += UTF16.getCharCount(codePoint);
    }
    return COMMON_SCRIPT;
}
/**
 * Writes the case charts into the CaseCharts directory.
 *
 * Collects code points (skipping unrepresented ones and categories Cs and
 * Co), groups them by a script key, opens one HTML file per group through
 * openFile, and prints one table row per code point with the cells Code,
 * Lower, Title, Upper and Fold. Finishes by writing a footer to the index
 * file and closing it.
 *
 * NOTE(review): as written this method cannot compile; see the note on the
 * placeholder condition in the first loop.
 *
 * @throws IOException declared for the file operations performed through Utility
 */
static public void caseChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
// Sorted set of Pair(groupKey, Pair(nfkdForm, codePoint)).
Set set = new TreeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
byte cat = Default.ucd.getCategory(i);
if (cat == Cs || cat == Co) continue;
String code = UTF16.valueOf(i);
String lower = Default.ucd.getCase(i, FULL, LOWER);
String title = Default.ucd.getCase(i, FULL, TITLE);
String upper = Default.ucd.getCase(i, FULL, UPPER);
String fold = Default.ucd.getCase(i, FULL, FOLD);
String decomp = Default.nfkd.normalize(i);
// 0 serves as "not decided yet"; it is replaced by getBestScript below.
byte script = 0;
// Lower, upper and fold all leave the code point unchanged (title is not consulted here).
if (lower.equals(code) && upper.equals(code) && fold.equals(code)) {
// NOTE(review): the condition on the next line is pseudo-code, not valid Java.
// It must be replaced with a real test on decomp before this method can compile.
// getChunkName maps the value -4 to "NoMapping".
if (decomp contains Lu, Lo, Lt, or Lowercase or Uppercase) script = -4;
else continue;
}
if (script == 0) script = getBestScript(decomp);
// When the script is COMMON_SCRIPT, the general category plus 128 is used as the group key.
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + 128 : script),
new Pair(decomp,
new Integer(i))));
}
PrintWriter output = null;
Iterator it = set.iterator();
int oldScript = -127;
// NOTE(review): scriptCount, lastPrimary and lastSortKey are never read in this method.
int[] scriptCount = new int[128];
// NOTE(review): counter is never incremented, so Utility.dot below always receives 0.
int counter = 0;
int lastPrimary = -1;
String lastSortKey = "\u0000";
// NOTE(review): the help and index-header templates are the norm_* files; presumably
// shared with the normalization charts on purpose -- confirm.
Utility.copyTextFile("index.html", true, "CaseCharts\\index.html");
Utility.copyTextFile("charts.css", false, "CaseCharts\\charts.css");
Utility.copyTextFile("norm_help.html", true, "CaseCharts\\help.html");
indexFile = Utility.openPrintWriter("CaseCharts\\index_list.html", false, false);
Utility.appendFile("norm_index_header.html", true, indexFile);
/*
indexFile.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
indexFile.println("<title>UCA Default Collation Table</title>");
indexFile.println("<base target='main'>");
indexFile.println("<style><!-- p { font-size: 90% } --></style>");
indexFile.println("</head><body><h2 align='center'>UCA Default Collation Table</h2>");
indexFile.println("<p align='center'><a href = 'help.html'>Help</a>");
*/
while (it.hasNext()) {
Utility.dot(counter);
Pair p = (Pair) it.next();
int script = ((Integer) p.first).intValue();
int cp = ((Integer)((Pair) p.second).second).intValue();
// A change of group key closes the current chart file; a new one is opened below.
if (script != oldScript
// && (script != COMMON_SCRIPT && script != INHERITED_SCRIPT)
) {
closeFile(output);
output = null;
oldScript = script;
}
if (output == null) {
output = openFile(0, "CaseCharts\\", script);
output.println("<tr><td class='z'>Code</td><td class='z'>Lower</td><td class='z'>Title</td><td class='z'>Upper</td><td class='z'>Fold</td></tr>");
}
output.println("<tr>");
String prefix;
String code = UTF16.valueOf(cp);
String lower = Default.ucd.getCase(cp, FULL, LOWER);
String title = Default.ucd.getCase(cp, FULL, TITLE);
String upper = Default.ucd.getCase(cp, FULL, UPPER);
String fold = Default.ucd.getCase(cp, FULL, FOLD);
// Each cell gets class 'g' when it equals the value it is compared with, 'n' otherwise.
showCell(output, code, "<td class='z' ", "");
prefix = lower.equals(code) ? "<td class='g' " : "<td class='n' ";
showCell(output, lower, prefix, "");
// Title is compared with upper, not with code.
prefix = title.equals(upper) ? "<td class='g' " : "<td class='n' ";
showCell(output, title, prefix, "");
prefix = upper.equals(code) ? "<td class='g' " : "<td class='n' ";
showCell(output, upper, prefix, "");
// Fold is compared with lower.
prefix = (fold.equals(lower)) ? "<td class='g' " : "<td class='n' ";
showCell(output, fold, prefix, "");
output.println("</tr>");
}
SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
closeFile(output);
indexFile.println("<hr><p>Last Modified: " + df.format(new Date()));
indexFile.println("<br>UCD Version: " + Default.ucd.getVersion());
indexFile.println("</p></body></html>");
indexFile.close();
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.8 $
* $Date: 2002/04/23 01:59:16 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -71,6 +71,10 @@ public class WriteCollationData implements UCD_Types {
else if (arg.equalsIgnoreCase("writeNonspacingDifference")) writeNonspacingDifference();
else if (arg.equalsIgnoreCase("WriteCharts")) WriteCharts.test(collator);
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(collator);
else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(collator);
@ -257,7 +261,7 @@ public class WriteCollationData implements UCD_Types {
for (char c = 0; c < 0xFFFF; ++c) {
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
if (0xAC00 <= c && c <= 0xD7A3) continue;
if (normKD.hasDecomposition(c)) {
if (normKD.normalizationDiffers(c)) {
++count;
String decomp = normKD.normalize(c);
datasize += decomp.length();
@ -285,7 +289,7 @@ public class WriteCollationData implements UCD_Types {
for (char c = 0; c < 0xFFFF; ++c) {
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
if (0xAC00 <= c && c <= 0xD7A3) continue;
if (normD.hasDecomposition(c)) {
if (normD.normalizationDiffers(c)) {
++count;
String decomp = normD.normalize(c);
datasize += decomp.length();
@ -475,7 +479,7 @@ public class WriteCollationData implements UCD_Types {
}
log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
for (char ch = 0; ch < 0xFFFF; ++ch) {
if (!nfkd.hasDecomposition(ch)) continue;
if (!nfkd.normalizationDiffers(ch)) continue;
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.5 $
* $Date: 2002/04/23 01:59:16 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -19,7 +19,6 @@ import java.io.*;
//import com.ibm.text.unicode.*;
import com.ibm.text.UCD.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.Normalizer;
public class WriteHTMLCollation implements UCD_Types {
public static final String copyright =
@ -42,14 +41,21 @@ public class WriteHTMLCollation implements UCD_Types {
static PrintWriter log;
static UCD ucd;
static Normalizer nfc, nfd, nfkd, nfkc;
public static void main(String args[]) throws IOException {
checkImplicit();
checkFixes();
String unicodeVersion = "";
System.out.println("Building UCA");
collator = new UCA(null, "");
collator = new UCA(null, unicodeVersion);
nfc = new Normalizer(NFC, unicodeVersion);
nfkc = new Normalizer(NFKC, unicodeVersion);
nfd = new Normalizer(NFD, unicodeVersion);
nfkd = new Normalizer(NFKD, unicodeVersion);
System.out.println("Building UCD data (old)");
//UInfo.init();
@ -68,13 +74,13 @@ public class WriteHTMLCollation implements UCD_Types {
*/
// DO FOLLOWING
writeFractionalUCA("FractionalUCA.txt");
writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
// SKIP BELOW
if (true) return;
writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
writeFractionalUCA("FractionalUCA.txt");
writeRules(WITH_NAMES);
writeRules(WITHOUT_NAMES);
@ -99,15 +105,15 @@ public class WriteHTMLCollation implements UCD_Types {
static public void writeCaseExceptions() {
System.err.println("Writing Case Exceptions");
Normalizer NFKC = new Normalizer(Normalizer.NFKC);
//Normalizer NFKC = new Normalizer(Normalizer.NFKC);
for (char a = 0; a < 0xFFFF; ++a) {
if (!ucd.isRepresented(a)) continue;
//if (0xA000 <= a && a <= 0xA48F) continue; // skip YI
String b = Case.fold(a);
String c = NFKC.normalize(b);
String c = nfkc.normalize(b);
String d = Case.fold(c);
String e = NFKC.normalize(d);
String e = nfkc.normalize(d);
if (!e.equals(c)) {
System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
/*
@ -125,7 +131,7 @@ public class WriteHTMLCollation implements UCD_Types {
*/
}
String f = Case.fold(e);
String g = NFKC.normalize(f);
String g = nfkc.normalize(f);
if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
}
}
@ -269,7 +275,7 @@ public class WriteHTMLCollation implements UCD_Types {
static void checkBadDecomps(int strength, boolean decomposition) {
int oldStrength = collator.getStrength();
collator.setStrength(strength);
Normalizer nfkd = new Normalizer(Normalizer.NFKD);
//Normalizer nfkd = new Normalizer(Normalizer.NFKD);
if (strength == 1) {
log.println("<h2>3. Primaries Incompatible with Decompositions</h2><table border='1'>");
} else {
@ -277,7 +283,7 @@ public class WriteHTMLCollation implements UCD_Types {
}
log.println("<tr><th>Code</td><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>");
for (char ch = 0; ch < 0xFFFF; ++ch) {
if (!nfkd.hasDecomposition(ch)) continue;
if (!nfkd.normalizationDiffers(ch)) continue;
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
String sortKey = collator.getSortKey(String.valueOf(ch), UCA.NON_IGNORABLE, decomposition);
String decompSortKey = collator.getSortKey(nfkd.normalize(ch), UCA.NON_IGNORABLE, decomposition);
@ -431,11 +437,11 @@ public class WriteHTMLCollation implements UCD_Types {
log.println("compressed: " + comp);
}
log.println("Ken's : " + kenStr);
String nfkd = NFKD.normalize(s);
log.println("NFKD : " + ucd.getCodeAndName(nfkd));
String nfd = NFD.normalize(s);
if (!nfd.equals(nfkd)) {
log.println("NFD : " + ucd.getCodeAndName(nfd));
String nfkdstr = nfkd.normalize(s);
log.println("NFKD : " + ucd.getCodeAndName(nfkdstr));
String nfdstr = nfd.normalize(s);
if (!nfdstr.equals(nfkdstr)) {
log.println("NFD : " + ucd.getCodeAndName(nfdstr));
}
//kenCLen = collator.getCEs(decomp, true, kenComp);
//log.println("decomp ce: " + collator.ceToString(kenComp, kenCLen));
@ -456,7 +462,7 @@ public class WriteHTMLCollation implements UCD_Types {
static final byte getDecompType(int cp) {
byte result = ucd.getDecompositionType(cp);
if (result == ucd.CANONICAL) {
String d = NFD.normalize((char)cp); // TODO
String d = nfd.normalize((char)cp); // TODO
for (int i = 0; i < d.length(); ++i) {
byte t = ucd.getDecompositionType(d.charAt(i));
if (t > ucd.CANONICAL) return t;
@ -517,7 +523,7 @@ public class WriteHTMLCollation implements UCD_Types {
int type = getDecompType(s.charAt(0));
char ch = s.charAt(0);
String decomp = NFKD.normalize(s);
String decomp = nfkd.normalize(s);
int len = 0;
int markLen = collator.getCEs(decomp, true, markCes);
if (compress) markLen = kenCompress(markCes, markLen);
@ -741,8 +747,8 @@ public class WriteHTMLCollation implements UCD_Types {
return result.toString();
}
static Normalizer NFKD = new Normalizer(Normalizer.NFKD);
static Normalizer NFD = new Normalizer(Normalizer.NFD);
//static Normalizer NFKD = new Normalizer(Normalizer.NFKD);
//static Normalizer NFD = new Normalizer(Normalizer.NFD);
static int variableHigh = 0;
static final int COMMON = 5;
@ -1112,7 +1118,7 @@ public class WriteHTMLCollation implements UCD_Types {
// b. toSmallKana(NFKD(x)) != x.
static final boolean needsCaseBit(String x) {
String s = NFKD.normalize(x);
String s = nfkd.normalize(x);
if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
if (!toSmallKana(s).equals(s)) return true;
return false;
@ -1616,8 +1622,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static final char MARK2 = '\u0002';
//Normalizer normalizer = new Normalizer(Normalizer.NFC, true);
static Normalizer toC = new Normalizer(Normalizer.NFC);
static Normalizer toD = new Normalizer(Normalizer.NFD);
//static Normalizer toC = new Normalizer(Normalizer.NFC);
//static Normalizer toD = new Normalizer(Normalizer.NFD);
static TreeMap MismatchedC = new TreeMap();
static TreeMap MismatchedN = new TreeMap();
static TreeMap MismatchedD = new TreeMap();
@ -1631,7 +1637,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static void addString(String ch, byte option) {
String colDbase = collator.getSortKey(ch, option, true);
String colNbase = collator.getSortKey(ch, option, false);
String colCbase = collator.getSortKey(toC.normalize(ch), option, false);
String colCbase = collator.getSortKey(nfc.normalize(ch), option, false);
if (!colNbase.equals(colCbase)) {
/*System.out.println(Utility.hex(ch));
System.out.println(printableKey(colNbase));
@ -1790,7 +1796,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
}
static void showLine(int count, String ch, String keyD, String keyN) {
String decomp = toD.normalize(ch);
String decomp = nfd.normalize(ch);
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
log.println("<tr><td>" + count + "</td><td>"
+ Utility.hex(ch, " ")
@ -1831,7 +1837,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
String ch = (String)it.next();
String MN = (String)MismatchedN.get(ch);
String MC = (String)MismatchedC.get(ch);
String chInC = toC.normalize(ch);
String chInC = nfc.normalize(ch);
out.el("tr");
out.el("th").at("rowSpan",2).at("align","right").tx16(ch).tx(' ').tx(ucd.getName(ch));
out.el("br").cl().tx("NFC=").tx16(chInC).cl();
@ -1859,7 +1865,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
String ch = chobj.toString();
String decomp = toD.normalize(ch);
String decomp = nfd.normalize(ch);
if (showName) {
if (ch.equals(decomp)) {
log.println(//title + counter + " "

View file

@ -1,4 +1,5 @@
td { border: 1 solid #0000FF; color: #000000; background-color: #FFFFFF; font-size: 120%; text-align: Center; vertical-align: top; width: 48px }
td { border: 1 solid #0000FF; color: #000000; background-color: #FFFFFF;
font-size: 120%; text-align: Center; vertical-align: top; width: 48px }
td.p { color: #000000; background-color: #7777FF }
td.s { color: #000000; background-color: #BBBBFF }
td.t { color: #000000; background-color: #DDDDFF }
@ -9,4 +10,8 @@ td.et { color: #000000; background-color: #FF9999 }
td.eq { color: #000000; background-color: #FFBBBB }
th { vertical-align: top; font-weight: bold }
th.x { vertical-align: top; font-weight: regular; text-align: Left }
tt { font-size: 50% }
tt { font-size: 50% }
td.g { font-size: 120%; text-align: Center; width: 72px; color: #808080; }
td.n { font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; }
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }

View file

@ -6,7 +6,7 @@
<meta name="ProgId" content="FrontPage.Editor.Document">
</head>
<frameset rows="192,*">
<frameset cols="192,*">
<frame name="header" src="index_list.html" target="main" scrolling="auto">
<frame name="main" src="help.html" target="main" scrolling="auto">
<noframes>

View file

@ -3,54 +3,33 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="Content-Language" content="en-us">
<meta name="VI60_defaultClientScript" content="JavaScript">
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
<meta name="keywords" content="Basic">
<meta name="ProgId" content="FrontPage.Editor.Document">
<title>UCA Default Collation Table</title>
<base target="main">
<style><!-- p { font-size: 90% }
<title>UCA Chart</title>
<style><!--
p { font-size: 90% }
--></style>
<base target="main">
<link rel="stylesheet" type="text/css"
href="http://www.unicode.org/webscripts/standard_styles.css">
<script language="Javascript" type="text/javascript"
src="http://www.unicode.org/webscripts/commonHeader.js"></script>
</head>
<body>
<table cellspacing="0" cellpadding="0" width="100%" border="0">
<tbody>
<tr>
<td colspan="2">
<table cellspacing="0" cellpadding="0" width="100%" border="0">
<tbody>
<tr>
<td class="icon"><a href="http://www.unicode.org/"><img
alt="[Unicode]"
src="http://www.unicode.org/webscripts/logo60s2.gif"
align="middle" border="0" width="34" height="33"></a>&nbsp;&nbsp;<a
class="bar" href="http://www.unicode.org/charts"
target="_parent"><font size="3">Charts</font></a></td>
<td class="bar"><a class="bar" href="http://www.unicode.org"
target="_parent">Home</a> | <a class="bar"
href="http://www.unicode.org/sitemap/" target="_parent">Site Map</a>
| <a class="bar" href="http://www.unicode.org/search"
target="_parent">Search </a><script
language="Javascript"
src="http://www.unicode.org/webscripts/commonSearch.js"
type="text/javascript"></script>
<NOSCRIPT>
<a href="http://www.unicode.org/webscripts/quick_links.html"
class="bar" target="_blank">Goto</a></NOSCRIPT>
</td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
<table width="100%" cellpadding="0" cellspacing="0" border="0">
<tr>
<td colspan="2">
<table width="100%" border="0" cellpadding="0" cellspacing="0">
<tr>
<td class="icon"><a href="http://www.unicode.org/"><img border="0"
src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle"
alt="[Unicode]" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar"
href="http://www.unicode.org/unicode/faq/"><font size="3">Charts</font></a>
</tr>
</table>
</td>
</tr>
</table>
<div class="body">
<!-- BEGIN CONTENTS -->
<h2 align="center">UCA Default Collation Table</h2>
<h2 align="center">UCA Chart</h2>
<p align="center"><a href="help.html">Help</a>

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
* $Date: 2001/12/13 23:35:54 $
* $Revision: 1.3 $
* $Date: 2002/04/23 01:59:12 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -28,7 +28,7 @@ public class BuildNames implements UCD_Types {
public static void main(String[] args) throws IOException {
Main.setUCD();
Default.setUCD();
collectWords();
}
@ -82,8 +82,8 @@ public class BuildNames implements UCD_Types {
int used = 0;
int sum = 0;
for (int i = 0; i < 0x10FFFF; ++i) {
if (Main.ucd.hasComputableName(i)) continue;
String name = transform(Main.ucd.getName(i));
if (Default.ucd.hasComputableName(i)) continue;
String name = transform(Default.ucd.getName(i));
sum += name.length();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2002/03/20 00:21:43 $
* $Revision: 1.12 $
* $Date: 2002/04/23 01:59:13 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -97,7 +97,7 @@ public final class DerivedProperty implements UCD_Types {
Normalizer nfx;
ExDProp(int i) {
type = DERIVED_NORMALIZATION;
nfx = Main.nf[i];
nfx = Default.nf[i];
name = "Expands_On_" + nfx.getName();
shortName = "XO_" + nfx.getName();
header = "# Derived Property: " + name
@ -121,7 +121,7 @@ public final class DerivedProperty implements UCD_Types {
NF_UnsafeStartProp(int i) {
isStandard = false;
type = DERIVED_NORMALIZATION;
nfx = Main.nf[i];
nfx = Default.nf[i];
name = nfx.getName() + "_UnsafeStart";
shortName = nfx.getName() + "_SS";
header = "# Derived Property: " + name
@ -157,7 +157,7 @@ public final class DerivedProperty implements UCD_Types {
case NFC_TrailingNonZero: bitsets[1] = bitset = new BitSet(); break;
}
filter = bitsets[1] != null;
Main.nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
Default.nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
name = Names[i-NFC_Leading];
shortName = SNames[i-NFC_Leading];
@ -193,17 +193,17 @@ public final class DerivedProperty implements UCD_Types {
isStandard = false;
setValueType(NON_ENUMERATED);
type = DERIVED_NORMALIZATION;
nfx = Main.nf[i];
nfx = Default.nf[i];
name = nfx.getName();
String compName = "the character itself";
if (i == NFKC || i == NFD) {
name += "-NFC";
nfComp = Main.nfc;
nfComp = Default.nfc;
compName = "NFC for the character";
} else if (i == NFKD) {
name += "-NFD";
nfComp = Main.nfd;
nfComp = Default.nfd;
compName = "NFD for the character";
}
header = "# Derived Property: " + name
@ -269,7 +269,7 @@ public final class DerivedProperty implements UCD_Types {
QuickDProp (int i) {
setValueType((i == NFC || i == NFKC) ? ENUMERATED : BINARY);
type = DERIVED_NORMALIZATION;
nfx = Main.nf[i];
nfx = Default.nf[i];
NO = nfx.getName() + "_NO";
MAYBE = nfx.getName() + "_MAYBE";
name = nfx.getName() + "_QuickCheck";
@ -507,8 +507,8 @@ of characters, the first of which has a non-zero combining class.
}
public String getValue(int cp, byte style) {
if (!ucdData.isRepresented(cp)) return "";
String b = Main.nfkc.normalize(fold(cp));
String c = Main.nfkc.normalize(fold(b));
String b = Default.nfkc.normalize(fold(cp));
String c = Default.nfkc.normalize(fold(b));
if (c.equals(b)) return "";
return "FNC; " + Utility.hex(c);
} // default
@ -529,8 +529,8 @@ of characters, the first of which has a non-zero combining class.
}
public String getValue(int cp, byte style) {
if (!ucdData.isRepresented(cp)) return "";
String b = Main.nfc.normalize(fold(cp));
String c = Main.nfc.normalize(fold(b));
String b = Default.nfc.normalize(fold(cp));
String c = Default.nfc.normalize(fold(b));
if (c.equals(b)) return "";
return "FN; " + Utility.hex(c);
} // default
@ -598,8 +598,8 @@ of characters, the first of which has a non-zero combining class.
}
boolean hasValue(int cp) {
if (hasSoftDot(cp)) return true;
if (!Main.nfkd.normalizationDiffers(cp)) return false;
String decomp = Main.nfd.normalize(cp);
if (!Default.nfkd.normalizationDiffers(cp)) return false;
String decomp = Default.nfd.normalize(cp);
boolean ok = false;
for (int i = decomp.length()-1; i >= 0; --i) {
int ch = UTF16.charAt(decomp, i);
@ -698,11 +698,11 @@ of characters, the first of which has a non-zero combining class.
|| ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
// if (true) throw new IllegalArgumentException("FIX Main.nf[2]");
// if (true) throw new IllegalArgumentException("FIX Default.nf[2]");
if (!Main.nf[NFKD].normalizationDiffers(cp)) return Lo;
if (!Default.nf[NFKD].normalizationDiffers(cp)) return Lo;
String norm = Main.nf[NFKD].normalize(cp);
String norm = Default.nf[NFKD].normalize(cp);
int cp2;
boolean gotUpper = false;
boolean gotLower = false;
@ -740,8 +740,8 @@ of characters, the first of which has a non-zero combining class.
}
public static void test() {
Main.setUCD();
DerivedProperty dprop = new DerivedProperty(Main.ucd);
Default.setUCD();
DerivedProperty dprop = new DerivedProperty(Default.ucd);
/*
for (int j = 0; j < LIMIT; ++j) {
System.out.println();
@ -752,9 +752,9 @@ of characters, the first of which has a non-zero combining class.
for (int cp = 0xA0; cp < 0xFF; ++cp) {
System.out.println();
System.out.println(Main.ucd.getCodeAndName(cp));
System.out.println(Default.ucd.getCodeAndName(cp));
for (int j = 0; j < DERIVED_PROPERTY_LIMIT; ++j) {
String prop = make(j, Main.ucd).getValue(cp);
String prop = make(j, Default.ucd).getValue(cp);
if (prop.length() != 0) System.out.println("\t" + prop);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
* $Date: 2002/03/22 22:08:53 $
* $Revision: 1.8 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -40,7 +40,7 @@ public class GenerateCaseFolding implements UCD_Types {
public static void makeCaseFold(boolean normalized) throws java.io.IOException {
PICK_SHORT = NF_CLOSURE = normalized;
Main.setUCD();
Default.setUCD();
log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true));
System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
@ -123,15 +123,15 @@ public class GenerateCaseFolding implements UCD_Types {
static void drawLine(PrintWriter out, int ch, String type, String result) {
String comment = "";
if (COMMENT_DIFFS) {
String lower = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
String lower = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
if (!lower.equals(result)) {
String upper = Main.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
String lower2 = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
String upper = Default.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
String lower2 = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
if (lower.equals(lower2)) {
comment = "[Diff " + Utility.hex(lower, " ") + "] ";
} else {
Utility.fixDot();
System.out.println("PROBLEM WITH: " + Main.ucd.getCodeAndName(ch));
System.out.println("PROBLEM WITH: " + Default.ucd.getCodeAndName(ch));
comment = "[DIFF " + Utility.hex(lower, " ") + ", " + Utility.hex(lower2, " ") + "] ";
}
}
@ -140,7 +140,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println(Utility.hex(ch)
+ "; " + type
+ "; " + Utility.hex(result, " ")
+ "; # " + comment + Main.ucd.getName(ch));
+ "; # " + comment + Default.ucd.getName(ch));
}
static int probeCh = 0x01f0;
@ -156,7 +156,7 @@ public class GenerateCaseFolding implements UCD_Types {
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
Utility.dot(ch);
//if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
if (!Main.ucd.isRepresented(ch)) continue;
if (!Default.ucd.isRepresented(ch)) continue;
getClosure(ch, data, full, nfClose);
}
@ -202,7 +202,7 @@ public class GenerateCaseFolding implements UCD_Types {
}
Utility.fixDot();
log.println("Non-Optimal Representative " + message);
log.println(" Rep:\t" + Main.ucd.getCodeAndName(rep));
log.println(" Rep:\t" + Default.ucd.getCodeAndName(rep));
log.println(" Set:\t" + toString(set,true, true));
}
@ -234,13 +234,13 @@ public class GenerateCaseFolding implements UCD_Types {
if (!full) result <<= 8;
String low = lower(upper(s, full), full);
if (s.equals(low)) result |= ISLOWER;
else if (PICK_SHORT && Main.nfd.normalize(s).equals(Main.nfd.normalize(low))) result |= ISLOWER;
else if (PICK_SHORT && Default.nfd.normalize(s).equals(Default.nfd.normalize(low))) result |= ISLOWER;
if (s.equals(Main.nfc.normalize(s))) result |= NFC_FORMAT;
if (s.equals(Default.nfc.normalize(s))) result |= NFC_FORMAT;
if (show) {
Utility.fixDot();
System.out.println(Utility.hex(result) + ", " + Main.ucd.getCodeAndName(s));
System.out.println(Utility.hex(result) + ", " + Default.ucd.getCodeAndName(s));
}
return result;
}
@ -269,28 +269,28 @@ public class GenerateCaseFolding implements UCD_Types {
/*
String
String lower1 = Main.ucd.getLowercase(ch);
String lower2 = Main.ucd.toLowercase(ch,option);
String lower1 = Default.ucd.getLowercase(ch);
String lower2 = Default.ucd.toLowercase(ch,option);
char ch2 = Main.ucd.getLowercase(Main.ucd.getUppercase(ch).charAt(0)).charAt(0);
//String lower1 = String.valueOf(Main.ucd.getLowercase(ch));
//String lower = Main.ucd.toLowercase(ch2,option);
String upper = Main.ucd.toUppercase(ch2,option);
String lowerUpper = Main.ucd.toLowercase(upper,option);
//String title = Main.ucd.toTitlecase(ch2,option);
//String lowerTitle = Main.ucd.toLowercase(upper,option);
char ch2 = Default.ucd.getLowercase(Default.ucd.getUppercase(ch).charAt(0)).charAt(0);
//String lower1 = String.valueOf(Default.ucd.getLowercase(ch));
//String lower = Default.ucd.toLowercase(ch2,option);
String upper = Default.ucd.toUppercase(ch2,option);
String lowerUpper = Default.ucd.toLowercase(upper,option);
//String title = Default.ucd.toTitlecase(ch2,option);
//String lowerTitle = Default.ucd.toLowercase(upper,option);
if (ch != ch2 || lowerUpper.length() != 1 || ch != lowerUpper.charAt(0)) { //
output.println(Utility.hex(ch)
+ "; " + (lowerUpper.equals(lower1) ? "L" : lowerUpper.equals(lower2) ? "S" : "E")
+ "; " + Utility.hex(lowerUpper," ")
+ ";\t#" + Main.ucd.getName(ch)
+ ";\t#" + Default.ucd.getName(ch)
);
//if (!lowerUpper.equals(lower)) {
// output.println("Warning1: " + Utility.hex(lower) + " " + Main.ucd.getName(lower));
// output.println("Warning1: " + Utility.hex(lower) + " " + Default.ucd.getName(lower));
//}
//if (!lowerUpper.equals(lowerTitle)) {
// output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Main.ucd.getName(lowerTitle));
// output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Default.ucd.getName(lowerTitle));
//}
}
*/
@ -322,10 +322,10 @@ public class GenerateCaseFolding implements UCD_Types {
// do funny stuff since we can't modify set while iterating
// We don't do this because if the source is not normalized, we don't want to normalize
if (nfClose) {
if (add(set, Main.nfd.normalize(s), data)) continue main;
if (add(set, Main.nfc.normalize(s), data)) continue main;
if (add(set, Main.nfkd.normalize(s), data)) continue main;
if (add(set, Main.nfkc.normalize(s), data)) continue main;
if (add(set, Default.nfd.normalize(s), data)) continue main;
if (add(set, Default.nfc.normalize(s), data)) continue main;
if (add(set, Default.nfkd.normalize(s), data)) continue main;
if (add(set, Default.nfkc.normalize(s), data)) continue main;
}
if (add(set, lower(s, full), data)) continue main;
if (add(set, title(s, full), data)) continue main;
@ -340,34 +340,34 @@ public class GenerateCaseFolding implements UCD_Types {
return result.replace('\u03C2', '\u03C3'); // HACK for lower
}
// These functions are no longer necessary, since Main.ucd is parameterized,
// These functions are no longer necessary, since Default.ucd is parameterized,
// but it's not worth changing
static String lower2(String s, boolean full) {
/*if (!full) {
if (s.length() != 1) return s;
return Main.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
return Default.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
}
*/
return Main.ucd.getCase(s, full ? FULL : SIMPLE, LOWER);
return Default.ucd.getCase(s, full ? FULL : SIMPLE, LOWER);
}
static String upper(String s, boolean full) {
/* if (!full) {
if (s.length() != 1) return s;
return Main.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
}
*/
return Main.ucd.getCase(s, full ? FULL : SIMPLE, UPPER);
return Default.ucd.getCase(s, full ? FULL : SIMPLE, UPPER);
}
static String title(String s, boolean full) {
/*if (!full) {
if (s.length() != 1) return s;
return Main.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
}
*/
return Main.ucd.getCase(s, full ? FULL : SIMPLE, TITLE);
return Default.ucd.getCase(s, full ? FULL : SIMPLE, TITLE);
}
static boolean add(Set set, String s, Map data) {
@ -406,7 +406,7 @@ public class GenerateCaseFolding implements UCD_Types {
}
first = false;
if (name) {
result += Main.ucd.getCodeAndName(s2);
result += Default.ucd.getCodeAndName(s2);
} else {
result += Utility.hex(s2, " ");
}
@ -416,12 +416,12 @@ public class GenerateCaseFolding implements UCD_Types {
static boolean specialNormalizationDiffers(int ch) {
if (ch == 0x00DF) return true; // es-zed
return Main.nfkd.normalizationDiffers(ch);
return Default.nfkd.normalizationDiffers(ch);
}
static String specialNormalization(String s) {
if (s.equals("\u00DF")) return "ss";
return Main.nfkd.normalize(s);
return Default.nfkd.normalize(s);
}
static boolean isExcluded(int ch) {
@ -431,14 +431,14 @@ public class GenerateCaseFolding implements UCD_Types {
if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A..
if (0x20A8 <= ch && ch <= 0x217B) return true; // skip Rupee..
byte type = Main.ucd.getDecompositionType(ch);
byte type = Default.ucd.getDecompositionType(ch);
if (type == COMPAT_SQUARE) return true;
//if (type == COMPAT_UNSPECIFIED) return true;
return false;
}
static void generateSpecialCasing(boolean normalize) throws IOException {
Main.setUCD();
Default.setUCD();
Map sorted = new TreeMap();
String suffix2 = "";
@ -448,19 +448,19 @@ public class GenerateCaseFolding implements UCD_Types {
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Main.ucd.isRepresented(ch)) continue;
if (!Default.ucd.isRepresented(ch)) continue;
if (!specialNormalizationDiffers(ch)) continue;
String lower = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, LOWER));
String upper = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, UPPER));
String title = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, TITLE));
String lower = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, LOWER));
String upper = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, UPPER));
String title = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, TITLE));
String chstr = UTF16.valueOf(ch);
String decomp = specialNormalization(chstr);
String flower = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, LOWER));
String fupper = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, UPPER));
String ftitle = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, TITLE));
String flower = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, LOWER));
String fupper = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, UPPER));
String ftitle = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, TITLE));
String base = decomp;
String blower = specialNormalization(lower);
@ -468,42 +468,42 @@ public class GenerateCaseFolding implements UCD_Types {
String btitle = specialNormalization(title);
if (true) {
flower = Main.nfc.normalize(flower);
fupper = Main.nfc.normalize(fupper);
ftitle = Main.nfc.normalize(ftitle);
base = Main.nfc.normalize(base);
blower = Main.nfc.normalize(blower);
bupper = Main.nfc.normalize(bupper);
btitle = Main.nfc.normalize(btitle);
flower = Default.nfc.normalize(flower);
fupper = Default.nfc.normalize(fupper);
ftitle = Default.nfc.normalize(ftitle);
base = Default.nfc.normalize(base);
blower = Default.nfc.normalize(blower);
bupper = Default.nfc.normalize(bupper);
btitle = Default.nfc.normalize(btitle);
}
if (ch == CHECK_CHAR) {
System.out.println("Code: " + Main.ucd.getCodeAndName(ch));
System.out.println("Decomp: " + Main.ucd.getCodeAndName(decomp));
System.out.println("Base: " + Main.ucd.getCodeAndName(base));
System.out.println("SLower: " + Main.ucd.getCodeAndName(lower));
System.out.println("FLower: " + Main.ucd.getCodeAndName(flower));
System.out.println("BLower: " + Main.ucd.getCodeAndName(blower));
System.out.println("STitle: " + Main.ucd.getCodeAndName(title));
System.out.println("FTitle: " + Main.ucd.getCodeAndName(ftitle));
System.out.println("BTitle: " + Main.ucd.getCodeAndName(btitle));
System.out.println("SUpper: " + Main.ucd.getCodeAndName(upper));
System.out.println("FUpper: " + Main.ucd.getCodeAndName(fupper));
System.out.println("BUpper: " + Main.ucd.getCodeAndName(bupper));
System.out.println("Code: " + Default.ucd.getCodeAndName(ch));
System.out.println("Decomp: " + Default.ucd.getCodeAndName(decomp));
System.out.println("Base: " + Default.ucd.getCodeAndName(base));
System.out.println("SLower: " + Default.ucd.getCodeAndName(lower));
System.out.println("FLower: " + Default.ucd.getCodeAndName(flower));
System.out.println("BLower: " + Default.ucd.getCodeAndName(blower));
System.out.println("STitle: " + Default.ucd.getCodeAndName(title));
System.out.println("FTitle: " + Default.ucd.getCodeAndName(ftitle));
System.out.println("BTitle: " + Default.ucd.getCodeAndName(btitle));
System.out.println("SUpper: " + Default.ucd.getCodeAndName(upper));
System.out.println("FUpper: " + Default.ucd.getCodeAndName(fupper));
System.out.println("BUpper: " + Default.ucd.getCodeAndName(bupper));
}
// presumably if there is a single code point, it would already be in the simple mappings
if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1
&& UTF16.countCodePoint(title) == 1) {
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Main.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd.getCodeAndName(ch));
continue;
}
// if there is no change from the base, skip
if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Main.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd.getCodeAndName(ch));
continue;
}
@ -516,11 +516,11 @@ public class GenerateCaseFolding implements UCD_Types {
// if there are no changes from the original, or the expanded original, skip
if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Main.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd.getCodeAndName(ch));
continue;
}
String name = Main.ucd.getName(ch);
String name = Default.ucd.getName(ch);
int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
: ch == 0x130 ? 2
@ -531,16 +531,16 @@ public class GenerateCaseFolding implements UCD_Types {
: UTF16.countCodePoint(fupper) == 2 ? 7
: 8;
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Main.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd.getCodeAndName(ch));
// HACK
boolean denormalize = !normalize && order != 6 && order != 7;
String mapping = Utility.hex(ch)
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Main.nfd.normalize(flower) : flower)
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Main.nfd.normalize(ftitle) : ftitle)
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Main.nfd.normalize(fupper) : fupper)
+ "; # " + Main.ucd.getName(ch);
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd.normalize(flower) : flower)
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd.normalize(ftitle) : ftitle)
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd.normalize(fupper) : fupper)
+ "; # " + Default.ucd.getName(ch);
// special exclusions
if (isExcluded(ch)) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2002/03/20 00:21:42 $
* $Revision: 1.16 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -32,7 +32,7 @@ public class GenerateData implements UCD_Types {
//static UnifiedBinaryProperty ubp
public static void checkHoffman(String test) {
String result = Main.nfkc.normalize(test);
String result = Default.nfkc.normalize(test);
System.out.println(Utility.hex(test) + " => " + Utility.hex(result));
System.out.println();
show(test, 0);
@ -44,10 +44,10 @@ public class GenerateData implements UCD_Types {
int cp;
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
cp = UTF32.char32At(s, i);
String cc = " " + Main.ucd.getCombiningClass(cp);
String cc = " " + Default.ucd.getCombiningClass(cp);
cc = Utility.repeat(" ", 4 - cc.length()) + cc;
System.out.println(Utility.repeat(" ", indent) + Main.ucd.getCode(cp) + cc + " " + Main.ucd.getName(cp));
String decomp = Main.nfkc.normalize(cp);
System.out.println(Utility.repeat(" ", indent) + Default.ucd.getCode(cp) + cc + " " + Default.ucd.getName(cp));
String decomp = Default.nfkc.normalize(cp);
if (!decomp.equals(UTF32.valueOf32(cp))) {
show(decomp, indent + 4);
}
@ -98,14 +98,14 @@ public class GenerateData implements UCD_Types {
}
public static String getFileSuffix(boolean withDVersion) {
return "-" + Main.ucd.getVersion()
return "-" + Default.ucd.getVersion()
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
+ ".txt";
}
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
Main.setUCD();
Default.setUCD();
String newFile = directory + fileName + getFileSuffix(true);
System.out.println("New File: " + newFile);
PrintWriter output = Utility.openPrintWriter(newFile);
@ -114,7 +114,7 @@ public class GenerateData implements UCD_Types {
doHeader(fileName + getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
UnicodeProperty up = DerivedProperty.make(i, Main.ucd);
UnicodeProperty up = DerivedProperty.make(i, Default.ucd);
boolean keepGoing = true;
if (!up.isStandard()) keepGoing = false;
if ((up.getType() & type) == 0) keepGoing = false;
@ -125,7 +125,7 @@ public class GenerateData implements UCD_Types {
System.out.print('.');
output.println(HORIZONTAL_LINE);
output.println();
new DerivedPropertyLister(Main.ucd, i, output).print();
new DerivedPropertyLister(Default.ucd, i, output).print();
output.flush();
}
output.close();
@ -134,27 +134,27 @@ public class GenerateData implements UCD_Types {
/*
public static void listStrings(String file, int type, int subtype) throws IOException {
Main.ucd = UCD.make("3.1.0");
Default.ucd = UCD.make("3.1.0");
UCD ucd30 = UCD.make("3.0.0");
PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
for (int i = 0; i < 0x10FFFF; ++i) {
if ((i & 0xFFF) == 0) System.out.println("# " + i);
if (!Main.ucd.isRepresented(i)) continue;
if (!Default.ucd.isRepresented(i)) continue;
if (ucd30.isRepresented(i)) continue;
String string = "";
switch(type) {
case 0: string = Main.ucd.getSimpleLowercase(i);
case 0: string = Default.ucd.getSimpleLowercase(i);
}
if (UTF32.length32(string) == 1 && UTF32.char32At(string,0) == i) continue;
output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + Main.ucd.getName(i));
output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + Default.ucd.getName(i));
}
output.close();
}
*/
public static void generateCompExclusions() throws IOException {
Main.setUCD();
Default.setUCD();
String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile);
String mostRecent = generateBat("DerivedData/", "CompositionExclusions", getFileSuffix(true));
@ -164,7 +164,7 @@ public class GenerateData implements UCD_Types {
output.println("#");
output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table.");
output.println("#");
if (Main.ucd.getVersion().equals("3.2.0")) {
if (Default.ucd.getVersion().equals("3.2.0")) {
output.println("# The format of the comments in this file has been updated since the last version,");
output.println("# CompositionExclusions-3.txt. The only substantive change to this file between that");
output.println("# version and this one is the addition of U+2ADC FORKING.");
@ -226,7 +226,7 @@ public class GenerateData implements UCD_Types {
public CompLister(PrintWriter output, int type) {
this.output = output;
ucdData = Main.ucd;
ucdData = Default.ucd;
oldUCD = UCD.make("3.0.0");
// showOnConsole = true;
alwaysBreaks = type <= 2; // CHANGE LATER
@ -269,13 +269,13 @@ public class GenerateData implements UCD_Types {
// find properties
Main.setUCD();
Default.setUCD();
int count = 0;
UnicodeProperty[] props = new UnicodeProperty[500];
for (int i = 1; i < LIMIT_ENUM; ++i) { // || iType == SCRIPT
int iType = i & 0xFF00;
if (iType == JOINING_GROUP || iType == AGE || iType == COMBINING_CLASS) continue;
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (!up.isStandard()) {
System.out.println("Skipping " + up.getName() + "; not standard");
@ -312,9 +312,9 @@ public class GenerateData implements UCD_Types {
int total = 0;
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
Utility.dot(cp);
int cat = Main.ucd.getCategory(cp);
int cat = Default.ucd.getCategory(cp);
if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
if (!Main.ucd.isAllocated(cp)) continue;
if (!Default.ucd.isAllocated(cp)) continue;
for (int i = 0; i < count; ++i) {
UnicodeProperty up = props[i];
@ -326,7 +326,7 @@ public class GenerateData implements UCD_Types {
if (!map.containsKey(probe)) {
map.put(probe.clone(), new Integer(cp));
Utility.fixDot();
// System.out.println("Set Size: " + map.size() + ", total: " + total + ", " + Main.ucd.getCodeAndName(cp));
// System.out.println("Set Size: " + map.size() + ", total: " + total + ", " + Default.ucd.getCodeAndName(cp));
}
}
@ -338,7 +338,7 @@ public class GenerateData implements UCD_Types {
while (it.hasNext()) {
BitSet probe2 = (BitSet) it.next();
int ch = ((Integer) map.get(probe2)).intValue();
output.println(Main.ucd.getCodeAndName(ch));
output.println(Default.ucd.getCodeAndName(ch));
for (int i = 0; i < count; ++i) {
if (!probe2.get(i)) continue;
output.print(" " + props[i].getFullName(SHORT));
@ -350,10 +350,10 @@ public class GenerateData implements UCD_Types {
public static void listDifferences() throws IOException {
Main.setUCD();
Default.setUCD();
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + getFileSuffix(true));
output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
output.println("# Generated for " + Main.ucd.getVersion());
output.println("# Generated for " + Default.ucd.getVersion());
output.println(generateDateLine());
output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)");
@ -361,7 +361,7 @@ public class GenerateData implements UCD_Types {
for (int i = 1; i < LIMIT_ENUM; ++i) {
int iType = i & 0xFF00;
if (iType == JOINING_GROUP || iType == AGE || iType == COMBINING_CLASS || iType == SCRIPT) continue;
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Main.ucd);
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
if (upi == null) continue;
if (!upi.isStandard()) {
System.out.println("Skipping " + upi.getName() + "; not standard");
@ -385,7 +385,7 @@ public class GenerateData implements UCD_Types {
int jType = j & 0xFF00;
if (jType == JOINING_GROUP || jType == AGE || jType == COMBINING_CLASS || jType == SCRIPT
|| (jType == iType && jType != BINARY_PROPERTIES)) continue;
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Main.ucd);
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
if (upj == null) continue;
if (!upj.isStandard()) continue;
if (upj.getValueType() < BINARY) continue;
@ -405,9 +405,9 @@ public class GenerateData implements UCD_Types {
int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0;
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
int cat = Main.ucd.getCategory(cp);
int cat = Default.ucd.getCategory(cp);
if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
if (!Main.ucd.isAllocated(cp)) continue;
if (!Default.ucd.isAllocated(cp)) continue;
boolean iProp = upi.hasValue(cp);
boolean jProp = upj.hasValue(cp);
@ -444,7 +444,7 @@ public class GenerateData implements UCD_Types {
}
public static void generatePropertyAliases() throws IOException {
Main.setUCD();
Default.setUCD();
String prop = "";
String propAbb = "";
String value = "";
@ -455,7 +455,7 @@ public class GenerateData implements UCD_Types {
Set accumulation = new TreeSet(java.text.Collator.getInstance());
/*
BufferedReader blocks = Utility.openUnicodeFile("Blocks", Main.ucd.getVersion());
BufferedReader blocks = Utility.openUnicodeFile("Blocks", Default.ucd.getVersion());
String[] parts = new String[10];
while (true) {
String line = blocks.readLine();
@ -509,7 +509,7 @@ public class GenerateData implements UCD_Types {
if (type == AGE) continue;
if (i == (BINARY_PROPERTIES | CaseFoldTurkishI)) continue;
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
@ -547,7 +547,7 @@ public class GenerateData implements UCD_Types {
if (type == SCRIPT) {
value = Main.ucd.getCase(value, FULL, TITLE);
value = Default.ucd.getCase(value, FULL, TITLE);
}
valueAbb = up.getValue(SHORT);
@ -774,7 +774,7 @@ public class GenerateData implements UCD_Types {
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
public static String generateBat(String directory, String fileRoot, String suffix) throws IOException {
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Main.ucd.getVersion(), true, true);
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), true, true);
if (mostRecent != null) {
generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
mostRecent, directory + fileRoot + suffix);
@ -783,7 +783,7 @@ public class GenerateData implements UCD_Types {
return null;
}
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Main.ucd.getVersion(), false, true);
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), false, true);
if (lessRecent != null && !mostRecent.equals(lessRecent)) {
generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix + "-OLDER",
lessRecent, directory + fileRoot + suffix);
@ -809,7 +809,7 @@ public class GenerateData implements UCD_Types {
public static void generateVerticalSlice(int startEnum, int endEnum,
int headerChoice, String directory, String file) throws IOException {
Main.setUCD();
Default.setUCD();
String newFile = directory + file + getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile);
String mostRecent = generateBat(directory, file, getFileSuffix(true));
@ -817,7 +817,7 @@ public class GenerateData implements UCD_Types {
doHeader(file + getFileSuffix(false), output, headerChoice);
int last = -1;
for (int i = startEnum; i < endEnum; ++i) {
UnicodeProperty up = UnifiedBinaryProperty.make(i, Main.ucd);
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
@ -845,7 +845,7 @@ public class GenerateData implements UCD_Types {
}
System.out.print(".");
if (DEBUG) System.out.println(i);
new MyPropertyLister(Main.ucd, i, output).print();
new MyPropertyLister(Default.ucd, i, output).print();
output.flush();
}
if (endEnum == LIMIT_ENUM) {
@ -859,13 +859,13 @@ public class GenerateData implements UCD_Types {
Set floatSet = new TreeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
float nv = Main.ucd.getNumericValue(i);
float nv = Default.ucd.getNumericValue(i);
if (Float.isNaN(nv)) continue;
floatSet.add(new Float(nv));
}
Iterator it = floatSet.iterator();
while(it.hasNext()) {
new MyFloatLister(Main.ucd, ((Float)it.next()).floatValue(), output).print();
new MyFloatLister(Default.ucd, ((Float)it.next()).floatValue(), output).print();
output.println();
System.out.print(".");
}
@ -878,7 +878,7 @@ public class GenerateData implements UCD_Types {
}
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
Main.setUCD();
Default.setUCD();
String newFile = directory + fileName + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, true, false);
String mostRecent = generateBat(directory, fileName, getFileSuffix(true));
@ -936,8 +936,8 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Main.ucd.isAssigned(ch)) continue;
if (Main.ucd.isPUA(ch)) continue;
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
String cc = UTF32.valueOf32(ch);
writeLine(cc,log, true);
}
@ -947,9 +947,9 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Main.ucd.isAssigned(ch)) continue;
if (Main.ucd.isPUA(ch)) continue;
int cc = Main.ucd.getCombiningClass(ch);
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
int cc = Default.ucd.getCombiningClass(ch);
if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
}
@ -963,9 +963,9 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Main.ucd.isAssigned(ch)) continue;
if (Main.ucd.isPUA(ch)) continue;
short c = Main.ucd.getCombiningClass(ch);
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
short c = Default.ucd.getCombiningClass(ch);
if (c == 0) continue;
// add character with higher class, same class, lower class
@ -1012,19 +1012,19 @@ public class GenerateData implements UCD_Types {
}
static void writeLine(String cc, PrintWriter log, boolean check) {
String c = Main.nfc.normalize(cc);
String d = Main.nfd.normalize(cc);
String kc = Main.nfkc.normalize(cc);
String kd = Main.nfkd.normalize(cc);
String c = Default.nfc.normalize(cc);
String d = Default.nfd.normalize(cc);
String kc = Default.nfkc.normalize(cc);
String kd = Default.nfkd.normalize(cc);
if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return;
// consistency check
String dc = Main.nfd.normalize(c);
String dkc = Main.nfd.normalize(kc);
String dc = Default.nfd.normalize(c);
String dkc = Default.nfd.normalize(kc);
if (!dc.equals(d) || !dkc.equals(kd)) {
System.out.println("Danger Will Robinson!");
Normalizer.SHOW_PROGRESS = true;
d = Main.nfd.normalize(cc);
d = Default.nfd.normalize(cc);
}
// printout
@ -1033,7 +1033,7 @@ public class GenerateData implements UCD_Types {
+ Utility.hex(kc," ") + ";" + Utility.hex(kd," ")
+ "; # ("
+ comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; "
+ ") " + Main.ucd.getName(cc));
+ ") " + Default.ucd.getName(cc));
}
static StringBuffer commaResult = new StringBuffer();
@ -1044,7 +1044,7 @@ public class GenerateData implements UCD_Types {
int cp;
for (int i = 0; i < s.length(); i += UTF32.count16(i)) {
cp = UTF32.char32At(s, i);
if (Main.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
if (Default.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
UTF32.append32(commaResult, cp);
}
return commaResult.toString();
@ -1078,7 +1078,7 @@ public class GenerateData implements UCD_Types {
};
static final void generateAge(String directory, String filename) throws IOException {
Main.setUCD();
Default.setUCD();
String newFile = directory + filename + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile);
String mostRecent = generateBat(directory, filename, getFileSuffix(true));
@ -1174,32 +1174,32 @@ public class GenerateData implements UCD_Types {
}
public static void listCombiningAccents() throws IOException {
Main.setUCD();
Default.setUCD();
PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true));
Set set = new TreeSet();
Set set2 = new TreeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
Utility.dot(i);
if (!Main.ucd.isRepresented(i)) continue;
if (!Default.ucd.isRepresented(i)) continue;
if (!Main.nfd.normalizationDiffers(i)) {
if (Main.ucd.getScript(i) == LATIN_SCRIPT) {
if (!Default.nfd.normalizationDiffers(i)) {
if (Default.ucd.getScript(i) == LATIN_SCRIPT) {
int cp = i;
String hex = "u" + Utility.hex(cp, 4);
set.add("# yyy $x <> \\" + hex + " ; # " + Main.ucd.getName(cp));
set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
}
continue;
}
String decomp = Main.nfd.normalize(i);
String decomp = Default.nfd.normalize(i);
int j;
for (j = 0; j < decomp.length(); j += UTF16.getCharCount(i)) {
int cp = UTF16.charAt(decomp, j);
byte cat = Main.ucd.getCategory(cp);
byte cat = Default.ucd.getCategory(cp);
if (cat != Mn) continue;
String hex = "u" + Utility.hex(cp, 4);
set.add("# xxx $x <> \\" + hex + " ; # " + Main.ucd.getName(cp));
set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
}
}
@ -1211,7 +1211,7 @@ public class GenerateData implements UCD_Types {
}
public static void listGreekVowels() throws IOException {
Main.setUCD();
Default.setUCD();
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true));
Set set = new TreeSet();
Set set2 = new TreeSet();
@ -1224,14 +1224,14 @@ public class GenerateData implements UCD_Types {
for (char i = 0; i < 0xFFFF; ++i) {
Utility.dot(i);
if (!Main.ucd.isRepresented(i)) continue;
if (Main.ucd.getScript(i) != GREEK_SCRIPT) continue;
String decomp = Main.nfd.normalize(i);
if (!Default.ucd.isRepresented(i)) continue;
if (Default.ucd.getScript(i) != GREEK_SCRIPT) continue;
String decomp = Default.nfd.normalize(i);
if (decomp.indexOf('\u0306') >= 0) continue; // skip breve
if (decomp.indexOf('\u0304') >= 0) continue; // skip macron
String comp = Main.nfc.normalize(decomp);
String comp = Default.nfc.normalize(decomp);
if (!comp.equals(String.valueOf(i))) continue; // skip compats
char first = decomp.charAt(0);
@ -1245,7 +1245,7 @@ public class GenerateData implements UCD_Types {
for (int j = 0; j < diphthongStart.length(); ++j) {
String v = diphthongStart.substring(j, j+1);
char vc = v.charAt(0);
if (Main.ucd.getCategory(vc) == Ll && Main.ucd.getCategory(first) == Lu) continue;
if (Default.ucd.getCategory(vc) == Ll && Default.ucd.getCategory(first) == Lu) continue;
if (etas.indexOf(vc) >= 0 && iotas.indexOf(first) >= 0) continue;
set.add(new Pair(h + v + first, new Pair(v + decomp, v + i)));
}
@ -1271,7 +1271,7 @@ public class GenerateData implements UCD_Types {
public static void listKatakana() throws IOException {
Main.setUCD();
Default.setUCD();
for (char i = 'a'; i <= 'z'; ++i) {
doKana(String.valueOf(i));
if (i == 'c') doKana("ch");
@ -1304,18 +1304,18 @@ public class GenerateData implements UCD_Types {
}
public static void genTrailingZeros() {
Main.setUCD();
Default.setUCD();
UnicodeSet result = new UnicodeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
if ((i & 0xFFF) == 0) System.out.println("# " + i);
if (!Main.ucd.isAssigned(i)) continue;
if (!Main.nfd.normalizationDiffers(i)) continue;
String decomp = Main.nfd.normalize(i);
if (!Default.ucd.isAssigned(i)) continue;
if (!Default.nfd.normalizationDiffers(i)) continue;
String decomp = Default.nfd.normalize(i);
int cp;
for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(decomp,j);
if (j == 0) continue; // skip first
if (Main.ucd.getCombiningClass(cp) == 0) {
if (Default.ucd.getCombiningClass(cp) == 0) {
result.add(cp);
}
}
@ -1328,8 +1328,8 @@ public class GenerateData implements UCD_Types {
Utility.hex(start)
+ (start != end ? ".." + Utility.hex(end) : "")
+ "; "
+ Main.ucd.getName(start)
+ (start != end ? ".." + Main.ucd.getName(end) : ""));
+ Default.ucd.getName(start)
+ (start != end ? ".." + Default.ucd.getName(end) : ""));
}
System.out.println("TrailingZero count: " + result.size());
}

View file

@ -5,35 +5,17 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2002/03/20 00:21:42 $
* $Revision: 1.10 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import com.ibm.text.utility.*;
import java.util.Date;
public final class Main implements UCD_Types {
static String ucdVersion = UCD.latestVersion;
static UCD ucd;
static Normalizer nfc;
static Normalizer nfd;
static Normalizer nfkc;
static Normalizer nfkd;
static Normalizer[] nf = new Normalizer[4];
static void setUCD() {
ucd = UCD.make(Main.ucdVersion);
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, Main.ucdVersion);
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, Main.ucdVersion);
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, Main.ucdVersion);
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, Main.ucdVersion);
System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(Main.ucd.getDate())));
}
static final String[] ALL_FILES = {
"CaseFolding",
"CompositionExclusions",
@ -74,8 +56,9 @@ public final class Main implements UCD_Types {
VerifyUCD.CheckCaseFold();
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{ucdVersion});
else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
else if (arg.equalsIgnoreCase("version")) Default.setUCD(args[++i]);
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("testskippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
@ -105,6 +88,10 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
* $Date: 2001/12/13 23:35:57 $
* $Revision: 1.3 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -33,14 +33,14 @@ public final class TestNormalization {
public static void main(String[] args) throws java.io.IOException {
System.out.println("Creating Normalizers");
Main.setUCD();
Default.setUCD();
String x = UTF32.valueOf32(0x10000);
check("NFC", Main.nfc, x);
check("NFD", Main.nfd, x);
check("NFKC", Main.nfkc, x);
check("NFKD", Main.nfkd, x);
check("NFC", Default.nfc, x);
check("NFD", Default.nfd, x);
check("NFKC", Default.nfkc, x);
check("NFKD", Default.nfkd, x);
out = new PrintWriter(
@ -87,36 +87,36 @@ public final class TestNormalization {
}
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
errorCount += check("NFCa", Main.nfc, parts[1], parts[0]);
errorCount += check("NFCb", Main.nfc, parts[1], parts[1]);
errorCount += check("NFCc", Main.nfc, parts[1], parts[2]);
errorCount += check("NFCa", Default.nfc, parts[1], parts[0]);
errorCount += check("NFCb", Default.nfc, parts[1], parts[1]);
errorCount += check("NFCc", Default.nfc, parts[1], parts[2]);
// c4 == NFC(c4) == NFC(c5)
errorCount += check("NFCd", Main.nfc, parts[3], parts[3]);
errorCount += check("NFCe", Main.nfc, parts[3], parts[4]);
errorCount += check("NFCd", Default.nfc, parts[3], parts[3]);
errorCount += check("NFCe", Default.nfc, parts[3], parts[4]);
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
errorCount += check("NFDa", Main.nfd, parts[2], parts[0]);
errorCount += check("NFDb", Main.nfd, parts[2], parts[1]);
errorCount += check("NFDc", Main.nfd, parts[2], parts[2]);
errorCount += check("NFDa", Default.nfd, parts[2], parts[0]);
errorCount += check("NFDb", Default.nfd, parts[2], parts[1]);
errorCount += check("NFDc", Default.nfd, parts[2], parts[2]);
// c5 == NFD(c4) == NFD(c5)
errorCount += check("NFDd", Main.nfd, parts[4], parts[3]);
errorCount += check("NFDe", Main.nfd, parts[4], parts[4]);
errorCount += check("NFDd", Default.nfd, parts[4], parts[3]);
errorCount += check("NFDe", Default.nfd, parts[4], parts[4]);
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
errorCount += check("NFKCa", Main.nfkc, parts[3], parts[0]);
errorCount += check("NFKCb", Main.nfkc, parts[3], parts[1]);
errorCount += check("NFKCc", Main.nfkc, parts[3], parts[2]);
errorCount += check("NFKCd", Main.nfkc, parts[3], parts[3]);
errorCount += check("NFKCe", Main.nfkc, parts[3], parts[4]);
errorCount += check("NFKCa", Default.nfkc, parts[3], parts[0]);
errorCount += check("NFKCb", Default.nfkc, parts[3], parts[1]);
errorCount += check("NFKCc", Default.nfkc, parts[3], parts[2]);
errorCount += check("NFKCd", Default.nfkc, parts[3], parts[3]);
errorCount += check("NFKCe", Default.nfkc, parts[3], parts[4]);
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
errorCount += check("NFKDa", Main.nfkd, parts[4], parts[0]);
errorCount += check("NFKDb", Main.nfkd, parts[4], parts[1]);
errorCount += check("NFKDc", Main.nfkd, parts[4], parts[2]);
errorCount += check("NFKDd", Main.nfkd, parts[4], parts[3]);
errorCount += check("NFKDe", Main.nfkd, parts[4], parts[4]);
errorCount += check("NFKDa", Default.nfkd, parts[4], parts[0]);
errorCount += check("NFKDb", Default.nfkd, parts[4], parts[1]);
errorCount += check("NFKDc", Default.nfkd, parts[4], parts[2]);
errorCount += check("NFKDd", Default.nfkd, parts[4], parts[3]);
errorCount += check("NFKDe", Default.nfkd, parts[4], parts[4]);
}
System.out.println("Total errors in file: " + errorCount
+ ", lines: " + lineErrorCount);
@ -150,21 +150,21 @@ public final class TestNormalization {
}
String otherList = "";
if (!base.equals(other)) {
otherList = "(" + Main.ucd.getCodeAndName(other) + ")";
otherList = "(" + Default.ucd.getCodeAndName(other) + ")";
}
out.println("DIFF " + type + ": "
+ Main.ucd.getCodeAndName(base) + " != "
+ Default.ucd.getCodeAndName(base) + " != "
+ type
+ otherList
+ " == " + Main.ucd.getCodeAndName(trans)
+ " == " + Default.ucd.getCodeAndName(trans)
+ temp
);
return 1;
}
} catch (Exception e) {
throw new ChainException("DIFF " + type + ": "
+ Main.ucd.getCodeAndName(base) + " != "
+ type + "(" + Main.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
+ Default.ucd.getCodeAndName(base) + " != "
+ type + "(" + Default.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
}
return 0;
}
@ -178,10 +178,10 @@ public final class TestNormalization {
if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
if (charsListed.get(missing)) continue;
String x = UTF32.valueOf32(missing);
errorCount += check("NFC", Main.nfc, x);
errorCount += check("NFD", Main.nfd, x);
errorCount += check("NFKC", Main.nfkc, x);
errorCount += check("NFKD", Main.nfkd, x);
errorCount += check("NFC", Default.nfc, x);
errorCount += check("NFD", Default.nfd, x);
errorCount += check("NFKC", Default.nfkc, x);
errorCount += check("NFKD", Default.nfkd, x);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2002/03/20 00:21:42 $
* $Revision: 1.10 $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -31,7 +31,7 @@ public final class UCD implements UCD_Types {
/**
* Used for the default version.
*/
public static final String latestVersion = "3.1.1";
public static final String latestVersion = "3.2.0";
/**
* Create singleton instance for default (latest) version
@ -675,6 +675,11 @@ public final class UCD implements UCD_Types {
return UCD_Names.SCRIPT[prop];
}
public static String getScriptID_fromIndex(byte prop, byte length) {
if (length == SHORT) return UCD_Names.ABB_SCRIPT[prop];
return UCD_Names.SCRIPT[prop];
}
public String getAgeID(int codePoint) {
return getAgeID_fromIndex(getAge(codePoint));
}

File diff suppressed because it is too large Load diff

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.12 $
* $Date: 2002/04/23 01:59:16 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -422,12 +422,12 @@ public final class Utility { // COMMON UTILITIES
// fix noncharacters, since XML can't handle
case 0xFFFE: case 0xFFFF:
return "#x" + hex(c,1) + ";";
return "#" + hex(c,1);
}
// fix surrogates, since XML can't handle
if (UTF32.isSurrogate(c)) {
return "#x" + hex(c,1) + ";";
return "#" + hex(c,1);
}
if (c <= 0x7E || UTF8) {
@ -519,9 +519,14 @@ public final class Utility { // COMMON UTILITIES
}
public static PrintWriter openPrintWriter(String filename, boolean removeCR, boolean latin1) throws IOException {
File file = new File(getOutputName(filename));
System.out.println("Creating File: " + file);
File parent = new File(file.getParent());
//System.out.println("Creating File: "+ parent);
parent.mkdirs();
return new PrintWriter(
new UTF8StreamWriter(
new FileOutputStream(getOutputName(filename)),
new FileOutputStream(file),
32*1024,
removeCR, latin1));
}