mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
misc. updates
X-SVN-Rev: 8714
This commit is contained in:
parent
5e952008c7
commit
31bee02d7f
14 changed files with 424 additions and 59 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2002/04/24 02:38:52 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -25,6 +25,15 @@ public class WriteCharts implements UCD_Types {
|
|||
|
||||
static boolean HACK_KANA = false;
|
||||
|
||||
static public void special() {
|
||||
Default.setUCD();
|
||||
for (int i = 0xE000; i < 0x10000; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (Default.nfkc.normalizationDiffers(i)) continue;
|
||||
System.out.println(Default.ucd.getCodeAndName(i));
|
||||
}
|
||||
}
|
||||
|
||||
static public void collationChart(UCA uca) throws IOException {
|
||||
Default.setUCD(uca.getUCDVersion());
|
||||
HACK_KANA = true;
|
||||
|
@ -408,6 +417,131 @@ public class WriteCharts implements UCD_Types {
|
|||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
|
||||
if (stoplist.contains(key)) return;
|
||||
for (int i = 0; i < key.length(); ++i) {
|
||||
char c = key.charAt(i);
|
||||
if ('0' <= c && c <= '9') return;
|
||||
}
|
||||
Set result = (Set)m.get(key);
|
||||
if (result == null) {
|
||||
result = new TreeSet();
|
||||
m.put(key, result);
|
||||
}
|
||||
result.add(ch);
|
||||
}
|
||||
|
||||
static public void indexChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
||||
Map map = new TreeMap();
|
||||
Set stoplist = new TreeSet();
|
||||
|
||||
String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"};
|
||||
stoplist.addAll(Arrays.asList(stops));
|
||||
System.out.println("Stop-list: " + stoplist);
|
||||
|
||||
for (int i = 0; i < LIMIT_SCRIPT; ++i) {
|
||||
stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i));
|
||||
}
|
||||
System.out.println("Stop-list: " + stoplist);
|
||||
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (!Default.ucd.isRepresented(i)) continue;
|
||||
if (0xAC00 <= i && i <= 0xD7A3) continue;
|
||||
if (Default.ucd.hasComputableName(i)) continue;
|
||||
|
||||
String s = Default.ucd.getName(i);
|
||||
if (s == null) continue;
|
||||
|
||||
if (s.startsWith("<")) {
|
||||
System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
|
||||
}
|
||||
String ch = UTF16.valueOf(i);
|
||||
int last = -1;
|
||||
int j;
|
||||
for (j = 0; j < s.length(); ++j) {
|
||||
char c = s.charAt(j);
|
||||
if ('A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
|
||||
if (last == -1) last = j;
|
||||
} else {
|
||||
if (last != -1) {
|
||||
String word = s.substring(last, j);
|
||||
addMapChar(map, stoplist, word, ch);
|
||||
last = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (last != -1) {
|
||||
String word = s.substring(last, j);
|
||||
addMapChar(map, stoplist, word, ch);
|
||||
}
|
||||
}
|
||||
|
||||
PrintWriter output = null;
|
||||
|
||||
Iterator it = map.keySet().iterator();
|
||||
|
||||
int oldScript = -127;
|
||||
|
||||
int counter = 0;
|
||||
String[] replacement = new String[] {"%%%", "Name Charts"};
|
||||
String folder = "charts\\name\\";
|
||||
|
||||
Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
|
||||
Utility.copyTextFile("charts.css", false, folder + "charts.css");
|
||||
Utility.copyTextFile("name_help.html", true, folder + "help.html");
|
||||
|
||||
indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
|
||||
Utility.appendFile("index_header.html", true, indexFile, replacement);
|
||||
|
||||
int columnCount = 0;
|
||||
char lastInitial = 0;
|
||||
|
||||
while (it.hasNext()) {
|
||||
Utility.dot(counter);
|
||||
|
||||
String key = (String) it.next();
|
||||
|
||||
Set chars = (Set) map.get(key);
|
||||
|
||||
char initial = key.charAt(0);
|
||||
|
||||
if (initial != lastInitial) {
|
||||
closeFile(output);
|
||||
output = null;
|
||||
lastInitial = initial;
|
||||
}
|
||||
|
||||
if (output == null) {
|
||||
output = openFile2(0, folder, String.valueOf(initial));
|
||||
}
|
||||
|
||||
output.println("<tr><td class='h'>" + key + "</td>");
|
||||
columnCount = 1;
|
||||
|
||||
Iterator sublist = chars.iterator();
|
||||
while (sublist.hasNext()) {
|
||||
|
||||
String ch = (String) sublist.next();
|
||||
if (columnCount > 10) {
|
||||
output.println("</tr><tr><td></td>");
|
||||
columnCount = 1;
|
||||
}
|
||||
showCell(output, ch, "<td ", "", true);
|
||||
++columnCount;
|
||||
continue;
|
||||
}
|
||||
|
||||
output.println("</tr>");
|
||||
|
||||
}
|
||||
|
||||
closeFile(output);
|
||||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
|
||||
String name = Default.ucd.getName(s);
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
@ -481,6 +615,21 @@ public class WriteCharts implements UCD_Types {
|
|||
return output;
|
||||
}
|
||||
|
||||
static PrintWriter openFile2(int count, String directory, String name) throws IOException {
|
||||
String fileName = "chart_" + name + (count > 1 ? count + "" : "") + ".html";
|
||||
PrintWriter output = Utility.openPrintWriter(directory + fileName, false, false);
|
||||
Utility.fixDot();
|
||||
System.out.println("Writing: " + name);
|
||||
indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");
|
||||
String title = name;
|
||||
output.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
output.println("<title>" + title + "</title>");
|
||||
output.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
|
||||
output.println("</head><body>");
|
||||
output.println("<table>");
|
||||
return output;
|
||||
}
|
||||
|
||||
static final int
|
||||
NULL_ORDER = -3,
|
||||
IGNORABLE_ORDER = -2,
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2002/04/23 22:45:40 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2002/05/29 02:00:59 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -73,6 +73,8 @@ public class WriteCollationData implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(collator);
|
||||
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
|
||||
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
|
||||
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
|
||||
else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);
|
||||
|
|
|
@ -36,7 +36,7 @@ at least one of their case forms (lower, title, upper, or fold).</p>
|
|||
listed in NoCaseMapping.</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/tr15-18.html">NFKD</a>,
|
||||
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>,
|
||||
to place related characters next to one another.</li>
|
||||
<li>To help pick out cells visually, the more interesting ones have a light
|
||||
blue background. The other cells have grayed-out text.
|
||||
|
@ -53,7 +53,7 @@ at least one of their case forms (lower, title, upper, or fold).</p>
|
|||
</li>
|
||||
<li>If your browser supports tooltips, then hovering your mouse over cells
|
||||
will show the names of the characters.</li>
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr21/">UAX
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr21/" target="_top">UAX
|
||||
#21: Case Mappings</a>.</li>
|
||||
</ul>
|
||||
|
||||
|
|
|
@ -14,4 +14,5 @@ tt { font-size: 50% }
|
|||
|
||||
td.g { font-size: 120%; text-align: Center; width: 72px; color: #808080; }
|
||||
td.n { font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; }
|
||||
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
|
||||
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
|
||||
td.h { font-size: 120%; text-align: Left; color: #000000; background-color: #EEEEEE; }
|
||||
|
|
|
@ -116,7 +116,7 @@ character in the chart, as follows.</p>
|
|||
Private Use Area, etc. are represented by a sampling.</li>
|
||||
<li>Some unassigned code points, noncharacters and other edge cases are also
|
||||
added to the list for comparison.</li>
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr10/">UTS
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr10/" target="_top">UTS
|
||||
#10: Unicode Collation Algorithm</a>.</li>
|
||||
</ul>
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).</p>
|
|||
<li>By general category, in the latter two cases</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/tr15-18.html">NFKD</a>,
|
||||
<li>Within each chart page, the code points are sorted by folded <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>,
|
||||
to place related characters next to one another.</li>
|
||||
<li>To keep the size of the Hangul chart manageable, characters U+AD00..U+D6FF
|
||||
(관..훿) are omitted.</li>
|
||||
|
@ -52,7 +52,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).</p>
|
|||
</li>
|
||||
<li>If your browser supports tooltips, then hovering your mouse over cells
|
||||
will show the names of the characters.</li>
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr15/">UAX
|
||||
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">UAX
|
||||
#15: Unicode Normalization Forms</a>.</li>
|
||||
</ul>
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
|
||||
* $Date: 2001/12/06 00:05:53 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -16,14 +16,24 @@ import java.io.*;
|
|||
|
||||
class DiffPropertyLister extends PropertyLister {
|
||||
private UCD oldUCD;
|
||||
private static final int NOPROPERTY = -1;
|
||||
|
||||
/**
 * Creates a lister comparing two UCD versions, optionally restricted to one
 * derived property.
 *
 * @param oldUCDName version passed to UCD.make for the old side; may be null,
 *                   in which case no old UCD (and no old property) is loaded
 * @param newUCDName version passed to UCD.make for the new side
 * @param output     destination writer
 * @param property   derived-property index, or NOPROPERTY to compare
 *                   allocation only
 */
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output, int property) {
    this.output = output;
    this.ucdData = UCD.make(newUCDName);
    if (property != NOPROPERTY) newProp = DerivedProperty.make(property, ucdData);

    if (oldUCDName != null) {
        this.oldUCD = UCD.make(oldUCDName);
        if (property != NOPROPERTY) oldProp = DerivedProperty.make(property, oldUCD);
    }
    // category breaks are only used when diffing a property
    breakByCategory = property != NOPROPERTY;
    useKenName = false;
}
|
||||
|
||||
/**
 * Creates a lister that compares allocation only (no derived property);
 * delegates to the four-argument constructor with NOPROPERTY.
 */
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
    this(oldUCDName, newUCDName, output, NOPROPERTY);
}
|
||||
|
||||
public String valueName(int cp) {
|
||||
return major_minor_only(ucdData.getVersion());
|
||||
|
@ -39,13 +49,38 @@ class DiffPropertyLister extends PropertyLister {
|
|||
}
|
||||
*/
|
||||
|
||||
UnicodeProperty newProp = null;
|
||||
UnicodeProperty oldProp = null;
|
||||
String value = "";
|
||||
|
||||
public String optionalComment(int cp) {
|
||||
String normal = super.optionalComment(cp);
|
||||
return oldUCD.getModCatID_fromIndex(
|
||||
oldUCD.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0))
|
||||
+ "/" + normal;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public byte status(int cp) {
|
||||
if (newProp == null) {
|
||||
return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
|
||||
}
|
||||
|
||||
// just look at property differences among allocated characters
|
||||
|
||||
if (!ucdData.isAllocated(cp)) return EXCLUDE;
|
||||
if (!oldUCD.isAllocated(cp)) return EXCLUDE;
|
||||
|
||||
String val = newProp.getValue(cp);
|
||||
String oldVal = oldProp.getValue(cp);
|
||||
if (!oldVal.equals(val)) return INCLUDE;
|
||||
return EXCLUDE;
|
||||
|
||||
/*if (cp == 0xFFFF) {
|
||||
System.out.println("# " + Utility.hex(cp));
|
||||
}
|
||||
*/
|
||||
return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
|
||||
}
|
||||
|
||||
public String headerString() {
|
||||
|
@ -91,6 +126,8 @@ class DiffPropertyLister extends PropertyLister {
|
|||
*/
|
||||
|
||||
private String major_minor_only(String s) {
|
||||
if (newProp != null) return s;
|
||||
|
||||
return s.substring(0, s.lastIndexOf('.'));
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1077,6 +1077,55 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
};
|
||||
|
||||
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
|
||||
|
||||
Default.setUCD();
|
||||
String newFile = directory + filename + getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile);
|
||||
String mostRecent = generateBat(directory, filename, getFileSuffix(true));
|
||||
try {
|
||||
for (int i = 0; i < list.length; ++i) {
|
||||
int prop = list[i];
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println("###### " + DerivedProperty.make(prop, Default.ucd).getName());
|
||||
//log.println();
|
||||
//log.println(HORIZONTAL_LINE);
|
||||
//new DiffPropertyLister("3.2.0", "1.1.0", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.0.0", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.2", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.5", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "2.1.8", log, prop).print();
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
new DiffPropertyLister("3.2.0", "3.0.0", log, prop).print();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.0.1", log, prop).print();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.1.0", log, prop).print();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "3.1.1", log, prop).print();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
}
|
||||
} finally {
|
||||
if (log != null) {
|
||||
log.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static final void generateAge(String directory, String filename) throws IOException {
|
||||
Default.setUCD();
|
||||
String newFile = directory + filename + getFileSuffix(true);
|
||||
|
@ -1102,7 +1151,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# For details on the contents of each version, see");
|
||||
log.println("# http://www.unicode.org/versions/enumeratedversions.html.");
|
||||
|
||||
http://www.unicode.org/versions/enumeratedversions.html
|
||||
// http://www.unicode.org/versions/enumeratedversions.html
|
||||
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.3 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -23,9 +23,10 @@ public final class GenerateHanTransliterator {
|
|||
static final boolean TESTING = false;
|
||||
static int type;
|
||||
|
||||
public static void main() {
|
||||
public static void main(int typeIn) {
|
||||
type = typeIn;
|
||||
Default.setUCD();
|
||||
try {
|
||||
type = 0;
|
||||
System.out.println("Starting");
|
||||
generate();
|
||||
} catch (Exception e) {
|
||||
|
@ -43,23 +44,27 @@ public final class GenerateHanTransliterator {
|
|||
String name = "$Han$English";
|
||||
String key = "kDefinition"; // kMandarin, kKorean, kJapaneseKun, kJapaneseOn
|
||||
String filter = "kJis0";
|
||||
String filename = "Han_English";
|
||||
switch (type) {
|
||||
default: break;
|
||||
case 1: name = "$Han$OnRomaji";
|
||||
key = "kJapaneseOn";
|
||||
filter = "kJis0";
|
||||
filename = "Han_Romaji";
|
||||
break;
|
||||
case 2: name = "$Han$Pinyin";
|
||||
key = "kMandarin";
|
||||
filename = "Han_Pinyin";
|
||||
filter = null;
|
||||
break;
|
||||
}
|
||||
|
||||
out = Utility.openPrintWriter("Transliterate_Han_English.txt");
|
||||
err = Utility.openPrintWriter("Transliterate_Han_English.log.txt");
|
||||
out = Utility.openPrintWriter("Transliterate_" + filename + ".txt", false, false);
|
||||
err = Utility.openPrintWriter("Transliterate_" + filename + "_log.txt", false, false);
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true);
|
||||
|
||||
int totalCount = 0;
|
||||
int count = 0;
|
||||
String oldCode = "";
|
||||
String oldLine = "";
|
||||
|
@ -76,11 +81,15 @@ public final class GenerateHanTransliterator {
|
|||
if (line == null) break;
|
||||
if (line.length() < 6) continue;
|
||||
if (line.charAt(0) == '#') continue;
|
||||
String code = line.substring(2,6);
|
||||
int tabPos = line.indexOf(' ');
|
||||
String code = line.substring(2, tabPos);
|
||||
|
||||
/* if (code.compareTo("9FA0") >= 0) {
|
||||
System.out.println("? " + line);
|
||||
}*/
|
||||
if (!code.equals(oldCode)) {
|
||||
totalCount++;
|
||||
|
||||
if (foundKey && foundFilter) {
|
||||
count++;
|
||||
/*if (true) { //*/
|
||||
|
@ -106,6 +115,8 @@ public final class GenerateHanTransliterator {
|
|||
}
|
||||
if (foundKey && foundFilter) printDef(out, oldCode, oldLine, oldStart);
|
||||
|
||||
System.out.println("Total: " + totalCount);
|
||||
System.out.println("Defined Count: " + count);
|
||||
in.close();
|
||||
out.close();
|
||||
err.close();
|
||||
|
@ -137,6 +148,7 @@ public final class GenerateHanTransliterator {
|
|||
String definition = line.substring(start,end);
|
||||
if (type == 2) definition = handlePinyin(definition, line);
|
||||
definition.trim();
|
||||
definition = definition.toLowerCase();
|
||||
String cp = UTF16.valueOf(Integer.parseInt(code, 16));
|
||||
String key = (String) definitionMap.get(definition);
|
||||
if (key == null) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -65,8 +65,13 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
|
||||
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
|
||||
|
||||
else if (arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main();
|
||||
else if (arg.equalsIgnoreCase("hanTransliterator")) GenerateHanTransliterator.main(0);
|
||||
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
|
||||
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
|
||||
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
|
||||
|
||||
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
|
||||
|
@ -182,6 +187,10 @@ public final class Main implements UCD_Types {
|
|||
} else if (arg.equalsIgnoreCase("DerivedAge")) {
|
||||
GenerateData.generateAge("DerivedData/", "DerivedAge");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
|
||||
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
|
||||
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
||||
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
|
||||
* $Date: 2002/03/15 01:57:01 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -58,10 +58,7 @@ abstract public class PropertyLister implements UCD_Types {
|
|||
|
||||
public String optionalComment(int cp) {
|
||||
if (!usePropertyComment || !breakByCategory) return "";
|
||||
byte cat = getModCat(cp);
|
||||
if (cat == FAKELC) return "L&";
|
||||
if (cat == FAKENC) return "NC";
|
||||
return ucdData.getCategoryID_fromIndex(cat);
|
||||
return ucdData.getModCatID_fromIndex(getModCat(cp));
|
||||
}
|
||||
|
||||
public int minPropertyWidth() {
|
||||
|
@ -144,6 +141,10 @@ abstract public class PropertyLister implements UCD_Types {
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
byte getModCat(int cp) {
|
||||
return ucdData.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
|
@ -168,23 +169,6 @@ abstract public class PropertyLister implements UCD_Types {
|
|||
return lastSpace;
|
||||
}
|
||||
|
||||
private static final byte FAKERC = 63; // fake category for comparison
|
||||
private static final byte FAKELC = 63; // fake category for comparison
|
||||
private static final byte FAKENC = 64; // fake category for comparison
|
||||
|
||||
private byte getModCat(int cp) {
|
||||
byte cat = ucdData.getCategory(cp);
|
||||
if (cat == UNASSIGNED && ucdData.isNoncharacter(cp)) cat = FAKENC;
|
||||
else if (breakByCategory) {
|
||||
if (cat == Lt || cat == Ll || cat == Lu) cat = FAKELC;
|
||||
} else {
|
||||
// MASH almost everything together
|
||||
if (cat != CONTROL && cat != FORMAT && cat != SURROGATE
|
||||
&& cat != PRIVATE_USE && cat != UNASSIGNED) cat = FAKERC;
|
||||
}
|
||||
return cat;
|
||||
}
|
||||
|
||||
public int print() {
|
||||
set.clear();
|
||||
int count = 0;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -194,6 +194,79 @@ public final class UCD implements UCD_Types {
|
|||
public byte getCategory(int codePoint) {
|
||||
return get(codePoint, false).generalCategory;
|
||||
}
|
||||
|
||||
private static final byte FAKE_SYMBOL = 57; // fake category for comparison
|
||||
private static final byte FAKE_PUNCTUATION = 58; // fake category for comparison
|
||||
private static final byte FAKE_SEPERATOR = 59; // fake category for comparison
|
||||
private static final byte FAKE_NUMBER = 60; // fake category for comparison
|
||||
private static final byte FAKE_MARK = 61; // fake category for comparison
|
||||
private static final byte FAKE_LETTER = 62; // fake category for comparison
|
||||
private static final byte FAKE_OTHER = 63; // fake category for comparison
|
||||
private static final byte FAKENC = 31; // fake category for comparison
|
||||
|
||||
public byte getModCat(int cp, int collapseBits) {
|
||||
byte cat = getCategory(cp);
|
||||
if (cat == UNASSIGNED && isNoncharacter(cp)) cat = FAKENC;
|
||||
if (((1<<cat) & collapseBits) != 0) {
|
||||
switch (cat) {
|
||||
case UNASSIGNED: cat = FAKE_OTHER; break;
|
||||
case FAKENC: cat = FAKE_OTHER; break;
|
||||
|
||||
case UPPERCASE_LETTER: cat = FAKE_LETTER; break;
|
||||
case LOWERCASE_LETTER: cat = FAKE_LETTER; break;
|
||||
case TITLECASE_LETTER: cat = FAKE_LETTER; break;
|
||||
case MODIFIER_LETTER: cat = FAKE_LETTER; break;
|
||||
case OTHER_LETTER: cat = FAKE_LETTER; break;
|
||||
|
||||
case NON_SPACING_MARK: cat = FAKE_MARK; break;
|
||||
case ENCLOSING_MARK: cat = FAKE_MARK; break;
|
||||
case COMBINING_SPACING_MARK: cat = FAKE_MARK; break;
|
||||
|
||||
case DECIMAL_DIGIT_NUMBER: cat = FAKE_NUMBER; break;
|
||||
case LETTER_NUMBER: cat = FAKE_NUMBER; break;
|
||||
case OTHER_NUMBER: cat = FAKE_NUMBER; break;
|
||||
|
||||
case SPACE_SEPARATOR: cat = FAKE_SEPERATOR; break;
|
||||
case LINE_SEPARATOR: cat = FAKE_SEPERATOR; break;
|
||||
case PARAGRAPH_SEPARATOR: cat = FAKE_SEPERATOR; break;
|
||||
|
||||
case CONTROL: cat = FAKE_OTHER; break;
|
||||
case FORMAT: cat = FAKE_OTHER; break;
|
||||
case UNUSED_CATEGORY: cat = FAKE_OTHER; break;
|
||||
case PRIVATE_USE: cat = FAKE_OTHER; break;
|
||||
case SURROGATE: cat = FAKE_OTHER; break;
|
||||
|
||||
case DASH_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case START_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case END_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case CONNECTOR_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case OTHER_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case INITIAL_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
case FINAL_PUNCTUATION: cat = FAKE_PUNCTUATION; break;
|
||||
|
||||
case MATH_SYMBOL: cat = FAKE_SYMBOL; break;
|
||||
case CURRENCY_SYMBOL: cat = FAKE_SYMBOL; break;
|
||||
case MODIFIER_SYMBOL: cat = FAKE_SYMBOL; break;
|
||||
case OTHER_SYMBOL: cat = FAKE_SYMBOL; break;
|
||||
|
||||
}
|
||||
}
|
||||
return cat;
|
||||
}
|
||||
|
||||
public String getModCatID_fromIndex(byte cat) {
|
||||
switch (cat) {
|
||||
case FAKE_SYMBOL: return "S&";
|
||||
case FAKE_PUNCTUATION: return "P&";
|
||||
case FAKE_SEPERATOR: return "Z&";
|
||||
case FAKE_NUMBER: return "N&";
|
||||
case FAKE_MARK: return "M&";
|
||||
case FAKE_LETTER: return "L&";
|
||||
case FAKE_OTHER: return "C&";
|
||||
case FAKENC: return "NC";
|
||||
}
|
||||
return getCategoryID_fromIndex(cat);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the main category, as a mask
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2002/03/22 22:08:53 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -150,6 +150,7 @@ public interface UCD_Types {
|
|||
|
||||
static final int
|
||||
LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt) | (1<<Lm) | (1 << Lo),
|
||||
CASED_LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt),
|
||||
MARK_MASK = (1<<Mn) | (1<<Me) | (1<<Mc),
|
||||
NUMBER_MASK = (1<<Nd) | (1<<Nl) | (1<<No),
|
||||
SEPARATOR_MASK = (1<<Zs) | (1<<Zl) | (1<<Zp),
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2002/04/23 01:59:14 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2002/05/29 02:01:00 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -284,6 +284,54 @@ public class VerifyUCD implements UCD_Types {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static void checkBIDI() {
|
||||
Default.setUCD();
|
||||
|
||||
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
|
||||
if (!Default.nfd.normalizationDiffers(cp)) continue;
|
||||
|
||||
String decomp = Default.nfd.normalize(cp);
|
||||
String comp = Default.nfc.normalize(cp);
|
||||
String source = UTF16.valueOf(cp);
|
||||
|
||||
String bidiDecomp = getBidi(decomp, true);
|
||||
String bidiComp = getBidi(comp, true);
|
||||
String bidiSource = getBidi(source, true);
|
||||
|
||||
if (!bidiDecomp.equals(bidiSource) || !bidiComp.equals(bidiSource)) {
|
||||
Utility.fixDot();
|
||||
System.out.println(Default.ucd.getCodeAndName(cp) + ": " + getBidi(source, false));
|
||||
System.out.println("\tNFC: " + Default.ucd.getCodeAndName(comp) + ": " + getBidi(comp, false));
|
||||
System.out.println("\tNFD: " + Default.ucd.getCodeAndName(decomp) + ": " + getBidi(decomp, false));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String getBidi(String s, boolean compact) {
|
||||
String result = "";
|
||||
byte lastBidi = -1;
|
||||
int cp;
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
byte bidi = Default.ucd.getBidiClass(cp);
|
||||
if (compact) {
|
||||
if (bidi == BIDI_NSM) {
|
||||
if (lastBidi != -1) bidi = lastBidi;
|
||||
}
|
||||
if (bidi == lastBidi && bidi != BIDI_ES && bidi != BIDI_CS) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
result += Default.ucd.getCase(
|
||||
Default.ucd.getBidiClassID_fromIndex(bidi, SHORT), FULL, TITLE);
|
||||
lastBidi = bidi;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void verify() throws IOException {
|
||||
Default.setUCD();
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue