misc. updates

X-SVN-Rev: 8714
This commit is contained in:
Mark Davis 2002-05-29 02:01:00 +00:00
parent 5e952008c7
commit 31bee02d7f
14 changed files with 424 additions and 59 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2002/04/24 02:38:52 $
* $Revision: 1.7 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -25,6 +25,15 @@ public class WriteCharts implements UCD_Types {
static boolean HACK_KANA = false;
/**
 * Debugging aid: walks U+E000..U+FFFF and prints the code and name of
 * every code point that the default UCD reports as represented and for
 * which Default.nfkc.normalizationDiffers returns false.
 */
static public void special() {
    Default.setUCD();
    int cp = 0xE000;
    while (cp <= 0xFFFF) {
        boolean wanted = Default.ucd.isRepresented(cp)
            && !Default.nfkc.normalizationDiffers(cp);
        if (wanted) {
            System.out.println(Default.ucd.getCodeAndName(cp));
        }
        ++cp;
    }
}
static public void collationChart(UCA uca) throws IOException {
Default.setUCD(uca.getUCDVersion());
HACK_KANA = true;
@ -408,6 +417,131 @@ public class WriteCharts implements UCD_Types {
closeIndexFile(indexFile, "", CASE);
}
/**
 * Records that the character string ch belongs under the index word key.
 * Nothing is recorded when key is in stoplist or contains an ASCII digit.
 *
 * @param m        map from word (String) to a sorted Set of character strings
 * @param stoplist words that must never become index entries
 * @param key      the candidate index word
 * @param ch       the character (as a String) to file under key
 */
static public void addMapChar(Map m, Set stoplist, String key, String ch) {
    if (stoplist.contains(key)) return;

    // Any word containing a digit is rejected outright.
    int pos = 0;
    while (pos < key.length()) {
        char unit = key.charAt(pos++);
        if (unit >= '0' && unit <= '9') return;
    }

    // Create the bucket for this word on first use.
    Set bucket = (Set) m.get(key);
    if (bucket == null) {
        bucket = new TreeSet();
        m.put(key, bucket);
    }
    bucket.add(ch);
}
/**
 * Builds the name-index charts under charts\name\: for every word that
 * occurs in a character name, a table row listing the characters whose
 * names contain that word, with one HTML page per initial letter.
 *
 * Code points are skipped when the default UCD does not report them as
 * represented, when they lie in U+AC00..U+D7A3, when hasComputableName is
 * true, or when getName returns null. Words are maximal runs of A-Z and
 * 0-9; addMapChar then drops stop-list words and words containing digits.
 * The stop list holds a fixed set of common words plus every script ID
 * returned by getScriptID_fromIndex for indices below LIMIT_SCRIPT.
 *
 * @throws IOException propagated from the file helpers used to write the charts
 */
static public void indexChart() throws IOException {
    Default.setUCD();
    HACK_KANA = false;

    Map map = new TreeMap();
    Set stoplist = new TreeSet();
    String[] stops = {"LETTER", "CHARACTER", "AND", "CAPITAL", "SMALL", "COMPATIBILITY", "WITH"};
    stoplist.addAll(Arrays.asList(stops));
    System.out.println("Stop-list: " + stoplist);
    for (int i = 0; i < LIMIT_SCRIPT; ++i) {
        stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i));
    }
    System.out.println("Stop-list: " + stoplist);

    // Pass 1: collect word -> set of characters.
    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (!Default.ucd.isRepresented(i)) continue;
        if (0xAC00 <= i && i <= 0xD7A3) continue;
        if (Default.ucd.hasComputableName(i)) continue;

        String s = Default.ucd.getName(i);
        if (s == null) continue;

        if (s.startsWith("<")) {
            System.out.println("Wierd character at " + Default.ucd.getCodeAndName(i));
        }
        String ch = UTF16.valueOf(i);
        int last = -1; // start of the word being scanned, or -1 when between words
        int j;
        for (j = 0; j < s.length(); ++j) {
            char c = s.charAt(j);
            if ('A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
                if (last == -1) last = j;
            } else {
                if (last != -1) {
                    String word = s.substring(last, j);
                    addMapChar(map, stoplist, word, ch);
                    last = -1;
                }
            }
        }
        // Flush a word that runs to the end of the name.
        if (last != -1) {
            String word = s.substring(last, j);
            addMapChar(map, stoplist, word, ch);
        }
    }

    // Pass 2: write one page per initial letter.
    PrintWriter output = null;
    Iterator it = map.keySet().iterator();
    int counter = 0;

    String[] replacement = new String[] {"%%%", "Name Charts"};
    String folder = "charts\\name\\";

    Utility.copyTextFile("index.html", true, folder + "index.html", replacement);
    Utility.copyTextFile("charts.css", false, folder + "charts.css");
    Utility.copyTextFile("name_help.html", true, folder + "help.html");

    indexFile = Utility.openPrintWriter(folder + "index_list.html", false, false);
    Utility.appendFile("index_header.html", true, indexFile, replacement);

    char lastInitial = 0;

    while (it.hasNext()) {
        // NOTE(review): the counter used to stay at 0 for every call; it is
        // advanced here on the assumption that Utility.dot throttles progress
        // output by count -- confirm against Utility.dot.
        Utility.dot(counter++);

        String key = (String) it.next();
        Set chars = (Set) map.get(key);

        char initial = key.charAt(0);

        if (initial != lastInitial) {
            // No page is open yet when the first key is processed.
            if (output != null) closeFile(output);
            output = null;
            lastInitial = initial;
        }

        if (output == null) {
            output = openFile2(0, folder, String.valueOf(initial));
        }

        output.println("<tr><td class='h'>" + key + "</td>");
        int columnCount = 1;
        Iterator sublist = chars.iterator();
        while (sublist.hasNext()) {
            String ch = (String) sublist.next();
            // Wrap onto a continuation row after ten character cells.
            if (columnCount > 10) {
                output.println("</tr><tr><td></td>");
                columnCount = 1;
            }
            showCell(output, ch, "<td ", "", true);
            ++columnCount;
        }

        output.println("</tr>");
    }

    // output is still null when the map was empty.
    if (output != null) closeFile(output);
    closeIndexFile(indexFile, "", CASE);
}
static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
String name = Default.ucd.getName(s);
String comp = Default.nfc.normalize(s);
@ -481,6 +615,21 @@ public class WriteCharts implements UCD_Types {
return output;
}
/**
 * Opens a chart page named chart_&lt;name&gt;[count].html in the given
 * directory (the count is appended only when it is greater than 1), adds a
 * link to it in the shared index file, and writes the HTML preamble up to
 * and including the opening table tag.
 *
 * @param count     page number within the chart; values of 1 or less add no suffix
 * @param directory directory prefix for the page file
 * @param name      chart name, used in the file name, link text and page title
 * @return the writer for the new page, positioned inside the table
 * @throws IOException propagated from Utility.openPrintWriter
 */
static PrintWriter openFile2(int count, String directory, String name) throws IOException {
    String suffix = "";
    if (count > 1) {
        suffix = count + "";
    }
    String fileName = "chart_" + name + suffix + ".html";

    PrintWriter page = Utility.openPrintWriter(directory + fileName, false, false);
    Utility.fixDot();
    System.out.println("Writing: " + name);
    indexFile.println(" <a href = '" + fileName + "'>" + name + "</a>");

    page.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
    page.println("<title>" + name + "</title>");
    page.println("<link rel='stylesheet' href='charts.css' type='text/css'>");
    page.println("</head><body>");
    page.println("<table>");
    return page;
}
static final int
NULL_ORDER = -3,
IGNORABLE_ORDER = -2,

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2002/04/23 22:45:40 $
* $Revision: 1.10 $
* $Date: 2002/05/29 02:00:59 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -73,6 +73,8 @@ public class WriteCollationData implements UCD_Types {
else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(collator);
else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(collator);

View file

@ -36,7 +36,7 @@ at least one of their case forms (lower, title, upper, or fold).</p>
listed in NoCaseMapping.</li>
</ul>
</li>
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/tr15-18.html">NFKD</a>,
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>,
to place related characters next to one another.</li>
<li>To help pick out cells visually, the more interesting ones have a light
blue background. The other cells have grayed-out text.
@ -53,7 +53,7 @@ at least one of their case forms (lower, title, upper, or fold).</p>
</li>
<li>If your browser supports tool-tips, then hovering your mouse over cells
will show the names of the characters.</li>
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr21/">UAX
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr21/" target="_top">UAX
#21: Case Mappings</a>.</li>
</ul>

View file

@ -14,4 +14,5 @@ tt { font-size: 50% }
td.g { font-size: 120%; text-align: Center; width: 72px; color: #808080; }
td.n { font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; }
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
td.z { font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
td.h { font-size: 120%; text-align: Left; color: #000000; background-color: #EEEEEE; }

View file

@ -116,7 +116,7 @@ character in the chart, as follows.</p>
Private Use Area, etc. are represented by a sampling.</li>
<li>Some unassigned code points, noncharacters and other edge cases are also
added to the list for comparison.</li>
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr10/">UTS
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr10/" target="_top">UTS
#10: Unicode Collation Algorithm</a>.</li>
</ul>

View file

@ -33,7 +33,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).</p>
<li>By general category, in the latter two cases</li>
</ul>
</li>
<li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/tr15-18.html">NFKD</a>,
<li>Within each chart page, the code points are sorted by folded <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>,
to place related characters next to one another.</li>
<li>To keep the size of the Hangul chart manageable, characters U+AD00..U+D6FF
(관..훿) are omitted.</li>
@ -52,7 +52,7 @@ differ from at least one of their normalization forms (C, D, KC, KD).</p>
</li>
<li>If your browser supports tool-tips, then hovering your mouse over cells
will show the names of the characters.</li>
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr15/">UAX
<li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">UAX
#15: Unicode Normalization Forms</a>.</li>
</ul>

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
* $Date: 2001/12/06 00:05:53 $
* $Revision: 1.5 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -16,14 +16,24 @@ import java.io.*;
class DiffPropertyLister extends PropertyLister {
private UCD oldUCD;
private static final int NOPROPERTY = -1;
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output, int property) {
this.output = output;
this.ucdData = UCD.make(newUCDName);
if (oldUCDName != null) this.oldUCD = UCD.make(oldUCDName);
breakByCategory = false;
if (property != NOPROPERTY) newProp = DerivedProperty.make(property, ucdData);
if (oldUCDName != null) {
this.oldUCD = UCD.make(oldUCDName);
if (property != NOPROPERTY) oldProp = DerivedProperty.make(property, oldUCD);
}
breakByCategory = property != NOPROPERTY;
useKenName = false;
}
/**
 * Creates a lister with no derived property: delegates to the four-argument
 * constructor, passing NOPROPERTY.
 */
public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
this(oldUCDName, newUCDName, output, NOPROPERTY);
}
public String valueName(int cp) {
return major_minor_only(ucdData.getVersion());
@ -39,13 +49,38 @@ class DiffPropertyLister extends PropertyLister {
}
*/
UnicodeProperty newProp = null;
UnicodeProperty oldProp = null;
String value = "";
/**
 * Returns the comment for cp as "old/new": the modified-category ID of cp
 * in the old UCD, a slash, then the superclass comment. When no old UCD
 * was supplied there is nothing to prefix, so only the superclass comment
 * is returned.
 */
public String optionalComment(int cp) {
    String normal = super.optionalComment(cp);
    // oldUCD stays null when the constructor was given no old version name.
    if (oldUCD == null) return normal;
    byte oldCat = oldUCD.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0);
    return oldUCD.getModCatID_fromIndex(oldCat) + "/" + normal;
}
/**
 * Decides whether cp is listed.
 *
 * Without a derived property: INCLUDE when cp is allocated in the new UCD
 * and either there is no old UCD or cp is not allocated in it.
 *
 * With a derived property: INCLUDE only when cp is allocated in both
 * versions and the property value differs between them. With no old
 * version there is nothing to compare, so every code point is EXCLUDEd.
 */
public byte status(int cp) {
    if (newProp == null) {
        return ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp)) ? INCLUDE : EXCLUDE;
    }

    // Property mode needs a baseline; both are null when no old version was given.
    if (oldUCD == null || oldProp == null) return EXCLUDE;

    // just look at property differences among allocated characters
    if (!ucdData.isAllocated(cp)) return EXCLUDE;
    if (!oldUCD.isAllocated(cp)) return EXCLUDE;

    String val = newProp.getValue(cp);
    String oldVal = oldProp.getValue(cp);
    // Null-safe comparison in case getValue yields null for either version.
    boolean same = (oldVal == null) ? (val == null) : oldVal.equals(val);
    return same ? EXCLUDE : INCLUDE;
}
public String headerString() {
@ -91,6 +126,8 @@ class DiffPropertyLister extends PropertyLister {
*/
/**
 * Returns s unchanged when a derived property is in use; otherwise strips
 * the last dot-separated component (for example "3.2.0" becomes "3.2").
 * A string without any dot is returned unchanged rather than throwing.
 */
private String major_minor_only(String s) {
    if (newProp != null) return s;
    int lastDot = s.lastIndexOf('.');
    return lastDot < 0 ? s : s.substring(0, lastDot);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.17 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -1077,6 +1077,55 @@ public class GenerateData implements UCD_Types {
};
/**
 * Writes a report of derived-property differences for each property index
 * in list. For every property it prints a heading with the property name
 * and then one DiffPropertyLister section per comparison version, each
 * built with "3.2.0" as the first version argument. The comparison against
 * 1.1.0 is not performed.
 *
 * On completion (normal or not) the log is closed and
 * Utility.renameIdentical is called with the name returned by generateBat
 * and the output name of the new file.
 *
 * @param directory output directory prefix
 * @param filename  base file name, without suffix
 * @param list      derived property indices to report on
 * @throws IOException propagated from the file helpers
 */
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
    Default.setUCD();
    String newFile = directory + filename + getFileSuffix(true);
    PrintWriter log = Utility.openPrintWriter(newFile);
    String mostRecent = generateBat(directory, filename, getFileSuffix(true));

    // Sections whose separator is a blank line followed by a rule ...
    final String[] blankThenRule = {"2.0.0", "2.1.2", "2.1.5", "2.1.8", "3.0.0"};
    // ... and sections whose separator is a rule followed by a blank line.
    final String[] ruleThenBlank = {"3.0.1", "3.1.0", "3.1.1"};

    try {
        for (int p = 0; p < list.length; ++p) {
            final int prop = list[p];

            log.println();
            log.println(HORIZONTAL_LINE);
            log.println("###### " + DerivedProperty.make(prop, Default.ucd).getName());

            for (int v = 0; v < blankThenRule.length; ++v) {
                log.println();
                log.println(HORIZONTAL_LINE);
                new DiffPropertyLister("3.2.0", blankThenRule[v], log, prop).print();
            }

            for (int v = 0; v < ruleThenBlank.length; ++v) {
                log.println(HORIZONTAL_LINE);
                log.println();
                new DiffPropertyLister("3.2.0", ruleThenBlank[v], log, prop).print();
            }

            log.println(HORIZONTAL_LINE);
        }
    } finally {
        if (log != null) {
            log.close();
            Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile));
        }
    }
}
static final void generateAge(String directory, String filename) throws IOException {
Default.setUCD();
String newFile = directory + filename + getFileSuffix(true);
@ -1102,7 +1151,7 @@ public class GenerateData implements UCD_Types {
log.println("# For details on the contents of each version, see");
log.println("# http://www.unicode.org/versions/enumeratedversions.html.");
http://www.unicode.org/versions/enumeratedversions.html
// http://www.unicode.org/versions/enumeratedversions.html
log.println(HORIZONTAL_LINE);
log.println();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.3 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -23,9 +23,10 @@ public final class GenerateHanTransliterator {
static final boolean TESTING = false;
static int type;
public static void main() {
public static void main(int typeIn) {
type = typeIn;
Default.setUCD();
try {
type = 0;
System.out.println("Starting");
generate();
} catch (Exception e) {
@ -43,23 +44,27 @@ public final class GenerateHanTransliterator {
String name = "$Han$English";
String key = "kDefinition"; // kMandarin, kKorean, kJapaneseKun, kJapaneseOn
String filter = "kJis0";
String filename = "Han_English";
switch (type) {
default: break;
case 1: name = "$Han$OnRomaji";
key = "kJapaneseOn";
filter = "kJis0";
filename = "Han_Romaji";
break;
case 2: name = "$Han$Pinyin";
key = "kMandarin";
filename = "Han_Pinyin";
filter = null;
break;
}
out = Utility.openPrintWriter("Transliterate_Han_English.txt");
err = Utility.openPrintWriter("Transliterate_Han_English.log.txt");
out = Utility.openPrintWriter("Transliterate_" + filename + ".txt", false, false);
err = Utility.openPrintWriter("Transliterate_" + filename + "_log.txt", false, false);
BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true);
int totalCount = 0;
int count = 0;
String oldCode = "";
String oldLine = "";
@ -76,11 +81,15 @@ public final class GenerateHanTransliterator {
if (line == null) break;
if (line.length() < 6) continue;
if (line.charAt(0) == '#') continue;
String code = line.substring(2,6);
int tabPos = line.indexOf(' ');
String code = line.substring(2, tabPos);
/* if (code.compareTo("9FA0") >= 0) {
System.out.println("? " + line);
}*/
if (!code.equals(oldCode)) {
totalCount++;
if (foundKey && foundFilter) {
count++;
/*if (true) { //*/
@ -106,6 +115,8 @@ public final class GenerateHanTransliterator {
}
if (foundKey && foundFilter) printDef(out, oldCode, oldLine, oldStart);
System.out.println("Total: " + totalCount);
System.out.println("Defined Count: " + count);
in.close();
out.close();
err.close();
@ -137,6 +148,7 @@ public final class GenerateHanTransliterator {
String definition = line.substring(start,end);
if (type == 2) definition = handlePinyin(definition, line);
definition.trim();
definition = definition.toLowerCase();
String cp = UTF16.valueOf(Integer.parseInt(code, 16));
String key = (String) definitionMap.get(definition);
if (key == null) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.11 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -65,8 +65,13 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
else if (arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main();
else if (arg.equalsIgnoreCase("hanTransliterator")) GenerateHanTransliterator.main(0);
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
@ -182,6 +187,10 @@ public final class Main implements UCD_Types {
} else if (arg.equalsIgnoreCase("DerivedAge")) {
GenerateData.generateAge("DerivedData/", "DerivedAge");
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
* $Date: 2002/03/15 01:57:01 $
* $Revision: 1.8 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -58,10 +58,7 @@ abstract public class PropertyLister implements UCD_Types {
/**
 * Returns the category comment for cp: the empty string unless both
 * usePropertyComment and breakByCategory are set, otherwise the ID that
 * ucdData.getModCatID_fromIndex gives for the modified category of cp.
 */
public String optionalComment(int cp) {
    if (!usePropertyComment || !breakByCategory) return "";
    // The UCD supplies the names for both real and stand-in categories, so
    // no local mapping of stand-in codes is needed here.
    return ucdData.getModCatID_fromIndex(getModCat(cp));
}
public int minPropertyWidth() {
@ -144,6 +141,10 @@ abstract public class PropertyLister implements UCD_Types {
}
return result;
}
/**
 * Returns the modified category of cp from ucdData, collapsing the
 * categories in CASED_LETTER_MASK when breakByCategory is set and
 * collapsing nothing otherwise.
 */
byte getModCat(int cp) {
return ucdData.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0);
}
/**
@ -168,23 +169,6 @@ abstract public class PropertyLister implements UCD_Types {
return lastSpace;
}
// Stand-in category codes used only for comparison.
// NOTE(review): FAKERC and FAKELC share the value 63, so the two cannot be
// told apart by value -- confirm whether that is intended.
private static final byte FAKERC = 63; // stand-in used when most categories are merged together
private static final byte FAKELC = 63; // stand-in for the cased letters Lt, Ll and Lu
private static final byte FAKENC = 64; // stand-in for unassigned noncharacters
/**
 * Returns the general category of cp, replaced by a stand-in code in
 * these cases: FAKENC for an unassigned noncharacter; otherwise FAKELC
 * for Lt/Ll/Lu when breakByCategory is set; otherwise, when
 * breakByCategory is not set, FAKERC for every category other than
 * CONTROL, FORMAT, SURROGATE, PRIVATE_USE and UNASSIGNED.
 *
 * NOTE(review): another getModCat(int) that delegates to
 * ucdData.getModCat appears earlier in this listing -- confirm which
 * definition is meant to remain.
 */
private byte getModCat(int cp) {
byte cat = ucdData.getCategory(cp);
if (cat == UNASSIGNED && ucdData.isNoncharacter(cp)) cat = FAKENC;
else if (breakByCategory) {
if (cat == Lt || cat == Ll || cat == Lu) cat = FAKELC;
} else {
// MASH almost everything together
if (cat != CONTROL && cat != FORMAT && cat != SURROGATE
&& cat != PRIVATE_USE && cat != UNASSIGNED) cat = FAKERC;
}
return cat;
}
public int print() {
set.clear();
int count = 0;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.11 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -194,6 +194,79 @@ public final class UCD implements UCD_Types {
/**
 * Returns the generalCategory field of the data record that get() yields
 * for codePoint.
 */
public byte getCategory(int codePoint) {
return get(codePoint, false).generalCategory;
}
// Stand-in category codes returned by getModCat when categories are
// collapsed; getModCatID_fromIndex maps each one to the ID shown alongside.
private static final byte FAKE_SYMBOL = 57; // collapsed symbols, ID "S&"
private static final byte FAKE_PUNCTUATION = 58; // collapsed punctuation, ID "P&"
private static final byte FAKE_SEPERATOR = 59; // collapsed separators, ID "Z&"
private static final byte FAKE_NUMBER = 60; // collapsed numbers, ID "N&"
private static final byte FAKE_MARK = 61; // collapsed marks, ID "M&"
private static final byte FAKE_LETTER = 62; // collapsed letters, ID "L&"
private static final byte FAKE_OTHER = 63; // collapsed control/format/unassigned/etc., ID "C&"
private static final byte FAKENC = 31; // unassigned noncharacter, ID "NC"
/**
 * Returns the general category of cp, optionally collapsed to a stand-in
 * for its major class.
 *
 * An unassigned noncharacter is first reported as FAKENC. Then, if the bit
 * (1 << category) is set in collapseBits, the category is replaced by the
 * stand-in for its group (letter, mark, number, separator, other,
 * punctuation or symbol); otherwise it is returned as is.
 *
 * @param cp           code point to classify
 * @param collapseBits bit mask of categories to collapse, one bit per category value
 * @return the category, or the stand-in code for its group
 */
public byte getModCat(int cp, int collapseBits) {
    byte category = getCategory(cp);
    if (category == UNASSIGNED && isNoncharacter(cp)) category = FAKENC;

    boolean collapse = (collapseBits & (1 << category)) != 0;
    if (!collapse) return category;

    switch (category) {
    case UPPERCASE_LETTER:
    case LOWERCASE_LETTER:
    case TITLECASE_LETTER:
    case MODIFIER_LETTER:
    case OTHER_LETTER:
        return FAKE_LETTER;

    case NON_SPACING_MARK:
    case ENCLOSING_MARK:
    case COMBINING_SPACING_MARK:
        return FAKE_MARK;

    case DECIMAL_DIGIT_NUMBER:
    case LETTER_NUMBER:
    case OTHER_NUMBER:
        return FAKE_NUMBER;

    case SPACE_SEPARATOR:
    case LINE_SEPARATOR:
    case PARAGRAPH_SEPARATOR:
        return FAKE_SEPERATOR;

    case UNASSIGNED:
    case FAKENC:
    case CONTROL:
    case FORMAT:
    case UNUSED_CATEGORY:
    case PRIVATE_USE:
    case SURROGATE:
        return FAKE_OTHER;

    case DASH_PUNCTUATION:
    case START_PUNCTUATION:
    case END_PUNCTUATION:
    case CONNECTOR_PUNCTUATION:
    case OTHER_PUNCTUATION:
    case INITIAL_PUNCTUATION:
    case FINAL_PUNCTUATION:
        return FAKE_PUNCTUATION;

    case MATH_SYMBOL:
    case CURRENCY_SYMBOL:
    case MODIFIER_SYMBOL:
    case OTHER_SYMBOL:
        return FAKE_SYMBOL;

    default:
        return category;
    }
}
/**
 * Returns the short ID for a category code that may be one of the stand-in
 * codes produced by getModCat: "S&", "P&", "Z&", "N&", "M&", "L&", "C&" or
 * "NC". Any other code is passed to getCategoryID_fromIndex.
 */
public String getModCatID_fromIndex(byte cat) {
    if (cat == FAKE_SYMBOL) return "S&";
    if (cat == FAKE_PUNCTUATION) return "P&";
    if (cat == FAKE_SEPERATOR) return "Z&";
    if (cat == FAKE_NUMBER) return "N&";
    if (cat == FAKE_MARK) return "M&";
    if (cat == FAKE_LETTER) return "L&";
    if (cat == FAKE_OTHER) return "C&";
    if (cat == FAKENC) return "NC";
    return getCategoryID_fromIndex(cat);
}
/**
* Get the main category, as a mask

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2002/03/22 22:08:53 $
* $Revision: 1.11 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -150,6 +150,7 @@ public interface UCD_Types {
static final int
LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt) | (1<<Lm) | (1 << Lo),
CASED_LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt),
MARK_MASK = (1<<Mn) | (1<<Me) | (1<<Mc),
NUMBER_MASK = (1<<Nd) | (1<<Nl) | (1<<No),
SEPARATOR_MASK = (1<<Zs) | (1<<Zl) | (1<<Zp),

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
* $Date: 2002/04/23 01:59:14 $
* $Revision: 1.12 $
* $Date: 2002/05/29 02:01:00 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -284,6 +284,54 @@ public class VerifyUCD implements UCD_Types {
return result;
}
/**
 * Reports allocated code points whose compact bidi-class string differs
 * between the code point itself and its NFC or NFD form. Only code points
 * for which Default.nfd.normalizationDiffers is true are examined. For
 * each mismatch, the code point and both normalized forms are printed
 * with their full (non-compact) bidi-class strings.
 */
public static void checkBIDI() {
    Default.setUCD();
    for (int cp = 0; cp <= 0x10FFFF; ++cp) {
        Utility.dot(cp);
        if (!Default.ucd.isAllocated(cp) || !Default.nfd.normalizationDiffers(cp)) continue;

        String nfdForm = Default.nfd.normalize(cp);
        String nfcForm = Default.nfc.normalize(cp);
        String original = UTF16.valueOf(cp);

        String nfdBidi = getBidi(nfdForm, true);
        String nfcBidi = getBidi(nfcForm, true);
        String originalBidi = getBidi(original, true);

        boolean consistent = nfdBidi.equals(originalBidi) && nfcBidi.equals(originalBidi);
        if (consistent) continue;

        Utility.fixDot();
        System.out.println(Default.ucd.getCodeAndName(cp) + ": " + getBidi(original, false));
        System.out.println("\tNFC: " + Default.ucd.getCodeAndName(nfcForm) + ": " + getBidi(nfcForm, false));
        System.out.println("\tNFD: " + Default.ucd.getCodeAndName(nfdForm) + ": " + getBidi(nfdForm, false));
    }
}
/**
 * Returns the concatenated short bidi-class IDs (title-cased via
 * Default.ucd.getCase) of the code points in s.
 *
 * In compact mode an NSM takes the class of the preceding emitted code
 * point when there is one, and a class equal to the previously emitted
 * class is dropped unless it is ES or CS.
 *
 * @param s       text to describe, read code point by code point
 * @param compact whether to fold NSMs and suppress repeated classes
 */
public static String getBidi(String s, boolean compact) {
    StringBuffer buffer = new StringBuffer();
    byte previous = -1; // class of the last emitted code point, -1 before the first
    int index = 0;
    while (index < s.length()) {
        int cp = UTF16.charAt(s, index);
        index += UTF16.getCharCount(cp);

        byte current = Default.ucd.getBidiClass(cp);
        if (compact) {
            if (current == BIDI_NSM && previous != -1) current = previous;
            boolean repeat = current == previous && current != BIDI_ES && current != BIDI_CS;
            if (repeat) continue;
        }

        String label = Default.ucd.getCase(
            Default.ucd.getBidiClassID_fromIndex(current, SHORT), FULL, TITLE);
        buffer.append(label);
        previous = current;
    }
    return buffer.toString();
}
public static void verify() throws IOException {
Default.setUCD();