mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
misc changes for UnicodeProperty
X-SVN-Rev: 14466
This commit is contained in:
parent
153015d3ec
commit
aa012dfd7c
35 changed files with 1877 additions and 975 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/AbbreviatedUnicodeSetIterator.java,v $
|
||||
* $Date: 2003/03/17 23:00:20 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2004/02/06 18:32:04 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,7 +24,7 @@ import com.ibm.text.UCD.Normalizer;
|
|||
import com.ibm.text.UCD.UCD;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.text.UCD.UnifiedBinaryProperty;
|
||||
import com.ibm.text.UCD.UnicodeProperty;
|
||||
import com.ibm.text.UCD.UCDProperty;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
|
||||
* $Date: 2003/08/21 07:32:52 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2004/02/06 18:32:03 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,7 +24,7 @@ import com.ibm.text.UCD.Normalizer;
|
|||
import com.ibm.text.UCD.UCD;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.text.UCD.UnifiedBinaryProperty;
|
||||
import com.ibm.text.UCD.UnicodeProperty;
|
||||
import com.ibm.text.UCD.UCDProperty;
|
||||
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
@ -1418,7 +1418,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
|
|||
*/
|
||||
private void cleanup() {
|
||||
|
||||
UnicodeProperty ubp = UnifiedBinaryProperty.make(
|
||||
UCDProperty ubp = UnifiedBinaryProperty.make(
|
||||
UCD.BINARY_PROPERTIES + UCD.Logical_Order_Exception, ucd);
|
||||
UnicodeSet desiredSet = ubp.getSet();
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
|
||||
* $Date: 2003/08/22 16:51:21 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2004/02/06 18:32:03 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -175,32 +175,9 @@ public class WriteCharts implements UCD_Types {
|
|||
|
||||
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
|
||||
|
||||
String name = Default.ucd.getName(s);
|
||||
String outline = showCell2(sortKey, s, script, classname);
|
||||
|
||||
|
||||
if (s.equals("\u1eaf")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
String comp = Default.nfc.normalize(s);
|
||||
|
||||
String outline = breaker + classname
|
||||
+ " title='"
|
||||
+ (script != UNSUPPORTED
|
||||
? Utility.quoteXML(name, true) + ": "
|
||||
: "")
|
||||
+ UCA.toString(sortKey) + "'>"
|
||||
+ Utility.quoteXML(comp, true)
|
||||
+ "<br><tt>"
|
||||
+ Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
+ "</tt></td>"
|
||||
+ (script == UNSUPPORTED
|
||||
? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
|
||||
: "")
|
||||
;
|
||||
|
||||
output.println(outline);
|
||||
output.println(breaker + outline);
|
||||
++columnCount;
|
||||
}
|
||||
|
||||
|
@ -208,6 +185,46 @@ public class WriteCharts implements UCD_Types {
|
|||
closeIndexFile(indexFile, "<br>UCA: " + uca.getDataVersion(), COLLATION);
|
||||
}
|
||||
|
||||
private static String showCell2(
|
||||
String sortKey,
|
||||
String s,
|
||||
byte script,
|
||||
String classname) {
|
||||
String name = Default.ucd.getName(s);
|
||||
|
||||
|
||||
if (s.equals("\u1eaf")) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
|
||||
String comp = Default.nfc.normalize(s);
|
||||
int cat = Default.ucd.getCategory(UTF16.charAt(comp,0));
|
||||
if (cat == Mn || cat == Mc || cat == Me) {
|
||||
comp = '\u25CC' + comp;
|
||||
if (s.equals("\u0300")) {
|
||||
System.out.println(Default.ucd.getCodeAndName(comp));
|
||||
}
|
||||
}
|
||||
// TODO: merge with showCell
|
||||
|
||||
String outline = classname
|
||||
+ " title='"
|
||||
+ (script != UNSUPPORTED
|
||||
? Utility.quoteXML(name, true) + ": "
|
||||
: "")
|
||||
+ UCA.toString(sortKey) + "'>"
|
||||
+ Utility.quoteXML(comp, true)
|
||||
+ "<br><tt>"
|
||||
+ Utility.hex(s)
|
||||
//+ "<br>" + script
|
||||
+ "</tt></td>"
|
||||
+ (script == UNSUPPORTED
|
||||
? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
|
||||
: "")
|
||||
;
|
||||
return outline;
|
||||
}
|
||||
|
||||
static public void normalizationChart() throws IOException {
|
||||
Default.setUCD();
|
||||
HACK_KANA = false;
|
||||
|
@ -642,9 +659,20 @@ public class WriteCharts implements UCD_Types {
|
|||
closeIndexFile(indexFile, "", CASE);
|
||||
}
|
||||
|
||||
static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
|
||||
static void showCell(PrintWriter output, String s,
|
||||
String prefix, String extra, boolean skipName) {
|
||||
if (s.equals("\u0300")) {
|
||||
System.out.println();
|
||||
}
|
||||
String name = Default.ucd.getName(s);
|
||||
String comp = Default.nfc.normalize(s);
|
||||
int cat = Default.ucd.getCategory(UTF16.charAt(comp,0));
|
||||
if (cat == Mn || cat == Mc || cat == Me) {
|
||||
comp = '\u25CC' + comp;
|
||||
if (s.equals("\u0300")) {
|
||||
System.out.println(Default.ucd.getCodeAndName(comp));
|
||||
}
|
||||
}
|
||||
|
||||
String outline = prefix
|
||||
+ (skipName ? "" : " title='" + Utility.quoteXML(name, true) + "'")
|
||||
|
|
218
tools/unicodetools/com/ibm/text/UCD/CheckICU.java
Normal file
218
tools/unicodetools/com/ibm/text/UCD/CheckICU.java
Normal file
|
@ -0,0 +1,218 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.lang.UProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
public class CheckICU {
|
||||
static final BagFormatter bf = new BagFormatter();
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
System.out.println("Start");
|
||||
test();
|
||||
System.out.println("End");
|
||||
}
|
||||
|
||||
static UnicodeSet itemFailures;
|
||||
static ICUPropertyFactory icuFactory;
|
||||
static ToolUnicodePropertySource toolFactory;
|
||||
|
||||
public static void test() throws IOException {
|
||||
checkUCD();
|
||||
itemFailures = new UnicodeSet();
|
||||
icuFactory = ICUPropertyFactory.make();
|
||||
toolFactory = ToolUnicodePropertySource.make("4.0.0");
|
||||
|
||||
String[] quickList = {
|
||||
"Name",
|
||||
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
|
||||
//"Numeric_Value"
|
||||
};
|
||||
for (int i = 0; i < quickList.length; ++i) {
|
||||
testProperty(quickList[i], -1);
|
||||
}
|
||||
if (quickList.length > 0) return;
|
||||
|
||||
Collection availableTool = toolFactory.getAvailablePropertyAliases(new TreeSet());
|
||||
|
||||
Collection availableICU = icuFactory.getAvailablePropertyAliases(new TreeSet());
|
||||
System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool));
|
||||
Collection common = new TreeSet(availableICU);
|
||||
common.retainAll(availableTool);
|
||||
|
||||
for (int j = UnicodeProperty.BINARY; j < UnicodeProperty.LIMIT_TYPE; ++j) {
|
||||
System.out.println();
|
||||
System.out.println(UnicodeProperty.getTypeName(j));
|
||||
Iterator it = common.iterator();
|
||||
while (it.hasNext()) {
|
||||
String prop = (String)it.next();
|
||||
testProperty(prop, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkUCD() throws IOException {
|
||||
UCD myUCD = UCD.make("4.0.0");
|
||||
Normalizer nfc = new Normalizer(Normalizer.NFC, "4.0.0");
|
||||
UnicodeSet leading = new UnicodeSet();
|
||||
UnicodeSet trailing = new UnicodeSet();
|
||||
UnicodeSet starter = new UnicodeSet();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (myUCD.getCombiningClass(i) == 0) starter.add(i);
|
||||
if (nfc.isTrailing(i)) trailing.add(i);
|
||||
if (nfc.isLeading(i)) leading.add(i);
|
||||
}
|
||||
PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt");
|
||||
bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter));
|
||||
bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter));
|
||||
bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter));
|
||||
bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading));
|
||||
bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading));
|
||||
pw.close();
|
||||
}
|
||||
/*
|
||||
* int icuType;
|
||||
int toolType;
|
||||
Collection icuAliases;
|
||||
Collection toolAliases;
|
||||
String firstDiffICU;
|
||||
String firstDiffTool;
|
||||
String firstDiffCP;
|
||||
String icuProp;
|
||||
String toolProp;
|
||||
|
||||
*/
|
||||
|
||||
private static void testProperty(String prop, int typeFilter) {
|
||||
UnicodeProperty icuProp = icuFactory.getProperty(prop);
|
||||
int icuType = icuProp.getPropertyType();
|
||||
|
||||
if (typeFilter >= 0 && icuType != typeFilter) return;
|
||||
|
||||
System.out.println();
|
||||
System.out.println("Testing: " + prop);
|
||||
UnicodeProperty toolProp = toolFactory.getProperty(prop);
|
||||
|
||||
int toolType = toolProp.getPropertyType();
|
||||
if (icuType != toolType) {
|
||||
System.out.println("FAILURE Type: ICU: " + UnicodeProperty.getTypeName(icuType)
|
||||
+ "\tTool: " + UnicodeProperty.getTypeName(toolType));
|
||||
}
|
||||
|
||||
Collection icuAliases = icuProp.getPropertyAliases(new ArrayList());
|
||||
Collection toolAliases = toolProp.getPropertyAliases(new ArrayList());
|
||||
System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases));
|
||||
|
||||
icuAliases = icuProp.getAvailablePropertyValueAliases(new ArrayList());
|
||||
toolAliases = toolProp.getAvailablePropertyValueAliases(new ArrayList());
|
||||
System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases));
|
||||
|
||||
// TODO do property value aliases
|
||||
itemFailures.clear();
|
||||
String firstDiffICU = null, firstDiffTool = null, firstDiffCP = null;
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
/*if (i == 0x0237) {
|
||||
System.out.println();
|
||||
}
|
||||
*/
|
||||
String icuValue = icuProp.getPropertyValue(i);
|
||||
String toolValue = toolProp.getPropertyValue(i);
|
||||
if (!equals(icuValue, toolValue)) {
|
||||
itemFailures.add(i);
|
||||
if (firstDiffCP == null) {
|
||||
firstDiffICU = icuValue;
|
||||
firstDiffTool = toolValue;
|
||||
firstDiffCP = Utility.hex(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (itemFailures.size() != 0) {
|
||||
System.out.println("FAILURE " + itemFailures.size() + " Differences: ");
|
||||
System.out.println(itemFailures.toPattern(true));
|
||||
if (firstDiffICU != null) firstDiffICU = bf.hex.transliterate(firstDiffICU);
|
||||
if (firstDiffTool != null) firstDiffTool = bf.hex.transliterate(firstDiffTool);
|
||||
System.out.println(firstDiffCP
|
||||
+ "\tICU: <" + firstDiffICU
|
||||
+ ">\tTool: <" + firstDiffTool + ">");
|
||||
}
|
||||
System.out.println("done");
|
||||
|
||||
// do values later, and their aliases
|
||||
/*
|
||||
System.out.println("-Values");
|
||||
UnicodeSet
|
||||
System.out.println(showDifferences("ICU", availableICU, "Tool", availableTool));
|
||||
*/
|
||||
}
|
||||
|
||||
static boolean equals(Object a, Object b) {
|
||||
if (a == null) return b == null;
|
||||
return a.equals(b);
|
||||
}
|
||||
|
||||
static public String showDifferences(
|
||||
String title,
|
||||
String name1,
|
||||
Collection set1,
|
||||
String name2,
|
||||
Collection set2) {
|
||||
|
||||
Collection temp = new TreeSet(set1);
|
||||
temp.retainAll(set2);
|
||||
|
||||
if (set1.size() == temp.size()) {
|
||||
return title + ": " + name1 + " == " + name2 + ": " + bf.join(set1);
|
||||
}
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append(title + "\tFAILURE\r\n");
|
||||
result.append("\t" + name1 + " = " + bf.join(set1) + "\r\n");
|
||||
result.append("\t" + name2 + " = " + bf.join(set2) + "\r\n");
|
||||
|
||||
// damn'd collection doesn't have a clone, so
|
||||
// we go with Set, even though that
|
||||
// may not preserve order and duplicates
|
||||
if (temp.size() != 0) {
|
||||
result.append("\t" + name2 + " & " + name1 + ":\r\n");
|
||||
result.append("\t" + bf.join(temp));
|
||||
result.append("\r\n");
|
||||
}
|
||||
|
||||
|
||||
temp.clear();
|
||||
temp.addAll(set1);
|
||||
temp.removeAll(set2);
|
||||
if (temp.size() != 0) {
|
||||
result.append("\t" + name1 + " - " + name2 + ":\r\n");
|
||||
result.append("\t" + bf.join(temp));
|
||||
result.append("\r\n");
|
||||
}
|
||||
|
||||
temp.clear();
|
||||
temp.addAll(set2);
|
||||
temp.removeAll(set1);
|
||||
if (temp.size() != 0) {
|
||||
result.append("\t" + name2 + " - " + name1 + ":\r\n");
|
||||
result.append("\t" + bf.join(temp));
|
||||
result.append("\r\n");
|
||||
}
|
||||
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
81
tools/unicodetools/com/ibm/text/UCD/ChineseFrequency.java
Normal file
81
tools/unicodetools/com/ibm/text/UCD/ChineseFrequency.java
Normal file
|
@ -0,0 +1,81 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.text.DecimalFormat;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.text.utility.Pair;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
public class ChineseFrequency {
|
||||
static final String DICT_DIR = "C:\\DATA\\dict\\";
|
||||
static NumberFormat percent = new DecimalFormat("0.000000%");
|
||||
static NumberFormat percent3 = new DecimalFormat("000.000000%");
|
||||
static NumberFormat number = new DecimalFormat("#,##0");
|
||||
|
||||
static class InverseCompareTo implements Comparator {
|
||||
public int compare(Object o1, Object o2) {
|
||||
return -((Comparable)o1).compareTo(o2);
|
||||
}
|
||||
}
|
||||
|
||||
public static void test() throws IOException{
|
||||
Set freq_char = new TreeSet(new InverseCompareTo());
|
||||
BufferedReader br = BagFormatter.openUTF8Reader(DICT_DIR, "kHYPLCDPF.txt");
|
||||
double grandTotal = 0.0;
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
String[] pieces = Utility.split(line,'\t');
|
||||
int cp = Integer.parseInt(pieces[0],16);
|
||||
String[] says = Utility.split(pieces[1],',');
|
||||
long total = 0;
|
||||
for (int i = 0; i < says.length; ++i) {
|
||||
int start = says[i].indexOf('(');
|
||||
int end = says[i].indexOf(')');
|
||||
long count = Long.parseLong(says[i].substring(start+1, end));
|
||||
total += count;
|
||||
}
|
||||
grandTotal += total;
|
||||
freq_char.add(new Pair(new Long(total), new Integer(cp)));
|
||||
}
|
||||
br.close();
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer(DICT_DIR,"kHYPLCDPF_frequency.txt");
|
||||
pw.write("\uFEFF");
|
||||
pw.println("No.\tPercentage\tAccummulated\tHex\tChar");
|
||||
|
||||
Iterator it = freq_char.iterator();
|
||||
int counter = 0;
|
||||
double cummulative = 0;
|
||||
double cummulativePercentage = 0;
|
||||
while (it.hasNext()) {
|
||||
Pair item = (Pair)it.next();
|
||||
Long total = (Long) item.first;
|
||||
Integer cp = (Integer) item.second;
|
||||
double current = total.longValue();
|
||||
cummulative += current;
|
||||
double percentage = current / grandTotal;
|
||||
cummulativePercentage += percentage;
|
||||
pw.println(
|
||||
++counter
|
||||
//+ "\t" + number.format(current)
|
||||
//+ "\t" + number.format(cummulative)
|
||||
+ "\t" + percent.format(percentage)
|
||||
+ "\t" + percent3.format(cummulativePercentage)
|
||||
+ "\t" + Integer.toHexString(cp.intValue()).toUpperCase()
|
||||
+ "\t" + UTF16.valueOf(cp.intValue()));
|
||||
}
|
||||
//pw.println("Grand total: " + (long)grandTotal);
|
||||
pw.close();
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
|
||||
* $Date: 2003/07/21 15:50:07 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/06 18:30:23 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -89,7 +89,7 @@ public class CompareProperties implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
public final class UnicodeSetComparator implements Comparator {
|
||||
public final static class UnicodeSetComparator implements Comparator {
|
||||
/**
|
||||
* Compares two UnicodeSets, producing a transitive ordering.
|
||||
* @return -1 if first is smaller (in size) than second,
|
||||
|
@ -121,7 +121,7 @@ public class CompareProperties implements UCD_Types {
|
|||
|
||||
boolean isPartitioned = false;
|
||||
|
||||
UnicodeProperty[] props = new UnicodeProperty[500];
|
||||
UCDProperty[] props = new UCDProperty[500];
|
||||
UnicodeSet[] sets = new UnicodeSet[500];
|
||||
int count = 0;
|
||||
BitSet[] disjoints = new BitSet[500];
|
||||
|
@ -147,7 +147,7 @@ public class CompareProperties implements UCD_Types {
|
|||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
UnicodeProperty up = props[i];
|
||||
UCDProperty up = props[i];
|
||||
boolean iProp = up.hasValue(cp);
|
||||
if (iProp) {
|
||||
probe.set(i);
|
||||
|
@ -177,7 +177,7 @@ public class CompareProperties implements UCD_Types {
|
|||
if (i == 0x0900) {
|
||||
System.out.println("debug");
|
||||
}
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (up.getValueType() < BINARY_PROP) {
|
||||
System.out.println("\tSkipping " + up.getName() + "; value varies");
|
||||
|
@ -378,7 +378,7 @@ public class CompareProperties implements UCD_Types {
|
|||
return getPropName(props[propertyIndex]);
|
||||
}
|
||||
|
||||
private String getPropName(UnicodeProperty ubp) {
|
||||
private String getPropName(UCDProperty ubp) {
|
||||
return Utility.getUnskeleton(ubp.getFullName(LONG), true);
|
||||
}
|
||||
|
||||
|
@ -395,7 +395,7 @@ public class CompareProperties implements UCD_Types {
|
|||
for (int i = 1; i < UCD_Types.LIMIT_ENUM; ++i) {
|
||||
int iType = i & 0xFF00;
|
||||
if (iType == UCD_Types.JOINING_GROUP || iType == UCD_Types.AGE || iType == UCD_Types.COMBINING_CLASS || iType == UCD_Types.SCRIPT) continue;
|
||||
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (upi == null) continue;
|
||||
if (!upi.isStandard()) {
|
||||
System.out.println("Skipping " + upi.getName() + "; not standard");
|
||||
|
@ -419,7 +419,7 @@ public class CompareProperties implements UCD_Types {
|
|||
int jType = j & 0xFF00;
|
||||
if (jType == UCD_Types.JOINING_GROUP || jType == UCD_Types.AGE || jType == UCD_Types.COMBINING_CLASS || jType == UCD_Types.SCRIPT
|
||||
|| (jType == iType && jType != UCD_Types.BINARY_PROPERTIES)) continue;
|
||||
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
|
||||
UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
|
||||
if (upj == null) continue;
|
||||
if (!upj.isStandard()) continue;
|
||||
if (upj.getValueType() < UCD_Types.BINARY_PROP) continue;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
|
||||
* $Date: 2003/07/21 15:50:06 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2004/02/06 18:30:23 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -27,12 +27,14 @@ import java.io.*;
|
|||
public final class ConvertUCD implements UCD_Types {
|
||||
public static final boolean SHOW = false;
|
||||
public static final boolean DEBUG = false;
|
||||
static final boolean SHOW_SAMPLE = false;
|
||||
|
||||
public static int major;
|
||||
public static int minor;
|
||||
public static int update;
|
||||
|
||||
static String version;
|
||||
int major;
|
||||
int minor;
|
||||
int update;
|
||||
|
||||
String version;
|
||||
|
||||
// varies by version
|
||||
/*
|
||||
|
@ -79,6 +81,47 @@ public final class ConvertUCD implements UCD_Types {
|
|||
/*
|
||||
//*/
|
||||
};
|
||||
static HashMap isHex = new HashMap();
|
||||
static HashMap defaults = new HashMap();
|
||||
|
||||
static {
|
||||
for (int j = 0; j < labelList.length; ++j) {
|
||||
String[] labels = labelList[j];
|
||||
|
||||
for (int i = 1; i < labels.length; ++i) {
|
||||
boolean hex = false;
|
||||
String def = null;
|
||||
//char appendChar = '\u0000';
|
||||
|
||||
// pull off "*": hex interpretation
|
||||
if (labels[i].charAt(0) == '*') { // HEX value
|
||||
hex = true;
|
||||
labels[i] = labels[i].substring(1);
|
||||
}
|
||||
|
||||
/*
|
||||
// pull off "$": append duplicates
|
||||
if (labels[i].charAt(0) == '$') { // HEX value
|
||||
appendChar = labels[i].charAt(1);
|
||||
labels[i] = labels[i].substring(2);
|
||||
}
|
||||
|
||||
// pull off default values
|
||||
int pos = labels[i].indexOf('-');
|
||||
if (pos >= 0) {
|
||||
def = labels[i].substring(pos+1);
|
||||
labels[i] = labels[i].substring(0,pos);
|
||||
}
|
||||
*/
|
||||
// store results
|
||||
// we do this after all processing, so that the label is clean!!
|
||||
|
||||
if (hex) isHex.put(labels[i], "");
|
||||
//if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar));
|
||||
defaults.put(labels[i], def);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
static String[][] labelList31 = {
|
||||
// Labels for the incoming files. Labels MUST match field order in file.
|
||||
|
@ -212,15 +255,10 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
try {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
version = args[i];
|
||||
String version = args[i];
|
||||
if (version.length() == 0) version = UCD.latestVersion;
|
||||
String[] parts = new String[3];
|
||||
Utility.split(version, '.', parts);
|
||||
major = Integer.parseInt(parts[0]);
|
||||
minor = Integer.parseInt(parts[1]);
|
||||
update = Integer.parseInt(parts[2]);
|
||||
|
||||
toJava();
|
||||
new ConvertUCD().toJava(version);
|
||||
}
|
||||
} finally {
|
||||
log.close();
|
||||
|
@ -242,7 +280,13 @@ public final class ConvertUCD implements UCD_Types {
|
|||
}
|
||||
*/
|
||||
|
||||
static void toJava() throws Exception {
|
||||
void toJava(String version) throws Exception {
|
||||
this.version = version;
|
||||
String[] parts = new String[3];
|
||||
Utility.split(version, '.', parts);
|
||||
major = Integer.parseInt(parts[0]);
|
||||
minor = Integer.parseInt(parts[1]);
|
||||
update = Integer.parseInt(parts[2]);
|
||||
System.out.println("Building " + version);
|
||||
// Blocks is special
|
||||
// Unihan is special
|
||||
|
@ -264,10 +308,13 @@ public final class ConvertUCD implements UCD_Types {
|
|||
UData ud;
|
||||
ud = getEntry(0x5e);
|
||||
System.out.println("SPOT-CHECK: 5e: " + ud);
|
||||
|
||||
|
||||
ud = getEntry(0x130);
|
||||
System.out.println("SPOT-CHECK: 130: " + ud);
|
||||
|
||||
ud = getEntry(0x1f6);
|
||||
System.out.println("SPOT-CHECK: 1f6: " + ud);
|
||||
|
||||
ud = getEntry(0x2A6D6);
|
||||
System.out.println("SPOT-CHECK: 2A6D6: " + ud);
|
||||
|
||||
|
@ -285,51 +332,10 @@ public final class ConvertUCD implements UCD_Types {
|
|||
* "OMIT" is special -- means don't record
|
||||
*/
|
||||
|
||||
static HashMap isHex = new HashMap();
|
||||
static HashMap defaults = new HashMap();
|
||||
|
||||
static {
|
||||
for (int j = 0; j < labelList.length; ++j) {
|
||||
String[] labels = labelList[j];
|
||||
List blockData = new LinkedList();
|
||||
|
||||
for (int i = 1; i < labels.length; ++i) {
|
||||
boolean hex = false;
|
||||
String def = null;
|
||||
//char appendChar = '\u0000';
|
||||
|
||||
// pull off "*": hex interpretation
|
||||
if (labels[i].charAt(0) == '*') { // HEX value
|
||||
hex = true;
|
||||
labels[i] = labels[i].substring(1);
|
||||
}
|
||||
|
||||
/*
|
||||
// pull off "$": append duplicates
|
||||
if (labels[i].charAt(0) == '$') { // HEX value
|
||||
appendChar = labels[i].charAt(1);
|
||||
labels[i] = labels[i].substring(2);
|
||||
}
|
||||
|
||||
// pull off default values
|
||||
int pos = labels[i].indexOf('-');
|
||||
if (pos >= 0) {
|
||||
def = labels[i].substring(pos+1);
|
||||
labels[i] = labels[i].substring(0,pos);
|
||||
}
|
||||
*/
|
||||
// store results
|
||||
// we do this after all processing, so that the label is clean!!
|
||||
|
||||
if (hex) isHex.put(labels[i], "");
|
||||
//if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar));
|
||||
defaults.put(labels[i], def);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static List blockData = new LinkedList();
|
||||
|
||||
static void readBlocks() throws Exception {
|
||||
void readBlocks() throws Exception {
|
||||
System.out.println("Reading 'Blocks'");
|
||||
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
|
||||
String line = "";
|
||||
|
@ -363,9 +369,9 @@ public final class ConvertUCD implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
static Set properties = new TreeSet();
|
||||
Set properties = new TreeSet();
|
||||
|
||||
static void readSemi(String[] labels) throws Exception {
|
||||
void readSemi(String[] labels) throws Exception {
|
||||
System.out.println();
|
||||
System.out.println("Reading '" + labels[0] + "'");
|
||||
if (major < 3 || (major == 3 && minor < 1)) {
|
||||
|
@ -554,8 +560,9 @@ public final class ConvertUCD implements UCD_Types {
|
|||
System.out.println(";");
|
||||
}
|
||||
|
||||
static Map charData = new TreeMap();
|
||||
Map charData = new TreeMap();
|
||||
|
||||
/*
|
||||
static void writeXML() throws IOException {
|
||||
System.out.println("Writing 'UCD-Main.xml'");
|
||||
BufferedWriter output = new BufferedWriter(
|
||||
|
@ -604,7 +611,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
String value = Utility.quoteXML((String) data.get(label));
|
||||
output.write(" " + label + "='" + value + "'");
|
||||
}
|
||||
*/
|
||||
*//*
|
||||
output.write("/>\r\n");
|
||||
}
|
||||
|
||||
|
@ -615,8 +622,9 @@ public final class ConvertUCD implements UCD_Types {
|
|||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
static void writeJavaData() throws IOException {
|
||||
*/
|
||||
|
||||
void writeJavaData() throws IOException {
|
||||
Iterator it = charData.keySet().iterator();
|
||||
int codePoint = -1;
|
||||
System.out.println("Writing " + dataFilePrefix + version);
|
||||
|
@ -665,13 +673,13 @@ public final class ConvertUCD implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
static String[] xsSplit = new String[40];
|
||||
//static String[] xsSplit = new String[40];
|
||||
|
||||
// Cache a little bit for speed
|
||||
static int getEntryCodePoint = -1;
|
||||
static UData getEntryUData = null;
|
||||
int getEntryCodePoint = -1;
|
||||
UData getEntryUData = null;
|
||||
|
||||
static UData getEntryIfExists(int cp) {
|
||||
UData getEntryIfExists(int cp) {
|
||||
if (cp == getEntryCodePoint) return getEntryUData;
|
||||
Integer cc = new Integer(cp);
|
||||
UData charEntry = (UData) charData.get(cc);
|
||||
|
@ -683,7 +691,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
/* Get entry in table for cc
|
||||
*/
|
||||
static UData getEntry(int cp) {
|
||||
UData getEntry(int cp) {
|
||||
if (cp == getEntryCodePoint) return getEntryUData;
|
||||
Integer cc = new Integer(cp);
|
||||
UData charEntry = (UData) charData.get(cc);
|
||||
|
@ -699,12 +707,12 @@ public final class ConvertUCD implements UCD_Types {
|
|||
/** Adds the character data. Signals duplicates with an exception
|
||||
*/
|
||||
|
||||
static void setBinaryProperty(int cp, int binProp) {
|
||||
void setBinaryProperty(int cp, int binProp) {
|
||||
UData charEntry = getEntry(cp);
|
||||
charEntry.binaryProperties |= (1L << binProp);
|
||||
}
|
||||
|
||||
static void appendCharProperties(int cp, String key) {
|
||||
void appendCharProperties(int cp, String key) {
|
||||
int ind;
|
||||
//if (true || NEWPROPS) {
|
||||
ind = Utility.lookup(key, UCD_Names.BP, true);
|
||||
|
@ -716,14 +724,12 @@ public final class ConvertUCD implements UCD_Types {
|
|||
setBinaryProperty(cp, ind);
|
||||
}
|
||||
|
||||
static Set jtSet = new TreeSet();
|
||||
static Set jgSet = new TreeSet();
|
||||
Set jtSet = new TreeSet();
|
||||
Set jgSet = new TreeSet();
|
||||
|
||||
static final boolean SHOW_SAMPLE = false;
|
||||
|
||||
/** Adds the character data. Signals duplicates with an exception
|
||||
*/
|
||||
static void addCharData(int cp, String key, String value) {
|
||||
void addCharData(int cp, String key, String value) {
|
||||
//if (cp < 10) System.out.println("A: " + Utility.hex(cp) + ", " + key + ", " + Utility.quoteJavaString(value));
|
||||
UData charEntry = getEntry(cp);
|
||||
//if (cp < 10) System.out.println(" " + charEntry);
|
||||
|
@ -794,7 +800,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
}
|
||||
|
||||
static public void setField(UData uData, String fieldName, String fieldValue) {
|
||||
public void setField(UData uData, String fieldName, String fieldValue) {
|
||||
try {
|
||||
if (fieldName.equals("n")) {
|
||||
uData.name = fieldValue;
|
||||
|
|
|
@ -8,7 +8,7 @@ import java.util.TimeZone;
|
|||
|
||||
public final class Default implements UCD_Types {
|
||||
|
||||
public static String ucdVersion = UCD.latestVersion;
|
||||
private static String ucdVersion = UCD.latestVersion;
|
||||
public static UCD ucd;
|
||||
public static Normalizer nfc;
|
||||
public static Normalizer nfd;
|
||||
|
@ -21,16 +21,16 @@ public final class Default implements UCD_Types {
|
|||
}
|
||||
|
||||
public static void setUCD(String version) {
|
||||
ucdVersion = version;
|
||||
setUcdVersion(version);
|
||||
setUCD();
|
||||
}
|
||||
|
||||
public static void setUCD() {
|
||||
ucd = UCD.make(ucdVersion);
|
||||
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion);
|
||||
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion);
|
||||
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion);
|
||||
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion);
|
||||
ucd = UCD.make(getUcdVersion());
|
||||
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, getUcdVersion());
|
||||
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, getUcdVersion());
|
||||
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, getUcdVersion());
|
||||
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, getUcdVersion());
|
||||
System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(ucd.getDate())));
|
||||
}
|
||||
|
||||
|
@ -43,4 +43,12 @@ public final class Default implements UCD_Types {
|
|||
return myDateFormat.format(new Date());
|
||||
}
|
||||
|
||||
public static void setUcdVersion(String ucdVersion) {
|
||||
Default.ucdVersion = ucdVersion;
|
||||
}
|
||||
|
||||
public static String getUcdVersion() {
|
||||
return ucdVersion;
|
||||
}
|
||||
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2003/07/21 15:50:06 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2004/02/06 18:30:22 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -31,11 +31,11 @@ public final class DerivedProperty implements UCD_Types {
|
|||
|
||||
// ADD CONSTANT to UCD_TYPES
|
||||
|
||||
static public UnicodeProperty make(int derivedPropertyID) {
|
||||
static public UCDProperty make(int derivedPropertyID) {
|
||||
return make(derivedPropertyID, Default.ucd);
|
||||
}
|
||||
|
||||
static public UnicodeProperty make(int derivedPropertyID, UCD ucd) {
|
||||
static public UCDProperty make(int derivedPropertyID, UCD ucd) {
|
||||
if (derivedPropertyID < 0 || derivedPropertyID >= DERIVED_PROPERTY_LIMIT) return null;
|
||||
DerivedProperty dp = getCached(ucd);
|
||||
return dp.dprops[derivedPropertyID];
|
||||
|
@ -96,14 +96,14 @@ public final class DerivedProperty implements UCD_Types {
|
|||
return dprops[propNumber].getValue(int cp);
|
||||
}
|
||||
*/
|
||||
private UnicodeProperty[] dprops = new UnicodeProperty[50];
|
||||
private UCDProperty[] dprops = new UCDProperty[50];
|
||||
|
||||
static final String[] CaseNames = {
|
||||
"Uppercase",
|
||||
"Lowercase",
|
||||
"Mixedcase"};
|
||||
|
||||
class ExDProp extends UnicodeProperty {
|
||||
class ExDProp extends UCDProperty {
|
||||
Normalizer nfx;
|
||||
ExDProp(int i) {
|
||||
type = DERIVED_NORMALIZATION;
|
||||
|
@ -124,7 +124,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
class NF_UnsafeStartProp extends UnicodeProperty {
|
||||
class NF_UnsafeStartProp extends UCDProperty {
|
||||
Normalizer nfx;
|
||||
//int prop;
|
||||
|
||||
|
@ -180,7 +180,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
*/
|
||||
|
||||
|
||||
class NFC_Prop extends UnicodeProperty {
|
||||
class NFC_Prop extends UCDProperty {
|
||||
BitSet bitset;
|
||||
boolean filter = false;
|
||||
boolean keepNonZero = true;
|
||||
|
@ -224,7 +224,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
};
|
||||
};
|
||||
|
||||
class GenDProp extends UnicodeProperty {
|
||||
class GenDProp extends UCDProperty {
|
||||
Normalizer nfx;
|
||||
Normalizer nfComp = null;
|
||||
|
||||
|
@ -281,7 +281,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
|
||||
};
|
||||
|
||||
class CaseDProp extends UnicodeProperty {
|
||||
class CaseDProp extends UCDProperty {
|
||||
byte val;
|
||||
CaseDProp (int i) {
|
||||
type = DERIVED_CORE;
|
||||
|
@ -301,7 +301,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
class QuickDProp extends UnicodeProperty {
|
||||
class QuickDProp extends UCDProperty {
|
||||
String NO;
|
||||
String MAYBE;
|
||||
Normalizer nfx;
|
||||
|
@ -357,7 +357,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
dprops[i] = new NF_UnsafeStartProp(i-NFD_UnsafeStart);
|
||||
}
|
||||
|
||||
dprops[ID_Start] = new UnicodeProperty() {
|
||||
dprops[ID_Start] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "ID_Start";
|
||||
|
@ -371,7 +371,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[ID_Continue_NO_Cf] = new UnicodeProperty() {
|
||||
dprops[ID_Continue_NO_Cf] = new UCDProperty() {
|
||||
{
|
||||
name = "ID_Continue";
|
||||
type = DERIVED_CORE;
|
||||
|
@ -441,7 +441,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
if (status != 0) XID_Continue_Set.add(cp);
|
||||
}
|
||||
|
||||
dprops[Mod_ID_Start] = new UnicodeProperty() {
|
||||
dprops[Mod_ID_Start] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "XID_Start";
|
||||
|
@ -457,7 +457,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[Mod_ID_Continue_NO_Cf] = new UnicodeProperty() {
|
||||
dprops[Mod_ID_Continue_NO_Cf] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "XID_Continue";
|
||||
|
@ -474,7 +474,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[PropMath] = new UnicodeProperty() {
|
||||
dprops[PropMath] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Math";
|
||||
|
@ -490,7 +490,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[PropAlphabetic] = new UnicodeProperty() {
|
||||
dprops[PropAlphabetic] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Alphabetic";
|
||||
|
@ -506,7 +506,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[PropLowercase] = new UnicodeProperty() {
|
||||
dprops[PropLowercase] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Lowercase";
|
||||
|
@ -522,7 +522,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
}
|
||||
};
|
||||
|
||||
dprops[PropUppercase] = new UnicodeProperty() {
|
||||
dprops[PropUppercase] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Uppercase";
|
||||
|
@ -549,7 +549,7 @@ including all characters whose canonical decomposition consists of a single char
|
|||
file by including all characters whose canonical decomposition consists of a sequence
|
||||
of characters, the first of which has a non-zero combining class.
|
||||
*/
|
||||
dprops[FullCompExclusion] = new UnicodeProperty() {
|
||||
dprops[FullCompExclusion] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_NORMALIZATION;
|
||||
name = "Full_Composition_Exclusion";
|
||||
|
@ -577,7 +577,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
*/
|
||||
};
|
||||
|
||||
dprops[FullCompInclusion] = new UnicodeProperty() {
|
||||
dprops[FullCompInclusion] = new UCDProperty() {
|
||||
{
|
||||
isStandard = false;
|
||||
type = DERIVED_NORMALIZATION;
|
||||
|
@ -598,7 +598,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
};
|
||||
|
||||
dprops[FC_NFKC_Closure] = new UnicodeProperty() {
|
||||
dprops[FC_NFKC_Closure] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_NORMALIZATION;
|
||||
setValueType(STRING_PROP);
|
||||
|
@ -621,7 +621,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
|
||||
};
|
||||
|
||||
dprops[FC_NFC_Closure] = new UnicodeProperty() {
|
||||
dprops[FC_NFC_Closure] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_NORMALIZATION;
|
||||
isStandard = false;
|
||||
|
@ -649,33 +649,47 @@ of characters, the first of which has a non-zero combining class.
|
|||
dprops[i] = new QuickDProp(i - QuickNFD);
|
||||
}
|
||||
|
||||
dprops[DefaultIgnorable] = new UnicodeProperty() {
|
||||
dprops[DefaultIgnorable] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Default_Ignorable_Code_Point";
|
||||
hasUnassigned = true;
|
||||
shortName = "DI";
|
||||
header = header = "# Derived Property: " + name
|
||||
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Variation_Selector"
|
||||
+ "\r\n# + Noncharacter_Code_Point + Cf + Cc + Cs) - White_Space"
|
||||
//+ "\r\n# - U+0600..U+0603 - U+06DD - U+070F"
|
||||
;
|
||||
header = null;
|
||||
|
||||
}
|
||||
public String getHeader() {
|
||||
if (ucdData.getCompositeVersion() > 0x040000) return "# Derived Property: " + name
|
||||
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Variation_Selector"
|
||||
+ "\r\n# + Noncharacter_Code_Point + Cf + Cc + Cs) - White_Space"
|
||||
+ "\r\n# - U+FFF9..U+FFFB// INTERLINEAR ANNOTATION characters";
|
||||
//+ "\r\n# - U+0600..U+0603 - U+06DD - U+070F"
|
||||
return "# Derived Property: " + name
|
||||
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Cf + Cc + Cs) - White_Space";
|
||||
}
|
||||
|
||||
public boolean hasValue(int cp) {
|
||||
if (ucdData.getBinaryProperty(cp, White_space)) return false;
|
||||
if (ucdData.getBinaryProperty(cp, Other_Default_Ignorable_Code_Point)) return true;
|
||||
|
||||
if (ucdData.getCompositeVersion() > 0x040000 && cp >= 0xFFF9 && cp <= 0xFFFB) return false;
|
||||
|
||||
byte cat = ucdData.getCategory(cp);
|
||||
if (cat == Cf || cat == Cs || cat == Cc) return true;
|
||||
|
||||
if (ucdData.getCompositeVersion() <= 0x040000) return false;
|
||||
|
||||
//if (cp >= 0xFFF9 && cp <= 0xFFFB) return false;
|
||||
//if (0x2060 <= cp && cp <= 0x206F || 0xFFF0 <= cp && cp <= 0xFFFB || 0xE0000 <= cp && cp <= 0xE0FFF) return true;
|
||||
//if (0x0600 <= cp && cp <= 0x0603 || 0x06DD == cp || 0x070F == cp) return false;
|
||||
|
||||
if (ucdData.getBinaryProperty(cp, Other_Default_Ignorable_Code_Point)) return true;
|
||||
if (ucdData.getBinaryProperty(cp, Variation_Selector)) return true;
|
||||
if (ucdData.getBinaryProperty(cp, Noncharacter_Code_Point)) return true;
|
||||
byte cat = ucdData.getCategory(cp);
|
||||
if (cat == Cf || cat == Cs || cat == Cc) return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
dprops[Case_Sensitive] = new UnicodeProperty() {
|
||||
dprops[Case_Sensitive] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
isStandard = false;
|
||||
|
@ -763,7 +777,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
};
|
||||
|
||||
dprops[Other_Case_Ignorable] = new UnicodeProperty() {
|
||||
dprops[Other_Case_Ignorable] = new UCDProperty() {
|
||||
{
|
||||
name = "Other_Case_Ignorable";
|
||||
shortName = "OCI";
|
||||
|
@ -785,7 +799,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
};
|
||||
|
||||
dprops[Type_i] = new UnicodeProperty() {
|
||||
dprops[Type_i] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
isStandard = false;
|
||||
|
@ -819,7 +833,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
};
|
||||
|
||||
dprops[Case_Ignorable] = new UnicodeProperty() {
|
||||
dprops[Case_Ignorable] = new UCDProperty() {
|
||||
{
|
||||
name = "Case_Ignorable";
|
||||
isStandard = false;
|
||||
|
@ -842,7 +856,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
# GraphemeBase :=
|
||||
|
||||
*/
|
||||
dprops[GraphemeExtend] = new UnicodeProperty() {
|
||||
dprops[GraphemeExtend] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Grapheme_Extend";
|
||||
|
@ -865,7 +879,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
}
|
||||
};
|
||||
|
||||
dprops[GraphemeBase] = new UnicodeProperty() {
|
||||
dprops[GraphemeBase] = new UCDProperty() {
|
||||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Grapheme_Base";
|
||||
|
@ -888,7 +902,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
};
|
||||
|
||||
for (int i = 0; i < dprops.length; ++i) {
|
||||
UnicodeProperty up = dprops[i];
|
||||
UCDProperty up = dprops[i];
|
||||
if (up == null) continue;
|
||||
if (up.getValueType() != BINARY_PROP) continue;
|
||||
up.setValue(NUMBER, "1");
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
|
||||
* $Date: 2003/07/21 15:50:06 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2004/02/06 18:30:22 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,7 +24,7 @@ final class DerivedPropertyLister extends PropertyLister {
|
|||
|
||||
//private int propMask;
|
||||
//private DerivedProperty dprop;
|
||||
private UnicodeProperty uprop;
|
||||
private UCDProperty uprop;
|
||||
int width;
|
||||
boolean varies;
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
|
||||
* $Date: 2003/02/25 23:38:23 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/06 18:30:22 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -56,8 +56,8 @@ class DiffPropertyLister extends PropertyLister {
|
|||
}
|
||||
*/
|
||||
|
||||
UnicodeProperty newProp = null;
|
||||
UnicodeProperty oldProp = null;
|
||||
UCDProperty newProp = null;
|
||||
UCDProperty oldProp = null;
|
||||
String value = "";
|
||||
|
||||
public String optionalComment(int cp) {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
|
||||
* $Date: 2003/02/25 23:38:23 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2004/02/06 18:30:22 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -613,7 +613,7 @@ public class GenerateCaseFolding implements UCD_Types {
|
|||
case 4: skipLine = true; break;
|
||||
case 5: out.println("# No corresponding uppercase precomposed character"); break;
|
||||
case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
|
||||
case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
|
||||
case 7: out.println("# Some characters with YPOGEGRAMMENI also have no corresponding titlecases"); break;
|
||||
case 8: skipLine = true; break;
|
||||
}
|
||||
if (!skipLine) out.println();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2003/08/20 03:46:41 $
|
||||
* $Revision: 1.30 $
|
||||
* $Date: 2004/02/06 18:30:21 $
|
||||
* $Revision: 1.31 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -186,7 +186,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
doHeader(fileName + getFileSuffix(false), output, headerChoice);
|
||||
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
|
||||
UnicodeProperty up = DerivedProperty.make(i, Default.ucd);
|
||||
UCDProperty up = DerivedProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
boolean keepGoing = true;
|
||||
if (!up.isStandard()) keepGoing = false;
|
||||
|
@ -409,7 +409,7 @@ public class GenerateData implements UCD_Types {
|
|||
//System.out.println("debug");
|
||||
}
|
||||
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
|
||||
|
@ -745,7 +745,7 @@ public class GenerateData implements UCD_Types {
|
|||
doHeader(file + getFileSuffix(false), output, headerChoice);
|
||||
int last = -1;
|
||||
for (int i = startEnum; i < endEnum; ++i) {
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
|
||||
if (up == null) continue;
|
||||
if (up.skipInDerivedListing()) continue;
|
||||
|
||||
|
@ -1082,7 +1082,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
log.println();
|
||||
log.println("Cummulative differences");
|
||||
UnicodeProperty up = DerivedProperty.make(prop, Default.ucd);
|
||||
UCDProperty up = DerivedProperty.make(prop, Default.ucd);
|
||||
UnicodeSet newProp = up.getSet();
|
||||
Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd);
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
|
||||
* $Date: 2003/07/07 15:58:57 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2004/02/06 18:30:21 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -49,7 +49,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
log.println("<title>Unihan check</title>");
|
||||
log.println("</head>");
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
|
||||
|
||||
Map properties = new TreeMap();
|
||||
|
||||
|
@ -265,24 +265,25 @@ public final class GenerateHanTransliterator implements UCD_Types {
|
|||
switch (type) {
|
||||
case DEFINITION:
|
||||
key = "kDefinition"; // kMandarin, kKorean, kJapaneseKun, kJapaneseOn
|
||||
filename = "Raw_Transliterator_Han_Latin_Definition.txt";
|
||||
filename = "Raw_Transliterator_Han_Latin_Definition";
|
||||
break;
|
||||
case JAPANESE:
|
||||
key = "kJapaneseOn";
|
||||
filename = "Raw_Transliterator_ja_Latin.txt";
|
||||
filename = "Raw_Transliterator_ja_Latin";
|
||||
break;
|
||||
case CHINESE:
|
||||
key = "kMandarin";
|
||||
filename = "Raw_Transliterator_Han_Latin.txt";
|
||||
filename = "Raw_Transliterator_Han_Latin";
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected option: must be 0..2");
|
||||
}
|
||||
filename += Default.ucd.getVersion() + ".txt";
|
||||
|
||||
err = Utility.openPrintWriter("Transliterate_err.txt", Utility.UTF8_WINDOWS);
|
||||
log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
|
||||
log.print('\uFEFF');
|
||||
|
||||
if (!SKIP_OVERRIDES) {
|
||||
if (false /*!SKIP_OVERRIDES*/) {
|
||||
log.println();
|
||||
log.println("@*Override Data");
|
||||
log.println();
|
||||
|
@ -1798,7 +1799,7 @@ Bad pinyin data: \u4E7F ? LE
|
|||
|
||||
static void readUnihanData(String key) throws java.io.IOException {
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
|
||||
|
||||
int count = 0;
|
||||
int lineCounter = 0;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
|
||||
* $Date: 2002/08/04 21:38:45 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/06 18:30:21 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -626,9 +626,9 @@ public class GenerateLineBreakTest implements UCD_Types {
|
|||
|
||||
static final String[] Names = {"CR", "LF", "CTL", "Extend", "Link", "CGJ", "Base", "LetterBase", "Other" };
|
||||
|
||||
static UnicodeProperty extendProp = UnifiedBinaryProperty.make(DERIVED | GraphemeExtend);
|
||||
static UnicodeProperty baseProp = UnifiedBinaryProperty.make(DERIVED | GraphemeBase);
|
||||
static UnicodeProperty linkProp = UnifiedBinaryProperty.make(BINARY_PROPERTIES | GraphemeLink);
|
||||
static UCDProperty extendProp = UnifiedBinaryProperty.make(DERIVED | GraphemeExtend);
|
||||
static UCDProperty baseProp = UnifiedBinaryProperty.make(DERIVED | GraphemeBase);
|
||||
static UCDProperty linkProp = UnifiedBinaryProperty.make(BINARY_PROPERTIES | GraphemeLink);
|
||||
|
||||
{
|
||||
fileName = "Word";
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/06 18:30:21 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -53,7 +53,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
|
|||
String[] codes = new String[2];
|
||||
String[] shapes = new String[4];
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.ucdVersion, true, Utility.LATIN1);
|
||||
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.getUcdVersion(), true, Utility.LATIN1);
|
||||
while (true) {
|
||||
String line = Utility.readDataLine(in);
|
||||
if (line == null) break;
|
||||
|
|
|
@ -5,17 +5,21 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2003/05/02 21:46:33 $
|
||||
* $Revision: 1.32 $
|
||||
* $Date: 2004/02/06 18:30:21 $
|
||||
* $Revision: 1.33 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import java.util.Date;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
public final class Main implements UCD_Types {
|
||||
|
||||
static final String classPrefix = "com.ibm.text.UCD.";
|
||||
|
||||
static final String[] CORE_FILES = {
|
||||
"CaseFolding",
|
||||
"CompositionExclusions",
|
||||
|
@ -52,288 +56,293 @@ public final class Main implements UCD_Types {
|
|||
};
|
||||
|
||||
public static void main (String[] args) throws Exception {
|
||||
System.out.println("*** Start *** " + new Date());
|
||||
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
|
||||
long mask = 0;
|
||||
|
||||
String arg = args[i];
|
||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||
|
||||
Utility.fixDot();
|
||||
System.out.println();
|
||||
System.out.println("** Argument: " + args[i] + " **");
|
||||
try {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
|
||||
// Expand string arguments
|
||||
|
||||
if (arg.equalsIgnoreCase("ALL")) {
|
||||
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg.equalsIgnoreCase("CORE")) {
|
||||
args = Utility.append(CORE_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg.equalsIgnoreCase("EXTRACTED")) {
|
||||
args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// make sure the UCD is set up
|
||||
|
||||
if (arg.equalsIgnoreCase("version")) {
|
||||
Default.setUCD(args[++i]);
|
||||
continue;
|
||||
}
|
||||
Default.ensureUCD();
|
||||
|
||||
// Now handle other options
|
||||
|
||||
if (arg.equalsIgnoreCase("verify")) {
|
||||
VerifyUCD.verify();
|
||||
VerifyUCD.checkCanonicalProperties();
|
||||
VerifyUCD.CheckCaseFold();
|
||||
VerifyUCD.checkAgainstUInfo();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
|
||||
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
|
||||
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
|
||||
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
|
||||
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
|
||||
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
|
||||
else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
|
||||
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
|
||||
|
||||
else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
|
||||
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
|
||||
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
|
||||
else if (arg.equalsIgnoreCase("hanproperties")) GenerateHanTransliterator.readUnihan();
|
||||
|
||||
else if (arg.equalsIgnoreCase("fixChineseOverrides")) GenerateHanTransliterator.fixChineseOverrides();
|
||||
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
|
||||
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
|
||||
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("binary")) FastBinarySearch.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
|
||||
|
||||
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
|
||||
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
|
||||
else if (arg.equalsIgnoreCase("checkCase3")) VerifyUCD.checkCase3();
|
||||
else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
|
||||
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
|
||||
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
|
||||
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
|
||||
else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
|
||||
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
|
||||
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
|
||||
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
|
||||
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
|
||||
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
|
||||
|
||||
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
|
||||
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
|
||||
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
|
||||
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
|
||||
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
|
||||
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
|
||||
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
|
||||
|
||||
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
|
||||
|
||||
|
||||
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
|
||||
|
||||
|
||||
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
*/
|
||||
// EXTRACTED PROPERTIES
|
||||
|
||||
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBidiClass");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBinaryProperties" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
|
||||
GenerateData.generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedCombiningClass" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
|
||||
GenerateData.generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedDecompositionType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
|
||||
GenerateData.generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedEastAsianWidth" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
|
||||
GenerateData.generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedGeneralCategory" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
|
||||
GenerateData.generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedJoiningGroup" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
|
||||
GenerateData.generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedJoiningType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
|
||||
GenerateData.generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedLineBreak" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNumericType")) {
|
||||
GenerateData.generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedNumericType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("HangulSyllableType")) {
|
||||
GenerateData.generateVerticalSlice(HANGUL_SYLLABLE_TYPE,HANGUL_SYLLABLE_TYPE+NEXT_ENUM, GenerateData.HEADER_EXTEND,
|
||||
"DerivedData/", "HangulSyllableType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
|
||||
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedNumericValues" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
|
||||
GenerateStandardizedVariants.generate();
|
||||
|
||||
// OTHER STANDARD PROPERTIES
|
||||
long mask = 0;
|
||||
|
||||
} else if (arg.equalsIgnoreCase("CaseFolding")) {
|
||||
GenerateCaseFolding.makeCaseFold(true);
|
||||
GenerateCaseFolding.makeCaseFold(false);
|
||||
|
||||
} else if (arg.equalsIgnoreCase("SpecialCasing")) {
|
||||
GenerateCaseFolding.generateSpecialCasing(true);
|
||||
GenerateCaseFolding.generateSpecialCasing(false);
|
||||
|
||||
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
|
||||
GenerateData.generateCompExclusions();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedAge")) {
|
||||
GenerateData.generateAge("DerivedData/", "DerivedAge");
|
||||
String arg = args[i];
|
||||
if (arg.charAt(0) == '#') return; // skip rest of line
|
||||
|
||||
Utility.fixDot();
|
||||
System.out.println();
|
||||
System.out.println("** Argument: " + args[i] + " ** " + new Date());
|
||||
|
||||
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
|
||||
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
|
||||
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
|
||||
// Expand string arguments
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
||||
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
|
||||
if (arg.equalsIgnoreCase("ALL")) {
|
||||
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNormalizationProps")) {
|
||||
GenerateData.generateDerived(DERIVED_NORMALIZATION, true, GenerateData.HEADER_DERIVED, "DerivedData/",
|
||||
"DerivedNormalizationProps" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("NormalizationTest")) {
|
||||
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
|
||||
if (arg.equalsIgnoreCase("CORE")) {
|
||||
args = Utility.append(CORE_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if (arg.equalsIgnoreCase("PropertyAliases")) {
|
||||
GenerateData.generatePropertyAliases();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("PropList")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
|
||||
GenerateData.HEADER_EXTEND, "DerivedData/", "PropList");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("Scripts")) {
|
||||
GenerateData.generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM,
|
||||
GenerateData.HEADER_SCRIPTS, "DerivedData/", "Scripts");
|
||||
// OTHER TESTING
|
||||
|
||||
} else if (arg.equalsIgnoreCase("OtherDerivedProperties")) {
|
||||
//mask = Utility.setBits(0, NFC_Leading, NFC_Resulting);
|
||||
GenerateData.generateDerived((byte)(ALL & ~DERIVED_CORE & ~DERIVED_NORMALIZATION), false, GenerateData.HEADER_DERIVED, "OtherData/", "OtherDerivedProperties");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("AllBinary")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
|
||||
GenerateData.HEADER_EXTEND, "OtherDerived/", "AllBinary");
|
||||
if (arg.equalsIgnoreCase("EXTRACTED")) {
|
||||
args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1));
|
||||
i = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// make sure the UCD is set up
|
||||
|
||||
if (arg.equalsIgnoreCase("version")) {
|
||||
Default.setUCD(args[++i]);
|
||||
continue;
|
||||
}
|
||||
Default.ensureUCD();
|
||||
|
||||
// Now handle other options
|
||||
|
||||
if (arg.equalsIgnoreCase("verify")) {
|
||||
VerifyUCD.verify();
|
||||
VerifyUCD.checkCanonicalProperties();
|
||||
VerifyUCD.CheckCaseFold();
|
||||
VerifyUCD.checkAgainstUInfo();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.getUcdVersion()});
|
||||
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
|
||||
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
|
||||
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
|
||||
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
|
||||
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
|
||||
else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
|
||||
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
|
||||
|
||||
else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
|
||||
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
|
||||
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
|
||||
else if (arg.equalsIgnoreCase("hanproperties")) GenerateHanTransliterator.readUnihan();
|
||||
|
||||
else if (arg.equalsIgnoreCase("fixChineseOverrides")) GenerateHanTransliterator.fixChineseOverrides();
|
||||
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
|
||||
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
|
||||
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
|
||||
|
||||
|
||||
else if (arg.equalsIgnoreCase("binary")) FastBinarySearch.test();
|
||||
|
||||
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
|
||||
|
||||
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
|
||||
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
|
||||
else if (arg.equalsIgnoreCase("checkCase3")) VerifyUCD.checkCase3();
|
||||
else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
|
||||
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
|
||||
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
|
||||
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
|
||||
else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
|
||||
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
|
||||
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
|
||||
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
|
||||
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
|
||||
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
|
||||
|
||||
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
|
||||
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
|
||||
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
|
||||
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
|
||||
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
|
||||
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
|
||||
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
|
||||
|
||||
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
|
||||
|
||||
|
||||
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
|
||||
|
||||
|
||||
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
|
||||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
*/
|
||||
// EXTRACTED PROPERTIES
|
||||
|
||||
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBidiClass");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedGeneralCategoryTEST")) {
|
||||
GenerateData.generateVerticalSlice(CATEGORY+29, CATEGORY+32, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/", "DerivedGeneralCategory" );
|
||||
} else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBinaryProperties" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
|
||||
GenerateData.generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedCombiningClass" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
|
||||
GenerateData.generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedDecompositionType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
|
||||
GenerateData.generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedEastAsianWidth" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
|
||||
GenerateData.generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedGeneralCategory" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
|
||||
GenerateData.generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedJoiningGroup" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
|
||||
GenerateData.generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedJoiningType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
|
||||
GenerateData.generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedLineBreak" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNumericType")) {
|
||||
GenerateData.generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedNumericType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("HangulSyllableType")) {
|
||||
GenerateData.generateVerticalSlice(HANGUL_SYLLABLE_TYPE,HANGUL_SYLLABLE_TYPE+NEXT_ENUM, GenerateData.HEADER_EXTEND,
|
||||
"DerivedData/", "HangulSyllableType" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
|
||||
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedNumericValues" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
|
||||
GenerateStandardizedVariants.generate();
|
||||
|
||||
// OTHER STANDARD PROPERTIES
|
||||
|
||||
} else if (arg.equalsIgnoreCase("CaseFolding")) {
|
||||
GenerateCaseFolding.makeCaseFold(true);
|
||||
GenerateCaseFolding.makeCaseFold(false);
|
||||
|
||||
} else if (arg.equalsIgnoreCase("SpecialCasing")) {
|
||||
GenerateCaseFolding.generateSpecialCasing(true);
|
||||
GenerateCaseFolding.generateSpecialCasing(false);
|
||||
|
||||
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
|
||||
GenerateData.generateCompExclusions();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedAge")) {
|
||||
GenerateData.generateAge("DerivedData/", "DerivedAge");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listDifferences")) {
|
||||
CompareProperties.listDifferences();
|
||||
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
|
||||
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
|
||||
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
|
||||
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedNormalizationProps")) {
|
||||
GenerateData.generateDerived(DERIVED_NORMALIZATION, true, GenerateData.HEADER_DERIVED, "DerivedData/",
|
||||
"DerivedNormalizationProps" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("NormalizationTest")) {
|
||||
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("PropertyAliases")) {
|
||||
GenerateData.generatePropertyAliases();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("PropList")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
|
||||
GenerateData.HEADER_EXTEND, "DerivedData/", "PropList");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("Scripts")) {
|
||||
GenerateData.generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM,
|
||||
GenerateData.HEADER_SCRIPTS, "DerivedData/", "Scripts");
|
||||
// OTHER TESTING
|
||||
|
||||
} else if (arg.equalsIgnoreCase("partition")) {
|
||||
CompareProperties.partition();
|
||||
} else if (arg.equalsIgnoreCase("OtherDerivedProperties")) {
|
||||
//mask = Utility.setBits(0, NFC_Leading, NFC_Resulting);
|
||||
GenerateData.generateDerived((byte)(ALL & ~DERIVED_CORE & ~DERIVED_NORMALIZATION), false, GenerateData.HEADER_DERIVED, "OtherData/", "OtherDerivedProperties");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("AllBinary")) {
|
||||
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
|
||||
GenerateData.HEADER_EXTEND, "OtherDerived/", "AllBinary");
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedGeneralCategoryTEST")) {
|
||||
GenerateData.generateVerticalSlice(CATEGORY+29, CATEGORY+32, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/", "DerivedGeneralCategory" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listDifferences")) {
|
||||
CompareProperties.listDifferences();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("partition")) {
|
||||
CompareProperties.partition();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("propertyStatistics")) {
|
||||
CompareProperties.statistics();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listAccents")) {
|
||||
GenerateData.listCombiningAccents();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listGreekVowels")) {
|
||||
GenerateData.listGreekVowels();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listKatakana")) {
|
||||
GenerateData.listKatakana();
|
||||
|
||||
/*
|
||||
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedFullNormalization" );
|
||||
} else if (arg.equalsIgnoreCase("caseignorable")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.Other_Case_Ignorable, DerivedProperty.Type_i);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "CaseIgnorable" );
|
||||
} else if (arg.equalsIgnoreCase("nfunsafestart")) {
|
||||
mask = Utility.setBits(0, NFD_UnsafeStart, NFKC_UnsafeStart);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "NFUnsafeStart");
|
||||
*/
|
||||
|
||||
} else if (arg.equalsIgnoreCase("propertyStatistics")) {
|
||||
CompareProperties.statistics();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listAccents")) {
|
||||
GenerateData.listCombiningAccents();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listGreekVowels")) {
|
||||
GenerateData.listGreekVowels();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("listKatakana")) {
|
||||
GenerateData.listKatakana();
|
||||
|
||||
/*
|
||||
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedFullNormalization" );
|
||||
} else if (arg.equalsIgnoreCase("caseignorable")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.Other_Case_Ignorable, DerivedProperty.Type_i);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "CaseIgnorable" );
|
||||
} else if (arg.equalsIgnoreCase("nfunsafestart")) {
|
||||
mask = Utility.setBits(0, NFD_UnsafeStart, NFKC_UnsafeStart);
|
||||
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "NFUnsafeStart");
|
||||
*/
|
||||
|
||||
} else {
|
||||
throw new IllegalArgumentException(" ! Unknown option -- see Main.java for options");
|
||||
} else {
|
||||
CallArgs.call(new String[]{arg}, classPrefix);
|
||||
}
|
||||
|
||||
|
||||
//checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
|
||||
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
|
||||
|
||||
|
||||
//GenerateData.generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
|
||||
// GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedPropData2" );
|
||||
//GenerateData.generateVerticalSlice(SCRIPT, SCRIPT+1, "ScriptCommon" );
|
||||
//listStrings("LowerCase" , 0,0);
|
||||
//GenerateData.generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedData/", "DerivedPropData1" );
|
||||
|
||||
// AGE stuff
|
||||
//UCD ucd = UCD.make();
|
||||
//System.out.println(ucd.getAgeID(0x61));
|
||||
//System.out.println(ucd.getAgeID(0x2FA1D));
|
||||
|
||||
//
|
||||
}
|
||||
|
||||
|
||||
//checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
|
||||
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
|
||||
|
||||
|
||||
//GenerateData.generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
|
||||
// GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedPropData2" );
|
||||
//GenerateData.generateVerticalSlice(SCRIPT, SCRIPT+1, "ScriptCommon" );
|
||||
//listStrings("LowerCase" , 0,0);
|
||||
//GenerateData.generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedData/", "DerivedPropData1" );
|
||||
|
||||
// AGE stuff
|
||||
//UCD ucd = UCD.make();
|
||||
//System.out.println(ucd.getAgeID(0x61));
|
||||
//System.out.println(ucd.getAgeID(0x2FA1D));
|
||||
|
||||
//
|
||||
} finally {
|
||||
System.out.println("*** Done *** " + new Date());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
|
||||
* $Date: 2003/07/21 15:50:06 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,7 +24,7 @@ final class MyPropertyLister extends PropertyLister {
|
|||
|
||||
private boolean isDefaultValue = false;
|
||||
|
||||
private UnicodeProperty up;
|
||||
private UCDProperty up;
|
||||
|
||||
public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) {
|
||||
this.propMask = propMask;
|
||||
|
|
|
@ -6,7 +6,7 @@ import com.ibm.text.utility.*;
|
|||
import java.io.PrintWriter;
|
||||
|
||||
|
||||
public final class NFSkippable extends UnicodeProperty {
|
||||
public final class NFSkippable extends UCDProperty {
|
||||
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
|
@ -200,7 +200,7 @@ public final class NFSkippable extends UnicodeProperty {
|
|||
out.println();
|
||||
|
||||
for (int mode = NFD_UnsafeStart; mode <= NFKC_UnsafeStart; ++mode) {
|
||||
UnicodeProperty up = DerivedProperty.make(mode, Default.ucd);
|
||||
UCDProperty up = DerivedProperty.make(mode, Default.ucd);
|
||||
generateSet(out, "UNSAFE[" + Normalizer.getName((byte)(mode-NFD_UnsafeStart)) + "]", up);
|
||||
}
|
||||
|
||||
|
@ -212,7 +212,7 @@ public final class NFSkippable extends UnicodeProperty {
|
|||
out.close();
|
||||
}
|
||||
|
||||
static void generateSet(PrintWriter out, String label, UnicodeProperty up) {
|
||||
static void generateSet(PrintWriter out, String label, UCDProperty up) {
|
||||
System.out.println("Generating: " + up.getName(NORMAL));
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (int cp = 0; cp <= limit; ++cp) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
|
||||
* $Date: 2003/02/25 23:38:22 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -288,6 +288,9 @@ public final class Normalizer implements UCD_Types {
|
|||
return this.composition ? data.isLeading(cp) : false;
|
||||
}
|
||||
|
||||
public int getComposition(int first, int second) {
|
||||
return data.getPairwiseComposition(first, second);
|
||||
}
|
||||
|
||||
// ======================================
|
||||
// PRIVATES
|
||||
|
@ -518,6 +521,9 @@ Problem: differs: true, call: false U+1FED GREEK DIALYTIKA AND VARIA
|
|||
// we know we decompose all CANONICAL, plus > CANONICAL if compat is TRUE.
|
||||
if (dt == CANONICAL || dt > CANONICAL && compat) {
|
||||
String s = ucd.getDecompositionMapping(cp);
|
||||
if (s.equals(UTF16.valueOf(cp))) {
|
||||
System.out.println("fix");
|
||||
}
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
cp = UTF16.charAt(s, i);
|
||||
getRecursiveDecomposition(cp, buffer, compat);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||
* $Date: 2003/08/20 03:46:42 $
|
||||
* $Revision: 1.12 $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.13 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,6 +17,10 @@ import java.util.*;
|
|||
import java.io.*;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import java.math.BigDecimal;
|
||||
|
@ -27,12 +31,38 @@ import com.ibm.icu.text.*;
|
|||
import com.ibm.text.utility.*;
|
||||
|
||||
public class TestData implements UCD_Types {
|
||||
|
||||
static UnicodeProperty.Factory upf;
|
||||
|
||||
public static void main (String[] args) throws IOException {
|
||||
Default.setUCD();
|
||||
System.out.println(new Date());
|
||||
upf = ICUPropertyFactory.make();
|
||||
System.out.println(new Date());
|
||||
|
||||
showPropDiff(
|
||||
"gc=mn", null,
|
||||
"script=inherited", null);
|
||||
|
||||
// upf.getProperty("gc")
|
||||
//.getPropertySet(new ICUPropertyFactory.RegexMatcher("mn|me"),null)
|
||||
|
||||
showPropDiff(
|
||||
"gc=mn|me", null,
|
||||
"script=inherited", null);
|
||||
|
||||
if (true) return;
|
||||
|
||||
showPropDiff(
|
||||
"General_Category=L", null,
|
||||
"Script!=Inherited|Common",
|
||||
UnifiedBinaryProperty.getSet("script=inherited", Default.ucd)
|
||||
.addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd))
|
||||
.complement()
|
||||
);
|
||||
|
||||
|
||||
UnicodeSet sterm = UnifiedProperty.getSet("Sentence_Terminal", Default.ucd);
|
||||
UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd);
|
||||
UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd);
|
||||
UnicodeSet po = new UnicodeSet("[:po:]");
|
||||
UnicodeSet empty = new UnicodeSet();
|
||||
|
@ -107,8 +137,20 @@ public class TestData implements UCD_Types {
|
|||
log.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static BagFormatter bf = new BagFormatter();
|
||||
static UnicodeProperty.Matcher matcher = new ICUPropertyFactory.RegexMatcher();
|
||||
|
||||
private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
|
||||
System.out.println("Property Listing");
|
||||
if (s1 == null) {
|
||||
s1 = upf.getSet(p1, matcher, null);
|
||||
}
|
||||
if (s2 == null) {
|
||||
s2 = upf.getSet(p2, matcher, null);
|
||||
}
|
||||
bf.showSetDifferences(bf.CONSOLE,p1,s1,p2,s2);
|
||||
}
|
||||
|
||||
static private UnicodeSet getSetForName(String regexPattern) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
|
||||
* $Date: 2002/06/13 21:14:05 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -16,6 +16,9 @@ package com.ibm.text.UCD;
|
|||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
public final class TestNormalization {
|
||||
|
@ -192,4 +195,52 @@ public final class TestNormalization {
|
|||
}
|
||||
}
|
||||
|
||||
public static void checkStarters () {
|
||||
System.out.println("Checking Starters");
|
||||
UnicodeSet leading = new UnicodeSet();
|
||||
UnicodeSet trailing = new UnicodeSet();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (Default.nfc.isLeading(i)) leading.add(i);
|
||||
if (Default.ucd.getCombiningClass(i) != 0) continue;
|
||||
if (Default.nfc.isTrailing(i)) trailing.add(i);
|
||||
}
|
||||
System.out.println("Leading: " + leading.size());
|
||||
System.out.println("Trailing Starters: " + trailing.size());
|
||||
UnicodeSetIterator lead = new UnicodeSetIterator(leading);
|
||||
UnicodeSetIterator trail = new UnicodeSetIterator(trailing);
|
||||
UnicodeSet followers = new UnicodeSet();
|
||||
Map map = new TreeMap(new CompareProperties.UnicodeSetComparator());
|
||||
while (lead.next()) {
|
||||
trail.reset();
|
||||
followers.clear();
|
||||
while (trail.next()) {
|
||||
if (Default.nfc.getComposition(lead.codepoint, trail.codepoint) != 0xFFFF) {
|
||||
followers.add(trail.codepoint);
|
||||
}
|
||||
}
|
||||
if (followers.size() == 0) continue;
|
||||
System.out.println(Default.ucd.getCode(lead.codepoint)
|
||||
+ "\t" + followers.toPattern(true));
|
||||
UnicodeSet possLead = (UnicodeSet) map.get(followers);
|
||||
if (possLead == null) {
|
||||
possLead = new UnicodeSet();
|
||||
map.put(followers.clone(), possLead);
|
||||
}
|
||||
possLead.add(lead.codepoint);
|
||||
}
|
||||
Iterator it = map.keySet().iterator();
|
||||
BagFormatter bf = new BagFormatter();
|
||||
bf.setLineSeparator("<br>");
|
||||
bf.setLabelSource(null);
|
||||
bf.setAbbreviated(true);
|
||||
while (it.hasNext()) {
|
||||
UnicodeSet t = (UnicodeSet) it.next();
|
||||
UnicodeSet l = (UnicodeSet) map.get(t);
|
||||
System.out.println("<tr><td>"
|
||||
+ bf.showSetNames("",l)
|
||||
+ "</td><td>"
|
||||
+ bf.showSetNames("",t)
|
||||
+ "</td></tr>");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
private UCD ucd;
|
||||
private static boolean needAgeCache = true;
|
||||
private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
|
||||
|
||||
private static HashMap cache = new HashMap();
|
||||
|
||||
public static synchronized ToolUnicodePropertySource make(String version) {
|
||||
ToolUnicodePropertySource result = (ToolUnicodePropertySource)cache.get(version);
|
||||
if (result != null) return result;
|
||||
result = new ToolUnicodePropertySource(version);
|
||||
cache.put(version, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Builds the property source for one version string.
 *
 * Registers a {@code ToolUnicodeProperty} for every alias reported by
 * {@code UnifiedProperty.getAvailablePropertiesAliases}, then adds
 * hand-written properties for Name, Block, Bidi_Mirroring_Glyph,
 * Case_Folding and Numeric_Value that read directly from {@code ucd}.
 *
 * @param version version string passed to {@code UCD.make}
 */
private ToolUnicodePropertySource(String version) {
    ucd = UCD.make(version);

    // Properties discovered through UnifiedProperty, visited in sorted alias order.
    TreeSet aliases = new TreeSet();
    UnifiedProperty.getAvailablePropertiesAliases(aliases, ucd);
    for (Iterator it = aliases.iterator(); it.hasNext();) {
        String alias = (String) it.next();
        add(new ToolUnicodeProperty(alias));
    }

    // Name: no value for code points whose category is in ODD_BALLS.
    add(new UnicodeProperty.SimpleProperty() {
        {
            set("Name", "na", "<string>", UnicodeProperty.STRING);
        }
        public String getPropertyValue(int codepoint) {
            boolean oddBall = (ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0;
            return oddBall ? null : ucd.getName(codepoint);
        }
    });

    // Block
    add(new UnicodeProperty.SimpleProperty() {
        {
            set("Block", "blk", "<string>", UnicodeProperty.STRING);
        }
        public String getPropertyValue(int codepoint) {
            return ucd.getBlock(codepoint);
        }
    });

    // Bidi_Mirroring_Glyph
    add(new UnicodeProperty.SimpleProperty() {
        {
            set("Bidi_Mirroring_Glyph", "bmg", "<string>", UnicodeProperty.STRING);
        }
        public String getPropertyValue(int codepoint) {
            return ucd.getBidiMirror(codepoint);
        }
    });

    // Case_Folding: full case folding.
    add(new UnicodeProperty.SimpleProperty() {
        {
            set("Case_Folding", "cf", "<string>", UnicodeProperty.STRING);
        }
        public String getPropertyValue(int codepoint) {
            return ucd.getCase(codepoint, UCD_Types.FULL, UCD_Types.FOLD);
        }
    });

    // Numeric_Value: NaN from the UCD is reported as "no value".
    add(new UnicodeProperty.SimpleProperty() {
        {
            set("Numeric_Value", "nv", "<number>", UnicodeProperty.NUMERIC);
        }
        public String getPropertyValue(int codepoint) {
            double value = ucd.getNumericValue(codepoint);
            return Double.isNaN(value) ? null : Double.toString(value);
        }
    });
}
|
||||
/*
|
||||
"Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
|
||||
"Lowercase_Mapping", "Name", "Numeric_Value", "Simple_Case_Folding",
|
||||
"Simple_Lowercase_Mapping", "Simple_Titlecase_Mapping", "Simple_Uppercase_Mapping",
|
||||
"Titlecase_Mapping", "Unicode_1_Name", "Uppercase_Mapping", "isCased", "isCasefolded",
|
||||
"isLowercase", "isNFC", "isNFD", "isNFKC", "isNFKD", "isTitlecase", "isUppercase",
|
||||
"toNFC", "toNFD", "toNFKC", "toNKFD"
|
||||
});
|
||||
*/
|
||||
|
||||
/*
|
||||
private class NameProperty extends UnicodeProperty.SimpleProperty {
|
||||
{set("Name", "na", "<string>", UnicodeProperty.STRING);}
|
||||
public String getPropertyValue(int codepoint) {
|
||||
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getName(codepoint);
|
||||
}
|
||||
}
|
||||
*/
|
||||
static final int ODD_BALLS = (1<<UCD_Types.Cn) | (1<<UCD_Types.Co) | (1<<UCD_Types.Cs) | (1<<UCD.Cc);
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyAliases(java.util.Collection)
|
||||
*/
|
||||
private class ToolUnicodeProperty extends UnicodeProperty {
|
||||
com.ibm.text.UCD.UCDProperty up;
|
||||
int propMask;
|
||||
|
||||
static final int EXTRA_START = 0x10000;
|
||||
|
||||
/**
 * Wraps the UCD property identified by an alias.
 *
 * Resolves the alias to a property mask, creates the backing
 * {@code UCDProperty}, and only then sets the type and name, since
 * {@code getPropertyTypeInternal()} is called after both fields are set.
 *
 * @param propertyAlias alias to resolve; also used as this property's name
 */
private ToolUnicodeProperty(String propertyAlias) {
    this.propMask = UnifiedProperty.getPropmask(propertyAlias, ucd);
    this.up = UnifiedProperty.make(this.propMask, ucd);
    // Type lookup runs once propMask/up are in place.
    setType(getPropertyTypeInternal());
    setName(propertyAlias);
}
|
||||
|
||||
public Collection getAvailablePropertyValueAliases(Collection result) {
|
||||
int type = getPropertyType() & ~EXTENDED_BIT;
|
||||
if (type == STRING) result.add("<string>");
|
||||
else if (type == NUMERIC) result.add("<string>");
|
||||
else if (type == BINARY) {
|
||||
result.add("True");
|
||||
result.add("False");
|
||||
} else if (type == ENUMERATED) {
|
||||
byte style = UCD_Types.LONG;
|
||||
int prop = propMask>>8;
|
||||
String temp = null;
|
||||
boolean titlecase = false;
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
try {
|
||||
switch (prop) {
|
||||
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
|
||||
titlecase = true;
|
||||
break;
|
||||
case UCD_Types.EAST_ASIAN_WIDTH>>8: temp = (ucd.getEastAsianWidthID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.LINE_BREAK>>8: temp = (ucd.getLineBreakID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
|
||||
if ("<unused>".equals(temp)) continue;
|
||||
if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
|
||||
break;
|
||||
case UCD_Types.AGE>>8: temp = (ucd.getAgeID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
|
||||
temp = (ucd.getHangulSyllableTypeID_fromIndex((byte)i,style)); break;
|
||||
default: throw new IllegalArgumentException("Internal Error: " + prop);
|
||||
}
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
continue;
|
||||
}
|
||||
if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
|
||||
}
|
||||
if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
|
||||
if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
|
||||
if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection getPropertyAliases(Collection result) {
|
||||
String longName = up.getName(UCD_Types.LONG);
|
||||
addUnique(Utility.getUnskeleton(longName, true), result);
|
||||
String shortName = up.getName(UCD_Types.SHORT);
|
||||
addUnique(Utility.getUnskeleton(shortName, false), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection getPropertyValueAliases(String valueAlias, Collection result) {
|
||||
// TODO Auto-generated method stub
|
||||
return result;
|
||||
}
|
||||
|
||||
public String getPropertyValue(int codepoint) {
|
||||
byte style = UCD_Types.LONG;
|
||||
String temp = null;
|
||||
boolean titlecase = false;
|
||||
switch (propMask>>8) {
|
||||
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), style)); break;
|
||||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), style));
|
||||
if (temp.startsWith("Fixed_")) temp = temp.substring(6);
|
||||
break;
|
||||
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), style));
|
||||
if (temp == null || temp.length() == 0) temp = "none";
|
||||
break;
|
||||
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex(ucd.getNumericType(codepoint), style));
|
||||
titlecase = true;
|
||||
if (temp == null || temp.length() == 0) temp = "None";
|
||||
break;
|
||||
case UCD_Types.EAST_ASIAN_WIDTH>>8: temp = (ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(codepoint), style)); break;
|
||||
case UCD_Types.LINE_BREAK>>8: temp = (ucd.getLineBreakID_fromIndex(ucd.getLineBreak(codepoint), style)); break;
|
||||
case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(codepoint), style));
|
||||
if (temp == null || temp.length() == 0) temp = "Non_Joining";
|
||||
break;
|
||||
case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(codepoint), style)); break;
|
||||
case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex(ucd.getScript(codepoint), style));
|
||||
if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
|
||||
titlecase = true;
|
||||
break;
|
||||
case UCD_Types.AGE>>8: temp = getAge(codepoint); break;
|
||||
case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
|
||||
temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break;
|
||||
}
|
||||
if (temp != null) return Utility.getUnskeleton(temp,titlecase);
|
||||
if (getPropertyType() == BINARY) {
|
||||
return up.hasValue(codepoint) ? "True" : "False";
|
||||
}
|
||||
return "<unknown>";
|
||||
}
|
||||
|
||||
public String getAge(int codePoint) {
|
||||
if (needAgeCache) {
|
||||
for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
|
||||
ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);
|
||||
}
|
||||
needAgeCache = false;
|
||||
}
|
||||
for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
|
||||
if (ucdCache[i].isAllocated(codePoint)) return UCD_Names.AGE[i];
|
||||
}
|
||||
return UCD_Names.AGE[UCD_Types.UNKNOWN];
|
||||
}
|
||||
|
||||
/* Private helper, not an override: computes the type value that the constructor passes to setType(). */
|
||||
private int getPropertyTypeInternal() {
|
||||
int result = 0;
|
||||
String name = up.getName(UCD_Types.LONG);
|
||||
if ("Age".equals(name)) return STRING;
|
||||
switch (up.getValueType()) {
|
||||
case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
|
||||
case UCD_Types.STRING_PROP: result = STRING; break;
|
||||
case UCD_Types.MISC_PROP: result = STRING; break;
|
||||
case UCD_Types.CATALOG_PROP: result = ENUMERATED; break;
|
||||
case UCD_Types.FLATTENED_BINARY_PROP:
|
||||
case UCD_Types.ENUMERATED_PROP: result = ENUMERATED; break;
|
||||
case UCD_Types.BINARY_PROP: result = BINARY; break;
|
||||
case UCD_Types.UNKNOWN_PROP:
|
||||
default:
|
||||
throw new IllegalArgumentException("Type: UNKNOWN_PROP");
|
||||
}
|
||||
if (!up.isStandard()) result |= EXTENDED_BIT;
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -5,14 +5,15 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2003/07/21 15:50:06 $
|
||||
* $Revision: 1.28 $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.29 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
@ -30,6 +31,8 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
|
||||
public final class UCD implements UCD_Types {
|
||||
|
||||
private static int SPOT_CHECK = 0x20AC;
|
||||
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
/**
|
||||
|
@ -361,7 +364,7 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
int blockId = 0;
|
||||
BlockData blockData = new BlockData();
|
||||
while (Default.ucd.getBlockData(blockId++, blockData)) {
|
||||
while (getBlockData(blockId++, blockData)) {
|
||||
if (blockData.name.equals("Hebrew")
|
||||
|| blockData.name.equals("Cypriot_Syllabary")
|
||||
) {
|
||||
|
@ -399,7 +402,7 @@ public final class UCD implements UCD_Types {
|
|||
System.out.println("AL: Adding " + BIDI_AL_Delta);
|
||||
BIDI_AL_SET.addAll(BIDI_AL_Delta);
|
||||
|
||||
UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point).getSet();
|
||||
UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point, this).getSet();
|
||||
noncharacters.remove(Utility.BOM);
|
||||
|
||||
System.out.println("Removing Noncharacters/BOM " + noncharacters);
|
||||
|
@ -458,7 +461,7 @@ public final class UCD implements UCD_Types {
|
|||
hanExceptions = new IntMap();
|
||||
BufferedReader in = null;
|
||||
try {
|
||||
in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
|
||||
in = Utility.openUnicodeFile("Unihan", version, true, Utility.UTF8);
|
||||
int lineCounter = 0;
|
||||
while (true) {
|
||||
Utility.dot(++lineCounter);
|
||||
|
@ -590,7 +593,7 @@ public final class UCD implements UCD_Types {
|
|||
StringBuffer result = new StringBuffer();
|
||||
int cp;
|
||||
byte currentCaseType = caseType;
|
||||
UnicodeProperty defaultIgnorable = DerivedProperty.make(DerivedProperty.DefaultIgnorable, this);
|
||||
UCDProperty defaultIgnorable = DerivedProperty.make(DerivedProperty.DefaultIgnorable, this);
|
||||
|
||||
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
|
||||
cp = UTF32.char32At(s, i);
|
||||
|
@ -829,7 +832,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getCategoryID_fromIndex(byte prop, byte style) {
|
||||
return (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
|
||||
return prop < 0 || prop >= UCD_Names.GC.length ? null
|
||||
: (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
|
||||
}
|
||||
|
||||
|
||||
|
@ -846,6 +850,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
static String getCombiningClassID_fromIndex (short index, byte style) {
|
||||
if (index > 255) return null;
|
||||
index &= 0xFF;
|
||||
if (style == NORMAL || style == NUMBER) return String.valueOf(index);
|
||||
String s = "Fixed";
|
||||
|
@ -889,7 +894,12 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getBidiClassID_fromIndex(byte prop, byte style) {
|
||||
return style == SHORT ? UCD_Names.BC[prop] : UCD_Names.LONG_BC[prop];
|
||||
return prop < 0
|
||||
|| prop >= UCD_Names.BC.length
|
||||
? null
|
||||
: style == SHORT
|
||||
? UCD_Names.BC[prop]
|
||||
: UCD_Names.LONG_BC[prop];
|
||||
}
|
||||
|
||||
public String getDecompositionTypeID(int codePoint) {
|
||||
|
@ -900,7 +910,8 @@ public final class UCD implements UCD_Types {
|
|||
return getDecompositionTypeID_fromIndex(prop, NORMAL);
|
||||
}
|
||||
public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
|
||||
return style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
|
||||
return prop < 0 || prop >= UCD_Names.DT.length ? null
|
||||
: style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
|
||||
}
|
||||
|
||||
public String getNumericTypeID(int codePoint) {
|
||||
|
@ -912,7 +923,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
|
||||
return style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
|
||||
return prop < 0 || prop >= UCD_Names.NT.length ? null
|
||||
: style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
|
||||
}
|
||||
|
||||
public String getEastAsianWidthID(int codePoint) {
|
||||
|
@ -924,7 +936,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
|
||||
return style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
|
||||
return prop < 0 || prop >= UCD_Names.EA.length ? null
|
||||
: style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
|
||||
}
|
||||
|
||||
public String getLineBreakID(int codePoint) {
|
||||
|
@ -936,7 +949,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getLineBreakID_fromIndex(byte prop, byte style) {
|
||||
return style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
|
||||
return prop < 0 || prop >= UCD_Names.LB.length ? null
|
||||
: style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
|
||||
}
|
||||
|
||||
public String getJoiningTypeID(int codePoint) {
|
||||
|
@ -948,7 +962,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
|
||||
return style != LONG ? UCD_Names.JOINING_TYPE[prop] : UCD_Names.LONG_JOINING_TYPE[prop];
|
||||
return prop < 0 || prop >= UCD_Names.JOINING_TYPE.length ? null
|
||||
: style != LONG ? UCD_Names.JOINING_TYPE[prop] : UCD_Names.LONG_JOINING_TYPE[prop];
|
||||
}
|
||||
|
||||
public String getJoiningGroupID(int codePoint) {
|
||||
|
@ -961,7 +976,8 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
|
||||
// no short version
|
||||
return UCD_Names.JOINING_GROUP[prop];
|
||||
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
|
||||
: UCD_Names.JOINING_GROUP[prop];
|
||||
}
|
||||
|
||||
public String getScriptID(int codePoint) {
|
||||
|
@ -973,8 +989,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getScriptID_fromIndex(byte prop, byte length) {
|
||||
if (length == SHORT) return UCD_Names.ABB_SCRIPT[prop];
|
||||
return UCD_Names.SCRIPT[prop];
|
||||
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
|
||||
: (length == SHORT) ? UCD_Names.ABB_SCRIPT[prop] : UCD_Names.SCRIPT[prop];
|
||||
}
|
||||
|
||||
public String getAgeID(int codePoint) {
|
||||
|
@ -987,7 +1003,8 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
public static String getAgeID_fromIndex(byte prop, byte style) {
|
||||
// no short for
|
||||
return UCD_Names.AGE[prop];
|
||||
return prop < 0 || prop >= UCD_Names.AGE.length ? null
|
||||
: UCD_Names.AGE[prop];
|
||||
}
|
||||
|
||||
public String getBinaryPropertiesID(int codePoint, byte bit) {
|
||||
|
@ -999,7 +1016,8 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getBinaryPropertiesID_fromIndex(byte bit, byte style) {
|
||||
return style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
|
||||
return bit < 0 || bit >= UCD_Names.BP.length ? null
|
||||
: style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
|
||||
}
|
||||
|
||||
public static int mapToRepresentative(int ch, boolean lessThan20105) {
|
||||
|
@ -1208,14 +1226,18 @@ to guarantee identifier closure.
|
|||
String constructedName = null;
|
||||
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
|
||||
boolean isHangul = false;
|
||||
boolean isRemapped = false;
|
||||
switch (rangeStart) {
|
||||
case 0xF900:
|
||||
if (compositeVersion < 0x020105) {
|
||||
if (fixStrings) constructedName = "CJK COMPATIBILITY IDEOGRAPH-" + Utility.hex(codePoint, 4);
|
||||
break;
|
||||
}
|
||||
//isRemapped = true;
|
||||
break;
|
||||
// FALL THROUGH!!!!
|
||||
default:
|
||||
//default:
|
||||
/*
|
||||
result = getRaw(codePoint);
|
||||
if (result == null) {
|
||||
result = UData.UNASSIGNED;
|
||||
|
@ -1234,52 +1256,61 @@ to guarantee identifier closure.
|
|||
result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
*/
|
||||
//break;
|
||||
case 0x3400: // CJK Ideograph Extension A
|
||||
case 0x4E00: // CJK Ideograph
|
||||
case 0x20000: // Extension B
|
||||
if (fixStrings) constructedName = "CJK UNIFIED IDEOGRAPH-" + Utility.hex(codePoint, 4);
|
||||
isRemapped = true;
|
||||
break;
|
||||
case 0xAC00: // Hangul Syllable
|
||||
isHangul = true;
|
||||
if (fixStrings) {
|
||||
constructedName = "HANGUL SYLLABLE " + getHangulName(codePoint);
|
||||
}
|
||||
isRemapped = true;
|
||||
break;
|
||||
case 0xE000: // Private Use
|
||||
case 0xF0000: // Private Use
|
||||
case 0x100000: // Private Use
|
||||
if (fixStrings) constructedName = "<private use area-" + Utility.hex(codePoint, 4) + ">";
|
||||
isRemapped = true;
|
||||
break;
|
||||
case 0xD800: // Surrogate
|
||||
case 0xDB80: // Private Use
|
||||
case 0xDC00: // Private Use
|
||||
if (fixStrings) constructedName = "<surrogate-" + Utility.hex(codePoint, 4) + ">";
|
||||
isRemapped = true;
|
||||
break;
|
||||
case 0xFFFF: // Noncharacter
|
||||
if (fixStrings) constructedName = "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
|
||||
isRemapped = true;
|
||||
break;
|
||||
}
|
||||
result = getRaw(rangeStart);
|
||||
if (result == null) {
|
||||
result = UData.UNASSIGNED;
|
||||
isRemapped = true;
|
||||
result.name = null; // clean this up, since we reuse UNASSIGNED
|
||||
result.shortName = null;
|
||||
if (fixStrings) {
|
||||
result.name = "<reserved-" + Utility.hex(codePoint, 4) + ">";
|
||||
result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
|
||||
constructedName = "<reserved-" + Utility.hex(codePoint, 4) + ">";
|
||||
//result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
|
||||
}
|
||||
return result;
|
||||
//return result;
|
||||
}
|
||||
|
||||
result.codePoint = codePoint;
|
||||
if (fixStrings) {
|
||||
result.name = constructedName;
|
||||
result.shortName = Utility.replace(constructedName, UCD_Names.NAME_ABBREVIATIONS);
|
||||
result.decompositionMapping = result.bidiMirror
|
||||
= result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
|
||||
= result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
|
||||
= UTF32.valueOf32(codePoint);
|
||||
if (result.name == null || isRemapped) result.name = constructedName;
|
||||
if (result.shortName == null) result.shortName = Utility.replace(constructedName, UCD_Names.NAME_ABBREVIATIONS);
|
||||
if (isRemapped) {
|
||||
result.decompositionMapping = result.bidiMirror
|
||||
= result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
|
||||
= result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
|
||||
= UTF32.valueOf32(codePoint);
|
||||
}
|
||||
}
|
||||
if (isHangul) {
|
||||
if (fixStrings) result.decompositionMapping = getHangulDecompositionPair(codePoint);
|
||||
|
@ -1416,9 +1447,10 @@ to guarantee identifier closure.
|
|||
return NA;
|
||||
}
|
||||
|
||||
static String getHangulSyllableTypeID_fromIndex(byte index, byte style) {
|
||||
if (style == LONG) return UCD_Names.LONG_HANGUL_SYLLABLE_TYPE[index];
|
||||
return UCD_Names.HANGUL_SYLLABLE_TYPE[index];
|
||||
static String getHangulSyllableTypeID_fromIndex(byte prop, byte style) {
|
||||
return prop < 0 || prop >= UCD_Names.HANGUL_SYLLABLE_TYPE.length ? null
|
||||
: (style == LONG) ? UCD_Names.LONG_HANGUL_SYLLABLE_TYPE[prop]
|
||||
: UCD_Names.HANGUL_SYLLABLE_TYPE[prop];
|
||||
}
|
||||
|
||||
String getHangulSyllableTypeID(int char1, byte style) {
|
||||
|
@ -1471,7 +1503,7 @@ to guarantee identifier closure.
|
|||
UData uData = new UData();
|
||||
uData.readBytes(dataIn);
|
||||
|
||||
if (uData.codePoint == 0x5E) {
|
||||
if (uData.codePoint == SPOT_CHECK) {
|
||||
System.out.println("SPOT-CHECK: " + uData);
|
||||
}
|
||||
|
||||
|
@ -1528,6 +1560,18 @@ to guarantee identifier closure.
|
|||
public String name;
|
||||
}
|
||||
|
||||
public String NOBLOCK = Utility.getUnskeleton("no block", true);
|
||||
|
||||
public String getBlock(int codePoint) {
|
||||
if (blocks == null) loadBlocks();
|
||||
Iterator it = blocks.iterator();
|
||||
while (it.hasNext()) {
|
||||
BlockData data = (BlockData) it.next();
|
||||
if (codePoint >= data.start && codePoint <= data.end) return data.name;
|
||||
}
|
||||
return NOBLOCK;
|
||||
}
|
||||
|
||||
public boolean getBlockData(int blockId, BlockData output) {
|
||||
if (blocks == null) loadBlocks();
|
||||
BlockData temp;
|
||||
|
@ -1570,4 +1614,18 @@ to guarantee identifier closure.
|
|||
throw new IllegalArgumentException("Can't read block file");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public int getCompositeVersion() {
|
||||
return compositeVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param i
|
||||
*/
|
||||
public void setCompositeVersion(int i) {
|
||||
compositeVersion = i;
|
||||
}
|
||||
|
||||
}
|
|
@ -2,7 +2,7 @@ package com.ibm.text.UCD;
|
|||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
public abstract class UnicodeProperty implements UCD_Types {
|
||||
public abstract class UCDProperty implements UCD_Types {
|
||||
|
||||
// TODO: turn all of these into privates, and use setters only
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2003/08/20 03:46:43 $
|
||||
* $Revision: 1.23 $
|
||||
* $Date: 2004/02/06 18:30:19 $
|
||||
* $Revision: 1.24 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -109,7 +109,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"jg",
|
||||
"",
|
||||
"sc",
|
||||
"ag",
|
||||
"age",
|
||||
"hst",
|
||||
"",
|
||||
};
|
||||
|
@ -148,16 +148,16 @@ final class UCD_Names implements UCD_Types {
|
|||
"Soft_Dotted",
|
||||
"Logical_Order_Exception",
|
||||
"Other_ID_Start",
|
||||
"Sentence_Terminal",
|
||||
"STerm",
|
||||
"Variation_Selector"
|
||||
};
|
||||
|
||||
static final String[] SHORT_BP = {
|
||||
"BidiM",
|
||||
"Bidi_M",
|
||||
"CE",
|
||||
"WSpace",
|
||||
"NBrk",
|
||||
"BidiC",
|
||||
"Bidi_C",
|
||||
"JoinC",
|
||||
"Dash",
|
||||
"Hyphen",
|
||||
|
@ -175,7 +175,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"NChar",
|
||||
"TurkI",
|
||||
"OGrExt",
|
||||
"GrLink",
|
||||
"Gr_Link",
|
||||
"IDSB",
|
||||
"IDST",
|
||||
"Radical",
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2003/08/20 03:46:44 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2004/02/06 18:30:19 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,7 +15,9 @@ package com.ibm.text.UCD;
|
|||
|
||||
public interface UCD_Types {
|
||||
|
||||
public static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
|
||||
public static final int dVersion = 5; // change to fix the generated file D version. If less than zero, no "d"
|
||||
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
|
||||
|
||||
|
||||
public static final String BASE_DIR = "C:\\DATA\\";
|
||||
public static final String UCD_DIR = BASE_DIR + "UCD\\";
|
||||
|
@ -34,8 +36,6 @@ public interface UCD_Types {
|
|||
CJK_B_BASE = 0x20000,
|
||||
CJK_B_LIMIT = 0x2A6DF+1;
|
||||
|
||||
static final byte BINARY_FORMAT = 10; // bumped if binary format of UCD changes
|
||||
|
||||
// Unicode Property Types
|
||||
static final byte
|
||||
NOT_DERIVED = 1,
|
||||
|
@ -387,7 +387,7 @@ public interface UCD_Types {
|
|||
|
||||
static final int
|
||||
UNKNOWN = 0,
|
||||
AGE10 = 1,
|
||||
AGE11 = 1,
|
||||
AGE20 = 2,
|
||||
AGE21 = 3,
|
||||
AGE30 = 4,
|
||||
|
@ -396,7 +396,16 @@ public interface UCD_Types {
|
|||
AGE40 = 7,
|
||||
LIMIT_AGE = 8;
|
||||
|
||||
|
||||
static final String[] AGE_VERSIONS = {
|
||||
"?",
|
||||
"1.1.0",
|
||||
"2.0.0",
|
||||
"2.1.2",
|
||||
"3.0.0",
|
||||
"3.1.0",
|
||||
"3.2.0",
|
||||
"4.0.0"
|
||||
};
|
||||
|
||||
public static byte
|
||||
JT_C = 0,
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
|
||||
* $Date: 2003/07/21 15:50:05 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2004/02/06 18:30:19 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -18,16 +18,16 @@ import java.util.*;
|
|||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public final class UnifiedBinaryProperty extends UnicodeProperty {
|
||||
public final class UnifiedBinaryProperty extends UCDProperty {
|
||||
int majorProp;
|
||||
int propValue;
|
||||
// DerivedProperty dp;
|
||||
|
||||
public static UnicodeProperty make(int propMask) {
|
||||
public static UCDProperty make(int propMask) {
|
||||
return make(propMask, Default.ucd);
|
||||
}
|
||||
|
||||
public static UnicodeProperty make(int propMask, UCD ucd) {
|
||||
public static UCDProperty make(int propMask, UCD ucd) {
|
||||
if ((propMask & 0xFF00) == DERIVED) {
|
||||
return DerivedProperty.make(propMask & 0xFF, ucd);
|
||||
}
|
||||
|
@ -35,12 +35,12 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
|
|||
return getCached(propMask, ucd);
|
||||
}
|
||||
|
||||
public static UnicodeProperty make(String propAndValue, UCD ucd) {
|
||||
public static UCDProperty make(String propAndValue, UCD ucd) {
|
||||
return make(getPropmask(propAndValue, ucd), ucd);
|
||||
}
|
||||
|
||||
public static UnicodeSet getSet(int propMask, UCD ucd) {
|
||||
UnicodeProperty up = make(propMask, ucd);
|
||||
UCDProperty up = make(propMask, ucd);
|
||||
return up.getSet();
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
|
|||
propNameCache = new HashMap();
|
||||
|
||||
for (int i = 0; i < LIMIT_ENUM; ++i) {
|
||||
UnicodeProperty up = UnifiedBinaryProperty.make(i, ucd);
|
||||
UCDProperty up = UnifiedBinaryProperty.make(i, ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
|
||||
* $Date: 2003/07/21 15:50:05 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2004/02/06 18:30:18 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -18,15 +18,19 @@ import java.util.*;
|
|||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public final class UnifiedProperty extends UnicodeProperty {
|
||||
public final class UnifiedProperty extends UCDProperty {
|
||||
int majorProp;
|
||||
// DerivedProperty dp;
|
||||
|
||||
public static UnicodeProperty make(int propMask) {
|
||||
public static UCDProperty make(int propMask) {
|
||||
return make(propMask, Default.ucd);
|
||||
}
|
||||
|
||||
public static UnicodeProperty make(int propMask, UCD ucd) {
|
||||
public static UCDProperty make(int propMask, UCD ucd) {
|
||||
if (propMask == AGE) {
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
if ((propMask & 0xFF00) == (BINARY_PROPERTIES & 0xFF00)) {
|
||||
return UnifiedBinaryProperty.make(propMask, ucd);
|
||||
}
|
||||
|
@ -37,12 +41,12 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
return getCached(propMask, ucd);
|
||||
}
|
||||
|
||||
public static UnicodeProperty make(String propID, UCD ucd) {
|
||||
public static UCDProperty make(String propID, UCD ucd) {
|
||||
return make(getPropmask(propID, ucd), ucd);
|
||||
}
|
||||
|
||||
public static UnicodeSet getSet(int propMask, UCD ucd) {
|
||||
UnicodeProperty up = make(propMask, ucd);
|
||||
UCDProperty up = make(propMask, ucd);
|
||||
return up.getSet();
|
||||
}
|
||||
|
||||
|
@ -51,26 +55,21 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
}
|
||||
|
||||
private static Map propNameCache = null;
|
||||
private static Set availablePropNames = new TreeSet();
|
||||
|
||||
public static Collection getAvailablePropertiesAliases(Collection result, UCD ucd) {
|
||||
if (propNameCache == null) {
|
||||
cacheNames(ucd);
|
||||
}
|
||||
result.addAll(availablePropNames);
|
||||
return result;
|
||||
}
|
||||
|
||||
public static int getPropmask(String propID, UCD ucd) {
|
||||
|
||||
// cache the names
|
||||
if (propNameCache == null) {
|
||||
System.out.println("Caching Property Names");
|
||||
propNameCache = new HashMap();
|
||||
|
||||
for (int i = 0; i < LIMIT_ENUM; ++i) {
|
||||
UnicodeProperty up = UnifiedProperty.make(i, ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
String shortName = Utility.getSkeleton(up.getProperty(SHORT));
|
||||
String longName = Utility.getSkeleton(up.getProperty(LONG));
|
||||
Integer result = new Integer(i);
|
||||
propNameCache.put(longName, result);
|
||||
propNameCache.put(shortName, result);
|
||||
}
|
||||
System.out.println("Done Caching");
|
||||
cacheNames(ucd);
|
||||
}
|
||||
|
||||
propID = Utility.getSkeleton(propID);
|
||||
|
@ -80,6 +79,28 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
}
|
||||
return indexObj.intValue();
|
||||
}
|
||||
|
||||
private static void cacheNames(UCD ucd) {
|
||||
System.out.println("Caching Property Names");
|
||||
propNameCache = new HashMap();
|
||||
|
||||
for (int i = 0; i < LIMIT_ENUM; ++i) {
|
||||
UCDProperty up = UnifiedProperty.make(i, ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
String shortRaw = up.getProperty(SHORT);
|
||||
String shortName = Utility.getSkeleton(shortRaw);
|
||||
String longRaw = up.getProperty(LONG);
|
||||
String longName = Utility.getSkeleton(longRaw);
|
||||
Integer result = new Integer(i);
|
||||
if (!propNameCache.keySet().contains(longName)) propNameCache.put(longName, result);
|
||||
if (!propNameCache.keySet().contains(shortName)) propNameCache.put(shortName, result);
|
||||
String key = longRaw != null ? longRaw : shortRaw;
|
||||
availablePropNames.add(key);
|
||||
}
|
||||
System.out.println("Done Caching");
|
||||
}
|
||||
|
||||
static Map cache = new HashMap();
|
||||
static UCD lastUCD = null;
|
||||
|
@ -92,12 +113,13 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
UCD ucd;
|
||||
public boolean equals(Object other) {
|
||||
Clump that = (Clump) other;
|
||||
return (that.prop != prop || !ucd.equals(that));
|
||||
return (that.prop == prop && ucd.equals(that));
|
||||
}
|
||||
}
|
||||
|
||||
private static UnifiedProperty getCached(int propMask, UCD ucd) {
|
||||
System.out.println(ucd);
|
||||
|
||||
//System.out.println(ucd);
|
||||
if (ucd.equals(lastUCD) && propMask == lastPropMask) return lastValue;
|
||||
probeClump.prop = propMask;
|
||||
probeClump.ucd = ucd;
|
||||
|
@ -120,7 +142,9 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
majorProp = propMask >> 8;
|
||||
|
||||
//System.out.println("A: " + getValueType());
|
||||
if (majorProp <= (JOINING_GROUP>>8) || majorProp == SCRIPT>>8) setValueType(FLATTENED_BINARY_PROP);
|
||||
if (majorProp <= (JOINING_GROUP>>8)
|
||||
|| majorProp == SCRIPT>>8
|
||||
|| majorProp==(HANGUL_SYLLABLE_TYPE>>8)) setValueType(FLATTENED_BINARY_PROP);
|
||||
//System.out.println("B: " + getValueType());
|
||||
|
||||
header = UCD_Names.UNIFIED_PROPERTY_HEADERS[majorProp];
|
||||
|
@ -158,7 +182,7 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
throw new ChainException("Can't call 'hasValue' on non-binary property {0}", new Object[]{
|
||||
new Integer(majorProp)});
|
||||
}
|
||||
|
||||
|
||||
public String getFullName(byte style) {
|
||||
String pre = "";
|
||||
String preShort = getProperty(SHORT);
|
||||
|
@ -168,7 +192,7 @@ public final class UnifiedProperty extends UnicodeProperty {
|
|||
else pre = preShort + "(" + preLong + ")";
|
||||
return pre;
|
||||
}
|
||||
|
||||
|
||||
public String getValue(int cp, byte style) {
|
||||
switch (majorProp) {
|
||||
case CATEGORY>>8: return ucd.getCategoryID_fromIndex(ucd.getCategory(cp), style);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
|
||||
* $Date: 2003/07/21 15:50:05 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2004/02/06 18:30:18 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -441,7 +441,7 @@ can help you narrow these down.
|
|||
checkNF_AndCase("\u0130", true);
|
||||
checkNF_AndCase("\u0131", true);
|
||||
|
||||
UnicodeProperty softdot = null;
|
||||
UCDProperty softdot = null;
|
||||
CanonicalIterator cit = new CanonicalIterator("a");
|
||||
UnicodeSet badChars = new UnicodeSet();
|
||||
|
||||
|
@ -489,9 +489,9 @@ can help you narrow these down.
|
|||
}
|
||||
|
||||
static void checkIdentical(String ubpName1, String ubpName2) {
|
||||
UnicodeProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd);
|
||||
UCDProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd);
|
||||
UnicodeSet set1 = prop1.getSet();
|
||||
UnicodeProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd);
|
||||
UCDProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd);
|
||||
UnicodeSet set2 = prop2.getSet();
|
||||
UnicodeSet set1minus2 = new UnicodeSet(set1);
|
||||
set1minus2.removeAll(set2);
|
||||
|
@ -986,8 +986,8 @@ can help you narrow these down.
|
|||
System.out.println("NameChar:");
|
||||
System.out.println("\t" + NameChar.toPattern(true));
|
||||
|
||||
UnicodeProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd);
|
||||
UnicodeProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd);
|
||||
UCDProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd);
|
||||
UCDProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd);
|
||||
|
||||
UnicodeSet IDContinueMinusNameChar = new UnicodeSet();
|
||||
UnicodeSet IDStartMinusNameChar = new UnicodeSet();
|
||||
|
|
63
tools/unicodetools/com/ibm/text/utility/CallArgs.java
Normal file
63
tools/unicodetools/com/ibm/text/utility/CallArgs.java
Normal file
|
@ -0,0 +1,63 @@
|
|||
package com.ibm.text.utility;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
|
||||
public class CallArgs {
|
||||
static BagFormatter bf = new BagFormatter();
|
||||
|
||||
public static String getPrefix(Class c) {
|
||||
String prefix = c.getName();
|
||||
int pos = prefix.lastIndexOf('.');
|
||||
if (pos < 0) return "";
|
||||
return prefix.substring(0,pos+1);
|
||||
}
|
||||
|
||||
public static void call(String[] args, String prefix) throws Exception {
|
||||
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
if (arg.startsWith("#")) break; // comments out rest of line
|
||||
String[] methodArgs = null;
|
||||
int par = arg.indexOf('(');
|
||||
if (par >= 0) {
|
||||
methodArgs = Utility.split(arg.substring(par+1, arg.length()-1),',');
|
||||
arg = arg.substring(0,par);
|
||||
}
|
||||
int pos = arg.indexOf('.');
|
||||
Method method = null;
|
||||
|
||||
if (pos >= 0) {
|
||||
String className = prefix + arg.substring(0,pos);
|
||||
String methodName = arg.substring(pos+1);
|
||||
method = tryMethod(className, methodName, methodArgs);
|
||||
} else {
|
||||
method = tryMethod("Main", arg, methodArgs);
|
||||
if (method == null) {
|
||||
method = tryMethod(arg, "main", methodArgs);
|
||||
}
|
||||
}
|
||||
if (method == null) throw new IllegalArgumentException("Bad parameter: " + arg);
|
||||
System.out.println(method.getName() + "\t" + bf.join(methodArgs));
|
||||
method.invoke(null,methodArgs);
|
||||
}
|
||||
}
|
||||
private static Method tryMethod(String className, String methodName, String[] methodArgs)
|
||||
throws IllegalAccessException, InvocationTargetException {
|
||||
try {
|
||||
Class foo = Class.forName(className);
|
||||
Class[] parameterTypes = null;
|
||||
if (methodArgs != null) {
|
||||
parameterTypes = new Class[methodArgs.length];
|
||||
for (int i = 0; i < methodArgs.length; ++i) {
|
||||
parameterTypes[i] = String.class;
|
||||
}
|
||||
}
|
||||
return foo.getDeclaredMethod(methodName,parameterTypes);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/EquivalenceClass.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/06 18:29:39 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -28,7 +28,7 @@ public class EquivalenceClass {
|
|||
// whenever we add a <source, value> pair, we see if any sets collide.
|
||||
// associated with each set of sources, we keep a representative Whenever we add to the set, if we
|
||||
//
|
||||
Map sourceToEquiv = new HashMap();
|
||||
Map sourceToEquiv = new TreeMap();
|
||||
Map valueToRepresentativeSource = new HashMap();
|
||||
Map forcedMerge = new HashMap();
|
||||
/**
|
||||
|
@ -62,7 +62,7 @@ public class EquivalenceClass {
|
|||
if (DEBUG) System.out.println("+Source " + source
|
||||
+ ", value: " + value);
|
||||
if (repSource == null && equivSet == null) {
|
||||
equivSet = new HashSet();
|
||||
equivSet = new TreeSet();
|
||||
equivSet.add(source);
|
||||
sourceToEquiv.put(source, equivSet);
|
||||
valueToRepresentativeSource.put(value, source);
|
||||
|
@ -96,7 +96,7 @@ public class EquivalenceClass {
|
|||
|
||||
// then replace all instances for equivSet by repEquiv
|
||||
// we have to do this in two steps, since iterators are invalidated by changes
|
||||
Set toReplace = new HashSet();
|
||||
Set toReplace = new TreeSet();
|
||||
it = sourceToEquiv.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
Object otherSource = it.next();
|
||||
|
@ -127,6 +127,24 @@ public class EquivalenceClass {
|
|||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private class MyIterator implements Iterator {
|
||||
Iterator it = sourceToEquiv.keySet().iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return it.hasNext();
|
||||
}
|
||||
public Object next() {
|
||||
return sourceToEquiv.get(it.next());
|
||||
}
|
||||
public void remove() {
|
||||
throw new IllegalArgumentException("can't remove");
|
||||
}
|
||||
}
|
||||
|
||||
public Iterator getSetIterator () {
|
||||
return new MyIterator();
|
||||
}
|
||||
|
||||
private String toString(Object s) {
|
||||
if (s == null) return "null";
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $
|
||||
* $Date: 2003/07/07 15:58:56 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/06 18:29:39 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -50,7 +50,7 @@ public class Main {
|
|||
static public void main (String[] args) {
|
||||
for (int i = 0; i < args.length; ++i) {
|
||||
String arg = args[i];
|
||||
if (arg.equalsIgnoreCase("probe")) Probe.test("da");
|
||||
//if (arg.equalsIgnoreCase("probe")) Probe.test("da");
|
||||
}
|
||||
if (true) return;
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2003/08/20 03:47:59 $
|
||||
* $Revision: 1.36 $
|
||||
* $Date: 2004/02/06 18:29:39 $
|
||||
* $Revision: 1.37 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -148,6 +148,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
*/
|
||||
|
||||
public static String getUnskeleton(String source, boolean titlecaseStart) {
|
||||
if (source == null) return source;
|
||||
if (source.equals("noBreak")) return source; // HACK
|
||||
StringBuffer result = new StringBuffer();
|
||||
int lastCat = -1;
|
||||
|
@ -585,7 +586,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
return quoteXML(source, false);
|
||||
}
|
||||
|
||||
private static UnicodeProperty defaultIgnorable = null;
|
||||
private static UCDProperty defaultIgnorable = null;
|
||||
|
||||
public static String getDisplay(int cp) {
|
||||
String result = UTF16.valueOf(cp);
|
||||
|
@ -1110,6 +1111,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
pw.println();
|
||||
pw.println("In both " + name1 + " and " + name2 + ": ");
|
||||
pw.println(temp.size() == 0 ? "<none>" : ""+ temp);
|
||||
pw.flush();
|
||||
// showSetNames(pw, "\t", temp, false, false, withChar, names, ucd);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue