misc changes for UnicodeProperty

X-SVN-Rev: 14466
This commit is contained in:
Mark Davis 2004-02-06 18:32:05 +00:00
parent 153015d3ec
commit aa012dfd7c
35 changed files with 1877 additions and 975 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/AbbreviatedUnicodeSetIterator.java,v $
* $Date: 2003/03/17 23:00:20 $
* $Revision: 1.1 $
* $Date: 2004/02/06 18:32:04 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ import com.ibm.text.UCD.Normalizer;
import com.ibm.text.UCD.UCD;
import com.ibm.text.utility.*;
import com.ibm.text.UCD.UnifiedBinaryProperty;
import com.ibm.text.UCD.UnicodeProperty;
import com.ibm.text.UCD.UCDProperty;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $
* $Date: 2003/08/21 07:32:52 $
* $Revision: 1.22 $
* $Date: 2004/02/06 18:32:03 $
* $Revision: 1.23 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ import com.ibm.text.UCD.Normalizer;
import com.ibm.text.UCD.UCD;
import com.ibm.text.utility.*;
import com.ibm.text.UCD.UnifiedBinaryProperty;
import com.ibm.text.UCD.UnicodeProperty;
import com.ibm.text.UCD.UCDProperty;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
@ -1418,7 +1418,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
*/
private void cleanup() {
UnicodeProperty ubp = UnifiedBinaryProperty.make(
UCDProperty ubp = UnifiedBinaryProperty.make(
UCD.BINARY_PROPERTIES + UCD.Logical_Order_Exception, ucd);
UnicodeSet desiredSet = ubp.getSet();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2003/08/22 16:51:21 $
* $Revision: 1.17 $
* $Date: 2004/02/06 18:32:03 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -175,32 +175,9 @@ public class WriteCharts implements UCD_Types {
String classname = primaryCount > 1 ? XCLASSNAME[strength] : CLASSNAME[strength];
String name = Default.ucd.getName(s);
String outline = showCell2(sortKey, s, script, classname);
if (s.equals("\u1eaf")) {
System.out.println("debug");
}
String comp = Default.nfc.normalize(s);
String outline = breaker + classname
+ " title='"
+ (script != UNSUPPORTED
? Utility.quoteXML(name, true) + ": "
: "")
+ UCA.toString(sortKey) + "'>"
+ Utility.quoteXML(comp, true)
+ "<br><tt>"
+ Utility.hex(s)
//+ "<br>" + script
+ "</tt></td>"
+ (script == UNSUPPORTED
? "<td class='name'><tt>" + Utility.quoteXML(name, true) + "</td>"
: "")
;
output.println(outline);
output.println(breaker + outline);
++columnCount;
}
@ -208,6 +185,46 @@ public class WriteCharts implements UCD_Types {
closeIndexFile(indexFile, "<br>UCA: " + uca.getDataVersion(), COLLATION);
}
/**
 * Builds the HTML fragment for one chart cell, starting with the supplied
 * class/attribute prefix and ending after the closing {@code </td>}.
 *
 * The cell shows the NFC form of {@code s} (prefixed with U+25CC when its first
 * code point has general category Mn, Mc or Me) followed by the hex form of
 * {@code s}. The title attribute carries the sort key, preceded by the character
 * name unless the script is UNSUPPORTED; in that case the name is emitted in an
 * extra trailing cell instead.
 *
 * @param sortKey   collation key rendered via UCA.toString
 * @param s         the string being charted
 * @param script    script code; only compared against UNSUPPORTED here
 * @param classname text placed at the very start of the fragment
 * @return the assembled HTML fragment
 */
private static String showCell2(
        String sortKey,
        String s,
        byte script,
        String classname) {
    final String charName = Default.ucd.getName(s);
    // Debugging hook left in place: output is observable on stdout.
    if (s.equals("\u1eaf")) {
        System.out.println("debug");
    }

    String shown = Default.nfc.normalize(s);
    final int category = Default.ucd.getCategory(UTF16.charAt(shown, 0));
    final boolean startsWithMark = category == Mn || category == Mc || category == Me;
    if (startsWithMark) {
        // Give the combining mark a visible base (U+25CC) to sit on.
        shown = '\u25CC' + shown;
        if (s.equals("\u0300")) {
            System.out.println(Default.ucd.getCodeAndName(shown));
        }
    }

    // TODO: merge with showCell
    final boolean supported = script != UNSUPPORTED;

    // Pieces are computed in the same left-to-right order in which they appear in the output.
    final String titlePrefix = supported
            ? Utility.quoteXML(charName, true) + ": "
            : "";
    final String titleAndOpen = titlePrefix + UCA.toString(sortKey) + "'>";
    final String body = Utility.quoteXML(shown, true) + "<br><tt>" + Utility.hex(s) + "</tt></td>";
    final String trailingNameCell = supported
            ? ""
            : "<td class='name'><tt>" + Utility.quoteXML(charName, true) + "</td>";

    return classname + " title='" + titleAndOpen + body + trailingNameCell;
}
static public void normalizationChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
@ -642,9 +659,20 @@ public class WriteCharts implements UCD_Types {
closeIndexFile(indexFile, "", CASE);
}
static void showCell(PrintWriter output, String s, String prefix, String extra, boolean skipName) {
static void showCell(PrintWriter output, String s,
String prefix, String extra, boolean skipName) {
if (s.equals("\u0300")) {
System.out.println();
}
String name = Default.ucd.getName(s);
String comp = Default.nfc.normalize(s);
int cat = Default.ucd.getCategory(UTF16.charAt(comp,0));
if (cat == Mn || cat == Mc || cat == Me) {
comp = '\u25CC' + comp;
if (s.equals("\u0300")) {
System.out.println(Default.ucd.getCodeAndName(comp));
}
}
String outline = prefix
+ (skipName ? "" : " title='" + Utility.quoteXML(name, true) + "'")

View file

@ -0,0 +1,218 @@
package com.ibm.text.UCD;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.Utility;
public class CheckICU {
static final BagFormatter bf = new BagFormatter();
/**
 * Command-line entry point: prints "Start", runs {@link #test()}, then prints "End".
 *
 * @param args ignored
 * @throws IOException propagated from {@link #test()}
 */
public static void main(String[] args) throws IOException {
System.out.println("Start");
test();
System.out.println("End");
}
static UnicodeSet itemFailures;
static ICUPropertyFactory icuFactory;
static ToolUnicodePropertySource toolFactory;
/**
 * Runs the ICU-versus-tool property comparison.
 *
 * First calls checkUCD(), then initializes the static itemFailures, icuFactory and
 * toolFactory fields that testProperty reads, and tests every property named in
 * quickList with no type filter (-1).
 *
 * NOTE(review): while quickList is non-empty the method returns right after the
 * quick list, so the full alias comparison and per-type sweep below are skipped.
 * This looks like a deliberate debugging shortcut -- confirm before relying on
 * the full sweep.
 *
 * @throws IOException propagated from checkUCD()
 */
public static void test() throws IOException {
checkUCD();
// Static state must be set up before any testProperty call below.
itemFailures = new UnicodeSet();
icuFactory = ICUPropertyFactory.make();
toolFactory = ToolUnicodePropertySource.make("4.0.0");
// Properties to check first; entries are toggled by commenting them in or out.
String[] quickList = {
"Name",
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
//"Numeric_Value"
};
for (int i = 0; i < quickList.length; ++i) {
testProperty(quickList[i], -1);
}
// Short-circuit: a non-empty quick list suppresses the full comparison below.
if (quickList.length > 0) return;
Collection availableTool = toolFactory.getAvailablePropertyAliases(new TreeSet());
Collection availableICU = icuFactory.getAvailablePropertyAliases(new TreeSet());
System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool));
// Only properties known to both sources are compared value by value.
Collection common = new TreeSet(availableICU);
common.retainAll(availableTool);
// One pass per property type; testProperty skips properties whose ICU type differs from j.
for (int j = UnicodeProperty.BINARY; j < UnicodeProperty.LIMIT_TYPE; ++j) {
System.out.println();
System.out.println(UnicodeProperty.getTypeName(j));
Iterator it = common.iterator();
while (it.hasNext()) {
String prop = (String)it.next();
testProperty(prop, j);
}
}
}
/**
 * Classifies every code point (0..0x10FFFF) of the 4.0.0 data as starter
 * (combining class 0), NFC-trailing and/or NFC-leading, and writes several set
 * combinations of those classes to "Trailing.txt" in UCD_Types.GEN_DIR.
 *
 * The writer is closed in a finally block so the file handle is released even
 * if one of the showSetNames calls throws.
 *
 * @throws IOException if the output file cannot be opened
 */
private static void checkUCD() throws IOException {
    final String version = "4.0.0";
    UCD myUCD = UCD.make(version);
    Normalizer nfc = new Normalizer(Normalizer.NFC, version);

    UnicodeSet leading = new UnicodeSet();
    UnicodeSet trailing = new UnicodeSet();
    UnicodeSet starter = new UnicodeSet();
    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (myUCD.getCombiningClass(i) == 0) starter.add(i);
        if (nfc.isTrailing(i)) trailing.add(i);
        if (nfc.isLeading(i)) leading.add(i);
    }

    PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt");
    try {
        // Each report works on a fresh copy, so the base sets are never modified.
        bf.showSetNames(pw, "+Trailing+Starter", new UnicodeSet(trailing).retainAll(starter));
        bf.showSetNames(pw, "+Trailing-Starter", new UnicodeSet(trailing).removeAll(starter));
        bf.showSetNames(pw, "-Trailing-Starter", new UnicodeSet(trailing).complement().removeAll(starter));
        bf.showSetNames(pw, "+Trailing+Leading", new UnicodeSet(trailing).retainAll(leading));
        bf.showSetNames(pw, "+Trailing-Leading", new UnicodeSet(trailing).removeAll(leading));
    } finally {
        pw.close();
    }
}
/*
* int icuType;
int toolType;
Collection icuAliases;
Collection toolAliases;
String firstDiffICU;
String firstDiffTool;
String firstDiffCP;
String icuProp;
String toolProp;
*/
/**
 * Compares one property as reported by the ICU factory and by the tool factory,
 * printing the results to standard output.
 *
 * Reports, in order: a property-type mismatch (if any), the differences between
 * the property aliases, the differences between the available value aliases,
 * and -- after checking every code point 0..0x10FFFF -- the set of code points
 * whose values differ together with the first differing pair of values.
 * Uses the static itemFailures set as scratch space (it is cleared here).
 *
 * @param prop       property name looked up in both factories
 * @param typeFilter when non-negative, the property is skipped silently unless
 *                   its ICU type equals this value
 */
private static void testProperty(String prop, int typeFilter) {
    final UnicodeProperty fromIcu = icuFactory.getProperty(prop);
    final int icuType = fromIcu.getPropertyType();
    if (typeFilter >= 0 && typeFilter != icuType) {
        return;
    }

    System.out.println();
    System.out.println("Testing: " + prop);

    final UnicodeProperty fromTool = toolFactory.getProperty(prop);
    final int toolType = fromTool.getPropertyType();
    if (toolType != icuType) {
        System.out.println("FAILURE Type: ICU: " + UnicodeProperty.getTypeName(icuType)
                + "\tTool: " + UnicodeProperty.getTypeName(toolType));
    }

    // Names of the property itself.
    final Collection icuNames = fromIcu.getPropertyAliases(new ArrayList());
    final Collection toolNames = fromTool.getPropertyAliases(new ArrayList());
    System.out.println(showDifferences("Aliases", "ICU", icuNames, "Tool", toolNames));

    // Names of the property's values.
    final Collection icuValueNames = fromIcu.getAvailablePropertyValueAliases(new ArrayList());
    final Collection toolValueNames = fromTool.getAvailablePropertyValueAliases(new ArrayList());
    System.out.println(showDifferences("Value Aliases", "ICU", icuValueNames, "Tool", toolValueNames));

    // TODO do property value aliases

    itemFailures.clear();
    String sampleIcu = null;
    String sampleTool = null;
    String sampleCodePoint = null;

    for (int cp = 0; cp <= 0x10FFFF; ++cp) {
        final String icuValue = fromIcu.getPropertyValue(cp);
        final String toolValue = fromTool.getPropertyValue(cp);
        if (equals(icuValue, toolValue)) {
            continue;
        }
        itemFailures.add(cp);
        // Remember only the first mismatch as a sample for the report.
        if (sampleCodePoint == null) {
            sampleIcu = icuValue;
            sampleTool = toolValue;
            sampleCodePoint = Utility.hex(cp);
        }
    }

    final int failureCount = itemFailures.size();
    if (failureCount != 0) {
        System.out.println("FAILURE " + failureCount + " Differences: ");
        System.out.println(itemFailures.toPattern(true));
        if (sampleIcu != null) {
            sampleIcu = bf.hex.transliterate(sampleIcu);
        }
        if (sampleTool != null) {
            sampleTool = bf.hex.transliterate(sampleTool);
        }
        System.out.println(sampleCodePoint
                + "\tICU: <" + sampleIcu
                + ">\tTool: <" + sampleTool + ">");
    }
    System.out.println("done");

    // do values later, and their aliases
}
/**
 * Null-safe equality: two nulls are equal, a null never equals a non-null,
 * otherwise the decision is delegated to {@code a.equals(b)}.
 */
static boolean equals(Object a, Object b) {
    return (a == null) ? (b == null) : a.equals(b);
}
/**
 * Describes how two collections differ, treating them as sets.
 *
 * When both collections contain exactly the same elements a single
 * "name1 == name2" line is returned. Otherwise the result is a multi-line
 * FAILURE report listing both collections, their intersection, and the
 * elements unique to each side (empty sections are omitted).
 *
 * The previous implementation only verified that every element of set1 was in
 * set2, so a set2 with extra elements was wrongly reported as equal; the size
 * of set2 is now checked against the intersection as well.
 *
 * @param title heading used in the output
 * @param name1 label for set1
 * @param set1  first collection
 * @param name2 label for set2
 * @param set2  second collection
 * @return the one-line equality message or the multi-line difference report
 */
static public String showDifferences(
        String title,
        String name1,
        Collection set1,
        String name2,
        Collection set2) {

    // Collection has no clone, so work on sorted Set copies; order and
    // duplicates of the inputs are therefore not preserved in the sections.
    Collection common = new TreeSet(set1);
    common.retainAll(set2);

    // Equal only if neither side has anything beyond the intersection.
    if (set1.size() == common.size() && set2.size() == common.size()) {
        return title + ": " + name1 + " == " + name2 + ": " + bf.join(set1);
    }

    StringBuffer result = new StringBuffer();
    result.append(title + "\tFAILURE\r\n");
    result.append("\t" + name1 + " = " + bf.join(set1) + "\r\n");
    result.append("\t" + name2 + " = " + bf.join(set2) + "\r\n");

    appendDifferenceSection(result, name2 + " & " + name1, common);

    Collection onlyInFirst = new TreeSet(set1);
    onlyInFirst.removeAll(set2);
    appendDifferenceSection(result, name1 + " - " + name2, onlyInFirst);

    Collection onlyInSecond = new TreeSet(set2);
    onlyInSecond.removeAll(set1);
    appendDifferenceSection(result, name2 + " - " + name1, onlyInSecond);

    return result.toString();
}

/** Appends one "heading: / items" section to the report; does nothing for an empty collection. */
private static void appendDifferenceSection(StringBuffer result, String heading, Collection items) {
    if (items.size() == 0) {
        return;
    }
    result.append("\t" + heading + ":\r\n");
    result.append("\t" + bf.join(items));
    result.append("\r\n");
}
}

View file

@ -0,0 +1,81 @@
package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
import com.ibm.text.utility.Pair;
import com.ibm.text.utility.Utility;
public class ChineseFrequency {
static final String DICT_DIR = "C:\\DATA\\dict\\";
static NumberFormat percent = new DecimalFormat("0.000000%");
static NumberFormat percent3 = new DecimalFormat("000.000000%");
static NumberFormat number = new DecimalFormat("#,##0");
/**
 * Comparator that reverses the natural ordering of Comparable objects.
 *
 * The reversal is done by swapping the operands rather than negating the
 * result: negating is wrong when compareTo returns Integer.MIN_VALUE, because
 * -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE.
 */
static class InverseCompareTo implements Comparator {
    /**
     * @return a positive value if o1 naturally precedes o2, a negative value if
     *         it follows o2, and zero if they compare equal
     */
    public int compare(Object o1, Object o2) {
        return ((Comparable) o2).compareTo(o1);
    }
}
/**
 * Reads "kHYPLCDPF.txt" from DICT_DIR, sums the parenthesized counts on each
 * line per code point, and writes "kHYPLCDPF_frequency.txt" listing the code
 * points from most to least frequent with their individual and running
 * percentage of the grand total.
 *
 * Each input line is split on a tab into a hex code point and a comma-separated
 * list whose entries each contain a count in parentheses. Both the reader and
 * the writer are closed in finally blocks so that a malformed line or a write
 * failure does not leak the file handles.
 *
 * @throws IOException if the input cannot be read or either file cannot be opened
 */
public static void test() throws IOException{
    // Sorted descending via InverseCompareTo.
    // NOTE(review): presumably Pair orders by first (the total), then second -- confirm in Pair.
    Set freq_char = new TreeSet(new InverseCompareTo());
    double grandTotal = 0.0;

    BufferedReader br = BagFormatter.openUTF8Reader(DICT_DIR, "kHYPLCDPF.txt");
    try {
        while (true) {
            String line = br.readLine();
            if (line == null) break;
            String[] pieces = Utility.split(line,'\t');
            int cp = Integer.parseInt(pieces[0],16);
            String[] says = Utility.split(pieces[1],',');
            long total = 0;
            for (int i = 0; i < says.length; ++i) {
                // The count is the text between '(' and ')'.
                int start = says[i].indexOf('(');
                int end = says[i].indexOf(')');
                long count = Long.parseLong(says[i].substring(start+1, end));
                total += count;
            }
            grandTotal += total;
            freq_char.add(new Pair(new Long(total), new Integer(cp)));
        }
    } finally {
        br.close();
    }

    PrintWriter pw = BagFormatter.openUTF8Writer(DICT_DIR,"kHYPLCDPF_frequency.txt");
    try {
        // Leading U+FEFF marks the file as Unicode text for editors.
        pw.write("\uFEFF");
        pw.println("No.\tPercentage\tAccummulated\tHex\tChar");

        Iterator it = freq_char.iterator();
        int counter = 0;
        double cummulative = 0;
        double cummulativePercentage = 0;
        while (it.hasNext()) {
            Pair item = (Pair)it.next();
            Long total = (Long) item.first;
            Integer cp = (Integer) item.second;
            double current = total.longValue();
            cummulative += current;
            double percentage = current / grandTotal;
            cummulativePercentage += percentage;
            pw.println(
                ++counter
                //+ "\t" + number.format(current)
                //+ "\t" + number.format(cummulative)
                + "\t" + percent.format(percentage)
                + "\t" + percent3.format(cummulativePercentage)
                + "\t" + Integer.toHexString(cp.intValue()).toUpperCase()
                + "\t" + UTF16.valueOf(cp.intValue()));
        }
        //pw.println("Grand total: " + (long)grandTotal);
    } finally {
        pw.close();
    }
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
* $Date: 2003/07/21 15:50:07 $
* $Revision: 1.2 $
* $Date: 2004/02/06 18:30:23 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -89,7 +89,7 @@ public class CompareProperties implements UCD_Types {
}
}
public final class UnicodeSetComparator implements Comparator {
public final static class UnicodeSetComparator implements Comparator {
/**
* Compares two UnicodeSets, producing a transitive ordering.
* @return -1 if first is smaller (in size) than second,
@ -121,7 +121,7 @@ public class CompareProperties implements UCD_Types {
boolean isPartitioned = false;
UnicodeProperty[] props = new UnicodeProperty[500];
UCDProperty[] props = new UCDProperty[500];
UnicodeSet[] sets = new UnicodeSet[500];
int count = 0;
BitSet[] disjoints = new BitSet[500];
@ -147,7 +147,7 @@ public class CompareProperties implements UCD_Types {
if (!Default.ucd.isAllocated(cp)) continue;
for (int i = 0; i < count; ++i) {
UnicodeProperty up = props[i];
UCDProperty up = props[i];
boolean iProp = up.hasValue(cp);
if (iProp) {
probe.set(i);
@ -177,7 +177,7 @@ public class CompareProperties implements UCD_Types {
if (i == 0x0900) {
System.out.println("debug");
}
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (up.getValueType() < BINARY_PROP) {
System.out.println("\tSkipping " + up.getName() + "; value varies");
@ -378,7 +378,7 @@ public class CompareProperties implements UCD_Types {
return getPropName(props[propertyIndex]);
}
private String getPropName(UnicodeProperty ubp) {
private String getPropName(UCDProperty ubp) {
return Utility.getUnskeleton(ubp.getFullName(LONG), true);
}
@ -395,7 +395,7 @@ public class CompareProperties implements UCD_Types {
for (int i = 1; i < UCD_Types.LIMIT_ENUM; ++i) {
int iType = i & 0xFF00;
if (iType == UCD_Types.JOINING_GROUP || iType == UCD_Types.AGE || iType == UCD_Types.COMBINING_CLASS || iType == UCD_Types.SCRIPT) continue;
UnicodeProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
if (upi == null) continue;
if (!upi.isStandard()) {
System.out.println("Skipping " + upi.getName() + "; not standard");
@ -419,7 +419,7 @@ public class CompareProperties implements UCD_Types {
int jType = j & 0xFF00;
if (jType == UCD_Types.JOINING_GROUP || jType == UCD_Types.AGE || jType == UCD_Types.COMBINING_CLASS || jType == UCD_Types.SCRIPT
|| (jType == iType && jType != UCD_Types.BINARY_PROPERTIES)) continue;
UnicodeProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
if (upj == null) continue;
if (!upj.isStandard()) continue;
if (upj.getValueType() < UCD_Types.BINARY_PROP) continue;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
* $Date: 2003/07/21 15:50:06 $
* $Revision: 1.12 $
* $Date: 2004/02/06 18:30:23 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -27,12 +27,14 @@ import java.io.*;
public final class ConvertUCD implements UCD_Types {
public static final boolean SHOW = false;
public static final boolean DEBUG = false;
static final boolean SHOW_SAMPLE = false;
public static int major;
public static int minor;
public static int update;
static String version;
int major;
int minor;
int update;
String version;
// varies by version
/*
@ -79,6 +81,47 @@ public final class ConvertUCD implements UCD_Types {
/*
//*/
};
/** Keys are the labels that were prefixed with '*' in labelList (hex-valued fields); the mapped value is always "". */
static HashMap isHex = new HashMap();
/**
 * One entry per label in labelList. With the default-value parsing below
 * commented out, every label currently maps to null.
 */
static HashMap defaults = new HashMap();
// Normalizes labelList in place (strips the '*' marker) and fills isHex/defaults.
// Runs once at class initialization; relies on labelList being initialized earlier
// in the class body.
static {
for (int j = 0; j < labelList.length; ++j) {
String[] labels = labelList[j];
// Index 0 is skipped here -- presumably the file name rather than a field label; confirm against labelList.
for (int i = 1; i < labels.length; ++i) {
boolean hex = false;
String def = null;
//char appendChar = '\u0000';
// pull off "*": hex interpretation
if (labels[i].charAt(0) == '*') { // HEX value
hex = true;
// The cleaned label is written back into labelList itself.
labels[i] = labels[i].substring(1);
}
/*
// pull off "$": append duplicates
if (labels[i].charAt(0) == '$') { // HEX value
appendChar = labels[i].charAt(1);
labels[i] = labels[i].substring(2);
}
// pull off default values
int pos = labels[i].indexOf('-');
if (pos >= 0) {
def = labels[i].substring(pos+1);
labels[i] = labels[i].substring(0,pos);
}
*/
// store results
// we do this after all processing, so that the label is clean!!
if (hex) isHex.put(labels[i], "");
//if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar));
defaults.put(labels[i], def);
}
}
}
/*
static String[][] labelList31 = {
// Labels for the incoming files. Labels MUST match field order in file.
@ -212,15 +255,10 @@ public final class ConvertUCD implements UCD_Types {
try {
for (int i = 0; i < args.length; ++i) {
version = args[i];
String version = args[i];
if (version.length() == 0) version = UCD.latestVersion;
String[] parts = new String[3];
Utility.split(version, '.', parts);
major = Integer.parseInt(parts[0]);
minor = Integer.parseInt(parts[1]);
update = Integer.parseInt(parts[2]);
toJava();
new ConvertUCD().toJava(version);
}
} finally {
log.close();
@ -242,7 +280,13 @@ public final class ConvertUCD implements UCD_Types {
}
*/
static void toJava() throws Exception {
void toJava(String version) throws Exception {
this.version = version;
String[] parts = new String[3];
Utility.split(version, '.', parts);
major = Integer.parseInt(parts[0]);
minor = Integer.parseInt(parts[1]);
update = Integer.parseInt(parts[2]);
System.out.println("Building " + version);
// Blocks is special
// Unihan is special
@ -264,10 +308,13 @@ public final class ConvertUCD implements UCD_Types {
UData ud;
ud = getEntry(0x5e);
System.out.println("SPOT-CHECK: 5e: " + ud);
ud = getEntry(0x130);
System.out.println("SPOT-CHECK: 130: " + ud);
ud = getEntry(0x1f6);
System.out.println("SPOT-CHECK: 1f6: " + ud);
ud = getEntry(0x2A6D6);
System.out.println("SPOT-CHECK: 2A6D6: " + ud);
@ -285,51 +332,10 @@ public final class ConvertUCD implements UCD_Types {
* "OMIT" is special -- means don't record
*/
static HashMap isHex = new HashMap();
static HashMap defaults = new HashMap();
static {
for (int j = 0; j < labelList.length; ++j) {
String[] labels = labelList[j];
List blockData = new LinkedList();
for (int i = 1; i < labels.length; ++i) {
boolean hex = false;
String def = null;
//char appendChar = '\u0000';
// pull off "*": hex interpretation
if (labels[i].charAt(0) == '*') { // HEX value
hex = true;
labels[i] = labels[i].substring(1);
}
/*
// pull off "$": append duplicates
if (labels[i].charAt(0) == '$') { // HEX value
appendChar = labels[i].charAt(1);
labels[i] = labels[i].substring(2);
}
// pull off default values
int pos = labels[i].indexOf('-');
if (pos >= 0) {
def = labels[i].substring(pos+1);
labels[i] = labels[i].substring(0,pos);
}
*/
// store results
// we do this after all processing, so that the label is clean!!
if (hex) isHex.put(labels[i], "");
//if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar));
defaults.put(labels[i], def);
}
}
}
static List blockData = new LinkedList();
static void readBlocks() throws Exception {
void readBlocks() throws Exception {
System.out.println("Reading 'Blocks'");
BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
String line = "";
@ -363,9 +369,9 @@ public final class ConvertUCD implements UCD_Types {
}
}
static Set properties = new TreeSet();
Set properties = new TreeSet();
static void readSemi(String[] labels) throws Exception {
void readSemi(String[] labels) throws Exception {
System.out.println();
System.out.println("Reading '" + labels[0] + "'");
if (major < 3 || (major == 3 && minor < 1)) {
@ -554,8 +560,9 @@ public final class ConvertUCD implements UCD_Types {
System.out.println(";");
}
static Map charData = new TreeMap();
Map charData = new TreeMap();
/*
static void writeXML() throws IOException {
System.out.println("Writing 'UCD-Main.xml'");
BufferedWriter output = new BufferedWriter(
@ -604,7 +611,7 @@ public final class ConvertUCD implements UCD_Types {
String value = Utility.quoteXML((String) data.get(label));
output.write(" " + label + "='" + value + "'");
}
*/
*//*
output.write("/>\r\n");
}
@ -615,8 +622,9 @@ public final class ConvertUCD implements UCD_Types {
output.close();
}
}
static void writeJavaData() throws IOException {
*/
void writeJavaData() throws IOException {
Iterator it = charData.keySet().iterator();
int codePoint = -1;
System.out.println("Writing " + dataFilePrefix + version);
@ -665,13 +673,13 @@ public final class ConvertUCD implements UCD_Types {
}
}
static String[] xsSplit = new String[40];
//static String[] xsSplit = new String[40];
// Cache a little bit for speed
static int getEntryCodePoint = -1;
static UData getEntryUData = null;
int getEntryCodePoint = -1;
UData getEntryUData = null;
static UData getEntryIfExists(int cp) {
UData getEntryIfExists(int cp) {
if (cp == getEntryCodePoint) return getEntryUData;
Integer cc = new Integer(cp);
UData charEntry = (UData) charData.get(cc);
@ -683,7 +691,7 @@ public final class ConvertUCD implements UCD_Types {
/* Get entry in table for cc
*/
static UData getEntry(int cp) {
UData getEntry(int cp) {
if (cp == getEntryCodePoint) return getEntryUData;
Integer cc = new Integer(cp);
UData charEntry = (UData) charData.get(cc);
@ -699,12 +707,12 @@ public final class ConvertUCD implements UCD_Types {
/** Adds the character data. Signals duplicates with an exception
*/
static void setBinaryProperty(int cp, int binProp) {
void setBinaryProperty(int cp, int binProp) {
UData charEntry = getEntry(cp);
charEntry.binaryProperties |= (1L << binProp);
}
static void appendCharProperties(int cp, String key) {
void appendCharProperties(int cp, String key) {
int ind;
//if (true || NEWPROPS) {
ind = Utility.lookup(key, UCD_Names.BP, true);
@ -716,14 +724,12 @@ public final class ConvertUCD implements UCD_Types {
setBinaryProperty(cp, ind);
}
static Set jtSet = new TreeSet();
static Set jgSet = new TreeSet();
Set jtSet = new TreeSet();
Set jgSet = new TreeSet();
static final boolean SHOW_SAMPLE = false;
/** Adds the character data. Signals duplicates with an exception
*/
static void addCharData(int cp, String key, String value) {
void addCharData(int cp, String key, String value) {
//if (cp < 10) System.out.println("A: " + Utility.hex(cp) + ", " + key + ", " + Utility.quoteJavaString(value));
UData charEntry = getEntry(cp);
//if (cp < 10) System.out.println(" " + charEntry);
@ -794,7 +800,7 @@ public final class ConvertUCD implements UCD_Types {
}
static public void setField(UData uData, String fieldName, String fieldValue) {
public void setField(UData uData, String fieldName, String fieldValue) {
try {
if (fieldName.equals("n")) {
uData.name = fieldValue;

View file

@ -8,7 +8,7 @@ import java.util.TimeZone;
public final class Default implements UCD_Types {
public static String ucdVersion = UCD.latestVersion;
private static String ucdVersion = UCD.latestVersion;
public static UCD ucd;
public static Normalizer nfc;
public static Normalizer nfd;
@ -21,16 +21,16 @@ public final class Default implements UCD_Types {
}
public static void setUCD(String version) {
ucdVersion = version;
setUcdVersion(version);
setUCD();
}
public static void setUCD() {
ucd = UCD.make(ucdVersion);
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion);
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion);
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion);
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion);
ucd = UCD.make(getUcdVersion());
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, getUcdVersion());
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, getUcdVersion());
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, getUcdVersion());
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, getUcdVersion());
System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(ucd.getDate())));
}
@ -43,4 +43,12 @@ public final class Default implements UCD_Types {
return myDateFormat.format(new Date());
}
/**
 * Records the UCD version string to use. Only the stored value changes; the
 * ucd and normalizer fields are not rebuilt by this call.
 *
 * @param ucdVersion the version string to store
 */
public static void setUcdVersion(String ucdVersion) {
Default.ucdVersion = ucdVersion;
}
/**
 * @return the currently stored UCD version string
 */
public static String getUcdVersion() {
return ucdVersion;
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2003/07/21 15:50:06 $
* $Revision: 1.22 $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.23 $
*
*******************************************************************************
*/
@ -31,11 +31,11 @@ public final class DerivedProperty implements UCD_Types {
// ADD CONSTANT to UCD_TYPES
static public UnicodeProperty make(int derivedPropertyID) {
static public UCDProperty make(int derivedPropertyID) {
return make(derivedPropertyID, Default.ucd);
}
static public UnicodeProperty make(int derivedPropertyID, UCD ucd) {
static public UCDProperty make(int derivedPropertyID, UCD ucd) {
if (derivedPropertyID < 0 || derivedPropertyID >= DERIVED_PROPERTY_LIMIT) return null;
DerivedProperty dp = getCached(ucd);
return dp.dprops[derivedPropertyID];
@ -96,14 +96,14 @@ public final class DerivedProperty implements UCD_Types {
return dprops[propNumber].getValue(int cp);
}
*/
private UnicodeProperty[] dprops = new UnicodeProperty[50];
private UCDProperty[] dprops = new UCDProperty[50];
static final String[] CaseNames = {
"Uppercase",
"Lowercase",
"Mixedcase"};
class ExDProp extends UnicodeProperty {
class ExDProp extends UCDProperty {
Normalizer nfx;
ExDProp(int i) {
type = DERIVED_NORMALIZATION;
@ -124,7 +124,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
class NF_UnsafeStartProp extends UnicodeProperty {
class NF_UnsafeStartProp extends UCDProperty {
Normalizer nfx;
//int prop;
@ -180,7 +180,7 @@ public final class DerivedProperty implements UCD_Types {
*/
class NFC_Prop extends UnicodeProperty {
class NFC_Prop extends UCDProperty {
BitSet bitset;
boolean filter = false;
boolean keepNonZero = true;
@ -224,7 +224,7 @@ public final class DerivedProperty implements UCD_Types {
};
};
class GenDProp extends UnicodeProperty {
class GenDProp extends UCDProperty {
Normalizer nfx;
Normalizer nfComp = null;
@ -281,7 +281,7 @@ public final class DerivedProperty implements UCD_Types {
public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
};
class CaseDProp extends UnicodeProperty {
class CaseDProp extends UCDProperty {
byte val;
CaseDProp (int i) {
type = DERIVED_CORE;
@ -301,7 +301,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
class QuickDProp extends UnicodeProperty {
class QuickDProp extends UCDProperty {
String NO;
String MAYBE;
Normalizer nfx;
@ -357,7 +357,7 @@ public final class DerivedProperty implements UCD_Types {
dprops[i] = new NF_UnsafeStartProp(i-NFD_UnsafeStart);
}
dprops[ID_Start] = new UnicodeProperty() {
dprops[ID_Start] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "ID_Start";
@ -371,7 +371,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[ID_Continue_NO_Cf] = new UnicodeProperty() {
dprops[ID_Continue_NO_Cf] = new UCDProperty() {
{
name = "ID_Continue";
type = DERIVED_CORE;
@ -441,7 +441,7 @@ public final class DerivedProperty implements UCD_Types {
if (status != 0) XID_Continue_Set.add(cp);
}
dprops[Mod_ID_Start] = new UnicodeProperty() {
dprops[Mod_ID_Start] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "XID_Start";
@ -457,7 +457,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[Mod_ID_Continue_NO_Cf] = new UnicodeProperty() {
dprops[Mod_ID_Continue_NO_Cf] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "XID_Continue";
@ -474,7 +474,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[PropMath] = new UnicodeProperty() {
dprops[PropMath] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Math";
@ -490,7 +490,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[PropAlphabetic] = new UnicodeProperty() {
dprops[PropAlphabetic] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Alphabetic";
@ -506,7 +506,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[PropLowercase] = new UnicodeProperty() {
dprops[PropLowercase] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Lowercase";
@ -522,7 +522,7 @@ public final class DerivedProperty implements UCD_Types {
}
};
dprops[PropUppercase] = new UnicodeProperty() {
dprops[PropUppercase] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Uppercase";
@ -549,7 +549,7 @@ including all characters whose canonical decomposition consists of a single char
file by including all characters whose canonical decomposition consists of a sequence
of characters, the first of which has a non-zero combining class.
*/
dprops[FullCompExclusion] = new UnicodeProperty() {
dprops[FullCompExclusion] = new UCDProperty() {
{
type = DERIVED_NORMALIZATION;
name = "Full_Composition_Exclusion";
@ -577,7 +577,7 @@ of characters, the first of which has a non-zero combining class.
*/
};
dprops[FullCompInclusion] = new UnicodeProperty() {
dprops[FullCompInclusion] = new UCDProperty() {
{
isStandard = false;
type = DERIVED_NORMALIZATION;
@ -598,7 +598,7 @@ of characters, the first of which has a non-zero combining class.
}
};
dprops[FC_NFKC_Closure] = new UnicodeProperty() {
dprops[FC_NFKC_Closure] = new UCDProperty() {
{
type = DERIVED_NORMALIZATION;
setValueType(STRING_PROP);
@ -621,7 +621,7 @@ of characters, the first of which has a non-zero combining class.
public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
};
dprops[FC_NFC_Closure] = new UnicodeProperty() {
dprops[FC_NFC_Closure] = new UCDProperty() {
{
type = DERIVED_NORMALIZATION;
isStandard = false;
@ -649,33 +649,47 @@ of characters, the first of which has a non-zero combining class.
dprops[i] = new QuickDProp(i - QuickNFD);
}
dprops[DefaultIgnorable] = new UnicodeProperty() {
dprops[DefaultIgnorable] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Default_Ignorable_Code_Point";
hasUnassigned = true;
shortName = "DI";
header = header = "# Derived Property: " + name
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Variation_Selector"
+ "\r\n# + Noncharacter_Code_Point + Cf + Cc + Cs) - White_Space"
//+ "\r\n# - U+0600..U+0603 - U+06DD - U+070F"
;
header = null;
}
public String getHeader() {
if (ucdData.getCompositeVersion() > 0x040000) return "# Derived Property: " + name
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Variation_Selector"
+ "\r\n# + Noncharacter_Code_Point + Cf + Cc + Cs) - White_Space"
+ "\r\n# - U+FFF9..U+FFFB// INTERLINEAR ANNOTATION characters";
//+ "\r\n# - U+0600..U+0603 - U+06DD - U+070F"
return "# Derived Property: " + name
+ "\r\n# Generated from (Other_Default_Ignorable_Code_Point + Cf + Cc + Cs) - White_Space";
}
public boolean hasValue(int cp) {
if (ucdData.getBinaryProperty(cp, White_space)) return false;
if (ucdData.getBinaryProperty(cp, Other_Default_Ignorable_Code_Point)) return true;
if (ucdData.getCompositeVersion() > 0x040000 && cp >= 0xFFF9 && cp <= 0xFFFB) return false;
byte cat = ucdData.getCategory(cp);
if (cat == Cf || cat == Cs || cat == Cc) return true;
if (ucdData.getCompositeVersion() <= 0x040000) return false;
//if (cp >= 0xFFF9 && cp <= 0xFFFB) return false;
//if (0x2060 <= cp && cp <= 0x206F || 0xFFF0 <= cp && cp <= 0xFFFB || 0xE0000 <= cp && cp <= 0xE0FFF) return true;
//if (0x0600 <= cp && cp <= 0x0603 || 0x06DD == cp || 0x070F == cp) return false;
if (ucdData.getBinaryProperty(cp, Other_Default_Ignorable_Code_Point)) return true;
if (ucdData.getBinaryProperty(cp, Variation_Selector)) return true;
if (ucdData.getBinaryProperty(cp, Noncharacter_Code_Point)) return true;
byte cat = ucdData.getCategory(cp);
if (cat == Cf || cat == Cs || cat == Cc) return true;
return false;
}
};
dprops[Case_Sensitive] = new UnicodeProperty() {
dprops[Case_Sensitive] = new UCDProperty() {
{
type = DERIVED_CORE;
isStandard = false;
@ -763,7 +777,7 @@ of characters, the first of which has a non-zero combining class.
}
};
dprops[Other_Case_Ignorable] = new UnicodeProperty() {
dprops[Other_Case_Ignorable] = new UCDProperty() {
{
name = "Other_Case_Ignorable";
shortName = "OCI";
@ -785,7 +799,7 @@ of characters, the first of which has a non-zero combining class.
}
};
dprops[Type_i] = new UnicodeProperty() {
dprops[Type_i] = new UCDProperty() {
{
type = DERIVED_CORE;
isStandard = false;
@ -819,7 +833,7 @@ of characters, the first of which has a non-zero combining class.
}
};
dprops[Case_Ignorable] = new UnicodeProperty() {
dprops[Case_Ignorable] = new UCDProperty() {
{
name = "Case_Ignorable";
isStandard = false;
@ -842,7 +856,7 @@ of characters, the first of which has a non-zero combining class.
# GraphemeBase :=
*/
dprops[GraphemeExtend] = new UnicodeProperty() {
dprops[GraphemeExtend] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Grapheme_Extend";
@ -865,7 +879,7 @@ of characters, the first of which has a non-zero combining class.
}
};
dprops[GraphemeBase] = new UnicodeProperty() {
dprops[GraphemeBase] = new UCDProperty() {
{
type = DERIVED_CORE;
name = "Grapheme_Base";
@ -888,7 +902,7 @@ of characters, the first of which has a non-zero combining class.
};
for (int i = 0; i < dprops.length; ++i) {
UnicodeProperty up = dprops[i];
UCDProperty up = dprops[i];
if (up == null) continue;
if (up.getValueType() != BINARY_PROP) continue;
up.setValue(NUMBER, "1");

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
* $Date: 2003/07/21 15:50:06 $
* $Revision: 1.11 $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ final class DerivedPropertyLister extends PropertyLister {
//private int propMask;
//private DerivedProperty dprop;
private UnicodeProperty uprop;
private UCDProperty uprop;
int width;
boolean varies;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
* $Date: 2003/02/25 23:38:23 $
* $Revision: 1.8 $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -56,8 +56,8 @@ class DiffPropertyLister extends PropertyLister {
}
*/
UnicodeProperty newProp = null;
UnicodeProperty oldProp = null;
UCDProperty newProp = null;
UCDProperty oldProp = null;
String value = "";
public String optionalComment(int cp) {

File diff suppressed because it is too large Load diff

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
* $Date: 2003/02/25 23:38:23 $
* $Revision: 1.13 $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -613,7 +613,7 @@ public class GenerateCaseFolding implements UCD_Types {
case 4: skipLine = true; break;
case 5: out.println("# No corresponding uppercase precomposed character"); break;
case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
case 7: out.println("# Some characters with YPOGEGRAMMENI also have no corresponding titlecases"); break;
case 8: skipLine = true; break;
}
if (!skipLine) out.println();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2003/08/20 03:46:41 $
* $Revision: 1.30 $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.31 $
*
*******************************************************************************
*/
@ -186,7 +186,7 @@ public class GenerateData implements UCD_Types {
doHeader(fileName + getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
UnicodeProperty up = DerivedProperty.make(i, Default.ucd);
UCDProperty up = DerivedProperty.make(i, Default.ucd);
if (up == null) continue;
boolean keepGoing = true;
if (!up.isStandard()) keepGoing = false;
@ -409,7 +409,7 @@ public class GenerateData implements UCD_Types {
//System.out.println("debug");
}
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
@ -745,7 +745,7 @@ public class GenerateData implements UCD_Types {
doHeader(file + getFileSuffix(false), output, headerChoice);
int last = -1;
for (int i = startEnum; i < endEnum; ++i) {
UnicodeProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
if (up == null) continue;
if (up.skipInDerivedListing()) continue;
@ -1082,7 +1082,7 @@ public class GenerateData implements UCD_Types {
log.println();
log.println("Cummulative differences");
UnicodeProperty up = DerivedProperty.make(prop, Default.ucd);
UCDProperty up = DerivedProperty.make(prop, Default.ucd);
UnicodeSet newProp = up.getSet();
Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
* $Date: 2003/07/07 15:58:57 $
* $Revision: 1.12 $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -49,7 +49,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
log.println("<title>Unihan check</title>");
log.println("</head>");
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
Map properties = new TreeMap();
@ -265,24 +265,25 @@ public final class GenerateHanTransliterator implements UCD_Types {
switch (type) {
case DEFINITION:
key = "kDefinition"; // kMandarin, kKorean, kJapaneseKun, kJapaneseOn
filename = "Raw_Transliterator_Han_Latin_Definition.txt";
filename = "Raw_Transliterator_Han_Latin_Definition";
break;
case JAPANESE:
key = "kJapaneseOn";
filename = "Raw_Transliterator_ja_Latin.txt";
filename = "Raw_Transliterator_ja_Latin";
break;
case CHINESE:
key = "kMandarin";
filename = "Raw_Transliterator_Han_Latin.txt";
filename = "Raw_Transliterator_Han_Latin";
break;
default: throw new IllegalArgumentException("Unexpected option: must be 0..2");
}
filename += Default.ucd.getVersion() + ".txt";
err = Utility.openPrintWriter("Transliterate_err.txt", Utility.UTF8_WINDOWS);
log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
log.print('\uFEFF');
if (!SKIP_OVERRIDES) {
if (false /*!SKIP_OVERRIDES*/) {
log.println();
log.println("@*Override Data");
log.println();
@ -1798,7 +1799,7 @@ Bad pinyin data: \u4E7F ? LE
static void readUnihanData(String key) throws java.io.IOException {
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
int count = 0;
int lineCounter = 0;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
* $Date: 2002/08/04 21:38:45 $
* $Revision: 1.2 $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -626,9 +626,9 @@ public class GenerateLineBreakTest implements UCD_Types {
static final String[] Names = {"CR", "LF", "CTL", "Extend", "Link", "CGJ", "Base", "LetterBase", "Other" };
static UnicodeProperty extendProp = UnifiedBinaryProperty.make(DERIVED | GraphemeExtend);
static UnicodeProperty baseProp = UnifiedBinaryProperty.make(DERIVED | GraphemeBase);
static UnicodeProperty linkProp = UnifiedBinaryProperty.make(BINARY_PROPERTIES | GraphemeLink);
static UCDProperty extendProp = UnifiedBinaryProperty.make(DERIVED | GraphemeExtend);
static UCDProperty baseProp = UnifiedBinaryProperty.make(DERIVED | GraphemeBase);
static UCDProperty linkProp = UnifiedBinaryProperty.make(BINARY_PROPERTIES | GraphemeLink);
{
fileName = "Word";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
* $Date: 2003/03/12 16:01:26 $
* $Revision: 1.2 $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -53,7 +53,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
String[] codes = new String[2];
String[] shapes = new String[4];
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.ucdVersion, true, Utility.LATIN1);
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.getUcdVersion(), true, Utility.LATIN1);
while (true) {
String line = Utility.readDataLine(in);
if (line == null) break;

View file

@ -5,17 +5,21 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2003/05/02 21:46:33 $
* $Revision: 1.32 $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.33 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.Date;
import com.ibm.text.utility.*;
public final class Main implements UCD_Types {
static final String classPrefix = "com.ibm.text.UCD.";
static final String[] CORE_FILES = {
"CaseFolding",
"CompositionExclusions",
@ -52,288 +56,293 @@ public final class Main implements UCD_Types {
};
public static void main (String[] args) throws Exception {
System.out.println("*** Start *** " + new Date());
for (int i = 0; i < args.length; ++i) {
long mask = 0;
String arg = args[i];
if (arg.charAt(0) == '#') return; // skip rest of line
Utility.fixDot();
System.out.println();
System.out.println("** Argument: " + args[i] + " **");
try {
for (int i = 0; i < args.length; ++i) {
// Expand string arguments
if (arg.equalsIgnoreCase("ALL")) {
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
if (arg.equalsIgnoreCase("CORE")) {
args = Utility.append(CORE_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
if (arg.equalsIgnoreCase("EXTRACTED")) {
args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
// make sure the UCD is set up
if (arg.equalsIgnoreCase("version")) {
Default.setUCD(args[++i]);
continue;
}
Default.ensureUCD();
// Now handle other options
if (arg.equalsIgnoreCase("verify")) {
VerifyUCD.verify();
VerifyUCD.checkCanonicalProperties();
VerifyUCD.CheckCaseFold();
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion});
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
else if (arg.equalsIgnoreCase("hanproperties")) GenerateHanTransliterator.readUnihan();
else if (arg.equalsIgnoreCase("fixChineseOverrides")) GenerateHanTransliterator.fixChineseOverrides();
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
else if (arg.equalsIgnoreCase("binary")) FastBinarySearch.test();
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
else if (arg.equalsIgnoreCase("checkCase3")) VerifyUCD.checkCase3();
else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
// EXTRACTED PROPERTIES
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");
} else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBinaryProperties" );
} else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
GenerateData.generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedCombiningClass" );
} else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
GenerateData.generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedDecompositionType" );
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
GenerateData.generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedEastAsianWidth" );
} else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
GenerateData.generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedGeneralCategory" );
} else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
GenerateData.generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedJoiningGroup" );
} else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
GenerateData.generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedJoiningType" );
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
GenerateData.generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedLineBreak" );
} else if (arg.equalsIgnoreCase("DerivedNumericType")) {
GenerateData.generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericType" );
} else if (arg.equalsIgnoreCase("HangulSyllableType")) {
GenerateData.generateVerticalSlice(HANGUL_SYLLABLE_TYPE,HANGUL_SYLLABLE_TYPE+NEXT_ENUM, GenerateData.HEADER_EXTEND,
"DerivedData/", "HangulSyllableType" );
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericValues" );
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
GenerateStandardizedVariants.generate();
// OTHER STANDARD PROPERTIES
long mask = 0;
} else if (arg.equalsIgnoreCase("CaseFolding")) {
GenerateCaseFolding.makeCaseFold(true);
GenerateCaseFolding.makeCaseFold(false);
} else if (arg.equalsIgnoreCase("SpecialCasing")) {
GenerateCaseFolding.generateSpecialCasing(true);
GenerateCaseFolding.generateSpecialCasing(false);
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
GenerateData.generateCompExclusions();
} else if (arg.equalsIgnoreCase("DerivedAge")) {
GenerateData.generateAge("DerivedData/", "DerivedAge");
String arg = args[i];
if (arg.charAt(0) == '#') return; // skip rest of line
Utility.fixDot();
System.out.println();
System.out.println("** Argument: " + args[i] + " ** " + new Date());
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
// Expand string arguments
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
if (arg.equalsIgnoreCase("ALL")) {
args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
} else if (arg.equalsIgnoreCase("DerivedNormalizationProps")) {
GenerateData.generateDerived(DERIVED_NORMALIZATION, true, GenerateData.HEADER_DERIVED, "DerivedData/",
"DerivedNormalizationProps" );
} else if (arg.equalsIgnoreCase("NormalizationTest")) {
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
if (arg.equalsIgnoreCase("CORE")) {
args = Utility.append(CORE_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
} else if (arg.equalsIgnoreCase("PropertyAliases")) {
GenerateData.generatePropertyAliases();
} else if (arg.equalsIgnoreCase("PropList")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
GenerateData.HEADER_EXTEND, "DerivedData/", "PropList");
} else if (arg.equalsIgnoreCase("Scripts")) {
GenerateData.generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM,
GenerateData.HEADER_SCRIPTS, "DerivedData/", "Scripts");
// OTHER TESTING
} else if (arg.equalsIgnoreCase("OtherDerivedProperties")) {
//mask = Utility.setBits(0, NFC_Leading, NFC_Resulting);
GenerateData.generateDerived((byte)(ALL & ~DERIVED_CORE & ~DERIVED_NORMALIZATION), false, GenerateData.HEADER_DERIVED, "OtherData/", "OtherDerivedProperties");
} else if (arg.equalsIgnoreCase("AllBinary")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
GenerateData.HEADER_EXTEND, "OtherDerived/", "AllBinary");
if (arg.equalsIgnoreCase("EXTRACTED")) {
args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1));
i = -1;
continue;
}
// make sure the UCD is set up
if (arg.equalsIgnoreCase("version")) {
Default.setUCD(args[++i]);
continue;
}
Default.ensureUCD();
// Now handle other options
if (arg.equalsIgnoreCase("verify")) {
VerifyUCD.verify();
VerifyUCD.checkCanonicalProperties();
VerifyUCD.CheckCaseFold();
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.getUcdVersion()});
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
else if (arg.equalsIgnoreCase("hanproperties")) GenerateHanTransliterator.readUnihan();
else if (arg.equalsIgnoreCase("fixChineseOverrides")) GenerateHanTransliterator.fixChineseOverrides();
else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
else if (arg.equalsIgnoreCase("binary")) FastBinarySearch.test();
else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
else if (arg.equalsIgnoreCase("checkCase3")) VerifyUCD.checkCase3();
else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
//else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
//else if (arg.equalsIgnoreCase("NormalizationCharts")) ChartGenerator.writeNormalizationCharts();
/*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
// EXTRACTED PROPERTIES
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");
} else if (arg.equalsIgnoreCase("DerivedGeneralCategoryTEST")) {
GenerateData.generateVerticalSlice(CATEGORY+29, CATEGORY+32, GenerateData.HEADER_DERIVED,
"DerivedData/", "DerivedGeneralCategory" );
} else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBinaryProperties" );
} else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
GenerateData.generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedCombiningClass" );
} else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
GenerateData.generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedDecompositionType" );
} else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
GenerateData.generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedEastAsianWidth" );
} else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
GenerateData.generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedGeneralCategory" );
} else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
GenerateData.generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedJoiningGroup" );
} else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
GenerateData.generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedJoiningType" );
} else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
GenerateData.generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedLineBreak" );
} else if (arg.equalsIgnoreCase("DerivedNumericType")) {
GenerateData.generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericType" );
} else if (arg.equalsIgnoreCase("HangulSyllableType")) {
GenerateData.generateVerticalSlice(HANGUL_SYLLABLE_TYPE,HANGUL_SYLLABLE_TYPE+NEXT_ENUM, GenerateData.HEADER_EXTEND,
"DerivedData/", "HangulSyllableType" );
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericValues" );
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
GenerateStandardizedVariants.generate();
// OTHER STANDARD PROPERTIES
} else if (arg.equalsIgnoreCase("CaseFolding")) {
GenerateCaseFolding.makeCaseFold(true);
GenerateCaseFolding.makeCaseFold(false);
} else if (arg.equalsIgnoreCase("SpecialCasing")) {
GenerateCaseFolding.generateSpecialCasing(true);
GenerateCaseFolding.generateSpecialCasing(false);
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
GenerateData.generateCompExclusions();
} else if (arg.equalsIgnoreCase("DerivedAge")) {
GenerateData.generateAge("DerivedData/", "DerivedAge");
} else if (arg.equalsIgnoreCase("listDifferences")) {
CompareProperties.listDifferences();
} else if (arg.equalsIgnoreCase("backwardsCompat")) {
GenerateData.backwardsCompat("DerivedData/extracted/", "Compatibility_ID_START",
new int[] {ID_Start, ID_Continue_NO_Cf, Mod_ID_Start, Mod_ID_Continue_NO_Cf});
} else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
} else if (arg.equalsIgnoreCase("DerivedNormalizationProps")) {
GenerateData.generateDerived(DERIVED_NORMALIZATION, true, GenerateData.HEADER_DERIVED, "DerivedData/",
"DerivedNormalizationProps" );
} else if (arg.equalsIgnoreCase("NormalizationTest")) {
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
} else if (arg.equalsIgnoreCase("PropertyAliases")) {
GenerateData.generatePropertyAliases();
} else if (arg.equalsIgnoreCase("PropList")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
GenerateData.HEADER_EXTEND, "DerivedData/", "PropList");
} else if (arg.equalsIgnoreCase("Scripts")) {
GenerateData.generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM,
GenerateData.HEADER_SCRIPTS, "DerivedData/", "Scripts");
// OTHER TESTING
} else if (arg.equalsIgnoreCase("partition")) {
CompareProperties.partition();
} else if (arg.equalsIgnoreCase("OtherDerivedProperties")) {
//mask = Utility.setBits(0, NFC_Leading, NFC_Resulting);
GenerateData.generateDerived((byte)(ALL & ~DERIVED_CORE & ~DERIVED_NORMALIZATION), false, GenerateData.HEADER_DERIVED, "OtherData/", "OtherDerivedProperties");
} else if (arg.equalsIgnoreCase("AllBinary")) {
GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
GenerateData.HEADER_EXTEND, "OtherDerived/", "AllBinary");
} else if (arg.equalsIgnoreCase("DerivedGeneralCategoryTEST")) {
GenerateData.generateVerticalSlice(CATEGORY+29, CATEGORY+32, GenerateData.HEADER_DERIVED,
"DerivedData/", "DerivedGeneralCategory" );
} else if (arg.equalsIgnoreCase("listDifferences")) {
CompareProperties.listDifferences();
} else if (arg.equalsIgnoreCase("partition")) {
CompareProperties.partition();
} else if (arg.equalsIgnoreCase("propertyStatistics")) {
CompareProperties.statistics();
} else if (arg.equalsIgnoreCase("listAccents")) {
GenerateData.listCombiningAccents();
} else if (arg.equalsIgnoreCase("listGreekVowels")) {
GenerateData.listGreekVowels();
} else if (arg.equalsIgnoreCase("listKatakana")) {
GenerateData.listKatakana();
/*
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedFullNormalization" );
} else if (arg.equalsIgnoreCase("caseignorable")) {
mask = Utility.setBits(0, DerivedProperty.Other_Case_Ignorable, DerivedProperty.Type_i);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "CaseIgnorable" );
} else if (arg.equalsIgnoreCase("nfunsafestart")) {
mask = Utility.setBits(0, NFD_UnsafeStart, NFKC_UnsafeStart);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "NFUnsafeStart");
*/
} else if (arg.equalsIgnoreCase("propertyStatistics")) {
CompareProperties.statistics();
} else if (arg.equalsIgnoreCase("listAccents")) {
GenerateData.listCombiningAccents();
} else if (arg.equalsIgnoreCase("listGreekVowels")) {
GenerateData.listGreekVowels();
} else if (arg.equalsIgnoreCase("listKatakana")) {
GenerateData.listKatakana();
/*
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedFullNormalization" );
} else if (arg.equalsIgnoreCase("caseignorable")) {
mask = Utility.setBits(0, DerivedProperty.Other_Case_Ignorable, DerivedProperty.Type_i);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "CaseIgnorable" );
} else if (arg.equalsIgnoreCase("nfunsafestart")) {
mask = Utility.setBits(0, NFD_UnsafeStart, NFKC_UnsafeStart);
GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "NFUnsafeStart");
*/
} else {
throw new IllegalArgumentException(" ! Unknown option -- see Main.java for options");
} else {
CallArgs.call(new String[]{arg}, classPrefix);
}
//checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
//GenerateData.generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
// GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedPropData2" );
//GenerateData.generateVerticalSlice(SCRIPT, SCRIPT+1, "ScriptCommon" );
//listStrings("LowerCase" , 0,0);
//GenerateData.generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedData/", "DerivedPropData1" );
// AGE stuff
//UCD ucd = UCD.make();
//System.out.println(ucd.getAgeID(0x61));
//System.out.println(ucd.getAgeID(0x2FA1D));
//
}
//checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
//checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
//GenerateData.generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
// GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedPropData2" );
//GenerateData.generateVerticalSlice(SCRIPT, SCRIPT+1, "ScriptCommon" );
//listStrings("LowerCase" , 0,0);
//GenerateData.generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedData/", "DerivedPropData1" );
// AGE stuff
//UCD ucd = UCD.make();
//System.out.println(ucd.getAgeID(0x61));
//System.out.println(ucd.getAgeID(0x2FA1D));
//
} finally {
System.out.println("*** Done *** " + new Date());
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
* $Date: 2003/07/21 15:50:06 $
* $Revision: 1.10 $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ final class MyPropertyLister extends PropertyLister {
private boolean isDefaultValue = false;
private UnicodeProperty up;
private UCDProperty up;
public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) {
this.propMask = propMask;

View file

@ -6,7 +6,7 @@ import com.ibm.text.utility.*;
import java.io.PrintWriter;
public final class NFSkippable extends UnicodeProperty {
public final class NFSkippable extends UCDProperty {
static final boolean DEBUG = false;
@ -200,7 +200,7 @@ public final class NFSkippable extends UnicodeProperty {
out.println();
for (int mode = NFD_UnsafeStart; mode <= NFKC_UnsafeStart; ++mode) {
UnicodeProperty up = DerivedProperty.make(mode, Default.ucd);
UCDProperty up = DerivedProperty.make(mode, Default.ucd);
generateSet(out, "UNSAFE[" + Normalizer.getName((byte)(mode-NFD_UnsafeStart)) + "]", up);
}
@ -212,7 +212,7 @@ public final class NFSkippable extends UnicodeProperty {
out.close();
}
static void generateSet(PrintWriter out, String label, UnicodeProperty up) {
static void generateSet(PrintWriter out, String label, UCDProperty up) {
System.out.println("Generating: " + up.getName(NORMAL));
UnicodeSet result = new UnicodeSet();
for (int cp = 0; cp <= limit; ++cp) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
* $Date: 2003/02/25 23:38:22 $
* $Revision: 1.14 $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -288,6 +288,9 @@ public final class Normalizer implements UCD_Types {
return this.composition ? data.isLeading(cp) : false;
}
/**
 * Looks up the pairwise composition of two code points by delegating to
 * data.getPairwiseComposition(first, second).
 *
 * @param first  first code point of the pair
 * @param second second code point of the pair
 * @return the value returned by data.getPairwiseComposition for the pair
 *         (NOTE(review): callers appear to treat 0xFFFF as "no composition" -- confirm)
 */
public int getComposition(int first, int second) {
return data.getPairwiseComposition(first, second);
}
// ======================================
// PRIVATES
@ -518,6 +521,9 @@ Problem: differs: true, call: false U+1FED GREEK DIALYTIKA AND VARIA
// we know we decompose all CANONICAL, plus > CANONICAL if compat is TRUE.
if (dt == CANONICAL || dt > CANONICAL && compat) {
String s = ucd.getDecompositionMapping(cp);
if (s.equals(UTF16.valueOf(cp))) {
System.out.println("fix");
}
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
getRecursiveDecomposition(cp, buffer, compat);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2003/08/20 03:46:42 $
* $Revision: 1.12 $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.13 $
*
*******************************************************************************
*/
@ -17,6 +17,10 @@ import java.util.*;
import java.io.*;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.Currency;
import java.math.BigDecimal;
@ -27,12 +31,38 @@ import com.ibm.icu.text.*;
import com.ibm.text.utility.*;
public class TestData implements UCD_Types {
static UnicodeProperty.Factory upf;
public static void main (String[] args) throws IOException {
Default.setUCD();
System.out.println(new Date());
upf = ICUPropertyFactory.make();
System.out.println(new Date());
showPropDiff(
"gc=mn", null,
"script=inherited", null);
// upf.getProperty("gc")
//.getPropertySet(new ICUPropertyFactory.RegexMatcher("mn|me"),null)
showPropDiff(
"gc=mn|me", null,
"script=inherited", null);
if (true) return;
showPropDiff(
"General_Category=L", null,
"Script!=Inherited|Common",
UnifiedBinaryProperty.getSet("script=inherited", Default.ucd)
.addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd))
.complement()
);
UnicodeSet sterm = UnifiedProperty.getSet("Sentence_Terminal", Default.ucd);
UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd);
UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd);
UnicodeSet po = new UnicodeSet("[:po:]");
UnicodeSet empty = new UnicodeSet();
@ -107,8 +137,20 @@ public class TestData implements UCD_Types {
log.close();
}
}
static BagFormatter bf = new BagFormatter();
static UnicodeProperty.Matcher matcher = new ICUPropertyFactory.RegexMatcher();
/**
 * Prints the differences between two sets of code points to the console.
 * A null set is resolved from its label via upf.getSet(label, matcher, null).
 *
 * @param p1 label of the first set; also the lookup expression when s1 is null
 * @param s1 first set, or null to look it up from p1
 * @param p2 label of the second set; also the lookup expression when s2 is null
 * @param s2 second set, or null to look it up from p2
 */
private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
    System.out.println("Property Listing");
    UnicodeSet firstSet = (s1 != null) ? s1 : upf.getSet(p1, matcher, null);
    UnicodeSet secondSet = (s2 != null) ? s2 : upf.getSet(p2, matcher, null);
    bf.showSetDifferences(bf.CONSOLE, p1, firstSet, p2, secondSet);
}
static private UnicodeSet getSetForName(String regexPattern) {
UnicodeSet result = new UnicodeSet();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
* $Date: 2002/06/13 21:14:05 $
* $Revision: 1.5 $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -16,6 +16,9 @@ package com.ibm.text.UCD;
import java.util.*;
import java.io.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.utility.*;
public final class TestNormalization {
@ -192,4 +195,52 @@ public final class TestNormalization {
}
}
/**
 * Prints a report on NFC composition between leading characters and trailing starters.
 * <p>
 * Scans all code points, collecting those that Default.nfc reports as leading and those
 * with combining class 0 that it reports as trailing. For each leading character it then
 * gathers the trailing starters for which getComposition does not return 0xFFFF, prints
 * that set, and finally emits one HTML table row per distinct follower set listing the
 * leading characters that share it.
 */
public static void checkStarters () {
    System.out.println("Checking Starters");

    // Pass 1: classify every code point.
    UnicodeSet leadingSet = new UnicodeSet();
    UnicodeSet trailingStarters = new UnicodeSet();
    for (int cp = 0; cp <= 0x10FFFF; ++cp) {
        if (Default.nfc.isLeading(cp)) {
            leadingSet.add(cp);
        }
        // Only starters (combining class 0) are of interest on the trailing side.
        if (Default.ucd.getCombiningClass(cp) == 0 && Default.nfc.isTrailing(cp)) {
            trailingStarters.add(cp);
        }
    }
    System.out.println("Leading: " + leadingSet.size());
    System.out.println("Trailing Starters: " + trailingStarters.size());

    // Pass 2: group leading characters by the exact set of starters they compose with.
    UnicodeSetIterator leadIt = new UnicodeSetIterator(leadingSet);
    UnicodeSetIterator trailIt = new UnicodeSetIterator(trailingStarters);
    UnicodeSet composable = new UnicodeSet();
    Map leadsByFollowers = new TreeMap(new CompareProperties.UnicodeSetComparator());
    while (leadIt.next()) {
        composable.clear();
        for (trailIt.reset(); trailIt.next();) {
            int composed = Default.nfc.getComposition(leadIt.codepoint, trailIt.codepoint);
            if (composed != 0xFFFF) {
                composable.add(trailIt.codepoint);
            }
        }
        if (composable.size() != 0) {
            System.out.println(Default.ucd.getCode(leadIt.codepoint)
                + "\t" + composable.toPattern(true));
            UnicodeSet sharedLeads = (UnicodeSet) leadsByFollowers.get(composable);
            if (sharedLeads == null) {
                sharedLeads = new UnicodeSet();
                // Key on a copy: 'composable' is cleared and reused on the next iteration.
                leadsByFollowers.put(composable.clone(), sharedLeads);
            }
            sharedLeads.add(leadIt.codepoint);
        }
    }

    // Pass 3: one table row per distinct follower set.
    BagFormatter formatter = new BagFormatter();
    formatter.setLineSeparator("<br>");
    formatter.setLabelSource(null);
    formatter.setAbbreviated(true);
    for (Iterator rows = leadsByFollowers.entrySet().iterator(); rows.hasNext();) {
        Map.Entry row = (Map.Entry) rows.next();
        UnicodeSet followerSet = (UnicodeSet) row.getKey();
        UnicodeSet leadSet = (UnicodeSet) row.getValue();
        System.out.println("<tr><td>"
            + formatter.showSetNames("", leadSet)
            + "</td><td>"
            + formatter.showSetNames("", followerSet)
            + "</td></tr>");
    }
}
}

View file

@ -0,0 +1,246 @@
package com.ibm.text.UCD;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.lang.UCharacter;
import com.ibm.text.utility.Utility;
/**
 * A UnicodeProperty.Factory backed by the tool's own UCD data for one Unicode version.
 * <p>
 * On construction it registers one ToolUnicodeProperty for every alias reported by
 * UnifiedProperty.getAvailablePropertiesAliases, plus hand-written string/numeric
 * properties (Name, Block, Bidi_Mirroring_Glyph, Case_Folding, Numeric_Value).
 * Instances are cached per version string; obtain one through {@link #make(String)}.
 */
public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
    /** Category mask (Cn, Co, Cs, Cc) for which the "Name" property reports null. */
    static final int ODD_BALLS = (1<<UCD_Types.Cn) | (1<<UCD_Types.Co) | (1<<UCD_Types.Cs) | (1<<UCD.Cc);

    /** The UCD instance for the version this factory was created with. */
    private UCD ucd;

    /** True until ucdCache has been filled by the first call to getAge. */
    private static boolean needAgeCache = true;

    /** One UCD per age index, filled lazily by getAge. */
    private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];

    /** Factory instances already built, keyed by version string. */
    private static HashMap cache = new HashMap();

    /**
     * Returns the factory for the given version, creating and caching it on first use.
     *
     * @param version version string, passed to UCD.make
     * @return the cached or newly created factory
     */
    public static synchronized ToolUnicodePropertySource make(String version) {
        ToolUnicodePropertySource result = (ToolUnicodePropertySource)cache.get(version);
        if (result != null) return result;
        result = new ToolUnicodePropertySource(version);
        cache.put(version, result);
        return result;
    }

    /**
     * Builds the factory: loads the UCD for the version and registers all properties.
     *
     * @param version version string, passed to UCD.make
     */
    private ToolUnicodePropertySource(String version) {
        ucd = UCD.make(version);
        TreeSet names = new TreeSet();
        UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
        Iterator it = names.iterator();
        while (it.hasNext()) {
            String name = (String) it.next();
            add(new ToolUnicodeProperty(name));
        }

        add(new UnicodeProperty.SimpleProperty() {
            {set("Name", "na", "<string>", UnicodeProperty.STRING);}
            public String getPropertyValue(int codepoint) {
                // No name is reported for the categories in ODD_BALLS.
                if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getName(codepoint);
            }
        });

        add(new UnicodeProperty.SimpleProperty() {
            {set("Block", "blk", "<string>", UnicodeProperty.STRING);}
            public String getPropertyValue(int codepoint) {
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getBlock(codepoint);
            }
        });

        add(new UnicodeProperty.SimpleProperty() {
            {set("Bidi_Mirroring_Glyph", "bmg", "<string>", UnicodeProperty.STRING);}
            public String getPropertyValue(int codepoint) {
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getBidiMirror(codepoint);
            }
        });

        add(new UnicodeProperty.SimpleProperty() {
            {set("Case_Folding", "cf", "<string>", UnicodeProperty.STRING);}
            public String getPropertyValue(int codepoint) {
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
            }
        });

        add(new UnicodeProperty.SimpleProperty() {
            {set("Numeric_Value", "nv", "<number>", UnicodeProperty.NUMERIC);}
            public String getPropertyValue(int codepoint) {
                double num = ucd.getNumericValue(codepoint);
                // NaN is reported as "no value".
                if (Double.isNaN(num)) return null;
                return Double.toString(num);
            }
        });
    }

    /*
     * Candidate additional property names (NOTE(review): some are already registered
     * in the constructor above -- confirm which are still outstanding):
     "Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
     "Lowercase_Mapping", "Name", "Numeric_Value", "Simple_Case_Folding",
     "Simple_Lowercase_Mapping", "Simple_Titlecase_Mapping", "Simple_Uppercase_Mapping",
     "Titlecase_Mapping", "Unicode_1_Name", "Uppercase_Mapping", "isCased", "isCasefolded",
     "isLowercase", "isNFC", "isNFD", "isNFKC", "isNFKD", "isTitlecase", "isUppercase",
     "toNFC", "toNFD", "toNFKC", "toNKFD"
     */

    /**
     * Adapts one tool-side UCDProperty (looked up by alias) to the UnicodeProperty API.
     */
    private class ToolUnicodeProperty extends UnicodeProperty {
        /** The wrapped tool-side property. */
        com.ibm.text.UCD.UCDProperty up;
        /** Property mask from UnifiedProperty.getPropmask; bits above the low byte select the property. */
        int propMask;

        static final int EXTRA_START = 0x10000;

        /**
         * @param propertyAlias alias understood by UnifiedProperty.getPropmask
         */
        private ToolUnicodeProperty(String propertyAlias) {
            propMask = UnifiedProperty.getPropmask(propertyAlias, ucd);
            up = UnifiedProperty.make(propMask, ucd);
            setType(getPropertyTypeInternal());
            setName(propertyAlias);
        }

        /**
         * Adds the possible values of this property to result: a placeholder for string
         * and numeric properties, "True"/"False" for binary ones, and the long value
         * names for enumerated ones.
         *
         * @param result collection to add to
         * @return result
         */
        public Collection getAvailablePropertyValueAliases(Collection result) {
            int type = getPropertyType() & ~EXTENDED_BIT;
            if (type == STRING) result.add("<string>");
            // Numeric properties use the same "<number>" placeholder as Numeric_Value above.
            else if (type == NUMERIC) result.add("<number>");
            else if (type == BINARY) {
                result.add("True");
                result.add("False");
            } else if (type == ENUMERATED) {
                byte style = UCD_Types.LONG;
                int prop = propMask>>8;
                String temp = null;
                boolean titlecase = false;
                // Probe every possible byte index; out-of-range indexes yield null or throw.
                for (int i = 0; i < 256; ++i) {
                    try {
                        switch (prop) {
                        case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex((byte)i, style)); break;
                        case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((byte)i, style)); break;
                        case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
                        case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style)); break;
                        case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
                            titlecase = true;
                            break;
                        case UCD_Types.EAST_ASIAN_WIDTH>>8: temp = (ucd.getEastAsianWidthID_fromIndex((byte)i, style)); break;
                        case UCD_Types.LINE_BREAK>>8: temp = (ucd.getLineBreakID_fromIndex((byte)i, style)); break;
                        case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex((byte)i, style)); break;
                        case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex((byte)i, style)); break;
                        case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
                            if ("<unused>".equals(temp)) continue;
                            if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
                            break;
                        case UCD_Types.AGE>>8: temp = (ucd.getAgeID_fromIndex((byte)i, style)); break;
                        case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
                            temp = (ucd.getHangulSyllableTypeID_fromIndex((byte)i,style)); break;
                        default: throw new IllegalArgumentException("Internal Error: " + prop);
                        }
                    } catch (ArrayIndexOutOfBoundsException e) {
                        // Index beyond the name table: not a value of this property.
                        continue;
                    }
                    if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
                }
                // Values whose table entry is empty get an explicit name, matching getPropertyValue.
                if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
                if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
                if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
            }
            return result;
        }

        /**
         * Adds the long and short names of the wrapped property to result.
         *
         * @param result collection to add to
         * @return result
         */
        public Collection getPropertyAliases(Collection result) {
            String longName = up.getName(UCD_Types.LONG);
            addUnique(Utility.getUnskeleton(longName, true), result);
            String shortName = up.getName(UCD_Types.SHORT);
            addUnique(Utility.getUnskeleton(shortName, false), result);
            return result;
        }

        /**
         * Not implemented yet: returns result unchanged.
         */
        public Collection getPropertyValueAliases(String valueAlias, Collection result) {
            // TODO Auto-generated method stub
            return result;
        }

        /**
         * Returns the long value name of this property for a code point. Enumerated
         * properties are resolved through the UCD name tables; binary properties yield
         * "True"/"False"; anything else yields "&lt;unknown&gt;".
         *
         * @param codepoint code point to look up
         * @return the value name, never null
         */
        public String getPropertyValue(int codepoint) {
            byte style = UCD_Types.LONG;
            String temp = null;
            boolean titlecase = false;
            switch (propMask>>8) {
            case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), style)); break;
            case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), style));
                // The lookup may return null for an out-of-range class; guard before stripping the prefix.
                if (temp != null && temp.startsWith("Fixed_")) temp = temp.substring(6);
                break;
            case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), style)); break;
            case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), style));
                if (temp == null || temp.length() == 0) temp = "none";
                break;
            case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex(ucd.getNumericType(codepoint), style));
                titlecase = true;
                if (temp == null || temp.length() == 0) temp = "None";
                break;
            case UCD_Types.EAST_ASIAN_WIDTH>>8: temp = (ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(codepoint), style)); break;
            case UCD_Types.LINE_BREAK>>8: temp = (ucd.getLineBreakID_fromIndex(ucd.getLineBreak(codepoint), style)); break;
            case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(codepoint), style));
                if (temp == null || temp.length() == 0) temp = "Non_Joining";
                break;
            case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(codepoint), style)); break;
            case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex(ucd.getScript(codepoint), style));
                if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
                titlecase = true;
                break;
            case UCD_Types.AGE>>8: temp = getAge(codepoint); break;
            case UCD_Types.HANGUL_SYLLABLE_TYPE>>8:
                temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break;
            }
            if (temp != null) return Utility.getUnskeleton(temp,titlecase);
            if (getPropertyType() == BINARY) {
                return up.hasValue(codepoint) ? "True" : "False";
            }
            return "<unknown>";
        }

        /**
         * Returns the age name of a code point: the name of the first age, scanning from
         * AGE11 upward, whose UCD reports the code point as allocated, or the UNKNOWN age
         * name if none does. The per-age UCD instances are loaded on the first call.
         *
         * @param codePoint code point to look up
         * @return an entry of UCD_Names.AGE
         */
        public String getAge(int codePoint) {
            if (needAgeCache) {
                for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
                    ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);
                }
                needAgeCache = false;
            }
            for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
                if (ucdCache[i].isAllocated(codePoint)) return UCD_Names.AGE[i];
            }
            return UCD_Names.AGE[UCD_Types.UNKNOWN];
        }

        /**
         * Maps the wrapped property's value type to a UnicodeProperty type constant,
         * adding EXTENDED_BIT when the wrapped property is not standard. "Age" is
         * always reported as STRING.
         *
         * @return the UnicodeProperty type for this property
         * @throws IllegalArgumentException if the wrapped property's value type is unknown
         */
        private int getPropertyTypeInternal() {
            int result = 0;
            String name = up.getName(UCD_Types.LONG);
            if ("Age".equals(name)) return STRING;
            switch (up.getValueType()) {
            case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
            case UCD_Types.STRING_PROP: result = STRING; break;
            case UCD_Types.MISC_PROP: result = STRING; break;
            case UCD_Types.CATALOG_PROP: result = ENUMERATED; break;
            case UCD_Types.FLATTENED_BINARY_PROP:
            case UCD_Types.ENUMERATED_PROP: result = ENUMERATED; break;
            case UCD_Types.BINARY_PROP: result = BINARY; break;
            case UCD_Types.UNKNOWN_PROP:
            default:
                throw new IllegalArgumentException("Type: UNKNOWN_PROP");
            }
            if (!up.isStandard()) result |= EXTENDED_BIT;
            return result;
        }
    }
}

View file

@ -5,14 +5,15 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2003/07/21 15:50:06 $
* $Revision: 1.28 $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.29 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.HashMap;
@ -30,6 +31,8 @@ import com.ibm.icu.text.UnicodeSet;
public final class UCD implements UCD_Types {
private static int SPOT_CHECK = 0x20AC;
static final boolean DEBUG = false;
/**
@ -361,7 +364,7 @@ public final class UCD implements UCD_Types {
int blockId = 0;
BlockData blockData = new BlockData();
while (Default.ucd.getBlockData(blockId++, blockData)) {
while (getBlockData(blockId++, blockData)) {
if (blockData.name.equals("Hebrew")
|| blockData.name.equals("Cypriot_Syllabary")
) {
@ -399,7 +402,7 @@ public final class UCD implements UCD_Types {
System.out.println("AL: Adding " + BIDI_AL_Delta);
BIDI_AL_SET.addAll(BIDI_AL_Delta);
UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point).getSet();
UnicodeSet noncharacters = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Noncharacter_Code_Point, this).getSet();
noncharacters.remove(Utility.BOM);
System.out.println("Removing Noncharacters/BOM " + noncharacters);
@ -458,7 +461,7 @@ public final class UCD implements UCD_Types {
hanExceptions = new IntMap();
BufferedReader in = null;
try {
in = Utility.openUnicodeFile("Unihan", Default.ucdVersion, true, Utility.UTF8);
in = Utility.openUnicodeFile("Unihan", version, true, Utility.UTF8);
int lineCounter = 0;
while (true) {
Utility.dot(++lineCounter);
@ -590,7 +593,7 @@ public final class UCD implements UCD_Types {
StringBuffer result = new StringBuffer();
int cp;
byte currentCaseType = caseType;
UnicodeProperty defaultIgnorable = DerivedProperty.make(DerivedProperty.DefaultIgnorable, this);
UCDProperty defaultIgnorable = DerivedProperty.make(DerivedProperty.DefaultIgnorable, this);
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
cp = UTF32.char32At(s, i);
@ -829,7 +832,8 @@ public final class UCD implements UCD_Types {
}
public static String getCategoryID_fromIndex(byte prop, byte style) {
return (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
return prop < 0 || prop >= UCD_Names.GC.length ? null
: (style != LONG) ? UCD_Names.GC[prop] : UCD_Names.LONG_GC[prop];
}
@ -846,6 +850,7 @@ public final class UCD implements UCD_Types {
}
static String getCombiningClassID_fromIndex (short index, byte style) {
if (index > 255) return null;
index &= 0xFF;
if (style == NORMAL || style == NUMBER) return String.valueOf(index);
String s = "Fixed";
@ -889,7 +894,12 @@ public final class UCD implements UCD_Types {
}
public static String getBidiClassID_fromIndex(byte prop, byte style) {
return style == SHORT ? UCD_Names.BC[prop] : UCD_Names.LONG_BC[prop];
return prop < 0
|| prop >= UCD_Names.BC.length
? null
: style == SHORT
? UCD_Names.BC[prop]
: UCD_Names.LONG_BC[prop];
}
public String getDecompositionTypeID(int codePoint) {
@ -900,7 +910,8 @@ public final class UCD implements UCD_Types {
return getDecompositionTypeID_fromIndex(prop, NORMAL);
}
public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
return style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
return prop < 0 || prop >= UCD_Names.DT.length ? null
: style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
}
public String getNumericTypeID(int codePoint) {
@ -912,7 +923,8 @@ public final class UCD implements UCD_Types {
}
public static String getNumericTypeID_fromIndex(byte prop, byte style) {
return style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
return prop < 0 || prop >= UCD_Names.NT.length ? null
: style == SHORT ? UCD_Names.SHORT_NT[prop] : UCD_Names.NT[prop];
}
public String getEastAsianWidthID(int codePoint) {
@ -924,7 +936,8 @@ public final class UCD implements UCD_Types {
}
public static String getEastAsianWidthID_fromIndex(byte prop, byte style) {
return style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
return prop < 0 || prop >= UCD_Names.EA.length ? null
: style != LONG ? UCD_Names.SHORT_EA[prop] : UCD_Names.EA[prop];
}
public String getLineBreakID(int codePoint) {
@ -936,7 +949,8 @@ public final class UCD implements UCD_Types {
}
public static String getLineBreakID_fromIndex(byte prop, byte style) {
return style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
return prop < 0 || prop >= UCD_Names.LB.length ? null
: style != LONG ? UCD_Names.LB[prop] : UCD_Names.LONG_LB[prop];
}
public String getJoiningTypeID(int codePoint) {
@ -948,7 +962,8 @@ public final class UCD implements UCD_Types {
}
public static String getJoiningTypeID_fromIndex(byte prop, byte style) {
return style != LONG ? UCD_Names.JOINING_TYPE[prop] : UCD_Names.LONG_JOINING_TYPE[prop];
return prop < 0 || prop >= UCD_Names.JOINING_TYPE.length ? null
: style != LONG ? UCD_Names.JOINING_TYPE[prop] : UCD_Names.LONG_JOINING_TYPE[prop];
}
public String getJoiningGroupID(int codePoint) {
@ -961,7 +976,8 @@ public final class UCD implements UCD_Types {
public static String getJoiningGroupID_fromIndex(byte prop, byte style) {
// no short version
return UCD_Names.JOINING_GROUP[prop];
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
: UCD_Names.JOINING_GROUP[prop];
}
public String getScriptID(int codePoint) {
@ -973,8 +989,8 @@ public final class UCD implements UCD_Types {
}
/**
 * Returns the name of a script given its index.
 *
 * @param prop   script index
 * @param length SHORT selects the abbreviated name table (UCD_Names.ABB_SCRIPT);
 *               any other value selects the full name table (UCD_Names.SCRIPT)
 * @return the script name, or null if prop is outside the selected table
 */
public static String getScriptID_fromIndex(byte prop, byte length) {
    // Bounds-check against the table that is actually indexed. The check used to test
    // UCD_Names.JOINING_GROUP.length, which is unrelated to the script tables.
    String[] names = (length == SHORT) ? UCD_Names.ABB_SCRIPT : UCD_Names.SCRIPT;
    return (prop < 0 || prop >= names.length) ? null : names[prop];
}
public String getAgeID(int codePoint) {
@ -987,7 +1003,8 @@ public final class UCD implements UCD_Types {
public static String getAgeID_fromIndex(byte prop, byte style) {
// no short for
return UCD_Names.AGE[prop];
return prop < 0 || prop >= UCD_Names.AGE.length ? null
: UCD_Names.AGE[prop];
}
public String getBinaryPropertiesID(int codePoint, byte bit) {
@ -999,7 +1016,8 @@ public final class UCD implements UCD_Types {
}
public static String getBinaryPropertiesID_fromIndex(byte bit, byte style) {
return style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
return bit < 0 || bit >= UCD_Names.BP.length ? null
: style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
}
public static int mapToRepresentative(int ch, boolean lessThan20105) {
@ -1208,14 +1226,18 @@ to guarantee identifier closure.
String constructedName = null;
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
boolean isHangul = false;
boolean isRemapped = false;
switch (rangeStart) {
case 0xF900:
if (compositeVersion < 0x020105) {
if (fixStrings) constructedName = "CJK COMPATIBILITY IDEOGRAPH-" + Utility.hex(codePoint, 4);
break;
}
//isRemapped = true;
break;
// FALL THROUGH!!!!
default:
//default:
/*
result = getRaw(codePoint);
if (result == null) {
result = UData.UNASSIGNED;
@ -1234,52 +1256,61 @@ to guarantee identifier closure.
result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
}
}
return result;
*/
//break;
case 0x3400: // CJK Ideograph Extension A
case 0x4E00: // CJK Ideograph
case 0x20000: // Extension B
if (fixStrings) constructedName = "CJK UNIFIED IDEOGRAPH-" + Utility.hex(codePoint, 4);
isRemapped = true;
break;
case 0xAC00: // Hangul Syllable
isHangul = true;
if (fixStrings) {
constructedName = "HANGUL SYLLABLE " + getHangulName(codePoint);
}
isRemapped = true;
break;
case 0xE000: // Private Use
case 0xF0000: // Private Use
case 0x100000: // Private Use
if (fixStrings) constructedName = "<private use area-" + Utility.hex(codePoint, 4) + ">";
isRemapped = true;
break;
case 0xD800: // Surrogate
case 0xDB80: // Private Use
case 0xDC00: // Private Use
if (fixStrings) constructedName = "<surrogate-" + Utility.hex(codePoint, 4) + ">";
isRemapped = true;
break;
case 0xFFFF: // Noncharacter
if (fixStrings) constructedName = "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
isRemapped = true;
break;
}
result = getRaw(rangeStart);
if (result == null) {
result = UData.UNASSIGNED;
isRemapped = true;
result.name = null; // clean this up, since we reuse UNASSIGNED
result.shortName = null;
if (fixStrings) {
result.name = "<reserved-" + Utility.hex(codePoint, 4) + ">";
result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
constructedName = "<reserved-" + Utility.hex(codePoint, 4) + ">";
//result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
}
return result;
//return result;
}
result.codePoint = codePoint;
if (fixStrings) {
result.name = constructedName;
result.shortName = Utility.replace(constructedName, UCD_Names.NAME_ABBREVIATIONS);
result.decompositionMapping = result.bidiMirror
= result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
= result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
= UTF32.valueOf32(codePoint);
if (result.name == null || isRemapped) result.name = constructedName;
if (result.shortName == null) result.shortName = Utility.replace(constructedName, UCD_Names.NAME_ABBREVIATIONS);
if (isRemapped) {
result.decompositionMapping = result.bidiMirror
= result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
= result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
= UTF32.valueOf32(codePoint);
}
}
if (isHangul) {
if (fixStrings) result.decompositionMapping = getHangulDecompositionPair(codePoint);
@ -1416,9 +1447,10 @@ to guarantee identifier closure.
return NA;
}
static String getHangulSyllableTypeID_fromIndex(byte index, byte style) {
if (style == LONG) return UCD_Names.LONG_HANGUL_SYLLABLE_TYPE[index];
return UCD_Names.HANGUL_SYLLABLE_TYPE[index];
static String getHangulSyllableTypeID_fromIndex(byte prop, byte style) {
return prop < 0 || prop >= UCD_Names.HANGUL_SYLLABLE_TYPE.length ? null
: (style == LONG) ? UCD_Names.LONG_HANGUL_SYLLABLE_TYPE[prop]
: UCD_Names.HANGUL_SYLLABLE_TYPE[prop];
}
String getHangulSyllableTypeID(int char1, byte style) {
@ -1471,7 +1503,7 @@ to guarantee identifier closure.
UData uData = new UData();
uData.readBytes(dataIn);
if (uData.codePoint == 0x5E) {
if (uData.codePoint == SPOT_CHECK) {
System.out.println("SPOT-CHECK: " + uData);
}
@ -1528,6 +1560,18 @@ to guarantee identifier closure.
public String name;
}
public String NOBLOCK = Utility.getUnskeleton("no block", true);
/**
 * Finds the name of the block whose range contains a code point.
 * Calls {@code loadBlocks()} first when {@code blocks} is still null,
 * then scans the block entries in iteration order.
 *
 * @param codePoint the code point to look up
 * @return the {@code name} of the first entry whose inclusive
 *         {@code start..end} range contains {@code codePoint}, or
 *         {@code NOBLOCK} when no entry matches
 */
public String getBlock(int codePoint) {
    if (blocks == null) {
        loadBlocks();
    }
    for (Iterator blockIt = blocks.iterator(); blockIt.hasNext();) {
        BlockData candidate = (BlockData) blockIt.next();
        boolean inRange = codePoint >= candidate.start && codePoint <= candidate.end;
        if (inRange) {
            return candidate.name;
        }
    }
    return NOBLOCK;
}
public boolean getBlockData(int blockId, BlockData output) {
if (blocks == null) loadBlocks();
BlockData temp;
@ -1570,4 +1614,18 @@ to guarantee identifier closure.
throw new IllegalArgumentException("Can't read block file");
}
}
/**
* Returns the current value of the compositeVersion field.
*
* @return the stored composite version
*/
public int getCompositeVersion() {
return compositeVersion;
}
/**
* Overwrites the compositeVersion field with the given value.
*
* @param i the new composite version to store
*/
public void setCompositeVersion(int i) {
compositeVersion = i;
}
}

View file

@ -2,7 +2,7 @@ package com.ibm.text.UCD;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.*;
public abstract class UnicodeProperty implements UCD_Types {
public abstract class UCDProperty implements UCD_Types {
// TODO: turn all of these into privates, and use setters only

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2003/08/20 03:46:43 $
* $Revision: 1.23 $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.24 $
*
*******************************************************************************
*/
@ -109,7 +109,7 @@ final class UCD_Names implements UCD_Types {
"jg",
"",
"sc",
"ag",
"age",
"hst",
"",
};
@ -148,16 +148,16 @@ final class UCD_Names implements UCD_Types {
"Soft_Dotted",
"Logical_Order_Exception",
"Other_ID_Start",
"Sentence_Terminal",
"STerm",
"Variation_Selector"
};
static final String[] SHORT_BP = {
"BidiM",
"Bidi_M",
"CE",
"WSpace",
"NBrk",
"BidiC",
"Bidi_C",
"JoinC",
"Dash",
"Hyphen",
@ -175,7 +175,7 @@ final class UCD_Names implements UCD_Types {
"NChar",
"TurkI",
"OGrExt",
"GrLink",
"Gr_Link",
"IDSB",
"IDST",
"Radical",

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2003/08/20 03:46:44 $
* $Revision: 1.25 $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.26 $
*
*******************************************************************************
*/
@ -15,7 +15,9 @@ package com.ibm.text.UCD;
public interface UCD_Types {
public static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
public static final int dVersion = 5; // change to fix the generated file D version. If less than zero, no "d"
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -34,8 +36,6 @@ public interface UCD_Types {
CJK_B_BASE = 0x20000,
CJK_B_LIMIT = 0x2A6DF+1;
static final byte BINARY_FORMAT = 10; // bumped if binary format of UCD changes
// Unicode Property Types
static final byte
NOT_DERIVED = 1,
@ -387,7 +387,7 @@ public interface UCD_Types {
static final int
UNKNOWN = 0,
AGE10 = 1,
AGE11 = 1,
AGE20 = 2,
AGE21 = 3,
AGE30 = 4,
@ -396,7 +396,16 @@ public interface UCD_Types {
AGE40 = 7,
LIMIT_AGE = 8;
static final String[] AGE_VERSIONS = {
"?",
"1.1.0",
"2.0.0",
"2.1.2",
"3.0.0",
"3.1.0",
"3.2.0",
"4.0.0"
};
public static byte
JT_C = 0,

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
* $Date: 2003/07/21 15:50:05 $
* $Revision: 1.14 $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -18,16 +18,16 @@ import java.util.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UnicodeSet;
public final class UnifiedBinaryProperty extends UnicodeProperty {
public final class UnifiedBinaryProperty extends UCDProperty {
int majorProp;
int propValue;
// DerivedProperty dp;
public static UnicodeProperty make(int propMask) {
public static UCDProperty make(int propMask) {
return make(propMask, Default.ucd);
}
public static UnicodeProperty make(int propMask, UCD ucd) {
public static UCDProperty make(int propMask, UCD ucd) {
if ((propMask & 0xFF00) == DERIVED) {
return DerivedProperty.make(propMask & 0xFF, ucd);
}
@ -35,12 +35,12 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
return getCached(propMask, ucd);
}
public static UnicodeProperty make(String propAndValue, UCD ucd) {
public static UCDProperty make(String propAndValue, UCD ucd) {
return make(getPropmask(propAndValue, ucd), ucd);
}
public static UnicodeSet getSet(int propMask, UCD ucd) {
UnicodeProperty up = make(propMask, ucd);
UCDProperty up = make(propMask, ucd);
return up.getSet();
}
@ -58,7 +58,7 @@ public final class UnifiedBinaryProperty extends UnicodeProperty {
propNameCache = new HashMap();
for (int i = 0; i < LIMIT_ENUM; ++i) {
UnicodeProperty up = UnifiedBinaryProperty.make(i, ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
if (up.getValueType() < BINARY_PROP) continue;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
* $Date: 2003/07/21 15:50:05 $
* $Revision: 1.4 $
* $Date: 2004/02/06 18:30:18 $
* $Revision: 1.5 $
*
*******************************************************************************
*/
@ -18,15 +18,19 @@ import java.util.*;
import com.ibm.text.utility.*;
import com.ibm.icu.text.UnicodeSet;
public final class UnifiedProperty extends UnicodeProperty {
public final class UnifiedProperty extends UCDProperty {
int majorProp;
// DerivedProperty dp;
public static UnicodeProperty make(int propMask) {
public static UCDProperty make(int propMask) {
return make(propMask, Default.ucd);
}
public static UnicodeProperty make(int propMask, UCD ucd) {
public static UCDProperty make(int propMask, UCD ucd) {
if (propMask == AGE) {
System.out.println();
}
if ((propMask & 0xFF00) == (BINARY_PROPERTIES & 0xFF00)) {
return UnifiedBinaryProperty.make(propMask, ucd);
}
@ -37,12 +41,12 @@ public final class UnifiedProperty extends UnicodeProperty {
return getCached(propMask, ucd);
}
public static UnicodeProperty make(String propID, UCD ucd) {
public static UCDProperty make(String propID, UCD ucd) {
return make(getPropmask(propID, ucd), ucd);
}
public static UnicodeSet getSet(int propMask, UCD ucd) {
UnicodeProperty up = make(propMask, ucd);
UCDProperty up = make(propMask, ucd);
return up.getSet();
}
@ -51,26 +55,21 @@ public final class UnifiedProperty extends UnicodeProperty {
}
private static Map propNameCache = null;
private static Set availablePropNames = new TreeSet();
/**
 * Appends the available property names to a caller-supplied collection.
 * When {@code propNameCache} is still null, the name cache is built first
 * through {@code cacheNames(ucd)}.
 *
 * @param result collection that receives every entry of {@code availablePropNames}
 * @param ucd    passed to {@code cacheNames} when the cache has to be built
 * @return the same collection that was passed in as {@code result}
 */
public static Collection getAvailablePropertiesAliases(Collection result, UCD ucd) {
    boolean cacheMissing = (propNameCache == null);
    if (cacheMissing) {
        cacheNames(ucd);
    }
    result.addAll(availablePropNames);
    return result;
}
public static int getPropmask(String propID, UCD ucd) {
// cache the names
if (propNameCache == null) {
System.out.println("Caching Property Names");
propNameCache = new HashMap();
for (int i = 0; i < LIMIT_ENUM; ++i) {
UnicodeProperty up = UnifiedProperty.make(i, ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
if (up.getValueType() < BINARY_PROP) continue;
String shortName = Utility.getSkeleton(up.getProperty(SHORT));
String longName = Utility.getSkeleton(up.getProperty(LONG));
Integer result = new Integer(i);
propNameCache.put(longName, result);
propNameCache.put(shortName, result);
}
System.out.println("Done Caching");
cacheNames(ucd);
}
propID = Utility.getSkeleton(propID);
@ -80,6 +79,28 @@ public final class UnifiedProperty extends UnicodeProperty {
}
return indexObj.intValue();
}
/**
 * Builds the property-name lookup tables.
 * Replaces {@code propNameCache} with a fresh map, then walks every index
 * below {@code LIMIT_ENUM}. Each index whose property is non-null, standard
 * and has a value type of at least {@code BINARY_PROP} is registered under
 * the skeleton forms of its long and short names. The raw long name (or the
 * raw short name when the long one is null) is added to
 * {@code availablePropNames}.
 *
 * @param ucd handed to {@code UnifiedProperty.make} for every index probed
 */
private static void cacheNames(UCD ucd) {
    System.out.println("Caching Property Names");
    propNameCache = new HashMap();

    for (int propIndex = 0; propIndex < LIMIT_ENUM; ++propIndex) {
        UCDProperty candidate = UnifiedProperty.make(propIndex, ucd);
        if (candidate == null
                || !candidate.isStandard()
                || candidate.getValueType() < BINARY_PROP) {
            continue;
        }

        String rawShort = candidate.getProperty(SHORT);
        String shortKey = Utility.getSkeleton(rawShort);
        String rawLong = candidate.getProperty(LONG);
        String longKey = Utility.getSkeleton(rawLong);
        Integer boxedIndex = new Integer(propIndex);

        // The first index registered under a skeleton name keeps it;
        // later indices with the same skeleton do not overwrite it.
        if (!propNameCache.containsKey(longKey)) {
            propNameCache.put(longKey, boxedIndex);
        }
        if (!propNameCache.containsKey(shortKey)) {
            propNameCache.put(shortKey, boxedIndex);
        }

        String displayName = (rawLong != null) ? rawLong : rawShort;
        availablePropNames.add(displayName);
    }

    System.out.println("Done Caching");
}
static Map cache = new HashMap();
static UCD lastUCD = null;
@ -92,12 +113,13 @@ public final class UnifiedProperty extends UnicodeProperty {
UCD ucd;
/**
 * Two clumps are equal when they carry the same property mask and
 * equal UCD instances.
 *
 * @param other the object to compare with
 * @return {@code true} only when {@code other} is a Clump whose
 *         {@code prop} matches and whose {@code ucd} equals this one's
 */
public boolean equals(Object other) {
    // Return false for null or foreign types, rather than throwing on the cast.
    if (!(other instanceof Clump)) {
        return false;
    }
    Clump that = (Clump) other;
    // Compare against that.ucd: the previous code passed the Clump itself
    // to ucd.equals, which compares a UCD with a Clump.
    return that.prop == prop && ucd.equals(that.ucd);
}
}
private static UnifiedProperty getCached(int propMask, UCD ucd) {
System.out.println(ucd);
//System.out.println(ucd);
if (ucd.equals(lastUCD) && propMask == lastPropMask) return lastValue;
probeClump.prop = propMask;
probeClump.ucd = ucd;
@ -120,7 +142,9 @@ public final class UnifiedProperty extends UnicodeProperty {
majorProp = propMask >> 8;
//System.out.println("A: " + getValueType());
if (majorProp <= (JOINING_GROUP>>8) || majorProp == SCRIPT>>8) setValueType(FLATTENED_BINARY_PROP);
if (majorProp <= (JOINING_GROUP>>8)
|| majorProp == SCRIPT>>8
|| majorProp==(HANGUL_SYLLABLE_TYPE>>8)) setValueType(FLATTENED_BINARY_PROP);
//System.out.println("B: " + getValueType());
header = UCD_Names.UNIFIED_PROPERTY_HEADERS[majorProp];
@ -158,7 +182,7 @@ public final class UnifiedProperty extends UnicodeProperty {
throw new ChainException("Can't call 'hasValue' on non-binary property {0}", new Object[]{
new Integer(majorProp)});
}
public String getFullName(byte style) {
String pre = "";
String preShort = getProperty(SHORT);
@ -168,7 +192,7 @@ public final class UnifiedProperty extends UnicodeProperty {
else pre = preShort + "(" + preLong + ")";
return pre;
}
public String getValue(int cp, byte style) {
switch (majorProp) {
case CATEGORY>>8: return ucd.getCategoryID_fromIndex(ucd.getCategory(cp), style);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
* $Date: 2003/07/21 15:50:05 $
* $Revision: 1.22 $
* $Date: 2004/02/06 18:30:18 $
* $Revision: 1.23 $
*
*******************************************************************************
*/
@ -441,7 +441,7 @@ can help you narrow these down.
checkNF_AndCase("\u0130", true);
checkNF_AndCase("\u0131", true);
UnicodeProperty softdot = null;
UCDProperty softdot = null;
CanonicalIterator cit = new CanonicalIterator("a");
UnicodeSet badChars = new UnicodeSet();
@ -489,9 +489,9 @@ can help you narrow these down.
}
static void checkIdentical(String ubpName1, String ubpName2) {
UnicodeProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd);
UCDProperty prop1 = UnifiedBinaryProperty.make(ubpName1, Default.ucd);
UnicodeSet set1 = prop1.getSet();
UnicodeProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd);
UCDProperty prop2 = UnifiedBinaryProperty.make(ubpName2, Default.ucd);
UnicodeSet set2 = prop2.getSet();
UnicodeSet set1minus2 = new UnicodeSet(set1);
set1minus2.removeAll(set2);
@ -986,8 +986,8 @@ can help you narrow these down.
System.out.println("NameChar:");
System.out.println("\t" + NameChar.toPattern(true));
UnicodeProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd);
UnicodeProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd);
UCDProperty IDstart = DerivedProperty.make(Mod_ID_Start, Default.ucd);
UCDProperty IDcontinue = DerivedProperty.make(Mod_ID_Continue_NO_Cf, Default.ucd);
UnicodeSet IDContinueMinusNameChar = new UnicodeSet();
UnicodeSet IDStartMinusNameChar = new UnicodeSet();

View file

@ -0,0 +1,63 @@
package com.ibm.text.utility;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import com.ibm.icu.dev.test.util.BagFormatter;
public class CallArgs {
static BagFormatter bf = new BagFormatter();
public static String getPrefix(Class c) {
String prefix = c.getName();
int pos = prefix.lastIndexOf('.');
if (pos < 0) return "";
return prefix.substring(0,pos+1);
}
public static void call(String[] args, String prefix) throws Exception {
for (int i = 0; i < args.length; ++i) {
String arg = args[i];
if (arg.startsWith("#")) break; // comments out rest of line
String[] methodArgs = null;
int par = arg.indexOf('(');
if (par >= 0) {
methodArgs = Utility.split(arg.substring(par+1, arg.length()-1),',');
arg = arg.substring(0,par);
}
int pos = arg.indexOf('.');
Method method = null;
if (pos >= 0) {
String className = prefix + arg.substring(0,pos);
String methodName = arg.substring(pos+1);
method = tryMethod(className, methodName, methodArgs);
} else {
method = tryMethod("Main", arg, methodArgs);
if (method == null) {
method = tryMethod(arg, "main", methodArgs);
}
}
if (method == null) throw new IllegalArgumentException("Bad parameter: " + arg);
System.out.println(method.getName() + "\t" + bf.join(methodArgs));
method.invoke(null,methodArgs);
}
}
private static Method tryMethod(String className, String methodName, String[] methodArgs)
throws IllegalAccessException, InvocationTargetException {
try {
Class foo = Class.forName(className);
Class[] parameterTypes = null;
if (methodArgs != null) {
parameterTypes = new Class[methodArgs.length];
for (int i = 0; i < methodArgs.length; ++i) {
parameterTypes[i] = String.class;
}
}
return foo.getDeclaredMethod(methodName,parameterTypes);
} catch (Exception e) {
return null;
}
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/EquivalenceClass.java,v $
* $Date: 2001/08/31 00:19:16 $
* $Revision: 1.2 $
* $Date: 2004/02/06 18:29:39 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -28,7 +28,7 @@ public class EquivalenceClass {
// whenever we add a <source, value> pair, we see if any sets collide.
// associated with each set of sources, we keep a representative Whenever we add to the set, if we
//
Map sourceToEquiv = new HashMap();
Map sourceToEquiv = new TreeMap();
Map valueToRepresentativeSource = new HashMap();
Map forcedMerge = new HashMap();
/**
@ -62,7 +62,7 @@ public class EquivalenceClass {
if (DEBUG) System.out.println("+Source " + source
+ ", value: " + value);
if (repSource == null && equivSet == null) {
equivSet = new HashSet();
equivSet = new TreeSet();
equivSet.add(source);
sourceToEquiv.put(source, equivSet);
valueToRepresentativeSource.put(value, source);
@ -96,7 +96,7 @@ public class EquivalenceClass {
// then replace all instances for equivSet by repEquiv
// we have to do this in two steps, since iterators are invalidated by changes
Set toReplace = new HashSet();
Set toReplace = new TreeSet();
it = sourceToEquiv.keySet().iterator();
while (it.hasNext()) {
Object otherSource = it.next();
@ -127,6 +127,24 @@ public class EquivalenceClass {
}
return result.toString();
}
/**
 * Read-only iterator that walks the keys of {@code sourceToEquiv} and
 * yields, for each key, the value the map holds for it.
 * NOTE(review): a value stored under several keys is presumably returned
 * once per key; confirm callers expect that.
 */
private class MyIterator implements Iterator {
    Iterator it = sourceToEquiv.keySet().iterator();

    /** @return true while the underlying key iterator has more keys */
    public boolean hasNext() {
        return it.hasNext();
    }

    /** @return the map value for the next key */
    public Object next() {
        Object source = it.next();
        return sourceToEquiv.get(source);
    }

    /**
     * Removal is not supported.
     * NOTE(review): the Iterator contract names UnsupportedOperationException
     * for this case; the existing exception type is kept for compatibility.
     */
    public void remove() {
        throw new IllegalArgumentException("can't remove");
    }
}
/**
* Creates a new MyIterator for this object.
*
* @return a fresh iterator that yields the value stored in sourceToEquiv
*         for each of its keys
*/
public Iterator getSetIterator () {
return new MyIterator();
}
private String toString(Object s) {
if (s == null) return "null";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Main.java,v $
* $Date: 2003/07/07 15:58:56 $
* $Revision: 1.2 $
* $Date: 2004/02/06 18:29:39 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -50,7 +50,7 @@ public class Main {
static public void main (String[] args) {
for (int i = 0; i < args.length; ++i) {
String arg = args[i];
if (arg.equalsIgnoreCase("probe")) Probe.test("da");
//if (arg.equalsIgnoreCase("probe")) Probe.test("da");
}
if (true) return;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2003/08/20 03:47:59 $
* $Revision: 1.36 $
* $Date: 2004/02/06 18:29:39 $
* $Revision: 1.37 $
*
*******************************************************************************
*/
@ -148,6 +148,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
*/
public static String getUnskeleton(String source, boolean titlecaseStart) {
if (source == null) return source;
if (source.equals("noBreak")) return source; // HACK
StringBuffer result = new StringBuffer();
int lastCat = -1;
@ -585,7 +586,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return quoteXML(source, false);
}
private static UnicodeProperty defaultIgnorable = null;
private static UCDProperty defaultIgnorable = null;
public static String getDisplay(int cp) {
String result = UTF16.valueOf(cp);
@ -1110,6 +1111,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
pw.println();
pw.println("In both " + name1 + " and " + name2 + ": ");
pw.println(temp.size() == 0 ? "<none>" : ""+ temp);
pw.flush();
// showSetNames(pw, "\t", temp, false, false, withChar, names, ucd);
}