mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-4677 update tools
X-SVN-Rev: 18658
This commit is contained in:
parent
1437101edb
commit
953d673966
11 changed files with 782 additions and 153 deletions
|
@ -7,19 +7,33 @@
|
|||
|
||||
package com.ibm.icu.dev.demo.chart;
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.dev.test.util.*;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.lang.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
public class UnicodeChart {
|
||||
// General category value that ICU reports for U+D800; getType(int) compares against it to detect surrogates.
static int surrogateType = UCharacter.getType('\ud800');
|
||||
// General category value that ICU reports for U+E000; getType(int) compares against it to detect private-use code points.
static int privateUseType = UCharacter.getType('\ue000');
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
//int rowWidth = 256;
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer("", "UnicodeChart.html");
|
||||
VersionInfo vi = UCharacter.getUnicodeVersion();
|
||||
String version = vi.getMajor() + "." + vi.getMinor() + "." + vi.getMilli();
|
||||
PrintWriter pw = BagFormatter.openUTF8Writer("C:\\DATA\\GEN\\", "UnicodeChart.html");
|
||||
pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
|
||||
pw.println("<script type='text/javascript' src='UnicodeChart.js'></script>");
|
||||
pw.println("<link rel='stylesheet' type='text/css' href='UnicodeChart.css'>");
|
||||
pw.println("<title>Unicode 4.0 Chart</title>");
|
||||
pw.println("<title>Unicode " + version + " Chart</title>");
|
||||
pw.println("</head><body bgcolor='#FFFFFF'>");
|
||||
pw.println("<table border='1' cellspacing='0'><caption><h1>Unicode 4.0 Chart</h1></caption>");
|
||||
pw.println("<table border='1' cellspacing='0'><caption><h1>Unicode " + version + " Chart</h1></caption>");
|
||||
|
||||
/*pw.println("<tr><th></th>");
|
||||
for (int j = 0; j < rowWidth; ++j) {
|
||||
|
@ -29,58 +43,118 @@ public class UnicodeChart {
|
|||
*/
|
||||
// TODO: fix Utility to take ints
|
||||
|
||||
int surrogateType = UCharacter.getType('\ud800');
|
||||
int privateUseType = UCharacter.getType('\ue000');
|
||||
System.out.println("Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
|
||||
System.out.println("Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
|
||||
System.out.println("//Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
|
||||
System.out.println("//Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
|
||||
|
||||
//boolean gotOne = true;
|
||||
int columns = 0;
|
||||
int limit = 0x10FFFF;
|
||||
int limit = 0x110000/16;
|
||||
char lastType = 'x';
|
||||
int lastCount = 0;
|
||||
pw.println("<script>");
|
||||
pw.print("top();");
|
||||
int itemCount = 1;
|
||||
for (int i = 0; i <= limit; ++i) {
|
||||
if ((i & 0xFF) == 0) System.out.println(hex(i>>8,2) + "__");
|
||||
columns++;
|
||||
//pw.print("<tr><th>" + hex(i>>8,2) + "__</th>");
|
||||
char type = 'v';
|
||||
int cat = UCharacter.getType(i);
|
||||
if (UCharacter.hasBinaryProperty(i, UProperty.NONCHARACTER_CODE_POINT)) {
|
||||
type = 'n';
|
||||
} else if (cat == Character.UNASSIGNED || cat == surrogateType || cat == privateUseType) {
|
||||
type = 'u';
|
||||
} else if (UCharacter.isUWhiteSpace(i)) {
|
||||
type = 'w';
|
||||
} else if (UCharacter.hasBinaryProperty(i, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
|
||||
type = 'i';
|
||||
// an array that maps String (containing column information) to UnicodeSet (containing column numbers)
|
||||
Map info_number = new TreeMap();
|
||||
List number_info = new ArrayList();
|
||||
StringBuffer sb = new StringBuffer();
|
||||
int lastInfo = -1;
|
||||
int sameCount = 0;
|
||||
System.out.println("var charRanges = [");
|
||||
for (int i = 0; i < limit; ++i) {
|
||||
// get the string of info, and get its number
|
||||
sb.setLength(0);
|
||||
for (int j = 0; j < 16; ++j) {
|
||||
int cp = i*16+j;
|
||||
char type = getType(cp);
|
||||
sb.append(type);
|
||||
}
|
||||
String info = sb.toString();
|
||||
Integer s = (Integer) info_number.get(info);
|
||||
if (s == null) {
|
||||
info_number.put(info, s=new Integer(number_info.size()));
|
||||
number_info.add(info);
|
||||
}
|
||||
|
||||
// write a line whenever the value changes
|
||||
if (lastInfo == s.intValue()) {
|
||||
sameCount++;
|
||||
} else {
|
||||
type = 'v';
|
||||
if (lastInfo != -1) System.out.println(sameCount + "," + lastInfo + ",");
|
||||
sameCount = 1;
|
||||
lastInfo = s.intValue();
|
||||
}
|
||||
if (type != lastType) {
|
||||
if (lastCount != 0) pw.print(lastType + "(" + lastCount + ");");
|
||||
lastType = type;
|
||||
lastCount = 0;
|
||||
++itemCount;
|
||||
if ((itemCount & 0xF) == 0) pw.println();
|
||||
}
|
||||
++lastCount;
|
||||
}
|
||||
pw.println(lastType + "(" + lastCount + ");"); // finish last row
|
||||
// write last line
|
||||
System.out.println(sameCount + "," + lastInfo);
|
||||
System.out.println("];");
|
||||
|
||||
// now write out array
|
||||
int count = 0;
|
||||
UnicodeMap um = new UnicodeMap();
|
||||
System.out.println("var charInfo = [");
|
||||
for (Iterator it = number_info.iterator(); it.hasNext();) {
|
||||
String info = (String) it.next();
|
||||
System.out.println("'" + info + "',");
|
||||
}
|
||||
System.out.println("];");
|
||||
|
||||
// write out blocks
|
||||
Map blockMap = new TreeMap();
|
||||
int startValue = -1;
|
||||
int lastEnum = -1;
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
int enum = UCharacter.getIntPropertyValue(i,UProperty.BLOCK);
|
||||
if (enum == lastEnum) continue;
|
||||
if (lastEnum != -1) {
|
||||
String s = UCharacter.getPropertyValueName(UProperty.BLOCK, lastEnum, UProperty.NameChoice.LONG);
|
||||
blockMap.put(s, hex(startValue,0) + "/" + hex(i - startValue,0));
|
||||
System.out.println(s + ": " + blockMap.get(s));
|
||||
}
|
||||
lastEnum = enum;
|
||||
startValue = i;
|
||||
}
|
||||
String s = UCharacter.getPropertyValueName(UProperty.BLOCK, lastEnum, UProperty.NameChoice.LONG);
|
||||
blockMap.put(s, hex(startValue,0) + "/" + hex(0x110000 - startValue,0));
|
||||
blockMap.remove("No_Block");
|
||||
for (Iterator it = blockMap.keySet().iterator(); it.hasNext();) {
|
||||
String blockName = (String)it.next();
|
||||
String val = (String) blockMap.get(blockName);
|
||||
System.out.println("<option value='" + val + "'>" + blockName + "</option>");
|
||||
}
|
||||
|
||||
// <option value="4DC0">Yijing Hexagram Symbols</option>
|
||||
|
||||
|
||||
pw.println("</script></tr></table><p></p>");
|
||||
pw.println("<table><caption>Key</caption>");
|
||||
pw.println("<tr><td>X</td><td class='left'>Graphic characters</td></tr>");
|
||||
pw.println("<tr><td>\u00A0</td><td class='left'>Whitespace</td></tr>");
|
||||
pw.println("<tr><td class='i'> </td><td class='left'>Other Default Ignorable</td></tr>");
|
||||
pw.println("<tr><td class='u'> </td><td class='left'>Undefined, Private Use, or Surrogates</td></tr>");
|
||||
pw.println("<tr><td class='n'> </td><td class='left'>Noncharacter</td></tr>");
|
||||
pw.println("<tr><td class='i'>\u00A0</td><td class='left'>Other Default Ignorable</td></tr>");
|
||||
pw.println("<tr><td class='u'>\u00A0</td><td class='left'>Undefined, Private Use, or Surrogates</td></tr>");
|
||||
pw.println("<tr><td class='n'>\u00A0</td><td class='left'>Noncharacter</td></tr>");
|
||||
pw.println("</table>");
|
||||
pw.println("<p>Copyright \u00A9 2003, Mark Davis. All Rights Reserved.</body></html>");
|
||||
pw.close();
|
||||
System.out.println("columns: " + columns);
|
||||
System.out.println("//columns: " + columns);
|
||||
}
|
||||
|
||||
private static char getType(int i) {
|
||||
char type = 'v';
|
||||
int cat = UCharacter.getType(i);
|
||||
if (UCharacter.hasBinaryProperty(i, UProperty.NONCHARACTER_CODE_POINT)) {
|
||||
type = 'n';
|
||||
} else if (cat == Character.UNASSIGNED || cat == surrogateType || cat == privateUseType) {
|
||||
type = 'u';
|
||||
} else if (UCharacter.isUWhiteSpace(i)) {
|
||||
type = 'w';
|
||||
} else if (UCharacter.hasBinaryProperty(i, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
|
||||
type = 'i';
|
||||
} else {
|
||||
type = 'v';
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
static String hex(int i, int padTo) {
|
||||
String result = Integer.toHexString(i).toUpperCase(java.util.Locale.ENGLISH);
|
||||
|
|
|
@ -206,29 +206,32 @@ public final class CollectionUtilities {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an int with bits set.
|
||||
* Bit 4: a - b != {}
|
||||
* Bit 2: a * b != {} // * is intersects
|
||||
* Bit 1: b - a != {}
|
||||
* Thus the bits can be used to get the following relations, plus
|
||||
* for A_SUPERSET_B, use (x & NOT_A_SUPERSET_B) == 0
|
||||
* for A_SUBSET_B, use (x & NOT_A_SUBSET_B) == 0
|
||||
* for A_EQUALS_B, use (x & A_PROPER_DISJOINT_B) == 0
|
||||
* for A_DISJOINT_B, use (x & NOT_A_DISJOINT_B) == 0
|
||||
* for A_OVERLAPS_B, use (x & NOT_A_DISJOINT_B) == 1
|
||||
* Used for results of getContainmentRelation
|
||||
*/
|
||||
static final int
|
||||
// ContainmentRelation
|
||||
public static final int
|
||||
ALL_EMPTY = 0,
|
||||
NOT_A_SUPERSET_B = 1,
|
||||
NOT_A_DISJOINT_B = 2,
|
||||
NOT_A_SUBSET_B = 4,
|
||||
NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
|
||||
A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B,
|
||||
A_PROPER_DISJOINT_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
|
||||
A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B,
|
||||
A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B;
|
||||
|
||||
public static int getContainmentRelation(Collection a, Collection b) {
|
||||
/**
|
||||
* Assesses all the possible containment relations between collections A and B with one call.<br>
|
||||
* Returns an int with bits set, according to a "Venn Diagram" view of A vs B.<br>
|
||||
* NOT_A_SUPERSET_B: b - a != {}<br>
|
||||
* NOT_A_DISJOINT_B: a * b != {} // * is intersects<br>
|
||||
* NOT_A_SUBSET_B: a - b != {}<br>
|
||||
* Thus the bits can be used to get the following relations:<br>
|
||||
* for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0<br>
|
||||
* for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0<br>
|
||||
* for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0<br>
|
||||
* for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0<br>
|
||||
* for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0<br>
|
||||
*/
|
||||
public static int getContainmentRelation(Collection a, Collection b) {
|
||||
if (a.size() == 0) {
|
||||
return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B;
|
||||
} else if (b.size() == 0) {
|
||||
|
|
|
@ -185,7 +185,7 @@ public class TestUtilities extends TestFmwk {
|
|||
case CollectionUtilities.A_PROPER_SUBSET_OF_B:
|
||||
checkContainment(b.containsAll(a) && !a.equals(b), a, relation, b);
|
||||
break;
|
||||
case CollectionUtilities.A_PROPER_DISJOINT_B:
|
||||
case CollectionUtilities.NOT_A_EQUALS_B:
|
||||
checkContainment(!CollectionUtilities.containsSome(a, b) && a.size() != 0 && b.size() != 0, a, relation, b);
|
||||
break;
|
||||
case CollectionUtilities.A_PROPER_SUPERSET_B:
|
||||
|
|
|
@ -318,8 +318,23 @@ Name: Unicode_1_Name
|
|||
* @return the unicode map
|
||||
*/
|
||||
public UnicodeMap getUnicodeMap() {
|
||||
return (UnicodeMap) getUnicodeMap_internal().clone();
|
||||
return getUnicodeMap(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the unicode map
|
||||
*/
|
||||
public UnicodeMap getUnicodeMap(boolean getShortest) {
|
||||
if (!getShortest) return (UnicodeMap) getUnicodeMap_internal().clone();
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
//if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
|
||||
String value = getValue(i,true);
|
||||
result.put(i, value);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the unicode map
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2005/03/26 05:40:04 $
|
||||
* $Revision: 1.38 $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.39 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -24,7 +24,7 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
|
||||
public class GenerateData implements UCD_Types {
|
||||
|
||||
static final boolean DEBUG = false;
|
||||
/* static final boolean DEBUG = false;
|
||||
|
||||
static final String HORIZONTAL_LINE = "# ================================================";
|
||||
|
||||
|
@ -156,8 +156,7 @@ public class GenerateData implements UCD_Types {
|
|||
System.out.println("New File: " + newFile);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
System.out.println("Most recent: " + mostRecent);
|
||||
org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
|
||||
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
|
||||
|
@ -180,7 +179,7 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
public static void listStrings(String file, int type, int subtype) throws IOException {
|
||||
Default.ucd = UCD.make("3.1.0");
|
||||
UCD ucd30 = UCD.make("3.0.0");
|
||||
|
@ -199,14 +198,14 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
output.close();
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
public static void generateCompExclusions() throws IOException {
|
||||
|
||||
String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
|
||||
String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false));
|
||||
output.println(UnicodeDataFile.generateDateLine());
|
||||
|
@ -280,11 +279,11 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
public String optionalComment(int cp) { return ""; }
|
||||
/*
|
||||
|
||||
public String valueName(int cp) {
|
||||
return UTF32.length32(ucdData.getDecompositionMapping(cp)) + "";
|
||||
}
|
||||
*/
|
||||
|
||||
public byte status(int cp) {
|
||||
if (getType(cp) == type) return INCLUDE;
|
||||
return EXCLUDE;
|
||||
|
@ -356,12 +355,12 @@ public class GenerateData implements UCD_Types {
|
|||
if (extra != null) checkDuplicate(duplicates, accumulation, extra, "General_Category=" + value);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
addLine(sorted, "xx; T ; True");
|
||||
checkDuplicate(duplicates, accumulation, "T", "xx=True");
|
||||
addLine(sorted, "xx; F ; False");
|
||||
checkDuplicate(duplicates, accumulation, "F", "xx=False");
|
||||
*/
|
||||
|
||||
addLine(sorted, "qc", UCD_Names.YN_TABLE[1], UCD_Names.YN_TABLE_LONG[1], null);
|
||||
checkDuplicate(duplicates, accumulation, UCD_Names.YN_TABLE[1], "qc=" + UCD_Names.YN_TABLE_LONG[1]);
|
||||
addLine(sorted, "qc", UCD_Names.YN_TABLE[0], UCD_Names.YN_TABLE_LONG[0], null);
|
||||
|
@ -440,7 +439,7 @@ public class GenerateData implements UCD_Types {
|
|||
valueAbb = "n/a";
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
String elide = "";
|
||||
if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
|
||||
+ valueAbb
|
||||
|
@ -458,9 +457,9 @@ public class GenerateData implements UCD_Types {
|
|||
+ value
|
||||
+ "}";
|
||||
System.out.println("<tr><td>" + elide + "</td><td>" + abb + "</td><td>" + norm + "</td></tr>");
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
|
||||
if (type == BINARY_PROPERTIES || type == DERIVED) {
|
||||
//if (value.equals(YN_TABLE_LONG[1])) continue;
|
||||
addLine(sorted, PROP_TYPE_NAMES[BINARY][1], valueAbb, value);
|
||||
|
@ -468,7 +467,7 @@ public class GenerateData implements UCD_Types {
|
|||
if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value);
|
||||
continue;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
if (type == COMBINING_CLASS) {
|
||||
String num = up.getValue(NUMBER);
|
||||
|
@ -487,20 +486,20 @@ public class GenerateData implements UCD_Types {
|
|||
while (blockIterator.hasNext()) {
|
||||
addLine(sorted, "blk", "n/a", (String)blockIterator.next(), null);
|
||||
}
|
||||
/*
|
||||
|
||||
UCD.BlockData blockData = new UCD.BlockData();
|
||||
|
||||
int blockId = 0;
|
||||
while (Default.ucd().getBlockData(blockId++, blockData)) {
|
||||
addLine(sorted, "blk", "n/a", blockData.name);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
String filename = "PropertyAliases";
|
||||
String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
|
@ -520,7 +519,7 @@ public class GenerateData implements UCD_Types {
|
|||
filename = "PropertyValueAliases";
|
||||
newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
|
@ -536,7 +535,7 @@ public class GenerateData implements UCD_Types {
|
|||
filename = "PropertyAliasSummary";
|
||||
newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
mostRecent = org.unicode.cldr.util.Utility.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
log.println();
|
||||
log.println(HORIZONTAL_LINE);
|
||||
|
@ -650,13 +649,13 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
|
||||
// accumulate differences
|
||||
/*
|
||||
|
||||
String acc = (String)accumulation.get(toCheck);
|
||||
if (acc == null) {
|
||||
acc = "# \"" + toCheck + "\":\t" + originalComment;
|
||||
}
|
||||
acc += ";\t" + result;
|
||||
*/
|
||||
|
||||
result.add(comment);
|
||||
accumulation.add("# " + result.toString() + ":\t" + toCheck);
|
||||
} else {
|
||||
|
@ -673,7 +672,7 @@ public class GenerateData implements UCD_Types {
|
|||
String newFile = directory + file + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
|
||||
int last = -1;
|
||||
|
@ -682,7 +681,7 @@ public class GenerateData implements UCD_Types {
|
|||
if (up == null) continue;
|
||||
if (up.skipInDerivedListing()) continue;
|
||||
|
||||
/*
|
||||
|
||||
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|
||||
|| i == (BINARY_PROPERTIES | Non_break)
|
||||
|| i == (BINARY_PROPERTIES | CaseFoldTurkishI)
|
||||
|
@ -690,11 +689,11 @@ public class GenerateData implements UCD_Types {
|
|||
|| i == (JOINING_TYPE | JT_U)
|
||||
|| i == (JOINING_GROUP | NO_SHAPING)
|
||||
) continue; // skip zero case
|
||||
*/
|
||||
/*if (skipSpecial == SKIP_SPECIAL
|
||||
|
||||
if (skipSpecial == SKIP_SPECIAL
|
||||
&& i >= (BINARY_PROPERTIES | CompositionExclusion)
|
||||
&& i < (AGE + NEXT_ENUM)) continue;
|
||||
*/
|
||||
|
||||
if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
|
||||
output.println();
|
||||
output.println(HORIZONTAL_LINE);
|
||||
|
@ -741,7 +740,8 @@ public class GenerateData implements UCD_Types {
|
|||
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
|
||||
|
||||
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, fileName);
|
||||
|
@ -750,13 +750,13 @@ public class GenerateData implements UCD_Types {
|
|||
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
|
||||
//PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
|
||||
//String[] batName = {""};
|
||||
//String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
//String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
String[] example = new String[256];
|
||||
|
||||
//log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
|
||||
//log.println(UnicodeDataFile.generateDateLine());
|
||||
/*log.println("#");
|
||||
log.println("#");
|
||||
log.println("# Normalization Test Suite");
|
||||
log.println("# Format:");
|
||||
log.println("#");
|
||||
|
@ -790,7 +790,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
log.println("#");
|
||||
log.println("@Part0 # Specific cases");
|
||||
log.println("#");*/
|
||||
log.println("#");
|
||||
|
||||
for (int j = 0; j < testSuiteCases.length; ++j) {
|
||||
writeLine(testSuiteCases[j], log, false);
|
||||
|
@ -897,6 +897,7 @@ public class GenerateData implements UCD_Types {
|
|||
fc.close();
|
||||
//Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
|
||||
}
|
||||
/*
|
||||
|
||||
static void handleIdentical() throws IOException {
|
||||
DirectoryIterator target = new DirectoryIterator(GEN_DIR + File.separator + "DerivedData");
|
||||
|
@ -916,6 +917,7 @@ public class GenerateData implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
*/
|
||||
static void writeLine(String cc, PrintWriter log, boolean check) {
|
||||
String c = Default.nfc().normalize(cc);
|
||||
String d = Default.nfd().normalize(cc);
|
||||
|
@ -982,14 +984,14 @@ public class GenerateData implements UCD_Types {
|
|||
"\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
|
||||
|
||||
|
||||
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
DiffPropertyLister dpl;
|
||||
UnicodeSet cummulative = new UnicodeSet();
|
||||
|
||||
|
@ -1072,7 +1074,7 @@ public class GenerateData implements UCD_Types {
|
|||
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
|
||||
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
try {
|
||||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
|
@ -1116,7 +1118,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
new DiffPropertyLister("3.2.0", "4.0.0", log).print();
|
||||
/*
|
||||
|
||||
printDiff("110", "200");
|
||||
UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
|
||||
UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
|
||||
|
@ -1157,7 +1159,7 @@ public class GenerateData implements UCD_Types {
|
|||
+ n.format(u31m.count()));
|
||||
log.println();
|
||||
u31m.print(log, false, false, "3.1");
|
||||
*/
|
||||
|
||||
} finally {
|
||||
if (log != null) {
|
||||
log.close();
|
||||
|
@ -1326,5 +1328,5 @@ public class GenerateData implements UCD_Types {
|
|||
+ (start != end ? ".." + Default.ucd().getName(end) : ""));
|
||||
}
|
||||
System.out.println("TrailingZero count: " + result.size());
|
||||
}
|
||||
}*/
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -99,8 +99,8 @@ public final class GenerateStandardizedVariants implements UCD_Types {
|
|||
String directory = "DerivedData/";
|
||||
String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
|
||||
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
//String[] batName = {""};
|
||||
//String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
|
||||
|
||||
String version = Default.ucd().getVersion();
|
||||
int lastDot = version.lastIndexOf('.');
|
||||
|
@ -118,6 +118,6 @@ public final class GenerateStandardizedVariants implements UCD_Types {
|
|||
Utility.appendFile("StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
|
||||
|
||||
out.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
|
||||
//Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
|
||||
* $Date: 2004/10/14 17:54:56 $
|
||||
* $Revision: 1.35 $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.36 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -143,7 +143,7 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
|
||||
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
|
||||
//else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
|
||||
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
|
||||
|
||||
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
|
||||
|
@ -157,7 +157,7 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
|
||||
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
|
||||
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
|
||||
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
|
||||
//else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
|
||||
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
|
||||
|
||||
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
|
||||
|
@ -168,9 +168,9 @@ public final class Main implements UCD_Types {
|
|||
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
|
||||
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
|
||||
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
|
||||
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
//else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
|
||||
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.checkNameList();
|
||||
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
|
||||
//else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
|
||||
|
||||
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
|
||||
|
||||
|
@ -182,7 +182,7 @@ public final class Main implements UCD_Types {
|
|||
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
|
||||
*/
|
||||
// EXTRACTED PROPERTIES
|
||||
|
||||
/*
|
||||
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
|
||||
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedBidiClass");
|
||||
|
@ -230,8 +230,9 @@ public final class Main implements UCD_Types {
|
|||
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
|
||||
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
|
||||
"DerivedData/extracted/", "DerivedNumericValues" );
|
||||
|
||||
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
|
||||
}
|
||||
*/
|
||||
else if (arg.equalsIgnoreCase("StandardizedVariants")) {
|
||||
GenerateStandardizedVariants.generate();
|
||||
|
||||
// OTHER STANDARD PROPERTIES
|
||||
|
@ -244,7 +245,7 @@ public final class Main implements UCD_Types {
|
|||
GenerateCaseFolding.generateSpecialCasing(true);
|
||||
GenerateCaseFolding.generateSpecialCasing(false);
|
||||
|
||||
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
|
||||
/* } else if (arg.equalsIgnoreCase("CompositionExclusions")) {
|
||||
GenerateData.generateCompExclusions();
|
||||
|
||||
} else if (arg.equalsIgnoreCase("DerivedAge")) {
|
||||
|
@ -305,7 +306,7 @@ public final class Main implements UCD_Types {
|
|||
|
||||
} else if (arg.equalsIgnoreCase("listKatakana")) {
|
||||
GenerateData.listKatakana();
|
||||
|
||||
*/
|
||||
/*
|
||||
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
|
||||
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
|
||||
* $Date: 2005/06/24 23:51:52 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,6 +17,11 @@ import java.util.*;
|
|||
import java.io.*;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.dev.test.util.UnicodePropertySource;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
|
@ -24,7 +29,77 @@ import com.ibm.icu.text.UnicodeSetIterator;
|
|||
import com.ibm.text.utility.*;
|
||||
|
||||
public class QuickTest implements UCD_Types {
|
||||
public static void main(String[] args) throws IOException {
|
||||
getBidiMirrored();
|
||||
if (true) return;
|
||||
getLengths("NFC", Default.nfc());
|
||||
getLengths("NFD", Default.nfd());
|
||||
getLengths("NFKC", Default.nfkc());
|
||||
getLengths("NFKD", Default.nfkd());
|
||||
System.out.println("Done");
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static void getBidiMirrored() {
|
||||
ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
|
||||
UnicodeMap status = new UnicodeMap();
|
||||
status.putAll(foo.getSet("generalcategory=ps"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pe"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pi"), "*open/close*");
|
||||
status.putAll(foo.getSet("generalcategory=pf"), "*open/close*");
|
||||
|
||||
UnicodeSet bidiMirroredSet = foo.getSet("bidimirrored=true");
|
||||
status.putAll(bidiMirroredSet, "*core*");
|
||||
UnicodeSet bidiMirroringSet = new UnicodeSet();
|
||||
UnicodeProperty x = foo.getProperty("bidimirroringglyph");
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
String s = x.getValue(i);
|
||||
if (!s.equals(UTF16.valueOf(i))) bidiMirroringSet.add(i);
|
||||
}
|
||||
status.putAll(new UnicodeSet(bidiMirroredSet).removeAll(bidiMirroringSet), "no bidi mirroring");
|
||||
UnicodeSet mathSet = foo.getSet("generalcategory=sm");
|
||||
status.putAll(mathSet, "math");
|
||||
|
||||
UnicodeSet special = new UnicodeSet("[<>]");
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(mathSet); it.next();) {
|
||||
String s = Default.nfkd().normalize(it.codepoint);
|
||||
if (special.containsSome(s)) status.put(it.codepoint, "*special*");
|
||||
}
|
||||
//showStatus(status);
|
||||
// close under nfd
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
if (!Default.ucd().isAssigned(i)) continue;
|
||||
if (!Default.ucd().isPUA(i)) continue;
|
||||
if (Default.nfkc().isNormalized(i)) continue;
|
||||
String oldValue = (String) status.getValue(i);
|
||||
if (oldValue != null) continue;
|
||||
String s = Default.nfkc().normalize(i);
|
||||
if (UTF16.countCodePoint(s) != 1) continue;
|
||||
int cp = UTF16.charAt(s, 0);
|
||||
String value = (String)status.getValue(cp);
|
||||
if (value != null) status.put(i, "nfc-closure-" + value);
|
||||
}
|
||||
showStatus(status, bidiMirroredSet);
|
||||
}
|
||||
|
||||
static BagFormatter bf = new BagFormatter();
|
||||
private static void showStatus(UnicodeMap status, UnicodeSet x) {
|
||||
Collection list = new TreeSet(status.getAvailableValues());
|
||||
for (Iterator it = list.iterator(); it.hasNext(); ) {
|
||||
String value = (String) it.next();
|
||||
if (value == null) continue;
|
||||
UnicodeSet set = status.getSet(value);
|
||||
for (UnicodeSetIterator umi = new UnicodeSetIterator(set); umi.next();) {
|
||||
System.out.println(Utility.hex(umi.codepoint)
|
||||
+ ";\t" + value
|
||||
+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
|
||||
+ ";\t" + Default.ucd().getName(umi.codepoint));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static class Length {
|
||||
String title;
|
||||
int bytesPerCodeUnit;
|
||||
|
@ -50,14 +125,6 @@ public class QuickTest implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Second copy of the entry point as it appears in this listing: it prints
 * the length report for each of the four normalization forms (NFC, NFD,
 * NFKC, NFKD) via getLengths and then prints "Done".
 *
 * NOTE(review): a main with the same signature also appears earlier in this
 * listing (that one calls getBidiMirrored first and returns early); this copy
 * looks like the pre-change location of the method -- confirm before keeping
 * both.
 */
public static void main(String[] args) throws IOException {
|
||||
getLengths("NFC", Default.nfc());
|
||||
getLengths("NFD", Default.nfd());
|
||||
getLengths("NFKC", Default.nfkc());
|
||||
getLengths("NFKD", Default.nfkd());
|
||||
System.out.println("Done");
|
||||
}
|
||||
|
||||
static final int skip = (1<<UCD.UNASSIGNED) | (1<<UCD.PRIVATE_USE) | (1<<UCD.SURROGATE);
|
||||
/**
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Counter.java,v $
|
||||
* $Date: 2001/08/31 00:19:16 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -44,13 +44,31 @@ public final class Counter {
|
|||
}
|
||||
}
|
||||
|
||||
public void add(String obj) {
|
||||
public void add(Object obj, int countValue) {
|
||||
RWInteger count = (RWInteger)map.get(obj);
|
||||
if (count == null) {
|
||||
count = new RWInteger();
|
||||
map.put(obj, count);
|
||||
if (count == null) map.put(obj, count = new RWInteger());
|
||||
count.value += countValue;
|
||||
}
|
||||
|
||||
public int getCount(Object obj) {
|
||||
RWInteger count = (RWInteger) map.get(obj);
|
||||
return count == null ? 0 : count.value;
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
map.clear();
|
||||
}
|
||||
|
||||
public int getTotal() {
|
||||
int count = 0;
|
||||
for (Iterator it = map.keySet().iterator(); it.hasNext();) {
|
||||
count += ((RWInteger) map.get(it.next())).value;
|
||||
}
|
||||
count.value += obj.length();
|
||||
return count;
|
||||
}
|
||||
|
||||
public int getItemCount() {
|
||||
return map.size();
|
||||
}
|
||||
|
||||
public Map getSortedByCount() {
|
||||
|
|
|
@ -1,42 +1,491 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $
|
||||
* $Date: 2005/03/04 02:50:26 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $
|
||||
* $Date: 2005/10/11 19:39:15 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.utility;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
import java.util.zip.ZipInputStream;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
import java.text.*;
|
||||
import java.io.*;
|
||||
import java.nio.Buffer;
|
||||
|
||||
import com.ibm.icu.dev.test.util.DataInputCompressor;
|
||||
import com.ibm.icu.dev.test.util.DataOutputCompressor;
|
||||
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
|
||||
import com.ibm.icu.dev.test.util.UnicodeLabel;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.dev.test.util.UnicodePropertySource;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.text.UCD.Default;
|
||||
|
||||
public class TestUtility {
|
||||
/*
|
||||
static public class MyEnum extends EnumBase {
|
||||
public static MyEnum
|
||||
ZEROED = (MyEnum) makeNext(myEnum.getClass()),
|
||||
SHIFTED = (MyEnum) makeNext(),
|
||||
NON_IGNORABLE = (MyEnum) makeNext(),
|
||||
|
||||
FIRST_ENUM = ZEROED,
|
||||
LAST_ENUM = NON_IGNORABLE;
|
||||
public MyEnum next(int value) {
|
||||
return (MyEnum) internalNext(value);
|
||||
}
|
||||
protected MyEnum() {}
|
||||
}
|
||||
/*
|
||||
static public class MyEnum extends EnumBase {
|
||||
public static MyEnum
|
||||
ZEROED = (MyEnum) makeNext(myEnum.getClass()),
|
||||
SHIFTED = (MyEnum) makeNext(),
|
||||
NON_IGNORABLE = (MyEnum) makeNext(),
|
||||
|
||||
FIRST_ENUM = ZEROED,
|
||||
LAST_ENUM = NON_IGNORABLE;
|
||||
public MyEnum next(int value) {
|
||||
return (MyEnum) internalNext(value);
|
||||
}
|
||||
protected MyEnum() {}
|
||||
}
|
||||
*/
|
||||
static final boolean USE_FILE = true;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
static public void main (String[] args) {
|
||||
for (MyEnum i = MyEnum.FIRST_ENUM; i != null; i = i.next()) {
|
||||
System.out.println(i.getValue());
|
||||
static public void main(String[] args) throws Exception {
|
||||
tryFileUnicodeProperty();
|
||||
check();
|
||||
int iterations = 1;
|
||||
//testStreamCompressor();
|
||||
UnicodeMap umap = new UnicodeMap();
|
||||
umap.put(0,"abcdefg");
|
||||
if (false) for (int i = 0; i < 256; ++i) {
|
||||
umap.put(i, String.valueOf(i&0xF0));
|
||||
}
|
||||
int total = testUnicodeMapSerialization(1, iterations, "dummy", umap);
|
||||
|
||||
//if (true) return;
|
||||
//UnicodeLabel ul;
|
||||
|
||||
ICUPropertyFactory p = ICUPropertyFactory.make();
|
||||
total = 0;
|
||||
BreakIterator bk = BreakIterator.getWordInstance(Locale.ENGLISH);
|
||||
Matcher nameMatch = Pattern.compile("Name").matcher("");
|
||||
|
||||
UnicodeProperty gc = p.getProperty("General_Category");
|
||||
UnicodeSet checkSet = gc.getSet("Cn").addAll(gc.getSet("Co")).addAll(gc.getSet("Cs")).complement();
|
||||
UnicodeSetIterator checkSetIterator = new UnicodeSetIterator(checkSet);
|
||||
UnicodeProperty hangulSyllableType = p.getProperty("Hangul_Syllable_Type");
|
||||
UnicodeSet hangulSyllable = hangulSyllableType.getSet("LVT_Syllable").addAll(hangulSyllableType.getSet("LV_Syllable"));
|
||||
|
||||
|
||||
for (Iterator pnames = p.getAvailableNames().iterator(); pnames
|
||||
.hasNext();) {
|
||||
String pname = (String) pnames.next();
|
||||
if (!nameMatch.reset(pname).matches()) continue;
|
||||
System.out.println();
|
||||
UnicodeProperty up = p.getProperty(pname);
|
||||
int ptype = up.getType();
|
||||
System.out.print("Name:\t" + pname + "\tType:\t" + up.getTypeName(ptype));
|
||||
if (up.isType(up.STRING_MASK)) {
|
||||
boolean excludeHangul = pname.startsWith("isNF");
|
||||
umap = new UnicodeMap();
|
||||
checkSetIterator.reset();
|
||||
while (checkSetIterator.next()) {
|
||||
int i = checkSetIterator.codepoint;
|
||||
if (excludeHangul && hangulSyllable.contains(i)) continue;
|
||||
String value = up.getValue(i);
|
||||
if (equals(i, value)) continue;
|
||||
umap.put(i, value);
|
||||
//System.out.println("Adding " + Utility.hex(i) + ", " + Utility.hex(value));
|
||||
}
|
||||
} else {
|
||||
UnicodeProperty sampleProp = p.getProperty(pname);
|
||||
umap = sampleProp.getUnicodeMap();
|
||||
if (pname.equals("Name")) {
|
||||
umap = fixNameMap(bk, umap);
|
||||
}
|
||||
}
|
||||
total = testUnicodeMapSerialization(iterations, total, pname, umap);
|
||||
}
|
||||
String[] hanProps = {"kIICore", "kRSUnicode"};
|
||||
for (int i = 0; i < hanProps.length; ++i) {
|
||||
String pname = hanProps[i];
|
||||
if (!nameMatch.reset(pname).matches()) continue;
|
||||
testHanProp(iterations, total, pname, "Han");
|
||||
}
|
||||
|
||||
System.out.println();
|
||||
System.out.println("Done");
|
||||
}
|
||||
|
||||
|
||||
static void check() throws IOException, ClassNotFoundException {
|
||||
UnicodeMap m = new UnicodeMap();
|
||||
m.put(1,"abc");
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
ObjectOutputStream oos = new ObjectOutputStream(out);
|
||||
oos.writeBoolean(true);
|
||||
oos.writeUTF("abcdefg");
|
||||
oos.writeObject(m);
|
||||
oos.close();
|
||||
|
||||
int size = out.size();
|
||||
byte[] buffer = out.toByteArray();
|
||||
System.out.println(showBuffer(buffer, size));
|
||||
|
||||
InputStream in = new ByteArrayInputStream(buffer, 0, (int)size);
|
||||
ObjectInputStream ois = new ObjectInputStream(in);
|
||||
System.out.println(ois.readBoolean());
|
||||
System.out.println(ois.readUTF());
|
||||
System.out.println(ois.readObject());
|
||||
ois.close();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static boolean equals(int i, String value) {
|
||||
int len = value.length();
|
||||
if (len < 0 || len > 2) return false;
|
||||
if (len == 1) return i == value.charAt(0);
|
||||
if (i <= 0xFFFF) return false;
|
||||
return i == UTF16.charAt(value,0);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static void testHanProp(int iterations, int total, String pname, String type) throws IOException, ClassNotFoundException {
|
||||
System.out.println();
|
||||
UnicodeMap umap = Default.ucd().getHanValue(pname);
|
||||
System.out.println(umap);
|
||||
umap.setMissing("na");
|
||||
System.out.print("Name:\t" + pname + "\tType:\t" + type);
|
||||
total = testUnicodeMapSerialization(iterations, total, pname, umap);
|
||||
}
|
||||
|
||||
static String outdircore = "C:\\DATA\\bin\\UCD_Data";
|
||||
static String outdir = outdircore + "4.1.0\\";
|
||||
/**
|
||||
* @param pname
|
||||
*
|
||||
*/
|
||||
private static int testUnicodeMapSerialization(int iterations, int total, String pname, UnicodeMap umap) throws IOException, ClassNotFoundException {
|
||||
System.out.print("\tValue Count:\t" + umap.getAvailableValues().size());
|
||||
|
||||
String filename = outdir + pname + ".bin";
|
||||
OutputStream out;
|
||||
ByteArrayOutputStream baout = null;
|
||||
if (USE_FILE) {
|
||||
out = new FileOutputStream(filename);
|
||||
} else {
|
||||
out = baout = new ByteArrayOutputStream();
|
||||
}
|
||||
out = new GZIPOutputStream(out);
|
||||
ObjectOutputStream oos = new ObjectOutputStream(out);
|
||||
//Random rand = new Random();
|
||||
|
||||
/* if (false) {
|
||||
oos.writeObject(umap);
|
||||
oos.close();
|
||||
buffer = baout.toByteArray();
|
||||
in = new ByteArrayInputStream(buffer, 0, baout.size());
|
||||
ois = new ObjectInputStream(in);
|
||||
reverseMap = (UnicodeMap) ois.readObject();
|
||||
}
|
||||
*/
|
||||
// UnicodeMap.StreamCompressor sc = new UnicodeMap.StreamCompressor();
|
||||
// int test = (int)Math.abs(rand.nextGaussian()*100000);
|
||||
// System.out.print(Integer.toString(test, 16).toUpperCase());
|
||||
// sc.writeInt(out, test);
|
||||
// out.close();
|
||||
//oos.writeBoolean(true);
|
||||
//oos.writeUTF("abcdefg");
|
||||
oos.writeObject(umap);
|
||||
oos.close();
|
||||
|
||||
|
||||
long size;
|
||||
byte[] buffer;
|
||||
if (USE_FILE) {
|
||||
size = new File(filename).length();
|
||||
} else {
|
||||
size = baout.size();
|
||||
buffer = baout.toByteArray();
|
||||
if (DEBUG) System.out.println(showBuffer(buffer, size));
|
||||
}
|
||||
System.out.print("\t"+"Size:\t" + size);
|
||||
|
||||
|
||||
// only measure read time
|
||||
UnicodeMap reverseMap = null;
|
||||
long start = System.currentTimeMillis();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
InputStream in;
|
||||
if (USE_FILE) {
|
||||
in = new FileInputStream(filename);
|
||||
} else {
|
||||
in = new ByteArrayInputStream(buffer, 0, (int)size);
|
||||
}
|
||||
in = new GZIPInputStream(in);
|
||||
// int x = sc.readInt(in);
|
||||
// if (x != test) System.out.println("Failure");
|
||||
// System.out.println("\t=> " + Integer.toString(x, 16).toUpperCase());
|
||||
ObjectInputStream ois = new ObjectInputStream(in);
|
||||
//System.out.println(ois.readBoolean());
|
||||
//System.out.println(ois.readUTF());
|
||||
|
||||
try {
|
||||
reverseMap = (UnicodeMap) ois.readObject();
|
||||
} catch (java.io.OptionalDataException e1) {
|
||||
System.out.println(e1.eof + "\t" + e1.length);
|
||||
// TODO Auto-generated catch block
|
||||
e1.printStackTrace();
|
||||
}
|
||||
ois.close();
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
if (!reverseMap.equals(umap)) {
|
||||
System.out.println("Failed roundtrip");
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String main = (String) umap.getValue(i);
|
||||
String rev = (String) reverseMap.getValue(i);
|
||||
if (UnicodeMap.areEqual(main, rev))
|
||||
continue;
|
||||
System.out.println(Utility.hex(i) + "\t'" + main + "',\t'"
|
||||
+ rev + "'");
|
||||
}
|
||||
}
|
||||
//out.toByteArray();
|
||||
total += size;
|
||||
System.out.print("\tTime:\t" + (end - start) / (iterations * 1.0)
|
||||
+ "\tmsecs (raw:\t" + ((end - start) / 1000.0) + "\tsecs)");
|
||||
/* with Vanilla Serialization
|
||||
* Size: 24131
|
||||
* Time: 1.9488 msecs (raw: 9.744 secs)
|
||||
* With my serialization
|
||||
* Size: 19353
|
||||
* Time: 0.8652 msecs (raw: 4.326 secs)
|
||||
* With my serialization, and compression of ints
|
||||
* Size: 8602
|
||||
* Time: 2.784 msecs (raw: 1.392 secs)
|
||||
* With delta encoding
|
||||
* Size: 5226
|
||||
* Time: 1.924 msecs (raw: 0.962 secs)
|
||||
* Name:
|
||||
* Size: 776926
|
||||
* Time: 180.3 msecs (raw: 1.803 secs)
|
||||
*/
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static String showBuffer(byte[] buffer, long size) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int j = 0; j < size; ++j) {
|
||||
if (j != 0) result.append(' ');
|
||||
result.append(Utility.hex(buffer[j]&0xFF,2));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static void testStreamCompressor() throws IOException {
|
||||
Object[] tests = {
|
||||
UTF16.valueOf(0x10FFFF),"\u1234", "abc",
|
||||
new Long(-3), new Long(12345),
|
||||
new Short(Short.MAX_VALUE), new Short(Short.MIN_VALUE),
|
||||
new Integer(Integer.MAX_VALUE), new Integer(Integer.MIN_VALUE),
|
||||
new Long(Long.MIN_VALUE), new Long(Long.MAX_VALUE)};
|
||||
|
||||
for (int i = 0; i < tests.length; ++i) {
|
||||
Object source = tests[i];
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream(100);
|
||||
ObjectOutputStream out2 = new ObjectOutputStream(out);
|
||||
ByteArrayInputStream in;
|
||||
ObjectInputStream ois;
|
||||
byte[] buffer;
|
||||
DataOutputCompressor sc = new DataOutputCompressor(out2);
|
||||
long y = 0;
|
||||
if (source instanceof String) {
|
||||
sc.writeUTF((String)source);
|
||||
} else {
|
||||
y = ((Number)source).longValue();
|
||||
sc.writeLong(y);
|
||||
}
|
||||
out2.close();
|
||||
buffer = out.toByteArray();
|
||||
showBytes(buffer, out.size());
|
||||
System.out.println();
|
||||
in = new ByteArrayInputStream(buffer, 0, out.size());
|
||||
ObjectInputStream in2 = new ObjectInputStream(in);
|
||||
DataInputCompressor isc = new DataInputCompressor(in2);
|
||||
boolean success = false;
|
||||
Object result;
|
||||
boolean isString = source instanceof String;
|
||||
long x = 0;
|
||||
if (isString) {
|
||||
result = isc.readUTF();
|
||||
System.out.println(i + "\t" + source
|
||||
+ "\t" + result
|
||||
+ (source.equals(result) ? "\tSuccess" : "\tBitter Failure"));
|
||||
} else {
|
||||
x = isc.readLong();
|
||||
result = new Long(x);
|
||||
System.out.println(i + "\t" + y
|
||||
+ x
|
||||
+ "\t" + Utility.hex(y)
|
||||
+ "\t" + Utility.hex(x)
|
||||
+ (x == y ? "\tSuccess" : "\tBitter Failure"));
|
||||
}
|
||||
|
||||
in2.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static void showBytes(byte[] buffer, int len) {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
System.out.print(Utility.hex(buffer[i]&0xFF,2) + " ");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) {
|
||||
UnicodeMap temp = new UnicodeMap();
|
||||
Counter counter = new Counter();
|
||||
for (int i = 0; i < 0x10FFFF; ++i) {
|
||||
String name = (String) umap.getValue(i);
|
||||
if (name == null)
|
||||
continue;
|
||||
if (name.startsWith("CJK UNIFIED IDEOGRAPH-"))
|
||||
name = "*";
|
||||
else if (name.startsWith("CJK COMPATIBILITY IDEOGRAPH-"))
|
||||
name = "#";
|
||||
else if (name.startsWith("HANGUL SYLLABLE ")) name = "@";
|
||||
bk.setText(name);
|
||||
int start = 0;
|
||||
while (true) {
|
||||
int end = bk.next();
|
||||
if (end == bk.DONE)
|
||||
break;
|
||||
String word = name.substring(start, end);
|
||||
counter.add(word, Math.max(0, word.length() - 2));
|
||||
start = end;
|
||||
}
|
||||
temp.put(i, name);
|
||||
}
|
||||
if (false) {
|
||||
Map m = counter.getSortedByCount();
|
||||
int count = 0;
|
||||
int running = 0;
|
||||
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
|
||||
Counter.RWInteger c = (Counter.RWInteger) it.next();
|
||||
String value = (String) m.get(c);
|
||||
running += c.value;
|
||||
System.out.println(count++ + "\t" + c + "\t" + running
|
||||
+ "\t" + value);
|
||||
}
|
||||
for (UnicodeMap.MapIterator it2 = new UnicodeMap.MapIterator(
|
||||
temp); it2.nextRange();) {
|
||||
System.out.println(Utility.hex(it2.codepoint) + "\t"
|
||||
+ Utility.hex(it2.codepointEnd) + "\t"
|
||||
+ it2.value);
|
||||
}
|
||||
}
|
||||
umap = temp;
|
||||
return umap;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static void tryFileUnicodeProperty() {
|
||||
UnicodeProperty.Factory factory = FileUnicodeProperty.Factory.make("4.1.0");
|
||||
System.out.println(factory.getAvailableNames());
|
||||
UnicodeProperty prop = factory.getProperty("White_Space");
|
||||
System.out.println(prop.getUnicodeMap());
|
||||
prop = factory.getProperty("kRSUnicode");
|
||||
System.out.println();
|
||||
System.out.println(prop.getUnicodeMap());
|
||||
}
|
||||
|
||||
public static class FileUnicodeProperty extends UnicodeProperty {
|
||||
private File file;
|
||||
private String version;
|
||||
private UnicodeMap map;
|
||||
|
||||
private FileUnicodeProperty(File file, String version) {
|
||||
this.file = file;
|
||||
this.version = version;
|
||||
String base = file.getName();
|
||||
setName(base.substring(0, base.length()-4)); // subtract .bin
|
||||
}
|
||||
|
||||
public static class Factory extends UnicodeProperty.Factory {
|
||||
private Factory() {}
|
||||
public static Factory make(String version) {
|
||||
Factory result = new Factory();
|
||||
File f = new File(outdircore + version + "\\");
|
||||
File[] files = f.listFiles();
|
||||
for (int i = 0; i < files.length; ++i) {
|
||||
result.add(new FileUnicodeProperty(files[i], version));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
protected List _getAvailableValues(List result) {
|
||||
if (map == null) make();
|
||||
return (List) map.getAvailableValues(result);
|
||||
}
|
||||
|
||||
protected String _getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see com.ibm.icu.dev.test.util.UnicodeProperty#_getValue(int)
|
||||
*/
|
||||
protected String _getValue(int codepoint) {
|
||||
if (map == null) make();
|
||||
return (String)map.getValue(codepoint);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private void make() {
|
||||
try {
|
||||
InputStream in = new FileInputStream(file.getCanonicalPath());
|
||||
ObjectInputStream ois = new ObjectInputStream(in);
|
||||
map = (UnicodeMap) ois.readObject();
|
||||
ois.close();
|
||||
} catch (Exception e) {
|
||||
throw (InternalError)new InternalError("Can't create property").initCause(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected List _getNameAliases(List result) {
|
||||
result.add(getName());
|
||||
return result;
|
||||
}
|
||||
|
||||
protected List _getValueAliases(String valueAlias, List result) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
|
@ -35,7 +35,7 @@ public class UnicodeDataFile {
|
|||
String[] batName2 = {""};
|
||||
mostRecent = UnicodeDataFile.generateBat(directory, filename, newSuffix, fileType, batName2);
|
||||
batName = batName2[0];
|
||||
filename = filename;
|
||||
this.filename = filename;
|
||||
|
||||
if (!isHTML) {
|
||||
out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
|
|
Loading…
Add table
Reference in a new issue