ICU-4677 update tools

X-SVN-Rev: 18658
This commit is contained in:
Mark Davis 2005-10-11 19:39:16 +00:00
parent 1437101edb
commit 953d673966
11 changed files with 782 additions and 153 deletions

View file

@ -7,19 +7,33 @@
package com.ibm.icu.dev.demo.chart;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import com.ibm.icu.dev.test.util.*;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.lang.*;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.VersionInfo;
public class UnicodeChart {
static int surrogateType = UCharacter.getType('\ud800');
static int privateUseType = UCharacter.getType('\ue000');
public static void main(String[] args) throws IOException {
//int rowWidth = 256;
PrintWriter pw = BagFormatter.openUTF8Writer("", "UnicodeChart.html");
VersionInfo vi = UCharacter.getUnicodeVersion();
String version = vi.getMajor() + "." + vi.getMinor() + "." + vi.getMilli();
PrintWriter pw = BagFormatter.openUTF8Writer("C:\\DATA\\GEN\\", "UnicodeChart.html");
pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
pw.println("<script type='text/javascript' src='UnicodeChart.js'></script>");
pw.println("<link rel='stylesheet' type='text/css' href='UnicodeChart.css'>");
pw.println("<title>Unicode 4.0 Chart</title>");
pw.println("<title>Unicode " + version + " Chart</title>");
pw.println("</head><body bgcolor='#FFFFFF'>");
pw.println("<table border='1' cellspacing='0'><caption><h1>Unicode 4.0 Chart</h1></caption>");
pw.println("<table border='1' cellspacing='0'><caption><h1>Unicode " + version + " Chart</h1></caption>");
/*pw.println("<tr><th></th>");
for (int j = 0; j < rowWidth; ++j) {
@ -29,58 +43,118 @@ public class UnicodeChart {
*/
// TODO: fix Utility to take ints
int surrogateType = UCharacter.getType('\ud800');
int privateUseType = UCharacter.getType('\ue000');
System.out.println("Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
System.out.println("Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
System.out.println("//Surrogate Type: Java=" + Character.SURROGATE + ", ICU=" + surrogateType);
System.out.println("//Private-Use Type: Java=" + Character.PRIVATE_USE + ", ICU=" + privateUseType);
//boolean gotOne = true;
int columns = 0;
int limit = 0x10FFFF;
int limit = 0x110000/16;
char lastType = 'x';
int lastCount = 0;
pw.println("<script>");
pw.print("top();");
int itemCount = 1;
for (int i = 0; i <= limit; ++i) {
if ((i & 0xFF) == 0) System.out.println(hex(i>>8,2) + "__");
columns++;
//pw.print("<tr><th>" + hex(i>>8,2) + "__</th>");
char type = 'v';
int cat = UCharacter.getType(i);
if (UCharacter.hasBinaryProperty(i, UProperty.NONCHARACTER_CODE_POINT)) {
type = 'n';
} else if (cat == Character.UNASSIGNED || cat == surrogateType || cat == privateUseType) {
type = 'u';
} else if (UCharacter.isUWhiteSpace(i)) {
type = 'w';
} else if (UCharacter.hasBinaryProperty(i, UProperty.DEFAULT_IGNORABLE_CODE_POINT)) {
type = 'i';
// info_number maps each 16-character type string (one type char per code point in a row of 16) to its Integer index; number_info is the reverse list (index -> type string)
Map info_number = new TreeMap();
List number_info = new ArrayList();
StringBuffer sb = new StringBuffer();
int lastInfo = -1;
int sameCount = 0;
System.out.println("var charRanges = [");
for (int i = 0; i < limit; ++i) {
// get the string of info, and get its number
sb.setLength(0);
for (int j = 0; j < 16; ++j) {
int cp = i*16+j;
char type = getType(cp);
sb.append(type);
}
String info = sb.toString();
Integer s = (Integer) info_number.get(info);
if (s == null) {
info_number.put(info, s=new Integer(number_info.size()));
number_info.add(info);
}
// write a line whenever the value changes
if (lastInfo == s.intValue()) {
sameCount++;
} else {
type = 'v';
if (lastInfo != -1) System.out.println(sameCount + "," + lastInfo + ",");
sameCount = 1;
lastInfo = s.intValue();
}
if (type != lastType) {
if (lastCount != 0) pw.print(lastType + "(" + lastCount + ");");
lastType = type;
lastCount = 0;
++itemCount;
if ((itemCount & 0xF) == 0) pw.println();
}
++lastCount;
}
pw.println(lastType + "(" + lastCount + ");"); // finish last row
// write last line
System.out.println(sameCount + "," + lastInfo);
System.out.println("];");
// now write out array
int count = 0;
UnicodeMap um = new UnicodeMap();
System.out.println("var charInfo = [");
for (Iterator it = number_info.iterator(); it.hasNext();) {
String info = (String) it.next();
System.out.println("'" + info + "',");
}
System.out.println("];");
// write out blocks
Map blockMap = new TreeMap();
int startValue = -1;
int lastEnum = -1;
for (int i = 0; i <= 0x10FFFF; ++i) {
int enum = UCharacter.getIntPropertyValue(i,UProperty.BLOCK);
if (enum == lastEnum) continue;
if (lastEnum != -1) {
String s = UCharacter.getPropertyValueName(UProperty.BLOCK, lastEnum, UProperty.NameChoice.LONG);
blockMap.put(s, hex(startValue,0) + "/" + hex(i - startValue,0));
System.out.println(s + ": " + blockMap.get(s));
}
lastEnum = enum;
startValue = i;
}
String s = UCharacter.getPropertyValueName(UProperty.BLOCK, lastEnum, UProperty.NameChoice.LONG);
blockMap.put(s, hex(startValue,0) + "/" + hex(0x110000 - startValue,0));
blockMap.remove("No_Block");
for (Iterator it = blockMap.keySet().iterator(); it.hasNext();) {
String blockName = (String)it.next();
String val = (String) blockMap.get(blockName);
System.out.println("<option value='" + val + "'>" + blockName + "</option>");
}
// <option value="4DC0">Yijing Hexagram Symbols</option>
pw.println("</script></tr></table><p></p>");
pw.println("<table><caption>Key</caption>");
pw.println("<tr><td>X</td><td class='left'>Graphic characters</td></tr>");
pw.println("<tr><td>\u00A0</td><td class='left'>Whitespace</td></tr>");
pw.println("<tr><td class='i'>&nbsp;</td><td class='left'>Other Default Ignorable</td></tr>");
pw.println("<tr><td class='u'>&nbsp;</td><td class='left'>Undefined, Private Use, or Surrogates</td></tr>");
pw.println("<tr><td class='n'>&nbsp;</td><td class='left'>Noncharacter</td></tr>");
pw.println("<tr><td class='i'>\u00A0</td><td class='left'>Other Default Ignorable</td></tr>");
pw.println("<tr><td class='u'>\u00A0</td><td class='left'>Undefined, Private Use, or Surrogates</td></tr>");
pw.println("<tr><td class='n'>\u00A0</td><td class='left'>Noncharacter</td></tr>");
pw.println("</table>");
pw.println("<p>Copyright \u00A9 2003, Mark Davis. All Rights Reserved.</body></html>");
pw.close();
System.out.println("columns: " + columns);
System.out.println("//columns: " + columns);
}
/**
 * Classifies a code point into the one-character code used by the chart output.
 * <p>
 * The checks are applied in priority order; the first one that matches wins:
 * <ul>
 * <li>'n' - the code point has the NONCHARACTER_CODE_POINT binary property</li>
 * <li>'u' - its general category is unassigned, surrogate, or private use</li>
 * <li>'w' - {@code UCharacter.isUWhiteSpace} reports it as whitespace</li>
 * <li>'i' - it has the DEFAULT_IGNORABLE_CODE_POINT binary property</li>
 * <li>'v' - anything else</li>
 * </ul>
 *
 * @param i the code point to classify
 * @return one of 'n', 'u', 'w', 'i' or 'v'
 */
private static char getType(int i) {
    final int category = UCharacter.getType(i);
    if (UCharacter.hasBinaryProperty(i, UProperty.NONCHARACTER_CODE_POINT)) {
        return 'n';
    }
    final boolean unusable = category == Character.UNASSIGNED
            || category == surrogateType
            || category == privateUseType;
    if (unusable) {
        return 'u';
    }
    if (UCharacter.isUWhiteSpace(i)) {
        return 'w';
    }
    return UCharacter.hasBinaryProperty(i, UProperty.DEFAULT_IGNORABLE_CODE_POINT) ? 'i' : 'v';
}
static String hex(int i, int padTo) {
String result = Integer.toHexString(i).toUpperCase(java.util.Locale.ENGLISH);

View file

@ -206,29 +206,32 @@ public final class CollectionUtilities {
}
/**
* Returns an int with bits set.
* Bit 4: a - b != {}
* Bit 2: a * b != {} // * is intersects
* Bit 1: b - a != {}
* Thus the bits can be used to get the following relations, plus
* for A_SUPERSET_B, use (x & NOT_A_SUPERSET_B) == 0
* for A_SUBSET_B, use (x & NOT_A_SUBSET_B) == 0
* for A_EQUALS_B, use (x & A_PROPER_DISJOINT_B) == 0
* for A_DISJOINT_B, use (x & NOT_A_DISJOINT_B) == 0
* for A_OVERLAPS_B, use (x & NOT_A_DISJOINT_B) == 1
* Used for results of getContainmentRelation
*/
static final int
// ContainmentRelation
public static final int
ALL_EMPTY = 0,
NOT_A_SUPERSET_B = 1,
NOT_A_DISJOINT_B = 2,
NOT_A_SUBSET_B = 4,
NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B,
A_PROPER_DISJOINT_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B,
A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B,
A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B;
public static int getContainmentRelation(Collection a, Collection b) {
/**
* Assesses all the possible containment relations between collections A and B with one call.<br>
* Returns an int with bits set, according to a "Venn Diagram" view of A vs B.<br>
* NOT_A_SUPERSET_B: b - a != {}<br>
* NOT_A_DISJOINT_B: a * b != {} // * is intersects<br>
* NOT_A_SUBSET_B: a - b != {}<br>
* Thus the bits can be used to get the following relations:<br>
* for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0<br>
* for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0<br>
* for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0<br>
* for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0<br>
* for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0<br>
*/
public static int getContainmentRelation(Collection a, Collection b) {
if (a.size() == 0) {
return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B;
} else if (b.size() == 0) {

View file

@ -185,7 +185,7 @@ public class TestUtilities extends TestFmwk {
case CollectionUtilities.A_PROPER_SUBSET_OF_B:
checkContainment(b.containsAll(a) && !a.equals(b), a, relation, b);
break;
case CollectionUtilities.A_PROPER_DISJOINT_B:
case CollectionUtilities.NOT_A_EQUALS_B:
checkContainment(!CollectionUtilities.containsSome(a, b) && a.size() != 0 && b.size() != 0, a, relation, b);
break;
case CollectionUtilities.A_PROPER_SUPERSET_B:

View file

@ -318,8 +318,23 @@ Name: Unicode_1_Name
* @return the unicode map
*/
public UnicodeMap getUnicodeMap() {
return (UnicodeMap) getUnicodeMap_internal().clone();
return getUnicodeMap(false);
}
/**
 * Returns a UnicodeMap of this property's values.
 * <p>
 * When {@code getShortest} is false the result is a clone of the internal map.
 * When it is true a new map is built by calling {@code getValue(cp, true)} for
 * every code point from U+0000 through U+10FFFF (presumably the shortest form
 * of each value - confirm against {@code getValue}).
 *
 * @param getShortest whether to build the map from {@code getValue(cp, true)}
 *                    instead of cloning the internal map
 * @return the unicode map; a fresh object the caller may modify in either case
 */
public UnicodeMap getUnicodeMap(boolean getShortest) {
    if (getShortest) {
        UnicodeMap shortest = new UnicodeMap();
        int cp = 0;
        while (cp <= 0x10FFFF) {
            shortest.put(cp, getValue(cp, true));
            ++cp;
        }
        return shortest;
    }
    return (UnicodeMap) getUnicodeMap_internal().clone();
}
/**
* @return the unicode map

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2005/03/26 05:40:04 $
* $Revision: 1.38 $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.39 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ import com.ibm.icu.text.UnicodeSet;
public class GenerateData implements UCD_Types {
static final boolean DEBUG = false;
/* static final boolean DEBUG = false;
static final String HORIZONTAL_LINE = "# ================================================";
@ -156,8 +156,7 @@ public class GenerateData implements UCD_Types {
System.out.println("New File: " + newFile);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
System.out.println("Most recent: " + mostRecent);
org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
doHeader(fileName + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
@ -180,7 +179,7 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
/*
public static void listStrings(String file, int type, int subtype) throws IOException {
Default.ucd = UCD.make("3.1.0");
UCD ucd30 = UCD.make("3.0.0");
@ -199,14 +198,14 @@ public class GenerateData implements UCD_Types {
}
output.close();
}
*/
public static void generateCompExclusions() throws IOException {
String newFile = "DerivedData/CompositionExclusions" + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", "CompositionExclusions", UnicodeDataFile.getFileSuffix(true), batName);
output.println("# CompositionExclusions" + UnicodeDataFile.getFileSuffix(false));
output.println(UnicodeDataFile.generateDateLine());
@ -280,11 +279,11 @@ public class GenerateData implements UCD_Types {
}
public String optionalComment(int cp) { return ""; }
/*
public String valueName(int cp) {
return UTF32.length32(ucdData.getDecompositionMapping(cp)) + "";
}
*/
public byte status(int cp) {
if (getType(cp) == type) return INCLUDE;
return EXCLUDE;
@ -356,12 +355,12 @@ public class GenerateData implements UCD_Types {
if (extra != null) checkDuplicate(duplicates, accumulation, extra, "General_Category=" + value);
}
/*
addLine(sorted, "xx; T ; True");
checkDuplicate(duplicates, accumulation, "T", "xx=True");
addLine(sorted, "xx; F ; False");
checkDuplicate(duplicates, accumulation, "F", "xx=False");
*/
addLine(sorted, "qc", UCD_Names.YN_TABLE[1], UCD_Names.YN_TABLE_LONG[1], null);
checkDuplicate(duplicates, accumulation, UCD_Names.YN_TABLE[1], "qc=" + UCD_Names.YN_TABLE_LONG[1]);
addLine(sorted, "qc", UCD_Names.YN_TABLE[0], UCD_Names.YN_TABLE_LONG[0], null);
@ -440,7 +439,7 @@ public class GenerateData implements UCD_Types {
valueAbb = "n/a";
}
/*
String elide = "";
if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{"
+ valueAbb
@ -458,9 +457,9 @@ public class GenerateData implements UCD_Types {
+ value
+ "}";
System.out.println("<tr><td>" + elide + "</td><td>" + abb + "</td><td>" + norm + "</td></tr>");
*/
/*
if (type == BINARY_PROPERTIES || type == DERIVED) {
//if (value.equals(YN_TABLE_LONG[1])) continue;
addLine(sorted, PROP_TYPE_NAMES[BINARY][1], valueAbb, value);
@ -468,7 +467,7 @@ public class GenerateData implements UCD_Types {
if (!value.equalsIgnoreCase(valueAbb)) checkDuplicate(duplicates, accumulation, valueAbb, value);
continue;
}
*/
if (type == COMBINING_CLASS) {
String num = up.getValue(NUMBER);
@ -487,20 +486,20 @@ public class GenerateData implements UCD_Types {
while (blockIterator.hasNext()) {
addLine(sorted, "blk", "n/a", (String)blockIterator.next(), null);
}
/*
UCD.BlockData blockData = new UCD.BlockData();
int blockId = 0;
while (Default.ucd().getBlockData(blockId++, blockData)) {
addLine(sorted, "blk", "n/a", blockData.name);
}
*/
String filename = "PropertyAliases";
String newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
String mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@ -520,7 +519,7 @@ public class GenerateData implements UCD_Types {
filename = "PropertyValueAliases";
newFile = "DerivedData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
mostRecent = UnicodeDataFile.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
mostRecent = org.unicode.cldr.util.Utility.generateBat("DerivedData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@ -536,7 +535,7 @@ public class GenerateData implements UCD_Types {
filename = "PropertyAliasSummary";
newFile = "OtherData/" + filename + UnicodeDataFile.getFileSuffix(true);
log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
mostRecent = UnicodeDataFile.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
mostRecent = org.unicode.cldr.util.Utility.generateBat("OtherData/", filename, UnicodeDataFile.getFileSuffix(true), batName);
log.println();
log.println(HORIZONTAL_LINE);
@ -650,13 +649,13 @@ public class GenerateData implements UCD_Types {
}
// accumulate differences
/*
String acc = (String)accumulation.get(toCheck);
if (acc == null) {
acc = "# \"" + toCheck + "\":\t" + originalComment;
}
acc += ";\t" + result;
*/
result.add(comment);
accumulation.add("# " + result.toString() + ":\t" + toCheck);
} else {
@ -673,7 +672,7 @@ public class GenerateData implements UCD_Types {
String newFile = directory + file + UnicodeDataFile.getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, file, UnicodeDataFile.getFileSuffix(true), batName);
doHeader(file + UnicodeDataFile.getFileSuffix(false), output, headerChoice);
int last = -1;
@ -682,7 +681,7 @@ public class GenerateData implements UCD_Types {
if (up == null) continue;
if (up.skipInDerivedListing()) continue;
/*
if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE
|| i == (BINARY_PROPERTIES | Non_break)
|| i == (BINARY_PROPERTIES | CaseFoldTurkishI)
@ -690,11 +689,11 @@ public class GenerateData implements UCD_Types {
|| i == (JOINING_TYPE | JT_U)
|| i == (JOINING_GROUP | NO_SHAPING)
) continue; // skip zero case
*/
/*if (skipSpecial == SKIP_SPECIAL
if (skipSpecial == SKIP_SPECIAL
&& i >= (BINARY_PROPERTIES | CompositionExclusion)
&& i < (AGE + NEXT_ENUM)) continue;
*/
if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
output.println();
output.println(HORIZONTAL_LINE);
@ -741,7 +740,8 @@ public class GenerateData implements UCD_Types {
Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
System.out.println();
}
*/
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, fileName);
@ -750,13 +750,13 @@ public class GenerateData implements UCD_Types {
String newFile = directory + fileName + UnicodeDataFile.getFileSuffix(true);
//PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
//String[] batName = {""};
//String mostRecent = UnicodeDataFile.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
//String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, fileName, UnicodeDataFile.getFileSuffix(true), batName);
String[] example = new String[256];
//log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
//log.println(UnicodeDataFile.generateDateLine());
/*log.println("#");
log.println("#");
log.println("# Normalization Test Suite");
log.println("# Format:");
log.println("#");
@ -790,7 +790,7 @@ public class GenerateData implements UCD_Types {
log.println("#");
log.println("@Part0 # Specific cases");
log.println("#");*/
log.println("#");
for (int j = 0; j < testSuiteCases.length; ++j) {
writeLine(testSuiteCases[j], log, false);
@ -897,6 +897,7 @@ public class GenerateData implements UCD_Types {
fc.close();
//Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
}
/*
static void handleIdentical() throws IOException {
DirectoryIterator target = new DirectoryIterator(GEN_DIR + File.separator + "DerivedData");
@ -916,6 +917,7 @@ public class GenerateData implements UCD_Types {
}
}
*/
static void writeLine(String cc, PrintWriter log, boolean check) {
String c = Default.nfc().normalize(cc);
String d = Default.nfd().normalize(cc);
@ -982,14 +984,14 @@ public class GenerateData implements UCD_Types {
"\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"
};
/*
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
DiffPropertyLister dpl;
UnicodeSet cummulative = new UnicodeSet();
@ -1072,7 +1074,7 @@ public class GenerateData implements UCD_Types {
String newFile = directory + filename + UnicodeDataFile.getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
String mostRecent = org.unicode.cldr.util.Utility.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
try {
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
@ -1116,7 +1118,7 @@ public class GenerateData implements UCD_Types {
log.println(HORIZONTAL_LINE);
log.println();
new DiffPropertyLister("3.2.0", "4.0.0", log).print();
/*
printDiff("110", "200");
UnicodeSet u11 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-1.1.txt", false);
UnicodeSet u20 = fromFile(BASE_DIR + "UnicodeData\\Versions\\UnicodeData-2.0.txt", false);
@ -1157,7 +1159,7 @@ public class GenerateData implements UCD_Types {
+ n.format(u31m.count()));
log.println();
u31m.print(log, false, false, "3.1");
*/
} finally {
if (log != null) {
log.close();
@ -1326,5 +1328,5 @@ public class GenerateData implements UCD_Types {
+ (start != end ? ".." + Default.ucd().getName(end) : ""));
}
System.out.println("TrailingZero count: " + result.size());
}
}*/
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.5 $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -99,8 +99,8 @@ public final class GenerateStandardizedVariants implements UCD_Types {
String directory = "DerivedData/";
String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
String[] batName = {""};
String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
//String[] batName = {""};
//String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
String version = Default.ucd().getVersion();
int lastDot = version.lastIndexOf('.');
@ -118,6 +118,6 @@ public final class GenerateStandardizedVariants implements UCD_Types {
Utility.appendFile("StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
out.close();
Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
//Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2004/10/14 17:54:56 $
* $Revision: 1.35 $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.36 $
*
*******************************************************************************
*/
@ -143,7 +143,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
//else if (arg.equalsIgnoreCase("genSplit")) GenerateData.genSplit();
else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
@ -157,7 +157,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
//else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
@ -168,9 +168,9 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
//else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.checkNameList();
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
//else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
@ -182,7 +182,7 @@ public final class Main implements UCD_Types {
GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
*/
// EXTRACTED PROPERTIES
/*
else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedBidiClass");
@ -230,8 +230,9 @@ public final class Main implements UCD_Types {
} else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED,
"DerivedData/extracted/", "DerivedNumericValues" );
} else if (arg.equalsIgnoreCase("StandardizedVariants")) {
}
*/
else if (arg.equalsIgnoreCase("StandardizedVariants")) {
GenerateStandardizedVariants.generate();
// OTHER STANDARD PROPERTIES
@ -244,7 +245,7 @@ public final class Main implements UCD_Types {
GenerateCaseFolding.generateSpecialCasing(true);
GenerateCaseFolding.generateSpecialCasing(false);
} else if (arg.equalsIgnoreCase("CompositionExclusions")) {
/* } else if (arg.equalsIgnoreCase("CompositionExclusions")) {
GenerateData.generateCompExclusions();
} else if (arg.equalsIgnoreCase("DerivedAge")) {
@ -305,7 +306,7 @@ public final class Main implements UCD_Types {
} else if (arg.equalsIgnoreCase("listKatakana")) {
GenerateData.listKatakana();
*/
/*
} else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
* $Date: 2005/06/24 23:51:52 $
* $Revision: 1.6 $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -17,6 +17,11 @@ import java.util.*;
import java.io.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
@ -24,7 +29,77 @@ import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
/**
 * Entry point: runs the bidi-mirroring report and then stops.
 * <p>
 * The normalization length reports below are currently disabled by the
 * always-true flag; the flag goes through an {@code if} so that the compiler
 * does not reject the remaining statements as unreachable.
 *
 * @param args command-line arguments (not used)
 * @throws IOException declared for the length reports
 */
public static void main(String[] args) throws IOException {
    getBidiMirrored();

    final boolean bidiReportOnly = true;
    if (bidiReportOnly) {
        return;
    }

    getLengths("NFC", Default.nfc());
    getLengths("NFD", Default.nfd());
    getLengths("NFKC", Default.nfkc());
    getLengths("NFKD", Default.nfkd());
    System.out.println("Done");
}
/**
 * Builds and prints a status report of code points related to bidi mirroring.
 * <p>
 * Labels are written into a UnicodeMap in this order (presumably a later label
 * replaces an earlier one for the same code point - confirm against
 * {@code UnicodeMap.putAll}):
 * <ol>
 * <li>"*open/close*" for general categories Ps, Pe, Pi and Pf</li>
 * <li>"*core*" for Bidi_Mirrored=true</li>
 * <li>"no bidi mirroring" for Bidi_Mirrored characters whose
 *     bidimirroringglyph value is just the character itself</li>
 * <li>"math" for general category Sm</li>
 * <li>"*special*" for Sm characters whose NFKD form contains '&lt;' or '&gt;'</li>
 * <li>"nfc-closure-" + label for unlabelled characters whose NFKC form is a
 *     single labelled code point</li>
 * </ol>
 * The result is printed with {@code showStatus}, marking the members of the
 * Bidi_Mirrored set.
 */
private static void getBidiMirrored() {
    ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
    UnicodeMap status = new UnicodeMap();
    status.putAll(foo.getSet("generalcategory=ps"), "*open/close*");
    status.putAll(foo.getSet("generalcategory=pe"), "*open/close*");
    status.putAll(foo.getSet("generalcategory=pi"), "*open/close*");
    status.putAll(foo.getSet("generalcategory=pf"), "*open/close*");

    UnicodeSet bidiMirroredSet = foo.getSet("bidimirrored=true");
    status.putAll(bidiMirroredSet, "*core*");

    // Collect every code point whose mirroring-glyph value differs from itself.
    // The scan is inclusive of U+10FFFF, matching the other full-range loops.
    UnicodeSet bidiMirroringSet = new UnicodeSet();
    UnicodeProperty x = foo.getProperty("bidimirroringglyph");
    for (int i = 0; i <= 0x10FFFF; ++i) {
        String s = x.getValue(i);
        if (!s.equals(UTF16.valueOf(i))) bidiMirroringSet.add(i);
    }
    // Bidi_Mirrored characters that have no distinct mirroring glyph.
    status.putAll(new UnicodeSet(bidiMirroredSet).removeAll(bidiMirroringSet), "no bidi mirroring");

    UnicodeSet mathSet = foo.getSet("generalcategory=sm");
    status.putAll(mathSet, "math");

    // Math symbols whose compatibility decomposition contains '<' or '>'.
    UnicodeSet special = new UnicodeSet("[<>]");
    for (UnicodeSetIterator it = new UnicodeSetIterator(mathSet); it.next();) {
        String s = Default.nfkd().normalize(it.codepoint);
        if (special.containsSome(s)) status.put(it.codepoint, "*special*");
    }
    //showStatus(status);

    // Close under NFKC: an unlabelled character whose NFKC form is a single,
    // already-labelled code point inherits that label (the emitted prefix
    // still reads "nfc-closure-").
    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (!Default.ucd().isAssigned(i)) continue;
        // NOTE(review): this skips every code point for which isPUA() is false,
        // so only private-use code points reach the checks below. That looks
        // inverted (private-use characters are unlikely to have NFKC mappings),
        // but isPUA() is not visible here - confirm the intent before changing.
        if (!Default.ucd().isPUA(i)) continue;
        if (Default.nfkc().isNormalized(i)) continue;
        String oldValue = (String) status.getValue(i);
        if (oldValue != null) continue;
        String s = Default.nfkc().normalize(i);
        if (UTF16.countCodePoint(s) != 1) continue;
        int cp = UTF16.charAt(s, 0);
        String value = (String) status.getValue(cp);
        if (value != null) status.put(i, "nfc-closure-" + value);
    }
    showStatus(status, bidiMirroredSet);
}
static BagFormatter bf = new BagFormatter();
/**
 * Prints one line per labelled code point, grouped by label in sorted order.
 * <p>
 * Each line has four fields separated by ";\t": the hex code point, the label,
 * "O" if the code point is in {@code x} (empty otherwise), and the character
 * name from the default UCD.
 *
 * @param status map from code points to String labels
 * @param x      set whose members are flagged with "O" in the third field
 */
private static void showStatus(UnicodeMap status, UnicodeSet x) {
    Iterator labels = new TreeSet(status.getAvailableValues()).iterator();
    while (labels.hasNext()) {
        String label = (String) labels.next();
        if (label == null) {
            continue;
        }
        UnicodeSetIterator members = new UnicodeSetIterator(status.getSet(label));
        while (members.next()) {
            StringBuffer line = new StringBuffer();
            line.append(Utility.hex(members.codepoint));
            line.append(";\t").append(label);
            line.append(";\t").append(x.contains(members.codepoint) ? "O" : "");
            line.append(";\t").append(Default.ucd().getName(members.codepoint));
            System.out.println(line.toString());
        }
    }
}
public static class Length {
String title;
int bytesPerCodeUnit;
@ -50,14 +125,6 @@ public class QuickTest implements UCD_Types {
}
}
public static void main(String[] args) throws IOException {
getLengths("NFC", Default.nfc());
getLengths("NFD", Default.nfd());
getLengths("NFKC", Default.nfkc());
getLengths("NFKD", Default.nfkd());
System.out.println("Done");
}
static final int skip = (1<<UCD.UNASSIGNED) | (1<<UCD.PRIVATE_USE) | (1<<UCD.SURROGATE);
/**
*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Counter.java,v $
* $Date: 2001/08/31 00:19:16 $
* $Revision: 1.2 $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -44,13 +44,31 @@ public final class Counter {
}
}
public void add(String obj) {
public void add(Object obj, int countValue) {
RWInteger count = (RWInteger)map.get(obj);
if (count == null) {
count = new RWInteger();
map.put(obj, count);
if (count == null) map.put(obj, count = new RWInteger());
count.value += countValue;
}
/**
 * Looks up the count currently recorded for an object.
 *
 * @param obj the key to look up in the counter's map
 * @return the recorded count, or 0 when the map has no entry for {@code obj}
 */
public int getCount(Object obj) {
    RWInteger tally = (RWInteger) map.get(obj);
    if (tally == null) {
        return 0;
    }
    return tally.value;
}
/** Removes every key and its count by clearing the underlying map. */
public void clear() {
map.clear();
}
public int getTotal() {
int count = 0;
for (Iterator it = map.keySet().iterator(); it.hasNext();) {
count += ((RWInteger) map.get(it.next())).value;
}
count.value += obj.length();
return count;
}
/** Returns the number of distinct keys currently held (the size of the underlying map). */
public int getItemCount() {
return map.size();
}
public Map getSortedByCount() {

View file

@ -1,42 +1,491 @@
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $
* $Date: 2005/03/04 02:50:26 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
package com.ibm.text.utility;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import java.text.*;
import java.io.*;
import java.nio.Buffer;
import com.ibm.icu.dev.test.util.DataInputCompressor;
import com.ibm.icu.dev.test.util.DataOutputCompressor;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.dev.test.util.UnicodeLabel;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.UCD.Default;
public class TestUtility {
/*
static public class MyEnum extends EnumBase {
public static MyEnum
ZEROED = (MyEnum) makeNext(myEnum.getClass()),
SHIFTED = (MyEnum) makeNext(),
NON_IGNORABLE = (MyEnum) makeNext(),
FIRST_ENUM = ZEROED,
LAST_ENUM = NON_IGNORABLE;
public MyEnum next(int value) {
return (MyEnum) internalNext(value);
}
protected MyEnum() {}
}
/*
static public class MyEnum extends EnumBase {
public static MyEnum
ZEROED = (MyEnum) makeNext(myEnum.getClass()),
SHIFTED = (MyEnum) makeNext(),
NON_IGNORABLE = (MyEnum) makeNext(),
FIRST_ENUM = ZEROED,
LAST_ENUM = NON_IGNORABLE;
public MyEnum next(int value) {
return (MyEnum) internalNext(value);
}
protected MyEnum() {}
}
*/
// When true, testUnicodeMapSerialization writes to and reads back from a file under outdir;
// when false it uses an in-memory byte array instead.
static final boolean USE_FILE = true;
// When true (and USE_FILE is false), the serialized bytes are dumped in hex.
static final boolean DEBUG = false;
static public void main (String[] args) {
for (MyEnum i = MyEnum.FIRST_ENUM; i != null; i = i.next()) {
System.out.println(i.getValue());
/**
 * Driver for the serialization experiments: reads back previously written
 * property files, runs the plain ObjectStream sanity check, then serializes
 * a UnicodeMap for each selected ICU property (and selected Han properties)
 * and prints the value count, size and read time of each.
 * The command-line arguments are not used.
 */
static public void main(String[] args) throws Exception {
tryFileUnicodeProperty();
check();
int iterations = 1;
//testStreamCompressor();
UnicodeMap umap = new UnicodeMap();
umap.put(0,"abcdefg");
// Disabled: would fill the first 256 code points with 16 distinct values.
if (false) for (int i = 0; i < 256; ++i) {
umap.put(i, String.valueOf(i&0xF0));
}
// NOTE(review): the callee's parameters are (iterations, total, pname, umap), so this passes
// 1 as the iteration count and `iterations` as the starting total. The returned total is
// discarded by the reset to 0 below, so only the printed output of this call matters.
int total = testUnicodeMapSerialization(1, iterations, "dummy", umap);
//if (true) return;
//UnicodeLabel ul;
ICUPropertyFactory p = ICUPropertyFactory.make();
total = 0;
BreakIterator bk = BreakIterator.getWordInstance(Locale.ENGLISH);
// Property-name filter: matches() requires the whole name to match, so with this
// pattern only the property called exactly "Name" is processed.
Matcher nameMatch = Pattern.compile("Name").matcher("");
UnicodeProperty gc = p.getProperty("General_Category");
// Complement of the union of the Cn, Co and Cs general-category sets.
// NOTE(review): addAll/complement are applied to the object returned by getSet("Cn");
// confirm getSet returns a fresh set rather than a cached one.
UnicodeSet checkSet = gc.getSet("Cn").addAll(gc.getSet("Co")).addAll(gc.getSet("Cs")).complement();
UnicodeSetIterator checkSetIterator = new UnicodeSetIterator(checkSet);
UnicodeProperty hangulSyllableType = p.getProperty("Hangul_Syllable_Type");
// Union of the LVT_Syllable and LV_Syllable sets.
UnicodeSet hangulSyllable = hangulSyllableType.getSet("LVT_Syllable").addAll(hangulSyllableType.getSet("LV_Syllable"));
for (Iterator pnames = p.getAvailableNames().iterator(); pnames
.hasNext();) {
String pname = (String) pnames.next();
if (!nameMatch.reset(pname).matches()) continue;
System.out.println();
UnicodeProperty up = p.getProperty(pname);
int ptype = up.getType();
System.out.print("Name:\t" + pname + "\tType:\t" + up.getTypeName(ptype));
if (up.isType(up.STRING_MASK)) {
// String-valued property: build the map by hand over checkSet, storing only
// values that differ from the code point itself.
boolean excludeHangul = pname.startsWith("isNF");
umap = new UnicodeMap();
checkSetIterator.reset();
while (checkSetIterator.next()) {
int i = checkSetIterator.codepoint;
if (excludeHangul && hangulSyllable.contains(i)) continue;
String value = up.getValue(i);
if (equals(i, value)) continue;
umap.put(i, value);
//System.out.println("Adding " + Utility.hex(i) + ", " + Utility.hex(value));
}
} else {
// NOTE(review): this repeats the getProperty(pname) lookup already held in `up`.
UnicodeProperty sampleProp = p.getProperty(pname);
umap = sampleProp.getUnicodeMap();
if (pname.equals("Name")) {
umap = fixNameMap(bk, umap);
}
}
total = testUnicodeMapSerialization(iterations, total, pname, umap);
}
String[] hanProps = {"kIICore", "kRSUnicode"};
for (int i = 0; i < hanProps.length; ++i) {
String pname = hanProps[i];
// The same name filter applies here, so with the "Name" pattern neither entry is processed.
if (!nameMatch.reset(pname).matches()) continue;
// testHanProp returns void, so `total` is not updated by this call.
testHanProp(iterations, total, pname, "Han");
}
System.out.println();
System.out.println("Done");
}
/**
 * Sanity check of plain Java object streams: writes a boolean, a UTF
 * string and a one-entry UnicodeMap into an in-memory buffer, prints the
 * buffer in hex, then reads the three items back and prints each of them.
 *
 * @throws IOException if writing or reading the object stream fails
 * @throws ClassNotFoundException if the serialized map cannot be resolved on read
 */
static void check() throws IOException, ClassNotFoundException {
    UnicodeMap sample = new UnicodeMap();
    sample.put(1, "abc");

    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    ObjectOutputStream writer = new ObjectOutputStream(sink);
    writer.writeBoolean(true);
    writer.writeUTF("abcdefg");
    writer.writeObject(sample);
    writer.close();

    byte[] bytes = sink.toByteArray();
    int length = sink.size();
    System.out.println(showBuffer(bytes, length));

    ObjectInputStream reader =
        new ObjectInputStream(new ByteArrayInputStream(bytes, 0, length));
    System.out.println(reader.readBoolean());
    System.out.println(reader.readUTF());
    System.out.println(reader.readObject());
    reader.close();
}
/**
 * Tests whether {@code value} consists of exactly the single code point
 * {@code i}.
 *
 * @param i code point to compare against
 * @param value candidate string; null and the empty string never match
 * @return true only when value encodes i and nothing else
 */
private static boolean equals(int i, String value) {
    if (value == null) {
        return false;
    }
    int len = value.length();
    // One code point occupies one or two UTF-16 code units. The lower bound
    // must reject the empty string: it would otherwise reach UTF16.charAt("", 0)
    // for supplementary i and throw.
    if (len < 1 || len > 2) {
        return false;
    }
    if (len == 1) {
        return i == value.charAt(0);
    }
    // Two code units can only equal i when i is supplementary.
    if (i <= 0xFFFF) {
        return false;
    }
    return i == UTF16.charAt(value, 0);
}
/**
 * Runs the serialization test for one Han property: fetches its map from
 * {@code Default.ucd()}, prints it, sets "na" as the missing value, prints
 * the name/type header and hands the map to testUnicodeMapSerialization.
 * The total returned by that call is discarded because this method returns
 * nothing.
 *
 * @param iterations number of read-back passes, forwarded unchanged
 * @param total running size total, forwarded unchanged
 * @param pname property name passed to getHanValue and used in the output
 * @param type label printed after "Type:"
 */
private static void testHanProp(int iterations, int total, String pname, String type) throws IOException, ClassNotFoundException {
    System.out.println();

    UnicodeMap hanValues = Default.ucd().getHanValue(pname);
    System.out.println(hanValues);
    hanValues.setMissing("na");

    String header = "Name:\t" + pname + "\tType:\t" + type;
    System.out.print(header);
    testUnicodeMapSerialization(iterations, total, pname, hanValues);
}
// Common prefix of the per-version data directories: a version string and a trailing
// backslash are appended directly to it (so the directory name is "UCD_Data" + version).
static String outdircore = "C:\\DATA\\bin\\UCD_Data";
// Directory for version 4.1.0; testUnicodeMapSerialization writes its ".bin" files here.
static String outdir = outdircore + "4.1.0\\";
/**
 * Serializes {@code umap} with Java object serialization inside a GZIP
 * stream, reads it back {@code iterations} times while timing the reads,
 * verifies that the map read back equals the original, and prints the
 * value count, serialized size and timing to standard output.
 * Depending on USE_FILE the data goes to the file outdir + pname + ".bin"
 * or to an in-memory byte array.
 *
 * @param iterations number of timed read-back passes
 * @param total running total of serialized sizes so far
 * @param pname property name; forms the file name
 * @param umap map to serialize
 * @return {@code total} plus the serialized size of this map
 * @throws IOException if writing or reading fails
 * @throws ClassNotFoundException if the serialized map cannot be resolved on read
 */
private static int testUnicodeMapSerialization(int iterations, int total, String pname, UnicodeMap umap) throws IOException, ClassNotFoundException {
    System.out.print("\tValue Count:\t" + umap.getAvailableValues().size());
    String filename = outdir + pname + ".bin";

    // --- write ---
    ByteArrayOutputStream baout = null;
    OutputStream rawOut;
    if (USE_FILE) {
        rawOut = new FileOutputStream(filename);
    } else {
        rawOut = baout = new ByteArrayOutputStream();
    }
    try {
        ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(rawOut));
        oos.writeObject(umap);
        oos.close();
    } finally {
        // Releases the file handle even when serialization fails part-way;
        // closing an already closed stream is harmless.
        rawOut.close();
    }

    long size;
    byte[] buffer = null;
    if (USE_FILE) {
        size = new File(filename).length();
    } else {
        size = baout.size();
        buffer = baout.toByteArray();
        if (DEBUG) System.out.println(showBuffer(buffer, size));
    }
    System.out.print("\tSize:\t" + size);

    // --- read; only the read time is measured ---
    UnicodeMap reverseMap = null;
    long start = System.currentTimeMillis();
    for (int i = iterations; i > 0; --i) {
        InputStream rawIn;
        if (USE_FILE) {
            rawIn = new FileInputStream(filename);
        } else {
            rawIn = new ByteArrayInputStream(buffer, 0, (int) size);
        }
        try {
            ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(rawIn));
            try {
                reverseMap = (UnicodeMap) ois.readObject();
            } catch (java.io.OptionalDataException e1) {
                // Report the stream state and continue with the next pass.
                System.out.println(e1.eof + "\t" + e1.length);
                e1.printStackTrace();
            }
            ois.close();
        } finally {
            rawIn.close();
        }
    }
    long end = System.currentTimeMillis();

    // --- verify ---
    if (reverseMap == null) {
        // No pass produced a map (iterations <= 0, or every read failed above);
        // report it instead of failing with a NullPointerException.
        System.out.println("Failed roundtrip: nothing was read back");
    } else if (!reverseMap.equals(umap)) {
        System.out.println("Failed roundtrip");
        for (int i = 0; i <= 0x10FFFF; ++i) {
            String expected = (String) umap.getValue(i);
            String actual = (String) reverseMap.getValue(i);
            if (UnicodeMap.areEqual(expected, actual)) {
                continue;
            }
            System.out.println(Utility.hex(i) + "\t'" + expected + "',\t'"
                + actual + "'");
        }
    }

    // Compound assignment narrows the long size to int implicitly.
    total += size;
    System.out.print("\tTime:\t" + (end - start) / (iterations * 1.0)
        + "\tmsecs (raw:\t" + ((end - start) / 1000.0) + "\tsecs)");
    /* with Vanilla Serialization
     * Size: 24131
     * Time: 1.9488 msecs (raw: 9.744 secs)
     * With my serialization
     * Size: 19353
     * Time: 0.8652 msecs (raw: 4.326 secs)
     * With my serialization, and compression of ints
     * Size: 8602
     * Time: 2.784 msecs (raw: 1.392 secs)
     * With delta encoding
     * Size: 5226
     * Time: 1.924 msecs (raw: 0.962 secs)
     * Name:
     * Size: 776926
     * Time: 180.3 msecs (raw: 1.803 secs)
     */
    return total;
}
/**
 * Formats the first {@code size} bytes of {@code buffer} as two-digit hex
 * values separated by single spaces, with no trailing space.
 *
 * @param buffer bytes to format
 * @param size number of leading bytes to include
 * @return the hex dump; empty when size is not positive
 */
private static String showBuffer(byte[] buffer, long size) {
    StringBuffer hex = new StringBuffer();
    int index = 0;
    while (index < size) {
        if (index > 0) {
            hex.append(' ');
        }
        int unsigned = buffer[index] & 0xFF;
        hex.append(Utility.hex(unsigned, 2));
        ++index;
    }
    return hex.toString();
}
/**
 * Round-trips a fixed list of sample strings and integral numbers through
 * DataOutputCompressor / DataInputCompressor over in-memory object streams.
 * For every sample it prints the written bytes in hex and then a
 * tab-separated line with the index, the value written, the value read
 * back and "Success" or "Bitter Failure".
 *
 * @throws IOException if any of the streams fails
 */
private static void testStreamCompressor() throws IOException {
    Object[] tests = {
        UTF16.valueOf(0x10FFFF), "\u1234", "abc",
        new Long(-3), new Long(12345),
        new Short(Short.MAX_VALUE), new Short(Short.MIN_VALUE),
        new Integer(Integer.MAX_VALUE), new Integer(Integer.MIN_VALUE),
        new Long(Long.MIN_VALUE), new Long(Long.MAX_VALUE)};
    for (int i = 0; i < tests.length; ++i) {
        Object source = tests[i];
        boolean isString = source instanceof String;

        // Write the sample.
        ByteArrayOutputStream out = new ByteArrayOutputStream(100);
        ObjectOutputStream out2 = new ObjectOutputStream(out);
        DataOutputCompressor sc = new DataOutputCompressor(out2);
        long y = 0;
        if (isString) {
            sc.writeUTF((String) source);
        } else {
            y = ((Number) source).longValue();
            sc.writeLong(y);
        }
        out2.close();

        byte[] buffer = out.toByteArray();
        showBytes(buffer, out.size());
        System.out.println();

        // Read it back and compare.
        ObjectInputStream in2 = new ObjectInputStream(
            new ByteArrayInputStream(buffer, 0, out.size()));
        DataInputCompressor isc = new DataInputCompressor(in2);
        if (isString) {
            Object result = isc.readUTF();
            System.out.println(i + "\t" + source
                + "\t" + result
                + (source.equals(result) ? "\tSuccess" : "\tBitter Failure"));
        } else {
            long x = isc.readLong();
            // The tab between y and x keeps the written and read-back numbers
            // in separate columns, as in the string branch.
            System.out.println(i + "\t" + y
                + "\t" + x
                + "\t" + Utility.hex(y)
                + "\t" + Utility.hex(x)
                + (x == y ? "\tSuccess" : "\tBitter Failure"));
        }
        in2.close();
    }
}
/**
 * Prints the first {@code len} bytes of {@code buffer} to standard output
 * as two-digit hex values, each followed by a space, without a line break.
 *
 * @param buffer bytes to print
 * @param len number of leading bytes to print
 */
private static void showBytes(byte[] buffer, int len) {
    int position = 0;
    while (position < len) {
        int unsigned = buffer[position] & 0xFF;
        System.out.print(Utility.hex(unsigned, 2) + " ");
        position++;
    }
}
/**
 * Builds a copy of a character-name map in which algorithmically derivable
 * names are collapsed to one-character placeholders: names starting with
 * "CJK UNIFIED IDEOGRAPH-" become "*", names starting with
 * "CJK COMPATIBILITY IDEOGRAPH-" become "#", and names starting with
 * "HANGUL SYLLABLE " become "@". Code points without a name are left out.
 * While copying, each segment between break-iterator boundaries of a name
 * is added to a Counter with weight max(0, length - 2); the counter is
 * only consumed by the disabled dump at the end.
 *
 * @param bk break iterator used to split each name into segments
 * @param umap source map from code point to name
 * @return the new map; {@code umap} itself is not modified
 */
private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) {
    UnicodeMap temp = new UnicodeMap();
    Counter counter = new Counter();
    // Inclusive upper bound so that the last code point, U+10FFFF, is visited too.
    for (int i = 0; i <= 0x10FFFF; ++i) {
        String name = (String) umap.getValue(i);
        if (name == null) {
            continue;
        }
        if (name.startsWith("CJK UNIFIED IDEOGRAPH-")) {
            name = "*";
        } else if (name.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) {
            name = "#";
        } else if (name.startsWith("HANGUL SYLLABLE ")) {
            name = "@";
        }
        bk.setText(name);
        int start = 0;
        while (true) {
            int end = bk.next();
            if (end == BreakIterator.DONE) {
                break;
            }
            String word = name.substring(start, end);
            counter.add(word, Math.max(0, word.length() - 2));
            start = end;
        }
        temp.put(i, name);
    }
    // Disabled debugging dump of the segment statistics and the resulting ranges.
    if (false) {
        Map m = counter.getSortedByCount();
        int count = 0;
        int running = 0;
        for (Iterator it = m.keySet().iterator(); it.hasNext();) {
            Counter.RWInteger c = (Counter.RWInteger) it.next();
            String value = (String) m.get(c);
            running += c.value;
            System.out.println(count++ + "\t" + c + "\t" + running
                + "\t" + value);
        }
        for (UnicodeMap.MapIterator it2 = new UnicodeMap.MapIterator(
                temp); it2.nextRange();) {
            System.out.println(Utility.hex(it2.codepoint) + "\t"
                + Utility.hex(it2.codepointEnd) + "\t"
                + it2.value);
        }
    }
    return temp;
}
/**
 * Smoke test for FileUnicodeProperty: builds the factory for version
 * "4.1.0", prints the available property names, then prints the maps of
 * the "White_Space" and "kRSUnicode" properties with a blank line between
 * them.
 */
private static void tryFileUnicodeProperty() {
    UnicodeProperty.Factory props = FileUnicodeProperty.Factory.make("4.1.0");
    System.out.println(props.getAvailableNames());

    UnicodeProperty whiteSpace = props.getProperty("White_Space");
    System.out.println(whiteSpace.getUnicodeMap());

    UnicodeProperty hanProp = props.getProperty("kRSUnicode");
    System.out.println();
    System.out.println(hanProp.getUnicodeMap());
}
/**
 * A UnicodeProperty whose values come from a serialized UnicodeMap stored
 * in a ".bin" file, in the format written by testUnicodeMapSerialization
 * (Java object serialization inside a GZIP stream). The property name is
 * the file name without the suffix. The map is loaded on the first value
 * query.
 */
public static class FileUnicodeProperty extends UnicodeProperty {
    /** File-name suffix of the serialized property files. */
    private static final String SUFFIX = ".bin";

    private File file;
    private String version;
    private UnicodeMap map;

    private FileUnicodeProperty(File file, String version) {
        this.file = file;
        this.version = version;
        String base = file.getName();
        setName(base.substring(0, base.length() - SUFFIX.length())); // strip .bin
    }

    /** Factory holding one FileUnicodeProperty per ".bin" file of a version directory. */
    public static class Factory extends UnicodeProperty.Factory {
        private Factory() {}

        /**
         * Creates a factory from the files in the directory outdircore + version.
         *
         * @param version version string appended to outdircore to form the directory
         * @return a factory with one property per ".bin" file found
         * @throws IllegalArgumentException if the directory cannot be listed
         */
        public static Factory make(String version) {
            Factory result = new Factory();
            File f = new File(outdircore + version + "\\");
            File[] files = f.listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a readable directory.
                throw new IllegalArgumentException("Can't list directory: " + f);
            }
            for (int i = 0; i < files.length; ++i) {
                // Other files would get a wrong (or unparsable) property name.
                if (!files[i].getName().endsWith(SUFFIX)) {
                    continue;
                }
                result.add(new FileUnicodeProperty(files[i], version));
            }
            return result;
        }
    }

    protected List _getAvailableValues(List result) {
        if (map == null) make();
        return (List) map.getAvailableValues(result);
    }

    protected String _getVersion() {
        return version;
    }

    /* (non-Javadoc)
     * @see com.ibm.icu.dev.test.util.UnicodeProperty#_getValue(int)
     */
    protected String _getValue(int codepoint) {
        if (map == null) make();
        return (String) map.getValue(codepoint);
    }

    /**
     * Loads the map from the file. The stream is unwrapped with
     * GZIPInputStream because the files are written through a
     * GZIPOutputStream.
     *
     * @throws InternalError wrapping the cause if the file cannot be read
     */
    private void make() {
        InputStream in = null;
        try {
            in = new FileInputStream(file);
            ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(in));
            map = (UnicodeMap) ois.readObject();
            ois.close();
        } catch (Exception e) {
            throw (InternalError) new InternalError("Can't create property").initCause(e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the primary
                    // outcome (success or InternalError) is already decided.
                }
            }
        }
    }

    protected List _getNameAliases(List result) {
        result.add(getName());
        return result;
    }

    protected List _getValueAliases(String valueAlias, List result) {
        return result;
    }
}
*/
}

View file

@ -35,7 +35,7 @@ public class UnicodeDataFile {
String[] batName2 = {""};
mostRecent = UnicodeDataFile.generateBat(directory, filename, newSuffix, fileType, batName2);
batName = batName2[0];
filename = filename;
this.filename = filename;
if (!isHTML) {
out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));