ICU-4700 Misc tools

X-SVN-Rev: 18741
This commit is contained in:
Mark Davis 2005-11-01 00:10:54 +00:00
parent ddcee69efa
commit b120a3251b
14 changed files with 853 additions and 196 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
* $Date: 2004/11/12 23:17:15 $
* $Revision: 1.16 $
* $Date: 2005/11/01 00:10:53 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -396,142 +396,145 @@ public final class ConvertUCD implements UCD_Types {
try {
String[] parts = new String[20];
for (int lineNumber = 1; ; ++lineNumber) {
line = input.readLine();
if (line == null) break;
if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
try {
line = input.readLine();
if (line == null) break;
if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
String original = line;
String comment = "";
int commentPos = line.indexOf('#');
if (commentPos >= 0) {
comment = line.substring(commentPos+1).trim();
line = line.substring(0, commentPos);
}
line = line.trim();
if (line.length() == 0) continue;
String original = line;
String comment = "";
int commentPos = line.indexOf('#');
if (commentPos >= 0) {
comment = line.substring(commentPos+1).trim();
line = line.substring(0, commentPos);
}
line = line.trim();
if (line.length() == 0) continue;
int count = Utility.split(line,';',parts);
int count = Utility.split(line,';',parts);
if (false && parts[0].equals("2801")) {
System.out.println("debug?");
}
if (false && parts[0].equals("2801")) {
System.out.println("debug?");
}
// fix malformed or simple lists.
// fix malformed or simple lists.
if (count != labels.length) {
if (count == labels.length + 1 && parts[count-1].equals("")) {
if (!showedSemi) System.out.println("Extra semicolon in: " + original);
showedSemi = true;
} else if (count == 1) { // fix simple list
++count;
parts[1] = "Y";
} else if (count < labels.length) {
if (!showedShort) System.out.println("Line shorter than labels: " + original);
showedShort = true;
for (int i = count; i < labels.length; ++i) {
parts[i] = "";
}
} else {
throw new ChainException("wrong count: {0}",
new Object[] {new Integer(line), new Integer(count)});
}
}
if (count != labels.length) {
if (count == labels.length + 1 && parts[count-1].equals("")) {
if (!showedSemi) System.out.println("Extra semicolon in: " + original);
showedSemi = true;
} else if (count == 1) { // fix simple list
++count;
parts[1] = "Y";
} else if (count < labels.length) {
if (!showedShort) System.out.println("Line shorter than labels: " + original);
showedShort = true;
for (int i = count; i < labels.length; ++i) {
parts[i] = "";
}
} else {
throw new ChainException("wrong count: {0}",
new Object[] {new Integer(line), new Integer(count)});
}
}
// store char
// first field is always character OR range. May be UTF-32
int cpTop;
int cpStart;
int ddot = parts[0].indexOf(".");
if (ddot >= 0) {
cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
// System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop));
} else {
cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
cpTop = cpStart;
if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
}
// store char
// first field is always character OR range. May be UTF-32
int cpTop;
int cpStart;
int ddot = parts[0].indexOf(".");
if (ddot >= 0) {
cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
// System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop));
} else {
cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
cpTop = cpStart;
if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
}
// properties first
if (labels[1].equals("PROP")) {
String prop = parts[2].trim();
// FIX!!
boolean skipLetters = false;
if (prop.equals("Alphabetic")) {
prop = "Other_Alphabetic";
skipLetters = true;
}
// END FIX!!
properties.add(prop);
if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted
int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
if (end == 0) end = cpStart;
for (int j = cpStart; j <= end; ++j) {
if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
if (skipLetters && getEntry(cpStart).isLetter()) continue;
appendCharProperties(j, prop);
}
}
} else { // not range!
String val = "";
String lastVal;
// properties first
if (labels[1].equals("PROP")) {
String prop = parts[2].trim();
// FIX!!
boolean skipLetters = false;
if (prop.equals("Alphabetic")) {
prop = "Other_Alphabetic";
skipLetters = true;
}
// END FIX!!
properties.add(prop);
if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted
int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
if (end == 0) end = cpStart;
for (int i = 1; i < labels.length; ++i) {
String key = labels[i];
lastVal = val;
if (isHex.get(key) != null) {
val = Utility.fromHex(parts[i]);
} else {
val = parts[i].trim();
}
if (key.equals("OMIT")) continue; // do after val, so lastVal is correct
if (key.equals("RANGE")) continue; // do after val, so lastVal is correct
if (val.equals("")) continue; // skip empty values, they mean default
for (int j = cpStart; j <= end; ++j) {
if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
if (skipLetters && getEntry(cpStart).isLetter()) continue;
appendCharProperties(j, prop);
}
}
} else { // not range!
String val = "";
String lastVal;
for (int cps = cpStart; cps <= cpTop; ++cps) {
if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue; // skip condensed ranges
for (int i = 1; i < labels.length; ++i) {
String key = labels[i];
lastVal = val;
if (isHex.get(key) != null) {
val = Utility.fromHex(parts[i]);
} else {
val = parts[i].trim();
}
if (key.equals("OMIT")) continue; // do after val, so lastVal is correct
if (key.equals("RANGE")) continue; // do after val, so lastVal is correct
if (val.equals("")) continue; // skip empty values, they mean default
for (int cps = cpStart; cps <= cpTop; ++cps) {
if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue; // skip condensed ranges
if (key.equals("binary")) {
appendCharProperties(cps, val);
} else if (key.equals("fc")) {
UData data = getEntry(cps);
String type = parts[i-1].trim();
if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
data.fullCaseFolding = val;
//System.out.println("*<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
}
if (type.equals("S") || type.equals("C") || type.equals("L")) {
data.simpleCaseFolding = val;
//System.out.println("<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
}
if (type.equals("I")) {
data.simpleCaseFolding = val;
setBinaryProperty(cps, CaseFoldTurkishI);
if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
+ Utility.hex(cps) + ": " + Utility.hex(val));
}
} else if (labels[0].equals("SpecialCasing") // special handling for special casing
&& labels[4].equals("sc")
&& parts[4].trim().length() > 0) {
if (i < 4) {
if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", "
+ Utility.hex(key) + ":" + Utility.hex(val));
addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
}
} else {
/*if (key.equals("sn")) { // SKIP UNDEFINED!!
UData data = getEntryIfExists(cps);
if (data == null || data.generalCategory == Cn) continue;
}
*/
addCharData(cps, key, val);
}
}
}
}
if (key.equals("binary")) {
appendCharProperties(cps, val);
} else if (key.equals("fc")) {
UData data = getEntry(cps);
String type = parts[i-1].trim();
if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
data.fullCaseFolding = val;
//System.out.println("*<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
}
if (type.equals("S") || type.equals("C") || type.equals("L")) {
data.simpleCaseFolding = val;
//System.out.println("<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
}
if (type.equals("I")) {
data.simpleCaseFolding = val;
setBinaryProperty(cps, CaseFoldTurkishI);
if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
+ Utility.hex(cps) + ": " + Utility.hex(val));
}
} else if (labels[0].equals("SpecialCasing") // special handling for special casing
&& labels[4].equals("sc")
&& parts[4].trim().length() > 0) {
if (i < 4) {
if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", "
+ Utility.hex(key) + ":" + Utility.hex(val));
addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
}
} else {
/*if (key.equals("sn")) { // SKIP UNDEFINED!!
UData data = getEntryIfExists(cps);
if (data == null || data.generalCategory == Cn) continue;
}
*/
addCharData(cps, key, val);
}
}
}
}
} catch (Exception e) {
System.err.println("*Exception at: " + line + ", " + e.getMessage());
//System.err.println(e.getMessage());
}
}
} catch (Exception e) {
System.out.println("Exception at: " + line + ", " + e.getMessage());

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateConfusables.java,v $
* $Date: 2005/07/19 17:21:00 $
* $Revision: 1.7 $
* $Date: 2005/11/01 00:10:53 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -290,7 +290,7 @@ public class GenerateConfusables {
lowerIsBetter.putAll(remainingOutputSet, MARK_ASCII);
lowerIsBetter.setMissing(MARK_NOT_NFC);
lowerIsBetter.lock();
lowerIsBetter.freeze();
// add special values:
//lowerIsBetter.putAll(new UnicodeSet("["), new Integer(0));
@ -321,11 +321,11 @@ public class GenerateConfusables {
PROHIBITED + NOT_IN_XID);
removals2.setMissing("future?");
additions.lock();
remap.lock();
removals.lock();
reviews.lock();
removals2.lock();
additions.freeze();
remap.freeze();
removals.freeze();
reviews.freeze();
removals2.freeze();
}
/**
@ -431,8 +431,8 @@ public class GenerateConfusables {
//reviews.putAll(UNASSIGNED, "");
out.print("\uFEFF");
out.println("# Review List for IDN");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
UnicodeSet fullSet = reviews.getSet("").complement();
@ -487,8 +487,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(outdir, "idnchars.txt");
out.println("# Recommended Identifier Profiles for IDN");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
out.println("# Output Characters");
@ -557,8 +557,8 @@ public class GenerateConfusables {
"xidmodifications.txt");
out.println("# Security Profile for General Identifiers");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
out.println("# Characters restricted");
@ -614,8 +614,8 @@ public class GenerateConfusables {
//someRemovals = removals;
out = BagFormatter.openUTF8Writer(outdir, "draft-restrictions.txt");
out.println("# Characters restricted in domain names");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("#");
out.println("# This file contains a draft list of characters for use in");
out.println("# UTR #36: Unicode Security Considerations");
@ -1149,8 +1149,8 @@ public class GenerateConfusables {
public void writeSource(String directory, String filename) throws IOException {
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
out.println("# Source File for IDN Confusables");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
dataMixedAnycase.writeSource(out);
out.close();
@ -1160,8 +1160,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
out.print('\uFEFF');
out.println("# Recommended confusable mapping for IDN");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
if (appendFile) {
@ -1369,8 +1369,8 @@ public class GenerateConfusables {
UnicodeSet representable = new UnicodeSet();
out.print('\uFEFF');
out.println("# Summary: Recommended confusable mapping for IDN");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("");
MyEquivalenceClass data = dataMixedAnycase;
Set items = data.getOrderedExplicitItems();
@ -1494,8 +1494,8 @@ public class GenerateConfusables {
PrintWriter out = BagFormatter.openUTF8Writer(outdir, filename);
out.print('\uFEFF');
out.println("# Summary: Whole-Script Confusables");
out.println("# $Revision: 1.7 $");
out.println("# $Date: 2005/07/19 17:21:00 $");
out.println("# $Revision: 1.8 $");
out.println("# $Date: 2005/11/01 00:10:53 $");
out.println("# This data is used for determining whether a strings is a");
out.println("# whole-script or mixed-script confusable.");
out.println("# The mappings here ignore common and inherited script characters,");

View file

@ -206,7 +206,7 @@ class GenerateStringPrep implements UCD_Types {
return a + "\t" + b;
}
};
UnicodeMap sb = ((UnicodeMap)scripts.clone()).composeWith(blocks, myCompose);
UnicodeMap sb = ((UnicodeMap)scripts.cloneAsThawed()).composeWith(blocks, myCompose);
for (Iterator it = sb.getAvailableValues(new TreeSet()).iterator(); it.hasNext();) {
System.out.println(it.next());
}

View file

@ -0,0 +1,501 @@
package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Replaceable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
import com.ibm.text.utility.Utility;
import com.ibm.text.utility.Utility.Encoding;
public class MakeNamesChart {
// Code point of the NamesList entry currently being emitted; -1 when no entry is pending
// (finishItem resets it to -1 once the entry's trailing rows have been written).
static int lastCodePoint = -1;
// NOTE(review): not referenced anywhere in the visible part of this class - possibly dead.
static boolean lastCodePointIsOld = false;
// Decomposition type of lastCodePoint that has not yet been matched by a ':' or '#' line;
// checkCanonical/checkCompatibility reset it to UCD.NONE once they have consumed it.
static int lastDecompType = UCD.NONE;
// File-name prefix of the per-block chart (grid) pages.
static final String chartPrefix = "c_";
// File-name prefix of the per-block names (detail) pages.
static final String namePrefix = "n_";
// Code points removed from every chart grid; assigned in main().
static UnicodeSet skipChars;// = new UnicodeSet("[[:gc=cn:]-[:noncharactercodepoint:]]");
// Code points that showChar wraps in U+200E marks; assigned in main().
static UnicodeSet rtl;// = new UnicodeSet("[[:bidiclass=r:][:bidiclass=al:]]");
// Code points that showChar renders through a visible stand-in glyph; assigned in main().
static UnicodeSet usePicture;// = new UnicodeSet("[[:whitespace:][:defaultignorablecodepoint:]]");
// UCD instance for version 4.1.0 (created in main); isNew() compares allocations against it.
static UCD ucd41;
/** Directory that receives every generated HTML page. */
private static final String NAMELIST_DIR = "C:/DATA/GEN/charts/namelist/";

/**
 * Reads NamesList.txt block by block and writes, for every block, a chart page
 * (grid of characters) and a names page (annotated list), followed by a single
 * index page (mainList.html). Finally prints the decompositions that appear in the
 * names list both with and without a trailing character name.
 *
 * @param args ignored
 * @throws Exception if the input cannot be read, an output file cannot be written,
 *         or a names-list line cannot be parsed
 */
public static void main(String[] args) throws Exception {
    //ConvertUCD.main(new String[]{"5.0.0"});
    BlockInfo blockInfo = new BlockInfo("5.0.0", "NamesList.txt");
    // http://www.unicode.org/~book/incoming/kenfiles/U50M051010.lst
    Default.setUCD("5.0.0");
    ucd41 = UCD.make("4.1.0");
    ToolUnicodePropertySource up = ToolUnicodePropertySource.make("5.0.0");

    // NOTE(review): this subtracts the gc=cn set from itself, so skipChars is always
    // empty. The intended pattern (see the comment below) removes only the
    // noncharacters from gc=cn. Left as-is because changing it alters which cells
    // appear in the charts - confirm the intent before fixing.
    skipChars = new UnicodeSet(up.getSet("gc=cn")).removeAll(up.getSet("gc=cn"));
    //"[[:gc=cn:]-[:noncharactercodepoint:]]");
    rtl = new UnicodeSet(up.getSet("bidiclass=r")).addAll(up.getSet("bidiclass=al"));// "[[:bidiclass=r:][:bidiclass=al:]]");
    usePicture = new UnicodeSet(up.getSet("whitespace=true")).addAll(up.getSet("defaultignorablecodepoint=true"));// new UnicodeSet("[[:whitespace:][:defaultignorablecodepoint:]]");

    List nameList = new ArrayList();      // header line of every block that got pages
    ArrayList lines = new ArrayList();    // lines of the block currently being processed
    BitSet nameListNew = new BitSet();    // bit i set => block i contains a new character

    int limit = Integer.MAX_VALUE;
    try {
        for (int count = 0; count < limit; ++count) {
            if (!blockInfo.next(lines)) break;
            String firstLine = (String)lines.get(0);
            if (firstLine.startsWith("@@@")) continue;
            String[] lineParts = firstLine.split("\t");
            String fileName = lineParts[1] + ".html";
            nameList.add(firstLine);
            System.out.println();
            System.out.println("file: " + chartPrefix + fileName);

            writeChartPage(fileName, lineParts, lines);
            writeNamesPage(fileName, lines, nameListNew, nameList.size() - 1);
        }
    } finally {
        // release the NamesList reader even when a block fails
        blockInfo.in.close();
    }

    writeMainList(nameList, nameListNew);

    System.out.println("Name differences: Canonical");
    showNameDifferences(hasNameCan, hasNoNameCan);
    System.out.println("Name differences: Compatibility");
    showNameDifferences(hasNameComp, hasNoNameComp);
    System.out.println("Done");
}

/**
 * Writes the chart page for one block: a header row plus a 16-column grid with one
 * cell per code point listed in the block.
 *
 * @param fileName  block file name without the chart prefix
 * @param lineParts tab-separated fields of the block header line
 * @param lines     all lines of the block; index 0 is the header line
 * @throws IOException if the page cannot be opened for writing
 */
private static void writeChartPage(String fileName, String[] lineParts, List lines) throws IOException {
    PrintWriter out = BagFormatter.openUTF8Writer(NAMELIST_DIR, chartPrefix + fileName);
    try {
        out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>" +
                BagFormatter.toHTML.transliterate(getHeading(lineParts[2])) +
                "</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
                "<base target='names'></head><body>");

        // header
        out.println("<table class='headerTable'><tr><td class='headerLeft'>" +
                lineParts[1] +
                " <a href='help.html'>help</a></td><td class='headerCenter'>" +
                getHeading(lineParts[2]) +
                "</td><td class='headerRight'><a href='mainList.html'>index</a> " +
                lineParts[3] +
                "</td></tr></table>");

        if ("Unassigned".equals(lineParts[2])) {
            System.out.println("debug"); // breakpoint anchor kept from the original
        }

        // first pass through and collect all the code points
        UnicodeSet collectedCodePoints = new UnicodeSet();
        for (int i = 1; i < lines.size(); ++i) {
            String line = (String)lines.get(i);
            int cp1 = line.charAt(0);
            if (cp1 != '@' && cp1 != '\t') {
                int cp = Integer.parseInt(line.split("\t")[0],16);
                collectedCodePoints.add(cp);
            }
        }
        collectedCodePoints.removeAll(skipChars);

        if (collectedCodePoints.size() == 0) {
            out.println("<p align='center'>No Names List</p>");
        } else {
            out.println("<div align='center'><table class='chart'><tr>");
            int counter = 0;
            for (UnicodeSetIterator it = new UnicodeSetIterator(collectedCodePoints); it.next();) {
                if ((counter % 16) == 0 && counter != 0) {
                    out.println("</tr><tr>");
                }
                String tdclass = "cell";
                if (counter < 16) tdclass = "cellw"; // first row uses its own style class
                if (it.codepoint == 0x242) {
                    System.out.println("debug"); // breakpoint anchor kept from the original
                }
                boolean isNew = isNew(it.codepoint);
                if (isNew) tdclass += "new";
                String hexcp = Utility.hex(it.codepoint, 4);
                String title = "";
                String name = Default.ucd().getName(it.codepoint);
                if (name != null) title = " title='" + BagFormatter.toHTML.transliterate(name.toLowerCase()) + "'";
                out.println("<td class='" + tdclass + "'"
                        + title
                        + ">\u00A0"
                        + showChar(it.codepoint) + "\u00A0<br><tt><a href='" + namePrefix + fileName + "#"+ hexcp + "'>" +
                        hexcp + "</a></tt></td>");
                counter++;
            }
            // A multi-row grid gets its last row padded to 16 cells; a single row is left short.
            int filled = counter & 0xF;
            if (counter > 16 && filled != 0) {
                for (; filled < 16; ++filled) out.println("<td class='cell'>\u00A0</td>");
            }
            // Fix: the closing tags used to be written only when counter > 16,
            // leaving charts with 16 or fewer cells unterminated.
            out.println("</tr></table></div>");
        }
        out.println("</body></html>");
    } finally {
        out.close();
    }
}

/**
 * Writes the names page for one block: headings and comments for '@' lines, one
 * table row per character line, and one row per tab-indented annotation line.
 *
 * @param fileName    block file name without the names prefix
 * @param lines       all lines of the block; index 0 is the header line
 * @param nameListNew bit set in which {@code blockIndex} is set when the block
 *                    contains at least one new character
 * @param blockIndex  index of this block in the list of emitted blocks
 * @throws IOException if the page cannot be opened for writing
 * @throws IllegalArgumentException if a line cannot be processed; the cause is attached
 */
private static void writeNamesPage(String fileName, List lines, BitSet nameListNew, int blockIndex)
        throws IOException {
    PrintWriter out = BagFormatter.openUTF8Writer(NAMELIST_DIR, namePrefix + fileName);
    try {
        out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
                "<link rel='stylesheet' type='text/css' href='namelist.css'></head><body>");

        // now do the characters
        boolean inTable = false;
        for (int i = 1; i < lines.size(); ++i) {
            String line = (String)lines.get(i);
            try {
                if (line.startsWith("@")) {
                    finishItem(out);
                    if (inTable) {
                        out.println("</table>");
                        inTable = false;
                    }
                    if (line.startsWith("@+")) {
                        line = line.substring(2).trim();
                        out.println("<p class='comment'>"
                                + line
                                + "</p>");
                    } else {
                        line = line.substring(1).trim();
                        out.println("<h2>"
                                + line
                                + "</h2>");
                    }
                } else {
                    if (!inTable) {
                        out.println("<table>");
                        inTable = true;
                    }
                    if (line.startsWith("\t")) {
                        String body = line.trim();
                        if (false && line.indexOf(body) != 1) {
                            System.out.println("Format error: too much inital whitespace: <" + line + ">");
                        }
                        char firstChar = body.charAt(0);
                        switch (firstChar) {
                        case '*': body = "\u2022 " + body.substring(2); break;
                        case ':': body = checkCanonical(lastCodePoint, body); break;
                        case '#': body = checkCompatibility(lastCodePoint, body); break;
                        case 'x': body = getOther(body); break;
                        case '=': break;
                        default: throw new IllegalArgumentException("Huh? " + body);
                        }
                        out.println("<tr><td>\u00A0</td><td>\u00A0</td><td>"
                                + maybeNameStyle(showTextConvertingHex(body, firstChar != '='), firstChar == '=')
                                + "</td></tr>");
                    } else {
                        finishItem(out);
                        String[] fields = line.split("\t");
                        String x = fields[0];
                        lastCodePoint = Integer.parseInt(x,16);
                        boolean lastCodePointIsNew = isNew(lastCodePoint);
                        if (lastCodePointIsNew) nameListNew.set(blockIndex, true);
                        out.println("<tr><td"
                                + (lastCodePointIsNew ? " class='new'" : "")
                                + "><code><a name='" + x + "'>" + x + "</a></code></td><td>\u00A0"
                                + showChar(lastCodePoint) + "\u00A0</td><td"
                                + (lastCodePointIsNew ? " class='new'" : "") + ">"
                                + nameStyle(showTextConvertingHex(fields[1], false)) + "</td></tr>");
                        lastDecompType = Default.ucd().getDecompositionType(lastCodePoint);
                    }
                }
            } catch (Exception e) {
                throw (IllegalArgumentException) new IllegalArgumentException("Error on line: " + line)
                        .initCause(e);
            }
        }
        // finishItem may still add rows for the last character, so it runs before the table is closed
        finishItem(out);
        if (inTable) {
            out.println("</table>"); // fix: the last table of a block was never closed
        }
        out.println("</body></html>");
    } finally {
        out.close();
    }
}

/**
 * Writes mainList.html, the index that links to every chart page.
 *
 * @param nameList    header lines of all emitted blocks, in order
 * @param nameListNew bit i is set when block i contains a new character
 * @throws IOException if the page cannot be opened for writing
 */
private static void writeMainList(List nameList, BitSet nameListNew) throws IOException {
    PrintWriter out = BagFormatter.openUTF8Writer(NAMELIST_DIR, "mainList.html");
    try {
        out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
                "<title>Main List</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
                "<base target='chart'></head><body><table>");
        for (int i = 0; i < nameList.size(); ++i) {
            String line = (String) nameList.get(i);
            String[] lineParts = line.split("\t");
            String fileName = lineParts[1] + ".html";
            out.println("<tr><td><code>" + lineParts[1] +
                    "</code></td><td"
                    + (nameListNew.get(i) ? " class='new'" : "")
                    + "><a href='" + chartPrefix + fileName + "'>" + getHeading(lineParts[2]) + "</a></td><td><code>" +
                    lineParts[3] +"</code></td></tr>");
        }
        out.println("</table></body></html>");
    } finally {
        out.close();
    }
}
/**
 * Tells whether a code point is allocated in the default UCD but was not yet
 * allocated in the 4.1.0 UCD held in {@code ucd41}.
 *
 * @param codepoint code point to test
 * @return true when the code point is allocated now and was not in 4.1.0
 */
private static boolean isNew(int codepoint) {
    if (!Default.ucd().isAllocated(codepoint)) {
        return false;
    }
    return !ucd41.isAllocated(codepoint);
}
/**
 * Prints every key that occurs in both maps, together with the value each map
 * stores for it, followed by the number of such keys.
 *
 * @param hasName   map from decomposition string to a source string (written with a name)
 * @param hasNoName map from decomposition string to a source string (written without a name)
 */
private static void showNameDifferences(Map hasName, Map hasNoName) {
    // keys present in both maps, in sorted order
    Set shared = new TreeSet(hasNoName.keySet());
    shared.retainAll(hasName.keySet());

    Iterator keys = shared.iterator();
    while (keys.hasNext()) {
        String key = (String) keys.next();
        String named = (String) hasName.get(key);
        String unnamed = (String) hasNoName.get(key);
        System.out.println();
        System.out.println("decomp: " + Utility.hex(key));
        System.out.println("Has name in: " + Utility.hex(named));
        System.out.println("Has no name in: " + Utility.hex(unnamed));
    }
    System.out.println("Count: " + shared.size());
}
// Confusables lookup used by finishItem (ti.getConfusables).
// NOTE(review): the meaning of the "L" constructor argument is not visible here - confirm
// against TestIdentifiers.
static TestIdentifiers ti;
static {
try {
ti = new TestIdentifiers("L");
} catch (IOException e) {
// Best effort: the failure is only printed, so ti stays null when loading fails
// and any later use of ti has to cope with that.
e.printStackTrace();
}
}
/**
 * Writes the trailing rows of the character entry that is currently pending
 * (identified by {@code lastCodePoint}) and then marks the entry as finished.
 * <p>
 * Rows are written, each only when it adds something new, for the full upper,
 * title, lower and fold case forms and for the NFD and NFKD forms. When the
 * character is its own NFKD form, one extra row is written per confusable
 * reported by {@code ti}.
 * Does nothing when no entry is pending ({@code lastCodePoint < 0}).
 *
 * @param out writer of the names page being generated
 */
private static void finishItem(PrintWriter out) {
    if (lastCodePoint < 0) return;
    if (lastDecompType != UCD.NONE) {
        // the entry has a decomposition that no ':' or '#' line accounted for
        System.out.println("Alert: missing decomp for " + Utility.hex(lastCodePoint));
    }
    String str = UTF16.valueOf(lastCodePoint);
    String upper = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.UPPER), "\u2191");
    showForm(out, str, upper, null, Default.ucd().getCase(str,UCD.FULL,UCD.TITLE), "\u2195");
    String lower = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.LOWER), "\u2193");
    showForm(out, lower, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.FOLD), "\u2194");

    String dc = Default.ucd().getDecompositionMapping(lastCodePoint);
    String nfd = showForm(out, dc, str, null, Default.nfd().normalize(lastCodePoint), "\u21DB");
    //String nfc = showForm(out, dc, null, Default.nfc().normalize(lastCodePoint), "\u21DB");
    String nfkd = showForm(out, dc, str, nfd, Default.nfkd().normalize(lastCodePoint), "\u21DD");

    // Fix: ti is null when its static initializer failed (the IOException is only
    // printed there); skip the confusables rows instead of throwing a NullPointerException.
    if (ti != null && nfkd.equals(str)) {
        Set s = ti.getConfusables(lastCodePoint, "MA");
        if (s.size() > 1) {
            sortedSet.clear();
            for (Iterator it = s.iterator(); it.hasNext();) {
                sortedSet.add(Default.nfkd().normalize((String)it.next()));
            }
            sortedSet.remove(nfkd); // remove me
            for (Iterator it = sortedSet.iterator(); it.hasNext();) {
                String other = (String)it.next();
                if (nfkd.equals(Default.nfkd().normalize(other))) continue;
                out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='conf'>\u279F\u00A0"
                        + showTextConvertingHex(Utility.hex(other, 4, " + "), true)
                        + " "
                        + Default.ucd().getName(other, UCD.NORMAL, " + ").toLowerCase()
                        + "</td></tr>");
            }
        }
    }
    lastCodePoint = -1;
}

// Scratch set reused by finishItem to order confusables with an English collator.
static Set sortedSet = new TreeSet(Collator.getInstance(ULocale.ENGLISH));
/**
 * Writes one table row for a transformed form of a character, unless the form is
 * equal to any of the three reference strings, and hands the form back to the caller.
 *
 * @param out         writer of the names page being generated
 * @param str         first reference string (may be null)
 * @param str2        second reference string (may be null)
 * @param str3        third reference string (may be null)
 * @param transformed the form to show
 * @param symbol      marker placed at the start of the row's text cell
 * @return {@code transformed}, unchanged
 */
private static String showForm(PrintWriter out, String str, String str2, String str3, String transformed, String symbol) {
    boolean alreadyShown = transformed.equals(str)
            || transformed.equals(str2)
            || transformed.equals(str3);
    if (alreadyShown) {
        return transformed;
    }
    String hexCell = showTextConvertingHex(Utility.hex(transformed, 4, " + "), true);
    String nameCell = "";
    if (UTF16.countCodePoint(transformed) == 1) {
        // only a single code point gets its (lower-cased) name appended
        nameCell = " " + Default.ucd().getName(transformed, UCD.NORMAL, " + ").toLowerCase();
    }
    out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='c'>" + symbol + "\u00A0"
            + hexCell
            + nameCell
            + "</td></tr>");
    return transformed;
}
/**
 * Strips the last parenthesised suffix from a block name: everything from the
 * final occurrence of {@code " ("} onward is dropped.
 *
 * @param name block name, possibly ending in a parenthesised remark
 * @return the name up to the last {@code " ("}, or the name itself when there is none
 */
static public String getHeading(String name) {
    int cut = name.lastIndexOf(" (");
    return cut >= 0 ? name.substring(0, cut) : name;
}
/**
 * Applies {@code nameStyle} to a string only when requested and the string is
 * unchanged by English upper-casing; otherwise returns the string as is.
 *
 * @param string text to format
 * @param b      whether name styling may be applied at all
 * @return the styled or original text
 */
private static String maybeNameStyle(String string, boolean b) {
    if (!b) {
        return string;
    }
    boolean allCaps = string.equals(string.toUpperCase(Locale.ENGLISH));
    return allCaps ? nameStyle(string) : string;
}
/**
 * Formats a character name for display: the text is title-cased through the UCD
 * and wrapped in {@code <i>} tags. Any sequence matched by {@code escapeMatch}
 * (an ampersand, one capital, lower-case letters, a semicolon) is lower-cased again
 * so that HTML entities damaged by the title-casing are restored.
 *
 * @param string name text, already HTML-escaped
 * @return the italicised, title-cased markup
 */
private static String nameStyle(String string) {
    String styled = "<i>" + Default.ucd().getCase(string, UCD.FULL, UCD.TITLE) + "</i>";
    // if it has any &xxx;, then restore them.
    int from = 0;
    while (escapeMatch.reset(styled).find(from)) {
        int entityStart = escapeMatch.start();
        int entityEnd = escapeMatch.end();
        String entity = styled.substring(entityStart, entityEnd).toLowerCase();
        styled = styled.substring(0, entityStart) + entity + styled.substring(entityEnd);
        from = entityEnd; // continue searching after the entity just handled
    }
    return styled;
}
static Matcher escapeMatch = Pattern.compile("\\&[A-Z][a-z]*\\;").matcher("");
/**
 * HTML-escapes a piece of names-list text and, on request, decorates every code
 * point number in it: each run of 4 to 6 upper-case hex digits whose value is at
 * most 10FFFF is wrapped in {@code <code>} tags and followed by a no-break space
 * and the rendering of that character.
 *
 * @param body          raw text
 * @param addCharToHex  whether hex numbers should be decorated
 * @return the escaped (and possibly decorated) text
 */
private static String showTextConvertingHex(String body, boolean addCharToHex) {
    String text = BagFormatter.toHTML.transliterate(body);
    if (!addCharToHex) {
        return text;
    }
    int cursor = 0;
    while (cursor < text.length() && findHex.reset(text).find(cursor)) {
        int from = findHex.start();
        int to = findHex.end();
        cursor = to;
        int digits = to - from;
        if (digits >= 4 && digits <= 6) {
            int cp = Integer.parseInt(findHex.group(), 16);
            if (cp <= 0x10FFFF) {
                String head = text.substring(0, from)
                        + "<code>" + text.substring(from, to) + "</code>"
                        + "\u00A0" + showChar(cp);
                text = head + text.substring(to);
                // resume after the inserted markup so it is never rescanned
                cursor = head.length();
            }
        }
    }
    return text;
}
// Shared, reusable matchers; callers re-target them with reset(...) before use.
// A Matcher is stateful, so these must not be used from more than one thread at a time.

// Cross reference of the form "x (name - HEX)": group 1 is the name, group 2 the hex number.
static Matcher pointer = Pattern.compile("x \\((.*) - ([0-9A-F]+)\\)").matcher("");
// Cross reference of the form "x HEX" with 4 to 6 hex digits (group 1).
static Matcher pointer2 = Pattern.compile("x ([0-9A-F]{4,6})").matcher("");
// Any run of upper-case hex digits; used by showTextConvertingHex to locate code point numbers.
static Matcher findHex = Pattern.compile("[0-9A-F]+").matcher("");
/**
 * Rewrites a cross-reference line into display form.
 * <p>
 * Accepts either {@code x (name - HEX)} or {@code x HEX}. For the first form the
 * given name is compared, ignoring case, with the UCD name of the code point and a
 * mismatch is reported on standard output.
 *
 * @param body trimmed cross-reference line
 * @return a right arrow, a space, the 4-digit (minimum) hex code point and, when the
 *         line carried one, a space and the name
 * @throws IllegalArgumentException if the line has neither accepted form
 */
private static String getOther(String body) {
    // of form: x (hyphenation point - 2027)
    // => arrow 2027 X hyphenation point
    boolean hasName = pointer.reset(body).matches();
    if (!hasName && !pointer2.reset(body).matches()) {
        throw new IllegalArgumentException("Bad format: " + body);
    }

    int target;
    String label = null;
    if (hasName) {
        target = Integer.parseInt(pointer.group(2), 16);
        label = pointer.group(1);
        String ucdName = Default.ucd().getName(target);
        if (ucdName == null) {
            ucdName = "<not a character>";
        }
        if (!label.equalsIgnoreCase(ucdName)) {
            System.out.println("Mismatch in name for " + body + " in " + Utility.hex(lastCodePoint));
            System.out.println("\tName is: " + ucdName);
        }
    } else {
        // bare "x HEX" form: no name to verify or display
        target = Integer.parseInt(pointer2.group(1), 16);
    }

    String suffix = (label == null) ? "" : " " + label;
    return "\u2192 " + Utility.hex(target, 4) + suffix;
}
/**
 * Produces the HTML used to display one character.
 * <p>
 * Members of {@code usePicture} are replaced by a stand-in glyph inside an
 * {@code inv} span: U+2400 plus the code point for values up to U+0020, U+2421 for
 * U+007F, and U+2588 otherwise. All other characters are HTML-escaped; those of
 * category Me or Mn are prefixed with U+25CC, and otherwise members of {@code rtl}
 * are wrapped in U+200E marks.
 *
 * @param cp code point to render
 * @return HTML fragment for the character
 */
static String showChar(int cp) {
    if (usePicture.contains(cp)) {
        int standIn;
        if (cp <= 0x20) {
            standIn = 0x2400 + cp;
        } else if (cp == 0x7F) {
            standIn = 0x2421;
        } else {
            standIn = '\u2588';
        }
        return "<span class='inv'>" + (char)standIn + "</span>";
        //String hex = Utility.hex(cp);
        //return "<img alt='" + hex + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + hex + "'>";
    }

    int category = Default.ucd().getCategory(cp);
    String escaped = BagFormatter.toHTML.transliterate(UTF16.valueOf(cp));
    boolean isMark = category == UCD.Me || category == UCD.Mn;
    if (isMark) {
        return "\u25CC" + escaped;
    }
    if (rtl.contains(cp)) {
        return "\u200E" + escaped + "\u200E";
    }
    return escaped;
}
//static final UnicodeSet noname = new UnicodeSet("[[:ascii:][:ideographic:]]");

// Bookkeeping filled by checkCanonical / checkCompatibility and reported by main():
// each map goes from a decomposition string to the (last) character whose names-list
// line spelled that decomposition without ("hasNoName...") or with ("hasName...")
// a trailing character name. "Can" = canonical lines, "Comp" = compatibility lines.
static final Map hasNoNameCan = new TreeMap();
static final Map hasNameCan = new TreeMap();
static final Map hasNoNameComp = new TreeMap();
static final Map hasNameComp = new TreeMap();
/**
 * Validates a canonical-decomposition line (leading ':') of the names list against
 * the UCD decomposition of {@code lastCodePoint} and converts it to display form.
 * <p>
 * Mismatches in decomposition type or mapping are reported on standard output.
 * A matching line is recorded in {@code hasNoNameCan} (bare hex) or
 * {@code hasNameCan} (hex followed by the character name). The pending
 * decomposition type is cleared afterwards.
 *
 * @param codePoint code point used in messages and as the recorded source character
 * @param body      trimmed line including its two-character prefix
 * @return U+2261, a space and the line text without its prefix
 */
private static String checkCanonical(int codePoint, String body) {
    String text = body.substring(2);
    if (lastDecompType != UCD.CANONICAL) {
        System.out.println("Mismatching Decomposition Type: " + text + " in " + Utility.hex(codePoint));
    }

    String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
    String bare = Utility.hex(mapping, 4, " ");
    // a single-code-point decomposition may also be written with its name appended
    String withName = bare;
    if (UTF16.countCodePoint(mapping) == 1) {
        withName = bare + " " + Default.ucd().getName(mapping).toLowerCase();
    }

    boolean matchesBare = bare.equalsIgnoreCase(text);
    if (matchesBare) {
        hasNoNameCan.put(mapping, UTF16.valueOf(codePoint));
    } else if (withName.equalsIgnoreCase(text)) {
        hasNameCan.put(mapping, UTF16.valueOf(codePoint));
    } else {
        System.out.println("Mismatching Decomposition: " + text + " in " + Utility.hex(codePoint));
        System.out.println("\tShould be: " + bare);
    }

    lastDecompType = UCD.NONE;
    return "\u2261 " + text;
}
/**
 * Validates a compatibility-decomposition line (leading '#') of the names list
 * against the UCD decomposition of {@code lastCodePoint} and converts it to
 * display form.
 * <p>
 * The expected text is the hex mapping, prefixed with the decomposition type ID in
 * angle brackets unless the type is {@code UCD.COMPAT_UNSPECIFIED}. Mismatches are
 * reported on standard output. A matching line is recorded in
 * {@code hasNoNameComp} (no name) or {@code hasNameComp} (name appended). The
 * pending decomposition type is cleared afterwards.
 *
 * @param codePoint code point used in messages and as the recorded source character
 * @param body      trimmed line including its two-character prefix
 * @return U+2248, a space and the line text without its prefix
 */
private static String checkCompatibility(int codePoint, String body) {
    String text = body.substring(2);
    if (lastDecompType <= UCD.CANONICAL) {
        System.out.println("Mismatching Decomposition Type: " + text + " in " + Utility.hex(codePoint));
    }

    String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
    String expected = Utility.hex(mapping, 4, " ");
    if (lastDecompType != UCD.COMPAT_UNSPECIFIED) {
        String typeId = Default.ucd().getDecompositionTypeID(lastCodePoint);
        expected = "<" + typeId + "> " + expected;
    }
    // a single-code-point decomposition may also be written with its name appended
    String expectedWithName = expected;
    if (UTF16.countCodePoint(mapping) == 1) {
        expectedWithName = expected + " " + Default.ucd().getName(mapping).toLowerCase();
    }

    boolean matchesPlain = expected.equalsIgnoreCase(text);
    if (matchesPlain) {
        hasNoNameComp.put(mapping, UTF16.valueOf(codePoint));
    } else if (expectedWithName.equalsIgnoreCase(text)) {
        hasNameComp.put(mapping, UTF16.valueOf(codePoint));
    } else {
        System.out.println("Mismatching Decomposition: " + text + " in " + Utility.hex(codePoint));
        System.out.println("\tShould be: " + expected);
    }

    lastDecompType = UCD.NONE;
    return "\u2248 " + text;
}
/**
 * Splits a names-list file into blocks. A block starts at a line beginning with
 * "@@" followed by a tab and runs up to, but not including, the next such line.
 */
static class BlockInfo {
    /** Reader over the names-list file; the owner is responsible for closing it. */
    BufferedReader in;
    /** Header line of the next block, read ahead by the previous call to next(). */
    String lastLine;

    /**
     * Opens the given Unicode data file for the given version.
     *
     * @param version  Unicode version string passed to the file locator
     * @param filename name of the file to open
     * @throws IOException if the file cannot be opened
     */
    BlockInfo (String version, String filename) throws IOException {
        in = Utility.openUnicodeFile(filename, version, true, Utility.LATIN1_WINDOWS);
        //in = BagFormatter.openUTF8Reader(dir, filename);
    }

    /**
     * Replaces the contents of the list with the lines of the next block.
     *
     * @param inout list that receives the block's lines, header line first
     * @return true when at least one line was delivered
     * @throws IOException if reading fails
     */
    boolean next(List inout) throws IOException {
        inout.clear();
        // the header of this block was consumed while finishing the previous one
        if (lastLine != null) {
            inout.add(lastLine);
            lastLine = null;
        }
        for (String current = in.readLine(); current != null; current = in.readLine()) {
            if (current.startsWith("@@\t")) {
                lastLine = current; // keep the next block's header for the following call
                break;
            }
            inout.add(current);
        }
        return !inout.isEmpty();
    }
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
* $Date: 2004/04/17 18:21:39 $
* $Revision: 1.16 $
* $Date: 2005/11/01 00:10:54 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -136,7 +136,7 @@ public final class Normalizer implements UCD_Types {
/**
* Normalizes text according to the chosen form
* @param source the original text, unnormalized
* @param cp the code point to normalize
* @return target the resulting normalized text
*/
public String normalize(int cp) {
@ -157,7 +157,7 @@ public final class Normalizer implements UCD_Types {
/**
* Does a quick check to see if the string is in the current form. Checks canonical order and
* isAllowed().
* @param source source text
* @param newLocaleID source text
* @return YES, NO, MAYBE
*/
/*

View file

@ -86,7 +86,7 @@ public class NormalizerSample implements UCD_Types {
/**
* Normalizes text according to the chosen form
* @param source the original text, unnormalized
* @param cp the code point to normalize
* @return target the resulting normalized text
*/
public String normalize(int cp) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.7 $
* $Date: 2005/11/01 00:10:54 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -21,6 +21,7 @@ import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
@ -30,17 +31,92 @@ import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
public static void main(String[] args) throws IOException {
getBidiMirrored();
if (true) return;
getLengths("NFC", Default.nfc());
getLengths("NFD", Default.nfd());
getLengths("NFKC", Default.nfkc());
getLengths("NFKD", Default.nfkd());
System.out.println("Done");
try {
//getBidiMirrored();
getCaseFoldingUnstable();
if (true) return;
getHasAllNormalizations();
getLengths("NFC", Default.nfc());
getLengths("NFD", Default.nfd());
getLengths("NFKC", Default.nfkc());
getLengths("NFKD", Default.nfkd());
} finally {
System.out.println("Done");
}
}
/**
 * Compares full case folding between adjacent entries of
 * com.ibm.text.utility.Utility.searchPath and prints every code point,
 * assigned in the older of the two versions, whose folding differs.
 * Memory usage is printed before and after each UCD is loaded.
 */
private static void getCaseFoldingUnstable() {
    final String[] versions = com.ibm.text.utility.Utility.searchPath;
    // NOTE(review): the scan starts at index 3, presumably to skip the leading
    // non-version / newest entries of searchPath -- confirm against that array.
    for (int i = 3; i < versions.length - 1; ++i) {
        String newName = versions[i];
        String oldName = versions[i+1];
        showMemoryUsage();
        UCD ucdNew = UCD.make(newName);
        showMemoryUsage();
        UCD ucdOld = UCD.make(oldName);
        showMemoryUsage();

        UnicodeMap differences = new UnicodeMap();
        UnicodeSet differenceSet = new UnicodeSet();
        // Inclusive upper bound: the code space ends at U+10FFFF, and the
        // previous "<" bound silently left that last code point unchecked.
        for (int j = 0; j <= 0x10FFFF; ++j) {
            if (!ucdOld.isAssigned(j)) continue;
            String oldString = ucdOld.getCase(j, UCD.FULL, UCD.FOLD);
            String newString = ucdNew.getCase(j, UCD.FULL, UCD.FOLD);
            if (!oldString.equals(newString)) {
                differenceSet.add(j);
                differences.put(j, new String[]{oldString, newString});
                System.out.println(".");
            }
        }

        if (differenceSet.size() != 0) {
            System.out.println("Differences in " + newName);
            for (UnicodeSetIterator it = new UnicodeSetIterator(differenceSet); it.next();) {
                System.out.println(ucdNew.getCodeAndName(it.codepoint));
                // strings[0] is the older folding, strings[1] the newer one.
                String[] strings = (String[]) differences.getValue(it.codepoint);
                System.out.println("\t" + oldName + ": " + ucdNew.getCodeAndName(strings[0]));
                System.out.println("\t" + newName + ": " + ucdNew.getCodeAndName(strings[1]));
            }
        }
    }
}
/**
 * Requests garbage collection sixteen times in a row, then prints the JVM's
 * total and free memory on one line of standard output.
 */
static public void showMemoryUsage() {
    for (int pass = 0; pass < 16; ++pass) {
        System.gc();
    }
    Runtime runtime = Runtime.getRuntime();
    long total = runtime.totalMemory();
    long free = runtime.freeMemory();
    System.out.println("total:\t" + total + ";\tfree:\t" + free);
}
/**
 * Prints every assigned code point with a decomposition type other than
 * UCD.NONE for which the character itself and its NFC, NFD, NFKD and NFKC
 * forms comprise more than three distinct strings. Each output line lists the
 * character, its name, and the hex and escaped text of the four forms.
 */
private static void getHasAllNormalizations() {
    // Reused for each code point to count the distinct strings among the forms.
    Set s = new LinkedHashSet();
    for (int i = 0; i <= 0x10FFFF; ++i) {
        if (!Default.ucd().isAssigned(i)) continue;
        if (Default.ucd().getDecompositionType(i) == UCD.NONE) continue;

        String source = UTF16.valueOf(i);
        String nfc = Default.nfc().normalize(source);
        String nfd = Default.nfd().normalize(source);
        String nfkd = Default.nfkd().normalize(source);
        String nfkc = Default.nfkc().normalize(source);

        s.clear();
        s.add(source);
        s.add(nfc);
        s.add(nfd);
        s.add(nfkd);
        s.add(nfkc);
        if (s.size() > 3) {
            System.out.println(Utility.hex(source) + "\t" + Utility.escape(source)
                    + "\t" + Default.ucd().getName(source)
                    + "\tnfd\t" + Utility.hex(nfd) + "\t" + Utility.escape(nfd)
                    + "\tnfc\t" + Utility.hex(nfc) + "\t" + Utility.escape(nfc)
                    + "\tnfkd\t" + Utility.hex(nfkd) + "\t" + Utility.escape(nfkd)
                    + "\tnfkc\t" + Utility.hex(nfkc) + "\t" + Utility.escape(nfkc));
        }
    }
}
private static void getBidiMirrored() {
ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
UnicodeMap status = new UnicodeMap();
@ -92,9 +168,10 @@ public class QuickTest implements UCD_Types {
UnicodeSet set = status.getSet(value);
for (UnicodeSetIterator umi = new UnicodeSetIterator(set); umi.next();) {
System.out.println(Utility.hex(umi.codepoint)
+ ";\t" + value
+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
+ ";\t" + Default.ucd().getName(umi.codepoint));
+ (value.startsWith("*") ? ";\tBidi_Mirrored" : "")
+ "\t#\t" + value
//+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
+ "\t" + Default.ucd().getName(umi.codepoint));
}
}
}
@ -288,6 +365,6 @@ public class QuickTest implements UCD_Types {
System.out.println("\tCount:" + set1.size());
System.out.println("\tSet:" + set1.toPattern(true));
System.out.println("\tDetails:");
Utility.showSetNames("", set1, false, Default.ucd());
//Utility.showSetNames("", set1, false, Default.ucd());
}
}

View file

@ -4,10 +4,15 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.XEquivalenceClass;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;
@ -34,6 +39,14 @@ public class TestIdentifiers {
System.out.print(folded);
ti.testItem(folded);
}
for (int j = 0; j < tests[i].length(); ++j) {
int cp = tests[i].charAt(j);
Set s = ti.getConfusables(cp, "MA");
System.out.println(Default.ucd().getCodeAndName(cp));
for (Iterator it = s.iterator(); it.hasNext();) {
System.out.println("\t= " + Default.ucd().getCodeAndName((String)it.next()));
}
}
}
}
@ -141,6 +154,49 @@ public class TestIdentifiers {
}
br.close();
}
// Maps the type field of a confusables.txt entry (String) to the XEquivalenceClass
// collecting that type's source/target pairs; null until loadConfusables() has run.
Map type_equivalences;
/**
 * Reads "confusables.txt" from {@code indir} and builds, for each type found
 * in the file, an XEquivalenceClass of its source/target pairs. The result is
 * stored in {@code type_equivalences} only after the whole file has been
 * read, so a failed load never leaves a partially filled map behind.
 *
 * @throws IOException if the file cannot be opened, or cannot be closed after
 *         a successful read
 * @throws RuntimeException wrapping any failure while reading or parsing a
 *         line; the message contains the offending line
 */
void loadConfusables() throws IOException {
    BufferedReader br = BagFormatter.openUTF8Reader(indir,
            "confusables.txt");
    Map loaded = new HashMap();
    String line = null;
    boolean succeeded = false;
    try {
        while (true) {
            line = Utility.readDataLine(br);
            if (line == null)
                break;
            if (line.length() == 0)
                continue;
            String[] pieces = Utility.split(line, ';');
            // field 0 is the source, as hex
            String s = Utility.fromHex(pieces[0].trim());
            // field 1 is the target, as hex
            String t = Utility.fromHex(pieces[1].trim());
            // field 2 is the type that selects the equivalence class
            String type = pieces[2].trim();
            XEquivalenceClass ec = (XEquivalenceClass) loaded.get(type);
            if (ec == null) {
                ec = new XEquivalenceClass("");
                loaded.put(type, ec);
            }
            ec.add(s, t);
        }
        succeeded = true;
    } catch (Exception e) {
        throw new RuntimeException("Failure on line " + line, e);
    } finally {
        // Always release the reader, including when parsing failed.
        try {
            br.close();
        } catch (IOException closeFailure) {
            // A close failure is only reported when nothing else went wrong,
            // so it cannot hide the parse failure already being thrown.
            if (succeeded) throw closeFailure;
        }
    }
    type_equivalences = loaded;
}
/**
 * Returns the strings that the confusables data of the given type treats as
 * equivalent to a code point. The data is loaded on first use.
 *
 * @param cp code point to look up
 * @param type confusable type, as it appears in the type field of confusables.txt
 * @return the result of XEquivalenceClass.getEquivalences for the code point,
 *         or null if the data could not be loaded because of an IOException
 *         or if no equivalence class exists for {@code type}
 */
public Set getConfusables(int cp, String type) {
    try {
        if (type_equivalences == null) loadConfusables();
    } catch (IOException e) {
        // Existing contract: an unreadable data file is reported as "no result".
        return null;
    }
    XEquivalenceClass ec = (XEquivalenceClass) type_equivalences.get(type);
    if (ec == null) {
        // Unknown type: previously a NullPointerException on the next line.
        return null;
    }
    return ec.getEquivalences(UTF16.valueOf(cp));
}
void loadWholeScriptConfusables(String filterType) throws IOException {
UnicodeSet[][] script_script_set = new UnicodeSet[UScript.CODE_LIMIT][UScript.CODE_LIMIT];

View file

@ -73,7 +73,7 @@ public class TestUnicodeInvariants {
int variableCount = 0;
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
out.write('\uFEFF'); // BOM
BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
BufferedReader in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", "UnicodeInvariants.txt");
BagFormatter bf = new BagFormatter();
bf.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
BagFormatter bf2 = new BagFormatter();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2005/05/02 15:39:53 $
* $Revision: 1.39 $
* $Date: 2005/11/01 00:10:54 $
* $Revision: 1.40 $
*
*******************************************************************************
*/
@ -43,7 +43,7 @@ public final class UCD implements UCD_Types {
/**
* Used for the default version.
*/
public static final String latestVersion = "4.1.0";
public static final String latestVersion = "5.1.0";
/**
* Create singleton instance for default (latest) version
@ -158,12 +158,16 @@ public final class UCD implements UCD_Types {
* Get the character names for the code points in a string, separated by ", "
*/
public String getName(String s, byte style) {
// Delegates to the three-argument overload, using ", " as the separator.
return getName(s, style, ", ");
}
public String getName(String s, byte style, String separator) {
if (s.length() == 1) return getName(s.charAt(0), style); // optimize BMP
StringBuffer result = new StringBuffer();
int cp;
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
if (i > 0) result.append(", ");
if (i > 0) result.append(separator);
result.append(getName(cp, style));
}
return result.toString();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2005/03/10 02:37:20 $
* $Revision: 1.31 $
* $Date: 2005/11/01 00:10:54 $
* $Revision: 1.32 $
*
*******************************************************************************
*/
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
static final byte BINARY_FORMAT = 16; // bumped if binary format of UCD changes. Forces rebuild
static final byte BINARY_FORMAT = 17; // bumped if binary format of UCD changes. Forces rebuild
public static final String BASE_DIR = "C:\\DATA\\";
public static final String UCD_DIR = BASE_DIR + "UCD\\";

View file

@ -111,6 +111,21 @@ $XID_Continue ! [$Pattern_Whitespace $Pattern_Syntax]
$Pattern_Whitespace ! [$XID_Continue $Pattern_Syntax]
$Pattern_Syntax ! [$XID_Continue $Pattern_Whitespace]
# Test SA characters
# They are limited to certain scripts:
Let $SAScripts = [$script:thai $script:lao $script:myanmar $script:khmer]
$SAScripts ⊇ $LineBreak:SA
# And in those scripts, they are all the alphabetic spacing characters, plus some odd Cf
[$SAScripts & [$Alphabetic $gc:cf]] = [$SAScripts & [$LineBreak:SA $LineBreak:CM]]
# Try removing M* from alphabetic, and matching to SA
[$SAScripts & [$Alphabetic $gc:cf - $gcAllMarks]] = $LineBreak:SA
# Try adding M* to alphabetic, and matching to SA
[$SAScripts & [$Alphabetic $gc:cf $gcAllMarks]] = $LineBreak:SA
# testing
# [$Pattern_Whitespace $Pattern_Syntax] ! [[^$WB:Format $WB:Other] \u2019 \u0027 \u02BC \u002d \u00ad \u2027 \u058A]
Let $otherword = [\u2019 \u0027 \u02BC \u002d \u00ad \u2027 \u058A]

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/ChainException.java,v $
* $Date: 2001/12/06 00:05:52 $
* $Revision: 1.3 $
* $Date: 2005/11/01 00:10:53 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -20,7 +20,7 @@ import java.io.*;
public class ChainException extends RuntimeException {
Object[] keyData;
String messageFormat;
Exception chain;
//Exception chain;
public ChainException (String messageFormat, Object[] objects) {
this.messageFormat = messageFormat;
@ -30,20 +30,20 @@ public class ChainException extends RuntimeException {
public ChainException (String messageFormat, Object[] objects, Exception chainedException) {
this.messageFormat = messageFormat;
keyData = objects == null ? null : (Object[]) objects.clone();
chain = chainedException;
initCause(chainedException);
}
public String getMessage() {
String chainMsg = "";
if (chain != null) {
chainMsg = "; " + chain.getClass().getName()
+ ", " + chain.getMessage();
StringWriter w = new StringWriter();
PrintWriter p = new PrintWriter(w);
chain.printStackTrace(p);
chainMsg += ", " + w.getBuffer();
p.close();
}
// if (chain != null) {
// chainMsg = "; " + chain.getClass().getName()
// + ", " + chain.getMessage();
// StringWriter w = new StringWriter();
// PrintWriter p = new PrintWriter(w);
// chain.printStackTrace(p);
// chainMsg += ", " + w.getBuffer();
// p.close();
// }
String main = "";
if (keyData != null) main = MessageFormat.format(messageFormat, keyData);
return main + chainMsg;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2005/06/21 21:28:31 $
* $Revision: 1.50 $
* $Date: 2005/11/01 00:10:53 $
* $Revision: 1.51 $
*
*******************************************************************************
*/
@ -700,8 +700,9 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
return result + "}";
}
private static final String[] searchPath = {
public static final String[] searchPath = {
"EXTRAS",
"5.0.0",
"4.1.0",
"4.0.1",
"4.0.0",