misc fixes to UnicodeProperty, etc.

X-SVN-Rev: 14468
This commit is contained in:
Mark Davis 2004-02-07 01:01:17 +00:00
parent 873f4f09fd
commit d1ef79fafd
41 changed files with 2365 additions and 1111 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java,v $
* $Date: 2004/01/27 23:13:13 $
* $Revision: 1.2 $
* $Date: 2004/02/07 00:59:26 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -27,7 +27,9 @@ import java.text.DateFormat;
import java.text.NumberFormat;
import com.ibm.icu.dev.test.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.util.TimeZone;
import com.ibm.icu.util.VersionInfo;
/**
@ -36,7 +38,7 @@ import com.ibm.icu.util.TimeZone;
*
*/
public class TimeZoneAliasTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new TimeZoneAliasTest().run(args);
}
@ -47,6 +49,7 @@ public class TimeZoneAliasTest extends TestFmwk {
* 2. all aliases must have the same offsets
*/
public void TestAliases() {
if (skipIfBeforeICU(3,0)) return;
Zone.Seconds seconds = new Zone.Seconds();
for (Iterator it = Zone.getZoneSet().iterator(); it.hasNext(); ) {
Zone zone = (Zone)it.next();
@ -71,9 +74,9 @@ public class TimeZoneAliasTest extends TestFmwk {
if (!aliasesSet.equals(otherAliases)) {
errln(
"Aliases Unsymmetric: "
+ id + " => " + join(aliasesSet, ", ")
+ id + " => " + Zone.bf.join(aliasesSet)
+ "; "
+ otherId + " => " + join(otherAliases, ", "));
+ otherId + " => " + Zone.bf.join(otherAliases));
}
if (zone.findOffsetOrdering(otherZone, seconds) != 0) {
errln("Aliases differ: " + id + ", " + otherId
@ -87,12 +90,13 @@ public class TimeZoneAliasTest extends TestFmwk {
* We check to see that every timezone that is not an alias is actually different!
*/
public void TestDifferences() {
if (skipIfBeforeICU(3,0)) return;
Zone last = null;
Zone.Seconds diffDate = new Zone.Seconds();
for (Iterator it = Zone.getZoneSet().iterator(); it.hasNext();) {
Zone testZone = (Zone)it.next();
if (last != null) {
String common = testZone + " vs " + last + ":\t";
String common = testZone + "\tvs " + last + ":\t";
int diff = testZone.findOffsetOrdering(last, diffDate);
if (diff != 0) {
logln("\t" + common + "difference at: " + diffDate
@ -110,17 +114,17 @@ public class TimeZoneAliasTest extends TestFmwk {
/**
* Utility for printing out zones to be translated.
*/
public static void printZones() {
public static void TestGenerateZones() {
int count = 1;
for (Iterator it = Zone.getUniqueZoneSet().iterator(); it.hasNext();) {
Zone zone = (Zone)it.next();
System.out.println(zone.toString(count++));
}
}
/** Utility; ought to be someplace common
*/
/*
static String join(Collection c, String separator) {
StringBuffer result = new StringBuffer();
boolean isFirst = true;
@ -131,6 +135,7 @@ public class TimeZoneAliasTest extends TestFmwk {
}
return result.toString();
}
*/
/**
* The guts is in this subclass. It sucks in all the data from the zones,
@ -144,6 +149,7 @@ public class TimeZoneAliasTest extends TestFmwk {
*/
static class Zone implements Comparable {
// class fields
static private final BagFormatter bf = new BagFormatter().setSeparator(", ");
static private final DateFormat df = DateFormat.getDateInstance(DateFormat.LONG, Locale.US);
static private final NumberFormat nf = NumberFormat.getInstance(Locale.US);
static private final long HOUR = 1000*60*60;
@ -365,13 +371,13 @@ public class TimeZoneAliasTest extends TestFmwk {
public String getPurportedAliasesAsString() {
Set s = getPurportedAliases();
if (s.size() == 0) return "";
return " {" + join(s,", ") + "}";
return " " + bf.join(s);
}
public String getRealAliasesAsString() {
Set s = (Set)idToRealAliases.get(id);
if (s == null) return "";
return " {" + join(s,", ") + "}";
return " *" + bf.join(s);
}
public String getCity() {
@ -384,6 +390,9 @@ public class TimeZoneAliasTest extends TestFmwk {
return toString(-1);
}
/**
* Where count > 0, returns string that is set up for translation
*/
public String toString(int count) {
String city = getCity();
String hours = formatHours(minRecentOffset)
@ -393,9 +402,9 @@ public class TimeZoneAliasTest extends TestFmwk {
if (count < 0) {
return id + getPurportedAliasesAsString() + " (" + hours + ")";
}
// for getting template for translation
return "\t{\t\"" + id + "\"\t// [" + count + "] " + hours
+ getPurportedAliasesAsString() + getRealAliasesAsString() + "\r\n"
+ getRealAliasesAsString() + "\r\n"
+ "\t\t// translate the following!!\r\n"
+ (minRecentOffset != maxRecentOffset
? "\t\t\"" + city + " Standard Time\"\r\n"

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
* $Date: 2003/12/29 19:48:58 $
* $Revision: 1.5 $
* $Date: 2004/02/07 00:59:26 $
* $Revision: 1.6 $
*
*****************************************************************************************
*/
@ -22,6 +22,23 @@ import java.util.*;
import java.text.MessageFormat;
public class BagFormatter {
public static final Transliterator toHTML = Transliterator.createFromRules(
"any-html",
"'<' > '&lt;' ;" +
"'&' > '&amp;' ;" +
"'>' > '&gt;' ;" +
"'\"' > '&quot;' ; ",
Transliterator.FORWARD);
public static final Transliterator fromHTML = Transliterator.createFromRules(
"html-any",
"'<' < '&'[lL][Tt]';' ;" +
"'&' < '&'[aA][mM][pP]';' ;" +
"'>' < '&'[gG][tT]';' ;" +
"'\"' < '&'[qQ][uU][oO][tT]';' ; ",
Transliterator.REVERSE);
public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);
private static PrintWriter log = CONSOLE;
@ -132,8 +149,7 @@ public class BagFormatter {
StringWriter buffer = new StringWriter();
PrintWriter output = new PrintWriter(buffer);
output.println(title);
mainVisitor.output = output;
mainVisitor.doAt(c);
mainVisitor.doAt(c, output);
return buffer.toString();
}
@ -147,8 +163,8 @@ public class BagFormatter {
*/
public void showSetNames(PrintWriter output, String title, Object c) {
output.println(title);
mainVisitor.output = output;
mainVisitor.doAt(c);
mainVisitor.doAt(c, output);
output.flush();
}
/**
@ -244,91 +260,116 @@ public class BagFormatter {
}
*/
public void setMergeRanges(boolean in) {
public BagFormatter setMergeRanges(boolean in) {
mergeRanges = in;
return this;
}
public void setShowSetAlso(boolean b) {
public BagFormatter setShowSetAlso(boolean b) {
showSetAlso = b;
return this;
}
public String getName(int codePoint) {
String hcp = "U+" + Utility.hex(codePoint, 4) + " ";
String result = nameProp.getPropertyValue(codePoint);
if (result != null)
return hcp + result;
String prop = catProp.getPropertyValue(codePoint);
if (prop.equals("Control")) {
result = nameProp.getPropertyValue(codePoint);
if (result != null)
return hcp + "<" + result + ">";
}
return hcp + "<reserved>";
/*public String getName(int codePoint) {
return getName(codePoint, false);
}*/
public String getName(String separator, int start, int end) {
if (nameSource == null || nameSource == UnicodeProperty.NULL) return "";
String result = getName(start, false);
if (start == end) return separator + result;
String endString = getName(end, false);
if (abbreviated) endString = getAbbreviatedName(endString,result,"~");
return separator + result + ".." + endString;
}
public String getName(String s) {
if (s.length() == 1) return getName(s.charAt(0)); // optimize
StringBuffer sb = new StringBuffer();
int cp;
for (int i = 0; i < s.length(); i+=UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s,i);
if (i != 0) sb.append(separator);
sb.append(getName(cp));
return getName(s, false);
}
UnicodeLabel nameSource;
static class NameLabel extends UnicodeLabel {
UnicodeProperty nameProp;
UnicodeProperty name1Prop;
UnicodeProperty catProp;
//UnicodeProperty shortCatProp;
NameLabel(UnicodeProperty.Factory source) {
nameProp = source.getProperty("Name");
name1Prop = source.getProperty("Unicode_1_Name");
catProp = source.getProperty("General_Category");
//shortCatProp = source.getProperty("General_Category");
}
public String getValue(int codePoint, boolean isShort) {
String hcp = !isShort
? "U+" + Utility.hex(codePoint, 4) + " "
: "";
String result = nameProp.getValue(codePoint);
if (result != null)
return hcp + result;
String prop = catProp.getValue(codePoint, true);
if (prop.equals("Control")) {
result = name1Prop.getValue(codePoint);
if (result != null)
return hcp + "<" + result + ">";
}
return hcp + "<reserved>";
}
return sb.toString();
}
// refactored
public String getName(int codePoint, boolean withCodePoint) {
return nameSource.getValue(codePoint, !withCodePoint);
}
public String getName(String s, boolean withCodePoint) {
return nameSource.getValue(s, separator, !withCodePoint);
}
public String hex(String s) {
if (s.length() == 1) return Utility.hex(s.charAt(0),4); // optimize
StringBuffer sb = new StringBuffer();
int cp;
for (int i = 0; i < s.length(); i+=UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s,i);
if (i != 0) sb.append(separator);
sb.append(Utility.hex(cp,4));
}
return sb.toString();
return UnicodeLabel.HEX.getValue(s, separator, true);
}
String separator = ",";
UnicodePropertySource source;
UnicodePropertySource labelSource;
public String hex(int start, int end) {
String s = Utility.hex(start,4);
if (start == end) return s;
return s + ".." + Utility.hex(end,4);
}
private String separator = ",";
private String prefix = "[";
private String suffix = "]";
UnicodeProperty.Factory source;
UnicodeLabel labelSource = UnicodeLabel.NULL;
UnicodeLabel valueSource = UnicodeLabel.NULL;
private boolean showCount = true;
UnicodePropertySource nameProp;
UnicodePropertySource name1Prop;
UnicodePropertySource catProp;
UnicodePropertySource shortCatProp;
public void setUnicodePropertySource(UnicodePropertySource source) {
public BagFormatter setUnicodePropertySource(UnicodeProperty.Factory source) {
this.source = source;
nameProp = ((UnicodePropertySource)source.clone())
.setPropertyAlias("Name");
name1Prop = ((UnicodePropertySource)source.clone())
.setPropertyAlias("Unicode_1_Name");
catProp = ((UnicodePropertySource)source.clone())
.setPropertyAlias("General_Category");
shortCatProp = ((UnicodePropertySource)source.clone())
.setPropertyAlias("General_Category")
.setNameChoice(UProperty.NameChoice.SHORT);
nameSource = new NameLabel(source);
return this;
}
{
setUnicodePropertySource(new UnicodePropertySource.ICU());
setUnicodePropertySource(ICUPropertyFactory.make());
Map labelMap = new HashMap();
labelMap.put("Lo","L&");
labelMap.put("Lu","L&");
labelMap.put("Lt","L&");
setLabelSource(new UnicodePropertySource.ICU()
.setPropertyAlias("General_Category")
.setNameChoice(UProperty.NameChoice.SHORT)
.setFilter(
new UnicodePropertySource.MapFilter().setMap(labelMap)));
setLabelSource(new UnicodeProperty.FilteredUnicodeProperty(
source.getProperty("General_Category"),
new UnicodeProperty.MapFilter(labelMap)));
}
public String join(Object o) {
return labelVisitor.join(o);
}
// ===== PRIVATES =====
private Visitor.Join labelVisitor = new Visitor.Join();
private Join labelVisitor = new Join();
private boolean mergeRanges = true;
private Transliterator showLiteral = null;
@ -375,36 +416,38 @@ public class BagFormatter {
}
}
private Tabber singleTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
6,
Tabber.LEFT,
10,
Tabber.LEFT,
14,
Tabber.LEFT });
private Tabber rangeTabber =
new Tabber.MonoTabber(
new int[] {
0,
Tabber.LEFT,
14,
Tabber.LEFT,
18,
Tabber.LEFT,
27,
Tabber.LEFT,
34,
Tabber.LEFT });
private static NumberFormat nf =
NumberFormat.getIntegerInstance(Locale.ENGLISH);
private String lineSeparator = "\r\n";
private class MyVisitor extends Visitor {
PrintWriter output;
private PrintWriter output;
Tabber.MonoTabber myTabber;
String commentSeparator = "\t# ";
public void doAt(Object c, PrintWriter output) {
this.output = output;
myTabber = new Tabber.MonoTabber();
int valueSize = valueSource.getMaxWidth(shortValue);
if (valueSize > 0) valueSize += 2;
if (!mergeRanges) {
myTabber.add(0,Tabber.LEFT);
myTabber.add(6 + valueSize,Tabber.LEFT);
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
myTabber.add(4,Tabber.LEFT);
} else {
myTabber.add(0,Tabber.LEFT);
myTabber.add(15 + valueSize,Tabber.LEFT);
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
myTabber.add(11,Tabber.LEFT);
myTabber.add(7,Tabber.LEFT);
}
commentSeparator = (showCount || showLiteral != null
|| labelSource != UnicodeProperty.NULL || nameSource != UnicodeProperty.NULL)
? "\t# " : "";
doAt(c);
}
public String format(Object o) {
StringWriter sw = new StringWriter();
@ -418,7 +461,7 @@ public class BagFormatter {
protected void doBefore(Object container, Object o) {
if (showSetAlso && container instanceof UnicodeSet) {
output.println("# " + container);
output.print("# " + container + lineSeparator);
}
}
@ -426,7 +469,7 @@ public class BagFormatter {
}
protected void doAfter(Object container, Object o) {
output.println("# Total: " + nf.format(count(container)));
output.print("# Total: " + nf.format(count(container)) + lineSeparator);
}
protected void doSimpleAt(Object o) {
@ -443,35 +486,40 @@ public class BagFormatter {
doAt((Visitor.CodePointRange) o);
} else {
String thing = o.toString();
output.println(
singleTabber.process(
output.print(
myTabber.process(
hex(thing)
+ " \t# "
+ commentSeparator
+ insertLiteral(thing)
+ " \t"
+ getName(thing)));
+ "\t"
+ getName(thing))
+ lineSeparator);
}
}
protected void doAt(Visitor.CodePointRange usi) {
if (!mergeRanges) {
for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
String label = labelSource.getPropertyValue(cp);
String label = labelSource.getValue(cp, shortLabel);
if (label.length() != 0)
label += " ";
output.println(
singleTabber.process(
String value = valueSource.getValue(cp, shortValue);
if (value.length() != 0) {
value = "; " + value;
}
output.print(
myTabber.process(
Utility.hex(cp, 4)
+ " \t# "
+ value
+ commentSeparator
+ label
+ insertLiteral(cp)
+ " \t"
+ getName(cp)));
+ insertLiteral(cp,cp)
+ getName("\t", cp, cp))
+ lineSeparator);
}
} else {
rf.reset(usi.codepoint, usi.codepointEnd + 1);
String label;
while ((label = rf.next()) != null) {
while (rf.next()) {
/*
String label = (usi.codepoint != usi.codepointEnd)
? label = getLabels(usi.codepoint, usi.codepointEnd)
@ -479,31 +527,24 @@ public class BagFormatter {
*/
int start = rf.start;
int end = rf.limit - 1;
String label = rf.label;
if (label.length() != 0)
label += " ";
output.println(
rangeTabber.process(
Utility.hex(start, 4)
+ ((start != end)
? (".." + Utility.hex(end, 4))
: "")
+ " \t# "
String value = rf.value;
if (value.length() != 0) {
value = "; " + value;
}
String count = showCount ? "\t["+ nf.format(end - start + 1)+ "]" : "";
output.print(
myTabber.process(
hex(start, end)
+ value
+ commentSeparator
+ label
+ " \t["
+ nf.format(end - start + 1)
+ "]"
+ count
+ insertLiteral(start, end)
+ " \t"
+ getName(start)
+ ((start != end)
? (".."
+ (abbreviated
? getAbbreviatedName(
getName(end),
getName(start),
"~")
: getName(end)))
: "")));
+ getName("\t", start, end))
+ lineSeparator);
}
}
}
@ -521,11 +562,12 @@ public class BagFormatter {
: "")
+ ") ");
}
/*
private String insertLiteral(int cp) {
return (showLiteral == null ? ""
: " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
}
*/
}
/**
@ -581,44 +623,67 @@ public class BagFormatter {
private class RangeFinder {
int start, limit;
private int veryLimit;
void reset(int start, int end) {
String label, value;
void reset(int start, int limit) {
this.limit = start;
this.veryLimit = end;
this.veryLimit = limit;
}
String next() {
boolean next() {
if (limit >= veryLimit)
return null;
start = limit;
String label = labelSource.getPropertyValue(limit++);
for (; limit < veryLimit; ++limit) {
String s = labelSource.getPropertyValue(limit);
if (!s.equals(label))
break;
return false;
start = limit; // set to end of last
label = labelSource.getValue(limit, shortLabel);
value = valueSource.getValue(limit, shortLabel);
limit++;
for (; limit < veryLimit; limit++) {
String s = labelSource.getValue(limit, shortLabel);
String v = valueSource.getValue(limit, shortLabel);
if (!s.equals(label) || !v.equals(value)) break;
}
return label;
// at this point, limit is the first item that has a different label than source
// OR, we got to the end, and limit == veryLimit
return true;
}
}
boolean shortLabel = true;
boolean shortValue = true;
public String getPrefix() {
return prefix;
}
public String getSuffix() {
return suffix;
}
public BagFormatter setPrefix(String string) {
prefix = string;
return this;
}
public BagFormatter setSuffix(String string) {
suffix = string;
return this;
}
public boolean isAbbreviated() {
return abbreviated;
}
public void setAbbreviated(boolean b) {
public BagFormatter setAbbreviated(boolean b) {
abbreviated = b;
return this;
}
public UnicodePropertySource getSource() {
public UnicodeProperty.Factory getSource() {
return source;
}
public UnicodePropertySource getLabelSource() {
public UnicodeLabel getLabelSource() {
return labelSource;
}
public void setLabelSource(UnicodePropertySource source) {
labelSource = source;
}
/**
* @deprecated
*/
@ -629,7 +694,7 @@ public class BagFormatter {
// UTILITIES
public static final Transliterator hex = Transliterator.getInstance(
"[^\\u0021-\\u007E\\u00A0-\\u00FF] hex");
"[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");
public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
return openReader(dir,filename,"UTF-8");
@ -670,20 +735,117 @@ public class BagFormatter {
public static PrintWriter getLog() {
return log;
}
public static void setLog(PrintWriter writer) {
public BagFormatter setLog(PrintWriter writer) {
log = writer;
return this;
}
public String getSeparator() {
return separator;
}
public void setSeparator(String string) {
public BagFormatter setSeparator(String string) {
separator = string;
return this;
}
public Transliterator getShowLiteral() {
return showLiteral;
}
public void setShowLiteral(Transliterator transliterator) {
public BagFormatter setShowLiteral(Transliterator transliterator) {
showLiteral = transliterator;
return this;
}
// ===== CONVENIENCES =====
/**
 * Visitor that flattens an object into a single string, wrapping each
 * container in the enclosing formatter's prefix/suffix and placing its
 * separator between items.
 */
private class Join extends Visitor {
    StringBuffer output = new StringBuffer();
    int depth = 0;

    /**
     * Clears the buffer, visits {@code o}, and returns what was accumulated.
     */
    String join (Object o) {
        output.setLength(0);
        doAt(o);
        return output.toString();
    }

    /** Opens a container: one nesting level deeper, then the prefix. */
    protected void doBefore(Object container, Object item) {
        depth++;
        output.append(prefix);
    }

    /** Closes a container: the suffix, then one nesting level back. */
    protected void doAfter(Object container, Object item) {
        output.append(suffix);
        depth--;
    }

    /** Emits the separator between two neighbouring items. */
    protected void doBetween(Object container, Object lastItem, Object nextItem) {
        output.append(separator);
    }

    /** Appends the string form of a leaf item; null items contribute nothing. */
    protected void doSimpleAt(Object o) {
        if (o == null) {
            return;
        }
        output.append(o.toString());
    }
}
/**
* Returns the line separator string currently configured on this formatter.
* @return the current value of the lineSeparator field
*/
public String getLineSeparator() {
return lineSeparator;
}
/**
* Replaces the line separator string stored on this formatter.
* NOTE(review): unlike the neighbouring setters, this one returns void
* rather than this, so it cannot be chained -- confirm whether intentional.
* @param string the new line separator; stored as given, with no null check
*/
public void setLineSeparator(String string) {
lineSeparator = string;
}
/**
 * Installs the label source for this formatter.
 * @param label the new label source; null is replaced by UnicodeLabel.NULL
 * @return this formatter, to allow chaining
 */
public BagFormatter setLabelSource(UnicodeLabel label) {
    labelSource = (label != null) ? label : UnicodeLabel.NULL;
    return this;
}
/**
* Returns the label source currently used to produce names.
* @return the current value of the nameSource field
*/
public UnicodeLabel getNameSource() {
return nameSource;
}
/**
 * Installs the name source for this formatter.
 * @param label the new name source; null is replaced by UnicodeLabel.NULL
 * @return this formatter, to allow chaining
 */
public BagFormatter setNameSource(UnicodeLabel label) {
    nameSource = (label != null) ? label : UnicodeLabel.NULL;
    return this;
}
/**
* Returns the label source currently used to produce values.
* @return the current value of the valueSource field
*/
public UnicodeLabel getValueSource() {
return valueSource;
}
/**
 * Installs the value source for this formatter.
 * @param label the new value source; null is replaced by UnicodeLabel.NULL
 * @return this formatter, to allow chaining
 */
public BagFormatter setValueSource(UnicodeLabel label) {
    valueSource = (label != null) ? label : UnicodeLabel.NULL;
    return this;
}
/**
* Reports whether the showCount flag is set on this formatter.
* @return the current value of the showCount field
*/
public boolean isShowCount() {
return showCount;
}
/**
* Sets the showCount flag on this formatter.
* @param b the new value of the flag
* @return this formatter, to allow chaining
*/
public BagFormatter setShowCount(boolean b) {
showCount = b;
return this;
}
}

View file

@ -0,0 +1,390 @@
/*
*******************************************************************************
* Copyright (C) 2002-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java,v $
* $Date: 2004/02/07 00:59:26 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Locale;
import java.util.Map;
import java.util.List;
import java.util.Arrays;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
/**
* Provides a general interface for Unicode Properties, and
* extracting sets based on those values.
* @author Davis
*/
public class ICUPropertyFactory extends UnicodeProperty.Factory {
/**
 * UnicodeProperty.Matcher backed by a java.util.regex pattern.
 * {@link #set(String)} must be called before {@link #matches(String)},
 * since the underlying matcher is only created there.
 */
public static class RegexMatcher implements UnicodeProperty.Matcher {
    private Matcher matcher;

    /**
     * Compiles the regular expression and prepares a reusable matcher.
     * @param pattern regular expression source
     * @return this matcher
     */
    public UnicodeProperty.Matcher set(String pattern) {
        Pattern compiled = Pattern.compile(pattern);
        matcher = compiled.matcher("");
        return this;
    }

    /**
     * Tests whether the whole of {@code value} matches the pattern.
     */
    public boolean matches(String value) {
        return matcher.reset(value).matches();
    }
}
static class ICUProperty extends UnicodeProperty {
protected int propEnum = Integer.MIN_VALUE;
/**
* Creates a property wrapper for one ICU property enum.
* Stores the enum, then registers the name and the type derived from
* the enum via internalGetPropertyType.
* @param propName name passed to setName
* @param propEnum property enum value stored in this.propEnum
*/
protected ICUProperty(String propName, int propEnum) {
this.propEnum = propEnum;
setName(propName);
setType(internalGetPropertyType(propEnum));
}
boolean shownException = false;
/**
* Returns this property's value for one code point, as a string.
* Dispatches on propEnum:
* - below UProperty.INT_LIMIT: the LONG value name of the int property
*   value, or the decimal form of the int value when no name is found;
* - below UProperty.DOUBLE_LIMIT: the numeric value via Double.toString,
*   or null when there is no numeric value;
* - otherwise: a per-property switch over the string-valued properties and
*   the extra constants declared on the enclosing factory.
* @param codePoint the code point to look up
* @return the value string, or null when no branch produces one
*/
public String getValue(int codePoint) {
if (propEnum < UProperty.INT_LIMIT) {
// enumValue stays -1 if the lookup below throws before assigning it
int enumValue = -1;
String value = null;
try {
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
if (enumValue >= 0) value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
} catch (IllegalArgumentException e) {
// report only the first failure per property instance
if (!shownException) {
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
shownException = true;
}
}
return value != null ? value : String.valueOf(enumValue);
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
double num = UCharacter.getUnicodeNumericValue(codePoint);
if (num == UCharacter.NO_NUMERIC_VALUE) return null;
return Double.toString(num);
// TODO: Fix HACK -- API deficient
} else switch(propEnum) {
case UProperty.AGE: String temp = UCharacter.getAge(codePoint).toString();
if (temp.equals("0.0.0.0")) return "UNSPECIFIED";
// drop a trailing ".0.0" from the version string
if (temp.endsWith(".0.0")) return temp.substring(0,temp.length()-4);
return temp;
case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
case UProperty.CASE_FOLDING: return UCharacter.foldCase(UTF16.valueOf(codePoint),true);
case UProperty.ISO_COMMENT: return UCharacter.getISOComment(codePoint);
case UProperty.LOWERCASE_MAPPING: return UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
case UProperty.NAME: return UCharacter.getName(codePoint);
case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(UCharacter.foldCase(codePoint,true));
case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(UCharacter.toLowerCase(codePoint));
case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(UCharacter.toTitleCase(codePoint));
case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(UCharacter.toUpperCase(codePoint));
case UProperty.TITLECASE_MAPPING: return UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null);
case UProperty.UNICODE_1_NAME: return UCharacter.getName1_0(codePoint);
case UProperty.UPPERCASE_MAPPING: return UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint));
// Extra string properties: the normalized form of the single code point
case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC);
case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD);
case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC);
case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD);
// Extra binary properties: "true"/"false" depending on whether the
// transformation leaves the code point unchanged.
// NOTE(review): these constants are declared as UProperty.BINARY_LIMIT+n;
// if those values are below UProperty.INT_LIMIT the first branch above
// handles them and these cases are never reached -- confirm.
case isNFC: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFC).equals(UTF16.valueOf(codePoint)));
case isNFD: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFD).equals(UTF16.valueOf(codePoint)));
case isNFKC: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKC).equals(UTF16.valueOf(codePoint)));
case isNFKD: return String.valueOf(Normalizer.normalize(codePoint, Normalizer.NFKD).equals(UTF16.valueOf(codePoint)));
case isLowercase: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint)));
case isUppercase: return String.valueOf(UCharacter.toUpperCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint)));
case isTitlecase: return String.valueOf(UCharacter.toTitleCase(Locale.ENGLISH,UTF16.valueOf(codePoint),null).equals(UTF16.valueOf(codePoint)));
case isCasefolded: return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint),true).equals(UTF16.valueOf(codePoint)));
// NOTE(review): this expression is identical to the isLowercase case -- confirm intended definition of isCased
case isCased: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint)));
}
return null;
}
/**
 * Adds the value aliases available for this property to {@code result}.
 * For properties below UProperty.INT_LIMIT every int value between the
 * property's minimum and maximum is visited and its long alias (or, failing
 * that, its short alias) is added. For all other properties a single
 * placeholder alias is added.
 *
 * @param result collection to add to; a new ArrayList is created when null
 * @return the collection that was filled
 */
public Collection getAvailableValueAliases(Collection result) {
    if (result == null) result = new ArrayList();
    if (propEnum < UProperty.INT_LIMIT) {
        // HACK: the extra binary properties borrow the value range and value
        // names of the first real binary property. The substitution must be
        // temporary: previously the field was overwritten for good, so every
        // later call on this object (getValue, getAliases, ...) silently
        // answered for UProperty.BINARY_START instead of this property.
        final int savedPropEnum = propEnum;
        if (Binary_Extras.isInRange(propEnum)) {
            propEnum = UProperty.BINARY_START;
        }
        try {
            int start = UCharacter.getIntPropertyMinValue(propEnum);
            int end = UCharacter.getIntPropertyMaxValue(propEnum);
            for (int i = start; i <= end; ++i) {
                String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
                if (alias == null) {
                    alias = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
                }
                //System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
                if (alias != null && !result.contains(alias)) result.add(alias);
            }
        } finally {
            propEnum = savedPropEnum;
        }
    } else {
        String alias = getFixedValueAlias(null, -1, UProperty.NameChoice.LONG);
        if (alias != null && !result.contains(alias)) result.add(alias);
    }
    return result;
}
/**
 * Looks up a value alias for this property, papering over gaps in the
 * underlying name tables.
 * String- and number-valued properties have only the placeholder long names
 * "&lt;string&gt;" and "&lt;number&gt;". For the remaining properties the
 * requested name is tried first; a missing long name falls back to the short
 * name and finally to the placeholder "&lt;integer&gt;".
 *
 * @param valueAlias alias used to derive the value enum; null if unused
 * @param valueEnum value enum to look up; -1 if unused
 * @param nameChoice a UProperty.NameChoice constant
 * @return the alias, or null when none exists for a non-long name choice
 */
private String getFixedValueAlias(String valueAlias, int valueEnum, int nameChoice) {
    final boolean wantsLong = (nameChoice == UProperty.NameChoice.LONG);
    if (propEnum >= UProperty.STRING_START) {
        return wantsLong ? "<string>" : null;
    }
    if (propEnum >= UProperty.DOUBLE_START) {
        return wantsLong ? "<number>" : null;
    }
    if (valueAlias != null && !valueAlias.equals("<integer>")) {
        valueEnum = UCharacter.getPropertyValueEnum(propEnum, valueAlias);
    }
    // because these are defined badly, there may be no normal (long) name.
    String name = fixedGetPropertyValueName(propEnum, valueEnum, nameChoice);
    if (name != null || !wantsLong) {
        return name;
    }
    // HACK try other namechoice
    name = fixedGetPropertyValueName(propEnum, valueEnum, UProperty.NameChoice.SHORT);
    return (name != null) ? name : "<integer>";
}
/**
* Wrapper around UCharacter.getPropertyValueName that reports failure as
* null: any Exception thrown by the lookup is caught and discarded.
* @param propEnum property enum passed through to the lookup
* @param valueEnum value enum passed through to the lookup
* @param nameChoice name choice passed through to the lookup
* @return the looked-up name, or null if the lookup threw
*/
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
try {
return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
} catch (Exception e) {
// deliberately swallowed: callers treat null as "no such name"
return null;
}
}
/**
 * Adds the aliases of this property to {@code result}.
 * The extra properties declared on the factory have exactly one alias, taken
 * from String_Extras or Binary_Extras. For all other properties the ICU
 * property names are enumerated by increasing name choice until a null name
 * is returned or the lookup throws IllegalArgumentException.
 *
 * @param result collection to add to; a new ArrayList is created when null
 * @return the collection that was filled
 */
public Collection getAliases(Collection result) {
    if (result == null) result = new ArrayList();
    String extra = String_Extras.get(propEnum);
    if (extra == null) extra = Binary_Extras.get(propEnum);
    if (extra != null) {
        if (!result.contains(extra)) result.add(extra);
        return result;
    }
    try {
        int nameChoice = 0;
        while (true) {
            String name = UCharacter.getPropertyName(propEnum, nameChoice);
            if (name == null) break;
            if (nameChoice > 2) {
                System.out.println("Something wrong");
            }
            if (!result.contains(name)) result.add(name);
            ++nameChoice;
        }
    } catch (IllegalArgumentException e) {
        // ok, continue
    }
    return result;
}
/**
 * Adds the aliases of one property value to {@code result}, trying name
 * choices 0 through 2 in order and skipping choices that yield no alias.
 * The loop bound is checked before each lookup; previously the helper was
 * also invoked once with the out-of-range name choice 3 and its result
 * thrown away.
 *
 * @param valueAlias any alias of the value whose aliases are wanted
 * @param result collection to add to; a new ArrayList is created when null
 * @return the collection that was filled
 */
public Collection getValueAliases(String valueAlias, Collection result) {
    if (result == null) result = new ArrayList();
    for (int nameChoice = 0; nameChoice <= 2; ++nameChoice) {
        String alias = getFixedValueAlias(valueAlias, -1, nameChoice);
        if (alias != null && !result.contains(alias)) result.add(alias);
    }
    return result;
}
/**
 * Maps an ICU property enum onto one of the UnicodeProperty type constants.
 * A fixed list of string-valued mappings is classified as EXTENDED_STRING
 * first; every other enum is classified by the first UProperty range
 * boundary it falls below, and anything at or above STRING_LIMIT is
 * EXTENDED_STRING as well.
 */
private int internalGetPropertyType(int propEnum) {
    switch (propEnum) {
        //case UProperty.AGE:
        //case UProperty.NAME:
        //case UProperty.UNICODE_1_NAME:
        case UProperty.BIDI_MIRRORING_GLYPH:
        case UProperty.CASE_FOLDING:
        case UProperty.ISO_COMMENT:
        case UProperty.LOWERCASE_MAPPING:
        case UProperty.SIMPLE_CASE_FOLDING:
        case UProperty.SIMPLE_LOWERCASE_MAPPING:
        case UProperty.SIMPLE_TITLECASE_MAPPING:
        case UProperty.SIMPLE_UPPERCASE_MAPPING:
        case UProperty.TITLECASE_MAPPING:
        case UProperty.UPPERCASE_MAPPING:
            return UnicodeProperty.EXTENDED_STRING;
        default:
            break;
    }
    // { exclusive upper bound, type reported for enums below that bound },
    // checked in order so the first matching bound wins
    final int[][] ceilings = {
        { UProperty.BINARY_START, UnicodeProperty.UNKNOWN },
        { UProperty.BINARY_LIMIT, UnicodeProperty.BINARY },
        { UProperty.INT_START, UnicodeProperty.EXTENDED_BINARY },
        { UProperty.INT_LIMIT, UnicodeProperty.ENUMERATED },
        { UProperty.DOUBLE_START, UnicodeProperty.EXTENDED_ENUMERATED },
        { UProperty.DOUBLE_LIMIT, UnicodeProperty.NUMERIC },
        { UProperty.STRING_START, UnicodeProperty.EXTENDED_NUMERIC },
        { UProperty.STRING_LIMIT, UnicodeProperty.STRING },
    };
    for (int i = 0; i < ceilings.length; ++i) {
        if (propEnum < ceilings[i][0]) {
            return ceilings[i][1];
        }
    }
    return UnicodeProperty.EXTENDED_STRING;
}
}
/*{
matchIterator = new UnicodeSetIterator(
new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]"));
}*/
/*
* Other Missing Functions:
Expands_On_NFC
Expands_On_NFD
Expands_On_NFKC
Expands_On_NFKD
Composition_Exclusion
Decomposition_Mapping
FC_NFKC_Closure
ISO_Comment
NFC_Quick_Check
NFD_Quick_Check
NFKC_Quick_Check
NFKD_Quick_Check
Special_Case_Condition
Unicode_Radical_Stroke
*/
/**
 * Names of the extra binary properties, numbered upward from
 * UProperty.BINARY_LIMIT in the same order as the isXXX constants below.
 */
static final Names Binary_Extras = new Names(UProperty.BINARY_LIMIT,
    new String[] {
        "isNFC", "isNFD", "isNFKC", "isNFKD",
        "isLowercase", "isUppercase", "isTitlecase", "isCasefolded", "isCased",
    });
/**
 * Names of the extra string properties, numbered upward from
 * UProperty.STRING_LIMIT in the same order as the NFC..NFKD constants below.
 * The fourth name was previously misspelled "toNKFD", which made the NFKD
 * property unreachable under its proper name.
 */
static final Names String_Extras = new Names(UProperty.STRING_LIMIT,
    new String[] {
        "toNFC", "toNFD", "toNFKC", "toNFKD",
    });
// Enum values for the extra properties; each must equal the base of the
// matching Names table plus the index of its name in that table.
static final int
    isNFC = UProperty.BINARY_LIMIT,
    isNFD = UProperty.BINARY_LIMIT+1,
    isNFKC = UProperty.BINARY_LIMIT+2,
    isNFKD = UProperty.BINARY_LIMIT+3,
    isLowercase = UProperty.BINARY_LIMIT+4,
    isUppercase = UProperty.BINARY_LIMIT+5,
    isTitlecase = UProperty.BINARY_LIMIT+6,
    isCasefolded = UProperty.BINARY_LIMIT+7,
    isCased = UProperty.BINARY_LIMIT+8,
    NFC = UProperty.STRING_LIMIT,
    NFD = UProperty.STRING_LIMIT+1,
    NFKC = UProperty.STRING_LIMIT+2,
    NFKD = UProperty.STRING_LIMIT+3
    ;
/**
 * Builds the factory by registering one property for every alias reported
 * by getInternalAvailablePropertyAliases, visited in TreeSet order.
 */
private ICUPropertyFactory() {
    for (Iterator names = getInternalAvailablePropertyAliases(new TreeSet()).iterator();
            names.hasNext();) {
        String propertyAlias = (String) names.next();
        add(getInternalProperty(propertyAlias));
    }
}
private static ICUPropertyFactory singleton = null;
/**
 * Returns the shared factory instance, creating it on first use.
 * The method is synchronized, so creation happens at most once.
 */
public static synchronized ICUPropertyFactory make() {
    if (singleton == null) {
        singleton = new ICUPropertyFactory();
    }
    return singleton;
}
/**
 * Adds to {@code result} the long name of every ICU property in the binary,
 * int, double and string enum ranges, followed by the names of the extra
 * string and binary properties.
 *
 * @param result collection to add to; must not be null
 * @return the same collection
 */
public Collection getInternalAvailablePropertyAliases(Collection result) {
    final int[] starts = {
        UProperty.BINARY_START, UProperty.INT_START,
        UProperty.DOUBLE_START, UProperty.STRING_START,
    };
    final int[] limits = {
        UProperty.BINARY_LIMIT, UProperty.INT_LIMIT,
        UProperty.DOUBLE_LIMIT, UProperty.STRING_LIMIT,
    };
    for (int r = 0; r < starts.length; ++r) {
        int prop = starts[r];
        while (prop < limits[r]) {
            String longName = UCharacter.getPropertyName(prop, UProperty.NameChoice.LONG);
            if (!result.contains(longName)) result.add(longName);
            ++prop;
        }
    }
    result.addAll(String_Extras.getNames());
    result.addAll(Binary_Extras.getNames());
    return result;
}
/**
 * Creates the property object for an alias.
 * The alias is resolved against the extra binary names first, then the
 * extra string names, and only then against ICU's own property names.
 *
 * @param propertyAlias name of the property
 * @return a new ICUProperty carrying the alias and the resolved enum
 */
public UnicodeProperty getInternalProperty(String propertyAlias) {
    int propEnum = Binary_Extras.get(propertyAlias);
    if (propEnum < 0) {
        propEnum = String_Extras.get(propertyAlias);
    }
    if (propEnum < 0) {
        propEnum = UCharacter.getPropertyEnum(propertyAlias);
    }
    return new ICUProperty(propertyAlias, propEnum);
}
/* (non-Javadoc)
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getProperty(java.lang.String)
*/
// TODO file bug on getPropertyValueName for Canonical_Combining_Class
/**
 * A fixed table of names numbered consecutively from a base value:
 * names[0] has number base, names[1] has base + 1, and so on.
 * Name lookup ignores case.
 */
public static class Names {
    private final String[] names;
    private final int base;

    /**
     * @param base number assigned to the first name
     * @param names the names, in numbering order; the array is copied so later
     *        changes by the caller do not affect this table
     */
    public Names(int base, String[] names) {
        this.base = base;
        this.names = (String[]) names.clone();
    }

    /**
     * Finds the number for a name, ignoring case.
     * @param name name to look up; must not be null
     * @return the number of the first matching name, or -1 if none matches
     */
    public int get(String name) {
        for (int i = 0; i < names.length; ++i) {
            if (name.equalsIgnoreCase(names[i])) return base + i;
        }
        return -1;
    }

    /**
     * Finds the name for a number.
     * @param number number to look up
     * @return the name, or null if number is outside this table
     */
    public String get(int number) {
        number -= base;
        if (number < 0 || names.length <= number) return null;
        return names[number];
    }

    /**
     * @param number number to test
     * @return true if number belongs to one of the names in this table
     */
    public boolean isInRange(int number) {
        number -= base;
        return (0 <= number && number < names.length);
    }

    /**
     * @return a read-only list of the names in numbering order
     */
    public List getNames() {
        // Read-only, so callers cannot write through to the lookup table.
        return java.util.Collections.unmodifiableList(Arrays.asList(names));
    }
}
}

View file

@ -6,13 +6,16 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $
* $Date: 2003/12/20 03:06:53 $
* $Revision: 1.2 $
* $Date: 2004/02/07 00:59:26 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.util.ArrayList;
import java.util.List;
public abstract class Tabber {
static final byte LEFT = 0, CENTER = 1, RIGHT = 2;
@ -36,61 +39,130 @@ public abstract class Tabber {
public String process(String source) {
StringBuffer result = new StringBuffer();
int lastPos = 0;
int count = 0;
while (lastPos < source.length()) {
for (int count = 0; lastPos < source.length(); ++count) {
int pos = source.indexOf('\t', lastPos);
if (pos < 0) pos = source.length();
process_field(count, source, lastPos, pos, result);
lastPos = pos+1;
++count; // skip type
}
if (lastPos < source.length()) {
result.append(source.substring(lastPos));
}
return result.toString();
return prefix + result.toString() + postfix;
}
private String prefix = "";
private String postfix = "";
public abstract void process_field(int count, String source, int start, int limit, StringBuffer output);
public static class MonoTabber extends Tabber {
private int[] tabs;
private List stops = new ArrayList();
private List types = new ArrayList();
public MonoTabber(int[] tabs) {
this.tabs = (int[]) tabs.clone();
public void addAbsolute(int tabPos, int type) {
stops.add(new Integer(tabPos));
types.add(new Integer(type));
}
public void add(int fieldWidth, byte type) {
int last = getStop(stops.size()-1);
stops.add(new Integer(last + fieldWidth));
types.add(new Integer(type));
}
public int getStop(int fieldNumber) {
if (fieldNumber < 0) return 0;
return ((Integer)stops.get(fieldNumber)).intValue();
}
/*
public String process(String source) {
StringBuffer result = new StringBuffer();
int lastPos = 0;
int count = 0;
while (lastPos < source.length() && count < tabs.length) {
for (count = 0; lastPos < source.length() && count < stops.size(); count++) {
int pos = source.indexOf('\t', lastPos);
if (pos < 0) pos = source.length();
String piece = source.substring(lastPos, pos);
if (result.length() < tabs[count]) {
result.append(repeat(" ", tabs[count] - result.length()));
int stopPos = getStop(count);
if (result.length() < stopPos) {
result.append(repeat(" ", stopPos - result.length()));
// TODO fix type
}
result.append(piece);
lastPos = pos+1;
count += 2; // skip type
}
if (lastPos < source.length()) {
result.append(source.substring(lastPos));
}
return result.toString();
}
*/
public void process_field(int count, String source, int start, int limit, StringBuffer output) {
String piece = source.substring(start, limit);
if (output.length() < tabs[count*2]) {
output.append(repeat(" ", tabs[count*2] - output.length()));
int pos = getStop(count);
if (output.length() < pos) {
output.append(repeat(" ", pos - output.length()));
// TODO fix type
} else {
output.append(" ");
}
output.append(piece);
}
}
/**
 * Tabber that writes each field as an HTML table cell. The prefix and postfix
 * are preset to "<tr>" and "</tr>".
 */
public static class HTMLTabber extends Tabber {
    /** Per-field attribute text for the td tag; an entry may be null or absent. */
    private List parameters = new ArrayList();
    {
        setPrefix("<tr>");
        setPostfix("</tr>");
    }

    /**
     * Sets the attribute text written inside the td tag of one field.
     * @param count zero-based field number
     * @param params attribute text, or null for none
     */
    public void setParameters(int count, String params) {
        // List.set cannot grow a list, and this one starts empty: pad with
        // nulls first, otherwise every call throws IndexOutOfBoundsException.
        while (parameters.size() <= count) {
            parameters.add(null);
        }
        parameters.set(count, params);
    }

    /**
     * Appends source[start, limit) to output as one td element, including the
     * attribute text for this field if any was set.
     */
    public void process_field(int count, String source, int start, int limit, StringBuffer output) {
        output.append("<td");
        String params = null;
        if (count < parameters.size()) params = (String) parameters.get(count);
        if (params != null) {
            output.append(' ');
            output.append(params);
        }
        output.append(">");
        output.append(source.substring(start, limit));
        // TODO Quote string
        output.append("</td>");
    }
}
/**
 * Returns the current postfix string.
 * @return the postfix
 */
public String getPostfix() {
    return this.postfix;
}
/**
 * Returns the current prefix string.
 * @return the prefix
 */
public String getPrefix() {
    return this.prefix;
}
/**
 * Replaces the postfix string.
 * @param string the new postfix
 * @return this tabber, for chaining
 */
public Tabber setPostfix(String string) {
    this.postfix = string;
    return this;
}
/**
 * Replaces the prefix string.
 * @param string the new prefix
 * @return this tabber, for chaining
 */
public Tabber setPrefix(String string) {
    this.prefix = string;
    return this;
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $
* $Date: 2004/01/27 23:13:13 $
* $Revision: 1.6 $
* $Date: 2004/02/07 00:59:25 $
* $Revision: 1.7 $
*
*****************************************************************************************
*/
@ -14,6 +14,8 @@ package com.ibm.icu.dev.test.util;
// TODO integrate this into the test framework
import java.util.ArrayList;
import java.util.Collection;
import java.util.TreeSet;
import java.util.Iterator;
import java.io.IOException;
@ -28,26 +30,30 @@ import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
public class TestBagFormatter {
static final void generatePropertyAliases(boolean showValues) {
UnicodePropertySource ups = new UnicodePropertySource.ICU().setNameChoice(UProperty.NameChoice.SHORT);
Collator order = Collator.getInstance(Locale.ENGLISH);
UnicodeProperty.Factory ups = ICUPropertyFactory.make();
TreeSet props = new TreeSet(order);
TreeSet values = new TreeSet(order);
ups.getAvailablePropertyAliases(props);
Collection aliases = new ArrayList();
BagFormatter bf = new BagFormatter();
ups.getAvailableAliases(props);
Iterator it = props.iterator();
while (it.hasNext()) {
String propAlias = (String)it.next();
ups.setPropertyAlias(propAlias);
UnicodeProperty up = ups.getProperty(propAlias);
System.out.println();
System.out.println(propAlias + ";\t" + ups.getPropertyAlias(UProperty.NameChoice.LONG));
aliases.clear();
System.out.println(bf.join(up.getAliases(aliases)));
if (!showValues) continue;
values.clear();
ups.getAvailablePropertyValueAliases(values);
up.getAvailableValueAliases(values);
Iterator it2 = values.iterator();
while (it2.hasNext()) {
String valueAlias = (String)it2.next();
System.out.println("\t" + valueAlias
+ ";\t" + ups.getPropertyValueAlias(valueAlias, UProperty.NameChoice.LONG));
aliases.clear();
System.out.println("\t" + bf.join(up.getValueAliases(valueAlias, aliases)));
}
}
}
@ -66,6 +72,10 @@ public class TestBagFormatter {
us = new UnicodeSet("[:numeric_type=numeric:]");
bf.showSetNames(BagFormatter.CONSOLE,"[:numeric_type=numeric:]", us);
UnicodeProperty.Factory ups = ICUPropertyFactory.make();
us = ups.getSet("gc=mn", null, null);
bf.showSetNames(bf.CONSOLE,"gc=mn", us);
if (true) return;
//showNames("Name", ".*MARK.*");
//showNames("NFD", "a.+");

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tokenizer.java,v $
* $Date: 2003/12/20 03:06:53 $
* $Revision: 1.3 $
* $Date: 2004/02/07 00:59:25 $
* $Revision: 1.4 $
*
*****************************************************************************************
*/
@ -115,7 +115,7 @@ public class Tokenizer {
case UNICODESET:
return s+"n=" + getUnicodeSet() + s;
default:
return s+"c=" + usf.getName(type) + s;
return s+"c=" + usf.getName(type,true) + s;
}
}

View file

@ -0,0 +1,37 @@
package com.ibm.icu.dev.test.util;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;
/**
 * Produces a text label for a code point, and by extension for each code
 * point of a string.
 */
public abstract class UnicodeLabel {

    /** Label that is always the empty string. */
    public static final UnicodeLabel NULL = new Null();

    /** Label that is the code point in hex, with a "U+" prefix unless short. */
    public static final UnicodeLabel HEX = new Hex();

    /**
     * Returns the label for one code point.
     * @param codepoint the code point
     * @param isShort true to ask for the short form of the label
     * @return the label
     */
    public abstract String getValue(int codepoint, boolean isShort);

    /**
     * Returns the labels of all code points in s, joined by separator.
     * @param s the string to label
     * @param separator text placed between consecutive labels
     * @param withCodePoint passed as the isShort argument of
     *        getValue(int, boolean) for every code point
     * @return the joined labels; empty if s is empty
     */
    public String getValue(String s, String separator, boolean withCodePoint) {
        // Fast path: a single code unit needs neither buffer nor separator.
        if (s.length() == 1) {
            return getValue(s.charAt(0), withCodePoint);
        }
        StringBuffer labels = new StringBuffer();
        int index = 0;
        while (index < s.length()) {
            int cp = UTF16.charAt(s, index);
            if (index != 0) {
                labels.append(separator);
            }
            labels.append(getValue(cp, withCodePoint));
            index += UTF16.getCharCount(cp);
        }
        return labels.toString();
    }

    /**
     * Returns the maximum label width. This base implementation returns 0.
     * @param isShort true to ask about the short form
     * @return 0
     */
    public int getMaxWidth(boolean isShort) {
        return 0;
    }

    private static class Hex extends UnicodeLabel {
        public String getValue(int codepoint, boolean isShort) {
            String digits = Utility.hex(codepoint, 4);
            return isShort ? digits : "U+" + digits;
        }
    }

    private static class Null extends UnicodeLabel {
        public String getValue(int codepoint, boolean isShort) {
            return "";
        }
    }
}

View file

@ -0,0 +1,112 @@
package com.ibm.icu.dev.test.util;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import com.ibm.icu.text.UnicodeSet;
/**
* Class for mapping Unicode characters to values
* Much smaller storage than using HashMap.
* @author Davis
*/
// TODO Optimize using range map
public class UnicodeMap {
    // TODO optimize
    /** Maps each value to the set of code points currently associated with it. */
    private HashMap objectToSet = new HashMap();
    /** Code points that have no value; starts out as every code point. */
    private UnicodeSet missing = new UnicodeSet(0,0x10FFFF);

    /**
     * Associates code point with value. Removes any previous association.
     * @param codepoint
     * @param value
     * @return this, for chaining
     */
    public UnicodeMap put(int codepoint, Object value) {
        if (missing.contains(codepoint)) {
            // First value for this code point. The original only updated
            // 'missing' in the already-mapped branch, so it never shrank:
            // getValue always returned null and re-puts kept the old mapping.
            missing.remove(codepoint);
        } else {
            // Already mapped: take it out of the (single) set that holds it.
            Iterator it = objectToSet.values().iterator();
            while (it.hasNext()) {
                UnicodeSet set = (UnicodeSet) it.next();
                if (set.contains(codepoint)) {
                    set.remove(codepoint);
                    // Drop values that no longer map to any code point.
                    if (set.isEmpty()) it.remove();
                    break;
                }
            }
        }
        setFor(value).add(codepoint);
        return this;
    }

    /**
     * Adds a whole set of code points; otherwise like put.
     * @param codepoints
     * @param value
     * @return this, for chaining
     */
    public UnicodeMap putAll(UnicodeSet codepoints, Object value) {
        // Nothing to associate; do not register a value with no code points.
        if (codepoints.isEmpty()) return this;
        if (!missing.containsAll(codepoints)) {
            // Some of them are already mapped: remove from wherever they are.
            Iterator it = objectToSet.values().iterator();
            while (it.hasNext()) {
                UnicodeSet set = (UnicodeSet) it.next();
                set.removeAll(codepoints);
                if (set.isEmpty()) it.remove();
            }
        }
        // Must happen on every call, not only when some were already mapped.
        missing.removeAll(codepoints);
        setFor(value).addAll(codepoints);
        return this;
    }

    /**
     * Returns the set stored for value, creating and registering an empty one
     * if the value has not been seen yet.
     */
    private UnicodeSet setFor(Object value) {
        UnicodeSet set = (UnicodeSet) objectToSet.get(value);
        if (set == null) {
            set = new UnicodeSet();
            objectToSet.put(value, set);
        }
        return set;
    }

    /**
     * Returns the set associated with a given value. Deposits into
     * result if it is not null. Remember to clear result first if you
     * just want the new values.
     * @param value
     * @param result
     * @return result, or a new set if result was null
     */
    public UnicodeSet getSet(Object value, UnicodeSet result) {
        if (result == null) result = new UnicodeSet();
        UnicodeSet set = (UnicodeSet) objectToSet.get(value);
        if (set != null) result.addAll(set);
        return result;
    }

    /**
     * Returns the list of possible values. Deposits into
     * result if it is not null. Remember to clear result first if you
     * just want the new values.
     * @param result
     * @return result, or a new HashSet if result was null
     */
    public Collection getAvailableValues(Collection result) {
        if (result == null) result = new HashSet();
        result.addAll(objectToSet.keySet());
        return result;
    }

    /**
     * Gets the value associated with a given code point.
     * Returns null, if there is no such value.
     * @param codepoint
     * @return the value, or null
     */
    public Object getValue(int codepoint) {
        if (missing.contains(codepoint)) return null;
        Iterator it = objectToSet.keySet().iterator();
        while (it.hasNext()) {
            Object value = it.next();
            UnicodeSet set = (UnicodeSet) objectToSet.get(value);
            if (set.contains(codepoint)) return value;
        }
        return null;
    }
}

View file

@ -0,0 +1,448 @@
package com.ibm.icu.dev.test.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
public abstract class UnicodeProperty extends UnicodeLabel {
// Name of this property; returned by getName() and assigned through setName().
private String propertyAlias;
// One of the type constants below; returned by getType() and assigned through setType().
private int type;
// Lazily built by getValueCache(): maps each available value alias to its shortest alias.
private Map mapToShortName = null;
// Property type codes. Each EXTENDED_x code equals the plain code with
// EXTENDED_BIT (the low bit) set. getSet treats any type >= STRING as
// string-valued. LIMIT_TYPE is one past the highest type code.
public static final int UNKNOWN = 0,
BINARY = 2, EXTENDED_BINARY = 3,
ENUMERATED = 4, EXTENDED_ENUMERATED = 5,
NUMERIC = 6, EXTENDED_NUMERIC = 7,
STRING = 8, EXTENDED_STRING = 9,
LIMIT_TYPE = 10,
EXTENDED_BIT = 1;
/** Display names indexed by type code; the unused code 1 shares "Unknown" with code 0. */
private static final String[] TYPE_NAMES = {
    "Unknown",
    "Unknown",
    "Binary",
    "Extended Binary",
    "Enumerated",
    "Extended Enumerated",
    "Numeric",
    "Extended Numeric",
    "String",
    "Extended String",
};
/**
 * Returns a display name for a property type code.
 * @param propType a type code such as BINARY or EXTENDED_STRING
 * @return the display name, or "Unknown" for a code outside the table
 *         (for example a negative value or LIMIT_TYPE)
 */
public static String getTypeName(int propType) {
    // Out-of-range codes used to throw ArrayIndexOutOfBoundsException.
    if (propType < 0 || propType >= TYPE_NAMES.length) {
        return TYPE_NAMES[0];
    }
    return TYPE_NAMES[propType];
}
/**
 * @return the property name assigned through setName
 */
public final String getName() {
return propertyAlias;
}
/**
 * @return the type code assigned through setType
 */
public final int getType() {
return type;
}
/**
 * Sets the property name returned by getName.
 * @param string the property name
 */
protected final void setName(String string) {
propertyAlias = string;
}
/**
 * Sets the type code returned by getType.
 * @param i the type code
 */
protected final void setType(int i) {
type = i;
}
/**
 * Returns this property's value for a code point. Callers in this class
 * allow for a null return.
 */
public abstract String getValue(int codepoint);
/**
 * Returns the aliases of this property.
 * NOTE(review): the implementations visible here add to result and return it,
 * creating a list when result is null; confirm for other subclasses.
 */
public abstract Collection getAliases(Collection result);
/**
 * Returns the aliases of one value of this property. getSet and
 * getValueCache iterate over the returned collection.
 */
public abstract Collection getValueAliases(String valueAlias, Collection result);
/**
 * Returns the value aliases this property can take. getValueCache calls this
 * with a null result, so implementations must accept null.
 */
abstract public Collection getAvailableValueAliases(Collection result);
/**
 * Registry of UnicodeProperty objects. Properties are found by any of their
 * aliases; alias comparison uses the skeleton form (see toSkeleton).
 */
static public class Factory {
    /** Property name (from getName) to property. */
    Map canonicalNames = new TreeMap();
    /** Skeleton of each alias to property. */
    Map skeletonNames = new TreeMap();

    /**
     * Registers a property under its name and under all of its aliases.
     * @param sp the property to register
     * @return this, for chaining
     */
    public final Factory add(UnicodeProperty sp) {
        canonicalNames.put(sp.getName(), sp);
        Collection c = sp.getAliases(new TreeSet());
        Iterator it = c.iterator();
        while (it.hasNext()) {
            skeletonNames.put(toSkeleton((String)it.next()), sp);
        }
        return this;
    }

    /**
     * Looks up a property by alias.
     * @param propertyAlias any alias of the property
     * @return the property, or null if none is registered under that alias
     */
    public final UnicodeProperty getProperty(String propertyAlias) {
        return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias));
    }

    /**
     * Adds the names of all registered properties to result.
     * @param result collection to add to, or null to create a new list
     * @return result
     */
    public final Collection getAvailableAliases(Collection result) {
        if (result == null) result = new ArrayList();
        Iterator it = canonicalNames.keySet().iterator();
        while (it.hasNext()) {
            addUnique(it.next(), result);
        }
        return result;
    }

    /**
     * @return a new list with the names of all registered properties
     */
    public final Collection getAvailableAliases() {
        return getAvailableAliases(null);
    }

    /**
     * Adds the names of the registered properties whose type is selected by
     * the mask.
     * @param result collection to add to, or null to create a new list
     * @param propertyTypeMask bit set in which bit (1 << type) selects a type
     * @return result
     */
    public final Collection getAvailablePropertyAliases(Collection result, int propertyTypeMask) {
        if (result == null) result = new ArrayList();
        // Walk the property objects. The keys are Strings, and casting them
        // to UnicodeProperty threw ClassCastException.
        Iterator it = canonicalNames.values().iterator();
        while (it.hasNext()) {
            UnicodeProperty property = (UnicodeProperty)it.next();
            if (((1<<property.getType())& propertyTypeMask) == 0) continue;
            addUnique(property.getName(), result);
        }
        return result;
    }

    /**
     * Returns the code points selected by an expression of the form
     * propname ('=' | '!=') propvalue.
     * With '!=' the complement of the matching set is used. The value text is
     * handed to the matcher as is; '|' alternatives are not split here.
     * @param propAndValue the expression
     * @param matcher matcher to use for the value, or null for the
     *        property's default matching
     * @param result set to add to, or null to create a new set
     * @return result
     * @throws IllegalArgumentException if the expression has no '=' or names
     *         a property that is not registered
     */
    public final UnicodeSet getSet(String propAndValue, Matcher matcher, UnicodeSet result) {
        int equalPos = propAndValue.indexOf('=');
        if (equalPos < 0) {
            throw new IllegalArgumentException(
                "Missing '=' in property expression: " + propAndValue);
        }
        String prop = propAndValue.substring(0,equalPos);
        boolean negative = false;
        if (prop.endsWith("!")) {
            // The '!' was stripped but never acted on, so "a!=b" behaved like "a=b".
            negative = true;
            prop = prop.substring(0,prop.length()-1);
        }
        prop = prop.trim();
        String value = propAndValue.substring(equalPos+1);
        UnicodeProperty up = getProperty(prop);
        if (up == null) {
            throw new IllegalArgumentException("Unknown property: " + prop);
        }
        if (!negative) {
            if (matcher != null) {
                return up.getSet(matcher.set(value), result);
            }
            return up.getSet(value,result);
        }
        // Negated: compute the matches on their own, invert, then merge, so
        // that anything already in result is preserved.
        UnicodeSet matched = (matcher != null)
            ? up.getSet(matcher.set(value), (UnicodeSet) null)
            : up.getSet(value, (UnicodeSet) null);
        matched.complement();
        if (result == null) return matched;
        return result.addAll(matched);
    }

    /** Same as getSet(propAndValue, matcher, null). */
    public final UnicodeSet getSet(String propAndValue, Matcher matcher) {
        return getSet(propAndValue, matcher, null);
    }

    /** Same as getSet(propAndValue, null, null). */
    public final UnicodeSet getSet(String propAndValue) {
        return getSet(propAndValue, null, null);
    }
}
/**
 * Wraps another property and passes every value it returns through a
 * StringFilter. Alias queries are forwarded to the wrapped property unchanged.
 */
static class FilteredUnicodeProperty extends UnicodeProperty {
    UnicodeProperty property;
    protected StringFilter filter;
    protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));

    /**
     * @param property the property to wrap
     * @param filter the filter applied to each value
     */
    FilteredUnicodeProperty(UnicodeProperty property, StringFilter filter) {
        this.property = property;
        this.filter = filter;
    }

    /** @return the filter currently applied to values */
    public StringFilter getFilter() {
        return this.filter;
    }

    /**
     * Replaces the filter.
     * @param filter the new filter
     * @return this, for chaining
     */
    public UnicodeProperty setFilter(StringFilter filter) {
        this.filter = filter;
        return this;
    }

    /** Returns the wrapped property's value after remapping by the filter. */
    public String getValue(int codepoint) {
        String unfiltered = this.property.getValue(codepoint);
        return this.filter.remap(unfiltered);
    }

    /** Forwards to the wrapped property. */
    public Collection getAliases(Collection result) {
        return this.property.getAliases(result);
    }

    /** Forwards to the wrapped property. */
    public Collection getValueAliases(String valueAlias, Collection result) {
        return this.property.getValueAliases(valueAlias, result);
    }

    /** Forwards to the wrapped property. */
    public Collection getAvailableValueAliases(Collection result) {
        return this.property.getAvailableValueAliases(result);
    }
}
/**
 * Base filter for property values. This implementation leaves every value
 * unchanged; subclasses override remap.
 */
public static class StringFilter implements Cloneable {
    /**
     * @param original the value to remap
     * @return the remapped value; here, original itself
     */
    public String remap(String original) {
        return original;
    }

    /** @return a shallow copy of this filter */
    public Object clone() {
        Object copy;
        try {
            copy = super.clone();
        } catch (CloneNotSupportedException e) {
            throw new InternalError("Should never happen.");
        }
        return copy;
    }
}
/**
 * Filter that replaces a value with its entry in a map and leaves values
 * without an entry unchanged.
 */
public static class MapFilter extends StringFilter {
    private Map valueMap;

    /** @param valueMap maps original values to replacement values */
    public MapFilter(Map valueMap){
        this.valueMap = valueMap;
    }

    /**
     * @param original the value to remap
     * @return the mapped replacement, or original if the map has none
     */
    public String remap(String original) {
        Object replacement = valueMap.get(original);
        if (replacement != null) {
            return (String) replacement;
        }
        return original;
    }

    /** @return the map this filter consults */
    public Map getMap() {
        return valueMap;
    }
}
public interface Matcher {
/**
* Must be able to handle null
* @param value
* @return
*/
public boolean matches(String value);
public Matcher set(String pattern);
}
/**
 * Matcher that accepts values equal to its pattern, either by String.equals
 * or, when a comparator is supplied, by comparator.compare(...) == 0.
 */
public static class SimpleMatcher implements Matcher {
    Comparator comparator;
    String pattern;

    /**
     * @param pattern the pattern to match
     * @param comparator comparator that defines equality, or null to use
     *        String.equals
     */
    public SimpleMatcher(String pattern, Comparator comparator) {
        this.comparator = comparator;
        this.pattern = pattern;
    }

    /**
     * @param value the value to test; may be null
     * @return true if value equals the pattern; a null value matches only a
     *         null pattern
     */
    public boolean matches(String value) {
        // The Matcher contract requires null handling. Settle nulls here so
        // they never reach String.equals on a null pattern or a comparator
        // that dereferences its arguments.
        if (pattern == null || value == null) {
            return pattern == null && value == null;
        }
        if (comparator == null) return pattern.equals(value);
        return comparator.compare(pattern, value) == 0;
    }

    /**
     * Replaces the pattern.
     * @param pattern the new pattern
     * @return this, for chaining
     */
    public Matcher set(String pattern) {
        this.pattern = pattern;
        return this;
    }
}
/**
 * Base class for properties whose name, short alias and value aliases are
 * supplied up front through one of the set methods. Subclasses provide
 * getValue(int).
 */
public static abstract class SimpleProperty extends UnicodeProperty {
    private String shortAlias;
    /** The value aliases, as passed to set. */
    Collection valueAliases = new ArrayList();
    /** Maps each value alias to the list of all its aliases, itself included. */
    Map toAlternates = new HashMap();

    /**
     * Configures a property that has a single value alias.
     */
    protected void set(String alias, String shortAlias, int propertyType, String valueAlias) {
        set(alias,shortAlias,propertyType,new String[]{valueAlias},null);
    }

    /**
     * Configures the property.
     * @param alias the property name
     * @param shortAlias the short property name; may be null
     * @param propertyType one of the UnicodeProperty type codes
     * @param valueAliases the value aliases
     * @param alternateValueAliases alternate alias for the value at the same
     *        index, or null for none; entries past its end are treated as absent
     */
    protected void set(String alias, String shortAlias, int propertyType,
            String[] valueAliases, String[] alternateValueAliases) {
        setName(alias);
        setType(propertyType);
        this.shortAlias = shortAlias;
        this.valueAliases = Arrays.asList((Object[]) valueAliases.clone());
        for (int i = 0; i < valueAliases.length; ++i) {
            List a = new ArrayList();
            addUnique(valueAliases[i],a);
            // Bounds check: a shorter alternates array used to overrun here.
            if (alternateValueAliases != null && i < alternateValueAliases.length) {
                addUnique(alternateValueAliases[i],a);
            }
            toAlternates.put(valueAliases[i], a);
        }
    }

    /**
     * Configures the property from a collection of value aliases that have
     * no alternates.
     */
    protected void set(String alias, String shortAlias, int propertyType,
            Collection valueAliases) {
        setName(alias);
        setType(propertyType);
        this.shortAlias = shortAlias;
        this.valueAliases = new ArrayList(valueAliases);
        // Every value is at least an alias of itself. Without these entries
        // getValueAliases returned nothing for such properties. Entries that
        // are already present are left alone.
        Iterator it = this.valueAliases.iterator();
        while (it.hasNext()) {
            Object value = it.next();
            if (value == null || toAlternates.containsKey(value)) continue;
            List a = new ArrayList();
            a.add(value);
            toAlternates.put(value, a);
        }
    }

    /**
     * Adds the property name and the short alias to result.
     * @param result collection to add to, or null to create a new list
     * @return result
     */
    public Collection getAliases(Collection result) {
        if (result == null) result = new ArrayList();
        addUnique(getName(), result);
        addUnique(shortAlias, result);
        return result;
    }

    /**
     * Adds the aliases of one value to result; adds nothing if the value is
     * not known.
     * @param valueAlias the value alias to look up
     * @param result collection to add to, or null to create a new list
     * @return result
     */
    public Collection getValueAliases(String valueAlias, Collection result) {
        if (result == null) result = new ArrayList();
        Collection a = (Collection) toAlternates.get(valueAlias);
        // Add the alternates of this value only. The original added every
        // value alias of the property, making all values aliases of each other.
        if (a != null) addAllUnique(a, result);
        return result;
    }

    /**
     * Adds all value aliases to result.
     * @param result collection to add to, or null to create a new list
     * @return result
     */
    public Collection getAvailableValueAliases(Collection result) {
        if (result == null) result = new ArrayList();
        result.addAll(valueAliases);
        return result;
    }
}
/**
 * Returns the value for a code point, optionally in its shortest alias form.
 * @param codepoint the code point
 * @param getShortest true to return the shortest known alias of the value
 * @return the value; null if getValue(int) returns null. If the value has no
 *         entry in the short-name cache it is returned unchanged.
 */
public final String getValue(int codepoint, boolean getShortest) {
    String result = getValue(codepoint);
    if (!getShortest || result == null) return result;
    if (mapToShortName == null) getValueCache();
    String shortest = (String) mapToShortName.get(result);
    // Values missing from the cache used to come back as null; fall back to
    // the value itself.
    return shortest != null ? shortest : result;
}
/**
 * Builds the value-alias-to-shortest-alias map and records the widest
 * shortest alias and the widest available value alias. The results are
 * published only after the whole computation succeeds.
 */
private void getValueCache() {
    int shortWidth = 0;
    int longWidth = 0;
    Map shortNames = new HashMap();
    Iterator it = getAvailableValueAliases(null).iterator();
    while (it.hasNext()) {
        String value = (String)it.next();
        String shortest = value;
        Iterator it2 = getValueAliases(value, null).iterator();
        while (it2.hasNext()) {
            String other = (String)it2.next();
            if (shortest.length() > other.length()) shortest = other;
        }
        shortNames.put(value,shortest);
        if (shortest.length() > shortWidth) shortWidth = shortest.length();
        if (value.length() > longWidth) longWidth = value.length();
    }
    mapToShortName = shortNames;
    maxLongWidth = longWidth;
    maxWidth = shortWidth;
}
/** Width of the widest shortest alias; negative until getValueCache has run. */
private int maxWidth = -1;
/** Width of the widest available value alias; negative until getValueCache has run. */
private int maxLongWidth = -1;
/**
 * Returns the maximum width of this property's value labels.
 * @param getShortest true for the width of the shortest aliases, false for
 *        the width of the value aliases as listed by getAvailableValueAliases
 * @return the maximum width, in chars
 */
public final int getMaxWidth(boolean getShortest) {
    if (maxWidth < 0) getValueCache();
    // The flag used to be ignored; the short width was returned either way.
    return getShortest ? maxWidth : maxLongWidth;
}
/**
 * Returns the code points whose value matches propertyValue. Properties of
 * type STRING or above are compared exactly; all others are compared by
 * skeleton (see SkeletonComparator).
 * @param propertyValue the value to match
 * @param result set to add to, or null to create a new set
 * @return result
 */
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
    Comparator comparator = null;
    if (getType() < STRING) {
        comparator = new SkeletonComparator();
    }
    Matcher matcher = new SimpleMatcher(propertyValue, comparator);
    return getSet(matcher, result);
}
/** Value-to-code-points map for non-string properties; built on first use. */
private UnicodeMap cacheValueToSet = null;
/**
 * Returns the code points whose value is accepted by the matcher.
 * String-typed properties are scanned code point by code point. For the
 * others, a value's code points are included when any alias of that value,
 * or the skeleton of an alias, matches.
 * @param matcher the matcher to apply
 * @param result set to add to, or null to create a new set
 * @return result
 */
public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
    if (result == null) result = new UnicodeSet();
    if (type >= STRING) {
        // String values are not enumerable: test every code point directly.
        int cp = 0;
        while (cp <= 0x10FFFF) {
            if (matcher.matches(getValue(cp))) {
                result.add(cp);
            }
            ++cp;
        }
        return result;
    }
    if (cacheValueToSet == null) {
        cacheValueToSet = new UnicodeMap();
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            cacheValueToSet.put(cp, getValue(cp));
        }
    }
    Collection scratch = new HashSet(); // reused for every value
    for (Iterator values = cacheValueToSet.getAvailableValues(null).iterator(); values.hasNext();) {
        String value = (String) values.next();
        scratch.clear();
        Iterator aliases = getValueAliases(value, scratch).iterator();
        boolean matched = false;
        while (!matched && aliases.hasNext()) {
            String alias = (String) aliases.next();
            matched = matcher.matches(alias) || matcher.matches(toSkeleton(alias));
        }
        if (matched) {
            cacheValueToSet.getSet(value, result);
        }
    }
    return result;
}
/*
public UnicodeSet getMatchSet(UnicodeSet result) {
if (result == null) result = new UnicodeSet();
addAll(matchIterator, result);
return result;
}
public void setMatchSet(UnicodeSet set) {
matchIterator = new UnicodeSetIterator(set);
}
*/
/**
 * Adds obj to result unless it is null or already contained.
 * @param obj the object to add; null is ignored
 * @param result the collection to add to
 * @return result
 */
public static Collection addUnique(Object obj, Collection result) {
    if (obj == null || result.contains(obj)) {
        return result;
    }
    result.add(obj);
    return result;
}
/**
 * Adds every element of source to result, skipping nulls and elements that
 * result already contains.
 * @param source the elements to add
 * @param result the collection to add to
 * @return result
 */
public static Collection addAllUnique(Collection source, Collection result) {
    for (Iterator items = source.iterator(); items.hasNext();) {
        Object item = items.next();
        if (item == null || result.contains(item)) {
            continue;
        }
        result.add(item);
    }
    return result;
}
/**
 * Orders strings by their skeleton form (see toSkeleton), so names that
 * differ only in case, spaces, '_' or '-' compare as equal.
 */
public static class SkeletonComparator implements Comparator {
    public int compare(Object o1, Object o2) {
        // TODO optimize
        String left = toSkeleton((String) o1);
        String right = toSkeleton((String) o2);
        return left.compareTo(right);
    }
}
/**
 * Returns the loose-matching form of a name: spaces, '_' and '-' are removed
 * and every other char is lowercased. If nothing changes, the source string
 * itself is returned.
 */
private static String toSkeleton(String source) {
    final int length = source.length();
    StringBuffer skeleton = new StringBuffer();
    boolean changed = false;
    // Working on chars is fine here: no surrogates are involved.
    for (int i = 0; i < length; ++i) {
        char ch = source.charAt(i);
        switch (ch) {
        case '_':
        case ' ':
        case '-':
            changed = true;
            break;
        default:
            char lower = Character.toLowerCase(ch);
            changed |= (lower != ch);
            skeleton.append(lower);
            break;
        }
    }
    // Reuse the input when it already is a skeleton, to avoid string creation.
    return changed ? skeleton.toString() : source;
}
/**
 * Compares a code point with a string without creating a string when the
 * string is a single char.
 * @param codepoint the code point
 * @param other the string to compare with; must not be null
 * @return true if other consists of exactly that code point
 */
public static final boolean equals(int codepoint, String other) {
    switch (other.length()) {
    case 1:
        return codepoint == other.charAt(0);
    case 2:
        return other.equals(UTF16.valueOf(codepoint));
    default:
        return false;
    }
}
/**
 * Adds everything the iterator yields, ranges and strings alike, to result.
 * Utility that should be on UnicodeSet.
 * @param source iterator supplying the ranges and strings
 * @param result the set to add to
 */
static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
    while (source.nextRange()) {
        if (source.codepoint != UnicodeSetIterator.IS_STRING) {
            result.add(source.codepoint, source.codepointEnd);
            continue;
        }
        result.add(source.string);
    }
}
}

View file

@ -6,8 +6,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Visitor.java,v $
* $Date: 2003/12/20 03:06:54 $
* $Revision: 1.3 $
* $Date: 2004/02/07 00:59:24 $
* $Revision: 1.4 $
*
*****************************************************************************************
*/
@ -134,22 +134,4 @@ public abstract class Visitor {
abstract protected void doAfter(Object container, Object item);
abstract protected void doSimpleAt(Object o);
// ===== CONVENIENCES =====
static class Join extends Visitor {
StringBuffer output = new StringBuffer();
String join (Object o) {
output.setLength(0);
doAt(o);
return output.toString();
}
protected void doBefore(Object container, Object item) {}
protected void doAfter(Object container, Object item) {}
protected void doBetween(Object container, Object lastItem, Object nextItem) {
output.append(",");
}
protected void doSimpleAt(Object o) {
output.append(o.toString());
}
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $
* $Date: 2003/08/20 03:48:47 $
* $Revision: 1.11 $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -63,7 +63,7 @@ public class GenOverlap implements UCD_Types, UCA_Types {
CEList.main(null);
System.out.println("# Overlap");
System.out.println("# Generated " + new Date());
System.out.println("# Generated " + Default.getDate());
ucd = UCD.make();
@ -335,7 +335,7 @@ public class GenOverlap implements UCD_Types, UCA_Types {
CEList.main(null);
System.out.println("# Generate");
System.out.println("# Generated " + new Date());
System.out.println("# Generated " + Default.getDate());
ucd = UCD.make();
@ -533,7 +533,7 @@ public class GenOverlap implements UCD_Types, UCA_Types {
collator = collatorIn;
System.out.println("# Check Hash");
System.out.println("# Generated " + new Date());
System.out.println("# Generated " + Default.getDate());
ucd = UCD.make();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCharts.java,v $
* $Date: 2004/02/06 18:32:03 $
* $Revision: 1.18 $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.19 $
*
*******************************************************************************
*/
@ -32,11 +32,11 @@ public class WriteCharts implements UCD_Types {
static boolean HACK_KANA = false;
static public void special() {
Default.setUCD();
for (int i = 0xE000; i < 0x10000; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
if (!Default.nfkc.isNormalized(i)) continue;
System.out.println(Default.ucd.getCodeAndName(i));
if (!Default.ucd().isRepresented(i)) continue;
if (!Default.nfkc().isNormalized(i)) continue;
System.out.println(Default.ucd().getCodeAndName(i));
}
}
@ -109,7 +109,7 @@ public class WriteCharts implements UCD_Types {
int cp = UTF16.charAt(s,0);
byte script = Default.ucd.getScript(cp);
byte script = Default.ucd().getScript(cp);
// get first non-zero primary
int currentPrimary = getFirstPrimary(sortKey);
@ -140,7 +140,7 @@ public class WriteCharts implements UCD_Types {
++scriptCount[script+3];
if (scriptCount[script+3] > 1) {
System.out.println("\t\tFAIL: " + scriptCount[script+3] + ", " +
getChunkName(script, LONG) + ", " + Default.ucd.getCodeAndName(s));
getChunkName(script, LONG) + ", " + Default.ucd().getCodeAndName(s));
}
output = openFile(scriptCount[script+3], folder, script);
}
@ -190,19 +190,19 @@ public class WriteCharts implements UCD_Types {
String s,
byte script,
String classname) {
String name = Default.ucd.getName(s);
String name = Default.ucd().getName(s);
if (s.equals("\u1eaf")) {
System.out.println("debug");
}
String comp = Default.nfc.normalize(s);
int cat = Default.ucd.getCategory(UTF16.charAt(comp,0));
String comp = Default.nfc().normalize(s);
int cat = Default.ucd().getCategory(UTF16.charAt(comp,0));
if (cat == Mn || cat == Mc || cat == Me) {
comp = '\u25CC' + comp;
if (s.equals("\u0300")) {
System.out.println(Default.ucd.getCodeAndName(comp));
System.out.println(Default.ucd().getCodeAndName(comp));
}
}
// TODO: merge with showCell
@ -226,27 +226,26 @@ public class WriteCharts implements UCD_Types {
}
static public void normalizationChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
Set set = new TreeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) {
if (!Default.ucd().isRepresented(i)) {
if (i < 0xAC00) continue;
if (i > 0xD7A3) continue;
if (i > 0xACFF && i < 0xD700) continue;
}
byte cat = Default.ucd.getCategory(i);
byte cat = Default.ucd().getCategory(i);
if (cat == Cs || cat == Co) continue;
if (Default.nfkd.isNormalized(i)) continue;
String decomp = Default.nfkd.normalize(i);
if (Default.nfkd().isNormalized(i)) continue;
String decomp = Default.nfkd().normalize(i);
byte script = getBestScript(decomp);
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
new Pair(Default.ucd().getCase(decomp, FULL, FOLD),
new Integer(i))));
}
@ -302,10 +301,10 @@ public class WriteCharts implements UCD_Types {
String prefix;
String code = UTF16.valueOf(cp);
String c = Default.nfc.normalize(cp);
String d = Default.nfd.normalize(cp);
String kc = Default.nfkc.normalize(cp);
String kd = Default.nfkd.normalize(cp);
String c = Default.nfc().normalize(cp);
String d = Default.nfd().normalize(cp);
String kc = Default.nfkc().normalize(cp);
String kd = Default.nfkd().normalize(cp);
showCell(output, code, "<td class='z' ", "", false);
@ -330,23 +329,22 @@ public class WriteCharts implements UCD_Types {
}
static public void caseChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
Set set = new TreeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
byte cat = Default.ucd.getCategory(i);
if (!Default.ucd().isRepresented(i)) continue;
byte cat = Default.ucd().getCategory(i);
if (cat == Cs || cat == Co) continue;
String code = UTF16.valueOf(i);
String lower = Default.ucd.getCase(i, FULL, LOWER);
String title = Default.ucd.getCase(i, FULL, TITLE);
String upper = Default.ucd.getCase(i, FULL, UPPER);
String fold = Default.ucd.getCase(i, FULL, FOLD);
String lower = Default.ucd().getCase(i, FULL, LOWER);
String title = Default.ucd().getCase(i, FULL, TITLE);
String upper = Default.ucd().getCase(i, FULL, UPPER);
String fold = Default.ucd().getCase(i, FULL, FOLD);
String decomp = Default.nfkd.normalize(i);
String decomp = Default.nfkd().normalize(i);
int script = 0;
if (lower.equals(code) && upper.equals(code) && fold.equals(code) && title.equals(code)) {
if (!containsCase(decomp)) continue;
@ -356,7 +354,7 @@ public class WriteCharts implements UCD_Types {
if (script == 0) script = getBestScript(decomp);
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
new Pair(Default.ucd.getCase(decomp, FULL, FOLD),
new Pair(Default.ucd().getCase(decomp, FULL, FOLD),
new Integer(i))));
}
@ -425,10 +423,10 @@ public class WriteCharts implements UCD_Types {
String prefix;
String code = UTF16.valueOf(cp);
String lower = Default.ucd.getCase(cp, FULL, LOWER);
String title = Default.ucd.getCase(cp, FULL, TITLE);
String upper = Default.ucd.getCase(cp, FULL, UPPER);
String fold = Default.ucd.getCase(cp, FULL, FOLD);
String lower = Default.ucd().getCase(cp, FULL, LOWER);
String title = Default.ucd().getCase(cp, FULL, TITLE);
String upper = Default.ucd().getCase(cp, FULL, UPPER);
String fold = Default.ucd().getCase(cp, FULL, FOLD);
showCell(output, code, "<td class='z' ", "", false);
@ -453,19 +451,18 @@ public class WriteCharts implements UCD_Types {
}
static public void scriptChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
Set set = new TreeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
byte cat = Default.ucd.getCategory(i);
if (!Default.ucd().isRepresented(i)) continue;
byte cat = Default.ucd().getCategory(i);
if (cat == Cs || cat == Co || cat == Cn) continue;
String code = UTF16.valueOf(i);
String decomp = Default.nfkd.normalize(i);
String decomp = Default.nfkd().normalize(i);
int script = getBestScript(decomp);
set.add(new Pair(new Integer(script == COMMON_SCRIPT ? cat + CAT_OFFSET : script),
@ -548,7 +545,6 @@ public class WriteCharts implements UCD_Types {
}
static public void indexChart() throws IOException {
Default.setUCD();
HACK_KANA = false;
Map map = new TreeMap();
@ -559,21 +555,21 @@ public class WriteCharts implements UCD_Types {
System.out.println("Stop-list: " + stoplist);
for (int i = 0; i < LIMIT_SCRIPT; ++i) {
stoplist.add(Default.ucd.getScriptID_fromIndex((byte)i));
stoplist.add(Default.ucd().getScriptID_fromIndex((byte)i));
}
System.out.println("Stop-list: " + stoplist);
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isRepresented(i)) continue;
if (!Default.ucd.isAssigned(i)) continue;
if (!Default.ucd().isRepresented(i)) continue;
if (!Default.ucd().isAssigned(i)) continue;
if (0xAC00 <= i && i <= 0xD7A3) continue;
if (Default.ucd.hasComputableName(i)) continue;
if (Default.ucd().hasComputableName(i)) continue;
String s = Default.ucd.getName(i);
String s = Default.ucd().getName(i);
if (s == null) continue;
if (s.startsWith("<")) {
System.out.println("Weird character at " + Default.ucd.getCodeAndName(i));
System.out.println("Weird character at " + Default.ucd().getCodeAndName(i));
}
String ch = UTF16.valueOf(i);
int last = -1;
@ -664,13 +660,13 @@ public class WriteCharts implements UCD_Types {
if (s.equals("\u0300")) {
System.out.println();
}
String name = Default.ucd.getName(s);
String comp = Default.nfc.normalize(s);
int cat = Default.ucd.getCategory(UTF16.charAt(comp,0));
String name = Default.ucd().getName(s);
String comp = Default.nfc().normalize(s);
int cat = Default.ucd().getCategory(UTF16.charAt(comp,0));
if (cat == Mn || cat == Mc || cat == Me) {
comp = '\u25CC' + comp;
if (s.equals("\u0300")) {
System.out.println(Default.ucd.getCodeAndName(comp));
System.out.println(Default.ucd().getCodeAndName(comp));
}
}
@ -691,7 +687,7 @@ public class WriteCharts implements UCD_Types {
byte result = COMMON_SCRIPT;
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
result = Default.ucd.getScript(cp);
result = Default.ucd().getScript(cp);
if (result != COMMON_SCRIPT && result != INHERITED_SCRIPT) return result;
}
return COMMON_SCRIPT;
@ -780,9 +776,9 @@ public class WriteCharts implements UCD_Types {
case CJK_AB: return "CJK-Extensions";
case UNSUPPORTED: return "Unsupported";
default:
if (script >= CAT_OFFSET) return Default.ucd.getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length);
if (script >= CAT_OFFSET) return Default.ucd().getCategoryID_fromIndex((byte)(script - CAT_OFFSET), length);
else if (script == HIRAGANA_SCRIPT && HACK_KANA) return length == SHORT ? "Kata-Hira" : "Katakana-Hiragana";
else return Default.ucd.getCase(Default.ucd.getScriptID_fromIndex((byte)script, length), FULL, TITLE);
else return Default.ucd().getCase(Default.ucd().getScriptID_fromIndex((byte)script, length), FULL, TITLE);
}
}
@ -816,8 +812,8 @@ public class WriteCharts implements UCD_Types {
gotOne = true;
}
indexFile.println("</p><hr width='50%'><p style='font-size: 70%'>");
indexFile.println("UCD: " + Default.ucd.getVersion() + extra);
indexFile.println("<br>" + df.format(new Date()) + " <a href='http://www.macchiato.com/' target='_top'>MED</a>");
indexFile.println("UCD: " + Default.ucd().getVersion() + extra);
indexFile.println("<br>" + Default.getDate() + " <a href='http://www.macchiato.com/' target='_top'>MED</a>");
indexFile.println("</p></body></html>");
indexFile.close();
}
@ -827,10 +823,10 @@ public class WriteCharts implements UCD_Types {
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
// contains Lu, Lo, Lt, or Lowercase or Uppercase
byte cat = Default.ucd.getCategory(cp);
byte cat = Default.ucd().getCategory(cp);
if (cat == Lu || cat == Ll || cat == Lt) return true;
if (Default.ucd.getBinaryProperty(cp, Other_Lowercase)) return true;
if (Default.ucd.getBinaryProperty(cp, Other_Uppercase)) return true;
if (Default.ucd().getBinaryProperty(cp, Other_Lowercase)) return true;
if (Default.ucd().getBinaryProperty(cp, Other_Uppercase)) return true;
}
return false;
}
@ -839,7 +835,6 @@ public class WriteCharts implements UCD_Types {
"any-addCircle", "([[:Mn:][:Me:]]) > \u25CC $1", Transliterator.FORWARD);
public static void writeCompositionChart() throws IOException {
Default.setUCD();
UCA uca = new UCA(null,"");
Set letters = new TreeSet();
@ -873,7 +868,7 @@ public class WriteCharts implements UCD_Types {
String scriptName = "";
try {
scriptName = Default.ucd.getScriptID_fromIndex(script);
scriptName = Default.ucd().getScriptID_fromIndex(script);
Utility.fixDot();
System.out.println(scriptName);
} catch (IllegalArgumentException e) {
@ -889,15 +884,15 @@ public class WriteCharts implements UCD_Types {
printed.clear();
for (int cp = 0; cp < 0x10FFFF; ++cp) {
byte type = Default.ucd.getCategory(cp);
if (type == Default.ucd.UNASSIGNED || type == Default.ucd.PRIVATE_USE) continue; // skip chaff
byte type = Default.ucd().getCategory(cp);
if (type == Default.ucd().UNASSIGNED || type == Default.ucd().PRIVATE_USE) continue; // skip chaff
Utility.dot(cp);
byte newScript = Default.ucd.getScript(cp);
byte newScript = Default.ucd().getScript(cp);
if (newScript != script) continue;
String source = UTF16.valueOf(cp);
String decomp = Default.nfd.normalize(source);
String decomp = Default.nfd().normalize(source);
if (decomp.equals(source)) continue;
// pick up all decompositions
@ -931,7 +926,7 @@ public class WriteCharts implements UCD_Types {
Iterator it2 = letters.iterator();
while (it2.hasNext()) {
String let = (String)it2.next();
out.println("<tr>" + showCell(Default.nfc.normalize(let), "class='h'"));
out.println("<tr>" + showCell(Default.nfc().normalize(let), "class='h'"));
Iterator it3 = marks.iterator();
while (it3.hasNext()) {
String mark = (String)it3.next();
@ -942,7 +937,7 @@ public class WriteCharts implements UCD_Types {
}
String comp;
try {
comp = Default.nfc.normalize(merge);
comp = Default.nfc().normalize(merge);
} catch (Exception e) {
System.out.println("Failed when trying to compose <" + Utility.hex(e) + ">");
continue;
@ -1027,14 +1022,13 @@ public class WriteCharts implements UCD_Types {
}
return "<td "
+ classType + (classType.length() != 0 ? " " : "")
+ "title='" + Utility.hex(comp) + " " + Default.ucd.getName(comp) + "'>" + addCircle.transliterate(comp)
+ "title='" + Utility.hex(comp) + " " + Default.ucd().getName(comp) + "'>" + addCircle.transliterate(comp)
+ "<br><tt>" + Utility.hex(comp) + "</tt></td>";
}
public static void writeAllocation() throws IOException {
Default.setUCD();
String[] names = new String[300]; // HACK, 300 is plenty for now. Fix if it ever gets larger
int[] starts = new int[names.length];
int[] ends = new int[names.length];
@ -1043,7 +1037,7 @@ public class WriteCharts implements UCD_Types {
int counter = 0;
int blockId = 0;
while (Default.ucd.getBlockData(blockId++, blockData)) {
while (Default.ucd().getBlockData(blockId++, blockData)) {
names[counter] = blockData.name;
starts[counter] = blockData.start;
ends[counter] = blockData.end;
@ -1094,7 +1088,7 @@ public class WriteCharts implements UCD_Types {
int total = ends[i] - starts[i] + 1;
int alloc = 0;
for (int j = starts[i]; j <= ends[i]; ++j) {
if (Default.ucd.isAllocated(j)) ++alloc;
if (Default.ucd().isAllocated(j)) ++alloc;
}
//System.out.println(names[i] + "\t" + alloc + "\t" + total);
String color = names[i].indexOf("Surrogates") >= 0 ? "#FF0000"

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2004/01/16 01:22:26 $
* $Revision: 1.38 $
* $Date: 2004/02/07 01:01:11 $
* $Revision: 1.39 $
*
*******************************************************************************
*/
@ -115,9 +115,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
//if (0xA000 <= a && a <= 0xA48F) continue; // skip YI
String b = Case.fold(a);
String c = Default.nfkc.normalize(b);
String c = Default.nfkc().normalize(b);
String d = Case.fold(c);
String e = Default.nfkc.normalize(d);
String e = Default.nfkc().normalize(d);
if (!e.equals(c)) {
System.out.println(Utility.hex(a) + "; " + Utility.hex(d, " ") + " # " + ucd.getName(a));
/*
@ -135,7 +135,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
*/
}
String f = Case.fold(e);
String g = Default.nfkc.normalize(f);
String g = Default.nfkc().normalize(f);
if (!f.equals(d) || !g.equals(e)) System.out.println("!!!!!!SKY IS FALLING!!!!!!");
}
}
@ -204,9 +204,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
for (char c = 0; c < 0xFFFF; ++c) {
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
if (0xAC00 <= c && c <= 0xD7A3) continue;
if (!Default.nfkd.isNormalized(c)) {
if (!Default.nfkd().isNormalized(c)) {
++count;
String decomp = Default.nfkd.normalize(c);
String decomp = Default.nfkd().normalize(c);
datasize += decomp.length();
if (max < decomp.length()) max = decomp.length();
if (decomp.length() > 7) ++over7;
@ -232,9 +232,9 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
for (char c = 0; c < 0xFFFF; ++c) {
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
if (0xAC00 <= c && c <= 0xD7A3) continue;
if (!Default.nfd.isNormalized(c)) {
if (!Default.nfd().isNormalized(c)) {
++count;
String decomp = Default.nfd.normalize(c);
String decomp = Default.nfd().normalize(c);
datasize += decomp.length();
if (max < decomp.length()) max = decomp.length();
csa.setElementAt(c, (short)count);
@ -256,7 +256,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
for (char c = 0; c < 0xFFFF; ++c) {
if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
int canClass = Default.nfkd.getCanonicalClass(c);
int canClass = Default.nfkd().getCanonicalClass(c);
if (canClass != 0) {
++count;
@ -295,7 +295,6 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
static void writeConformance(String filename, byte option, boolean shortPrint) throws IOException {
Default.setUCD();
//UCD ucd30 = UCD.make("3.0.0");
/*
@ -480,11 +479,11 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
// NOW, if the character decomposes, or is a combining mark (non-zero), try combinations
if (Default.ucd.getCombiningClass(firstChar) > 0
|| !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) {
if (Default.ucd().getCombiningClass(firstChar) > 0
|| !Default.nfd().isNormalized(s) && !Default.ucd().isHangulSyllable(firstChar)) {
// if it ends with a non-starter, try the decompositions.
String decomp = Default.nfd.normalize(s);
if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
String decomp = Default.nfd().normalize(s);
if (Default.ucd().getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
if (canIt == null) canIt = new CanonicalIterator(".");
canIt.setSource(s + LOW_ACCENT);
int limit = 4;
@ -503,7 +502,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
for (int j = 0; j < CONTRACTION_TEST.length; ++j) {
String extra = s.substring(0,i) + CONTRACTION_TEST[j] + s.substring(i);
addStringY(extra + 'a', option);
if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(extra));
if (DEBUG) System.out.println(addCounter++ + " Adding " + Default.ucd().getCodeAndName(extra));
}
}
}
@ -550,12 +549,12 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
for (int ch = 0; ch < 0x10FFFF; ++ch) {
if (!ucd_uca_base.isAllocated(ch)) continue;
if (Default.nfkd.isNormalized(ch)) continue;
if (Default.nfkd().isNormalized(ch)) continue;
if (ch > 0xAC00 && ch < 0xD7A3) continue; // skip most of Hangul
if (alreadySeen.contains(ch)) continue;
Utility.dot(ch);
String decomp = Default.nfkd.normalize(ch);
String decomp = Default.nfkd().normalize(ch);
if (ch != ' ' && decomp.charAt(0) == ' ') {
skipSet.add(ch);
continue; // skip wierd decomps
@ -608,15 +607,15 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}
static String remapSortKey(int cp, boolean decomposition) {
if (Default.nfd.isNormalized(cp)) return remapCanSortKey(cp, decomposition);
if (Default.nfd().isNormalized(cp)) return remapCanSortKey(cp, decomposition);
// we know that it is not NFKD.
String canDecomp = Default.nfd.normalize(cp);
String canDecomp = Default.nfd().normalize(cp);
String result = "";
int ch;
for (int j = 0; j < canDecomp.length(); j += UTF16.getCharCount(ch)) {
ch = UTF16.charAt(canDecomp, j);
System.out.println("* " + Default.ucd.getCodeAndName(ch));
System.out.println("* " + Default.ucd().getCodeAndName(ch));
String newSortKey = remapCanSortKey(ch, decomposition);
System.out.println("* " + UCA.toString(newSortKey));
result = mergeSortKeys(result, newSortKey);
@ -626,7 +625,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
}
static String remapCanSortKey(int ch, boolean decomposition) {
String compatDecomp = Default.nfkd.normalize(ch);
String compatDecomp = Default.nfkd().normalize(ch);
String decompSortKey = collator.getSortKey(compatDecomp, UCA.NON_IGNORABLE, decomposition);
byte type = ucd.getDecompositionType(ch);
@ -799,9 +798,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
log.println("compressed: " + comp);
}
log.println("Ken's : " + kenStr);
String nfkd = Default.nfkd.normalize(s);
String nfkd = Default.nfkd().normalize(s);
log.println("NFKD : " + ucd.getCodeAndName(nfkd));
String nfd = Default.nfd.normalize(s);
String nfd = Default.nfd().normalize(s);
if (!nfd.equals(nfkd)) {
log.println("NFD : " + ucd.getCodeAndName(nfd));
}
@ -824,7 +823,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
static final byte getDecompType(int cp) {
byte result = ucd.getDecompositionType(cp);
if (result == ucd.CANONICAL) {
String d = Default.nfd.normalize(cp); // TODO
String d = Default.nfd().normalize(cp); // TODO
int cp1;
for (int i = 0; i < d.length(); i += UTF16.getCharCount(cp1)) {
cp1 = UTF16.charAt(d, i);
@ -887,7 +886,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
byte type = getDecompType(UTF16.charAt(s, 0));
char ch = s.charAt(0);
String decomp = Default.nfkd.normalize(s);
String decomp = Default.nfkd().normalize(s);
int len = 0;
int markLen = collator.getCEs(decomp, true, markCes);
if (compress) markLen = kenCompress(markCes, markLen);
@ -994,14 +993,14 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
Map map = new TreeMap();
while (true) {
String s = cc.next();
if (s == null) break;
if (!Default.nfd.isNormalized(s)) continue; // only unnormalized stuff
if (!Default.nfd().isNormalized(s)) continue; // only unnormalized stuff
if (UTF16.countCodePoint(s) == 1) {
int cat = ucd.getCategory(UTF16.charAt(s,0));
if (cat == Cn || cat == Cc || cat == Cs) continue;
@ -1033,7 +1032,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
log.println("<p>These are not necessarily errors, but should be examined for <i>possible</i> errors</p>");
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
Map map = new TreeMap();
Map tails = new TreeMap();
@ -1045,7 +1044,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
String s = cc.next();
if (s == null) break;
Utility.dot(counter++);
if (!Default.nfd.isNormalized(s)) continue; // only normalized stuff
if (!Default.nfd().isNormalized(s)) continue; // only normalized stuff
CEList celist = collator.getCEList(s, true);
map.put(celist, s);
}
@ -1216,7 +1215,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
int[] ces = new int[50];
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
int[] lenArray = new int[1];
diLog.println("# Contractions");
@ -1287,7 +1286,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
String s = String.valueOf(ch);
int len = collator.getCEs(s, true, ces);
*/
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
int[] lenArray = new int[1];
Set sortedCodes = new TreeSet();
@ -1458,7 +1457,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
String s = String.valueOf(ch);
int len = collator.getCEs(s, true, ces);
*/
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
int[] lenArray = new int[1];
Set sortedCodes = new TreeSet();
@ -1671,7 +1670,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
Map ordered = new TreeMap(cm);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE,
SKIP_CANONICAL_DECOMPOSIBLES ? Default.nfd : null);
SKIP_CANONICAL_DECOMPOSIBLES ? Default.nfd() : null);
int[] lenArray = new int[1];
Set alreadyDone = new HashSet();
@ -1737,7 +1736,7 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
UnicodeSet composites = new UnicodeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
if (!ucd.isAllocated(i)) continue;
if (Default.nfd.isNormalized(i)) continue;
if (Default.nfd().isNormalized(i)) continue;
composites.add(i);
}
UnicodeSet CJKcomposites = new UnicodeSet(CJK).retainAll(composites);
@ -1774,9 +1773,9 @@ F900..FAFF; CJK Compatibility Ideographs
System.out.println("Adding Kanji");
for (int i = 0; i < 0x10FFFF; ++i) {
if (!ucd.isAllocated(i)) continue;
if (Default.nfkd.isNormalized(i)) continue;
if (Default.nfkd().isNormalized(i)) continue;
Utility.dot(i);
String decomp = Default.nfkd.normalize(i);
String decomp = Default.nfkd().normalize(i);
int cp;
for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(decomp, j);
@ -2438,7 +2437,7 @@ F900..FAFF; CJK Compatibility Ideographs
System.out.println("Fix Homeless! No back map for " + CEList.toString(ces[i])
+ " from " + CEList.toString(ces, len));
System.out.println("\t" + ucd.getCodeAndName(chr)
+ " => " + ucd.getCodeAndName(Default.nfkd.normalize(chr))
+ " => " + ucd.getCodeAndName(Default.nfkd().normalize(chr))
);
s = "[" + Utility.hex(ces[i]) + "]";
} while (false); // exactly one time, just for breaking
@ -2528,7 +2527,7 @@ F900..FAFF; CJK Compatibility Ideographs
"[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]");
// needsQuoting.remove();
}
s = Default.nfc.normalize(s);
s = Default.nfc().normalize(s);
quoteOperandBuffer.setLength(0);
boolean noQuotes = true;
boolean inQuote = false;
@ -2628,7 +2627,6 @@ F900..FAFF; CJK Compatibility Ideographs
static int[] primaryDelta;
static void writeFractionalUCA(String filename) throws IOException {
Default.setUCD();
checkImplicit();
checkFixes();
@ -2760,7 +2758,7 @@ F900..FAFF; CJK Compatibility Ideographs
for (int i = 0; i < 0x10FFFF; ++i) {
if (!ucd.isNoncharacter(i)) {
if (!ucd.isAllocated(i)) continue;
if (Default.nfd.isNormalized(i)) continue;
if (Default.nfd().isNormalized(i)) continue;
if (ucd.isHangulSyllable(i)) continue;
//if (collator.getCEType(i) >= UCA.FIXED_CE) continue;
}
@ -2795,7 +2793,7 @@ F900..FAFF; CJK Compatibility Ideographs
// Skip anything that is not FCD.
if (!Default.nfd.isFCD(s)) continue;
if (!Default.nfd().isFCD(s)) continue;
// We ONLY add if the sort key would be different
// Than what we would get if we didn't decompose!!
@ -3381,7 +3379,7 @@ F900..FAFF; CJK Compatibility Ideographs
}
String toString(boolean showEmpty) {
String src = source.length() == 0 ? "CONSTRUCTED" : Default.ucd.getCodeAndName(source);
String src = source.length() == 0 ? "CONSTRUCTED" : Default.ucd().getCodeAndName(source);
return "[" + (max ? "last " : "first ") + title + " " + formatFCE(showEmpty) + "] # " + src;
}
@ -3631,7 +3629,7 @@ F900..FAFF; CJK Compatibility Ideographs
// b. toSmallKana(NFKD(x)) != x.
static final boolean needsCaseBit(String x) {
String s = Default.nfkd.normalize(x);
String s = Default.nfkd().normalize(x);
if (!ucd.getCase(s, FULL, LOWER).equals(s)) return true;
if (!toSmallKana(s).equals(s)) return true;
return false;
@ -3952,7 +3950,7 @@ F900..FAFF; CJK Compatibility Ideographs
static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd','HH:mm:ss' GMT'");
static String getNormalDate() {
return myDateFormat.format(new Date()) + " [MD]";
return Default.getDate() + " [MD]";
}
@ -3976,7 +3974,6 @@ F900..FAFF; CJK Compatibility Ideographs
static UnicodeSet compatibilityExceptions = new UnicodeSet("[\u0CCB\u0DDD\u017F\u1E9B\uFB05]");
static void writeCollationValidityLog() throws IOException {
Default.setUCD();
//log = new PrintWriter(new FileOutputStream("CheckCollationValidity.html"));
log = Utility.openPrintWriter(UCA_GEN_DIR, "CheckCollationValidity.html", Utility.UTF8_WINDOWS);
@ -4120,7 +4117,7 @@ F900..FAFF; CJK Compatibility Ideographs
continue;
}
canIt.setSource(key);
String nfdKey = Default.nfd.normalize(key);
String nfdKey = Default.nfd().normalize(key);
boolean first = true;
while (true) {
@ -4132,7 +4129,7 @@ F900..FAFF; CJK Compatibility Ideographs
// Skip anything that is not FCD.
if (!Default.nfd.isFCD(s)) continue;
if (!Default.nfd().isFCD(s)) continue;
// We ONLY add if the sort key would be different
// Than what we would get if we didn't decompose!!
@ -4184,7 +4181,7 @@ F900..FAFF; CJK Compatibility Ideographs
int[] ces = new int[50];
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
int[] lenArray = new int[1];
int minps = Integer.MAX_VALUE;
@ -4220,7 +4217,7 @@ F900..FAFF; CJK Compatibility Ideographs
}
}
cc = collator.getContents(UCA.FIXED_CE, Default.nfd);
cc = collator.getContents(UCA.FIXED_CE, Default.nfd());
log.println("<table border='1' cellspacing='0' cellpadding='2'>");
int lastPrimary = 0;
@ -4370,7 +4367,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static void addString(String ch, byte option) {
String colDbase = collator.getSortKey(ch, option, true);
String colNbase = collator.getSortKey(ch, option, false);
String colCbase = collator.getSortKey(Default.nfc.normalize(ch), option, false);
String colCbase = collator.getSortKey(Default.nfc().normalize(ch), option, false);
if (!colNbase.equals(colCbase) || !colNbase.equals(colDbase) ) {
/*System.out.println(Utility.hex(ch));
System.out.println(printableKey(colNbase));
@ -4540,7 +4537,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
}
static void showLine(int count, String ch, String keyD, String keyN) {
String decomp = Default.nfd.normalize(ch);
String decomp = Default.nfd().normalize(ch);
if (decomp.equals(ch)) decomp = ""; else decomp = "<br><" + Utility.hex(decomp, " ") + "> ";
log.println("<tr><td>" + count + "</td><td>"
+ Utility.hex(ch, " ")
@ -4576,8 +4573,8 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
String MN = (String)MismatchedN.get(ch);
String MC = (String)MismatchedC.get(ch);
String MD = (String)MismatchedD.get(ch);
String chInC = Default.nfc.normalize(ch);
String chInD = Default.nfd.normalize(ch);
String chInC = Default.nfc().normalize(ch);
String chInD = Default.nfd().normalize(ch);
log.println("<tr><td rowSpan='3' class='bottom'>" + Utility.replace(ucd.getName(ch), ", ", ",<br>")
+ "</td><td>NFD</td><td>" + Utility.hex(chInD)
@ -4610,7 +4607,7 @@ A4C6;YI RADICAL KE;So;0;ON;;;;;N;;;;;
static void showDiff(boolean showName, boolean firstColumn, int line, Object chobj) {
String ch = chobj.toString();
String decomp = Default.nfd.normalize(ch);
String decomp = Default.nfd().normalize(ch);
if (showName) {
if (ch.equals(decomp)) {
log.println(//title + counter + " "

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
* $Date: 2002/07/30 09:56:41 $
* $Revision: 1.7 $
* $Date: 2004/02/07 01:01:17 $
* $Revision: 1.8 $
*
*******************************************************************************
*/
@ -29,8 +29,6 @@ public class BuildNames implements UCD_Types {
static final boolean DEBUG = true;
public static void main(String[] args) throws IOException {
Default.setUCD();
collectWords();
}
@ -153,13 +151,13 @@ public class BuildNames implements UCD_Types {
int longSum = 0;
for (int cp = 0; cp < 0x10FFFF; ++cp) {
if (!Default.ucd.isAllocated(cp)) continue;
if (Default.ucd.hasComputableName(cp)) continue;
if (!Default.ucd().isAllocated(cp)) continue;
if (Default.ucd().hasComputableName(cp)) continue;
Utility.dot(cp);
String name;
if (Default.ucd.isRepresented(cp)) {
name = Default.ucd.getName(cp, SHORT);
if (Default.ucd().isRepresented(cp)) {
name = Default.ucd().getName(cp, SHORT);
log.println(Utility.hex(cp) + " " + name);
String backName = Utility.replace(name, UCD_Names.NAME_ABBREVIATIONS, false);
if (!name.equals(backName)) {
@ -170,19 +168,19 @@ public class BuildNames implements UCD_Types {
// check the string, and its decomposition. This is just to get a good count.
String str = UTF16.valueOf(cp);
if (false && !Default.nfkd.isNormalized(cp)) {
str += Default.nfkd.normalize(cp);
if (false && !Default.nfkd().isNormalized(cp)) {
str += Default.nfkd().normalize(cp);
}
int cp2;
for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp2)) {
cp2 = UTF16.charAt(str, i);
name = Default.ucd.getName(cp2, SHORT);
name = Default.ucd().getName(cp2, SHORT);
if (name == null) continue;
//name = transform(name);
sum += name.length();
longSum += Default.ucd.getName(cp2).length();
longSum += Default.ucd().getName(cp2).length();
used++;
// replace numbers & letters

View file

@ -11,6 +11,7 @@ import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeLabel;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.lang.UProperty;
@ -19,6 +20,7 @@ import com.ibm.text.utility.Utility;
public class CheckICU {
static final BagFormatter bf = new BagFormatter();
static final BagFormatter bf2 = new BagFormatter();
public static void main(String[] args) throws IOException {
System.out.println("Start");
@ -29,6 +31,20 @@ public class CheckICU {
static UnicodeSet itemFailures;
static ICUPropertyFactory icuFactory;
static ToolUnicodePropertySource toolFactory;
/**
 * Label that forwards to a wrapped {@link UnicodeProperty} and reports its
 * values with every underscore replaced by a space.
 */
static class ReplaceLabel extends UnicodeLabel {
    /** Property that supplies the raw values and the maximum width. */
    UnicodeProperty p;

    /**
     * Wraps the given property.
     *
     * @param property the property to delegate to; stored as-is
     */
    ReplaceLabel(UnicodeProperty property) {
        p = property;
    }

    /**
     * Returns the wrapped property's value for the code point, with each
     * '_' character turned into ' '.
     *
     * @param codepoint the code point to look up
     * @param isShort   passed through unchanged to the wrapped property
     * @return the property value with underscores replaced by spaces
     */
    public String getValue(int codepoint, boolean isShort) {
        // NOTE(review): if the wrapped property can return null here, the
        // replace call below throws NullPointerException -- confirm against
        // UnicodeProperty before relying on this for sparse properties.
        String rawValue = p.getValue(codepoint, isShort);
        return rawValue.replace('_', ' ');
    }

    /**
     * Delegates the width query to the wrapped property without adjustment.
     *
     * @param v passed through unchanged to the wrapped property
     * @return whatever the wrapped property reports as its maximum width
     */
    public int getMaxWidth(boolean v) {
        return p.getMaxWidth(v);
    }
}
public static void test() throws IOException {
checkUCD();
@ -37,18 +53,23 @@ public class CheckICU {
toolFactory = ToolUnicodePropertySource.make("4.0.0");
String[] quickList = {
"Name",
"Block",
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
//"Numeric_Value"
};
for (int i = 0; i < quickList.length; ++i) {
testProperty(quickList[i], -1);
//testProperty(quickList[i], -1);
bf2.setValueSource(new ReplaceLabel(toolFactory.getProperty(quickList[i])))
.setLabelSource(null)
.setNameSource(null)
.setShowCount(false);
bf2.showSetNames(bf2.CONSOLE, quickList[i], new UnicodeSet(0,0x10FFFF));
}
if (quickList.length > 0) return;
Collection availableTool = toolFactory.getAvailablePropertyAliases(new TreeSet());
Collection availableTool = toolFactory.getAvailableAliases(new TreeSet());
Collection availableICU = icuFactory.getAvailablePropertyAliases(new TreeSet());
Collection availableICU = icuFactory.getAvailableAliases(new TreeSet());
System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool));
Collection common = new TreeSet(availableICU);
common.retainAll(availableTool);
@ -98,7 +119,7 @@ public class CheckICU {
private static void testProperty(String prop, int typeFilter) {
UnicodeProperty icuProp = icuFactory.getProperty(prop);
int icuType = icuProp.getPropertyType();
int icuType = icuProp.getType();
if (typeFilter >= 0 && icuType != typeFilter) return;
@ -106,18 +127,18 @@ public class CheckICU {
System.out.println("Testing: " + prop);
UnicodeProperty toolProp = toolFactory.getProperty(prop);
int toolType = toolProp.getPropertyType();
int toolType = toolProp.getType();
if (icuType != toolType) {
System.out.println("FAILURE Type: ICU: " + UnicodeProperty.getTypeName(icuType)
+ "\tTool: " + UnicodeProperty.getTypeName(toolType));
}
Collection icuAliases = icuProp.getPropertyAliases(new ArrayList());
Collection toolAliases = toolProp.getPropertyAliases(new ArrayList());
Collection icuAliases = icuProp.getAliases(new ArrayList());
Collection toolAliases = toolProp.getAliases(new ArrayList());
System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases));
icuAliases = icuProp.getAvailablePropertyValueAliases(new ArrayList());
toolAliases = toolProp.getAvailablePropertyValueAliases(new ArrayList());
icuAliases = icuProp.getAvailableValueAliases(new ArrayList());
toolAliases = toolProp.getAvailableValueAliases(new ArrayList());
System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases));
// TODO do property value aliases
@ -128,8 +149,8 @@ public class CheckICU {
System.out.println();
}
*/
String icuValue = icuProp.getPropertyValue(i);
String toolValue = toolProp.getPropertyValue(i);
String icuValue = icuProp.getValue(i);
String toolValue = toolProp.getValue(i);
if (!equals(icuValue, toolValue)) {
itemFailures.add(i);
if (firstDiffCP == null) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $
* $Date: 2003/04/25 01:39:15 $
* $Revision: 1.2 $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -150,7 +150,7 @@ tolower
pw.println("**************************************************");
pw.println(name);
pw.println("**************************************************");
Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd);
Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd());
//pw.println(props[i].contents);
}
}
@ -160,7 +160,7 @@ tolower
public static void main(String[] args) throws IOException {
String version = Default.ucd.getVersion();
String version = Default.ucd().getVersion();
PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);
try {
log.write('\uFEFF');
@ -171,8 +171,8 @@ tolower
UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));
UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));
Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd);
Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd);
Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd());
Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd());
}
BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
@ -259,7 +259,7 @@ xdigit includes digit
.removeAll(alpha.contents)
.removeAll(cntrl.contents)
.removeAll(space.contents);
Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd);
Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd());
UnicodeSet propRemainder = new UnicodeSet(cnSet)
.complement()
@ -271,7 +271,7 @@ xdigit includes digit
.removeAll(alpha.guessContents)
.removeAll(cntrl.guessContents)
.removeAll(space.guessContents);
Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd);
Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd());
/*
checkDisjoint(new Prop[] {alpha, digit, punct, cntrl});
@ -318,7 +318,7 @@ xdigit includes digit
log.println();
log.println("Fails test: " + name + " disjoint-with " + name2);
UnicodeSet diff = new UnicodeSet(set).retainAll(set2);
Utility.showSetNames(log, "", diff, false, false, Default.ucd);
Utility.showSetNames(log, "", diff, false, false, Default.ucd());
}
}
@ -332,7 +332,7 @@ xdigit includes digit
log.println();
log.println("Fails test:" + name + " includes " + name2);
UnicodeSet diff = new UnicodeSet(set2).removeAll(set);
Utility.showSetNames(log, "", diff, false, false, Default.ucd);
Utility.showSetNames(log, "", diff, false, false, Default.ucd());
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
* $Date: 2004/02/06 18:30:23 $
* $Revision: 1.3 $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -142,9 +142,9 @@ public class CompareProperties implements UCD_Types {
int total = 0;
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
Utility.dot(cp);
int cat = Default.ucd.getCategory(cp);
int cat = Default.ucd().getCategory(cp);
// if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
if (!Default.ucd.isAllocated(cp)) continue;
if (!Default.ucd().isAllocated(cp)) continue;
for (int i = 0; i < count; ++i) {
UCDProperty up = props[i];
@ -170,14 +170,13 @@ public class CompareProperties implements UCD_Types {
}
private void getProperties() {
Default.setUCD();
for (int i = 0; i < LIMIT_ENUM; ++i) { // || iType == SCRIPT
int iType = i & 0xFF00;
if (iType == AGE || iType == JOINING_GROUP || iType == COMBINING_CLASS) continue;
if (i == 0x0900) {
System.out.println("debug");
}
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd());
if (up == null) continue;
if (up.getValueType() < BINARY_PROP) {
System.out.println("\tSkipping " + up.getName() + "; value varies");
@ -384,10 +383,9 @@ public class CompareProperties implements UCD_Types {
public static void listDifferences() throws IOException {
Default.setUCD();
PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
output.println("# Generated for " + Default.ucd.getVersion());
output.println("# Generated for " + Default.ucd().getVersion());
output.println(GenerateData.generateDateLine());
output.println("# P1 P2 R(P1,P2) C(P1&P2) C(P1-P2) C(P2-P1)");
@ -395,7 +393,7 @@ public class CompareProperties implements UCD_Types {
for (int i = 1; i < UCD_Types.LIMIT_ENUM; ++i) {
int iType = i & 0xFF00;
if (iType == UCD_Types.JOINING_GROUP || iType == UCD_Types.AGE || iType == UCD_Types.COMBINING_CLASS || iType == UCD_Types.SCRIPT) continue;
UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd());
if (upi == null) continue;
if (!upi.isStandard()) {
System.out.println("Skipping " + upi.getName() + "; not standard");
@ -419,7 +417,7 @@ public class CompareProperties implements UCD_Types {
int jType = j & 0xFF00;
if (jType == UCD_Types.JOINING_GROUP || jType == UCD_Types.AGE || jType == UCD_Types.COMBINING_CLASS || jType == UCD_Types.SCRIPT
|| (jType == iType && jType != UCD_Types.BINARY_PROPERTIES)) continue;
UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd);
UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd());
if (upj == null) continue;
if (!upj.isStandard()) continue;
if (upj.getValueType() < UCD_Types.BINARY_PROP) continue;
@ -439,9 +437,9 @@ public class CompareProperties implements UCD_Types {
int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0;
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
int cat = Default.ucd.getCategory(cp);
int cat = Default.ucd().getCategory(cp);
if (cat == UCD_Types.UNASSIGNED || cat == UCD_Types.PRIVATE_USE || cat == UCD_Types.SURROGATE) continue;
if (!Default.ucd.isAllocated(cp)) continue;
if (!Default.ucd().isAllocated(cp)) continue;
boolean iProp = upi.hasValue(cp);
boolean jProp = upj.hasValue(cp);

View file

@ -9,29 +9,25 @@ import java.util.TimeZone;
public final class Default implements UCD_Types {
private static String ucdVersion = UCD.latestVersion;
public static UCD ucd;
public static Normalizer nfc;
public static Normalizer nfd;
public static Normalizer nfkc;
public static Normalizer nfkd;
public static Normalizer[] nf = new Normalizer[4];
public static void ensureUCD() {
if (ucd == null) setUCD();
}
private static UCD ucd;
private static Normalizer nfc;
private static Normalizer nfd;
private static Normalizer nfkc;
private static Normalizer nfkd;
private static Normalizer[] nf = new Normalizer[4];
public static void setUCD(String version) {
setUcdVersion(version);
ucdVersion = version;
setUCD();
}
public static void setUCD() {
ucd = UCD.make(getUcdVersion());
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, getUcdVersion());
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, getUcdVersion());
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, getUcdVersion());
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, getUcdVersion());
System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(ucd.getDate())));
/**
 * Loads the default UCD and the four normalizers (NFD, NFC, NFKD, NFKC)
 * for the version reported by ucdVersion(), storing each normalizer both
 * in its own static field and in the nf[] array, then prints a
 * "Loaded UCD..." line with the loaded version and date to System.out.
 */
private static void setUCD() {
// NOTE(review): ucdVersion() is invoked here while the ucd field is still
// unassigned, so ucdVersion() must not itself call back into setUCD().
ucd = UCD.make(ucdVersion());
// Each normalizer is stored twice: by name and by index in nf[].
nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion());
nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion());
nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion());
nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion());
System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate())));
}
static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'");
@ -43,12 +39,34 @@ public final class Default implements UCD_Types {
return myDateFormat.format(new Date());
}
public static void setUcdVersion(String ucdVersion) {
Default.ucdVersion = ucdVersion;
}
public static String getUcdVersion() {
/**
 * Returns the UCD version string currently selected for the defaults.
 *
 * This is a plain field read and deliberately does NOT trigger loading:
 * setUCD() calls this method while the ucd field is still null, so a
 * lazy-load guard here would re-enter setUCD() without end.
 *
 * @return the selected UCD version string
 */
public static String ucdVersion() {
    return ucdVersion;
}

/**
 * Returns the default UCD, loading it (and the normalizers) on first use.
 *
 * The guard tests the ucd field directly; testing ucd() here would make
 * the method call itself unconditionally and overflow the stack.
 *
 * @return the default UCD instance
 */
public static UCD ucd() {
    if (ucd == null) setUCD();
    return ucd;
}

/**
 * Returns the default NFC normalizer, loading the defaults on first use.
 *
 * @return the NFC normalizer
 */
public static Normalizer nfc() {
    if (ucd == null) setUCD();
    return nfc;
}

/**
 * Returns the default NFD normalizer, loading the defaults on first use.
 *
 * @return the NFD normalizer
 */
public static Normalizer nfd() {
    if (ucd == null) setUCD();
    return nfd;
}

/**
 * Returns the default NFKC normalizer, loading the defaults on first use.
 *
 * @return the NFKC normalizer
 */
public static Normalizer nfkc() {
    if (ucd == null) setUCD();
    return nfkc;
}

/**
 * Returns the default NFKD normalizer, loading the defaults on first use.
 *
 * @return the NFKD normalizer
 */
public static Normalizer nfkd() {
    if (ucd == null) setUCD();
    return nfkd;
}

/**
 * Returns the default normalizer stored at the given slot of the nf[]
 * array (setUCD() fills the NFD, NFC, NFKD and NFKC slots), loading the
 * defaults on first use.
 *
 * @param index slot in the normalizer array
 * @return the normalizer stored at that slot
 */
public static Normalizer nf(int index) {
    if (ucd == null) setUCD();
    return nf[index];
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.23 $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.24 $
*
*******************************************************************************
*/
@ -32,7 +32,7 @@ public final class DerivedProperty implements UCD_Types {
// ADD CONSTANT to UCD_TYPES
static public UCDProperty make(int derivedPropertyID) {
return make(derivedPropertyID, Default.ucd);
return make(derivedPropertyID, Default.ucd());
}
static public UCDProperty make(int derivedPropertyID, UCD ucd) {
@ -961,7 +961,6 @@ of characters, the first of which has a non-zero combining class.
}
public static void test() {
Default.setUCD();
/*
DerivedProperty dprop = new DerivedProperty(Default.ucd);
for (int j = 0; j < LIMIT; ++j) {
@ -973,9 +972,9 @@ of characters, the first of which has a non-zero combining class.
for (int cp = 0xA0; cp < 0xFF; ++cp) {
System.out.println();
System.out.println(Default.ucd.getCodeAndName(cp));
System.out.println(Default.ucd().getCodeAndName(cp));
for (int j = 0; j < DERIVED_PROPERTY_LIMIT; ++j) {
String prop = make(j, Default.ucd).getValue(cp);
String prop = make(j, Default.ucd()).getValue(cp);
if (prop.length() != 0) System.out.println("\t" + prop);
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.8 $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.9 $
*
*******************************************************************************
*/
@ -36,10 +36,10 @@ abstract public class GenerateBreakTest implements UCD_Types {
public static void main(String[] args) throws IOException {
System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
//Default.setUCD();
new GenerateGraphemeBreakTest(Default.ucd).run();
new GenerateWordBreakTest(Default.ucd).run();
new GenerateLineBreakTest(Default.ucd).run();
new GenerateSentenceBreakTest(Default.ucd).run();
new GenerateGraphemeBreakTest(Default.ucd()).run();
new GenerateWordBreakTest(Default.ucd()).run();
new GenerateLineBreakTest(Default.ucd()).run();
new GenerateSentenceBreakTest(Default.ucd()).run();
}
GenerateBreakTest(UCD ucd) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
* $Date: 2004/02/06 18:30:22 $
* $Revision: 1.14 $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -40,7 +40,6 @@ public class GenerateCaseFolding implements UCD_Types {
public static void makeCaseFold(boolean normalized) throws java.io.IOException {
PICK_SHORT = NF_CLOSURE = normalized;
Default.setUCD();
log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true), Utility.LATIN1_UNIX);
System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
@ -142,15 +141,15 @@ public class GenerateCaseFolding implements UCD_Types {
static void drawLine(PrintWriter out, int ch, String type, String result) {
String comment = "";
if (COMMENT_DIFFS) {
String lower = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
String lower = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER);
if (!lower.equals(result)) {
String upper = Default.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
String lower2 = Default.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
String upper = Default.ucd().getCase(UTF16.valueOf(ch), FULL, UPPER);
String lower2 = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER);
if (lower.equals(lower2)) {
comment = "[Diff " + Utility.hex(lower, " ") + "] ";
} else {
Utility.fixDot();
System.out.println("PROBLEM WITH: " + Default.ucd.getCodeAndName(ch));
System.out.println("PROBLEM WITH: " + Default.ucd().getCodeAndName(ch));
comment = "[DIFF " + Utility.hex(lower, " ") + ", " + Utility.hex(lower2, " ") + "] ";
}
}
@ -159,7 +158,7 @@ public class GenerateCaseFolding implements UCD_Types {
out.println(Utility.hex(ch)
+ "; " + type
+ "; " + Utility.hex(result, " ")
+ "; # " + comment + Default.ucd.getName(ch));
+ "; # " + comment + Default.ucd().getName(ch));
}
static int probeCh = 0x01f0;
@ -175,7 +174,7 @@ public class GenerateCaseFolding implements UCD_Types {
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
Utility.dot(ch);
//if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
if (!Default.ucd.isRepresented(ch)) continue;
if (!Default.ucd().isRepresented(ch)) continue;
getClosure(ch, data, full, nfClose, condition);
}
@ -221,13 +220,13 @@ public class GenerateCaseFolding implements UCD_Types {
}
Utility.fixDot();
log.println("Non-Optimal Representative " + message);
log.println(" Rep:\t" + Default.ucd.getCodeAndName(rep));
log.println(" Rep:\t" + Default.ucd().getCodeAndName(rep));
log.println(" Set:\t" + toString(set,true, true));
}
log.println();
log.println();
log.println(rep + "\t#" + Default.ucd.getName(rep));
log.println(rep + "\t#" + Default.ucd().getName(rep));
// Add it for all the elements of the set
@ -236,7 +235,7 @@ public class GenerateCaseFolding implements UCD_Types {
String s2 = (String)it2.next();
if (s2.equals(rep)) continue;
log.println(s2 + "\t#" + Default.ucd.getName(s2));
log.println(s2 + "\t#" + Default.ucd().getName(s2));
if (UTF16.countCodePoint(s2) == 1) {
repChar.put(UTF32.getCodePointSubstring(s2,0), rep);
@ -261,13 +260,13 @@ public class GenerateCaseFolding implements UCD_Types {
if (!full) result <<= 8;
String low = lower(upper(s, full, condition), full, condition);
if (s.equals(low)) result |= ISLOWER;
else if (PICK_SHORT && Default.nfd.normalize(s).equals(Default.nfd.normalize(low))) result |= ISLOWER;
else if (PICK_SHORT && Default.nfd().normalize(s).equals(Default.nfd().normalize(low))) result |= ISLOWER;
if (s.equals(Default.nfc.normalize(s))) result |= NFC_FORMAT;
if (s.equals(Default.nfc().normalize(s))) result |= NFC_FORMAT;
if (show) {
Utility.fixDot();
System.out.println(Utility.hex(result) + ", " + Default.ucd.getCodeAndName(s));
System.out.println(Utility.hex(result) + ", " + Default.ucd().getCodeAndName(s));
}
return result;
}
@ -349,10 +348,10 @@ public class GenerateCaseFolding implements UCD_Types {
// do funny stuff since we can't modify set while iterating
// We don't do this because if the source is not normalized, we don't want to normalize
if (nfClose) {
if (add(set, Default.nfd.normalize(s), data)) continue main;
if (add(set, Default.nfc.normalize(s), data)) continue main;
if (add(set, Default.nfkd.normalize(s), data)) continue main;
if (add(set, Default.nfkc.normalize(s), data)) continue main;
if (add(set, Default.nfd().normalize(s), data)) continue main;
if (add(set, Default.nfc().normalize(s), data)) continue main;
if (add(set, Default.nfkd().normalize(s), data)) continue main;
if (add(set, Default.nfkc().normalize(s), data)) continue main;
}
if (add(set, lower(s, full, condition), data)) continue main;
if (add(set, title(s, full, condition), data)) continue main;
@ -376,7 +375,7 @@ public class GenerateCaseFolding implements UCD_Types {
return Default.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
}
*/
return Default.ucd.getCase(s, full ? FULL : SIMPLE, LOWER, condition);
return Default.ucd().getCase(s, full ? FULL : SIMPLE, LOWER, condition);
}
static String upper(String s, boolean full, String condition) {
@ -385,7 +384,7 @@ public class GenerateCaseFolding implements UCD_Types {
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
}
*/
return Default.ucd.getCase(s, full ? FULL : SIMPLE, UPPER, condition);
return Default.ucd().getCase(s, full ? FULL : SIMPLE, UPPER, condition);
}
static String title(String s, boolean full, String condition) {
@ -394,7 +393,7 @@ public class GenerateCaseFolding implements UCD_Types {
return Default.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
}
*/
return Default.ucd.getCase(s, full ? FULL : SIMPLE, TITLE, condition);
return Default.ucd().getCase(s, full ? FULL : SIMPLE, TITLE, condition);
}
static boolean add(Set set, String s, Map data) {
@ -433,7 +432,7 @@ public class GenerateCaseFolding implements UCD_Types {
}
first = false;
if (name) {
result += Default.ucd.getCodeAndName(s2);
result += Default.ucd().getCodeAndName(s2);
} else {
result += Utility.hex(s2, " ");
}
@ -443,12 +442,12 @@ public class GenerateCaseFolding implements UCD_Types {
static boolean specialNormalizationDiffers(int ch) {
if (ch == 0x00DF) return true; // es-zed
return !Default.nfkd.isNormalized(ch);
return !Default.nfkd().isNormalized(ch);
}
static String specialNormalization(String s) {
if (s.equals("\u00DF")) return "ss";
return Default.nfkd.normalize(s);
return Default.nfkd().normalize(s);
}
static boolean isExcluded(int ch) {
@ -458,14 +457,13 @@ public class GenerateCaseFolding implements UCD_Types {
if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A..
if (0x20A8 <= ch && ch <= 0x217B) return true; // skip Rupee..
byte type = Default.ucd.getDecompositionType(ch);
byte type = Default.ucd().getDecompositionType(ch);
if (type == COMPAT_SQUARE) return true;
//if (type == COMPAT_UNSPECIFIED) return true;
return false;
}
static void generateSpecialCasing(boolean normalize) throws IOException {
Default.setUCD();
Map sorted = new TreeMap();
String suffix2 = "";
@ -476,19 +474,19 @@ public class GenerateCaseFolding implements UCD_Types {
for (int ch = 0; ch <= 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Default.ucd.isRepresented(ch)) continue;
if (!Default.ucd().isRepresented(ch)) continue;
if (!specialNormalizationDiffers(ch)) continue;
String lower = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, LOWER));
String upper = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, UPPER));
String title = Default.nfc.normalize(Default.ucd.getCase(ch, SIMPLE, TITLE));
String lower = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, LOWER));
String upper = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, UPPER));
String title = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, TITLE));
String chstr = UTF16.valueOf(ch);
String decomp = specialNormalization(chstr);
String flower = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, LOWER));
String fupper = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, UPPER));
String ftitle = Default.nfc.normalize(Default.ucd.getCase(decomp, SIMPLE, TITLE));
String flower = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, LOWER));
String fupper = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, UPPER));
String ftitle = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, TITLE));
String base = decomp;
String blower = specialNormalization(lower);
@ -496,42 +494,42 @@ public class GenerateCaseFolding implements UCD_Types {
String btitle = specialNormalization(title);
if (true) {
flower = Default.nfc.normalize(flower);
fupper = Default.nfc.normalize(fupper);
ftitle = Default.nfc.normalize(ftitle);
base = Default.nfc.normalize(base);
blower = Default.nfc.normalize(blower);
bupper = Default.nfc.normalize(bupper);
btitle = Default.nfc.normalize(btitle);
flower = Default.nfc().normalize(flower);
fupper = Default.nfc().normalize(fupper);
ftitle = Default.nfc().normalize(ftitle);
base = Default.nfc().normalize(base);
blower = Default.nfc().normalize(blower);
bupper = Default.nfc().normalize(bupper);
btitle = Default.nfc().normalize(btitle);
}
if (ch == CHECK_CHAR) {
System.out.println("Code: " + Default.ucd.getCodeAndName(ch));
System.out.println("Decomp: " + Default.ucd.getCodeAndName(decomp));
System.out.println("Base: " + Default.ucd.getCodeAndName(base));
System.out.println("SLower: " + Default.ucd.getCodeAndName(lower));
System.out.println("FLower: " + Default.ucd.getCodeAndName(flower));
System.out.println("BLower: " + Default.ucd.getCodeAndName(blower));
System.out.println("STitle: " + Default.ucd.getCodeAndName(title));
System.out.println("FTitle: " + Default.ucd.getCodeAndName(ftitle));
System.out.println("BTitle: " + Default.ucd.getCodeAndName(btitle));
System.out.println("SUpper: " + Default.ucd.getCodeAndName(upper));
System.out.println("FUpper: " + Default.ucd.getCodeAndName(fupper));
System.out.println("BUpper: " + Default.ucd.getCodeAndName(bupper));
System.out.println("Code: " + Default.ucd().getCodeAndName(ch));
System.out.println("Decomp: " + Default.ucd().getCodeAndName(decomp));
System.out.println("Base: " + Default.ucd().getCodeAndName(base));
System.out.println("SLower: " + Default.ucd().getCodeAndName(lower));
System.out.println("FLower: " + Default.ucd().getCodeAndName(flower));
System.out.println("BLower: " + Default.ucd().getCodeAndName(blower));
System.out.println("STitle: " + Default.ucd().getCodeAndName(title));
System.out.println("FTitle: " + Default.ucd().getCodeAndName(ftitle));
System.out.println("BTitle: " + Default.ucd().getCodeAndName(btitle));
System.out.println("SUpper: " + Default.ucd().getCodeAndName(upper));
System.out.println("FUpper: " + Default.ucd().getCodeAndName(fupper));
System.out.println("BUpper: " + Default.ucd().getCodeAndName(bupper));
}
// presumably if there is a single code point, it would already be in the simple mappings
if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1
&& UTF16.countCodePoint(title) == 1) {
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd().getCodeAndName(ch));
continue;
}
// if there is no change from the base, skip
if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd().getCodeAndName(ch));
continue;
}
@ -544,11 +542,11 @@ public class GenerateCaseFolding implements UCD_Types {
// if there are no changes from the original, or the expanded original, skip
if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd().getCodeAndName(ch));
continue;
}
String name = Default.ucd.getName(ch);
String name = Default.ucd().getName(ch);
int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
: ch == 0x130 ? 2
@ -559,16 +557,16 @@ public class GenerateCaseFolding implements UCD_Types {
: UTF16.countCodePoint(fupper) == 2 ? 7
: 8;
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd.getCodeAndName(ch));
if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd().getCodeAndName(ch));
// HACK
boolean denormalize = !normalize && order != 6 && order != 7;
String mapping = Utility.hex(ch)
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd.normalize(flower) : flower)
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd.normalize(ftitle) : ftitle)
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd.normalize(fupper) : fupper)
+ "; # " + Default.ucd.getName(ch);
+ "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd().normalize(flower) : flower)
+ "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd().normalize(ftitle) : ftitle)
+ "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd().normalize(fupper) : fupper)
+ "; # " + Default.ucd().getName(ch);
// special exclusions
if (isExcluded(ch)) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $
* $Date: 2002/10/05 01:28:58 $
* $Revision: 1.1 $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -24,7 +24,6 @@ abstract public class GenerateCaseTest implements UCD_Types {
public static void main(String[] args) throws IOException {
System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
Default.setUCD();
PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS);
@ -34,15 +33,15 @@ abstract public class GenerateCaseTest implements UCD_Types {
for (int cp = 0; cp < 0x10FFFF; ++cp) {
Utility.dot(cp);
if (!Default.ucd.isAllocated(cp)) continue;
if (Default.ucd.isHangulSyllable(cp)) continue;
byte cat = Default.ucd.getCategory(cp);
if (!Default.ucd().isAllocated(cp)) continue;
if (Default.ucd().isHangulSyllable(cp)) continue;
byte cat = Default.ucd().getCategory(cp);
if (cp == PRIVATE_USE) continue;
String lower = Default.ucd.getCase(cp, FULL, LOWER);
String upper = Default.ucd.getCase(cp, FULL, UPPER);
String title = Default.ucd.getCase(cp, FULL, TITLE);
String fold = Default.ucd.getCase(cp, FULL, FOLD);
String lower = Default.ucd().getCase(cp, FULL, LOWER);
String upper = Default.ucd().getCase(cp, FULL, UPPER);
String title = Default.ucd().getCase(cp, FULL, TITLE);
String fold = Default.ucd().getCase(cp, FULL, FOLD);
if (lower.equals(upper)
&& lower.equals(title)
&& lower.equals(fold)) continue;
@ -54,17 +53,17 @@ abstract public class GenerateCaseTest implements UCD_Types {
s = s + testChar;
String s2 = Default.nfd.normalize(s);
String s2 = Default.nfd().normalize(s);
String lower1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, LOWER));
String upper1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, UPPER));
String title1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, TITLE));
String fold1 = Default.nfc.normalize(Default.ucd.getCase(s2, FULL, FOLD));
String lower1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, LOWER));
String upper1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, UPPER));
String title1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, TITLE));
String fold1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, FOLD));
if (lower1.equals(Default.nfc.normalize(lower+testChar))
&& upper1.equals(Default.nfc.normalize(upper+testChar))
&& title1.equals(Default.nfc.normalize(title+testChar))
&& fold1.equals(Default.nfc.normalize(fold+testChar))
if (lower1.equals(Default.nfc().normalize(lower+testChar))
&& upper1.equals(Default.nfc().normalize(upper+testChar))
&& title1.equals(Default.nfc().normalize(title+testChar))
&& fold1.equals(Default.nfc().normalize(fold+testChar))
) continue;
write(out, s, true);
@ -77,17 +76,17 @@ abstract public class GenerateCaseTest implements UCD_Types {
static int counter = 0;
static void write(PrintWriter out, String ss, boolean doComment) {
String s = Default.nfd.normalize(ss);
String lower = Default.nfc.normalize(Default.ucd.getCase(s, FULL, LOWER));
String upper = Default.nfc.normalize(Default.ucd.getCase(s, FULL, UPPER));
String title = Default.nfc.normalize(Default.ucd.getCase(s, FULL, TITLE));
String fold = Default.nfc.normalize(Default.ucd.getCase(s, FULL, FOLD));
String s = Default.nfd().normalize(ss);
String lower = Default.nfc().normalize(Default.ucd().getCase(s, FULL, LOWER));
String upper = Default.nfc().normalize(Default.ucd().getCase(s, FULL, UPPER));
String title = Default.nfc().normalize(Default.ucd().getCase(s, FULL, TITLE));
String fold = Default.nfc().normalize(Default.ucd().getCase(s, FULL, FOLD));
out.println(Utility.hex(ss) + "; "
+ Utility.hex(lower) + "; "
+ Utility.hex(upper) + "; "
+ Utility.hex(title) + "; "
+ Utility.hex(fold)
+ (doComment ? "\t# " + Default.ucd.getName(ss) : "")
+ (doComment ? "\t# " + Default.ucd().getName(ss) : "")
);
counter++;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.31 $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.32 $
*
*******************************************************************************
*/
@ -28,35 +28,34 @@ public class GenerateData implements UCD_Types {
static final String HORIZONTAL_LINE = "# ================================================";
static final void genSplit () {
Default.setUCD();
UnicodeSet split = new UnicodeSet();
UnicodeSet reordrant = new UnicodeSet(
"[\u093F\u09BF\u09c7\u09c8\u0abf\u0abf\u0b47\u0bc6\u0bc7\u0bc8"
+ "\u0d46\u0d47\u0d48\u0dd9\u0dda\u0ddb\u1031\u17be\u17c1\u17c2\u17c3]");
UnicodeSet subjoined = new UnicodeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isAssigned(i)) continue;
if (!Default.ucd().isAssigned(i)) continue;
Utility.dot(i);
int cat = Default.ucd.getCategory(i);
int cat = Default.ucd().getCategory(i);
if (cat != Mc && cat != Mn && cat != Me) continue;
if (Default.ucd.getName(i).indexOf("SUBJOINED") >= 0) {
if (Default.ucd().getName(i).indexOf("SUBJOINED") >= 0) {
System.out.print('*');
subjoined.add(i);
continue;
}
String decomp = Default.nfd.normalize(i);
String decomp = Default.nfd().normalize(i);
//int count = countTypes(decomp, Mc);
if (UTF16.countCodePoint(decomp) > 1) split.add(i);
}
Utility.fixDot();
System.out.println("Split: " + split.size());
Utility.showSetNames("", split, false, Default.ucd);
Utility.showSetNames("", split, false, Default.ucd());
System.out.println("Reordrant: " + reordrant.size());
Utility.showSetNames("", reordrant, false, Default.ucd);
Utility.showSetNames("", reordrant, false, Default.ucd());
System.out.println("Subjoined: " + subjoined.size());
Utility.showSetNames("", subjoined, false, Default.ucd);
Utility.showSetNames("", subjoined, false, Default.ucd());
}
static int countTypes(String s, int filter) {
@ -64,7 +63,7 @@ public class GenerateData implements UCD_Types {
int cp;
for (int i = 0; i < s.length(); i+= UTF16.getCharCount(cp)) {
cp = UTF16.charAt(s, i);
int cat = Default.ucd.getCategory(i);
int cat = Default.ucd().getCategory(i);
if (cat == filter) count++;
}
return count;
@ -73,7 +72,7 @@ public class GenerateData implements UCD_Types {
//static UnifiedBinaryProperty ubp
public static void checkHoffman(String test) {
String result = Default.nfkc.normalize(test);
String result = Default.nfkc().normalize(test);
System.out.println(Utility.hex(test) + " => " + Utility.hex(result));
System.out.println();
show(test, 0);
@ -85,10 +84,10 @@ public class GenerateData implements UCD_Types {
int cp;
for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
cp = UTF32.char32At(s, i);
String cc = " " + Default.ucd.getCombiningClass(cp);
String cc = " " + Default.ucd().getCombiningClass(cp);
cc = Utility.repeat(" ", 4 - cc.length()) + cc;
System.out.println(Utility.repeat(" ", indent) + Default.ucd.getCode(cp) + cc + " " + Default.ucd.getName(cp));
String decomp = Default.nfkc.normalize(cp);
System.out.println(Utility.repeat(" ", indent) + Default.ucd().getCode(cp) + cc + " " + Default.ucd().getName(cp));
String decomp = Default.nfkc().normalize(cp);
if (!decomp.equals(UTF32.valueOf32(cp))) {
show(decomp, indent + 4);
}
@ -130,13 +129,13 @@ public class GenerateData implements UCD_Types {
}
public static String getFileSuffix(boolean withDVersion) {
return "-" + Default.ucd.getVersion()
return "-" + Default.ucd().getVersion()
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
+ ".txt";
}
public static String getHTMLFileSuffix(boolean withDVersion) {
return "-" + Default.ucd.getVersion()
return "-" + Default.ucd().getVersion()
+ ((withDVersion && dVersion >= 0) ? ("d" + dVersion) : "")
+ ".html";
}
@ -149,24 +148,24 @@ public class GenerateData implements UCD_Types {
log1.println("<diff version='" + target.getVersion() + "'>");
PrintWriter log2 = Utility.openPrintWriter("Log2.xml", Utility.LATIN1_UNIX);
log2.println("<diff version='" + Default.ucd.getVersion() + "'>");
log2.println("<diff version='" + Default.ucd().getVersion() + "'>");
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!target.isAllocated(i)) continue;
Utility.dot(i);
UData t = target.get(i, true);
UData current = Default.ucd.get(i, true);
UData current = Default.ucd().get(i, true);
if (i == 0x5E) {
System.out.println(target.getDecompositionTypeID(i)
+ ", " + Utility.hex(target.getDecompositionMapping(i)));
System.out.println(Default.ucd.getDecompositionTypeID(i)
+ ", " + Utility.hex(Default.ucd.getDecompositionMapping(i)));
System.out.println(Default.ucd().getDecompositionTypeID(i)
+ ", " + Utility.hex(Default.ucd().getDecompositionMapping(i)));
}
if (t.equals(current)) continue;
// print both for comparison
log1.println(t.toString(target, UData.ABBREVIATED));
log2.println(current.toString(Default.ucd, UData.ABBREVIATED));
log2.println(current.toString(Default.ucd(), UData.ABBREVIATED));
}
log1.println("</diff>");
log2.println("</diff>");
@ -176,7 +175,7 @@ public class GenerateData implements UCD_Types {
public static void generateDerived (byte type, boolean checkTypeAndStandard, int headerChoice, String directory, String fileName) throws IOException {
Default.setUCD();
String newFile = directory + fileName + getFileSuffix(true);
System.out.println("New File: " + newFile);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
@ -186,7 +185,7 @@ public class GenerateData implements UCD_Types {
doHeader(fileName + getFileSuffix(false), output, headerChoice);
for (int i = 0; i < DERIVED_PROPERTY_LIMIT; ++i) {
UCDProperty up = DerivedProperty.make(i, Default.ucd);
UCDProperty up = DerivedProperty.make(i, Default.ucd());
if (up == null) continue;
boolean keepGoing = true;
if (!up.isStandard()) keepGoing = false;
@ -198,7 +197,7 @@ public class GenerateData implements UCD_Types {
System.out.print('.');
output.println(HORIZONTAL_LINE);
output.println();
new DerivedPropertyLister(Default.ucd, i, output).print();
new DerivedPropertyLister(Default.ucd(), i, output).print();
output.flush();
}
output.close();
@ -227,7 +226,7 @@ public class GenerateData implements UCD_Types {
*/
public static void generateCompExclusions() throws IOException {
Default.setUCD();
String newFile = "DerivedData/CompositionExclusions" + getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
@ -238,7 +237,7 @@ public class GenerateData implements UCD_Types {
output.println("#");
output.println("# This file lists the characters from the UAX #15 Composition Exclusion Table.");
output.println("#");
if (Default.ucd.getVersion().equals("3.2.0")) {
if (Default.ucd().getVersion().equals("3.2.0")) {
output.println("# The format of the comments in this file has been updated since the last version,");
output.println("# CompositionExclusions-3.txt. The only substantive change to this file between that");
output.println("# version and this one is the addition of U+2ADC FORKING.");
@ -300,7 +299,7 @@ public class GenerateData implements UCD_Types {
public CompLister(PrintWriter output, int type) {
this.output = output;
ucdData = Default.ucd;
ucdData = Default.ucd();
oldUCD = UCD.make("3.0.0");
// showOnConsole = true;
alwaysBreaks = type <= 2; // CHANGE LATER
@ -340,7 +339,7 @@ public class GenerateData implements UCD_Types {
}
public static void generatePropertyAliases() throws IOException {
Default.setUCD();
String prop = "";
String propAbb = "";
String value = "";
@ -409,7 +408,7 @@ public class GenerateData implements UCD_Types {
//System.out.println("debug");
}
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd());
if (up == null) continue;
if (!up.isStandard()) continue;
@ -449,7 +448,7 @@ public class GenerateData implements UCD_Types {
if (type == SCRIPT) {
value = Default.ucd.getCase(value, FULL, TITLE);
value = Default.ucd().getCase(value, FULL, TITLE);
}
valueAbb = up.getValue(SHORT);
@ -513,7 +512,7 @@ public class GenerateData implements UCD_Types {
UCD.BlockData blockData = new UCD.BlockData();
int blockId = 0;
while (Default.ucd.getBlockData(blockId++, blockData)) {
while (Default.ucd().getBlockData(blockId++, blockData)) {
addLine(sorted, "blk", "n/a", blockData.name);
}
@ -698,7 +697,7 @@ public class GenerateData implements UCD_Types {
// static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
public static String generateBat(String directory, String fileRoot, String suffix, String[] batName) throws IOException {
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), true, true);
String mostRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), true, true);
if (mostRecent != null) {
batName[0] = generateBatAux(directory + "DIFF/Diff_" + fileRoot + suffix,
mostRecent, directory + fileRoot + suffix);
@ -707,7 +706,7 @@ public class GenerateData implements UCD_Types {
return null;
}
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd.getVersion(), false, true);
String lessRecent = Utility.getMostRecentUnicodeDataFile(fixFile(fileRoot), Default.ucd().getVersion(), false, true);
if (lessRecent != null && !mostRecent.equals(lessRecent)) {
generateBatAux(directory + "DIFF/OLDER-Diff_" + fileRoot + suffix,
lessRecent, directory + fileRoot + suffix);
@ -736,7 +735,7 @@ public class GenerateData implements UCD_Types {
public static void generateVerticalSlice(int startEnum, int endEnum,
int headerChoice, String directory, String file) throws IOException {
Default.setUCD();
String newFile = directory + file + getFileSuffix(true);
PrintWriter output = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
@ -745,7 +744,7 @@ public class GenerateData implements UCD_Types {
doHeader(file + getFileSuffix(false), output, headerChoice);
int last = -1;
for (int i = startEnum; i < endEnum; ++i) {
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd);
UCDProperty up = UnifiedBinaryProperty.make(i, Default.ucd());
if (up == null) continue;
if (up.skipInDerivedListing()) continue;
@ -777,7 +776,7 @@ public class GenerateData implements UCD_Types {
}
System.out.print(".");
if (DEBUG) System.out.println(i);
new MyPropertyLister(Default.ucd, i, output).print();
new MyPropertyLister(Default.ucd(), i, output).print();
output.flush();
}
if (endEnum == LIMIT_ENUM) {
@ -791,13 +790,13 @@ public class GenerateData implements UCD_Types {
Set numericValueSet = new TreeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
double nv = Default.ucd.getNumericValue(i);
double nv = Default.ucd().getNumericValue(i);
if (Double.isNaN(nv)) continue;
numericValueSet.add(new Double(nv));
}
Iterator it = numericValueSet.iterator();
while(it.hasNext()) {
new MyFloatLister(Default.ucd, ((Double)it.next()).doubleValue(), output).print();
new MyFloatLister(Default.ucd(), ((Double)it.next()).doubleValue(), output).print();
output.println();
System.out.print(".");
}
@ -810,7 +809,7 @@ public class GenerateData implements UCD_Types {
}
static public void writeNormalizerTestSuite(String directory, String fileName) throws IOException {
Default.setUCD();
String newFile = directory + fileName + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.UTF8_UNIX);
String[] batName = {""};
@ -869,8 +868,8 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
if (!Default.ucd().isAssigned(ch)) continue;
if (Default.ucd().isPUA(ch)) continue;
String cc = UTF32.valueOf32(ch);
writeLine(cc,log, true);
}
@ -880,9 +879,9 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
int cc = Default.ucd.getCombiningClass(ch);
if (!Default.ucd().isAssigned(ch)) continue;
if (Default.ucd().isPUA(ch)) continue;
int cc = Default.ucd().getCombiningClass(ch);
if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
}
@ -896,9 +895,9 @@ public class GenerateData implements UCD_Types {
for (int ch = 0; ch < 0x10FFFF; ++ch) {
Utility.dot(ch);
if (!Default.ucd.isAssigned(ch)) continue;
if (Default.ucd.isPUA(ch)) continue;
short c = Default.ucd.getCombiningClass(ch);
if (!Default.ucd().isAssigned(ch)) continue;
if (Default.ucd().isPUA(ch)) continue;
short c = Default.ucd().getCombiningClass(ch);
if (c == 0) continue;
// add character with higher class, same class, lower class
@ -945,19 +944,19 @@ public class GenerateData implements UCD_Types {
}
static void writeLine(String cc, PrintWriter log, boolean check) {
String c = Default.nfc.normalize(cc);
String d = Default.nfd.normalize(cc);
String kc = Default.nfkc.normalize(cc);
String kd = Default.nfkd.normalize(cc);
String c = Default.nfc().normalize(cc);
String d = Default.nfd().normalize(cc);
String kc = Default.nfkc().normalize(cc);
String kd = Default.nfkd().normalize(cc);
if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return;
// consistency check
String dc = Default.nfd.normalize(c);
String dkc = Default.nfd.normalize(kc);
String dc = Default.nfd().normalize(c);
String dkc = Default.nfd().normalize(kc);
if (!dc.equals(d) || !dkc.equals(kd)) {
System.out.println("Danger Will Robinson!");
Normalizer.SHOW_PROGRESS = true;
d = Default.nfd.normalize(cc);
d = Default.nfd().normalize(cc);
}
// printout
@ -966,7 +965,7 @@ public class GenerateData implements UCD_Types {
+ Utility.hex(kc," ") + ";" + Utility.hex(kd," ")
+ "; # ("
+ comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; "
+ ") " + Default.ucd.getName(cc));
+ ") " + Default.ucd().getName(cc));
}
static StringBuffer commaResult = new StringBuffer();
@ -977,7 +976,7 @@ public class GenerateData implements UCD_Types {
int cp;
for (int i = 0; i < s.length(); i += UTF32.count16(i)) {
cp = UTF32.char32At(s, i);
if (Default.ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
if (Default.ucd().getCategory(cp) == Mn) commaResult.append('\u25CC');
UTF32.append32(commaResult, cp);
}
return commaResult.toString();
@ -1012,7 +1011,7 @@ public class GenerateData implements UCD_Types {
static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {
Default.setUCD();
String newFile = directory + filename + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
@ -1025,7 +1024,7 @@ public class GenerateData implements UCD_Types {
int prop = list[i];
log.println();
log.println(HORIZONTAL_LINE);
log.println("###### " + DerivedProperty.make(prop, Default.ucd).getName());
log.println("###### " + DerivedProperty.make(prop, Default.ucd()).getName());
//log.println();
//log.println(HORIZONTAL_LINE);
//new DiffPropertyLister("3.2.0", "1.1.0", log, prop).print();
@ -1082,9 +1081,9 @@ public class GenerateData implements UCD_Types {
log.println();
log.println("Cummulative differences");
UCDProperty up = DerivedProperty.make(prop, Default.ucd);
UCDProperty up = DerivedProperty.make(prop, Default.ucd());
UnicodeSet newProp = up.getSet();
Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd);
Utility.showSetNames(log, "", cummulative.removeAll(newProp), false, false, Default.ucd());
}
} finally {
if (log != null) {
@ -1095,7 +1094,7 @@ public class GenerateData implements UCD_Types {
}
static final void generateAge(String directory, String filename) throws IOException {
Default.setUCD();
String newFile = directory + filename + getFileSuffix(true);
PrintWriter log = Utility.openPrintWriter(newFile, Utility.LATIN1_UNIX);
String[] batName = {""};
@ -1195,32 +1194,32 @@ public class GenerateData implements UCD_Types {
}
public static void listCombiningAccents() throws IOException {
Default.setUCD();
PrintWriter log = Utility.openPrintWriter("ListAccents" + getFileSuffix(true), Utility.LATIN1_UNIX);
Set set = new TreeSet();
Set set2 = new TreeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
Utility.dot(i);
if (!Default.ucd.isRepresented(i)) continue;
if (!Default.ucd().isRepresented(i)) continue;
if (Default.nfd.isNormalized(i)) {
if (Default.ucd.getScript(i) == LATIN_SCRIPT) {
if (Default.nfd().isNormalized(i)) {
if (Default.ucd().getScript(i) == LATIN_SCRIPT) {
int cp = i;
String hex = "u" + Utility.hex(cp, 4);
set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
set.add("# yyy $x <> \\" + hex + " ; # " + Default.ucd().getName(cp));
}
continue;
}
String decomp = Default.nfd.normalize(i);
String decomp = Default.nfd().normalize(i);
int j;
for (j = 0; j < decomp.length(); j += UTF16.getCharCount(i)) {
int cp = UTF16.charAt(decomp, j);
byte cat = Default.ucd.getCategory(cp);
byte cat = Default.ucd().getCategory(cp);
if (cat != Mn) continue;
String hex = "u" + Utility.hex(cp, 4);
set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd.getName(cp));
set.add("# xxx $x <> \\" + hex + " ; # " + Default.ucd().getName(cp));
}
}
@ -1232,7 +1231,7 @@ public class GenerateData implements UCD_Types {
}
public static void listGreekVowels() throws IOException {
Default.setUCD();
PrintWriter log = Utility.openPrintWriter("ListGreekVowels" + getFileSuffix(true), Utility.LATIN1_UNIX);
Set set = new TreeSet();
Set set2 = new TreeSet();
@ -1245,14 +1244,14 @@ public class GenerateData implements UCD_Types {
for (char i = 0; i < 0xFFFF; ++i) {
Utility.dot(i);
if (!Default.ucd.isRepresented(i)) continue;
if (Default.ucd.getScript(i) != GREEK_SCRIPT) continue;
String decomp = Default.nfd.normalize(i);
if (!Default.ucd().isRepresented(i)) continue;
if (Default.ucd().getScript(i) != GREEK_SCRIPT) continue;
String decomp = Default.nfd().normalize(i);
if (decomp.indexOf('\u0306') >= 0) continue; // skip breve
if (decomp.indexOf('\u0304') >= 0) continue; // skip macron
String comp = Default.nfc.normalize(decomp);
String comp = Default.nfc().normalize(decomp);
if (!comp.equals(String.valueOf(i))) continue; // skip compats
char first = decomp.charAt(0);
@ -1266,7 +1265,7 @@ public class GenerateData implements UCD_Types {
for (int j = 0; j < diphthongStart.length(); ++j) {
String v = diphthongStart.substring(j, j+1);
char vc = v.charAt(0);
if (Default.ucd.getCategory(vc) == Ll && Default.ucd.getCategory(first) == Lu) continue;
if (Default.ucd().getCategory(vc) == Ll && Default.ucd().getCategory(first) == Lu) continue;
if (etas.indexOf(vc) >= 0 && iotas.indexOf(first) >= 0) continue;
set.add(new Pair(h + v + first, new Pair(v + decomp, v + i)));
}
@ -1292,7 +1291,7 @@ public class GenerateData implements UCD_Types {
public static void listKatakana() throws IOException {
Default.setUCD();
for (char i = 'a'; i <= 'z'; ++i) {
doKana(String.valueOf(i));
if (i == 'c') doKana("ch");
@ -1325,18 +1324,18 @@ public class GenerateData implements UCD_Types {
}
public static void genTrailingZeros() {
Default.setUCD();
UnicodeSet result = new UnicodeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
if ((i & 0xFFF) == 0) System.out.println("# " + i);
if (!Default.ucd.isAssigned(i)) continue;
if (Default.nfd.isNormalized(i)) continue;
String decomp = Default.nfd.normalize(i);
if (!Default.ucd().isAssigned(i)) continue;
if (Default.nfd().isNormalized(i)) continue;
String decomp = Default.nfd().normalize(i);
int cp;
for (int j = 0; j < decomp.length(); j += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(decomp,j);
if (j == 0) continue; // skip first
if (Default.ucd.getCombiningClass(cp) == 0) {
if (Default.ucd().getCombiningClass(cp) == 0) {
result.add(cp);
}
}
@ -1349,8 +1348,8 @@ public class GenerateData implements UCD_Types {
Utility.hex(start)
+ (start != end ? ".." + Utility.hex(end) : "")
+ "; "
+ Default.ucd.getName(start)
+ (start != end ? ".." + Default.ucd.getName(end) : ""));
+ Default.ucd().getName(start)
+ (start != end ? ".." + Default.ucd().getName(end) : ""));
}
System.out.println("TrailingZero count: " + result.size());
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.13 $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -49,7 +49,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
log.println("<title>Unihan check</title>");
log.println("</head>");
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion(), true, Utility.UTF8);
Map properties = new TreeMap();
@ -252,7 +252,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
public static void main(int typeIn) {
type = typeIn;
Default.setUCD();
try {
System.out.println("Starting");
System.out.println("Quoting: " + quoteNonLetters.toRules(true));
@ -277,7 +277,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
break;
default: throw new IllegalArgumentException("Unexpected option: must be 0..2");
}
filename += Default.ucd.getVersion() + ".txt";
filename += Default.ucd().getVersion() + ".txt";
err = Utility.openPrintWriter("Transliterate_err.txt", Utility.UTF8_WINDOWS);
log = Utility.openPrintWriter("Transliterate_log.txt", Utility.UTF8_WINDOWS);
@ -325,7 +325,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
String def = (String) unihanMap.get(keyChar);
if (!isValidPinyin(def)) {
String fixedDef = fixPinyin(def);
err.println(Default.ucd.getCode(keyChar) + "\t" + keyChar + "\t" + fixedDef + "\t#" + def
err.println(Default.ucd().getCode(keyChar) + "\t" + keyChar + "\t" + fixedDef + "\t#" + def
+ (fixedDef.equals(def) ? " FAIL" : ""));
Utility.addToSet(badPinyin, def, keyChar);
}
@ -334,7 +334,7 @@ public final class GenerateHanTransliterator implements UCD_Types {
String accentDef = digitPinyin_accentPinyin.transliterate(digitDef);
if (!accentDef.equals(def)) {
err.println("Failed Digit Pinyin: "
+ Default.ucd.getCode(keyChar) + "\t" + keyChar + "\t"
+ Default.ucd().getCode(keyChar) + "\t" + keyChar + "\t"
+ def + " => " + digitDef + " => " + accentDef);
}
@ -1157,11 +1157,11 @@ U+7878
for (int i = tabPos+1; i < tabPos2; ++i) {
int cp = line.charAt(i);
int script = Default.ucd.getScript(cp);
int script = Default.ucd().getScript(cp);
if (script != HAN_SCRIPT) {
if (script != HIRAGANA_SCRIPT && script != KATAKANA_SCRIPT
&& cp != 0x30FB && cp != 0x30FC) {
System.out.println("Huh: " + Default.ucd.getCodeAndName(cp));
System.out.println("Huh: " + Default.ucd().getCodeAndName(cp));
}
continue;
}
@ -1237,15 +1237,15 @@ U+7878
UnicodeSet sPinyin = new UnicodeSet();
for (int i = 0; i < 0x10FFFF; ++i) {
if (!Default.ucd.isAllocated(i)) continue;
if (Default.ucd.getScript(i) != HAN_SCRIPT) continue;
if (!Default.ucd().isAllocated(i)) continue;
if (Default.ucd().getScript(i) != HAN_SCRIPT) continue;
Utility.dot(i);
String ch = UTF16.valueOf(i);
String pinyin = (String) unihanMap.get(ch);
if (pinyin == null) {
String ch2 = Default.nfkd.normalize(ch);
String ch2 = Default.nfkd().normalize(ch);
pinyin = (String) unihanMap.get(ch2);
if (pinyin != null) {
addCheck(ch, pinyin, "n/a");
@ -1688,8 +1688,8 @@ Bad pinyin data: \u4E7F ? LE
}
static void addCheck2(String word, String definition, String line) {
definition = Default.nfc.normalize(definition);
word = Default.nfc.normalize(word);
definition = Default.nfc().normalize(definition);
word = Default.nfc().normalize(word);
if (DO_SIMPLE && UTF16.countCodePoint(word) > 1) return;
if (pua.containsSome(word) ) {
@ -1799,7 +1799,7 @@ Bad pinyin data: \u4E7F ? LE
static void readUnihanData(String key) throws java.io.IOException {
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.getUcdVersion(), true, Utility.UTF8);
BufferedReader in = Utility.openUnicodeFile("Unihan", Default.ucdVersion(), true, Utility.UTF8);
int count = 0;
int lineCounter = 0;
@ -1892,11 +1892,11 @@ Bad pinyin data: \u4E7F ? LE
definition = fixDefinition(definition, line);
}
definition = definition.trim();
definition = Default.ucd.getCase(definition, FULL, LOWER);
definition = Default.ucd().getCase(definition, FULL, LOWER);
if (definition.length() == 0) {
Utility.fixDot();
err.println("Zero value for " + Default.ucd.getCode(cp) + " on: " + hex.transliterate(line));
err.println("Zero value for " + Default.ucd().getCode(cp) + " on: " + hex.transliterate(line));
} else {
addCheck(UTF16.valueOf(cp), definition, line);
}
@ -1914,7 +1914,7 @@ Bad pinyin data: \u4E7F ? LE
definition = definition.trim();
definition = Utility.replace(definition, " ", " ");
definition = Utility.replace(definition, " ", "-");
definition = Default.ucd.getCase(definition, FULL, LOWER);
definition = Default.ucd().getCase(definition, FULL, LOWER);
return definition;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.3 $
* $Date: 2004/02/07 01:01:15 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -27,11 +27,11 @@ public class GenerateLineBreakTest implements UCD_Types {
static final String[] hNames = {"L", "V", "T", "LV", "LVT"};
static byte getHangulType(int cp) {
if (Default.ucd.isLeadingJamo(cp)) return hL;
if (Default.ucd.isVowelJamo(cp)) return hV;
if (Default.ucd.isTrailingJamo(cp)) return hT;
if (Default.ucd.isHangulSyllable(cp)) {
if (Default.ucd.isDoubleHangul(cp)) return hLV;
if (Default.ucd().isLeadingJamo(cp)) return hL;
if (Default.ucd().isVowelJamo(cp)) return hV;
if (Default.ucd().isTrailingJamo(cp)) return hT;
if (Default.ucd().isHangulSyllable(cp)) {
if (Default.ucd().isDoubleHangul(cp)) return hLV;
return hLVT;
}
return hNot;
@ -62,7 +62,7 @@ public class GenerateLineBreakTest implements UCD_Types {
};
public static void main(String[] args) throws IOException {
Default.setUCD();
new GenerateLineBreakTest().run();
new GenerateWordBreakTest().run();
@ -166,7 +166,7 @@ public class GenerateLineBreakTest implements UCD_Types {
byte result = getType(cp);
if (result == LB_SUP) return "SUP";
if (result >= LB_LIMIT) return hNames[result - LB_LIMIT];
return Default.ucd.getLineBreakID_fromIndex(result);
return Default.ucd().getLineBreakID_fromIndex(result);
}
// stuff that subclasses need to override
@ -174,7 +174,7 @@ public class GenerateLineBreakTest implements UCD_Types {
if (cp > 0xFFFF) return LB_SUP;
byte result = getHangulType(cp);
if (result != hNot) return (byte)(result + LB_LIMIT);
return Default.ucd.getLineBreak(cp);
return Default.ucd().getLineBreak(cp);
}
public int getLimit() {
@ -277,7 +277,7 @@ public class GenerateLineBreakTest implements UCD_Types {
}
string.append(Utility.hex(cp));
comment.append(Default.ucd.getName(cp) + " (" + getTypeID(cp) + ")");
comment.append(Default.ucd().getName(cp) + " (" + getTypeID(cp) + ")");
status = isBreak(source, offset + UTF16.getCharCount(cp), recommended) ? BREAK : NOBREAK;
string.append(' ').append(status);
@ -290,7 +290,7 @@ public class GenerateLineBreakTest implements UCD_Types {
public void findSamples() {
for (int i = 1; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isAllocated(i)) continue;
if (!Default.ucd().isAllocated(i)) continue;
if (0xD800 <= i && i <= 0xDFFF) continue;
if(i == 0x1100) {
System.out.print("here");
@ -302,7 +302,7 @@ public class GenerateLineBreakTest implements UCD_Types {
}
for (int i = 0; i < TypeOrder.length; ++i) {
String sample = samples[i];
System.out.println(getTypeID(sample) + ":\t" + Default.ucd.getCodeAndName(sample));
System.out.println(getTypeID(sample) + ":\t" + Default.ucd().getCodeAndName(sample));
}
}
@ -684,7 +684,7 @@ public class GenerateLineBreakTest implements UCD_Types {
// other properties
// category based
byte cat = Default.ucd.getCategory(cp);
byte cat = Default.ucd().getCategory(cp);
if (cat == Cc) return Control;
if (cat == Cf) return Extend;
if (((1<<cat) & LETTER_MASK) != 0) return LetterBase;

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.3 $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -43,7 +43,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
# If more than one is present, there are spaces between them.
*/
static public void generate() throws IOException {
Default.setUCD();
// read the data and compose the table
@ -53,7 +53,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
String[] codes = new String[2];
String[] shapes = new String[4];
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.getUcdVersion(), true, Utility.LATIN1);
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.ucdVersion(), true, Utility.LATIN1);
while (true) {
String line = Utility.readDataLine(in);
if (line == null) break;
@ -88,7 +88,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
}
table += "</td>\n";
table += "<td>" + Default.ucd.getName(code) + " " + splits[1] + "</td>\n";
table += "<td>" + Default.ucd().getName(code) + " " + splits[1] + "</td>\n";
table += "</tr>";
}
in.close();
@ -102,7 +102,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
String[] batName = {""};
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
String version = Default.ucd.getVersion();
String version = Default.ucd().getVersion();
int lastDot = version.lastIndexOf('.');
String updateDirectory = version.substring(0,lastDot) + "-Update";
int updateV = version.charAt(version.length()-1) - '0';
@ -110,7 +110,7 @@ public final class GenerateStandardizedVariants implements UCD_Types {
if (DEBUG) System.out.println("updateDirectory: " + updateDirectory);
String[] replacementList = {
"@revision@", Default.ucd.getVersion(),
"@revision@", Default.ucd().getVersion(),
"@updateDirectory@", updateDirectory,
"@date@", Default.getDate(),
"@table@", table};

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java,v $
* $Date: 2002/08/04 21:38:45 $
* $Revision: 1.3 $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
@ -27,7 +27,7 @@ public class GenerateThaiBreaks {
PrintWriter out = null;
try {
Default.setUCD();
UnicodeSet ignorables = new UnicodeSet();
/* new UnicodeSet(0xE30, 0xE3A);
ignorables.add(0x0E40, 0x0E44); // add logical order exception
@ -89,18 +89,18 @@ public class GenerateThaiBreaks {
UnicodeSet missingThai = new UnicodeSet("[[\u0e00-\u0e7f]-[:Cn:]]").removeAll(all);
System.out.println("Never occur: " + missingThai.toPattern(true));
Utility.showSetNames("", missingThai, true, Default.ucd);
Utility.showSetNames("", missingThai, true, Default.ucd());
System.out.println();
UnicodeSet neverInitial = new UnicodeSet(all).removeAll(initials);
UnicodeSet neverFinal = new UnicodeSet(all).removeAll(finals);
System.out.println("Never initial: " + neverInitial.toPattern(true));
Utility.showSetNames("", neverInitial, true, Default.ucd);
Utility.showSetNames("", neverInitial, true, Default.ucd());
System.out.println();
System.out.println("Never final: " + neverFinal.toPattern(true));
Utility.showSetNames("", neverFinal, true, Default.ucd);
Utility.showSetNames("", neverFinal, true, Default.ucd());
System.out.println();
initials.removeAll(medials);
@ -110,11 +110,11 @@ public class GenerateThaiBreaks {
System.out.println("finals size: " + finals.size());
System.out.println("Only Initials" + initials.toPattern(true));
Utility.showSetNames("", initials, true, Default.ucd);
Utility.showSetNames("", initials, true, Default.ucd());
System.out.println();
System.out.println("Only Finals" + finals.toPattern(true));
Utility.showSetNames("", finals, true, Default.ucd);
Utility.showSetNames("", finals, true, Default.ucd());
} finally {
br.close();
if (out != null) out.close();
@ -124,9 +124,9 @@ public class GenerateThaiBreaks {
static class MyBreaker implements Utility.Breaker {
/**
 * Formats one entry as text, using the previous entry to decide whether to
 * start a new output line.
 *
 * When old is null, or when UTF16.charAt(..., 0) gives the same value for the
 * string forms of current and old, the result is current followed by "(" +
 * the UCD code of current with its first char removed + "))". Otherwise the
 * result begins with "\r\n" and shows the UCD code of the whole of current.
 *
 * NOTE(review): each result contains one "(" but ends in "))" -- confirm the
 * extra closing parenthesis is intended.
 * NOTE(review): substring(1) removes exactly one UTF-16 char; presumably the
 * leading character is never supplementary -- confirm against the callers.
 * NOTE(review): this span is taken from a commit diff, so each pair of
 * consecutive "return" lines is the before (Default.ucd field) and after
 * (Default.ucd() accessor) form of one statement.
 *
 * @param current the entry to format; its toString() value is used
 * @param old the previously formatted entry, or null if there is none
 * @return the formatted text for current
 */
public String get(Object current, Object old) {
if (old == null || UTF16.charAt(current.toString(), 0) == UTF16.charAt(old.toString(), 0)) {
return current.toString() + "(" + Default.ucd.getCode(current.toString().substring(1)) + "))";
return current.toString() + "(" + Default.ucd().getCode(current.toString().substring(1)) + "))";
} else {
return "\r\n" + current + "(" + Default.ucd.getCode(current.toString()) + "))";
return "\r\n" + current + "(" + Default.ucd().getCode(current.toString()) + "))";
}
}
public boolean filter(Object current) { return true; }

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2004/02/06 18:30:21 $
* $Revision: 1.33 $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.34 $
*
*******************************************************************************
*/
@ -56,7 +56,7 @@ public final class Main implements UCD_Types {
};
public static void main (String[] args) throws Exception {
System.out.println("*** Start *** " + new Date());
System.out.println("*** Start *** " + Default.getDate());
try {
for (int i = 0; i < args.length; ++i) {
@ -68,7 +68,7 @@ public final class Main implements UCD_Types {
Utility.fixDot();
System.out.println();
System.out.println("** Argument: " + args[i] + " ** " + new Date());
System.out.println("** Argument: " + args[i] + " ** " + Default.getDate());
// Expand string arguments
@ -96,7 +96,6 @@ public final class Main implements UCD_Types {
Default.setUCD(args[++i]);
continue;
}
Default.ensureUCD();
// Now handle other options
@ -106,7 +105,7 @@ public final class Main implements UCD_Types {
VerifyUCD.CheckCaseFold();
VerifyUCD.checkAgainstUInfo();
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.getUcdVersion()});
} else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion()});
else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
@ -342,7 +341,7 @@ public final class Main implements UCD_Types {
//
}
} finally {
System.out.println("*** Done *** " + new Date());
System.out.println("*** Done *** " + Default.getDate());
}
}

View file

@ -191,21 +191,21 @@ public final class NFSkippable extends UCDProperty {
static int limit = 0x10FFFF; // full version = 10ffff, for testing may use smaller
public static void main (String[] args) throws java.io.IOException {
Default.setUCD();
PrintWriter out = Utility.openPrintWriter("NFSafeSets.txt", Utility.UTF8_WINDOWS);
out.println("NFSafeSets");
out.println("Version: " + Default.ucd.getVersion());
out.println("Version: " + Default.ucd().getVersion());
out.println("Date: " + Default.getDate());
out.println();
for (int mode = NFD_UnsafeStart; mode <= NFKC_UnsafeStart; ++mode) {
UCDProperty up = DerivedProperty.make(mode, Default.ucd);
UCDProperty up = DerivedProperty.make(mode, Default.ucd());
generateSet(out, "UNSAFE[" + Normalizer.getName((byte)(mode-NFD_UnsafeStart)) + "]", up);
}
for (byte mode = NFD; mode <= NFKC; ++mode) {
NFSkippable skipper = new NFSkippable(mode, Default.ucd);
NFSkippable skipper = new NFSkippable(mode, Default.ucd());
generateSet(out, "SKIPPABLE[" + Normalizer.getName(mode) + "]", skipper);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
* $Date: 2003/02/25 23:38:22 $
* $Revision: 1.2 $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -22,7 +22,7 @@ import com.ibm.text.utility.*;
public class QuickTest implements UCD_Types {
static final void test() {
Default.setUCD();
UnicodeSet format = new UnicodeSet("[:Cf:]");
/*
[4] NameStartChar := ":" | [A-Z] | "_" | [a-z] |
@ -64,10 +64,10 @@ public class QuickTest implements UCD_Types {
UnicodeSet noncharacter = new UnicodeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (!Default.ucd.isAllocated(i)) continue;
if (!Default.nfkc.isNormalized(i)) notNFKC.add(i);
if (Default.ucd.isNoncharacter(i)) noncharacter.add(i);
if (Default.ucd.getCategory(i) == PRIVATE_USE) privateUse.add(i);
if (!Default.ucd().isAllocated(i)) continue;
if (!Default.nfkc().isNormalized(i)) notNFKC.add(i);
if (Default.ucd().isNoncharacter(i)) noncharacter.add(i);
if (Default.ucd().getCategory(i) == PRIVATE_USE) privateUse.add(i);
}
showSet("notNFKC in NameChar", new UnicodeSet(notNFKC).retainAll(nameChar));
@ -110,6 +110,6 @@ public class QuickTest implements UCD_Types {
System.out.println("\tCount:" + set1.size());
System.out.println("\tSet:" + set1.toPattern(true));
System.out.println("\tDetails:");
Utility.showSetNames("", set1, false, Default.ucd);
Utility.showSetNames("", set1, false, Default.ucd());
}
}

View file

@ -18,7 +18,7 @@ public final class TernaryStore {
static DepthPrinter dp;
static void test() throws java.io.IOException {
Default.setUCD();
PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS);
try {
@ -35,7 +35,7 @@ public final class TernaryStore {
int counter = 0;
int i;
for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) {
if (Default.ucd.hasComputableName(i)) continue;
if (Default.ucd().hasComputableName(i)) continue;
String temp = UCharacter.getName(i);
if (temp != null) tests[counter++] = temp.trim();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.13 $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -35,10 +35,10 @@ public class TestData implements UCD_Types {
static UnicodeProperty.Factory upf;
public static void main (String[] args) throws IOException {
Default.setUCD();
System.out.println(new Date());
System.out.println("main: " + Default.getDate());
upf = ICUPropertyFactory.make();
System.out.println(new Date());
System.out.println("after factory: " + Default.getDate());
showPropDiff(
"gc=mn", null,
@ -56,39 +56,39 @@ public class TestData implements UCD_Types {
showPropDiff(
"General_Category=L", null,
"Script!=Inherited|Common",
UnifiedBinaryProperty.getSet("script=inherited", Default.ucd)
.addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd))
upf.getSet("script=inherited")
.addAll(UnifiedBinaryProperty.getSet("script=common", Default.ucd()))
.complement()
);
UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd);
UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd);
UnicodeSet sterm = UnifiedProperty.getSet("STerm", Default.ucd());
UnicodeSet term = UnifiedProperty.getSet("Terminal_Punctuation", Default.ucd());
UnicodeSet po = new UnicodeSet("[:po:]");
UnicodeSet empty = new UnicodeSet();
Utility.showSetDifferences(
"Sentence_Terminal", sterm,
"Empty", empty,
true, Default.ucd);
true, Default.ucd());
Utility.showSetDifferences(
"Sentence_Terminal", sterm,
"Terminal_Punctuation", term,
true, Default.ucd);
true, Default.ucd());
Utility.showSetDifferences(
"Terminal_Punctuation", term,
"Punctuation_Other", po,
true, Default.ucd);
true, Default.ucd());
if (true) return;
UnicodeSet us = getSetForName("LATIN LETTER.*P");
Utility.showSetNames("",us,false,Default.ucd);
Utility.showSetNames("",us,false,Default.ucd());
us = getSetForName(".*VARIA(TION|NT).*");
Utility.showSetNames("",us,false,Default.ucd);
Utility.showSetNames("",us,false,Default.ucd());
if (true) return;
@ -128,9 +128,9 @@ public class TestData implements UCD_Types {
UnicodeSetIterator it = new UnicodeSetIterator(base);
while (it.next()) {
String s = UTF16.valueOf(it.codepoint);
String norm = Default.nfd.normalize(s);
if (s.equals(norm) && Default.nfkd.isNormalized(norm)) {
log.println("# " + s + " <> XXX # " + Default.ucd.getName(it.codepoint));
String norm = Default.nfd().normalize(s);
if (s.equals(norm) && Default.nfkd().isNormalized(norm)) {
log.println("# " + s + " <> XXX # " + Default.ucd().getName(it.codepoint));
}
}
} finally {
@ -158,10 +158,10 @@ public class TestData implements UCD_Types {
Matcher m = p.matcher("");
for (int i = 0; i < 0x10FFFF; ++i) {
Utility.dot(i);
if (!Default.ucd.isAssigned(i)) continue;
byte cat = Default.ucd.getCategory(i);
if (!Default.ucd().isAssigned(i)) continue;
byte cat = Default.ucd().getCategory(i);
if (cat == PRIVATE_USE) continue;
m.reset(Default.ucd.getName(i));
m.reset(Default.ucd().getName(i));
if (m.matches()) {
result.add(i);
}
@ -174,7 +174,7 @@ public class TestData implements UCD_Types {
System.out.println(x);
UnicodeSet ss = new UnicodeSet(x);
pw.println(x);
Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd);
Utility.showSetNames(pw,"",ss,separateLines,false,Default.ucd());
pw.println("****************************");
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java,v $
* $Date: 2003/02/26 00:35:09 $
* $Revision: 1.1 $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.2 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ import com.ibm.icu.text.UnicodeSet;
public class TestNameUniqueness implements UCD_Types {
public static void test() throws IOException {
Default.setUCD();
new TestNameUniqueness().checkNames();
}
@ -39,18 +39,18 @@ public class TestNameUniqueness implements UCD_Types {
out.println();
for (int cp = 0; cp < 0x10FFFF; ++cp) {
Utility.dot(cp);
if (!Default.ucd.isAllocated(cp)) continue;
if (Default.ucd.hasComputableName(cp)) continue;
int cat = Default.ucd.getCategory(cp);
if (!Default.ucd().isAllocated(cp)) continue;
if (Default.ucd().hasComputableName(cp)) continue;
int cat = Default.ucd().getCategory(cp);
if (cat == Cc) continue;
String name = Default.ucd.getName(cp);
String name = Default.ucd().getName(cp);
String processedName = processName(cp, name);
Integer existing = (Integer) names.get(processedName);
if (existing != null) {
out.println("Collision between: "
+ Default.ucd.getCodeAndName(existing.intValue())
+ ", " + Default.ucd.getCodeAndName(cp));
+ Default.ucd().getCodeAndName(existing.intValue())
+ ", " + Default.ucd().getCodeAndName(cp));
} else {
names.put(processedName, new Integer(cp));
}
@ -61,19 +61,19 @@ public class TestNameUniqueness implements UCD_Types {
for (int i = 0; i < charCount.length; ++i) {
int count = charCount[i];
if (count == 0) continue;
String sampleName = Default.ucd.getCodeAndName(samples[i]);
String sampleName = Default.ucd().getCodeAndName(samples[i]);
out.println(count + "\t'" + ((char)i)
+ "'\t" + Default.ucd.getCodeAndName(samples[i])
+ "\t=>\t" + processName(samples[i], Default.ucd.getName(samples[i])));
+ "'\t" + Default.ucd().getCodeAndName(samples[i])
+ "\t=>\t" + processName(samples[i], Default.ucd().getName(samples[i])));
}
out.println();
out.println("Name Samples");
out.println();
for (int i = 0; i < 256; ++i) {
int cat = Default.ucd.getCategory(i);
int cat = Default.ucd().getCategory(i);
if (cat == Cc) continue;
out.println(Default.ucd.getCodeAndName(i)
+ "\t=>\t" + processName(i, Default.ucd.getName(i)));
out.println(Default.ucd().getCodeAndName(i)
+ "\t=>\t" + processName(i, Default.ucd().getName(i)));
}
} finally {
out.close();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.6 $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -36,21 +36,21 @@ public final class TestNormalization {
public static void main(String[] args) throws java.io.IOException {
System.out.println("Creating Normalizers");
Default.setUCD();
String[] testSet = {"a\u0304\u0328", "a\u0328\u0304"};
for (int i = 0; i < testSet.length; ++i) {
String s = testSet[i];
boolean test = Default.nfc.isFCD(s);
System.out.println(test + ": " + Default.ucd.getCodeAndName(s));
boolean test = Default.nfc().isFCD(s);
System.out.println(test + ": " + Default.ucd().getCodeAndName(s));
}
String x = UTF32.valueOf32(0x10000);
check("NFC", Default.nfc, x);
check("NFD", Default.nfd, x);
check("NFKC", Default.nfkc, x);
check("NFKD", Default.nfkd, x);
check("NFC", Default.nfc(), x);
check("NFD", Default.nfd(), x);
check("NFKC", Default.nfkc(), x);
check("NFKD", Default.nfkd(), x);
out = new PrintWriter(
@ -97,36 +97,36 @@ public final class TestNormalization {
}
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
errorCount += check("NFCa", Default.nfc, parts[1], parts[0]);
errorCount += check("NFCb", Default.nfc, parts[1], parts[1]);
errorCount += check("NFCc", Default.nfc, parts[1], parts[2]);
errorCount += check("NFCa", Default.nfc(), parts[1], parts[0]);
errorCount += check("NFCb", Default.nfc(), parts[1], parts[1]);
errorCount += check("NFCc", Default.nfc(), parts[1], parts[2]);
// c4 == NFC(c4) == NFC(c5)
errorCount += check("NFCd", Default.nfc, parts[3], parts[3]);
errorCount += check("NFCe", Default.nfc, parts[3], parts[4]);
errorCount += check("NFCd", Default.nfc(), parts[3], parts[3]);
errorCount += check("NFCe", Default.nfc(), parts[3], parts[4]);
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
errorCount += check("NFDa", Default.nfd, parts[2], parts[0]);
errorCount += check("NFDb", Default.nfd, parts[2], parts[1]);
errorCount += check("NFDc", Default.nfd, parts[2], parts[2]);
errorCount += check("NFDa", Default.nfd(), parts[2], parts[0]);
errorCount += check("NFDb", Default.nfd(), parts[2], parts[1]);
errorCount += check("NFDc", Default.nfd(), parts[2], parts[2]);
// c5 == NFD(c4) == NFD(c5)
errorCount += check("NFDd", Default.nfd, parts[4], parts[3]);
errorCount += check("NFDe", Default.nfd, parts[4], parts[4]);
errorCount += check("NFDd", Default.nfd(), parts[4], parts[3]);
errorCount += check("NFDe", Default.nfd(), parts[4], parts[4]);
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
errorCount += check("NFKCa", Default.nfkc, parts[3], parts[0]);
errorCount += check("NFKCb", Default.nfkc, parts[3], parts[1]);
errorCount += check("NFKCc", Default.nfkc, parts[3], parts[2]);
errorCount += check("NFKCd", Default.nfkc, parts[3], parts[3]);
errorCount += check("NFKCe", Default.nfkc, parts[3], parts[4]);
errorCount += check("NFKCa", Default.nfkc(), parts[3], parts[0]);
errorCount += check("NFKCb", Default.nfkc(), parts[3], parts[1]);
errorCount += check("NFKCc", Default.nfkc(), parts[3], parts[2]);
errorCount += check("NFKCd", Default.nfkc(), parts[3], parts[3]);
errorCount += check("NFKCe", Default.nfkc(), parts[3], parts[4]);
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
errorCount += check("NFKDa", Default.nfkd, parts[4], parts[0]);
errorCount += check("NFKDb", Default.nfkd, parts[4], parts[1]);
errorCount += check("NFKDc", Default.nfkd, parts[4], parts[2]);
errorCount += check("NFKDd", Default.nfkd, parts[4], parts[3]);
errorCount += check("NFKDe", Default.nfkd, parts[4], parts[4]);
errorCount += check("NFKDa", Default.nfkd(), parts[4], parts[0]);
errorCount += check("NFKDb", Default.nfkd(), parts[4], parts[1]);
errorCount += check("NFKDc", Default.nfkd(), parts[4], parts[2]);
errorCount += check("NFKDd", Default.nfkd(), parts[4], parts[3]);
errorCount += check("NFKDe", Default.nfkd(), parts[4], parts[4]);
}
System.out.println("Total errors in file: " + errorCount
+ ", lines: " + lineErrorCount);
@ -160,21 +160,21 @@ public final class TestNormalization {
}
String otherList = "";
if (!base.equals(other)) {
otherList = "(" + Default.ucd.getCodeAndName(other) + ")";
otherList = "(" + Default.ucd().getCodeAndName(other) + ")";
}
out.println("DIFF " + type + ": "
+ Default.ucd.getCodeAndName(base) + " != "
+ Default.ucd().getCodeAndName(base) + " != "
+ type
+ otherList
+ " == " + Default.ucd.getCodeAndName(trans)
+ " == " + Default.ucd().getCodeAndName(trans)
+ temp
);
return 1;
}
} catch (Exception e) {
throw new ChainException("DIFF " + type + ": "
+ Default.ucd.getCodeAndName(base) + " != "
+ type + "(" + Default.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
+ Default.ucd().getCodeAndName(base) + " != "
+ type + "(" + Default.ucd().getCodeAndName(other) + ")", new Object[]{}, e);
}
return 0;
}
@ -188,10 +188,10 @@ public final class TestNormalization {
if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
if (charsListed.get(missing)) continue;
String x = UTF32.valueOf32(missing);
errorCount += check("NFC", Default.nfc, x);
errorCount += check("NFD", Default.nfd, x);
errorCount += check("NFKC", Default.nfkc, x);
errorCount += check("NFKD", Default.nfkd, x);
errorCount += check("NFC", Default.nfc(), x);
errorCount += check("NFD", Default.nfd(), x);
errorCount += check("NFKC", Default.nfkc(), x);
errorCount += check("NFKD", Default.nfkd(), x);
}
}
@ -200,9 +200,9 @@ public final class TestNormalization {
UnicodeSet leading = new UnicodeSet();
UnicodeSet trailing = new UnicodeSet();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (Default.nfc.isLeading(i)) leading.add(i);
if (Default.ucd.getCombiningClass(i) != 0) continue;
if (Default.nfc.isTrailing(i)) trailing.add(i);
if (Default.nfc().isLeading(i)) leading.add(i);
if (Default.ucd().getCombiningClass(i) != 0) continue;
if (Default.nfc().isTrailing(i)) trailing.add(i);
}
System.out.println("Leading: " + leading.size());
System.out.println("Trailing Starters: " + trailing.size());
@ -214,12 +214,12 @@ public final class TestNormalization {
trail.reset();
followers.clear();
while (trail.next()) {
if (Default.nfc.getComposition(lead.codepoint, trail.codepoint) != 0xFFFF) {
if (Default.nfc().getComposition(lead.codepoint, trail.codepoint) != 0xFFFF) {
followers.add(trail.codepoint);
}
}
if (followers.size() == 0) continue;
System.out.println(Default.ucd.getCode(lead.codepoint)
System.out.println(Default.ucd().getCode(lead.codepoint)
+ "\t" + followers.toPattern(true));
UnicodeSet possLead = (UnicodeSet) map.get(followers);
if (possLead == null) {

View file

@ -35,36 +35,37 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
add(new ToolUnicodeProperty(name));
}
add(new UnicodeProperty.SimpleProperty() {
{set("Name", "na", "<string>", UnicodeProperty.STRING);}
public String getPropertyValue(int codepoint) {
{set("Name", "na", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getName(codepoint);
}
});
add(new UnicodeProperty.SimpleProperty() {
{set("Block", "blk", "<string>", UnicodeProperty.STRING);}
public String getPropertyValue(int codepoint) {
{set("Block", "blk", UnicodeProperty.ENUMERATED,
ucd.getBlockNames(null));}
public String getValue(int codepoint) {
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getBlock(codepoint);
}
});
add(new UnicodeProperty.SimpleProperty() {
{set("Bidi_Mirroring_Glyph", "bmg", "<string>", UnicodeProperty.STRING);}
public String getPropertyValue(int codepoint) {
{set("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getBidiMirror(codepoint);
}
});
add(new UnicodeProperty.SimpleProperty() {
{set("Case_Folding", "cf", "<string>", UnicodeProperty.STRING);}
public String getPropertyValue(int codepoint) {
{set("Case_Folding", "cf", UnicodeProperty.STRING, "<string>");}
public String getValue(int codepoint) {
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
}
});
add(new UnicodeProperty.SimpleProperty() {
{set("Numeric_Value", "nv", "<number>", UnicodeProperty.NUMERIC);}
public String getPropertyValue(int codepoint) {
{set("Numeric_Value", "nv", UnicodeProperty.NUMERIC, "<number>");}
public String getValue(int codepoint) {
double num = ucd.getNumericValue(codepoint);
if (Double.isNaN(num)) return null;
return Double.toString(num);
@ -108,8 +109,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
setName(propertyAlias);
}
public Collection getAvailablePropertyValueAliases(Collection result) {
int type = getPropertyType() & ~EXTENDED_BIT;
public Collection getAvailableValueAliases(Collection result) {
int type = getType() & ~EXTENDED_BIT;
if (type == STRING) result.add("<string>");
else if (type == NUMERIC) result.add("<string>");
else if (type == BINARY) {
@ -155,7 +156,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
return result;
}
public Collection getPropertyAliases(Collection result) {
public Collection getAliases(Collection result) {
String longName = up.getName(UCD_Types.LONG);
addUnique(Utility.getUnskeleton(longName, true), result);
String shortName = up.getName(UCD_Types.SHORT);
@ -163,12 +164,12 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
return result;
}
public Collection getPropertyValueAliases(String valueAlias, Collection result) {
public Collection getValueAliases(String valueAlias, Collection result) {
// TODO Auto-generated method stub
return result;
}
public String getPropertyValue(int codepoint) {
public String getValue(int codepoint) {
byte style = UCD_Types.LONG;
String temp = null;
boolean titlecase = false;
@ -200,7 +201,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break;
}
if (temp != null) return Utility.getUnskeleton(temp,titlecase);
if (getPropertyType() == BINARY) {
if (getType() == BINARY) {
return up.hasValue(codepoint) ? "True" : "False";
}
return "<unknown>";

View file

@ -5,14 +5,15 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.29 $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.30 $
*
*******************************************************************************
*/
package com.ibm.text.UCD;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
@ -27,6 +28,8 @@ import java.io.FileInputStream;
import java.io.BufferedReader;
import com.ibm.text.utility.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
public final class UCD implements UCD_Types {
@ -1503,10 +1506,6 @@ to guarantee identifier closure.
UData uData = new UData();
uData.readBytes(dataIn);
if (uData.codePoint == SPOT_CHECK) {
System.out.println("SPOT-CHECK: " + uData);
}
//T = Mc + (Cf - ZWNJ - ZWJ)
int cp = uData.codePoint;
byte old = uData.joiningType;
@ -1571,6 +1570,18 @@ to guarantee identifier closure.
}
return NOBLOCK;
}
/**
 * Adds the name of every entry in {@code blocks} to a collection, followed by
 * {@code NOBLOCK}. All additions go through {@code UnicodeProperty.addUnique}.
 * If {@code blocks} is still null, {@code loadBlocks()} is called first.
 *
 * @param result collection to add the names to; if null, a new ArrayList is used
 * @return the collection that received the names (the argument, or the new list)
 */
public Collection getBlockNames(Collection result) {
    Collection names = (result != null) ? result : new ArrayList();
    if (blocks == null) {
        loadBlocks();
    }
    for (Iterator blockIt = blocks.iterator(); blockIt.hasNext(); ) {
        BlockData block = (BlockData) blockIt.next();
        UnicodeProperty.addUnique(block.name, names);
    }
    // NOBLOCK is appended last, after all block names.
    UnicodeProperty.addUnique(NOBLOCK, names);
    return names;
}
public boolean getBlockData(int blockId, BlockData output) {
if (blocks == null) loadBlocks();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
* $Date: 2003/07/21 15:50:05 $
* $Revision: 1.9 $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.10 $
*
*******************************************************************************
*/
@ -186,7 +186,7 @@ class UData implements UCD_Types {
static final byte ABBREVIATED = 0, FULL = 1;
public String toString() {
return toString(Default.ucd, FULL);
return toString(Default.ucd(), FULL);
}
public String toString(UCD ucd, byte style) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.15 $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -24,7 +24,7 @@ public final class UnifiedBinaryProperty extends UCDProperty {
// DerivedProperty dp;
public static UCDProperty make(int propMask) {
return make(propMask, Default.ucd);
return make(propMask, Default.ucd());
}
public static UCDProperty make(int propMask, UCD ucd) {
@ -323,7 +323,7 @@ public final class UnifiedBinaryProperty extends UCDProperty {
if (shortOne.length() == 0) shortOne = "xx";
String longOne = getValue(LONG);
if (majorProp == (SCRIPT>>8)) {
longOne = Default.ucd.getCase(longOne, FULL, TITLE);
longOne = Default.ucd().getCase(longOne, FULL, TITLE);
}
if (longOne.length() == 0) longOne = "none";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
* $Date: 2004/02/06 18:30:18 $
* $Revision: 1.5 $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -23,7 +23,7 @@ public final class UnifiedProperty extends UCDProperty {
// DerivedProperty dp;
public static UCDProperty make(int propMask) {
return make(propMask, Default.ucd);
return make(propMask, Default.ucd());
}
public static UCDProperty make(int propMask, UCD ucd) {

File diff suppressed because it is too large Load diff

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2004/02/06 18:29:39 $
* $Revision: 1.37 $
* $Date: 2004/02/07 01:01:17 $
* $Revision: 1.38 $
*
*******************************************************************************
*/
@ -590,7 +590,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
public static String getDisplay(int cp) {
String result = UTF16.valueOf(cp);
byte cat = Default.ucd.getCategory(cp);
byte cat = Default.ucd().getCategory(cp);
if (cat == Mn || cat == Me) {
result = String.valueOf(DOTTED_CIRCLE) + result;
} else if (cat == Cf || cat == Cc || cp == 0x034F || cp == 0x00AD || cp == 0x1806) {