mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
tool updates
X-SVN-Rev: 14539
This commit is contained in:
parent
31bffd79b0
commit
ffeb40756c
27 changed files with 2027 additions and 912 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
|
||||
* $Date: 2004/02/12 00:47:30 $
|
||||
* $Revision: 1.7 $
|
||||
* $Date: 2004/02/18 03:08:57 $
|
||||
* $Revision: 1.8 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -44,6 +44,19 @@ public class BagFormatter {
|
|||
private static PrintWriter log = CONSOLE;
|
||||
|
||||
private boolean abbreviated = false;
|
||||
private String separator = ",";
|
||||
private String prefix = "[";
|
||||
private String suffix = "]";
|
||||
private UnicodeProperty.Factory source;
|
||||
private UnicodeLabel nameSource;
|
||||
private UnicodeLabel labelSource;
|
||||
private UnicodeLabel valueSource;
|
||||
private String propName = "";
|
||||
private boolean showCount = true;
|
||||
private boolean skipNullValues = true;
|
||||
private boolean suppressReserved = true;
|
||||
private boolean hexValue = false;
|
||||
private static final String NULL_VALUE = "_NULL_VALUE_";
|
||||
|
||||
/**
|
||||
* Compare two UnicodeSets, and show the differences
|
||||
|
@ -273,12 +286,12 @@ public class BagFormatter {
|
|||
return this;
|
||||
}
|
||||
|
||||
/*public String getName(int codePoint) {
|
||||
return getName(codePoint, false);
|
||||
}*/
|
||||
public String getName(int codePoint) {
|
||||
return getName("", codePoint, codePoint);
|
||||
}
|
||||
|
||||
public String getName(String separator, int start, int end) {
|
||||
if (nameSource == null || nameSource == UnicodeProperty.NULL) return "";
|
||||
if (getNameSource() == null || getNameSource() == UnicodeProperty.NULL) return "";
|
||||
String result = getName(start, false);
|
||||
if (start == end) return separator + result;
|
||||
String endString = getName(end, false);
|
||||
|
@ -291,8 +304,6 @@ public class BagFormatter {
|
|||
return getName(s, false);
|
||||
}
|
||||
|
||||
UnicodeLabel nameSource;
|
||||
|
||||
class NameLabel extends UnicodeLabel {
|
||||
UnicodeProperty nameProp;
|
||||
UnicodeSet control;
|
||||
|
@ -327,14 +338,18 @@ public class BagFormatter {
|
|||
|
||||
// refactored
|
||||
public String getName(int codePoint, boolean withCodePoint) {
|
||||
return nameSource.getValue(codePoint, !withCodePoint);
|
||||
return getNameSource().getValue(codePoint, !withCodePoint);
|
||||
}
|
||||
|
||||
public String getName(String s, boolean withCodePoint) {
|
||||
return nameSource.getValue(s, separator, !withCodePoint);
|
||||
return getNameSource().getValue(s, separator, !withCodePoint);
|
||||
}
|
||||
|
||||
public String hex(String s) {
|
||||
return hex(s,separator);
|
||||
}
|
||||
|
||||
public String hex(String s, String separator) {
|
||||
return UnicodeLabel.HEX.getValue(s, separator, true);
|
||||
}
|
||||
|
||||
|
@ -344,36 +359,21 @@ public class BagFormatter {
|
|||
return s + ".." + Utility.hex(end,4);
|
||||
}
|
||||
|
||||
private String separator = ",";
|
||||
private String prefix = "[";
|
||||
private String suffix = "]";
|
||||
private UnicodeProperty.Factory source;
|
||||
private UnicodeLabel labelSource;
|
||||
private UnicodeLabel valueSource = UnicodeLabel.NULL;
|
||||
private boolean showCount = true;
|
||||
private boolean suppressReserved = true;
|
||||
|
||||
public BagFormatter setUnicodePropertySource(UnicodeProperty.Factory source) {
|
||||
public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
|
||||
this.source = source;
|
||||
nameSource = new NameLabel(source);
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnicodeProperty.Factory getUnicodePropertyFactory() {
|
||||
if (source == null) source = ICUPropertyFactory.make();
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
 * Creates a formatter without an explicit property factory by passing
 * <code>null</code> to the one-argument constructor.
 */
public BagFormatter() {
    this(null);
}
|
||||
|
||||
public BagFormatter (UnicodeProperty.Factory source) {
|
||||
if (source == null) source = ICUPropertyFactory.make();
|
||||
setUnicodePropertySource(source);
|
||||
Map labelMap = new HashMap();
|
||||
//labelMap.put("Lo","L&");
|
||||
labelMap.put("Lu","L&");
|
||||
labelMap.put("Lt","L&");
|
||||
labelMap.put("Ll","L&");
|
||||
setLabelSource(new UnicodeProperty.FilteredProperty(
|
||||
source.getProperty("General_Category"),
|
||||
new UnicodeProperty.MapFilter(labelMap)));
|
||||
setUnicodePropertyFactory(source);
|
||||
}
|
||||
|
||||
public String join(Object o) {
|
||||
|
@ -441,29 +441,38 @@ public class BagFormatter {
|
|||
private PrintWriter output;
|
||||
Tabber.MonoTabber myTabber;
|
||||
String commentSeparator;
|
||||
int counter;
|
||||
int valueSize;
|
||||
int labelSize;
|
||||
|
||||
public void doAt(Object c, PrintWriter output) {
|
||||
this.output = output;
|
||||
counter = 0;
|
||||
myTabber = new Tabber.MonoTabber();
|
||||
int valueSize = valueSource.getMaxWidth(shortValue);
|
||||
if (valueSize > 0) valueSize += 2;
|
||||
if (!mergeRanges) {
|
||||
myTabber.add(6,Tabber.LEFT); // code
|
||||
if (valueSource != UnicodeProperty.NULL) myTabber.add(2 + valueSize,Tabber.LEFT); // value
|
||||
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
|
||||
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
|
||||
//myTabber.add(4,Tabber.LEFT);
|
||||
} else {
|
||||
myTabber.add(13,Tabber.LEFT);
|
||||
if (valueSource != UnicodeProperty.NULL) myTabber.add(2 + valueSize,Tabber.LEFT); // value
|
||||
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
|
||||
if (showCount) myTabber.add(8,Tabber.RIGHT);
|
||||
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
|
||||
//myTabber.add(7,Tabber.LEFT);
|
||||
}
|
||||
myTabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);
|
||||
|
||||
if (propName.length() > 0) myTabber.add(propName.length() + 2,Tabber.LEFT);
|
||||
|
||||
valueSize = getValueSource().getMaxWidth(shortValue);
|
||||
System.out.println("ValueSize: " + valueSize);
|
||||
if (valueSize > 0) myTabber.add(valueSize + 2,Tabber.LEFT); // value
|
||||
|
||||
myTabber.add(3,Tabber.LEFT); // comment character
|
||||
|
||||
labelSize = getLabelSource().getMaxWidth(shortLabel);
|
||||
if (labelSize > 0) myTabber.add(labelSize + 1,Tabber.LEFT); // value
|
||||
|
||||
if (mergeRanges && showCount) myTabber.add(5,Tabber.RIGHT);
|
||||
|
||||
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
|
||||
//myTabber.add(7,Tabber.LEFT);
|
||||
|
||||
commentSeparator = (showCount || showLiteral != null
|
||||
|| labelSource != UnicodeProperty.NULL || nameSource != UnicodeProperty.NULL)
|
||||
? "\t# " : "";
|
||||
|| getLabelSource() != UnicodeProperty.NULL || getNameSource() != UnicodeProperty.NULL)
|
||||
? "\t #" : "";
|
||||
|
||||
System.out.println("Tabber: " + myTabber.toString());
|
||||
System.out.println("Tabber: " + myTabber.process("a\tb\td\td\tf\tg\th"));
|
||||
doAt(c);
|
||||
}
|
||||
|
||||
|
@ -479,7 +488,7 @@ public class BagFormatter {
|
|||
|
||||
protected void doBefore(Object container, Object o) {
|
||||
if (showSetAlso && container instanceof UnicodeSet) {
|
||||
output.print("# " + container + lineSeparator);
|
||||
output.print("#" + container + lineSeparator);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -487,7 +496,7 @@ public class BagFormatter {
|
|||
}
|
||||
|
||||
protected void doAfter(Object container, Object o) {
|
||||
output.print(lineSeparator + "# Total code points: " + nf.format(count(container)) + lineSeparator);
|
||||
output.print(lineSeparator + "# Total code points: " + nf.format(counter));
|
||||
}
|
||||
|
||||
protected void doSimpleAt(Object o) {
|
||||
|
@ -500,6 +509,7 @@ public class BagFormatter {
|
|||
output.print("->");
|
||||
doAt(value);
|
||||
doAfter(o, value);
|
||||
counter++;
|
||||
} else if (o instanceof Visitor.CodePointRange) {
|
||||
doAt((Visitor.CodePointRange) o);
|
||||
} else {
|
||||
|
@ -512,59 +522,63 @@ public class BagFormatter {
|
|||
+ "\t"
|
||||
+ getName(thing))
|
||||
+ lineSeparator);
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
protected void doAt(Visitor.CodePointRange usi) {
|
||||
if (!mergeRanges) {
|
||||
for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
|
||||
String label = labelSource.getValue(cp, shortLabel);
|
||||
String value = valueSource.getValue(cp, shortValue);
|
||||
if (value.length() != 0) {
|
||||
value = "\t; " + value;
|
||||
}
|
||||
output.print(
|
||||
myTabber.process(
|
||||
Utility.hex(cp, 4)
|
||||
+ value
|
||||
+ commentSeparator
|
||||
+ label
|
||||
+ insertLiteral(cp,cp)
|
||||
+ getName("\t", cp, cp))
|
||||
+ lineSeparator);
|
||||
String label = getLabelSource().getValue(cp, shortLabel);
|
||||
String value = getValue(cp, shortValue);
|
||||
showLine(cp, cp, label, value);
|
||||
}
|
||||
} else {
|
||||
rf.reset(usi.codepoint, usi.codepointEnd + 1);
|
||||
while (rf.next()) {
|
||||
/*
|
||||
String label = (usi.codepoint != usi.codepointEnd)
|
||||
? label = getLabels(usi.codepoint, usi.codepointEnd)
|
||||
: getLabel(usi.codepoint);
|
||||
*/
|
||||
int start = rf.start;
|
||||
int end = rf.limit - 1;
|
||||
String label = rf.label;
|
||||
String value = rf.value;
|
||||
if (value.length() != 0) {
|
||||
value = "\t; " + value;
|
||||
}
|
||||
String count = !showCount ? ""
|
||||
: end == start ? "\t"
|
||||
: "\t["+ nf.format(end - start + 1)+ "]";
|
||||
output.print(
|
||||
myTabber.process(
|
||||
hex(start, end)
|
||||
+ value
|
||||
+ commentSeparator
|
||||
+ label
|
||||
+ count
|
||||
+ insertLiteral(start, end)
|
||||
+ getName("\t", start, end))
|
||||
+ lineSeparator);
|
||||
showLine(rf.start, rf.limit - 1, rf.label, rf.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void showLine(int start, int end, String label, String value) {
|
||||
if (value == NULL_VALUE) return;
|
||||
|
||||
counter += end - start + 1;
|
||||
String pn = propName;
|
||||
if (pn.length() != 0) {
|
||||
pn = "\t; " + pn;
|
||||
}
|
||||
if (valueSize > 0) {
|
||||
value = "\t; " + value;
|
||||
} else if (value.length() > 0) {
|
||||
throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
|
||||
}
|
||||
if (labelSize > 0) {
|
||||
label = "\t" + label;
|
||||
} else if (label.length() > 0) {
|
||||
throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource().getMaxWidth(shortLabel));
|
||||
}
|
||||
|
||||
String count = "";
|
||||
if (mergeRanges && showCount) {
|
||||
if (end == start) count = "\t";
|
||||
else count = "\t ["+ nf.format(end - start + 1)+ "]";
|
||||
}
|
||||
|
||||
output.print(
|
||||
myTabber.process(
|
||||
hex(start, end)
|
||||
+ pn
|
||||
+ value
|
||||
+ commentSeparator
|
||||
+ label
|
||||
+ count
|
||||
+ insertLiteral(start, end)
|
||||
+ getName("\t ", start, end))
|
||||
+ lineSeparator);
|
||||
}
|
||||
|
||||
private String insertLiteral(String thing) {
|
||||
return (showLiteral == null ? ""
|
||||
: " \t(" + showLiteral.transliterate(thing) + ") ");
|
||||
|
@ -648,19 +662,25 @@ public class BagFormatter {
|
|||
if (limit >= veryLimit)
|
||||
return false;
|
||||
start = limit; // set to end of last
|
||||
label = labelSource.getValue(limit, shortLabel);
|
||||
value = valueSource.getValue(limit, shortLabel);
|
||||
label = getLabelSource().getValue(limit, shortLabel);
|
||||
value = getValue(limit, shortLabel);
|
||||
limit++;
|
||||
for (; limit < veryLimit; limit++) {
|
||||
String s = labelSource.getValue(limit, shortLabel);
|
||||
String v = valueSource.getValue(limit, shortLabel);
|
||||
if (!s.equals(label) || !v.equals(value)) break;
|
||||
String s = getLabelSource().getValue(limit, shortLabel);
|
||||
String v = getValue(limit, shortLabel);
|
||||
if (!equalTo(s, label) || !equalTo(v, value)) break;
|
||||
}
|
||||
// at this point, limit is the first item that has a different label than source
|
||||
// OR, we got to the end, and limit == veryLimit
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
boolean equalTo(Object a, Object b) {
|
||||
if (a == b) return true;
|
||||
if (a == null) return false;
|
||||
return a.equals(b);
|
||||
}
|
||||
|
||||
boolean shortLabel = true;
|
||||
boolean shortValue = true;
|
||||
|
@ -692,11 +712,18 @@ public class BagFormatter {
|
|||
return this;
|
||||
}
|
||||
|
||||
public UnicodeProperty.Factory getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public UnicodeLabel getLabelSource() {
|
||||
if (labelSource == null) {
|
||||
Map labelMap = new HashMap();
|
||||
//labelMap.put("Lo","L&");
|
||||
labelMap.put("Lu","L&");
|
||||
labelMap.put("Lt","L&");
|
||||
labelMap.put("Ll","L&");
|
||||
labelSource = new UnicodeProperty.FilteredProperty(
|
||||
getUnicodePropertyFactory().getProperty("General_Category"),
|
||||
new UnicodeProperty.MapFilter(labelMap)
|
||||
).setAllowValueAliasCollisions(true);
|
||||
}
|
||||
return labelSource;
|
||||
}
|
||||
|
||||
|
@ -821,6 +848,9 @@ public class BagFormatter {
|
|||
* @return
|
||||
*/
|
||||
public UnicodeLabel getNameSource() {
|
||||
if (nameSource == null) {
|
||||
nameSource = new NameLabel(getUnicodePropertyFactory());
|
||||
}
|
||||
return nameSource;
|
||||
}
|
||||
|
||||
|
@ -837,9 +867,17 @@ public class BagFormatter {
|
|||
* @return
|
||||
*/
|
||||
public UnicodeLabel getValueSource() {
|
||||
if (valueSource == null) valueSource = UnicodeLabel.NULL;
|
||||
return valueSource;
|
||||
}
|
||||
|
||||
private String getValue(int cp, boolean shortValue) {
|
||||
String result = getValueSource().getValue(cp, shortValue);
|
||||
if (result == null) return NULL_VALUE;
|
||||
if (hexValue) result = hex(result, " ");
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param label
|
||||
*/
|
||||
|
@ -868,4 +906,35 @@ public class BagFormatter {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public String getPropName() {
|
||||
return propName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string
|
||||
*/
|
||||
public BagFormatter setPropName(String string) {
|
||||
if (string == null) string = "";
|
||||
propName = string;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public boolean isHexValue() {
|
||||
return hexValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param b
|
||||
*/
|
||||
public BagFormatter setHexValue(boolean b) {
|
||||
hexValue = b;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
|
@ -6,8 +6,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java,v $
|
||||
* $Date: 2004/02/12 00:47:30 $
|
||||
* $Revision: 1.2 $
|
||||
* $Date: 2004/02/18 03:08:57 $
|
||||
* $Revision: 1.3 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -45,52 +45,21 @@ import com.ibm.icu.util.VersionInfo;
|
|||
|
||||
public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
||||
|
||||
public static class RegexMatcher implements UnicodeProperty.Matcher {
|
||||
private Matcher matcher;
|
||||
|
||||
public UnicodeProperty.Matcher set(String pattern) {
|
||||
matcher = Pattern.compile(pattern).matcher("");
|
||||
return this;
|
||||
}
|
||||
public boolean matches(String value) {
|
||||
matcher.reset(value);
|
||||
return matcher.matches();
|
||||
}
|
||||
}
|
||||
|
||||
static class ICUProperty extends UnicodeProperty {
|
||||
protected int propEnum = Integer.MIN_VALUE;
|
||||
|
||||
protected ICUProperty(String propName, int propEnum) {
|
||||
this.propEnum = propEnum;
|
||||
setName(propName);
|
||||
this.propEnum = propEnum;
|
||||
setType(internalGetPropertyType(propEnum));
|
||||
}
|
||||
|
||||
boolean shownException = false;
|
||||
|
||||
public String _getValue(int codePoint) {
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
int enumValue = -1;
|
||||
String value = null;
|
||||
try {
|
||||
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
|
||||
if (enumValue >= 0) value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (!shownException) {
|
||||
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
|
||||
shownException = true;
|
||||
}
|
||||
}
|
||||
return value != null ? value : String.valueOf(enumValue);
|
||||
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
|
||||
double num = UCharacter.getUnicodeNumericValue(codePoint);
|
||||
if (num == UCharacter.NO_NUMERIC_VALUE) return null;
|
||||
return Double.toString(num);
|
||||
// TODO: Fix HACK -- API deficient
|
||||
} else switch(propEnum) {
|
||||
switch(propEnum) {
|
||||
case UProperty.AGE: String temp = UCharacter.getAge(codePoint).toString();
|
||||
if (temp.equals("0.0.0.0")) return "UNSPECIFIED";
|
||||
if (temp.equals("0.0.0.0")) return "unassigned";
|
||||
if (temp.endsWith(".0.0")) return temp.substring(0,temp.length()-4);
|
||||
return temp;
|
||||
case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
|
||||
|
@ -119,31 +88,26 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
case isCasefolded: return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint),true).equals(UTF16.valueOf(codePoint)));
|
||||
case isCased: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint)));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Collection _getAvailableValueAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
if (Binary_Extras.isInRange(propEnum)) {
|
||||
propEnum = UProperty.BINARY_START; // HACK
|
||||
}
|
||||
int start = UCharacter.getIntPropertyMinValue(propEnum);
|
||||
int end = UCharacter.getIntPropertyMaxValue(propEnum);
|
||||
for (int i = start; i <= end; ++i) {
|
||||
String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
|
||||
String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
|
||||
if (alias == null) {
|
||||
alias = alias2;
|
||||
int enumValue = -1;
|
||||
String value = null;
|
||||
try {
|
||||
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
|
||||
if (enumValue >= 0) value = fixedGetPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (!shownException) {
|
||||
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
|
||||
shownException = true;
|
||||
}
|
||||
//System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
|
||||
if (alias != null && !result.contains(alias)) result.add(alias);
|
||||
}
|
||||
} else {
|
||||
String alias = getFixedValueAlias(null, -1,UProperty.NameChoice.LONG);
|
||||
if (alias != null && !result.contains(alias)) result.add(alias);
|
||||
return value != null ? value : String.valueOf(enumValue);
|
||||
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
|
||||
double num = UCharacter.getUnicodeNumericValue(codePoint);
|
||||
if (num == UCharacter.NO_NUMERIC_VALUE) return null;
|
||||
return Double.toString(num);
|
||||
// TODO: Fix HACK -- API deficient
|
||||
}
|
||||
return result;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -161,7 +125,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
return "<number>";
|
||||
}
|
||||
if (valueAlias != null && !valueAlias.equals("<integer>")) {
|
||||
valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias);
|
||||
valueEnum = fixedGetPropertyValueEnum(propEnum,valueAlias);
|
||||
}
|
||||
// because these are defined badly, there may be no normal (long) name.
|
||||
// if there is
|
||||
|
@ -171,50 +135,112 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
if (nameChoice == UProperty.NameChoice.LONG) {
|
||||
result = fixedGetPropertyValueName(propEnum,valueEnum, UProperty.NameChoice.SHORT);
|
||||
if (result != null) return result;
|
||||
if (propEnum == UProperty.CANONICAL_COMBINING_CLASS) return null;
|
||||
return "<integer>";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
|
||||
private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
|
||||
try {
|
||||
return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
|
||||
return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
|
||||
} catch (Exception e) {
|
||||
return Integer.parseInt(valueAlias);
|
||||
}
|
||||
}
|
||||
|
||||
static Map fixSkeleton = new HashMap();
|
||||
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
|
||||
|
||||
try {
|
||||
String value = UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
|
||||
String newValue = (String) fixSkeleton.get(value);
|
||||
if (newValue == null) {
|
||||
newValue = value;
|
||||
if (propEnum == UProperty.JOINING_GROUP) {
|
||||
newValue = newValue.toLowerCase(Locale.ENGLISH);
|
||||
}
|
||||
newValue = regularize(newValue, true);
|
||||
fixSkeleton.put(value, newValue);
|
||||
}
|
||||
return newValue;
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Collection _getAliases(Collection result) {
|
||||
public List _getNameAliases(List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
String alias = String_Extras.get(propEnum);
|
||||
if (alias == null) alias = Binary_Extras.get(propEnum);
|
||||
if (alias != null) {
|
||||
if (!result.contains(alias)) result.add(alias);
|
||||
addUnique(alias, result);
|
||||
} else {
|
||||
try {
|
||||
for (int nameChoice = 0; ; ++nameChoice) {
|
||||
alias = UCharacter.getPropertyName(propEnum, nameChoice);
|
||||
if (alias == null) break;
|
||||
if (nameChoice > 2) {
|
||||
System.out.println("Something wrong");
|
||||
}
|
||||
if (!result.contains(alias)) result.add(alias);
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ok, continue
|
||||
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
|
||||
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public String getFixedPropertyName(int propName, int nameChoice) {
|
||||
try {
|
||||
return UCharacter.getPropertyName(propEnum, nameChoice);
|
||||
} catch (IllegalArgumentException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private Map cccHack = new HashMap();
|
||||
boolean needCccHack = true;
|
||||
|
||||
public List _getAvailableValues(List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
if (propEnum == UProperty.AGE) {
|
||||
addAllUnique(new String[] {
|
||||
"unassigned","1.1","2.0","2.1","3.0","3.1","3.2","4.0"},
|
||||
result);
|
||||
return result;
|
||||
}
|
||||
if (propEnum < UProperty.INT_LIMIT) {
|
||||
if (Binary_Extras.isInRange(propEnum)) {
|
||||
propEnum = UProperty.BINARY_START; // HACK
|
||||
}
|
||||
int start = UCharacter.getIntPropertyMinValue(propEnum);
|
||||
int end = UCharacter.getIntPropertyMaxValue(propEnum);
|
||||
for (int i = start; i <= end; ++i) {
|
||||
String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
|
||||
String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
|
||||
if (alias == null) {
|
||||
alias = alias2;
|
||||
if (alias == null && propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
|
||||
alias = String.valueOf(i);
|
||||
}
|
||||
}
|
||||
if (needCccHack && propEnum == UProperty.CANONICAL_COMBINING_CLASS) { // HACK
|
||||
cccHack.put(alias, String.valueOf(i));
|
||||
}
|
||||
//System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
|
||||
addUnique(alias, result);
|
||||
}
|
||||
needCccHack = false;
|
||||
} else {
|
||||
String alias = getFixedValueAlias(null, -1,UProperty.NameChoice.LONG);
|
||||
addUnique(alias, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection _getValueAliases(String valueAlias, Collection result) {
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
for (int nameChoice = 0; ; ++nameChoice) {
|
||||
String alias = getFixedValueAlias(valueAlias, -1, nameChoice);
|
||||
if (nameChoice > 2) break;
|
||||
if (alias == null) continue;
|
||||
if (!result.contains(alias)) result.add(alias);
|
||||
if (propEnum == UProperty.AGE) {
|
||||
addUnique(valueAlias, result);
|
||||
return result;
|
||||
}
|
||||
if (propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
|
||||
addUnique(cccHack.get(valueAlias), result); // add number
|
||||
}
|
||||
addUnique(getFixedValueAlias(valueAlias, -1, UProperty.NameChoice.SHORT), result);
|
||||
addUnique(getFixedValueAlias(valueAlias, -1, UProperty.NameChoice.LONG), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -224,12 +250,16 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
*/
|
||||
private int internalGetPropertyType(int propEnum) {
|
||||
switch(propEnum) {
|
||||
//case UProperty.AGE:
|
||||
//case UProperty.NAME:
|
||||
//case UProperty.UNICODE_1_NAME:
|
||||
case UProperty.AGE:
|
||||
case UProperty.BLOCK:
|
||||
case UProperty.SCRIPT:
|
||||
return UnicodeProperty.CATALOG;
|
||||
case UProperty.ISO_COMMENT:
|
||||
case UProperty.NAME:
|
||||
case UProperty.UNICODE_1_NAME:
|
||||
return UnicodeProperty.MISC;
|
||||
case UProperty.BIDI_MIRRORING_GLYPH:
|
||||
case UProperty.CASE_FOLDING:
|
||||
case UProperty.ISO_COMMENT:
|
||||
case UProperty.LOWERCASE_MAPPING:
|
||||
case UProperty.SIMPLE_CASE_FOLDING:
|
||||
case UProperty.SIMPLE_LOWERCASE_MAPPING:
|
||||
|
@ -237,7 +267,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
case UProperty.SIMPLE_UPPERCASE_MAPPING:
|
||||
case UProperty.TITLECASE_MAPPING:
|
||||
case UProperty.UPPERCASE_MAPPING:
|
||||
return UnicodeProperty.EXTENDED_STRING;
|
||||
return UnicodeProperty.EXTENDED_STRING;
|
||||
}
|
||||
if (propEnum < UProperty.BINARY_START) return UnicodeProperty.UNKNOWN;
|
||||
if (propEnum < UProperty.BINARY_LIMIT) return UnicodeProperty.BINARY;
|
||||
|
@ -312,7 +342,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
;
|
||||
|
||||
private ICUPropertyFactory() {
|
||||
Collection c = getInternalAvailablePropertyAliases(new TreeSet());
|
||||
Collection c = getInternalAvailablePropertyAliases(new ArrayList());
|
||||
Iterator it = c.iterator();
|
||||
while (it.hasNext()) {
|
||||
add(getInternalProperty((String)it.next()));
|
||||
|
@ -327,7 +357,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
return singleton;
|
||||
}
|
||||
|
||||
public Collection getInternalAvailablePropertyAliases(Collection result) {
|
||||
public List getInternalAvailablePropertyAliases(List result) {
|
||||
int[][] ranges = {
|
||||
{UProperty.BINARY_START, UProperty.BINARY_LIMIT},
|
||||
{UProperty.INT_START, UProperty.INT_LIMIT},
|
||||
|
@ -337,6 +367,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
for (int i = 0; i < ranges.length; ++i) {
|
||||
for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
|
||||
String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
|
||||
UnicodeProperty.addUnique(alias, result);
|
||||
if (!result.contains(alias)) result.add(alias);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $
|
||||
* $Date: 2004/02/12 00:47:30 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2004/02/18 03:08:57 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -17,7 +17,8 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
public abstract class Tabber {
|
||||
static final byte LEFT = 0, CENTER = 1, RIGHT = 2;
|
||||
public static final byte LEFT = 0, CENTER = 1, RIGHT = 2;
|
||||
private static final String[] ALIGNMENT_NAMES = {"Left", "Center", "Right"};
|
||||
|
||||
/**
|
||||
* Repeats a string n times
|
||||
|
@ -54,18 +55,32 @@ public abstract class Tabber {
|
|||
public abstract void process_field(int count, String source, int start, int limit, StringBuffer output);
|
||||
|
||||
public static class MonoTabber extends Tabber {
|
||||
int minGap = 0;
|
||||
|
||||
private List stops = new ArrayList();
|
||||
private List types = new ArrayList();
|
||||
|
||||
public String toString() {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (int i = 0; i < stops.size(); ++i) {
|
||||
if (i != 0) buffer.append("; ");
|
||||
buffer
|
||||
.append(ALIGNMENT_NAMES[((Integer)types.get(i)).intValue()])
|
||||
.append(",")
|
||||
.append(stops.get(i));
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds tab stop and how to align the text UP TO that stop
|
||||
* @param tabPos
|
||||
* @param type
|
||||
*/
|
||||
public void addAbsolute(int tabPos, int type) {
|
||||
public MonoTabber addAbsolute(int tabPos, int type) {
|
||||
stops.add(new Integer(tabPos));
|
||||
types.add(new Integer(type));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -73,10 +88,11 @@ public abstract class Tabber {
|
|||
* @param tabPos
|
||||
* @param type
|
||||
*/
|
||||
public void add(int fieldWidth, byte type) {
|
||||
public MonoTabber add(int fieldWidth, byte type) {
|
||||
int last = getStop(stops.size()-1);
|
||||
stops.add(new Integer(last + fieldWidth));
|
||||
types.add(new Integer(type));
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getStop(int fieldNumber) {
|
||||
|
@ -116,7 +132,7 @@ public abstract class Tabber {
|
|||
public void process_field(int count, String source, int start, int limit, StringBuffer output) {
|
||||
String piece = source.substring(start, limit);
|
||||
int startPos = getStop(count-1);
|
||||
int endPos = getStop(count) - 1;
|
||||
int endPos = getStop(count) - minGap;
|
||||
int type = getType(count);
|
||||
switch (type) {
|
||||
case LEFT:
|
||||
|
@ -129,11 +145,9 @@ public abstract class Tabber {
|
|||
break;
|
||||
}
|
||||
|
||||
if (output.length() < startPos) {
|
||||
output.append(repeat(" ", startPos - output.length()));
|
||||
} else if (startPos != 0) { // don't do anything on first instance
|
||||
output.append(" "); // otherwise minimum of first space
|
||||
}
|
||||
int gap = startPos - output.length();
|
||||
if (count != 0 && gap < minGap) gap = minGap;
|
||||
if (gap > 0) output.append(repeat(" ", gap));
|
||||
output.append(piece);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $
|
||||
* $Date: 2004/02/12 00:47:30 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/02/18 03:08:57 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -16,6 +16,7 @@ package com.ibm.icu.dev.test.util;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Iterator;
|
||||
import java.io.IOException;
|
||||
|
@ -29,47 +30,78 @@ import com.ibm.icu.lang.UProperty;
|
|||
import com.ibm.icu.text.Transliterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
// TODO change to use test framework
|
||||
public class TestBagFormatter {
|
||||
|
||||
static final void generatePropertyAliases(boolean showValues) {
|
||||
generatePropertyAliases(showValues, ICUPropertyFactory.make());
|
||||
}
|
||||
|
||||
static final void generatePropertyAliases(boolean showValues, UnicodeProperty.Factory ups) {
|
||||
Collator order = Collator.getInstance(Locale.ENGLISH);
|
||||
UnicodeProperty.Factory ups = ICUPropertyFactory.make();
|
||||
TreeSet props = new TreeSet(order);
|
||||
TreeSet values = new TreeSet(order);
|
||||
Collection aliases = new ArrayList();
|
||||
BagFormatter bf = new BagFormatter();
|
||||
ups.getAvailableAliases(props);
|
||||
Iterator it = props.iterator();
|
||||
while (it.hasNext()) {
|
||||
String propAlias = (String)it.next();
|
||||
UnicodeProperty up = ups.getProperty(propAlias);
|
||||
System.out.println();
|
||||
aliases.clear();
|
||||
System.out.println(bf.join(up.getAliases(aliases)));
|
||||
if (!showValues) continue;
|
||||
values.clear();
|
||||
up.getAvailableValueAliases(values);
|
||||
Iterator it2 = values.iterator();
|
||||
while (it2.hasNext()) {
|
||||
String valueAlias = (String)it2.next();
|
||||
aliases.clear();
|
||||
System.out.println("\t" + bf.join(up.getValueAliases(valueAlias, aliases)));
|
||||
props.addAll(ups.getAvailableNames());
|
||||
for (int i = UnicodeProperty.BINARY; i < UnicodeProperty.LIMIT_TYPE; ++i) {
|
||||
System.out.println(UnicodeProperty.getTypeName(i));
|
||||
Iterator it = props.iterator();
|
||||
while (it.hasNext()) {
|
||||
String propAlias = (String)it.next();
|
||||
UnicodeProperty up = ups.getProperty(propAlias);
|
||||
int type = up.getType();
|
||||
if (type != i) continue;
|
||||
System.out.println();
|
||||
System.out.println(propAlias + "\t" + bf.join(up.getNameAliases()));
|
||||
if (!showValues) continue;
|
||||
values.clear();
|
||||
if (type == UnicodeProperty.NUMERIC || type == UnicodeProperty.EXTENDED_NUMERIC) {
|
||||
UnicodeMap um = new UnicodeMap();
|
||||
um.putAll(up);
|
||||
System.out.println(um.toString(new NumberComparator()));
|
||||
continue;
|
||||
}
|
||||
values.clear();
|
||||
values.addAll(up.getAvailableValues());
|
||||
Iterator it2 = values.iterator();
|
||||
while (it2.hasNext()) {
|
||||
String valueAlias = (String)it2.next();
|
||||
System.out.println("\t" + bf.join(valueAlias + "\t" + up.getValueAliases(valueAlias)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class NumberComparator implements Comparator {
|
||||
public int compare(Object o1, Object o2) {
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return 1;
|
||||
if (o2 == null) return -1;
|
||||
double n1 = Double.parseDouble((String)o1);
|
||||
double n2 = Double.parseDouble((String)o2);
|
||||
return n1 < n2 ? -1 : n1 > n2 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
System.out.println("Start");
|
||||
try {
|
||||
//readCharacters();
|
||||
UnicodeProperty prop = ICUPropertyFactory.make().getProperty("Canonicalcombiningclass");
|
||||
prop.getAvailableValues();
|
||||
|
||||
generatePropertyAliases(true);
|
||||
|
||||
BagFormatter bf = new BagFormatter();
|
||||
|
||||
UnicodeSet us = new UnicodeSet("[:numeric_value=2:]");
|
||||
UnicodeSet us = new UnicodeSet("[:gc=nd:]");
|
||||
BagFormatter.CONSOLE.println("[:gc=nd:]");
|
||||
bf.showSetNames(BagFormatter.CONSOLE,us);
|
||||
|
||||
us = new UnicodeSet("[:numeric_value=2:]");
|
||||
BagFormatter.CONSOLE.println("[:numeric_value=2:]");
|
||||
bf.showSetNames(BagFormatter.CONSOLE,us);
|
||||
|
||||
us = new UnicodeSet("[:numeric_type=numeric:]");
|
||||
BagFormatter.CONSOLE.println("[:numeric_type=numeric:]");
|
||||
bf.showSetNames(BagFormatter.CONSOLE,us);
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Method;
|
||||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
@ -11,6 +15,8 @@ import java.util.SortedSet;
|
|||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.AbstractTestLog;
|
||||
import com.ibm.icu.dev.test.TestBoilerplate;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -32,15 +38,16 @@ public class TestUtilities extends TestFmwk {
|
|||
Map map3 = new TreeMap();
|
||||
UnicodeMap.Equator equator = new UnicodeMap.SimpleEquator();
|
||||
SortedSet log = new TreeSet();
|
||||
static String[] TEST_VALUES = {null, "A", "B", "C", "D", "E", "F"};
|
||||
static Random random = new Random(12345);
|
||||
|
||||
public void TestUnicodeMap() {
|
||||
Random random = new Random(12345);
|
||||
String[] values = {null, "A", "B", "C", "D", "E", "F"};
|
||||
random.setSeed(12345);
|
||||
// do random change to both, then compare
|
||||
logln("Comparing against HashMap");
|
||||
for (int counter = 0; counter < ITERATIONS; ++counter) {
|
||||
int start = random.nextInt(LIMIT);
|
||||
String value = values[random.nextInt(values.length)];
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
String logline = Utility.hex(start) + "\t" + value;
|
||||
if (SHOW_PROGRESS) logln(counter + "\t" + logline);
|
||||
log.add(logline);
|
||||
|
@ -67,33 +74,30 @@ public class TestUtilities extends TestFmwk {
|
|||
Set values1 = (Set) map1.getAvailableValues(new TreeSet());
|
||||
Set values2 = new TreeSet(map2.values());
|
||||
Set temp;
|
||||
if (!values1.equals(values2)) {
|
||||
errln("Values differ:");
|
||||
errln("UnicodeMap - HashMap");
|
||||
temp = new TreeSet(values1);
|
||||
temp.removeAll(values2);
|
||||
errln(show(temp));
|
||||
errln("HashMap - UnicodeMap");
|
||||
temp = new TreeSet(values2);
|
||||
temp.removeAll(values1);
|
||||
errln(show(temp));
|
||||
} else {
|
||||
logln("Comparing Sets");
|
||||
for (Iterator it = values1.iterator(); it.hasNext();) {
|
||||
Object value = it.next();
|
||||
logln(value == null ? "null" : value.toString());
|
||||
UnicodeSet set1 = map1.getSet(value);
|
||||
UnicodeSet set2 = getSet(map2, value);
|
||||
if (!set1.equals(set2)) {
|
||||
errln("Sets differ:");
|
||||
errln("UnicodeMap - HashMap");
|
||||
errln(new UnicodeSet(set1).removeAll(set2).toPattern(true));
|
||||
errln("HashMap - UnicodeMap");
|
||||
errln(new UnicodeSet(set2).removeAll(set1).toPattern(true));
|
||||
}
|
||||
}
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, values1, values2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
|
||||
logln("Comparing Sets");
|
||||
for (Iterator it = values1.iterator(); it.hasNext();) {
|
||||
Object value = it.next();
|
||||
logln(value == null ? "null" : value.toString());
|
||||
UnicodeSet set1 = map1.getSet(value);
|
||||
UnicodeSet set2 = TestBoilerplate.getSet(map2, value);
|
||||
if (!TestBoilerplate.verifySetsIdentical(this, set1, set2)) {
|
||||
throw new IllegalArgumentException("Halting");
|
||||
}
|
||||
}
|
||||
|
||||
// check boilerplate
|
||||
List argList = new ArrayList();
|
||||
argList.add("TestMain");
|
||||
if (params.nothrow) argList.add("-nothrow");
|
||||
if (params.verbose) argList.add("-verbose");
|
||||
String[] args = new String[argList.size()];
|
||||
argList.toArray(args);
|
||||
new UnicodeMapBoilerplate().run(args);
|
||||
// TODO: the following is not being reached
|
||||
new UnicodeSetBoilerplate().run(args);
|
||||
}
|
||||
|
||||
public void check(int counter) {
|
||||
|
@ -105,9 +109,8 @@ public class TestUtilities extends TestFmwk {
|
|||
+ "\t UnicodeMap: " + value1
|
||||
+ "\t HashMap: " + value2);
|
||||
errln("UnicodeMap: " + map1);
|
||||
errln("Log: " + show(log));
|
||||
errln("HashMap: " + show(map2));
|
||||
throw new IllegalArgumentException("Halting");
|
||||
errln("Log: " + TestBoilerplate.show(log));
|
||||
errln("HashMap: " + TestBoilerplate.show(map2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -189,31 +192,78 @@ public class TestUtilities extends TestFmwk {
|
|||
return (end-start)/1000/iterations;
|
||||
}
|
||||
|
||||
String show(Collection c) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (Iterator it = c.iterator(); it.hasNext();) {
|
||||
buffer.append(it.next() + "\r\n");
|
||||
static class UnicodeMapBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 30) return false;
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
int start = random.nextInt(25);
|
||||
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
|
||||
result.put(start, value);
|
||||
}
|
||||
list.add(result);
|
||||
return true;
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
String show(Map m) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
|
||||
Object key = it.next();
|
||||
buffer.append(key + "=>" + m.get(key) + "\r\n");
|
||||
static class StringBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 31) return false;
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
result.append((char)random.nextInt(0xFF));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
UnicodeSet getSet(Map m, Object value) {
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
|
||||
Object key = it.next();
|
||||
Object val = m.get(key);
|
||||
if (!val.equals(value)) continue;
|
||||
result.add(((Integer)key).intValue());
|
||||
static class UnicodeSetBoilerplate extends TestBoilerplate {
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean _hasSameBehavior(Object a, Object b) {
|
||||
// we are pretty confident in the equals method, so won't bother with this right now.
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see com.ibm.icu.dev.test.TestBoilerplate#_createTestObject()
|
||||
*/
|
||||
protected boolean _addTestObject(List list) {
|
||||
if (list.size() > 32) return false;
|
||||
UnicodeSet result = new UnicodeSet();
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
result.add(random.nextInt(100));
|
||||
}
|
||||
list.add(result.toString());
|
||||
return true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
|
@ -21,15 +21,18 @@ public abstract class UnicodeLabel {
|
|||
public int getMaxWidth(boolean isShort) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static class Hex extends UnicodeLabel {
|
||||
public String getValue(int codepoint, boolean isShort) {
|
||||
if (isShort) return Utility.hex(codepoint,4);
|
||||
return "U+" + Utility.hex(codepoint,4);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Constant extends UnicodeLabel {
|
||||
private String value;
|
||||
public Constant(String value) {
|
||||
if (value == null) value = "";
|
||||
this.value = value;
|
||||
}
|
||||
public String getValue(int codepoint, boolean isShort) {
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
@ -14,7 +18,7 @@ import com.ibm.icu.text.UnicodeSetIterator;
|
|||
* @author Davis
|
||||
*/
|
||||
// TODO Optimize using range map
|
||||
public final class UnicodeMap {
|
||||
public final class UnicodeMap implements Cloneable {
|
||||
static final boolean ASSERTIONS = false;
|
||||
static final long GROWTH_PERCENT = 200; // 100 is no growth!
|
||||
static final long GROWTH_GAP = 10; // extra bump!
|
||||
|
@ -23,8 +27,48 @@ public final class UnicodeMap {
|
|||
private int[] transitions = {0,0x110000,0,0,0,0,0,0,0,0};
|
||||
private Object[] values = new Object[10];
|
||||
{
|
||||
values[1] = "TERMINAL";
|
||||
values[1] = "TERMINAL"; // just for debugging
|
||||
}
|
||||
private int lastIndex = 0;
|
||||
|
||||
/* Boilerplate */
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) return false;
|
||||
try {
|
||||
UnicodeMap that = (UnicodeMap) other;
|
||||
if (length != that.length || !equator.equals(that.equator)) return false;
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
if (transitions[i] != that.transitions[i]) return false;
|
||||
if (!equator.isEqual(values[i], that.values[i])) return false;
|
||||
}
|
||||
return true;
|
||||
} catch (ClassCastException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int result = length;
|
||||
// TODO might want to abbreviate this for speed.
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
result = 37*result + transitions[i];
|
||||
result = 37*result + equator.getHashCode(values[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Standard clone. Warning, as with Collections, does not do deep clone.
|
||||
*/
|
||||
public Object clone() {
|
||||
UnicodeMap that = new UnicodeMap();
|
||||
that.length = length;
|
||||
that.transitions = (int[]) transitions.clone();
|
||||
that.values = (Object[]) values.clone();
|
||||
return that;
|
||||
}
|
||||
|
||||
/* for internal consistency checking */
|
||||
|
||||
void _checkInvariants() {
|
||||
if (length < 2
|
||||
|
@ -62,17 +106,28 @@ public final class UnicodeMap {
|
|||
* @return
|
||||
*/
|
||||
public boolean isEqual(Object a, Object b);
|
||||
|
||||
/**
|
||||
* @param object
|
||||
* @return
|
||||
*/
|
||||
public int getHashCode(Object object);
|
||||
}
|
||||
|
||||
public static class SimpleEquator implements Equator {
|
||||
public static final class SimpleEquator implements Equator {
|
||||
public boolean isEqual(Object a, Object b) {
|
||||
if (a == b) return true;
|
||||
if (a == null || b == null) return false;
|
||||
return a.equals(b);
|
||||
}
|
||||
public int getHashCode(Object a) {
|
||||
if (a == null) return 0;
|
||||
return a.hashCode();
|
||||
}
|
||||
}
|
||||
private Equator equator = new SimpleEquator();
|
||||
|
||||
private static Equator SIMPLE = new SimpleEquator();
|
||||
private Equator equator = SIMPLE;
|
||||
|
||||
/**
|
||||
* Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
|
||||
* Assumes that 0 <= codepoint <= 0x10FFFF
|
||||
|
@ -181,41 +236,53 @@ public final class UnicodeMap {
|
|||
* @return this, for chaining
|
||||
*/
|
||||
private UnicodeMap _put(int codepoint, Object value) {
|
||||
int baseIndex = _findIndex(codepoint);
|
||||
// Warning: baseIndex is an invariant; must
|
||||
// be defined such that transitions[baseIndex] < codepoint
|
||||
// at end of this routine.
|
||||
int baseIndex;
|
||||
if (transitions[lastIndex] <= codepoint
|
||||
&& codepoint < transitions[lastIndex+1]) {
|
||||
baseIndex = lastIndex;
|
||||
} else {
|
||||
baseIndex = _findIndex(codepoint);
|
||||
}
|
||||
int limitIndex = baseIndex + 1;
|
||||
// cases are (a) value is already set
|
||||
if (equator.isEqual(values[baseIndex], value)) return this;
|
||||
int baseCP = transitions[baseIndex];
|
||||
int limitCP = transitions[limitIndex];
|
||||
// CASE: At very start of range
|
||||
// we now start walking through the difference case,
|
||||
// based on whether we are at the start or end of range
|
||||
// and whether the range is a single character or multiple
|
||||
|
||||
if (baseCP == codepoint) {
|
||||
// CASE: At very start of range
|
||||
boolean connectsWithPrevious =
|
||||
baseIndex != 0 && equator.isEqual(value, values[baseIndex-1]);
|
||||
|
||||
// CASE: Single codepoint range
|
||||
if (limitCP == codepoint + 1) {
|
||||
// CASE: Single codepoint range
|
||||
boolean connectsWithFollowing =
|
||||
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
|
||||
// A1a connects with previous & following, so remove index
|
||||
|
||||
if (connectsWithPrevious) {
|
||||
// A1a connects with previous & following, so remove index
|
||||
if (connectsWithFollowing) {
|
||||
_removeAt(baseIndex, 2);
|
||||
return this;
|
||||
} else {
|
||||
_removeAt(baseIndex, 1); // extend previous
|
||||
}
|
||||
_removeAt(baseIndex, 1); // extend previous
|
||||
return this;
|
||||
--baseIndex; // fix up
|
||||
} else if (connectsWithFollowing) {
|
||||
_removeAt(baseIndex, 1); // extend following backwards
|
||||
transitions[baseIndex] = codepoint;
|
||||
return this;
|
||||
} else {
|
||||
// doesn't connect on either side, just reset
|
||||
values[baseIndex] = value;
|
||||
}
|
||||
// doesn't connect on either side, just reset
|
||||
values[baseIndex] = value;
|
||||
return this;
|
||||
}
|
||||
} else if (connectsWithPrevious) {
|
||||
// A.1: start of multi codepoint range
|
||||
// if connects
|
||||
if (connectsWithPrevious) {
|
||||
++transitions[baseIndex]; // extend previous
|
||||
} else {
|
||||
// otherwise insert new transition
|
||||
|
@ -224,10 +291,8 @@ public final class UnicodeMap {
|
|||
values[baseIndex] = value;
|
||||
transitions[baseIndex] = codepoint;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
// CASE: at end of range
|
||||
if (limitCP == codepoint + 1) {
|
||||
} else if (limitCP == codepoint + 1) {
|
||||
// CASE: at end of range
|
||||
// if connects, just back up range
|
||||
boolean connectsWithFollowing =
|
||||
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
|
||||
|
@ -240,14 +305,16 @@ public final class UnicodeMap {
|
|||
transitions[limitIndex] = codepoint;
|
||||
values[limitIndex] = value;
|
||||
}
|
||||
return this;
|
||||
} else {
|
||||
// CASE: in middle of range
|
||||
// insert gap, then set the new range
|
||||
_insertGapAt(++baseIndex,2);
|
||||
transitions[baseIndex] = codepoint;
|
||||
values[baseIndex] = value;
|
||||
transitions[baseIndex+1] = codepoint + 1;
|
||||
values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
|
||||
}
|
||||
// CASE: in middle of range
|
||||
_insertGapAt(++baseIndex,2);
|
||||
transitions[baseIndex] = codepoint;
|
||||
values[baseIndex] = value;
|
||||
transitions[++baseIndex] = codepoint + 1;
|
||||
values[baseIndex] = values[baseIndex-2]; // copy lower range values
|
||||
lastIndex = baseIndex; // store for next time
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
|
@ -331,7 +398,9 @@ public final class UnicodeMap {
|
|||
public UnicodeSet getSet(Object value, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
if (values[i] == value) result.add(transitions[i], transitions[i+1]-1);
|
||||
if (equator.isEqual(value, values[i])) {
|
||||
result.add(transitions[i], transitions[i+1]-1);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -339,14 +408,15 @@ public final class UnicodeMap {
|
|||
return getSet(value,null);
|
||||
}
|
||||
/**
|
||||
* Returns the list of possible values. Deposits into
|
||||
* result if it is not null. Remember to clear if you just want
|
||||
* Returns the list of possible values. Deposits each non-null value into
|
||||
* result. Creates result if it is null. Remember to clear result if
|
||||
* you are not appending to existing collection.
|
||||
* @param result
|
||||
* @return
|
||||
*/
|
||||
public Collection getAvailableValues(Collection result) {
|
||||
if (result == null) result = new HashSet();
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
for (int i = 0; i < length - 1; ++i) {
|
||||
Object value = values[i];
|
||||
if (value == null) continue;
|
||||
if (result.contains(value)) continue;
|
||||
|
@ -354,6 +424,13 @@ public final class UnicodeMap {
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method
|
||||
*/
|
||||
public Collection getAvailableValues() {
|
||||
return getAvailableValues(null);
|
||||
}
|
||||
/**
|
||||
* Gets the value associated with a given code point.
|
||||
* Returns null, if there is no such value.
|
||||
|
@ -368,18 +445,33 @@ public final class UnicodeMap {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return toString(null);
|
||||
}
|
||||
public String toString(Comparator collected) {
|
||||
StringBuffer result = new StringBuffer();
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
Object value = values[i];
|
||||
if (value == null) continue;
|
||||
int start = transitions[i];
|
||||
int end = transitions[i+1]-1;
|
||||
result.append(Utility.hex(start));
|
||||
if (start != end) result.append("..")
|
||||
.append(Utility.hex(end));
|
||||
result.append("\t=>")
|
||||
.append(values[i] == null ? "null" : values[i].toString())
|
||||
.append("\r\n");
|
||||
if (collected == null) {
|
||||
for (int i = 0; i < length-1; ++i) {
|
||||
Object value = values[i];
|
||||
if (value == null) continue;
|
||||
int start = transitions[i];
|
||||
int end = transitions[i+1]-1;
|
||||
result.append(Utility.hex(start));
|
||||
if (start != end) result.append("..")
|
||||
.append(Utility.hex(end));
|
||||
result.append("\t=> ")
|
||||
.append(values[i] == null ? "null" : values[i].toString())
|
||||
.append("\r\n");
|
||||
}
|
||||
} else {
|
||||
Set set = (Set) getAvailableValues(new TreeSet(collected));
|
||||
for (Iterator it = set.iterator(); it.hasNext();) {
|
||||
Object value = it.next();
|
||||
UnicodeSet s = getSet(value);
|
||||
result.append(value)
|
||||
.append("\t=> ")
|
||||
.append(s.toPattern(true))
|
||||
.append("\r\n");
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
|
|
@ -14,26 +14,41 @@ import java.util.Map;
|
|||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import sun.io.UnknownCharacterException;
|
||||
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
|
||||
public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
|
||||
public static boolean DEBUG = false;
|
||||
public static boolean DEBUG = true;
|
||||
public static String CHECK_NAME = "FC_NFKC_Closure";
|
||||
public static int CHECK_VALUE = 0x037A;
|
||||
|
||||
private String propertyAlias;
|
||||
private String shortestPropertyAlias = null;
|
||||
private String name;
|
||||
private String firstNameAlias = null;
|
||||
private int type;
|
||||
private Map valueToShortValue = null;
|
||||
private Map valueToFirstValueAlias = null;
|
||||
|
||||
public static final int UNKNOWN = 0,
|
||||
BINARY = 2, EXTENDED_BINARY = 3,
|
||||
ENUMERATED = 4, EXTENDED_ENUMERATED = 5,
|
||||
NUMERIC = 6, EXTENDED_NUMERIC = 7,
|
||||
STRING = 8, EXTENDED_STRING = 9,
|
||||
LIMIT_TYPE = 10,
|
||||
EXTENDED_BIT = 1;
|
||||
CATALOG = 6, EXTENDED_CATALOG = 7,
|
||||
MISC = 8, EXTENDED_MISC = 9,
|
||||
STRING = 10, EXTENDED_STRING = 11,
|
||||
NUMERIC = 12, EXTENDED_NUMERIC = 13,
|
||||
START_TYPE = 2,
|
||||
LIMIT_TYPE = 14,
|
||||
EXTENDED_MASK = 1,
|
||||
CORE_MASK = ~EXTENDED_MASK,
|
||||
BINARY_MASK = (1<<BINARY) | (1<<EXTENDED_BINARY),
|
||||
STRING_OR_MISC_MASK = (1<<STRING) | (1<<EXTENDED_STRING)
|
||||
| (1<<MISC) | (1<<EXTENDED_MISC),
|
||||
ENUMERATED_OR_CATALOG_MASK = (1<<ENUMERATED) | (1<<EXTENDED_ENUMERATED)
|
||||
| (1<<CATALOG) | (1<<EXTENDED_CATALOG);
|
||||
|
||||
|
||||
private static final String[] TYPE_NAMES = {
|
||||
"Unknown",
|
||||
|
@ -42,10 +57,14 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
"Extended Binary",
|
||||
"Enumerated",
|
||||
"Extended Enumerated",
|
||||
"Numeric",
|
||||
"Extended Numeric",
|
||||
"Catalog",
|
||||
"Extended Catalog",
|
||||
"Miscellaneous",
|
||||
"Extended Miscellaneous",
|
||||
"String",
|
||||
"Extended String",
|
||||
"Numeric",
|
||||
"Extended Numeric",
|
||||
};
|
||||
|
||||
public static String getTypeName(int propType) {
|
||||
|
@ -53,15 +72,20 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
public final String getName() {
|
||||
return propertyAlias;
|
||||
return name;
|
||||
}
|
||||
|
||||
public final int getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public final boolean isType(int mask) {
|
||||
return ((1<<type) & mask) != 0;
|
||||
}
|
||||
|
||||
protected final void setName(String string) {
|
||||
propertyAlias = string;
|
||||
if (string == null) throw new IllegalArgumentException("Name must not be null");
|
||||
name = string;
|
||||
}
|
||||
|
||||
protected final void setType(int i) {
|
||||
|
@ -72,48 +96,59 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return _getVersion();
|
||||
}
|
||||
public String getValue(int codepoint) {
|
||||
if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) {
|
||||
String value = _getValue(codepoint);
|
||||
System.out.println(getName() + "(" + Utility.hex(codepoint) + "):" +
|
||||
(getType() == STRING ? Utility.hex(value) : value));
|
||||
return value;
|
||||
}
|
||||
return _getValue(codepoint);
|
||||
}
|
||||
public Collection getAliases(Collection result) {
|
||||
return _getAliases(result);
|
||||
|
||||
public List getNameAliases(List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
return _getNameAliases(result);
|
||||
}
|
||||
public Collection getValueAliases(String valueAlias, Collection result) {
|
||||
public List getValueAliases(String valueAlias, List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
result = _getValueAliases(valueAlias, result);
|
||||
if (!result.contains(valueAlias) && type < NUMERIC) {
|
||||
throw new IllegalArgumentException(
|
||||
"Internal error: result doesn't contain " + valueAlias);
|
||||
"Internal error: " + getName() + " doesn't contain " + valueAlias
|
||||
+ ": " + new BagFormatter().join(result));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
public Collection getAvailableValueAliases(Collection result) {
|
||||
return _getAvailableValueAliases(result);
|
||||
public List getAvailableValues(List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
return _getAvailableValues(result);
|
||||
}
|
||||
|
||||
protected abstract String _getVersion();
|
||||
protected abstract String _getValue(int codepoint);
|
||||
protected abstract Collection _getAliases(Collection result);
|
||||
protected abstract Collection _getValueAliases(String valueAlias, Collection result);
|
||||
protected abstract Collection _getAvailableValueAliases(Collection result);
|
||||
protected abstract List _getNameAliases(List result);
|
||||
protected abstract List _getValueAliases(String valueAlias, List result);
|
||||
protected abstract List _getAvailableValues(List result);
|
||||
|
||||
// conveniences
|
||||
public final Collection getAliases() {
|
||||
return _getAliases(null);
|
||||
public final List getNameAliases() {
|
||||
return getNameAliases(null);
|
||||
}
|
||||
public final Collection getValueAliases(String valueAlias) {
|
||||
return _getValueAliases(valueAlias, null);
|
||||
public final List getValueAliases(String valueAlias) {
|
||||
return getValueAliases(valueAlias, null);
|
||||
}
|
||||
public final Collection getAvailableValueAliases() {
|
||||
return _getAvailableValueAliases(null);
|
||||
public final List getAvailableValues() {
|
||||
return getAvailableValues(null);
|
||||
}
|
||||
|
||||
static public class Factory {
|
||||
Map canonicalNames = new TreeMap();
|
||||
Map skeletonNames = new TreeMap();
|
||||
Map propertyCache = new HashMap();
|
||||
Map propertyCache = new HashMap(1);
|
||||
|
||||
public final Factory add(UnicodeProperty sp) {
|
||||
canonicalNames.put(sp.getName(), sp);
|
||||
Collection c = sp.getAliases(new TreeSet());
|
||||
List c = sp.getNameAliases(new ArrayList(1));
|
||||
Iterator it = c.iterator();
|
||||
while (it.hasNext()) {
|
||||
skeletonNames.put(toSkeleton((String)it.next()), sp);
|
||||
|
@ -125,23 +160,34 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias));
|
||||
}
|
||||
|
||||
public final Collection getAvailableAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
public final List getAvailableNames() {
|
||||
return getAvailableNames(null);
|
||||
}
|
||||
|
||||
public final List getAvailableNames(List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
Iterator it = canonicalNames.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
addUnique(it.next(), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
public final Collection getAvailableAliases() {
|
||||
return getAvailableAliases(null);
|
||||
}
|
||||
|
||||
public final Collection getAvailablePropertyAliases(Collection result, int propertyTypeMask) {
|
||||
public final List getAvailableNames(int propertyTypeMask) {
|
||||
return getAvailableNames(propertyTypeMask, null);
|
||||
}
|
||||
|
||||
public final List getAvailableNames(int propertyTypeMask, List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
Iterator it = canonicalNames.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
UnicodeProperty property = (UnicodeProperty)it.next();
|
||||
if (((1<<property.getType())& propertyTypeMask) == 0) continue;
|
||||
String item = (String)it.next();
|
||||
UnicodeProperty property = getProperty(item);
|
||||
if (DEBUG) System.out.println("Properties: " + item + "," + property.getType());
|
||||
if (!property.isType(propertyTypeMask)) {
|
||||
//System.out.println("Masking: " + property.getType() + "," + propertyTypeMask);
|
||||
continue;
|
||||
}
|
||||
addUnique(property.getName(), result);
|
||||
}
|
||||
return result;
|
||||
|
@ -164,7 +210,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
UnicodeProperty up = getProperty(prop);
|
||||
if (matcher == null) {
|
||||
matcher = new SimpleMatcher(value,
|
||||
up.getType() >= STRING ? null : new SkeletonComparator());
|
||||
up.isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR);
|
||||
}
|
||||
if (negative) {
|
||||
inverseMatcher.set(matcher);
|
||||
|
@ -186,6 +232,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
protected StringFilter filter;
|
||||
protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
|
||||
protected HashMap backmap;
|
||||
boolean allowValueAliasCollisions = false;
|
||||
|
||||
public FilteredProperty(UnicodeProperty property, StringFilter filter) {
|
||||
this.property = property;
|
||||
|
@ -201,52 +248,63 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return this;
|
||||
}
|
||||
|
||||
Collection temp = new ArrayList();
|
||||
List temp = new ArrayList(1);
|
||||
|
||||
public Collection _getAvailableValueAliases(Collection result) {
|
||||
public List _getAvailableValues(List result) {
|
||||
temp.clear();
|
||||
return filter.addUnique(property.getAvailableValueAliases(temp), result);
|
||||
return filter.addUnique(property.getAvailableValues(temp), result);
|
||||
}
|
||||
|
||||
public Collection _getAliases(Collection result) {
|
||||
public List _getNameAliases(List result) {
|
||||
temp.clear();
|
||||
return filter.addUnique(
|
||||
property.getAliases(temp), result);
|
||||
property.getNameAliases(temp), result);
|
||||
}
|
||||
|
||||
public String _getValue(int codepoint) {
|
||||
return filter.remap(property.getValue(codepoint));
|
||||
}
|
||||
|
||||
public Collection _getValueAliases(String valueAlias, Collection result) {
|
||||
temp.clear();
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (backmap == null) {
|
||||
backmap = new HashMap();
|
||||
backmap = new HashMap(1);
|
||||
temp.clear();
|
||||
Iterator it = property.getAvailableValueAliases(temp).iterator();
|
||||
Iterator it = property.getAvailableValues(temp).iterator();
|
||||
while (it.hasNext()) {
|
||||
String item = (String) it.next();
|
||||
String mappedItem = filter.remap(item);
|
||||
if (backmap.get(mappedItem) != null) {
|
||||
throw new IllegalArgumentException("Filter makes values collide!");
|
||||
if (backmap.get(mappedItem) != null && !allowValueAliasCollisions) {
|
||||
throw new IllegalArgumentException("Filter makes values collide! "
|
||||
+ item + ", " + mappedItem);
|
||||
}
|
||||
backmap.put(mappedItem, item);
|
||||
}
|
||||
}
|
||||
return filter.addUnique(
|
||||
property.getValueAliases((String) backmap.get(valueAlias), temp), result);
|
||||
valueAlias = (String) backmap.get(valueAlias);
|
||||
temp.clear();
|
||||
return filter.addUnique(property.getValueAliases(valueAlias, temp), result);
|
||||
}
|
||||
|
||||
public String _getVersion() {
|
||||
return property.getVersion();
|
||||
}
|
||||
|
||||
public boolean isAllowValueAliasCollisions() {
|
||||
return allowValueAliasCollisions;
|
||||
}
|
||||
|
||||
public FilteredProperty setAllowValueAliasCollisions(boolean b) {
|
||||
allowValueAliasCollisions = b;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static abstract class StringFilter implements Cloneable {
|
||||
public abstract String remap(String original);
|
||||
public final Collection addUnique(Collection source, Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
|
||||
public final List addUnique(Collection source, List result) {
|
||||
if (result == null) result = new ArrayList(1);
|
||||
Iterator it = source.iterator();
|
||||
while (it.hasNext()) {
|
||||
UnicodeProperty.addUnique(
|
||||
|
@ -322,123 +380,139 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
public static abstract class SimpleProperty extends UnicodeProperty {
|
||||
private String shortAlias;
|
||||
Collection valueAliases = new ArrayList();
|
||||
Map toAlternates = new HashMap();
|
||||
private List propertyAliases = new ArrayList(1);
|
||||
List values;
|
||||
Map toValueAliases = new HashMap(1);
|
||||
String version;
|
||||
|
||||
public SimpleProperty setMain(String alias, String shortAlias, int propertyType,
|
||||
String version) {
|
||||
setName(alias);
|
||||
setType(propertyType);
|
||||
this.shortAlias = shortAlias;
|
||||
propertyAliases.add(shortAlias);
|
||||
propertyAliases.add(alias);
|
||||
this.version = version;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleProperty addName(String alias) {
|
||||
propertyAliases.add(alias);
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleProperty setValues(String valueAlias) {
|
||||
setValues(new String[]{valueAlias}, null);
|
||||
_addToValues(valueAlias, null);
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleProperty setValues(String[] valueAliases, String[] alternateValueAliases) {
|
||||
this.valueAliases = Arrays.asList((Object[]) valueAliases.clone());
|
||||
|
||||
for (int i = 0; i < valueAliases.length; ++i) {
|
||||
List a = new ArrayList();
|
||||
addUnique(valueAliases[i],a);
|
||||
if (alternateValueAliases != null) addUnique(alternateValueAliases[i],a);
|
||||
toAlternates.put(valueAliases[i], a);
|
||||
if (valueAliases[i].equals(UNUSED)) continue;
|
||||
_addToValues(valueAliases[i],
|
||||
alternateValueAliases != null ? alternateValueAliases[i] : null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SimpleProperty setValues(Collection valueAliases) {
|
||||
this.valueAliases = new ArrayList(valueAliases);
|
||||
for (Iterator it = this.valueAliases.iterator(); it.hasNext(); ) {
|
||||
Object item = it.next();
|
||||
List list = new ArrayList();
|
||||
list.add(item);
|
||||
toAlternates.put(item, list);
|
||||
public SimpleProperty setValues(List valueAliases) {
|
||||
this.values = new ArrayList(valueAliases);
|
||||
for (Iterator it = this.values.iterator(); it.hasNext(); ) {
|
||||
_addToValues(it.next(), null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public Collection _getAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
addUnique(getName(), result);
|
||||
addUnique(shortAlias, result);
|
||||
|
||||
public List _getNameAliases(List result) {
|
||||
addAllUnique(propertyAliases, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection _getValueAliases(String valueAlias, Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
Collection a = (Collection) toAlternates.get(valueAlias);
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (toValueAliases == null) _fillValues();
|
||||
List a = (List) toValueAliases.get(valueAlias);
|
||||
if (a != null) addAllUnique(a, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection _getAvailableValueAliases(Collection result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
result.addAll(valueAliases);
|
||||
public List _getAvailableValues(List result) {
|
||||
if (values == null) _fillValues();
|
||||
result.addAll(values);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void _fillValues() {
|
||||
List newvalues = (List) getUnicodeMap().getAvailableValues(new ArrayList());
|
||||
for (Iterator it = newvalues.iterator(); it.hasNext();) {
|
||||
_addToValues(it.next(), null);
|
||||
}
|
||||
}
|
||||
|
||||
private void _addToValues(Object item, Object alias) {
|
||||
if (values == null) values = new ArrayList(1);
|
||||
addUnique(item, values);
|
||||
List aliases = (List) toValueAliases.get(item);
|
||||
if (aliases == null) {
|
||||
aliases = new ArrayList(1);
|
||||
toValueAliases.put(item, aliases);
|
||||
}
|
||||
addUnique(alias, aliases);
|
||||
addUnique(item, aliases);
|
||||
}
|
||||
|
||||
public String _getVersion() {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public final String getValue(int codepoint, boolean getShortest) {
|
||||
String result = getValue(codepoint);
|
||||
if (!getShortest || result == null) return result;
|
||||
return getShortestValueAlias(result);
|
||||
if (type >= MISC || result == null || !getShortest) return result;
|
||||
return getFirstValueAlias(result);
|
||||
}
|
||||
|
||||
public final String getShortestValueAlias(String value) {
|
||||
if (valueToShortValue == null) getValueCache();
|
||||
return (String)valueToShortValue.get(value);
|
||||
}
|
||||
|
||||
public final String getShortestAlias() {
|
||||
if (shortestPropertyAlias == null) {
|
||||
shortestPropertyAlias = propertyAlias;
|
||||
for (Iterator it = _getAliases(null).iterator(); it.hasNext();) {
|
||||
String item = (String) it.next();
|
||||
if (item.length() < shortestPropertyAlias.length()) {
|
||||
shortestPropertyAlias = item;
|
||||
}
|
||||
}
|
||||
public final String getFirstNameAlias() {
|
||||
if (firstNameAlias == null) {
|
||||
firstNameAlias = (String) getNameAliases().get(0);
|
||||
}
|
||||
return shortestPropertyAlias;
|
||||
return firstNameAlias;
|
||||
}
|
||||
|
||||
private void getValueCache() {
|
||||
public final String getFirstValueAlias(String value) {
|
||||
if (valueToFirstValueAlias == null) _getFirstValueAliasCache();
|
||||
return (String)valueToFirstValueAlias.get(value);
|
||||
}
|
||||
|
||||
private void _getFirstValueAliasCache() {
|
||||
maxValueWidth = 0;
|
||||
maxShortestValueWidth = 0;
|
||||
valueToShortValue = new HashMap();
|
||||
Iterator it = getAvailableValueAliases(null).iterator();
|
||||
maxFirstValueAliasWidth = 0;
|
||||
valueToFirstValueAlias = new HashMap(1);
|
||||
Iterator it = getAvailableValues().iterator();
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
String shortest = value;
|
||||
Iterator it2 = getValueAliases(value, null).iterator();
|
||||
while (it2.hasNext()) {
|
||||
String other = (String)it2.next();
|
||||
if (shortest.length() > other.length()) shortest = other;
|
||||
String first = (String) getValueAliases(value).get(0);
|
||||
if (first == null) { // internal error
|
||||
throw new IllegalArgumentException("Value not in value aliases: " + value);
|
||||
}
|
||||
valueToShortValue.put(value,shortest);
|
||||
if (value.length() > maxValueWidth) maxValueWidth = value.length();
|
||||
if (shortest.length() > maxShortestValueWidth) maxShortestValueWidth = shortest.length();
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println("First Alias: " + getName() + ": " + value + " => "
|
||||
+ first + new BagFormatter().join(getValueAliases(value)));
|
||||
}
|
||||
valueToFirstValueAlias.put(value,first);
|
||||
if (value.length() > maxValueWidth) {
|
||||
maxValueWidth = value.length();
|
||||
}
|
||||
if (first.length() > maxFirstValueAliasWidth) {
|
||||
maxFirstValueAliasWidth = first.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int maxValueWidth = -1;
|
||||
private int maxShortestValueWidth = -1;
|
||||
private int maxFirstValueAliasWidth = -1;
|
||||
|
||||
public final int getMaxWidth(boolean getShortest) {
|
||||
if (maxValueWidth < 0) getValueCache();
|
||||
if (getShortest) return maxShortestValueWidth;
|
||||
public int getMaxWidth(boolean getShortest) {
|
||||
if (maxValueWidth < 0) _getFirstValueAliasCache();
|
||||
if (getShortest) return maxFirstValueAliasWidth;
|
||||
return maxValueWidth;
|
||||
}
|
||||
|
||||
|
@ -450,17 +524,18 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
|
||||
int type = getType();
|
||||
return getSet(new SimpleMatcher(propertyValue,
|
||||
type >= STRING ? null : new SkeletonComparator()),
|
||||
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
|
||||
result);
|
||||
}
|
||||
|
||||
private UnicodeMap cacheValueToSet = null;
|
||||
private UnicodeMap unicodeMap = null;
|
||||
|
||||
public static final String UNUSED = "??";
|
||||
|
||||
public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
if (type >= STRING) {
|
||||
if (isType(STRING_OR_MISC_MASK)) {
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
String value = getValue(i);
|
||||
if (matcher.matches(value)) {
|
||||
|
@ -469,9 +544,9 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
return result;
|
||||
}
|
||||
if (cacheValueToSet == null) cacheValueToSet = _getUnicodeMap();
|
||||
Collection temp = new HashSet(); // to avoid reallocating...
|
||||
Iterator it = cacheValueToSet.getAvailableValues(null).iterator();
|
||||
List temp = new ArrayList(1); // to avoid reallocating...
|
||||
UnicodeMap um = getUnicodeMap();
|
||||
Iterator it = um.getAvailableValues(null).iterator();
|
||||
main:
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
|
@ -479,10 +554,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
Iterator it2 = getValueAliases(value,temp).iterator();
|
||||
while (it2.hasNext()) {
|
||||
String value2 = (String)it2.next();
|
||||
System.out.println("Values:" + value2);
|
||||
//System.out.println("Values:" + value2);
|
||||
if (matcher.matches(value2)
|
||||
|| matcher.matches(toSkeleton(value2))) {
|
||||
cacheValueToSet.getSet(value, result);
|
||||
um.getSet(value, result);
|
||||
continue main;
|
||||
}
|
||||
}
|
||||
|
@ -490,21 +565,6 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return result;
|
||||
}
|
||||
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
|
||||
result.put(i, getValue(i));
|
||||
}
|
||||
if (DEBUG) {
|
||||
System.out.println(getName() + ":\t" + getClass().getName()
|
||||
+ "\t" + getVersion());
|
||||
System.out.println(getStack());
|
||||
System.out.println(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
public UnicodeSet getMatchSet(UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
|
@ -516,7 +576,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
matchIterator = new UnicodeSetIterator(set);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Utility for debugging
|
||||
*/
|
||||
public static String getStack() {
|
||||
Exception e = new Exception();
|
||||
StringWriter sw = new StringWriter();
|
||||
|
@ -526,32 +589,71 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return "Showing Stack with fake " + sw.getBuffer().toString();
|
||||
}
|
||||
|
||||
|
||||
public static Collection addUnique(Object obj, Collection result) {
|
||||
if (obj != null && !result.contains(obj)) result.add(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
public static Collection addAllUnique(Collection source, Collection result) {
|
||||
Iterator it = source.iterator();
|
||||
while (it.hasNext()) {
|
||||
Object obj = it.next();
|
||||
if (obj != null && !result.contains(obj)) result.add(obj);
|
||||
// TODO use this instead of plain strings
|
||||
public static class Name implements Comparable {
|
||||
private static Map skeletonCache;
|
||||
private String skeleton;
|
||||
private String pretty;
|
||||
public final int RAW = 0, TITLE = 1, NORMAL = 2;
|
||||
public Name(String name, int style) {
|
||||
if (name == null) name = "";
|
||||
if (style == RAW) {
|
||||
skeleton = pretty = name;
|
||||
} else {
|
||||
pretty = regularize(name, style == TITLE);
|
||||
skeleton = toSkeleton(pretty);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
public int compareTo(Object o) {
|
||||
return skeleton.compareTo(((Name)o).skeleton);
|
||||
}
|
||||
public boolean equals(Object o) {
|
||||
return skeleton.equals(((Name)o).skeleton);
|
||||
}
|
||||
public int hashCode() {
|
||||
return skeleton.hashCode();
|
||||
}
|
||||
public String toString() {
|
||||
return pretty;
|
||||
}
|
||||
}
|
||||
|
||||
public static class SkeletonComparator implements Comparator {
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
// TODO optimize
|
||||
if (o1 == o2) return 0;
|
||||
if (o1 == null) return -1;
|
||||
if (o2 == null) return 1;
|
||||
return toSkeleton((String)o1).compareTo(toSkeleton((String)o2));
|
||||
return compareNames((String)o1, (String)o2);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*
|
||||
*/
|
||||
// TODO optimize
|
||||
public static boolean equalNames(String a, String b) {
|
||||
if (a == b) return true;
|
||||
if (a == null) return false;
|
||||
return toSkeleton(a).equals(toSkeleton(b));
|
||||
}
|
||||
|
||||
private static String toSkeleton(String source) {
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO optimize
|
||||
public static int compareNames(String a, String b) {
|
||||
if (a == b) return 0;
|
||||
if (a == null) return -1;
|
||||
if (b == null) return 1;
|
||||
return toSkeleton(a).compareTo(toSkeleton(b));
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for managing property & non-string value aliases
|
||||
*/
|
||||
// TODO account for special names, tibetan, hangul
|
||||
public static String toSkeleton(String source) {
|
||||
if (source == null) return null;
|
||||
StringBuffer skeletonBuffer = new StringBuffer();
|
||||
boolean gotOne = false;
|
||||
// remove spaces, '_', '-'
|
||||
|
@ -574,6 +676,44 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return skeletonBuffer.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* These routines use the Java functions, because they only need to act on ASCII
|
||||
* Changes space, - into _, inserts _ between lower and UPPER.
|
||||
*/
|
||||
public static String regularize(String source, boolean titlecaseStart) {
|
||||
if (source == null) return source;
|
||||
/*if (source.equals("noBreak")) { // HACK
|
||||
if (titlecaseStart) return "NoBreak";
|
||||
return source;
|
||||
}
|
||||
*/
|
||||
StringBuffer result = new StringBuffer();
|
||||
int lastCat = -1;
|
||||
boolean haveFirstCased = true;
|
||||
for (int i = 0; i < source.length(); ++i) {
|
||||
char c = source.charAt(i);
|
||||
if (c == ' ' || c == '-' || c == '_') {
|
||||
c = '_';
|
||||
haveFirstCased = true;
|
||||
}
|
||||
if (c == '=') haveFirstCased = true;
|
||||
int cat = Character.getType(c);
|
||||
if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
|
||||
result.append('_');
|
||||
}
|
||||
if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
|
||||
|| cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
|
||||
if (titlecaseStart) {
|
||||
c = Character.toUpperCase(c);
|
||||
}
|
||||
haveFirstCased = false;
|
||||
}
|
||||
result.append(c);
|
||||
lastCat = cat;
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for comparing codepoint to string without
|
||||
* generating new string.
|
||||
|
@ -605,5 +745,57 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addUnique(Object obj, Collection result) {
|
||||
if (obj != null && !result.contains(obj)) result.add(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Collection source, Collection result) {
|
||||
for (Iterator it = source.iterator(); it.hasNext();) {
|
||||
addUnique(it.next(), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
|
||||
*/
|
||||
public static Collection addAllUnique(Object[] source, Collection result) {
|
||||
for (int i = 0; i < source.length; ++i) {
|
||||
addUnique(source[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
protected UnicodeMap getUnicodeMap() {
|
||||
if (unicodeMap == null) unicodeMap = _getUnicodeMap();
|
||||
return unicodeMap;
|
||||
}
|
||||
|
||||
protected UnicodeMap _getUnicodeMap() {
|
||||
UnicodeMap result = new UnicodeMap();
|
||||
for (int i = 0; i <= 0x10FFFF; ++i) {
|
||||
//if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
|
||||
result.put(i, getValue(i));
|
||||
}
|
||||
if (DEBUG && CHECK_NAME.equals(getName())) {
|
||||
System.out.println(getName() + ":\t" + getClass().getName()
|
||||
+ "\t" + getVersion());
|
||||
System.out.println(getStack());
|
||||
System.out.println(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Cyrillic_Latin.txt,v $
|
||||
# $Date: 2001/12/01 00:14:55 $
|
||||
# $Revision: 1.9 $
|
||||
# $Date: 2004/02/18 03:08:58 $
|
||||
# $Revision: 1.10 $
|
||||
#--------------------------------------------------------------------
|
||||
# TODO: add remaining characters
|
||||
# Should add variants for Russian-English, Russian-German
|
||||
|
@ -28,6 +28,75 @@ $dot = \u0307 ;
|
|||
$caron = \u030C ;
|
||||
$comma = \u0326 ;
|
||||
$under = \u0331 ;
|
||||
$hookbelow = \u0321;
|
||||
$rhookbelow = \u0322;
|
||||
$linebelow = \u0329;
|
||||
$lineoverlay = \u20D3;
|
||||
$bar = \u0335;
|
||||
$horn = \u031B;
|
||||
|
||||
$titlo = \u0483;
|
||||
|
||||
# Simple decompositions
|
||||
|
||||
| ҷ <> | ч $linebelow ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
| Ҷ <> | Ч $linebelow ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
| ӌ <> | ч $hookbelow; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
| Ӌ <> | Ч $hookbelow; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
| ҹ <> | ч $lineoverlay ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
| Ҹ <> | Ч $lineoverlay; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
| җ <> | ж $linebelow; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
| Җ <> | Ж $linebelow; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
| қ <> | к $linebelow; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
| Қ <> | К $linebelow; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
| ӄ <> | к $hookbelow ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
| Ӄ <> | К $hookbelow ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
| ҡ <> | к ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
| Ҡ <> | К ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
| ҟ <> | к $bar ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
| Ҟ <> | К $bar ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
| ҝ <> | к $lineoverlay ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
| Ҝ <> | К $lineoverlay ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
|
||||
| ң <> | н $linebelow; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
| Ң <> | Н $linebelow; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
| ӈ <> | н $hookbelow; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
| Ӈ <> | Н $hookbelow; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
| ҥ <> | н $horn; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
| Ҥ <> | Н $horn; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
| ҧ <> | п $hookbelow; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
| Ҧ <> | П $hookbelow; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
|
||||
| ҁ <> | с $linebelow ; # CYRILLIC SMALL LETTER KOPPA
|
||||
| Ҁ <> | С $linebelow; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
| ҏ <> | р $lineoverlay ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
| Ҏ <> | Р $lineoverlay ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
| ҫ <> | с $rhookbelow ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
| Ҫ <> | С $rhookbelow ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
| ҭ <> | т $linebelow ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
| Ҭ <> | Т $linebelow ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
| ҳ <> | х $linebelow ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
| Ҳ <> | Х $linebelow ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
|
||||
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
| ѹ <> oу ; # CYRILLIC SMALL LETTER UK
|
||||
| Ѹ <> У ; # CYRILLIC CAPITAL LETTER UK
|
||||
|
||||
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
| ѽ <> | ѡ $titlo ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
| Ѽ <> | Ѡ $titlo; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
|
||||
# move up so not masked
|
||||
|
||||
|
@ -36,12 +105,6 @@ $under = \u0331 ;
|
|||
|
||||
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
|
||||
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
|
||||
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
э <> e $acute; # CYRILLIC SMALL LETTER E
|
||||
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
|
||||
|
@ -55,8 +118,8 @@ $under = \u0331 ;
|
|||
|
||||
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
|
||||
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
|
||||
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
ӡ <> ʒ ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
Ӡ <> Ʒ ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
|
||||
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
|
||||
|
@ -111,9 +174,6 @@ $under = \u0331 ;
|
|||
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
|
||||
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
з <> z ; # CYRILLIC SMALL LETTER ZE
|
||||
З <> Z; # CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
|
@ -125,16 +185,6 @@ $under = \u0331 ;
|
|||
к <> k ; # CYRILLIC SMALL LETTER KA
|
||||
К <> K; # CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
л <> l ; # CYRILLIC SMALL LETTER EL
|
||||
Л <> L; # CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
|
@ -142,64 +192,42 @@ $under = \u0331 ;
|
|||
М <> M ; # CYRILLIC CAPITAL LETTER EM
|
||||
н <> n ; # CYRILLIC SMALL LETTER EN
|
||||
Н <> N; # CYRILLIC CAPITAL LETTER EN
|
||||
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
ө <> o $bar ; # CYRILLIC SMALL LETTER BARRED O
|
||||
Ө <> O $bar; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
|
||||
о <> o ; # CYRILLIC SMALL LETTER O
|
||||
О <> O ; # CYRILLIC CAPITAL LETTER O
|
||||
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
|
||||
п <> p ; # CYRILLIC SMALL LETTER PE
|
||||
П <> P ; # CYRILLIC CAPITAL LETTER PE
|
||||
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
|
||||
р <> r ; # CYRILLIC SMALL LETTER ER
|
||||
Р <> R ; # CYRILLIC CAPITAL LETTER ER
|
||||
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
|
||||
с <> s ; # CYRILLIC SMALL LETTER ES
|
||||
С <> S ; # CYRILLIC CAPITAL LETTER ES
|
||||
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
|
||||
т <> t ; # CYRILLIC SMALL LETTER TE
|
||||
Т <> T ; # CYRILLIC CAPITAL LETTER TE
|
||||
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
ү <> u $circumflex ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
Ү <> U $circumflex; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
|
||||
у <> u ; # CYRILLIC SMALL LETTER U
|
||||
У <> U ; # CYRILLIC CAPITAL LETTER U
|
||||
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
|
||||
ф <> f ; # CYRILLIC SMALL LETTER EF
|
||||
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
|
||||
|
||||
х <> h ; # CYRILLIC SMALL LETTER HA
|
||||
Х <> H; # CYRILLIC CAPITAL LETTER HA
|
||||
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
|
||||
ц <> c ; # CYRILLIC SMALL LETTER TSE
|
||||
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
|
||||
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
ҵ <> c $cedilla ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
Ҵ <> C $cedilla ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
|
@ -220,6 +248,10 @@ $under = \u0331 ;
|
|||
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
ѱ <> ps $underline ; # CYRILLIC SMALL LETTER PSI
|
||||
Ѱ } [:lower:] <> Ps $underline ; # CYRILLIC CAPITAL LETTER PSI
|
||||
Ѱ <> PS $underline ; # CYRILLIC CAPITAL LETTER PSI
|
||||
|
||||
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
|
@ -230,10 +262,10 @@ $under = \u0331 ;
|
|||
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
|
||||
ѯ <> x $underline ; # CYRILLIC SMALL LETTER KSI
|
||||
Ѯ <> X $underline ; # CYRILLIC CAPITAL LETTER KSI
|
||||
|
||||
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
|
@ -241,6 +273,7 @@ $under = \u0331 ;
|
|||
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
|
||||
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
|
|
|
@ -53,14 +53,13 @@ public class CheckICU {
|
|||
public static void test() throws IOException {
|
||||
//generateFile("4.0.0", "DerivedCombiningClass");
|
||||
//generateFile("4.0.0", "DerivedCoreProperties");
|
||||
if (true) return;
|
||||
checkUCD();
|
||||
itemFailures = new UnicodeSet();
|
||||
icuFactory = ICUPropertyFactory.make();
|
||||
toolFactory = ToolUnicodePropertySource.make("4.0.0");
|
||||
|
||||
String[] quickList = {
|
||||
"Math",
|
||||
"Canonical_Combining_Class",
|
||||
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
|
||||
//"Numeric_Value"
|
||||
};
|
||||
|
@ -69,9 +68,9 @@ public class CheckICU {
|
|||
}
|
||||
if (quickList.length > 0) return;
|
||||
|
||||
Collection availableTool = toolFactory.getAvailableAliases(new TreeSet());
|
||||
Collection availableTool = toolFactory.getAvailableNames();
|
||||
|
||||
Collection availableICU = icuFactory.getAvailableAliases(new TreeSet());
|
||||
Collection availableICU = icuFactory.getAvailableNames();
|
||||
System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool));
|
||||
Collection common = new TreeSet(availableICU);
|
||||
common.retainAll(availableTool);
|
||||
|
@ -140,12 +139,12 @@ public class CheckICU {
|
|||
+ "\tTool: " + UnicodeProperty.getTypeName(toolType));
|
||||
}
|
||||
|
||||
Collection icuAliases = icuProp.getAliases(new ArrayList());
|
||||
Collection toolAliases = toolProp.getAliases(new ArrayList());
|
||||
Collection icuAliases = icuProp.getNameAliases(new ArrayList());
|
||||
Collection toolAliases = toolProp.getNameAliases(new ArrayList());
|
||||
System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases));
|
||||
|
||||
icuAliases = icuProp.getAvailableValueAliases(new ArrayList());
|
||||
toolAliases = toolProp.getAvailableValueAliases(new ArrayList());
|
||||
icuAliases = icuProp.getAvailableValues(new ArrayList());
|
||||
toolAliases = toolProp.getAvailableValues(new ArrayList());
|
||||
System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases));
|
||||
|
||||
// TODO do property value aliases
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2004/02/07 01:01:16 $
|
||||
* $Revision: 1.24 $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.25 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -285,7 +285,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
byte val;
|
||||
CaseDProp (int i) {
|
||||
type = DERIVED_CORE;
|
||||
isStandard = false;
|
||||
isStandard = false;
|
||||
val = (i == Missing_Uppercase ? Lu : i == Missing_Lowercase ? Ll : Lt);
|
||||
name = "Possible_Missing_" + CaseNames[i-Missing_Uppercase];
|
||||
header = "# Derived Property: " + name
|
||||
|
@ -306,7 +306,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
String MAYBE;
|
||||
Normalizer nfx;
|
||||
QuickDProp (int i) {
|
||||
// setValueType((i == NFC || i == NFKC) ? ENUMERATED : BINARY);
|
||||
//setValueType((i == NFC || i == NFKC) ? ENUMERATED_PROP : BINARY_PROP);
|
||||
setValueType(ENUMERATED_PROP);
|
||||
type = DERIVED_NORMALIZATION;
|
||||
nfx = nf[i];
|
||||
|
@ -611,7 +611,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
+ "\r\n# Uses the full case folding from CaseFolding.txt, without the T option."
|
||||
;
|
||||
}
|
||||
public String getValue(int cp, byte style) {
|
||||
public String getValue(int cp, byte style) {
|
||||
if (!ucdData.isRepresented(cp)) return "";
|
||||
String b = nfkc.normalize(fold(cp));
|
||||
String c = nfkc.normalize(fold(b));
|
||||
|
@ -860,7 +860,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Grapheme_Extend";
|
||||
shortName = "GrExt";
|
||||
shortName = "Gr_Ext";
|
||||
header = header = "# Derived Property: " + name
|
||||
+ "\r\n# Generated from: Me + Mn + Other_Grapheme_Extend"
|
||||
+ "\r\n# Note: depending on an application's interpretation of Co (private use),"
|
||||
|
@ -883,7 +883,7 @@ of characters, the first of which has a non-zero combining class.
|
|||
{
|
||||
type = DERIVED_CORE;
|
||||
name = "Grapheme_Base";
|
||||
shortName = "GrBase";
|
||||
shortName = "Gr_Base";
|
||||
|
||||
header = header = "# Derived Property: " + name
|
||||
+ "\r\n# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend"
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -149,7 +149,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
|
|||
result.append(ucd.getCodeAndName(cp));
|
||||
for (int j = 0; j < props.length; ++j) {
|
||||
result.append(", ");
|
||||
result.append(props[j].getProperty(SHORT)).append('=').append(props[j].getValue(cp,SHORT));
|
||||
result.append(props[j].getPropertyName(SHORT)).append('=').append(props[j].getValue(cp,SHORT));
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2004/02/12 08:23:15 $
|
||||
* $Revision: 1.33 $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.34 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,6 +17,7 @@ import java.util.*;
|
|||
import java.io.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
|
@ -391,8 +392,8 @@ public class GenerateData implements UCD_Types {
|
|||
if (propAbb.equals("") || propAbb.equals(UCD_Names.YN_TABLE[1])) {
|
||||
System.out.println("WHOOPS: " + Utility.hex(i));
|
||||
}
|
||||
propAbb = Utility.getUnskeleton(up.getProperty(SHORT), false);
|
||||
prop = Utility.getUnskeleton(up.getProperty(LONG), true);
|
||||
propAbb = Utility.getUnskeleton(up.getPropertyName(SHORT), false);
|
||||
prop = Utility.getUnskeleton(up.getPropertyName(LONG), true);
|
||||
addLine(sorted,
|
||||
type == SCRIPT
|
||||
? UCD_Names.PROP_TYPE_NAMES[CATALOG_PROP][1]
|
||||
|
@ -407,7 +408,7 @@ public class GenerateData implements UCD_Types {
|
|||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
value = up.getValue(LONG);
|
||||
if (value.length() == 0) value = "none";
|
||||
else if (value.equals("<unused>")) continue;
|
||||
else if (value.equals(UnicodeProperty.UNUSED)) continue;
|
||||
|
||||
if (type != DECOMPOSITION_TYPE) {
|
||||
value = Utility.getUnskeleton(value, true);
|
||||
|
@ -429,7 +430,7 @@ public class GenerateData implements UCD_Types {
|
|||
|
||||
|
||||
if (type == COMBINING_CLASS) {
|
||||
if (value.startsWith("Fixed_")) { continue; }
|
||||
if (value.charAt(0) <= '9') { continue; }
|
||||
}
|
||||
|
||||
|
||||
|
@ -502,7 +503,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
|
||||
Utility.appendFile("PropertyAliasesHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
int count = Utility.print(log, sorted, "\r\n", new MyBreaker(true));
|
||||
|
@ -522,7 +523,7 @@ public class GenerateData implements UCD_Types {
|
|||
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
log.println(UnicodeDataFile.generateDateLine());
|
||||
log.println("#");
|
||||
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
|
||||
Utility.appendFile("PropertyValueAliasesHeader.txt", Utility.LATIN1, log);
|
||||
log.println(HORIZONTAL_LINE);
|
||||
log.println();
|
||||
Utility.print(log, sorted, "\r\n", new MyBreaker(false));
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
|
@ -10,106 +13,194 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.ResourceBundle;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.Tabber;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.text.utility.UnicodeDataFile;
|
||||
import com.ibm.text.utility.Utility;
|
||||
import com.ibm.icu.text.Collator;
|
||||
|
||||
public class MakeUnicodeFiles {
|
||||
|
||||
static boolean DEBUG = true;
|
||||
|
||||
public static void main() throws IOException {
|
||||
generateFile("Scripts","z");
|
||||
generateFile("*");
|
||||
}
|
||||
|
||||
static class OrderedMap {
|
||||
HashMap map = new HashMap();
|
||||
ArrayList keys = new ArrayList();
|
||||
void put(Object o, Object t) {
|
||||
map.put(o,t);
|
||||
keys.add(o);
|
||||
}
|
||||
List keyset() {
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
|
||||
static class PrintStyle {
|
||||
boolean longForm = false;
|
||||
|
||||
static String[] FILE_OPTIONS = {
|
||||
"Script nameStyle=none makeUppercase skipUnassigned=Common hackValues",
|
||||
"Age nameStyle=none noLabel skipValue=unassigned",
|
||||
"Numeric_Type nameStyle=none makeFirstLetterLowercase skipValue=None",
|
||||
"General_Category nameStyle=none valueStyle=short noLabel",
|
||||
"Line_Break nameStyle=none valueStyle=short skipUnassigned=Unknown",
|
||||
"Joining_Type nameStyle=none valueStyle=short skipValue=Non_Joining",
|
||||
"Joining_Group nameStyle=none skipValue=No_Joining_Group makeUppercase",
|
||||
"East_Asian_Width nameStyle=none valueStyle=short skipUnassigned=Neutral",
|
||||
"Decomposition_Type nameStyle=none skipValue=None makeFirstLetterLowercase hackValues",
|
||||
"Bidi_Class nameStyle=none valueStyle=short skipUnassigned=Left_To_Right",
|
||||
"Block nameStyle=none noLabel valueList",
|
||||
"Canonical_Combining_Class nameStyle=none valueStyle=short skipUnassigned=Not_Reordered longValueHeading=ccc",
|
||||
"Hangul_Syllable_Type nameStyle=none valueStyle=short skipValue=Not_Applicable",
|
||||
"NFD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
||||
"NFC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
||||
"NFKC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
||||
"NFKD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
||||
"FC_NFKC_Closure nameStyle=short"
|
||||
};
|
||||
|
||||
static String[] hackNameList = {
|
||||
"noBreak", "Arabic_Presentation_Forms-A", "Arabic_Presentation_Forms-B",
|
||||
"CJK_Symbols_and_Punctuation", "Combining_Diacritical_Marks_for_Symbols",
|
||||
"Enclosed_CJK_Letters_and_Months", "Greek_and_Coptic",
|
||||
"Halfwidth_and_Fullwidth_Forms", "Latin-1_Supplement", "Latin_Extended-A",
|
||||
"Latin_Extended-B", "Miscellaneous_Mathematical_Symbols-A",
|
||||
"Miscellaneous_Mathematical_Symbols-B", "Miscellaneous_Symbols_and_Arrows",
|
||||
"Superscripts_and_Subscripts", "Supplemental_Arrows-A", "Supplemental_Arrows-B",
|
||||
"Supplementary_Private_Use_Area-A", "Supplementary_Private_Use_Area-B",
|
||||
"Canadian-Aboriginal", "Old-Italic"
|
||||
};
|
||||
|
||||
static class PrintStyle {
|
||||
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
|
||||
static Map PRINT_STYLE_MAP = new TreeMap(UnicodeProperty.PROPERTY_COMPARATOR);
|
||||
boolean noLabel = false;
|
||||
boolean makeUppercase = false;
|
||||
boolean makeFirstLetterLowercase = false;
|
||||
String skipValue = null;
|
||||
String skipUnassigned = null;
|
||||
boolean orderByRangeStart = false;
|
||||
boolean valueList = false;
|
||||
boolean interleaveValues = false;
|
||||
boolean hackValues = false;
|
||||
String nameStyle = "none";
|
||||
String valueStyle = "long";
|
||||
String skipValue = null;
|
||||
String skipUnassigned = null;
|
||||
String longValueHeading = null;
|
||||
|
||||
PrintStyle setLongForm(boolean value) {
|
||||
longForm = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setSkipUnassigned(String value) {
|
||||
skipUnassigned = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setNoLabel(boolean value) {
|
||||
noLabel = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setMakeUppercase(boolean value) {
|
||||
makeUppercase = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setMakeFirstLetterLowercase(boolean value) {
|
||||
makeFirstLetterLowercase = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setSkipValue(String value) {
|
||||
skipValue = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setOrderByRangeStart(boolean value) {
|
||||
orderByRangeStart = value;
|
||||
return this;
|
||||
}
|
||||
PrintStyle setValueList(boolean value) {
|
||||
valueList = value;
|
||||
return this;
|
||||
}
|
||||
static void add(String options) {
|
||||
PrintStyle result = new PrintStyle();
|
||||
PRINT_STYLE_MAP.put(result.parse(options), result);
|
||||
}
|
||||
static PrintStyle get(String propname) {
|
||||
PrintStyle result = (PrintStyle) PRINT_STYLE_MAP.get(propname);
|
||||
if (result != null) return result;
|
||||
if (DEBUG) System.out.println("Using default style!");
|
||||
return DEFAULT_PRINT_STYLE;
|
||||
}
|
||||
String parse(String options) {
|
||||
options = options.replace('\t', ' ');
|
||||
String[] pieces = Utility.split(options, ' ');
|
||||
for (int i = 1; i < pieces.length; ++i) {
|
||||
String piece = pieces[i];
|
||||
// binary
|
||||
if (piece.equals("noLabel")) noLabel = true;
|
||||
else if (piece.equals("makeUppercase")) makeUppercase = true;
|
||||
else if (piece.equals("makeFirstLetterLowercase")) makeFirstLetterLowercase = true;
|
||||
else if (piece.equals("orderByRangeStart")) orderByRangeStart = true;
|
||||
else if (piece.equals("valueList")) interleaveValues = true;
|
||||
else if (piece.equals("hackValues")) hackValues = true;
|
||||
// with parameter
|
||||
else if (piece.startsWith("valueStyle=")) valueStyle = afterEquals(piece);
|
||||
else if (piece.startsWith("nameStyle=")) nameStyle = afterEquals(piece);
|
||||
else if (piece.startsWith("longValueHeading=")) longValueHeading = afterEquals(piece);
|
||||
else if (piece.startsWith("skipValue=")) skipValue = afterEquals(piece);
|
||||
else if (piece.startsWith("skipUnassigned=")) skipUnassigned = afterEquals(piece);
|
||||
else if (piece.length() != 0) {
|
||||
throw new IllegalArgumentException("Illegal PrintStyle Parameter: " + piece + " in " + pieces[0]);
|
||||
}
|
||||
}
|
||||
if (DEBUG && options.indexOf('=') >= 0) {
|
||||
System.out.println(pieces[0]);
|
||||
if (longValueHeading != null)System.out.println(" name " + longValueHeading);
|
||||
if (nameStyle != null) System.out.println(" nameStyle " + nameStyle);
|
||||
if (longValueHeading != null) System.out.println(" longValueHeading " + longValueHeading);
|
||||
if (skipValue != null) System.out.println(" skipValue " + skipValue);
|
||||
if (skipUnassigned != null) System.out.println(" skipUnassigned " + skipUnassigned);
|
||||
}
|
||||
return pieces[0];
|
||||
}
|
||||
String afterEquals(String source) {
|
||||
return source.substring(source.indexOf('=')+1);
|
||||
}
|
||||
}
|
||||
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
|
||||
static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();
|
||||
static Map printStyles = new TreeMap(/*skeletonComparator*/);
|
||||
static {
|
||||
printStyles.put("Script", new PrintStyle().setLongForm(true)
|
||||
.setMakeUppercase(true).setSkipUnassigned("Common"));
|
||||
printStyles.put("Age", new PrintStyle().setNoLabel(true));
|
||||
printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)
|
||||
.setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));
|
||||
printStyles.put("General_Category", new PrintStyle().setNoLabel(true)
|
||||
//.setSkipUnassigned(true)
|
||||
);
|
||||
printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));
|
||||
printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));
|
||||
printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")
|
||||
.setMakeUppercase(true));
|
||||
printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));
|
||||
printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)
|
||||
.setSkipValue("None").setMakeFirstLetterLowercase(true));
|
||||
printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));
|
||||
printStyles.put("Block", new PrintStyle().setNoLabel(true)
|
||||
.setValueList(true));
|
||||
printStyles.put("Age", new PrintStyle().setSkipValue("unassigned"));
|
||||
printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));
|
||||
printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));
|
||||
|
||||
for (int i = 0; i < FILE_OPTIONS.length; ++i) {
|
||||
PrintStyle.add(FILE_OPTIONS[i]);
|
||||
}
|
||||
}
|
||||
//PropertyAliases
|
||||
//PropertyValueAliases
|
||||
|
||||
static Map hackMap = new HashMap();
|
||||
static {
|
||||
for (int i = 0; i < hackNameList.length; ++i) {
|
||||
String item = hackNameList[i];
|
||||
String regularItem = UnicodeProperty.regularize(item,true);
|
||||
hackMap.put(regularItem, item);
|
||||
}
|
||||
}
|
||||
static UnicodeProperty.MapFilter hackMapFilter = new UnicodeProperty.MapFilter(hackMap);
|
||||
|
||||
static class ValueComments {
|
||||
TreeMap propertyToValueToComments = new TreeMap();
|
||||
ValueComments add(String property, String value, String comments) {
|
||||
TreeMap valueToComments = (TreeMap) propertyToValueToComments.get(property);
|
||||
if (valueToComments == null) {
|
||||
valueToComments = new TreeMap();
|
||||
propertyToValueToComments.put(property, valueToComments);
|
||||
}
|
||||
valueToComments.put(value, comments);
|
||||
return this;
|
||||
}
|
||||
String get(String property, String value) {
|
||||
TreeMap valueToComments = (TreeMap) propertyToValueToComments.get(property);
|
||||
if (valueToComments != null) return (String) valueToComments.get(value);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
static ValueComments valueComments = new ValueComments();
|
||||
static {
|
||||
for (int i = 0; i < UCD_Names.UNIFIED_PROPERTIES.length; ++i) {
|
||||
String name = Utility.getUnskeleton(UCD_Names.UNIFIED_PROPERTIES[i], false);
|
||||
valueComments.add(name, "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]);
|
||||
}
|
||||
// HACK
|
||||
valueComments.add("Bidi_Mirroring", "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[9]);
|
||||
try {
|
||||
BufferedReader br = Utility.openReadFile("MakeUnicodeFiles.txt", Utility.UTF8);
|
||||
String key = null;
|
||||
String value = "";
|
||||
while (true) {
|
||||
String line = br.readLine();
|
||||
if (line == null) break;
|
||||
if (!line.startsWith("#")) {
|
||||
if (key != null) {// store
|
||||
String[] pieces = Utility.split(key, '=');
|
||||
if (pieces.length == 1) {
|
||||
valueComments.add(pieces[0].trim(), "*", value);
|
||||
} else {
|
||||
valueComments.add(pieces[0].trim(), pieces[1].trim(), value);
|
||||
}
|
||||
value = "";
|
||||
}
|
||||
key = line;
|
||||
} else {
|
||||
value += line + "\n";
|
||||
}
|
||||
}
|
||||
br.close();
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
throw new IllegalArgumentException("File missing");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//CompositionExclusions
|
||||
//SpecialCasing
|
||||
//NormalizationTest
|
||||
|
@ -119,15 +210,15 @@ public class MakeUnicodeFiles {
|
|||
contents.put(name, properties);
|
||||
}
|
||||
static {
|
||||
add("Blocks", new String[] {"Block"});
|
||||
add("PropertyValueAliases", null);
|
||||
add("PropertyAliases", null);
|
||||
add("SpecialCasing", null);
|
||||
add("NormalizationTest", null);
|
||||
add("StandardizedVariants", null);
|
||||
add("CaseFolding", null);
|
||||
add("DerivedAge", new String[] {"Age"});
|
||||
add("Scripts", new String[] {"Script"});
|
||||
add("HangulSyllableType", new String[] {"HangulSyllableType"});
|
||||
if (false) add("DerivedNormalizationProps", new String[] {
|
||||
"FNC", "Full_Composition_Exclusion",
|
||||
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
|
||||
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});
|
||||
|
||||
add("DerivedBidiClass", new String[] {"BidiClass"});
|
||||
add("DerivedBinaryProperties", new String[] {"BidiMirrored"});
|
||||
add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});
|
||||
|
@ -165,93 +256,390 @@ public class MakeUnicodeFiles {
|
|||
"Default_Ignorable_Code_Point",
|
||||
"Grapheme_Extend", "Grapheme_Base"
|
||||
});
|
||||
add("DerivedNormalizationProps", new String[] {
|
||||
"FC_NFKC_Closure",
|
||||
"Full_Composition_Exclusion",
|
||||
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
|
||||
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"
|
||||
});
|
||||
}
|
||||
|
||||
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
|
||||
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
|
||||
Iterator it = contents.keySet().iterator();
|
||||
while (it.hasNext()) {
|
||||
String propname = (String) it.next();
|
||||
if (propname.compareTo(atOrAfter) < 0) continue;
|
||||
if (propname.compareTo(atOrBefore) > 0) continue;
|
||||
if (propname.compareToIgnoreCase(atOrAfter) < 0) continue;
|
||||
if (propname.compareToIgnoreCase(atOrBefore) > 0) continue;
|
||||
generateFile(propname);
|
||||
}
|
||||
}
|
||||
|
||||
public static void generateFile(String filename) throws IOException {
|
||||
if (filename.equals("*")) {
|
||||
generateFile("", "\uFFFD");
|
||||
} else if (filename.endsWith("Aliases")) {
|
||||
if (filename.endsWith("ValueAliases")) generateValueAliasFile(filename);
|
||||
else generateAliasFile(filename);
|
||||
} else if (filename.equals("NormalizationTest")) {
|
||||
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
|
||||
} else if (filename.equals("CaseFolding")) {
|
||||
GenerateCaseFolding.makeCaseFold(false);
|
||||
} else if (filename.equals("SpecialCasing")) {
|
||||
GenerateCaseFolding.generateSpecialCasing(false);
|
||||
} else if (filename.equals("StandardizedVariants")) {
|
||||
GenerateStandardizedVariants.generate();
|
||||
} else {
|
||||
generatePropertyFile(filename);
|
||||
}
|
||||
}
|
||||
|
||||
public static void generateFile(String filename) throws IOException {
|
||||
static final String SEPARATOR = "# ================================================";
|
||||
|
||||
public static void generateAliasFile(String filename) throws IOException {
|
||||
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
|
||||
PrintWriter pw = udf.out;
|
||||
UnicodeProperty.Factory ups
|
||||
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
||||
TreeSet sortedSet = new TreeSet(CASELESS_COMPARATOR);
|
||||
BagFormatter bf = new BagFormatter();
|
||||
Tabber.MonoTabber mt = new Tabber.MonoTabber()
|
||||
.add(10,Tabber.LEFT);
|
||||
int count = 0;
|
||||
|
||||
for (int i = UnicodeProperty.LIMIT_TYPE - 1; i >= UnicodeProperty.BINARY; --i) {
|
||||
if ((i & UnicodeProperty.EXTENDED_MASK) != 0) continue;
|
||||
List list = ups.getAvailableNames(1<<i);
|
||||
//if (list.size() == 0) continue;
|
||||
sortedSet.clear();
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (Iterator it = list.iterator(); it.hasNext();) {
|
||||
String propAlias = (String)it.next();
|
||||
|
||||
UnicodeProperty up = ups.getProperty(propAlias);
|
||||
List aliases = up.getNameAliases();
|
||||
if (aliases.size() == 1) {
|
||||
sortedSet.add(mt.process(aliases.get(0) + "\t; " + aliases.get(0)));
|
||||
} else {
|
||||
buffer.setLength(0);
|
||||
boolean isFirst = true;
|
||||
for (Iterator it2 = aliases.iterator(); it2.hasNext();) {
|
||||
if (isFirst) isFirst = false;
|
||||
else buffer.append("\t; ");
|
||||
buffer.append(it2.next());
|
||||
}
|
||||
if (aliases.size() == 1) {
|
||||
// repeat
|
||||
buffer.append("\t; ").append(aliases.get(0));
|
||||
}
|
||||
sortedSet.add(mt.process(buffer.toString()));
|
||||
}
|
||||
}
|
||||
if (i == UnicodeProperty.STRING) {
|
||||
for (int j = 0; j < specialString.length; ++j) {
|
||||
sortedSet.add(mt.process(specialString[j]));
|
||||
}
|
||||
} else if (i == UnicodeProperty.MISC) {
|
||||
for (int j = 0; j < specialMisc.length; ++j) {
|
||||
sortedSet.add(mt.process(specialMisc[j]));
|
||||
}
|
||||
}
|
||||
pw.println();
|
||||
pw.println(SEPARATOR);
|
||||
pw.println("# " + UnicodeProperty.getTypeName(i) + " Properties");
|
||||
pw.println(SEPARATOR);
|
||||
for (Iterator it = sortedSet.iterator(); it.hasNext();) {
|
||||
pw.println(it.next());
|
||||
count++;
|
||||
}
|
||||
}
|
||||
pw.println();
|
||||
pw.println(SEPARATOR);
|
||||
pw.println("#Total: " + count);
|
||||
pw.println();
|
||||
udf.close();
|
||||
}
|
||||
|
||||
static String[] specialMisc = {
|
||||
"isc\t; ISO_Comment",
|
||||
"na1\t; Unicode_1_Name",
|
||||
"URS\t; Unicode_Radical_Stroke"};
|
||||
|
||||
static String[] specialString = {
|
||||
"dm\t; Decomposition_Mapping",
|
||||
"lc\t; Lowercase_Mapping",
|
||||
"scc\t; Special_Case_Condition",
|
||||
"sfc\t; Simple_Case_Folding",
|
||||
"slc\t; Simple_Lowercase_Mapping",
|
||||
"stc\t; Simple_Titlecase_Mapping",
|
||||
"suc\t; Simple_Uppercase_Mapping",
|
||||
"tc\t; Titlecase_Mapping",
|
||||
"uc\t; Uppercase_Mapping"};
|
||||
|
||||
static String[] specialGC = {
|
||||
"gc\t;\tC\t;\tOther\t# Cc | Cf | Cn | Co | Cs",
|
||||
"gc\t;\tL\t;\tLetter\t# Ll | Lm | Lo | Lt | Lu",
|
||||
"gc\t;\tLC\t;\tCased_Letter\t# Ll | Lt | Lu",
|
||||
"gc\t;\tM\t;\tMark\t# Mc | Me | Mn",
|
||||
"gc\t;\tN\t;\tNumber\t# Nd | Nl | No",
|
||||
"gc\t;\tP\t;\tPunctuation\t# Pc | Pd | Pe | Pf | Pi | Po | Ps",
|
||||
"gc\t;\tS\t;\tSymbol\t# Sc | Sk | Sm | So",
|
||||
"gc\t;\tZ\t;\tSeparator\t# Zl | Zp | Zs"};
|
||||
|
||||
public static void generateValueAliasFile(String filename) throws IOException {
|
||||
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
|
||||
PrintWriter pw = udf.out;
|
||||
UnicodeProperty.Factory toolFactory
|
||||
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
||||
BagFormatter bf = new BagFormatter(toolFactory);
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
Set sortedSet = new TreeSet(CASELESS_COMPARATOR);
|
||||
|
||||
//gc ; C ; Other # Cc | Cf | Cn | Co | Cs
|
||||
// 123456789012345678901234567890123
|
||||
|
||||
// sc ; Arab ; Arabic
|
||||
Tabber.MonoTabber mt2 = new Tabber.MonoTabber()
|
||||
.add(3,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(10,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(33,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(33,Tabber.LEFT);
|
||||
|
||||
// ccc; 216; ATAR ; Attached_Above_Right
|
||||
Tabber.MonoTabber mt3 = new Tabber.MonoTabber()
|
||||
.add(3,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(3,Tabber.RIGHT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(5,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(33,Tabber.LEFT)
|
||||
.add(2,Tabber.LEFT) // ;
|
||||
.add(33,Tabber.LEFT);
|
||||
|
||||
for (Iterator it = toolFactory.getAvailableNames(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK).iterator(); it.hasNext();) {
|
||||
String propName = (String) it.next();
|
||||
UnicodeProperty up = toolFactory.getProperty(propName);
|
||||
String shortProp = up.getFirstNameAlias();
|
||||
sortedSet.clear();
|
||||
|
||||
for (Iterator it2 = up.getAvailableValues().iterator(); it2.hasNext();) {
|
||||
String value = (String) it2.next();
|
||||
List l = up.getValueAliases(value);
|
||||
System.out.println(value + "\t" + bf.join(l));
|
||||
|
||||
// HACK
|
||||
Tabber mt = mt2;
|
||||
if (l.size() == 1) {
|
||||
if (propName.equals("Canonical_Combining_Class")) continue;
|
||||
if (propName.equals("Block")
|
||||
|| propName.equals("Joining_Group")
|
||||
//|| propName.equals("Numeric_Type")
|
||||
|| propName.equals("Age")) {
|
||||
l.add(0, "n/a");
|
||||
} else {
|
||||
l.add(0, l.get(0)); // double up
|
||||
}
|
||||
} else if (l.size() > 2) {
|
||||
mt = mt3;
|
||||
}
|
||||
if (UnicodeProperty.equalNames(value,"Cyrillic_Supplement")) {
|
||||
l.add("Cyrillic_Supplementary");
|
||||
}
|
||||
|
||||
buffer.setLength(0);
|
||||
buffer.append(shortProp);
|
||||
for (Iterator it3 = l.iterator(); it3.hasNext();) {
|
||||
buffer.append("\t; \t" + it3.next());
|
||||
}
|
||||
|
||||
sortedSet.add(mt.process(buffer.toString()));
|
||||
}
|
||||
// HACK
|
||||
if (propName.equals("General_Category")) {
|
||||
for (int i = 0; i < specialGC.length; ++i) {
|
||||
sortedSet.add(mt2.process(specialGC[i]));
|
||||
}
|
||||
}
|
||||
pw.println();
|
||||
for (Iterator it4 = sortedSet.iterator(); it4.hasNext();) {
|
||||
String line = (String) it4.next();
|
||||
pw.println(line);
|
||||
}
|
||||
}
|
||||
udf.close();
|
||||
}
|
||||
|
||||
public static void generatePropertyFile(String filename) throws IOException {
|
||||
String[] propList = (String[]) contents.get(filename);
|
||||
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
|
||||
PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
|
||||
UnicodeProperty.Factory toolFactory
|
||||
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
||||
BagFormatter bf2 = new BagFormatter(toolFactory);
|
||||
UnicodeSet unassigned = toolFactory.getSet("gc=cn")
|
||||
.addAll(toolFactory.getSet("gc=cs"));
|
||||
//System.out.println(unassigned.toPattern(true));
|
||||
// .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
|
||||
String separator = bf2.getLineSeparator()
|
||||
+ "# ================================================"
|
||||
+ bf2.getLineSeparator() + bf2.getLineSeparator();
|
||||
|
||||
|
||||
for (int i = 0; i < propList.length; ++i) {
|
||||
BagFormatter bf = new BagFormatter(toolFactory);
|
||||
UnicodeProperty prop = toolFactory.getProperty(propList[i]);
|
||||
System.out.println(prop.getName());
|
||||
pw.print(separator);
|
||||
PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());
|
||||
if (ps == null) {
|
||||
ps = DEFAULT_PRINT_STYLE;
|
||||
System.out.println("Using default style!");
|
||||
}
|
||||
if (ps.noLabel) bf2.setLabelSource(null);
|
||||
|
||||
if (ps.valueList) {
|
||||
bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))
|
||||
.setNameSource(null)
|
||||
.setShowCount(false)
|
||||
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
||||
} else if (prop.getType() <= prop.EXTENDED_BINARY) {
|
||||
UnicodeSet s = prop.getSet("True");
|
||||
bf2.setValueSource(prop.getName());
|
||||
bf2.showSetNames(pw, s);
|
||||
} else {
|
||||
bf2.setValueSource(prop);
|
||||
Collection aliases = prop.getAvailableValueAliases();
|
||||
if (ps.orderByRangeStart) {
|
||||
System.out.println("Reordering");
|
||||
TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
|
||||
temp2.addAll(aliases);
|
||||
aliases = temp2;
|
||||
}
|
||||
Iterator it = aliases.iterator();
|
||||
while (it.hasNext()) {
|
||||
String value = (String)it.next();
|
||||
UnicodeSet s = prop.getSet(value);
|
||||
|
||||
System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);
|
||||
System.out.println(s.toPattern(true));
|
||||
|
||||
if (skeletonComparator.compare(value, ps.skipValue) == 0) continue;
|
||||
if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) {
|
||||
s.removeAll(unassigned);
|
||||
}
|
||||
|
||||
if (s.size() == 0) continue;
|
||||
//if (unassigned.containsAll(s)) continue; // skip if all unassigned
|
||||
//if (s.contains(0xD0000)) continue; // skip unassigned
|
||||
pw.print(separator);
|
||||
if (!ps.longForm) value = prop.getShortestValueAlias(value);
|
||||
if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH);
|
||||
if (ps.makeFirstLetterLowercase) {
|
||||
// NOTE: this is ok since we are only working in ASCII
|
||||
value = value.substring(0,1).toLowerCase(Locale.ENGLISH)
|
||||
+ value.substring(1);
|
||||
}
|
||||
bf2.setValueSource(value);
|
||||
bf2.showSetNames(pw, s);
|
||||
}
|
||||
String name = prop.getName();
|
||||
System.out.println("Property: " + name + "; " + prop.getTypeName(prop.getType()));
|
||||
pw.println("\n" + SEPARATOR + "\n");
|
||||
String propComment = valueComments.get(name, "*");
|
||||
if (propComment != null) {
|
||||
pw.print(propComment);
|
||||
}
|
||||
pw.println();
|
||||
PrintStyle ps = PrintStyle.get(name);
|
||||
|
||||
if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
|
||||
if (DEBUG) System.out.println("Resetting Binary Values");
|
||||
ps.skipValue = "False";
|
||||
if (ps.nameStyle.equals("none")) ps.nameStyle = "long";
|
||||
ps.valueStyle = "none";
|
||||
}
|
||||
|
||||
if (ps.noLabel) bf.setLabelSource(null);
|
||||
if (ps.nameStyle.equals("none")) bf.setPropName(null);
|
||||
else if (ps.nameStyle.equals("short")) bf.setPropName(prop.getFirstNameAlias());
|
||||
else bf.setPropName(name);
|
||||
|
||||
if (ps.interleaveValues) {
|
||||
writeInterleavedValues(pw, bf, prop);
|
||||
} else if (prop.isType(UnicodeProperty.STRING_OR_MISC_MASK)) {
|
||||
writeStringValues(pw, bf, prop);
|
||||
//} else if (prop.isType(UnicodeProperty.BINARY_MASK)) {
|
||||
// writeBinaryValues(pw, bf, prop);
|
||||
} else {
|
||||
writeEnumeratedValues(pw, bf, unassigned, prop, ps);
|
||||
}
|
||||
pw.println();
|
||||
}
|
||||
udf.close();
|
||||
}
|
||||
private static void writeEnumeratedValues(
|
||||
PrintWriter pw,
|
||||
BagFormatter bf,
|
||||
UnicodeSet unassigned,
|
||||
UnicodeProperty prop,
|
||||
PrintStyle ps) {
|
||||
if (DEBUG) System.out.println("Writing Enumerated Values: " + prop.getName());
|
||||
|
||||
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, hackMapFilter));
|
||||
Collection aliases = prop.getAvailableValues();
|
||||
if (ps.orderByRangeStart) {
|
||||
System.out.println("Reordering");
|
||||
TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
|
||||
temp2.addAll(aliases);
|
||||
aliases = temp2;
|
||||
}
|
||||
for (Iterator it = aliases.iterator(); it.hasNext();) {
|
||||
String value = (String)it.next();
|
||||
UnicodeSet s = prop.getSet(value);
|
||||
if (DEBUG) System.out.println("Getting value " + value);
|
||||
String valueComment = valueComments.get(prop.getName(), value);
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println(value + "\t" + prop.getFirstValueAlias(value) + "\tskip:" + ps.skipValue);
|
||||
System.out.println(s.toPattern(true));
|
||||
}
|
||||
|
||||
int totalSize = s.size();
|
||||
if (s.size() == 0) continue;
|
||||
|
||||
if (UnicodeProperty.compareNames(value, ps.skipValue) == 0) {
|
||||
System.out.println("Skipping: " + value);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (UnicodeProperty.compareNames(value, ps.skipUnassigned) == 0) {
|
||||
System.out.println("Removing Unassigneds: " + value);
|
||||
s.removeAll(unassigned);
|
||||
}
|
||||
|
||||
//if (s.size() == 0) continue;
|
||||
//if (unassigned.containsAll(s)) continue; // skip if all unassigned
|
||||
//if (s.contains(0xD0000)) continue; // skip unassigned
|
||||
pw.print("\n" + SEPARATOR + "\n\n");
|
||||
|
||||
String displayValue = value;
|
||||
if (ps.valueStyle.equals("none")) {
|
||||
displayValue = null;
|
||||
} else if (ps.valueStyle.equals("short")) {
|
||||
displayValue = prop.getFirstValueAlias(displayValue);
|
||||
if (DEBUG) System.out.println("Changing value " + displayValue);
|
||||
}
|
||||
if (ps.makeUppercase && displayValue != null) {
|
||||
displayValue = displayValue.toUpperCase(Locale.ENGLISH);
|
||||
if (DEBUG) System.out.println("Changing value2 " + displayValue);
|
||||
}
|
||||
if (ps.makeFirstLetterLowercase && displayValue != null) {
|
||||
// NOTE: this is ok since we are only working in ASCII
|
||||
displayValue = displayValue.substring(0,1).toLowerCase(Locale.ENGLISH)
|
||||
+ displayValue.substring(1);
|
||||
if (DEBUG) System.out.println("Changing value2 " + displayValue);
|
||||
}
|
||||
if (DEBUG) System.out.println("Setting value " + displayValue);
|
||||
bf.setValueSource(displayValue);
|
||||
if (valueComment != null) {
|
||||
pw.println(valueComment);
|
||||
pw.println();
|
||||
}
|
||||
if (ps.longValueHeading != null) {
|
||||
String headingValue = value;
|
||||
if (ps.longValueHeading == "ccc") {
|
||||
headingValue = Utility.replace(value, "_", "");
|
||||
char c = headingValue.charAt(0);
|
||||
if ('0' <= c && c <= '9') headingValue = "Other Combining Class";
|
||||
}
|
||||
pw.println("# " + headingValue);
|
||||
pw.println();
|
||||
}
|
||||
if (s.size() != 0) bf.showSetNames(pw, s);
|
||||
if (s.size() != totalSize) {
|
||||
pw.println();
|
||||
pw.print("# Not Listed: " + totalSize);
|
||||
}
|
||||
pw.println();
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
private static void writeBinaryValues(
|
||||
PrintWriter pw,
|
||||
BagFormatter bf,
|
||||
UnicodeProperty prop) {
|
||||
if (DEBUG) System.out.println("Writing Binary Values: " + prop.getName());
|
||||
UnicodeSet s = prop.getSet("True");
|
||||
bf.setValueSource(prop.getName());
|
||||
bf.showSetNames(pw, s);
|
||||
}
|
||||
*/
|
||||
|
||||
private static void writeInterleavedValues(
|
||||
PrintWriter pw,
|
||||
BagFormatter bf,
|
||||
UnicodeProperty prop) {
|
||||
if (DEBUG) System.out.println("Writing Interleaved Values: " + prop.getName());
|
||||
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, new RestoreSpacesFilter()))
|
||||
.setNameSource(null)
|
||||
.setShowCount(false)
|
||||
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
||||
}
|
||||
|
||||
private static void writeStringValues(
|
||||
PrintWriter pw,
|
||||
BagFormatter bf,
|
||||
UnicodeProperty prop) {
|
||||
if (DEBUG) System.out.println("Writing String Values: " + prop.getName());
|
||||
bf.setValueSource(prop).setHexValue(true).setMergeRanges(false);
|
||||
bf.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
||||
}
|
||||
|
||||
static class RangeStartComparator implements Comparator {
|
||||
UnicodeProperty prop;
|
||||
CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();
|
||||
|
@ -269,12 +657,35 @@ public class MakeUnicodeFiles {
|
|||
|
||||
}
|
||||
|
||||
public static class ReplaceFilter extends UnicodeProperty.StringFilter {
|
||||
static class RestoreSpacesFilter extends UnicodeProperty.StringFilter {
|
||||
public String remap(String original) {
|
||||
// ok, because doesn't change length
|
||||
String mod = (String) hackMap.get(original);
|
||||
if (mod != null) original = mod;
|
||||
return original.replace('_',' ');
|
||||
}
|
||||
}
|
||||
|
||||
static Comparator CASELESS_COMPARATOR = new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
String s = o1.toString();
|
||||
String t = o2.toString();
|
||||
return s.compareToIgnoreCase(t);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
static class OrderedMap {
|
||||
HashMap map = new HashMap();
|
||||
ArrayList keys = new ArrayList();
|
||||
void put(Object o, Object t) {
|
||||
map.put(o,t);
|
||||
keys.add(o);
|
||||
}
|
||||
List keyset() {
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
*/
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
|
||||
* $Date: 2004/02/06 18:30:20 $
|
||||
* $Revision: 1.11 $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.12 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -39,7 +39,7 @@ final class MyPropertyLister extends PropertyLister {
|
|||
int main = (propMask & 0xFF00);
|
||||
if (main == COMBINING_CLASS) {
|
||||
String s = UCD.getCombiningClassID_fromIndex((short)(propMask & 0xFF), LONG);
|
||||
if (s.startsWith("Fixed")) s = "Other Combining Class";
|
||||
if (s.charAt(0) <= '9') s = "Other Combining Class";
|
||||
return "# " + s;
|
||||
} else if (main == BINARY_PROPERTIES) {
|
||||
return "";
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
# This file contains aliases for properties used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
# For information on which properties are normative, see UCD.html.
|
||||
#
|
||||
# The names may be translated in appropriate environments, and additional
|
||||
# aliases may be useful.
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line has two fields, separated by semicolons.
|
||||
#
|
||||
# First Field: The first field is an abbreviated name for the property.
|
||||
#
|
||||
# Second Field: The second field is a long name
|
||||
#
|
||||
# With loose matching of property names, the case distinctions, whitespace,
|
||||
# and '_' are ignored.
|
||||
#
|
||||
# NOTE: Currently there is at most one abbreviated name and one long name for
|
||||
# each property. However, in the future additional aliases may be added.
|
||||
#
|
||||
# NOTE: The property value names are NOT unique across properties, especially
|
||||
# with loose matches. For example:
|
||||
#
|
||||
# AL means Arabic Letter for the Bidi_Class property, and
|
||||
# AL means Alpha_Left for the Combining_Class property, and
|
||||
# AL means Alphabetic for the Line_Break property.
|
||||
#
|
||||
# In addition, some property names may be the same as some property value names.
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
# For more information, see UTR #18: Regular Expression Guidelines
|
|
@ -1,48 +0,0 @@
|
|||
# This file contains aliases for property values used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
# For information on which properties are normative, see UCD.html.
|
||||
#
|
||||
# The names may be translated in appropriate environments, and additional
|
||||
# aliases may be useful.
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line describes a property value name.
|
||||
# This consists of three fields, separated by semicolons.
|
||||
#
|
||||
# First Field: The first field describes the property for which that
|
||||
# property value name is used.
|
||||
# There is one special pseudo-property: "qc" stands for any quick-check property
|
||||
#
|
||||
# Second Field: The second field is an abbreviated name.
|
||||
# If there is no abbreviated name available, the field is marked with "n/a".
|
||||
#
|
||||
# Third Field: The third field is a long name.
|
||||
#
|
||||
# In the case of ccc, there are 4 fields. The second field is numeric, third
|
||||
# is abbreviated, and fourth is long.
|
||||
#
|
||||
# With loose matching of property names, the case distinctions, whitespace,
|
||||
# and '_' are ignored.
|
||||
#
|
||||
# NOTE: Currently there is at most one abbreviated name and one long name for
|
||||
# each property value. However, in the future additional aliases may be added.
|
||||
# In such a case, the first line for the property value would have
|
||||
# the preferred alias for output.
|
||||
#
|
||||
# NOTE: The property value names are NOT unique across properties, especially
|
||||
# with loose matches. For example:
|
||||
#
|
||||
# AL means Arabic Letter for the Bidi_Class property, and
|
||||
# AL means Alpha_Left for the Combining_Class property, and
|
||||
# AL means Alphabetic for the Line_Break property.
|
||||
#
|
||||
# In addition, some property names may be the same as some property value names.
|
||||
# For example:
|
||||
#
|
||||
# cc means Combining_Class property, and
|
||||
# cc means the General_Category property value Control (cc)
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
# For more information, see UTR #18: Regular Expression Guidelines
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
|
||||
* $Date: 2004/02/07 01:01:14 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2004/02/18 03:09:00 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -138,8 +138,21 @@ public class TestData implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
public static class RegexMatcher implements UnicodeProperty.Matcher {
|
||||
private Matcher matcher;
|
||||
|
||||
public UnicodeProperty.Matcher set(String pattern) {
|
||||
matcher = Pattern.compile(pattern).matcher("");
|
||||
return this;
|
||||
}
|
||||
public boolean matches(String value) {
|
||||
matcher.reset(value);
|
||||
return matcher.matches();
|
||||
}
|
||||
}
|
||||
|
||||
static BagFormatter bf = new BagFormatter();
|
||||
static UnicodeProperty.Matcher matcher = new ICUPropertyFactory.RegexMatcher();
|
||||
static UnicodeProperty.Matcher matcher = new RegexMatcher();
|
||||
|
||||
private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
|
||||
System.out.println("Property Listing");
|
||||
|
|
|
@ -4,6 +4,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
@ -13,7 +14,10 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.text.utility.Utility;
|
||||
|
||||
public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
||||
static final boolean DEBUG = false;
|
||||
private UCD ucd;
|
||||
private Normalizer nfc, nfd, nfkd, nfkc;
|
||||
|
||||
private static boolean needAgeCache = true;
|
||||
private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
|
||||
|
||||
|
@ -29,22 +33,22 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
|
||||
private ToolUnicodePropertySource(String version) {
|
||||
ucd = UCD.make(version);
|
||||
version = ucd.getVersion();
|
||||
TreeSet names = new TreeSet();
|
||||
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
|
||||
Iterator it = names.iterator();
|
||||
while (it.hasNext()) {
|
||||
String name = (String) it.next();
|
||||
//System.out.println("Name: " + name);
|
||||
add(new ToolUnicodeProperty(name));
|
||||
}
|
||||
|
||||
nfc = new Normalizer(Normalizer.NFC, ucd.getVersion());
|
||||
nfd = new Normalizer(Normalizer.NFD, ucd.getVersion());
|
||||
nfkc = new Normalizer(Normalizer.NFKC, ucd.getVersion());
|
||||
nfkd = new Normalizer(Normalizer.NFKD, ucd.getVersion());
|
||||
|
||||
version = ucd.getVersion(); // regularize
|
||||
|
||||
// first the special cases
|
||||
if (DEBUG) System.out.println("Adding Simple Cases");
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
return ucd.getName(codepoint);
|
||||
}
|
||||
}.setMain("Name", "na", UnicodeProperty.STRING, version)
|
||||
}.setMain("Name", "na", UnicodeProperty.MISC, version)
|
||||
.setValues("<string>"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
|
@ -58,7 +62,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
protected UnicodeMap _getUnicodeMap() {
|
||||
return ucd.blockData;
|
||||
}
|
||||
}.setMain("Block", "blk", UnicodeProperty.ENUMERATED, version)
|
||||
}.setMain("Block", "blk", UnicodeProperty.CATALOG, version)
|
||||
.setValues(ucd.getBlockNames(null)));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
|
@ -83,9 +87,125 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
if (Double.isNaN(num)) return null;
|
||||
return Double.toString(num);
|
||||
}
|
||||
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version)
|
||||
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int cp) {
|
||||
if (!ucd.isRepresented(cp)) return null;
|
||||
String b = nfkc.normalize(ucd.getCase(cp, UCD_Types.FULL, UCD_Types.FOLD));
|
||||
String c = nfkc.normalize(ucd.getCase(b, UCD_Types.FULL, UCD_Types.FOLD));
|
||||
if (c.equals(b)) return null;
|
||||
return c;
|
||||
}
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 14;
|
||||
}
|
||||
}.setMain("FC_NFKC_Closure", "FNC", UnicodeProperty.STRING, version)
|
||||
.addName("FC_NFKC"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (!nfd.isNormalized(codepoint)) return "No";
|
||||
else if (nfd.isTrailing(codepoint)) throw new IllegalArgumentException("Internal Error!");
|
||||
else return "Yes";
|
||||
}
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO, YES_NO));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (!nfc.isNormalized(codepoint)) return "No";
|
||||
else if (nfc.isTrailing(codepoint)) return "Maybe";
|
||||
else return "Yes";
|
||||
}
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (!nfkd.isNormalized(codepoint)) return "No";
|
||||
else if (nfkd.isTrailing(codepoint)) throw new IllegalArgumentException("Internal Error!");
|
||||
else return "Yes";
|
||||
}
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO, YES_NO));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (!nfkc.isNormalized(codepoint)) return "No";
|
||||
else if (nfkc.isTrailing(codepoint)) return "Maybe";
|
||||
else return "Yes";
|
||||
}
|
||||
public int getMaxWidth(boolean isShort) {
|
||||
return 15;
|
||||
}
|
||||
}.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version)
|
||||
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
|
||||
|
||||
/*
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (!nfx.isNormalized(codepoint)) return NO;
|
||||
else if (nfx.isTrailing(codepoint)) return MAYBE;
|
||||
else return "";
|
||||
}
|
||||
}.setMain("NFD_QuickCheck", "nv", UnicodeProperty.NUMERIC, version)
|
||||
.setValues("<number>"));
|
||||
*/
|
||||
|
||||
// Now the derived properties
|
||||
if (DEBUG) System.out.println("Derived Properties");
|
||||
for (int i = 0; i < DerivedProperty.DERIVED_PROPERTY_LIMIT; ++i) {
|
||||
UCDProperty prop = DerivedProperty.make(i);
|
||||
if (prop == null) continue;
|
||||
if (!prop.isStandard()) continue;
|
||||
String name = prop.getName();
|
||||
if (getProperty(name) != null) {
|
||||
if (DEBUG) System.out.println("Iterated Names: " + name + ", ALREADY PRESENT*");
|
||||
continue; // skip if already there
|
||||
}
|
||||
int type = prop.getValueType();
|
||||
if (i == UCD_Types.FC_NFKC_Closure) type = UnicodeProperty.STRING;
|
||||
else if (i == UCD_Types.FullCompExclusion) type = UnicodeProperty.BINARY;
|
||||
else type = remapUCDType(type);
|
||||
|
||||
if (DEBUG) System.out.println(prop.getName());
|
||||
add(new UCDPropertyWrapper(prop,type,false));
|
||||
}
|
||||
|
||||
// then the general stuff
|
||||
|
||||
if (DEBUG) System.out.println("Other Properties");
|
||||
List names = new ArrayList();
|
||||
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
|
||||
Iterator it = names.iterator();
|
||||
while (it.hasNext()) {
|
||||
String name = (String) it.next();
|
||||
if (getProperty(name) != null) {
|
||||
if (DEBUG) System.out.println("Iterated Names: " + name + ", ALREADY PRESENT");
|
||||
continue; // skip if already there
|
||||
}
|
||||
if (DEBUG) System.out.println("Iterated Names: " + name);
|
||||
add(new ToolUnicodeProperty(name));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Short value names for the three-valued quick-check properties; passed to
// setValues() together with LONG_YES_NO_MAYBE, entry for entry.
static String[] YES_NO_MAYBE = {"N", "M", "Y"};
|
||||
// Long value names matching YES_NO_MAYBE position by position.
static String[] LONG_YES_NO_MAYBE = {"No", "Maybe", "Yes"};
|
||||
|
||||
// Short value names for the two-valued quick-check properties; passed to
// setValues() together with LONG_YES_NO, entry for entry.
static String[] YES_NO = {"N", "Y"};
|
||||
// Long value names matching YES_NO position by position.
static String[] LONG_YES_NO = {"No", "Yes"};
|
||||
|
||||
/*
|
||||
"Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
|
||||
"Lowercase_Mapping", "Name", "Numeric_Value", "Simple_Case_Folding",
|
||||
|
@ -105,6 +225,57 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
}
|
||||
}
|
||||
*/
|
||||
|
||||
static class UCDPropertyWrapper extends UnicodeProperty {
|
||||
UCDProperty ucdProperty;
|
||||
boolean yes_no_maybe;
|
||||
|
||||
UCDPropertyWrapper(UCDProperty ucdProperty, int type, boolean yes_no_maybe) {
|
||||
this.ucdProperty = ucdProperty;
|
||||
setType(type);
|
||||
String name = ucdProperty.getName(UCDProperty.LONG);
|
||||
if (name == null) ucdProperty.getName(UCDProperty.SHORT);
|
||||
setName(name);
|
||||
this.yes_no_maybe = yes_no_maybe;
|
||||
}
|
||||
protected String _getVersion() {
|
||||
return ucdProperty.getUCD().getVersion();
|
||||
}
|
||||
protected String _getValue(int codepoint) {
|
||||
return ucdProperty.getValue(codepoint, UCDProperty.LONG);
|
||||
}
|
||||
protected List _getNameAliases(List result) {
|
||||
addUnique(ucdProperty.getName(UCDProperty.SHORT), result);
|
||||
addUnique(getName(), result);
|
||||
return result;
|
||||
}
|
||||
protected List _getValueAliases(String valueAlias, List result) {
|
||||
if (isType(BINARY_MASK)) {
|
||||
if (valueAlias.equals("True")) addUnique("T", result);
|
||||
else if (valueAlias.equals("False")) addUnique("F", result);
|
||||
addUnique(valueAlias, result);
|
||||
}
|
||||
if (yes_no_maybe) {
|
||||
if (valueAlias.equals("Yes")) addUnique("Y", result);
|
||||
else if (valueAlias.equals("No")) addUnique("N", result);
|
||||
else if (valueAlias.equals("Maybe")) addUnique("M", result);
|
||||
addUnique(valueAlias, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
protected List _getAvailableValues(List result) {
|
||||
if (isType(BINARY_MASK)) {
|
||||
addUnique("True", result);
|
||||
addUnique("False", result);
|
||||
}
|
||||
if (yes_no_maybe) {
|
||||
addUnique("No",result);
|
||||
addUnique("Maybe",result);
|
||||
addUnique("Yes",result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
// Bit mask with one bit set for each of the general categories Cn, Co, Cs
// and Cc; it is tested against ucd.getCategoryMask(codepoint).
static final int ODD_BALLS = (1<<UCD_Types.Cn) | (1<<UCD_Types.Co) | (1<<UCD_Types.Cs) | (1<<UCD.Cc);
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -119,30 +290,37 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
/**
 * Creates the property registered under <code>propertyAlias</code>.
 *
 * @param propertyAlias name used to look the property up and to name it
 * @throws IllegalArgumentException if no property exists for the alias
 */
private ToolUnicodeProperty(String propertyAlias) {
    propMask = UnifiedProperty.getPropmask(propertyAlias, ucd);
    up = UnifiedProperty.make(propMask, ucd);
    if (up == null) throw new IllegalArgumentException("Not found: " + propertyAlias);
    setType(getPropertyTypeInternal());
    setName(propertyAlias);
    // diagnostic for one specific property; emitted only after the type has
    // been set so that it reports the real type rather than the unset one
    if (propertyAlias.equals("Case_Fold_Turkish_I")) {
        System.out.println(propertyAlias + " " + getTypeName(getType()));
    }
}
|
||||
|
||||
public Collection _getAvailableValueAliases(Collection result) {
|
||||
public List _getAvailableValues(List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
int type = getType() & ~EXTENDED_BIT;
|
||||
if (type == STRING) result.add("<string>");
|
||||
else if (type == NUMERIC) result.add("<string>");
|
||||
int type = getType() & CORE_MASK;
|
||||
if (type == STRING || type == MISC) result.add("<string>");
|
||||
else if (type == NUMERIC) result.add("<number>");
|
||||
else if (type == BINARY) {
|
||||
result.add("True");
|
||||
result.add("False");
|
||||
} else if (type == ENUMERATED) {
|
||||
} else if (type == ENUMERATED || type == CATALOG) {
|
||||
byte style = UCD_Types.LONG;
|
||||
int prop = propMask>>8;
|
||||
String temp = null;
|
||||
boolean titlecase = false;
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
try {
|
||||
boolean check = false;
|
||||
try {
|
||||
switch (prop) {
|
||||
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((short)i, style)); break;
|
||||
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style));
|
||||
check = temp != null;
|
||||
break;
|
||||
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
|
||||
titlecase = true;
|
||||
break;
|
||||
|
@ -151,7 +329,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
|
||||
if ("<unused>".equals(temp)) continue;
|
||||
if (UnicodeProperty.UNUSED.equals(temp)) continue;
|
||||
if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
|
||||
break;
|
||||
case UCD_Types.AGE>>8: temp = (ucd.getAgeID_fromIndex((byte)i, style)); break;
|
||||
|
@ -162,7 +340,11 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
continue;
|
||||
}
|
||||
if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
|
||||
if (check) System.out.println("Value: " + temp);
|
||||
if (temp != null && temp.length() != 0 && !temp.equals(UNUSED)) {
|
||||
result.add(Utility.getUnskeleton(temp, titlecase));
|
||||
}
|
||||
if (check) System.out.println("Value2: " + temp);
|
||||
}
|
||||
//if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
|
||||
//if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
|
||||
|
@ -171,24 +353,22 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
return result;
|
||||
}
|
||||
|
||||
public Collection _getAliases(Collection result) {
|
||||
public List _getNameAliases(List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
String longName = up.getName(UCD_Types.LONG);
|
||||
addUnique(Utility.getUnskeleton(longName, true), result);
|
||||
String shortName = up.getName(UCD_Types.SHORT);
|
||||
addUnique(Utility.getUnskeleton(shortName, false), result);
|
||||
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.SHORT), false), result);
|
||||
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.LONG), true), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public Collection _getValueAliases(String valueAlias, Collection result) {
|
||||
public List _getValueAliases(String valueAlias, List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
int type = getType() & ~EXTENDED_BIT;
|
||||
if (type == STRING) return result;
|
||||
int type = getType() & CORE_MASK;
|
||||
if (type == STRING || type == MISC) return result;
|
||||
else if (type == NUMERIC) return result;
|
||||
else if (type == BINARY) {
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result);
|
||||
} else if (type == ENUMERATED) {
|
||||
} else if (type == ENUMERATED || type == CATALOG) {
|
||||
byte style = UCD_Types.LONG;
|
||||
int prop = propMask>>8;
|
||||
boolean titlecase = false;
|
||||
|
@ -198,7 +378,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
case UCD_Types.CATEGORY>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result);
|
||||
case UCD_Types.COMBINING_CLASS>>8:
|
||||
addUnique(""+i, result);
|
||||
addUnique(String.valueOf(0xFF&Utility.lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, true)), result);
|
||||
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result);
|
||||
case UCD_Types.BIDI_CLASS>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result);
|
||||
|
@ -262,7 +442,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break;
|
||||
}
|
||||
if (temp != null) return Utility.getUnskeleton(temp,titlecase);
|
||||
if (getType() == BINARY) {
|
||||
if (isType(BINARY_MASK)) {
|
||||
return up.hasValue(codepoint) ? "True" : "False";
|
||||
}
|
||||
return "<unknown>";
|
||||
|
@ -285,23 +465,21 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
|
||||
*/
|
||||
private int getPropertyTypeInternal() {
|
||||
int result = 0;
|
||||
String name = up.getName(UCD_Types.LONG);
|
||||
if ("Age".equals(name)) return ENUMERATED;
|
||||
switch (up.getValueType()) {
|
||||
case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
|
||||
case UCD_Types.STRING_PROP: result = STRING; break;
|
||||
case UCD_Types.MISC_PROP: result = STRING; break;
|
||||
case UCD_Types.CATALOG_PROP: result = ENUMERATED; break;
|
||||
case UCD_Types.FLATTENED_BINARY_PROP:
|
||||
case UCD_Types.ENUMERATED_PROP: result = ENUMERATED; break;
|
||||
case UCD_Types.BINARY_PROP: result = BINARY; break;
|
||||
case UCD_Types.UNKNOWN_PROP:
|
||||
default:
|
||||
throw new IllegalArgumentException("Type: UNKNOWN_PROP");
|
||||
|
||||
switch(propMask) {
|
||||
case UCD_Types.BINARY_PROPERTIES | UCD_Types.CaseFoldTurkishI:
|
||||
case UCD_Types.BINARY_PROPERTIES | UCD_Types.Non_break:
|
||||
return EXTENDED_BINARY;
|
||||
}
|
||||
if (!up.isStandard()) result |= EXTENDED_BIT;
|
||||
return result;
|
||||
|
||||
switch(propMask>>8) {
|
||||
case UCD_Types.SCRIPT>>8:
|
||||
case UCD_Types.AGE>>8:
|
||||
return CATALOG;
|
||||
}
|
||||
int mask = 0;
|
||||
if (!up.isStandard()) mask = EXTENDED_MASK;
|
||||
return remapUCDType(up.getValueType()) | mask;
|
||||
}
|
||||
|
||||
public String _getVersion() {
|
||||
|
@ -309,13 +487,66 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
}
|
||||
|
||||
}
|
||||
static Collection lookup(String valueAlias, String[] main, String[] aux, Collection result) {
|
||||
//System.out.println(valueAlias + "=>");
|
||||
int pos = 0xFF & Utility.lookup(valueAlias, main, true);
|
||||
//System.out.println("=>" + aux[pos]);
|
||||
UnicodeProperty.addUnique(valueAlias, result);
|
||||
if (aux == null) return result;
|
||||
return UnicodeProperty.addUnique(aux[pos], result);
|
||||
|
||||
private int remapUCDType(int result) {
|
||||
switch (result) {
|
||||
case UCD_Types.NUMERIC_PROP: result = UnicodeProperty.NUMERIC; break;
|
||||
case UCD_Types.STRING_PROP: result = UnicodeProperty.STRING; break;
|
||||
case UCD_Types.MISC_PROP: result = UnicodeProperty.STRING; break;
|
||||
case UCD_Types.CATALOG_PROP: result = UnicodeProperty.ENUMERATED; break;
|
||||
case UCD_Types.FLATTENED_BINARY_PROP:
|
||||
case UCD_Types.ENUMERATED_PROP: result = UnicodeProperty.ENUMERATED; break;
|
||||
case UCD_Types.BINARY_PROP: result = UnicodeProperty.BINARY; break;
|
||||
case UCD_Types.UNKNOWN_PROP:
|
||||
default:
|
||||
result = UnicodeProperty.STRING;
|
||||
//throw new IllegalArgumentException("Type: UNKNOWN_PROP");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static List lookup(String valueAlias, String[] main, String[] aux, List result) {
|
||||
//System.out.println(valueAlias + "=>");
|
||||
//System.out.println("=>" + aux[pos]);
|
||||
if (aux != null) {
|
||||
int pos = 0xFF & Utility.lookup(valueAlias, main, true);
|
||||
UnicodeProperty.addUnique(aux[pos], result);
|
||||
}
|
||||
return (List) UnicodeProperty.addUnique(valueAlias, result);
|
||||
}
|
||||
|
||||
/*
|
||||
static class DerivedPropertyWrapper extends UnicodeProperty {
|
||||
UCDProperty derivedProperty;
|
||||
UCD ucd;
|
||||
|
||||
DerivedPropertyWrapper(int derivedPropertyID, UCD ucd) {
|
||||
this.ucd = ucd;
|
||||
derivedProperty = DerivedProperty.make(derivedPropertyID, ucd);
|
||||
}
|
||||
protected String _getVersion() {
|
||||
return ucd.getVersion();
|
||||
}
|
||||
|
||||
protected String _getValue(int codepoint) {
|
||||
return derivedProperty.getValue(codepoint, UCD_Types.LONG);
|
||||
}
|
||||
protected List _getNameAliases(List result) {
|
||||
if (result != null) result = new ArrayList(1);
|
||||
addUnique(derivedProperty.getName(UCD_Types.SHORT), result);
|
||||
addUnique(derivedProperty.getName(UCD_Types.LONG), result);
|
||||
return null;
|
||||
}
|
||||
|
||||
protected List _getValueAliases(String valueAlias, List result) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
protected List _getAvailableValues(List result) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2004/02/12 08:23:16 $
|
||||
* $Revision: 1.31 $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.32 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -865,10 +865,17 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
static String getCombiningClassID_fromIndex (short index, byte style) {
|
||||
return index < 0
|
||||
|| index >= UCD_Names.COMBINING_CLASS.length
|
||||
? null
|
||||
: style == SHORT
|
||||
? UCD_Names.COMBINING_CLASS[index]
|
||||
: UCD_Names.LONG_COMBINING_CLASS[index];
|
||||
/*
|
||||
if (index > 255) return null;
|
||||
index &= 0xFF;
|
||||
if (style == NORMAL || style == NUMBER) return String.valueOf(index);
|
||||
String s = "Fixed";
|
||||
String s = "";
|
||||
switch (index) {
|
||||
case 0: s = style < LONG ? "NR" : "NotReordered"; break;
|
||||
case 1: s = style < LONG ? "OV" : "Overlay"; break;
|
||||
|
@ -894,9 +901,10 @@ public final class UCD implements UCD_Types {
|
|||
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
|
||||
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
|
||||
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
|
||||
default: s += "_" + index;
|
||||
default: s += "" + index;
|
||||
}
|
||||
return s;
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
|
@ -1309,6 +1317,7 @@ to guarantee identifier closure.
|
|||
isRemapped = true;
|
||||
result.name = null; // clean this up, since we reuse UNASSIGNED
|
||||
result.shortName = null;
|
||||
result.decompositionType = NONE;
|
||||
if (fixStrings) {
|
||||
constructedName = "<reserved-" + Utility.hex(codePoint, 4) + ">";
|
||||
//result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
|
||||
|
@ -1570,13 +1579,13 @@ to guarantee identifier closure.
|
|||
if (blockData == null) loadBlocks();
|
||||
return (String)blockData.getValue(codePoint);
|
||||
}
|
||||
public Collection getBlockNames() {
|
||||
public List getBlockNames() {
|
||||
return getBlockNames(null);
|
||||
}
|
||||
public Collection getBlockNames(Collection result) {
|
||||
public List getBlockNames(List result) {
|
||||
if (result == null) result = new ArrayList();
|
||||
if (blockData == null) loadBlocks();
|
||||
return blockData.getAvailableValues(result);
|
||||
return (List)blockData.getAvailableValues(result);
|
||||
}
|
||||
public UnicodeSet getBlockSet(String value, UnicodeSet result) {
|
||||
if (result == null) result = new UnicodeSet();
|
||||
|
|
|
@ -70,7 +70,7 @@ public abstract class UCDProperty implements UCD_Types {
|
|||
* Get the full name. Style is SHORT, NORMAL, LONG
|
||||
*/
|
||||
public String getFullName(byte style) {
|
||||
return getProperty(style) + "=" + getValue(style);
|
||||
return getPropertyName(style) + "=" + getValue(style);
|
||||
}
|
||||
|
||||
public String getFullName() {
|
||||
|
@ -79,7 +79,7 @@ public abstract class UCDProperty implements UCD_Types {
|
|||
/**
|
||||
* Get the property name. Style is SHORT, NORMAL, LONG
|
||||
*/
|
||||
public String getProperty(byte style) {
|
||||
public String getPropertyName(byte style) {
|
||||
if (style == NORMAL) style = defaultPropertyStyle;
|
||||
switch (style) {
|
||||
case LONG: return skeletonize ? Utility.getUnskeleton(name.toString(), false) : name.toString();
|
||||
|
@ -89,9 +89,9 @@ public abstract class UCDProperty implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
public String getProperty() { return getProperty(NORMAL); }
|
||||
public String getPropertyName() { return getPropertyName(NORMAL); }
|
||||
|
||||
public void setProperty(byte style, String in) {
|
||||
public void setPropertyName(byte style, String in) {
|
||||
if (style == NORMAL) style = defaultPropertyStyle;
|
||||
switch (style) {
|
||||
case LONG: name = Utility.getUnskeleton(in, false); break;
|
||||
|
@ -145,7 +145,7 @@ public abstract class UCDProperty implements UCD_Types {
|
|||
*/
|
||||
public String getListingValue(int cp) {
|
||||
if (getValueType() != BINARY_PROP) return getValue(cp, LONG);
|
||||
return getProperty(LONG);
|
||||
return getPropertyName(LONG);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -173,8 +173,8 @@ public abstract class UCDProperty implements UCD_Types {
|
|||
|
||||
// Old Name for compatibility
|
||||
boolean isTest() { return isStandard(); }
|
||||
String getName(byte style) { return getProperty(style); }
|
||||
String getName() { return getProperty(); }
|
||||
String getName(byte style) { return getPropertyName(style); }
|
||||
String getName() { return getPropertyName(); }
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2004/02/12 08:23:17 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,6 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
import com.ibm.text.utility.*;
|
||||
|
||||
|
||||
|
@ -297,7 +298,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"TIBETAN", // TIBETAN
|
||||
"MYANMAR", // MYANMAR
|
||||
"GEORGIAN", // GEORGIAN
|
||||
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
|
||||
UnicodeProperty.UNUSED, // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
|
||||
"HANGUL", // HANGUL
|
||||
"ETHIOPIC", // ETHIOPIC
|
||||
"CHEROKEE", // CHEROKEE
|
||||
|
@ -355,7 +356,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"Tibt", // TIBETAN
|
||||
"Mymr", // MYANMAR
|
||||
"Geor", // GEORGIAN
|
||||
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
|
||||
UnicodeProperty.UNUSED, // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
|
||||
"Hang", // HANGUL
|
||||
"Ethi", // ETHIOPIC
|
||||
"Cher", // CHEROKEE
|
||||
|
@ -434,7 +435,7 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
"Cc", // = Other, Control 15
|
||||
"Cf", // = Other, Format 16
|
||||
"<unused>", // missing
|
||||
UnicodeProperty.UNUSED, // missing
|
||||
"Co", // = Other, Private Use 18
|
||||
"Cs", // = Other, Surrogate 19
|
||||
|
||||
|
@ -477,7 +478,7 @@ final class UCD_Names implements UCD_Types {
|
|||
|
||||
"Control", // = Other, Control 15
|
||||
"Format", // = Other, Format 16
|
||||
"<unused>", // missing
|
||||
UnicodeProperty.UNUSED, // missing
|
||||
"PrivateUse", // = Other, Private Use 18
|
||||
"Surrogate", // = Other, Surrogate 19
|
||||
|
||||
|
@ -522,7 +523,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"S", // Segment Separator
|
||||
"WS", // Whitespace
|
||||
"ON", // Other Neutrals ; All other characters: punctuation, symbols
|
||||
"<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
|
||||
UnicodeProperty.UNUSED, "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
|
||||
};
|
||||
|
||||
static String[] LONG_BIDI_CLASS = {
|
||||
|
@ -537,7 +538,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"SegmentSeparator", // Segment Separator
|
||||
"WhiteSpace", // Whitespace
|
||||
"OtherNeutral", // Other Neutrals ; All other characters: punctuation, symbols
|
||||
"<unused>",
|
||||
UnicodeProperty.UNUSED,
|
||||
"BoundaryNeutral", "NonspacingMark", "ArabicLetter",
|
||||
"LeftToRightOverride",
|
||||
"RightToLeftOverride", "LeftToRightEmbedding",
|
||||
|
@ -590,7 +591,7 @@ final class UCD_Names implements UCD_Types {
|
|||
};
|
||||
static {
|
||||
fixArray(LONG_DECOMPOSITION_TYPE);
|
||||
fixArray(DECOMPOSITION_TYPE);
|
||||
//fixArray(DECOMPOSITION_TYPE);
|
||||
}
|
||||
|
||||
|
||||
|
@ -659,7 +660,7 @@ final class UCD_Names implements UCD_Types {
|
|||
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
|
||||
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
|
||||
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
|
||||
default: s = style < LONG ? "" + index : "Fixed_" + index;
|
||||
default: s = "" + index;
|
||||
}
|
||||
if (style < LONG) COMBINING_CLASS[index] = s;
|
||||
else LONG_COMBINING_CLASS[index] = s;
|
||||
|
@ -812,6 +813,13 @@ final class UCD_Names implements UCD_Types {
|
|||
}
|
||||
}
|
||||
|
||||
static void titlecase (String[] array) {
|
||||
for (int i = 0; i < array.length; ++i) {
|
||||
array[i] = array[1].substring(0,1).toUpperCase()
|
||||
+ array[i].substring(1);
|
||||
}
|
||||
}
|
||||
|
||||
public static String[] OLD_JOINING_GROUP = {
|
||||
"<no shaping>",
|
||||
"AIN",
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2004/02/06 18:30:19 $
|
||||
* $Revision: 1.26 $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.27 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
public interface UCD_Types {
|
||||
|
||||
public static final int dVersion = 5; // change to fix the generated file D version. If less than zero, no "d"
|
||||
public static final int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
|
||||
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
|
||||
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
|
||||
* $Date: 2004/02/07 01:01:13 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -63,9 +63,9 @@ public final class UnifiedBinaryProperty extends UCDProperty {
|
|||
if (!up.isStandard()) continue;
|
||||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
String shortValue = Utility.getSkeleton(up.getValue(SHORT));
|
||||
String shortName = Utility.getSkeleton(up.getProperty(SHORT));
|
||||
String shortName = Utility.getSkeleton(up.getPropertyName(SHORT));
|
||||
String longValue = Utility.getSkeleton(up.getValue(LONG));
|
||||
String longName = Utility.getSkeleton(up.getProperty(LONG));
|
||||
String longName = Utility.getSkeleton(up.getPropertyName(LONG));
|
||||
Integer result = new Integer(i);
|
||||
propNameCache.put(longName + "=" + longValue, result);
|
||||
propNameCache.put(longName + "=" + shortValue, result);
|
||||
|
@ -313,8 +313,8 @@ public final class UnifiedBinaryProperty extends UCDProperty {
|
|||
public String getFullName(byte style) {
|
||||
String pre = "";
|
||||
/*if ((majorProp) != BINARY_PROPERTIES>>8)*/ {
|
||||
String preShort = getProperty(SHORT) + "=";
|
||||
String preLong = getProperty(LONG) + "=";
|
||||
String preShort = getPropertyName(SHORT) + "=";
|
||||
String preLong = getPropertyName(LONG) + "=";
|
||||
if (style < LONG) pre = preShort;
|
||||
else if (style == LONG || preShort.equals(preLong)) pre = preLong;
|
||||
else pre = preShort + "(" + preLong + ")";
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
|
||||
* $Date: 2004/02/07 01:01:12 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2004/02/18 03:09:02 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -27,10 +27,6 @@ public final class UnifiedProperty extends UCDProperty {
|
|||
}
|
||||
|
||||
public static UCDProperty make(int propMask, UCD ucd) {
|
||||
if (propMask == AGE) {
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
if ((propMask & 0xFF00) == (BINARY_PROPERTIES & 0xFF00)) {
|
||||
return UnifiedBinaryProperty.make(propMask, ucd);
|
||||
}
|
||||
|
@ -81,25 +77,29 @@ public final class UnifiedProperty extends UCDProperty {
|
|||
}
|
||||
|
||||
private static void cacheNames(UCD ucd) {
|
||||
System.out.println("Caching Property Names");
|
||||
//System.out.println("Caching Property Names");
|
||||
propNameCache = new HashMap();
|
||||
|
||||
for (int i = 0; i < LIMIT_ENUM; ++i) {
|
||||
UCDProperty up = UnifiedProperty.make(i, ucd);
|
||||
if (up == null) continue;
|
||||
if (!up.isStandard()) continue;
|
||||
if (up.getValueType() < BINARY_PROP) continue;
|
||||
String shortRaw = up.getProperty(SHORT);
|
||||
String shortName = Utility.getSkeleton(shortRaw);
|
||||
String longRaw = up.getProperty(LONG);
|
||||
String longName = Utility.getSkeleton(longRaw);
|
||||
//if (up.getValueType() < BINARY_PROP) continue;
|
||||
Integer result = new Integer(i);
|
||||
if (!propNameCache.keySet().contains(longName)) propNameCache.put(longName, result);
|
||||
if (!propNameCache.keySet().contains(shortName)) propNameCache.put(shortName, result);
|
||||
|
||||
String longRaw = up.getPropertyName(LONG);
|
||||
String longName = Utility.getSkeleton(longRaw);
|
||||
String shortRaw = up.getPropertyName(SHORT);
|
||||
String shortName = Utility.getSkeleton(shortRaw);
|
||||
//System.out.println("Caching Names: " + longRaw + ", " + shortRaw);
|
||||
if (longName != null && !propNameCache.keySet().contains(longName)) propNameCache.put(longName, result);
|
||||
|
||||
if (shortName != null && !propNameCache.keySet().contains(shortName)) propNameCache.put(shortName, result);
|
||||
|
||||
String key = longRaw != null ? longRaw : shortRaw;
|
||||
availablePropNames.add(key);
|
||||
}
|
||||
System.out.println("Done Caching");
|
||||
//System.out.println("Done Caching");
|
||||
}
|
||||
|
||||
static Map cache = new HashMap();
|
||||
|
@ -185,8 +185,8 @@ public final class UnifiedProperty extends UCDProperty {
|
|||
|
||||
public String getFullName(byte style) {
|
||||
String pre = "";
|
||||
String preShort = getProperty(SHORT);
|
||||
String preLong = getProperty(LONG);
|
||||
String preShort = getPropertyName(SHORT);
|
||||
String preLong = getPropertyName(LONG);
|
||||
if (style < LONG) pre = preShort;
|
||||
else if (style == LONG || preShort.equals(preLong)) pre = preLong;
|
||||
else pre = preShort + "(" + preLong + ")";
|
||||
|
|
|
@ -37,6 +37,7 @@ public class UnicodeDataFile {
|
|||
result.out.println("# if they have default property values.");
|
||||
result.out.println("# ================================================");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2004/02/12 08:23:14 $
|
||||
* $Revision: 1.39 $
|
||||
* $Date: 2004/02/18 03:09:02 $
|
||||
* $Revision: 1.40 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -22,6 +22,7 @@ import com.ibm.icu.text.UTF16;
|
|||
import com.ibm.icu.text.Replaceable;
|
||||
import com.ibm.icu.text.ReplaceableString;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
||||
|
||||
import com.ibm.text.UCD.*;
|
||||
|
||||
|
@ -119,6 +120,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
*/
|
||||
|
||||
public static String getSkeleton(String source) {
|
||||
return UnicodeProperty.toSkeleton(source);
|
||||
/*
|
||||
skeletonBuffer.setLength(0);
|
||||
boolean gotOne = false;
|
||||
// remove spaces, '_', '-'
|
||||
|
@ -139,6 +142,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
}
|
||||
if (!gotOne) return source; // avoid string creation
|
||||
return skeletonBuffer.toString();
|
||||
*/
|
||||
}
|
||||
|
||||
private static StringBuffer skeletonBuffer = new StringBuffer();
|
||||
|
@ -149,6 +153,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
*/
|
||||
|
||||
public static String getUnskeleton(String source, boolean titlecaseStart) {
|
||||
return UnicodeProperty.regularize(source, titlecaseStart);
|
||||
/*
|
||||
if (source == null) return source;
|
||||
if (source.equals("noBreak")) return source; // HACK
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
@ -176,6 +182,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
lastCat = cat;
|
||||
}
|
||||
return result.toString();
|
||||
*/
|
||||
}
|
||||
|
||||
public static String findSubstring(String source, Set target, boolean invert) {
|
||||
|
|
Loading…
Add table
Reference in a new issue