tool updates

X-SVN-Rev: 14539
This commit is contained in:
Mark Davis 2004-02-18 03:09:02 +00:00
parent 31bffd79b0
commit ffeb40756c
27 changed files with 2027 additions and 912 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java,v $
* $Date: 2004/02/12 00:47:30 $
* $Revision: 1.7 $
* $Date: 2004/02/18 03:08:57 $
* $Revision: 1.8 $
*
*****************************************************************************************
*/
@ -44,6 +44,19 @@ public class BagFormatter {
private static PrintWriter log = CONSOLE;
private boolean abbreviated = false;
private String separator = ",";
private String prefix = "[";
private String suffix = "]";
private UnicodeProperty.Factory source;
private UnicodeLabel nameSource;
private UnicodeLabel labelSource;
private UnicodeLabel valueSource;
private String propName = "";
private boolean showCount = true;
private boolean skipNullValues = true;
private boolean suppressReserved = true;
private boolean hexValue = false;
private static final String NULL_VALUE = "_NULL_VALUE_";
/**
* Compare two UnicodeSets, and show the differences
@ -273,12 +286,12 @@ public class BagFormatter {
return this;
}
/*public String getName(int codePoint) {
return getName(codePoint, false);
}*/
/**
 * Returns the name for a single code point.
 * Delegates to the range overload with an empty separator and a range
 * whose start and end are both {@code codePoint}.
 *
 * @param codePoint the code point to name
 * @return the result of the range overload for that one-element range
 */
public String getName(int codePoint) {
    final String noSeparator = "";
    return getName(noSeparator, codePoint, codePoint);
}
public String getName(String separator, int start, int end) {
if (nameSource == null || nameSource == UnicodeProperty.NULL) return "";
if (getNameSource() == null || getNameSource() == UnicodeProperty.NULL) return "";
String result = getName(start, false);
if (start == end) return separator + result;
String endString = getName(end, false);
@ -291,8 +304,6 @@ public class BagFormatter {
return getName(s, false);
}
UnicodeLabel nameSource;
class NameLabel extends UnicodeLabel {
UnicodeProperty nameProp;
UnicodeSet control;
@ -327,14 +338,18 @@ public class BagFormatter {
// refactored
public String getName(int codePoint, boolean withCodePoint) {
return nameSource.getValue(codePoint, !withCodePoint);
return getNameSource().getValue(codePoint, !withCodePoint);
}
public String getName(String s, boolean withCodePoint) {
return nameSource.getValue(s, separator, !withCodePoint);
return getNameSource().getValue(s, separator, !withCodePoint);
}
/**
 * Hex-formats a string using this formatter's current {@code separator}.
 *
 * @param s the string to format
 * @return the result of the two-argument {@code hex} overload
 */
public String hex(String s) {
    final String joiner = separator;
    return hex(s, joiner);
}
/**
 * Hex-formats a string by asking {@code UnicodeLabel.HEX} for its value.
 *
 * @param s the string to format
 * @param separator passed through to {@code UnicodeLabel.HEX.getValue}
 * @return the value produced by {@code UnicodeLabel.HEX}
 */
public String hex(String s, String separator) {
    final String hexed = UnicodeLabel.HEX.getValue(s, separator, true);
    return hexed;
}
@ -344,36 +359,21 @@ public class BagFormatter {
return s + ".." + Utility.hex(end,4);
}
private String separator = ",";
private String prefix = "[";
private String suffix = "]";
private UnicodeProperty.Factory source;
private UnicodeLabel labelSource;
private UnicodeLabel valueSource = UnicodeLabel.NULL;
private boolean showCount = true;
private boolean suppressReserved = true;
public BagFormatter setUnicodePropertySource(UnicodeProperty.Factory source) {
public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
this.source = source;
nameSource = new NameLabel(source);
return this;
}
/**
 * Returns the property factory used by this formatter.
 * If none has been set yet, {@code ICUPropertyFactory.make()} is stored
 * in the {@code source} field and returned.
 *
 * @return the current (possibly just-created) factory
 */
public UnicodeProperty.Factory getUnicodePropertyFactory() {
    if (source != null) {
        return source;
    }
    source = ICUPropertyFactory.make();
    return source;
}
/**
 * Creates a formatter without an explicit property factory by delegating
 * to the one-argument constructor with {@code null}.
 */
public BagFormatter () {
this(null);
}
public BagFormatter (UnicodeProperty.Factory source) {
if (source == null) source = ICUPropertyFactory.make();
setUnicodePropertySource(source);
Map labelMap = new HashMap();
//labelMap.put("Lo","L&");
labelMap.put("Lu","L&");
labelMap.put("Lt","L&");
labelMap.put("Ll","L&");
setLabelSource(new UnicodeProperty.FilteredProperty(
source.getProperty("General_Category"),
new UnicodeProperty.MapFilter(labelMap)));
setUnicodePropertyFactory(source);
}
public String join(Object o) {
@ -441,29 +441,38 @@ public class BagFormatter {
private PrintWriter output;
Tabber.MonoTabber myTabber;
String commentSeparator;
int counter;
int valueSize;
int labelSize;
public void doAt(Object c, PrintWriter output) {
this.output = output;
counter = 0;
myTabber = new Tabber.MonoTabber();
int valueSize = valueSource.getMaxWidth(shortValue);
if (valueSize > 0) valueSize += 2;
if (!mergeRanges) {
myTabber.add(6,Tabber.LEFT); // code
if (valueSource != UnicodeProperty.NULL) myTabber.add(2 + valueSize,Tabber.LEFT); // value
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
//myTabber.add(4,Tabber.LEFT);
} else {
myTabber.add(13,Tabber.LEFT);
if (valueSource != UnicodeProperty.NULL) myTabber.add(2 + valueSize,Tabber.LEFT); // value
myTabber.add(2 + labelSource.getMaxWidth(shortLabel),Tabber.LEFT);
if (showCount) myTabber.add(8,Tabber.RIGHT);
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
//myTabber.add(7,Tabber.LEFT);
}
myTabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);
if (propName.length() > 0) myTabber.add(propName.length() + 2,Tabber.LEFT);
valueSize = getValueSource().getMaxWidth(shortValue);
System.out.println("ValueSize: " + valueSize);
if (valueSize > 0) myTabber.add(valueSize + 2,Tabber.LEFT); // value
myTabber.add(3,Tabber.LEFT); // comment character
labelSize = getLabelSource().getMaxWidth(shortLabel);
if (labelSize > 0) myTabber.add(labelSize + 1,Tabber.LEFT); // value
if (mergeRanges && showCount) myTabber.add(5,Tabber.RIGHT);
if (showLiteral != null) myTabber.add(4,Tabber.LEFT);
//myTabber.add(7,Tabber.LEFT);
commentSeparator = (showCount || showLiteral != null
|| labelSource != UnicodeProperty.NULL || nameSource != UnicodeProperty.NULL)
? "\t# " : "";
|| getLabelSource() != UnicodeProperty.NULL || getNameSource() != UnicodeProperty.NULL)
? "\t #" : "";
System.out.println("Tabber: " + myTabber.toString());
System.out.println("Tabber: " + myTabber.process("a\tb\td\td\tf\tg\th"));
doAt(c);
}
@ -479,7 +488,7 @@ public class BagFormatter {
protected void doBefore(Object container, Object o) {
if (showSetAlso && container instanceof UnicodeSet) {
output.print("# " + container + lineSeparator);
output.print("#" + container + lineSeparator);
}
}
@ -487,7 +496,7 @@ public class BagFormatter {
}
protected void doAfter(Object container, Object o) {
output.print(lineSeparator + "# Total code points: " + nf.format(count(container)) + lineSeparator);
output.print(lineSeparator + "# Total code points: " + nf.format(counter));
}
protected void doSimpleAt(Object o) {
@ -500,6 +509,7 @@ public class BagFormatter {
output.print("->");
doAt(value);
doAfter(o, value);
counter++;
} else if (o instanceof Visitor.CodePointRange) {
doAt((Visitor.CodePointRange) o);
} else {
@ -512,59 +522,63 @@ public class BagFormatter {
+ "\t"
+ getName(thing))
+ lineSeparator);
counter++;
}
}
protected void doAt(Visitor.CodePointRange usi) {
if (!mergeRanges) {
for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
String label = labelSource.getValue(cp, shortLabel);
String value = valueSource.getValue(cp, shortValue);
if (value.length() != 0) {
value = "\t; " + value;
}
output.print(
myTabber.process(
Utility.hex(cp, 4)
+ value
+ commentSeparator
+ label
+ insertLiteral(cp,cp)
+ getName("\t", cp, cp))
+ lineSeparator);
String label = getLabelSource().getValue(cp, shortLabel);
String value = getValue(cp, shortValue);
showLine(cp, cp, label, value);
}
} else {
rf.reset(usi.codepoint, usi.codepointEnd + 1);
while (rf.next()) {
/*
String label = (usi.codepoint != usi.codepointEnd)
? label = getLabels(usi.codepoint, usi.codepointEnd)
: getLabel(usi.codepoint);
*/
int start = rf.start;
int end = rf.limit - 1;
String label = rf.label;
String value = rf.value;
if (value.length() != 0) {
value = "\t; " + value;
}
String count = !showCount ? ""
: end == start ? "\t"
: "\t["+ nf.format(end - start + 1)+ "]";
output.print(
myTabber.process(
hex(start, end)
+ value
+ commentSeparator
+ label
+ count
+ insertLiteral(start, end)
+ getName("\t", start, end))
+ lineSeparator);
showLine(rf.start, rf.limit - 1, rf.label, rf.value);
}
}
}
/**
 * Prints one tab-aligned output line for the code point range
 * {@code start..end} (inclusive) and adds the range size to {@code counter}.
 *
 * Nothing is printed or counted when {@code value} is the
 * {@code NULL_VALUE} sentinel.
 *
 * @param start first code point of the range
 * @param end last code point of the range (inclusive)
 * @param label label text; must be empty when {@code labelSize} is not positive
 * @param value value text; must be empty when {@code valueSize} is not positive
 * @throws IllegalArgumentException if a non-empty value or label arrives
 *         although the corresponding max width was reported as not positive
 */
private void showLine(int start, int end, String label, String value) {
    // Compared by identity: getValue() hands back this very constant for null results.
    if (value == NULL_VALUE) {
        return;
    }

    final int rangeSize = end - start + 1;
    counter += rangeSize;

    // Property-name column, only present when a name has been set.
    final String propColumn = (propName.length() == 0) ? propName : "\t; " + propName;

    // Value column: a column exists only if a width was reserved for it.
    if (valueSize > 0) {
        value = "\t; " + value;
    } else if (value.length() > 0) {
        throw new IllegalArgumentException(
            "maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
    }

    // Label column, same rule as the value column.
    if (labelSize > 0) {
        label = "\t" + label;
    } else if (label.length() > 0) {
        throw new IllegalArgumentException(
            "maxwidth bogus " + label + ", " + getLabelSource().getMaxWidth(shortLabel));
    }

    // Count column: shown only for merged ranges; single code points get an empty cell.
    String countColumn = "";
    if (mergeRanges && showCount) {
        countColumn = (end == start)
            ? "\t"
            : "\t [" + nf.format(rangeSize) + "]";
    }

    final String columns =
        hex(start, end)
        + propColumn
        + value
        + commentSeparator
        + label
        + countColumn
        + insertLiteral(start, end)
        + getName("\t ", start, end);
    output.print(myTabber.process(columns) + lineSeparator);
}
private String insertLiteral(String thing) {
return (showLiteral == null ? ""
: " \t(" + showLiteral.transliterate(thing) + ") ");
@ -648,19 +662,25 @@ public class BagFormatter {
if (limit >= veryLimit)
return false;
start = limit; // set to end of last
label = labelSource.getValue(limit, shortLabel);
value = valueSource.getValue(limit, shortLabel);
label = getLabelSource().getValue(limit, shortLabel);
value = getValue(limit, shortLabel);
limit++;
for (; limit < veryLimit; limit++) {
String s = labelSource.getValue(limit, shortLabel);
String v = valueSource.getValue(limit, shortLabel);
if (!s.equals(label) || !v.equals(value)) break;
String s = getLabelSource().getValue(limit, shortLabel);
String v = getValue(limit, shortLabel);
if (!equalTo(s, label) || !equalTo(v, value)) break;
}
// at this point, limit is the first item that has a different label than source
// OR, we got to the end, and limit == veryLimit
return true;
}
}
/**
 * Null-tolerant equality check.
 *
 * @param a first object, may be null
 * @param b second object, may be null
 * @return true if both references are identical (including both null),
 *         or if {@code a} is non-null and {@code a.equals(b)}
 */
boolean equalTo(Object a, Object b) {
    return a == b || (a != null && a.equals(b));
}
boolean shortLabel = true;
boolean shortValue = true;
@ -692,11 +712,18 @@ public class BagFormatter {
return this;
}
public UnicodeProperty.Factory getSource() {
return source;
}
/**
 * Returns the label source, building a default one on first use.
 * The default is the General_Category property from the current property
 * factory, filtered so that Lu, Lt and Ll are all reported as "L&amp;".
 *
 * @return the current (possibly just-created) label source
 */
public UnicodeLabel getLabelSource() {
    if (labelSource != null) {
        return labelSource;
    }
    // Cased-letter categories are folded into one label; "Lo" is deliberately left alone.
    final String[] casedLetters = {"Lu", "Lt", "Ll"};
    Map categoryFold = new HashMap();
    for (int i = 0; i < casedLetters.length; ++i) {
        categoryFold.put(casedLetters[i], "L&");
    }
    labelSource = new UnicodeProperty.FilteredProperty(
        getUnicodePropertyFactory().getProperty("General_Category"),
        new UnicodeProperty.MapFilter(categoryFold)
    ).setAllowValueAliasCollisions(true);
    return labelSource;
}
@ -821,6 +848,9 @@ public class BagFormatter {
* @return
*/
public UnicodeLabel getNameSource() {
    // Already initialised: hand back the cached instance.
    if (nameSource != null) {
        return nameSource;
    }
    // First use: build a NameLabel over the current property factory and cache it.
    nameSource = new NameLabel(getUnicodePropertyFactory());
    return nameSource;
}
@ -837,9 +867,17 @@ public class BagFormatter {
* @return
*/
public UnicodeLabel getValueSource() {
    if (valueSource != null) {
        return valueSource;
    }
    // No value source configured: fall back to UnicodeLabel.NULL and remember it.
    valueSource = UnicodeLabel.NULL;
    return valueSource;
}
/**
 * Fetches the value for a code point from the value source.
 *
 * @param cp the code point
 * @param shortValue passed through to the value source
 * @return the {@code NULL_VALUE} sentinel if the source returns null;
 *         otherwise the value, hex-formatted with a space separator when
 *         {@code hexValue} is set
 */
private String getValue(int cp, boolean shortValue) {
    final String raw = getValueSource().getValue(cp, shortValue);
    if (raw == null) {
        return NULL_VALUE;
    }
    return hexValue ? hex(raw, " ") : raw;
}
/**
* @param label
*/
@ -868,4 +906,35 @@ public class BagFormatter {
return this;
}
/**
 * Returns the property name stored in this formatter.
 *
 * @return the current value of the {@code propName} field
 */
public String getPropName() {
return propName;
}
/**
 * Sets the property name stored in this formatter.
 *
 * @param string the new property name; {@code null} is stored as the empty string
 * @return this formatter, for call chaining
 */
public BagFormatter setPropName(String string) {
    propName = (string == null) ? "" : string;
    return this;
}
/**
 * Reports the current setting of the {@code hexValue} flag.
 *
 * @return the value of the {@code hexValue} field
 */
public boolean isHexValue() {
return hexValue;
}
/**
 * Sets the {@code hexValue} flag.
 *
 * @param b the new flag value
 * @return this formatter, for call chaining
 */
public BagFormatter setHexValue(boolean b) {
hexValue = b;
return this;
}
}

View file

@ -6,8 +6,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java,v $
* $Date: 2004/02/12 00:47:30 $
* $Revision: 1.2 $
* $Date: 2004/02/18 03:08:57 $
* $Revision: 1.3 $
*
*****************************************************************************************
*/
@ -45,52 +45,21 @@ import com.ibm.icu.util.VersionInfo;
public class ICUPropertyFactory extends UnicodeProperty.Factory {
/**
 * A {@code UnicodeProperty.Matcher} backed by a regular expression.
 * {@link #set(String)} must be called before {@link #matches(String)};
 * until then the internal matcher is null.
 */
public static class RegexMatcher implements UnicodeProperty.Matcher {
    private Matcher matcher;

    /**
     * Compiles {@code pattern} and keeps a reusable matcher for it.
     *
     * @param pattern the regular expression to compile
     * @return this object
     */
    public UnicodeProperty.Matcher set(String pattern) {
        Pattern compiled = Pattern.compile(pattern);
        matcher = compiled.matcher("");
        return this;
    }

    /**
     * Tests whether the whole of {@code value} matches the stored pattern.
     *
     * @param value the text to test
     * @return true if the entire input matches
     */
    public boolean matches(String value) {
        return matcher.reset(value).matches();
    }
}
static class ICUProperty extends UnicodeProperty {
protected int propEnum = Integer.MIN_VALUE;
protected ICUProperty(String propName, int propEnum) {
this.propEnum = propEnum;
setName(propName);
this.propEnum = propEnum;
setType(internalGetPropertyType(propEnum));
}
boolean shownException = false;
public String _getValue(int codePoint) {
if (propEnum < UProperty.INT_LIMIT) {
int enumValue = -1;
String value = null;
try {
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
if (enumValue >= 0) value = UCharacter.getPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
} catch (IllegalArgumentException e) {
if (!shownException) {
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
shownException = true;
}
}
return value != null ? value : String.valueOf(enumValue);
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
double num = UCharacter.getUnicodeNumericValue(codePoint);
if (num == UCharacter.NO_NUMERIC_VALUE) return null;
return Double.toString(num);
// TODO: Fix HACK -- API deficient
} else switch(propEnum) {
switch(propEnum) {
case UProperty.AGE: String temp = UCharacter.getAge(codePoint).toString();
if (temp.equals("0.0.0.0")) return "UNSPECIFIED";
if (temp.equals("0.0.0.0")) return "unassigned";
if (temp.endsWith(".0.0")) return temp.substring(0,temp.length()-4);
return temp;
case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(UCharacter.getMirror(codePoint));
@ -119,31 +88,26 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
case isCasefolded: return String.valueOf(UCharacter.foldCase(UTF16.valueOf(codePoint),true).equals(UTF16.valueOf(codePoint)));
case isCased: return String.valueOf(UCharacter.toLowerCase(Locale.ENGLISH,UTF16.valueOf(codePoint)).equals(UTF16.valueOf(codePoint)));
}
return null;
}
public Collection _getAvailableValueAliases(Collection result) {
if (result == null) result = new ArrayList();
if (propEnum < UProperty.INT_LIMIT) {
if (Binary_Extras.isInRange(propEnum)) {
propEnum = UProperty.BINARY_START; // HACK
}
int start = UCharacter.getIntPropertyMinValue(propEnum);
int end = UCharacter.getIntPropertyMaxValue(propEnum);
for (int i = start; i <= end; ++i) {
String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
if (alias == null) {
alias = alias2;
int enumValue = -1;
String value = null;
try {
enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum);
if (enumValue >= 0) value = fixedGetPropertyValueName(propEnum,enumValue, UProperty.NameChoice.LONG);
} catch (IllegalArgumentException e) {
if (!shownException) {
System.out.println("Fail: " + getName() + ", " + Integer.toHexString(codePoint));
shownException = true;
}
//System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
if (alias != null && !result.contains(alias)) result.add(alias);
}
} else {
String alias = getFixedValueAlias(null, -1,UProperty.NameChoice.LONG);
if (alias != null && !result.contains(alias)) result.add(alias);
return value != null ? value : String.valueOf(enumValue);
} else if (propEnum < UProperty.DOUBLE_LIMIT) {
double num = UCharacter.getUnicodeNumericValue(codePoint);
if (num == UCharacter.NO_NUMERIC_VALUE) return null;
return Double.toString(num);
// TODO: Fix HACK -- API deficient
}
return result;
return null;
}
/**
@ -161,7 +125,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
return "<number>";
}
if (valueAlias != null && !valueAlias.equals("<integer>")) {
valueEnum = UCharacter.getPropertyValueEnum(propEnum,valueAlias);
valueEnum = fixedGetPropertyValueEnum(propEnum,valueAlias);
}
// because these are defined badly, there may be no normal (long) name.
// if there is
@ -171,50 +135,112 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
if (nameChoice == UProperty.NameChoice.LONG) {
result = fixedGetPropertyValueName(propEnum,valueEnum, UProperty.NameChoice.SHORT);
if (result != null) return result;
if (propEnum == UProperty.CANONICAL_COMBINING_CLASS) return null;
return "<integer>";
}
return null;
}
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) {
try {
return UCharacter.getPropertyValueName(propEnum,valueEnum,nameChoice);
return UCharacter.getPropertyValueEnum(propEnum, valueAlias);
} catch (Exception e) {
return Integer.parseInt(valueAlias);
}
}
static Map fixSkeleton = new HashMap();
/**
 * Looks up a property value name through ICU and returns a regularized
 * form of it, caching the raw-to-regularized mapping in {@code fixSkeleton}.
 * Joining_Group names are lower-cased (English locale) before being
 * regularized.
 *
 * @param propEnum the ICU property selector
 * @param valueEnum the ICU value selector
 * @param nameChoice which name form to ask ICU for
 * @return the regularized name, or null if any exception is thrown on the way
 */
private static String fixedGetPropertyValueName(int propEnum, int valueEnum, int nameChoice) {
    try {
        final String rawName = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice);
        final String cached = (String) fixSkeleton.get(rawName);
        if (cached != null) {
            return cached;
        }
        String fixed = rawName;
        if (propEnum == UProperty.JOINING_GROUP) {
            fixed = fixed.toLowerCase(Locale.ENGLISH);
        }
        fixed = regularize(fixed, true);
        fixSkeleton.put(rawName, fixed);
        return fixed;
    } catch (Exception e) {
        // Any failure (including ICU rejecting the arguments) means "no name".
        return null;
    }
}
public Collection _getAliases(Collection result) {
public List _getNameAliases(List result) {
if (result == null) result = new ArrayList();
String alias = String_Extras.get(propEnum);
if (alias == null) alias = Binary_Extras.get(propEnum);
if (alias != null) {
if (!result.contains(alias)) result.add(alias);
addUnique(alias, result);
} else {
try {
for (int nameChoice = 0; ; ++nameChoice) {
alias = UCharacter.getPropertyName(propEnum, nameChoice);
if (alias == null) break;
if (nameChoice > 2) {
System.out.println("Something wrong");
}
if (!result.contains(alias)) result.add(alias);
}
} catch (IllegalArgumentException e) {
// ok, continue
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result);
addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result);
}
return result;
}
/**
 * Looks up one name of an ICU property, turning ICU's
 * {@code IllegalArgumentException} into a null result.
 *
 * @param propName the ICU property selector to look up (an int enum value,
 *        despite the parameter's name)
 * @param nameChoice which name form to ask ICU for
 * @return whatever {@code UCharacter.getPropertyName} returns, or null if it
 *         throws {@code IllegalArgumentException}
 */
public String getFixedPropertyName(int propName, int nameChoice) {
    try {
        // Use the argument: the earlier body ignored it and always read the
        // propEnum field, which only worked because callers passed that field.
        return UCharacter.getPropertyName(propName, nameChoice);
    } catch (IllegalArgumentException e) {
        return null;
    }
}
private Map cccHack = new HashMap();
boolean needCccHack = true;
/**
 * Adds the values this property can take to {@code result}, skipping
 * duplicates, and returns the list.
 *
 * Age gets a fixed, hard-coded list of versions. Properties below
 * {@code UProperty.INT_LIMIT} are enumerated from ICU's min to max int
 * value. Everything else contributes the single alias obtained for
 * value -1.
 *
 * @param result list to append to; a new ArrayList is created when null
 * @return {@code result} (or the newly created list)
 */
public List _getAvailableValues(List result) {
if (result == null) result = new ArrayList();
if (propEnum == UProperty.AGE) {
// Age values are not enumerable here, so they are listed explicitly.
addAllUnique(new String[] {
"unassigned","1.1","2.0","2.1","3.0","3.1","3.2","4.0"},
result);
return result;
}
if (propEnum < UProperty.INT_LIMIT) {
// NOTE(review): this overwrites the propEnum FIELD, not a local, so the
// change outlives this call; later lookups on this object appear to
// depend on it -- confirm before touching.
if (Binary_Extras.isInRange(propEnum)) {
propEnum = UProperty.BINARY_START; // HACK
}
int start = UCharacter.getIntPropertyMinValue(propEnum);
int end = UCharacter.getIntPropertyMaxValue(propEnum);
for (int i = start; i <= end; ++i) {
// Prefer the long alias, fall back to the short one.
String alias = getFixedValueAlias(null, i, UProperty.NameChoice.LONG);
String alias2 = getFixedValueAlias(null, i, UProperty.NameChoice.SHORT);
if (alias == null) {
alias = alias2;
// Combining classes without any name are represented by their number.
if (alias == null && propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
alias = String.valueOf(i);
}
}
// First pass only: remember alias -> numeric class in cccHack
// (read back by _getValueAliases via cccHack.get).
if (needCccHack && propEnum == UProperty.CANONICAL_COMBINING_CLASS) { // HACK
cccHack.put(alias, String.valueOf(i));
}
//System.out.println(propertyAlias + "\t" + i + ":\t" + alias);
addUnique(alias, result);
}
needCccHack = false;
} else {
String alias = getFixedValueAlias(null, -1,UProperty.NameChoice.LONG);
addUnique(alias, result);
}
return result;
}
public Collection _getValueAliases(String valueAlias, Collection result) {
public List _getValueAliases(String valueAlias, List result) {
if (result == null) result = new ArrayList();
for (int nameChoice = 0; ; ++nameChoice) {
String alias = getFixedValueAlias(valueAlias, -1, nameChoice);
if (nameChoice > 2) break;
if (alias == null) continue;
if (!result.contains(alias)) result.add(alias);
if (propEnum == UProperty.AGE) {
addUnique(valueAlias, result);
return result;
}
if (propEnum == UProperty.CANONICAL_COMBINING_CLASS) {
addUnique(cccHack.get(valueAlias), result); // add number
}
addUnique(getFixedValueAlias(valueAlias, -1, UProperty.NameChoice.SHORT), result);
addUnique(getFixedValueAlias(valueAlias, -1, UProperty.NameChoice.LONG), result);
return result;
}
@ -224,12 +250,16 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
*/
private int internalGetPropertyType(int propEnum) {
switch(propEnum) {
//case UProperty.AGE:
//case UProperty.NAME:
//case UProperty.UNICODE_1_NAME:
case UProperty.AGE:
case UProperty.BLOCK:
case UProperty.SCRIPT:
return UnicodeProperty.CATALOG;
case UProperty.ISO_COMMENT:
case UProperty.NAME:
case UProperty.UNICODE_1_NAME:
return UnicodeProperty.MISC;
case UProperty.BIDI_MIRRORING_GLYPH:
case UProperty.CASE_FOLDING:
case UProperty.ISO_COMMENT:
case UProperty.LOWERCASE_MAPPING:
case UProperty.SIMPLE_CASE_FOLDING:
case UProperty.SIMPLE_LOWERCASE_MAPPING:
@ -237,7 +267,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
case UProperty.SIMPLE_UPPERCASE_MAPPING:
case UProperty.TITLECASE_MAPPING:
case UProperty.UPPERCASE_MAPPING:
return UnicodeProperty.EXTENDED_STRING;
return UnicodeProperty.EXTENDED_STRING;
}
if (propEnum < UProperty.BINARY_START) return UnicodeProperty.UNKNOWN;
if (propEnum < UProperty.BINARY_LIMIT) return UnicodeProperty.BINARY;
@ -312,7 +342,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
;
private ICUPropertyFactory() {
Collection c = getInternalAvailablePropertyAliases(new TreeSet());
Collection c = getInternalAvailablePropertyAliases(new ArrayList());
Iterator it = c.iterator();
while (it.hasNext()) {
add(getInternalProperty((String)it.next()));
@ -327,7 +357,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
return singleton;
}
public Collection getInternalAvailablePropertyAliases(Collection result) {
public List getInternalAvailablePropertyAliases(List result) {
int[][] ranges = {
{UProperty.BINARY_START, UProperty.BINARY_LIMIT},
{UProperty.INT_START, UProperty.INT_LIMIT},
@ -337,6 +367,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
for (int i = 0; i < ranges.length; ++i) {
for (int j = ranges[i][0]; j < ranges[i][1]; ++j) {
String alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG);
UnicodeProperty.addUnique(alias, result);
if (!result.contains(alias)) result.add(alias);
}
}

View file

@ -6,8 +6,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/Tabber.java,v $
* $Date: 2004/02/12 00:47:30 $
* $Revision: 1.4 $
* $Date: 2004/02/18 03:08:57 $
* $Revision: 1.5 $
*
*****************************************************************************************
*/
@ -17,7 +17,8 @@ import java.util.ArrayList;
import java.util.List;
public abstract class Tabber {
static final byte LEFT = 0, CENTER = 1, RIGHT = 2;
public static final byte LEFT = 0, CENTER = 1, RIGHT = 2;
private static final String[] ALIGNMENT_NAMES = {"Left", "Center", "Right"};
/**
* Repeats a string n times
@ -54,18 +55,32 @@ public abstract class Tabber {
public abstract void process_field(int count, String source, int start, int limit, StringBuffer output);
public static class MonoTabber extends Tabber {
int minGap = 0;
private List stops = new ArrayList();
private List types = new ArrayList();
/**
 * Describes the configured tab stops as {@code Alignment,position} pairs
 * joined by "; ", in the order they were added.
 *
 * @return the description; empty when no stops are defined
 */
public String toString() {
    StringBuffer description = new StringBuffer();
    final int stopCount = stops.size();
    for (int index = 0; index < stopCount; ++index) {
        if (index > 0) {
            description.append("; ");
        }
        final int alignment = ((Integer) types.get(index)).intValue();
        description.append(ALIGNMENT_NAMES[alignment]);
        description.append(",");
        description.append(stops.get(index));
    }
    return description.toString();
}
/**
* Adds tab stop and how to align the text UP TO that stop
* @param tabPos
* @param type
*/
public void addAbsolute(int tabPos, int type) {
public MonoTabber addAbsolute(int tabPos, int type) {
stops.add(new Integer(tabPos));
types.add(new Integer(type));
return this;
}
/**
@ -73,10 +88,11 @@ public abstract class Tabber {
* @param tabPos
* @param type
*/
public void add(int fieldWidth, byte type) {
public MonoTabber add(int fieldWidth, byte type) {
int last = getStop(stops.size()-1);
stops.add(new Integer(last + fieldWidth));
types.add(new Integer(type));
return this;
}
public int getStop(int fieldNumber) {
@ -116,7 +132,7 @@ public abstract class Tabber {
public void process_field(int count, String source, int start, int limit, StringBuffer output) {
String piece = source.substring(start, limit);
int startPos = getStop(count-1);
int endPos = getStop(count) - 1;
int endPos = getStop(count) - minGap;
int type = getType(count);
switch (type) {
case LEFT:
@ -129,11 +145,9 @@ public abstract class Tabber {
break;
}
if (output.length() < startPos) {
output.append(repeat(" ", startPos - output.length()));
} else if (startPos != 0) { // don't do anything on first instance
output.append(" "); // otherwise minimum of first space
}
int gap = startPos - output.length();
if (count != 0 && gap < minGap) gap = minGap;
if (gap > 0) output.append(repeat(" ", gap));
output.append(piece);
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/util/TestBagFormatter.java,v $
* $Date: 2004/02/12 00:47:30 $
* $Revision: 1.8 $
* $Date: 2004/02/18 03:08:57 $
* $Revision: 1.9 $
*
*****************************************************************************************
*/
@ -16,6 +16,7 @@ package com.ibm.icu.dev.test.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.TreeSet;
import java.util.Iterator;
import java.io.IOException;
@ -29,47 +30,78 @@ import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
// TODO change to use test framework
public class TestBagFormatter {
/**
 * Convenience overload that runs the alias listing against the factory
 * returned by {@code ICUPropertyFactory.make()}.
 *
 * @param showValues passed through to the two-argument overload
 */
static final void generatePropertyAliases(boolean showValues) {
    final UnicodeProperty.Factory defaultFactory = ICUPropertyFactory.make();
    generatePropertyAliases(showValues, defaultFactory);
}
static final void generatePropertyAliases(boolean showValues, UnicodeProperty.Factory ups) {
Collator order = Collator.getInstance(Locale.ENGLISH);
UnicodeProperty.Factory ups = ICUPropertyFactory.make();
TreeSet props = new TreeSet(order);
TreeSet values = new TreeSet(order);
Collection aliases = new ArrayList();
BagFormatter bf = new BagFormatter();
ups.getAvailableAliases(props);
Iterator it = props.iterator();
while (it.hasNext()) {
String propAlias = (String)it.next();
UnicodeProperty up = ups.getProperty(propAlias);
System.out.println();
aliases.clear();
System.out.println(bf.join(up.getAliases(aliases)));
if (!showValues) continue;
values.clear();
up.getAvailableValueAliases(values);
Iterator it2 = values.iterator();
while (it2.hasNext()) {
String valueAlias = (String)it2.next();
aliases.clear();
System.out.println("\t" + bf.join(up.getValueAliases(valueAlias, aliases)));
props.addAll(ups.getAvailableNames());
for (int i = UnicodeProperty.BINARY; i < UnicodeProperty.LIMIT_TYPE; ++i) {
System.out.println(UnicodeProperty.getTypeName(i));
Iterator it = props.iterator();
while (it.hasNext()) {
String propAlias = (String)it.next();
UnicodeProperty up = ups.getProperty(propAlias);
int type = up.getType();
if (type != i) continue;
System.out.println();
System.out.println(propAlias + "\t" + bf.join(up.getNameAliases()));
if (!showValues) continue;
values.clear();
if (type == UnicodeProperty.NUMERIC || type == UnicodeProperty.EXTENDED_NUMERIC) {
UnicodeMap um = new UnicodeMap();
um.putAll(up);
System.out.println(um.toString(new NumberComparator()));
continue;
}
values.clear();
values.addAll(up.getAvailableValues());
Iterator it2 = values.iterator();
while (it2.hasNext()) {
String valueAlias = (String)it2.next();
System.out.println("\t" + bf.join(valueAlias + "\t" + up.getValueAliases(valueAlias)));
}
}
}
}
/**
 * Orders strings by the double value they parse to; nulls sort last.
 * Two operands that are the same reference (including both null) compare
 * as equal without being parsed.
 */
static class NumberComparator implements Comparator {
    /**
     * @param o1 a String parseable by {@code Double.parseDouble}, or null
     * @param o2 a String parseable by {@code Double.parseDouble}, or null
     * @return -1, 0 or 1
     * @throws NumberFormatException if a non-null operand is not a number
     * @throws ClassCastException if a non-null operand is not a String
     */
    public int compare(Object o1, Object o2) {
        if (o1 == o2) {
            return 0;
        }
        if (o1 == null || o2 == null) {
            // Exactly one side is null here; the null side is the larger one.
            return (o1 == null) ? 1 : -1;
        }
        final double left = Double.parseDouble((String) o1);
        final double right = Double.parseDouble((String) o2);
        if (left < right) {
            return -1;
        }
        if (left > right) {
            return 1;
        }
        return 0;
    }
}
public static void main(String[] args) throws Exception {
System.out.println("Start");
try {
//readCharacters();
UnicodeProperty prop = ICUPropertyFactory.make().getProperty("Canonicalcombiningclass");
prop.getAvailableValues();
generatePropertyAliases(true);
BagFormatter bf = new BagFormatter();
UnicodeSet us = new UnicodeSet("[:numeric_value=2:]");
UnicodeSet us = new UnicodeSet("[:gc=nd:]");
BagFormatter.CONSOLE.println("[:gc=nd:]");
bf.showSetNames(BagFormatter.CONSOLE,us);
us = new UnicodeSet("[:numeric_value=2:]");
BagFormatter.CONSOLE.println("[:numeric_value=2:]");
bf.showSetNames(BagFormatter.CONSOLE,us);
us = new UnicodeSet("[:numeric_type=numeric:]");
BagFormatter.CONSOLE.println("[:numeric_type=numeric:]");
bf.showSetNames(BagFormatter.CONSOLE,us);

View file

@ -1,9 +1,13 @@
package com.ibm.icu.dev.test.util;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@ -11,6 +15,8 @@ import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.AbstractTestLog;
import com.ibm.icu.dev.test.TestBoilerplate;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
@ -32,15 +38,16 @@ public class TestUtilities extends TestFmwk {
Map map3 = new TreeMap();
UnicodeMap.Equator equator = new UnicodeMap.SimpleEquator();
SortedSet log = new TreeSet();
static String[] TEST_VALUES = {null, "A", "B", "C", "D", "E", "F"};
static Random random = new Random(12345);
public void TestUnicodeMap() {
Random random = new Random(12345);
String[] values = {null, "A", "B", "C", "D", "E", "F"};
random.setSeed(12345);
// do random change to both, then compare
logln("Comparing against HashMap");
for (int counter = 0; counter < ITERATIONS; ++counter) {
int start = random.nextInt(LIMIT);
String value = values[random.nextInt(values.length)];
String value = TEST_VALUES[random.nextInt(TEST_VALUES.length)];
String logline = Utility.hex(start) + "\t" + value;
if (SHOW_PROGRESS) logln(counter + "\t" + logline);
log.add(logline);
@ -67,33 +74,30 @@ public class TestUtilities extends TestFmwk {
Set values1 = (Set) map1.getAvailableValues(new TreeSet());
Set values2 = new TreeSet(map2.values());
Set temp;
if (!values1.equals(values2)) {
errln("Values differ:");
errln("UnicodeMap - HashMap");
temp = new TreeSet(values1);
temp.removeAll(values2);
errln(show(temp));
errln("HashMap - UnicodeMap");
temp = new TreeSet(values2);
temp.removeAll(values1);
errln(show(temp));
} else {
logln("Comparing Sets");
for (Iterator it = values1.iterator(); it.hasNext();) {
Object value = it.next();
logln(value == null ? "null" : value.toString());
UnicodeSet set1 = map1.getSet(value);
UnicodeSet set2 = getSet(map2, value);
if (!set1.equals(set2)) {
errln("Sets differ:");
errln("UnicodeMap - HashMap");
errln(new UnicodeSet(set1).removeAll(set2).toPattern(true));
errln("HashMap - UnicodeMap");
errln(new UnicodeSet(set2).removeAll(set1).toPattern(true));
}
}
if (!TestBoilerplate.verifySetsIdentical(this, values1, values2)) {
throw new IllegalArgumentException("Halting");
}
logln("Comparing Sets");
for (Iterator it = values1.iterator(); it.hasNext();) {
Object value = it.next();
logln(value == null ? "null" : value.toString());
UnicodeSet set1 = map1.getSet(value);
UnicodeSet set2 = TestBoilerplate.getSet(map2, value);
if (!TestBoilerplate.verifySetsIdentical(this, set1, set2)) {
throw new IllegalArgumentException("Halting");
}
}
// check boilerplate
List argList = new ArrayList();
argList.add("TestMain");
if (params.nothrow) argList.add("-nothrow");
if (params.verbose) argList.add("-verbose");
String[] args = new String[argList.size()];
argList.toArray(args);
new UnicodeMapBoilerplate().run(args);
// TODO: the following is not being reached
new UnicodeSetBoilerplate().run(args);
}
public void check(int counter) {
@ -105,9 +109,8 @@ public class TestUtilities extends TestFmwk {
+ "\t UnicodeMap: " + value1
+ "\t HashMap: " + value2);
errln("UnicodeMap: " + map1);
errln("Log: " + show(log));
errln("HashMap: " + show(map2));
throw new IllegalArgumentException("Halting");
errln("Log: " + TestBoilerplate.show(log));
errln("HashMap: " + TestBoilerplate.show(map2));
}
}
}
@ -189,31 +192,78 @@ public class TestUtilities extends TestFmwk {
return (end-start)/1000/iterations;
}
String show(Collection c) {
StringBuffer buffer = new StringBuffer();
for (Iterator it = c.iterator(); it.hasNext();) {
buffer.append(it.next() + "\r\n");
static class UnicodeMapBoilerplate extends TestBoilerplate {
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
*/
protected boolean _hasSameBehavior(Object a, Object b) {
// Stub: both arguments are ignored and the answer is always true.
// we are pretty confident in the equals method, so won't bother with this right now.
return true;
}
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_addTestObject(java.util.List)
*/
/**
 * Appends one randomly filled UnicodeMap to the list of test objects.
 * Each map receives 50 puts of a value drawn from TEST_VALUES (which
 * includes null) at a code point in the range 0..24.
 *
 * @param list the collection of test objects built so far
 * @return false once the list already holds more than 30 objects
 *         (nothing is added in that case), true otherwise
 */
protected boolean _addTestObject(List list) {
    if (list.size() > 30) {
        return false;
    }
    UnicodeMap map = new UnicodeMap();
    int remaining = 50;
    while (remaining-- > 0) {
        // Draw the code point first, then the value index, so the shared
        // random sequence is consumed in a fixed order.
        int codePoint = random.nextInt(25);
        int valueIndex = random.nextInt(TEST_VALUES.length);
        map.put(codePoint, TEST_VALUES[valueIndex]);
    }
    list.add(map);
    return true;
}
return buffer.toString();
}
String show(Map m) {
StringBuffer buffer = new StringBuffer();
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
Object key = it.next();
buffer.append(key + "=>" + m.get(key) + "\r\n");
static class StringBoilerplate extends TestBoilerplate {
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
*/
protected boolean _hasSameBehavior(Object a, Object b) {
// Stub: both arguments are ignored and the answer is always true.
// we are pretty confident in the equals method, so won't bother with this right now.
return true;
}
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_addTestObject(java.util.List)
*/
/**
 * Appends one random 10-character string to the list of test objects.
 * Every character is drawn from the range U+0000..U+00FE.
 *
 * @param list the collection of test objects built so far
 * @return false once the list already holds more than 31 objects
 *         (nothing is added in that case), true otherwise
 */
protected boolean _addTestObject(List list) {
    if (list.size() > 31) {
        return false;
    }
    char[] chars = new char[10];
    for (int pos = 0; pos < chars.length; pos++) {
        chars[pos] = (char) random.nextInt(0xFF);
    }
    list.add(new String(chars));
    return true;
}
return buffer.toString();
}
UnicodeSet getSet(Map m, Object value) {
UnicodeSet result = new UnicodeSet();
for (Iterator it = m.keySet().iterator(); it.hasNext();) {
Object key = it.next();
Object val = m.get(key);
if (!val.equals(value)) continue;
result.add(((Integer)key).intValue());
static class UnicodeSetBoilerplate extends TestBoilerplate {
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_hasSameBehavior(java.lang.Object, java.lang.Object)
*/
protected boolean _hasSameBehavior(Object a, Object b) {
// Stub: both arguments are ignored and the answer is always true.
// we are pretty confident in the equals method, so won't bother with this right now.
return true;
}
/*
* @see com.ibm.icu.dev.test.TestBoilerplate#_addTestObject(java.util.List)
*/
/**
 * Builds a UnicodeSet from 50 random code points in the range 0..99 and
 * appends its string form to the list of test objects.
 *
 * @param list the collection of test objects built so far
 * @return false once the list already holds more than 32 objects
 *         (nothing is added in that case), true otherwise
 */
protected boolean _addTestObject(List list) {
    if (list.size() > 32) {
        return false;
    }
    UnicodeSet set = new UnicodeSet();
    int remaining = 50;
    while (remaining-- > 0) {
        set.add(random.nextInt(100));
    }
    // NOTE(review): the object stored is set.toString() (a String), not the
    // UnicodeSet itself -- confirm this is intended for a UnicodeSet boilerplate.
    String pattern = set.toString();
    list.add(pattern);
    return true;
}
return result;
}
}

View file

@ -21,15 +21,18 @@ public abstract class UnicodeLabel {
public int getMaxWidth(boolean isShort) {
return 0;
}
/**
 * Label that renders a code point as hex digits via Utility.hex(codepoint, 4),
 * with a "U+" prefix in the long form.
 */
private static class Hex extends UnicodeLabel {
    /**
     * @param codepoint the code point to format
     * @param isShort   true for bare hex digits, false for the "U+" form
     * @return the formatted code point
     */
    public String getValue(int codepoint, boolean isShort) {
        String digits = Utility.hex(codepoint, 4);
        return isShort ? digits : "U+" + digits;
    }
}
public static class Constant extends UnicodeLabel {
private String value;
public Constant(String value) {
if (value == null) value = "";
this.value = value;
}
public String getValue(int codepoint, boolean isShort) {

View file

@ -1,9 +1,13 @@
package com.ibm.icu.dev.test.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UnicodeSet;
@ -14,7 +18,7 @@ import com.ibm.icu.text.UnicodeSetIterator;
* @author Davis
*/
// TODO Optimize using range map
public final class UnicodeMap {
public final class UnicodeMap implements Cloneable {
static final boolean ASSERTIONS = false;
static final long GROWTH_PERCENT = 200; // 100 is no growth!
static final long GROWTH_GAP = 10; // extra bump!
@ -23,8 +27,48 @@ public final class UnicodeMap {
private int[] transitions = {0,0x110000,0,0,0,0,0,0,0,0};
private Object[] values = new Object[10];
{
values[1] = "TERMINAL";
values[1] = "TERMINAL"; // just for debugging
}
private int lastIndex = 0;
/* Boilerplate */
/**
 * Compares this map with another object. Two maps are equal when they have
 * the same length, equal equators, and identical transitions and
 * equator-equal values for every range.
 *
 * The type check is an explicit instanceof test rather than a caught
 * ClassCastException, so a ClassCastException raised while comparing values
 * is no longer silently turned into "not equal".
 *
 * @param other the object to compare with; may be null
 * @return true if other is a UnicodeMap with the same contents
 */
public boolean equals(Object other) {
    if (this == other) {
        return true;
    }
    if (!(other instanceof UnicodeMap)) {
        return false; // also covers null
    }
    UnicodeMap that = (UnicodeMap) other;
    if (length != that.length || !equator.equals(that.equator)) {
        return false;
    }
    // The final slot (index length-1) is the terminal sentinel and is not compared.
    for (int i = 0; i < length - 1; ++i) {
        if (transitions[i] != that.transitions[i]) {
            return false;
        }
        if (!equator.isEqual(values[i], that.values[i])) {
            return false;
        }
    }
    return true;
}
/**
 * Hash code built from the length plus every transition and value hash
 * (taken through the equator) for indexes 0..length-2, which are the same
 * slots that equals() compares.
 *
 * @return the hash code of this map
 */
public int hashCode() {
    int hash = length;
    int rangeCount = length - 1;
    // TODO might want to abbreviate this for speed.
    for (int index = 0; index < rangeCount; index++) {
        hash = 37 * hash + transitions[index];
        hash = 37 * hash + equator.getHashCode(values[index]);
    }
    return hash;
}
/**
* Standard clone. Warning, as with Collections, does not do deep clone.
*/
public Object clone() {
    UnicodeMap that = new UnicodeMap();
    that.length = length;
    // The arrays are copied, but the value objects they refer to are shared.
    that.transitions = (int[]) transitions.clone();
    that.values = (Object[]) values.clone();
    // Carry the equator across: equals() compares equators, so a copy that
    // fell back to the default one could compare unequal to its original.
    that.equator = equator;
    return that;
}
/* for internal consistency checking */
void _checkInvariants() {
if (length < 2
@ -62,17 +106,28 @@ public final class UnicodeMap {
* @return
*/
public boolean isEqual(Object a, Object b);
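/**
* Returns a hash code for a value; UnicodeMap.hashCode() folds the result
* for each stored value into the hash of the map.
* @param object the value to hash
* @return the hash code for the value
*/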
public int getHashCode(Object object);
}
public static class SimpleEquator implements Equator {
public static final class SimpleEquator implements Equator {
/**
 * Null-safe equality: identical references (including two nulls) are equal,
 * a null never equals a non-null, and otherwise a.equals(b) decides.
 *
 * @param a first value, may be null
 * @param b second value, may be null
 * @return true if the two values are considered equal
 */
public boolean isEqual(Object a, Object b) {
    if (a == null || b == null) {
        // At least one side is null: equal only when both are.
        return a == b;
    }
    return a == b || a.equals(b);
}
/**
 * Null-safe hash code.
 *
 * @param a the value to hash, may be null
 * @return 0 for null, otherwise a.hashCode()
 */
public int getHashCode(Object a) {
    return (a == null) ? 0 : a.hashCode();
}
}
private Equator equator = new SimpleEquator();
private static Equator SIMPLE = new SimpleEquator();
private Equator equator = SIMPLE;
/**
* Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
* Assumes that 0 <= codepoint <= 0x10FFFF
@ -181,41 +236,53 @@ public final class UnicodeMap {
* @return this, for chaining
*/
private UnicodeMap _put(int codepoint, Object value) {
int baseIndex = _findIndex(codepoint);
// Warning: baseIndex is an invariant; must
// be defined such that transitions[baseIndex] < codepoint
// at end of this routine.
int baseIndex;
if (transitions[lastIndex] <= codepoint
&& codepoint < transitions[lastIndex+1]) {
baseIndex = lastIndex;
} else {
baseIndex = _findIndex(codepoint);
}
int limitIndex = baseIndex + 1;
// cases are (a) value is already set
if (equator.isEqual(values[baseIndex], value)) return this;
int baseCP = transitions[baseIndex];
int limitCP = transitions[limitIndex];
// CASE: At very start of range
// we now start walking through the difference case,
// based on whether we are at the start or end of range
// and whether the range is a single character or multiple
if (baseCP == codepoint) {
// CASE: At very start of range
boolean connectsWithPrevious =
baseIndex != 0 && equator.isEqual(value, values[baseIndex-1]);
// CASE: Single codepoint range
if (limitCP == codepoint + 1) {
// CASE: Single codepoint range
boolean connectsWithFollowing =
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
// A1a connects with previous & following, so remove index
if (connectsWithPrevious) {
// A1a connects with previous & following, so remove index
if (connectsWithFollowing) {
_removeAt(baseIndex, 2);
return this;
} else {
_removeAt(baseIndex, 1); // extend previous
}
_removeAt(baseIndex, 1); // extend previous
return this;
--baseIndex; // fix up
} else if (connectsWithFollowing) {
_removeAt(baseIndex, 1); // extend following backwards
transitions[baseIndex] = codepoint;
return this;
} else {
// doesn't connect on either side, just reset
values[baseIndex] = value;
}
// doesn't connect on either side, just reset
values[baseIndex] = value;
return this;
}
} else if (connectsWithPrevious) {
// A.1: start of multi codepoint range
// if connects
if (connectsWithPrevious) {
++transitions[baseIndex]; // extend previous
} else {
// otherwise insert new transition
@ -224,10 +291,8 @@ public final class UnicodeMap {
values[baseIndex] = value;
transitions[baseIndex] = codepoint;
}
return this;
}
// CASE: at end of range
if (limitCP == codepoint + 1) {
} else if (limitCP == codepoint + 1) {
// CASE: at end of range
// if connects, just back up range
boolean connectsWithFollowing =
baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
@ -240,14 +305,16 @@ public final class UnicodeMap {
transitions[limitIndex] = codepoint;
values[limitIndex] = value;
}
return this;
} else {
// CASE: in middle of range
// insert gap, then set the new range
_insertGapAt(++baseIndex,2);
transitions[baseIndex] = codepoint;
values[baseIndex] = value;
transitions[baseIndex+1] = codepoint + 1;
values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
}
// CASE: in middle of range
_insertGapAt(++baseIndex,2);
transitions[baseIndex] = codepoint;
values[baseIndex] = value;
transitions[++baseIndex] = codepoint + 1;
values[baseIndex] = values[baseIndex-2]; // copy lower range values
lastIndex = baseIndex; // store for next time
return this;
}
/**
@ -331,7 +398,9 @@ public final class UnicodeMap {
public UnicodeSet getSet(Object value, UnicodeSet result) {
if (result == null) result = new UnicodeSet();
for (int i = 0; i < length - 1; ++i) {
if (values[i] == value) result.add(transitions[i], transitions[i+1]-1);
if (equator.isEqual(value, values[i])) {
result.add(transitions[i], transitions[i+1]-1);
}
}
return result;
}
@ -339,14 +408,15 @@ public final class UnicodeMap {
return getSet(value,null);
}
/**
* Returns the list of possible values. Deposits into
* result if it is not null. Remember to clear if you just want
* Returns the list of possible values. Deposits each non-null value into
* result. Creates result if it is null. Remember to clear result if
* you are not appending to existing collection.
* @param result
* @return
*/
public Collection getAvailableValues(Collection result) {
if (result == null) result = new HashSet();
for (int i = 0; i < length - 1; ++i) {
if (result == null) result = new ArrayList(1);
for (int i = 0; i < length - 1; ++i) {
Object value = values[i];
if (value == null) continue;
if (result.contains(value)) continue;
@ -354,6 +424,13 @@ public final class UnicodeMap {
}
return result;
}
/**
* Convenience method
*/
public Collection getAvailableValues() {
return getAvailableValues(null);
}
/**
* Gets the value associated with a given code point.
* Returns null, if there is no such value.
@ -368,18 +445,33 @@ public final class UnicodeMap {
}
public String toString() {
return toString(null);
}
public String toString(Comparator collected) {
StringBuffer result = new StringBuffer();
for (int i = 0; i < length-1; ++i) {
Object value = values[i];
if (value == null) continue;
int start = transitions[i];
int end = transitions[i+1]-1;
result.append(Utility.hex(start));
if (start != end) result.append("..")
.append(Utility.hex(end));
result.append("\t=>")
.append(values[i] == null ? "null" : values[i].toString())
.append("\r\n");
if (collected == null) {
for (int i = 0; i < length-1; ++i) {
Object value = values[i];
if (value == null) continue;
int start = transitions[i];
int end = transitions[i+1]-1;
result.append(Utility.hex(start));
if (start != end) result.append("..")
.append(Utility.hex(end));
result.append("\t=> ")
.append(values[i] == null ? "null" : values[i].toString())
.append("\r\n");
}
} else {
Set set = (Set) getAvailableValues(new TreeSet(collected));
for (Iterator it = set.iterator(); it.hasNext();) {
Object value = it.next();
UnicodeSet s = getSet(value);
result.append(value)
.append("\t=> ")
.append(s.toPattern(true))
.append("\r\n");
}
}
return result.toString();
}

View file

@ -14,26 +14,41 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import sun.io.UnknownCharacterException;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
public abstract class UnicodeProperty extends UnicodeLabel {
public static boolean DEBUG = false;
public static boolean DEBUG = true;
public static String CHECK_NAME = "FC_NFKC_Closure";
public static int CHECK_VALUE = 0x037A;
private String propertyAlias;
private String shortestPropertyAlias = null;
private String name;
private String firstNameAlias = null;
private int type;
private Map valueToShortValue = null;
private Map valueToFirstValueAlias = null;
public static final int UNKNOWN = 0,
BINARY = 2, EXTENDED_BINARY = 3,
ENUMERATED = 4, EXTENDED_ENUMERATED = 5,
NUMERIC = 6, EXTENDED_NUMERIC = 7,
STRING = 8, EXTENDED_STRING = 9,
LIMIT_TYPE = 10,
EXTENDED_BIT = 1;
CATALOG = 6, EXTENDED_CATALOG = 7,
MISC = 8, EXTENDED_MISC = 9,
STRING = 10, EXTENDED_STRING = 11,
NUMERIC = 12, EXTENDED_NUMERIC = 13,
START_TYPE = 2,
LIMIT_TYPE = 14,
EXTENDED_MASK = 1,
CORE_MASK = ~EXTENDED_MASK,
BINARY_MASK = (1<<BINARY) | (1<<EXTENDED_BINARY),
STRING_OR_MISC_MASK = (1<<STRING) | (1<<EXTENDED_STRING)
| (1<<MISC) | (1<<EXTENDED_MISC),
ENUMERATED_OR_CATALOG_MASK = (1<<ENUMERATED) | (1<<EXTENDED_ENUMERATED)
| (1<<CATALOG) | (1<<EXTENDED_CATALOG);
private static final String[] TYPE_NAMES = {
"Unknown",
@ -42,10 +57,14 @@ public abstract class UnicodeProperty extends UnicodeLabel {
"Extended Binary",
"Enumerated",
"Extended Enumerated",
"Numeric",
"Extended Numeric",
"Catalog",
"Extended Catalog",
"Miscellaneous",
"Extended Miscellaneous",
"String",
"Extended String",
"Numeric",
"Extended Numeric",
};
public static String getTypeName(int propType) {
@ -53,15 +72,20 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
public final String getName() {
return propertyAlias;
return name;
}
public final int getType() {
return type;
}
/**
 * Tests this property's type against a bit mask in which bit n stands for
 * type constant n (for example BINARY_MASK or STRING_OR_MISC_MASK).
 *
 * @param mask the set of accepted types, one bit per type constant
 * @return true if the bit for this property's type is set in the mask
 */
public final boolean isType(int mask) {
    int typeBit = 1 << type;
    return (typeBit & mask) != 0;
}
protected final void setName(String string) {
propertyAlias = string;
if (string == null) throw new IllegalArgumentException("Name must not be null");
name = string;
}
protected final void setType(int i) {
@ -72,48 +96,59 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return _getVersion();
}
/**
 * Returns the property value for a code point, as supplied by _getValue.
 * When DEBUG is on and both the code point and the property name match
 * CHECK_VALUE / CHECK_NAME, the value is also printed to System.out
 * (hex-escaped when the property type is STRING).
 *
 * @param codepoint the code point to look up
 * @return the value returned by _getValue for that code point
 */
public String getValue(int codepoint) {
    boolean traced = DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName());
    String value = _getValue(codepoint);
    if (traced) {
        String shown = (getType() == STRING) ? Utility.hex(value) : value;
        System.out.println(getName() + "(" + Utility.hex(codepoint) + "):" + shown);
    }
    return value;
}
public Collection getAliases(Collection result) {
return _getAliases(result);
/**
 * Collects the aliases of this property's name.
 *
 * @param result list to fill, or null to have a new list created
 * @return whatever _getNameAliases returns for the (possibly new) list
 */
public List getNameAliases(List result) {
    List target = (result != null) ? result : new ArrayList(1);
    return _getNameAliases(target);
}
public Collection getValueAliases(String valueAlias, Collection result) {
public List getValueAliases(String valueAlias, List result) {
if (result == null) result = new ArrayList(1);
result = _getValueAliases(valueAlias, result);
if (!result.contains(valueAlias) && type < NUMERIC) {
throw new IllegalArgumentException(
"Internal error: result doesn't contain " + valueAlias);
"Internal error: " + getName() + " doesn't contain " + valueAlias
+ ": " + new BagFormatter().join(result));
}
return result;
}
public Collection getAvailableValueAliases(Collection result) {
return _getAvailableValueAliases(result);
/**
 * Collects the values this property can take.
 *
 * @param result list to fill, or null to have a new list created
 * @return whatever _getAvailableValues returns for the (possibly new) list
 */
public List getAvailableValues(List result) {
    List target = (result != null) ? result : new ArrayList(1);
    return _getAvailableValues(target);
}
protected abstract String _getVersion();
protected abstract String _getValue(int codepoint);
protected abstract Collection _getAliases(Collection result);
protected abstract Collection _getValueAliases(String valueAlias, Collection result);
protected abstract Collection _getAvailableValueAliases(Collection result);
protected abstract List _getNameAliases(List result);
protected abstract List _getValueAliases(String valueAlias, List result);
protected abstract List _getAvailableValues(List result);
// conveniences
public final Collection getAliases() {
return _getAliases(null);
public final List getNameAliases() {
return getNameAliases(null);
}
public final Collection getValueAliases(String valueAlias) {
return _getValueAliases(valueAlias, null);
public final List getValueAliases(String valueAlias) {
return getValueAliases(valueAlias, null);
}
public final Collection getAvailableValueAliases() {
return _getAvailableValueAliases(null);
public final List getAvailableValues() {
return getAvailableValues(null);
}
static public class Factory {
Map canonicalNames = new TreeMap();
Map skeletonNames = new TreeMap();
Map propertyCache = new HashMap();
Map propertyCache = new HashMap(1);
public final Factory add(UnicodeProperty sp) {
canonicalNames.put(sp.getName(), sp);
Collection c = sp.getAliases(new TreeSet());
List c = sp.getNameAliases(new ArrayList(1));
Iterator it = c.iterator();
while (it.hasNext()) {
skeletonNames.put(toSkeleton((String)it.next()), sp);
@ -125,23 +160,34 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias));
}
public final Collection getAvailableAliases(Collection result) {
if (result == null) result = new ArrayList();
public final List getAvailableNames() {
return getAvailableNames(null);
}
/**
 * Adds the canonical name of every registered property to a list, skipping
 * names the list already contains.
 *
 * @param result list to append to, or null to have a new list created
 * @return the list that received the names
 */
public final List getAvailableNames(List result) {
    List names = result;
    if (names == null) {
        names = new ArrayList(1);
    }
    for (Iterator keys = canonicalNames.keySet().iterator(); keys.hasNext();) {
        addUnique(keys.next(), names);
    }
    return names;
}
public final Collection getAvailableAliases() {
return getAvailableAliases(null);
}
public final Collection getAvailablePropertyAliases(Collection result, int propertyTypeMask) {
public final List getAvailableNames(int propertyTypeMask) {
return getAvailableNames(propertyTypeMask, null);
}
public final List getAvailableNames(int propertyTypeMask, List result) {
if (result == null) result = new ArrayList(1);
Iterator it = canonicalNames.keySet().iterator();
while (it.hasNext()) {
UnicodeProperty property = (UnicodeProperty)it.next();
if (((1<<property.getType())& propertyTypeMask) == 0) continue;
String item = (String)it.next();
UnicodeProperty property = getProperty(item);
if (DEBUG) System.out.println("Properties: " + item + "," + property.getType());
if (!property.isType(propertyTypeMask)) {
//System.out.println("Masking: " + property.getType() + "," + propertyTypeMask);
continue;
}
addUnique(property.getName(), result);
}
return result;
@ -164,7 +210,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
UnicodeProperty up = getProperty(prop);
if (matcher == null) {
matcher = new SimpleMatcher(value,
up.getType() >= STRING ? null : new SkeletonComparator());
up.isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR);
}
if (negative) {
inverseMatcher.set(matcher);
@ -186,6 +232,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
protected StringFilter filter;
protected UnicodeSetIterator matchIterator = new UnicodeSetIterator(new UnicodeSet(0,0x10FFFF));
protected HashMap backmap;
boolean allowValueAliasCollisions = false;
public FilteredProperty(UnicodeProperty property, StringFilter filter) {
this.property = property;
@ -201,52 +248,63 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return this;
}
Collection temp = new ArrayList();
List temp = new ArrayList(1);
public Collection _getAvailableValueAliases(Collection result) {
public List _getAvailableValues(List result) {
temp.clear();
return filter.addUnique(property.getAvailableValueAliases(temp), result);
return filter.addUnique(property.getAvailableValues(temp), result);
}
public Collection _getAliases(Collection result) {
public List _getNameAliases(List result) {
temp.clear();
return filter.addUnique(
property.getAliases(temp), result);
property.getNameAliases(temp), result);
}
public String _getValue(int codepoint) {
return filter.remap(property.getValue(codepoint));
}
public Collection _getValueAliases(String valueAlias, Collection result) {
temp.clear();
public List _getValueAliases(String valueAlias, List result) {
if (backmap == null) {
backmap = new HashMap();
backmap = new HashMap(1);
temp.clear();
Iterator it = property.getAvailableValueAliases(temp).iterator();
Iterator it = property.getAvailableValues(temp).iterator();
while (it.hasNext()) {
String item = (String) it.next();
String mappedItem = filter.remap(item);
if (backmap.get(mappedItem) != null) {
throw new IllegalArgumentException("Filter makes values collide!");
if (backmap.get(mappedItem) != null && !allowValueAliasCollisions) {
throw new IllegalArgumentException("Filter makes values collide! "
+ item + ", " + mappedItem);
}
backmap.put(mappedItem, item);
}
}
return filter.addUnique(
property.getValueAliases((String) backmap.get(valueAlias), temp), result);
valueAlias = (String) backmap.get(valueAlias);
temp.clear();
return filter.addUnique(property.getValueAliases(valueAlias, temp), result);
}
public String _getVersion() {
return property.getVersion();
}
public boolean isAllowValueAliasCollisions() {
return allowValueAliasCollisions;
}
public FilteredProperty setAllowValueAliasCollisions(boolean b) {
allowValueAliasCollisions = b;
return this;
}
}
public static abstract class StringFilter implements Cloneable {
public abstract String remap(String original);
public final Collection addUnique(Collection source, Collection result) {
if (result == null) result = new ArrayList();
public final List addUnique(Collection source, List result) {
if (result == null) result = new ArrayList(1);
Iterator it = source.iterator();
while (it.hasNext()) {
UnicodeProperty.addUnique(
@ -322,123 +380,139 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
public static abstract class SimpleProperty extends UnicodeProperty {
private String shortAlias;
Collection valueAliases = new ArrayList();
Map toAlternates = new HashMap();
private List propertyAliases = new ArrayList(1);
List values;
Map toValueAliases = new HashMap(1);
String version;
public SimpleProperty setMain(String alias, String shortAlias, int propertyType,
String version) {
setName(alias);
setType(propertyType);
this.shortAlias = shortAlias;
propertyAliases.add(shortAlias);
propertyAliases.add(alias);
this.version = version;
return this;
}
public SimpleProperty addName(String alias) {
propertyAliases.add(alias);
return this;
}
public SimpleProperty setValues(String valueAlias) {
setValues(new String[]{valueAlias}, null);
_addToValues(valueAlias, null);
return this;
}
public SimpleProperty setValues(String[] valueAliases, String[] alternateValueAliases) {
this.valueAliases = Arrays.asList((Object[]) valueAliases.clone());
for (int i = 0; i < valueAliases.length; ++i) {
List a = new ArrayList();
addUnique(valueAliases[i],a);
if (alternateValueAliases != null) addUnique(alternateValueAliases[i],a);
toAlternates.put(valueAliases[i], a);
if (valueAliases[i].equals(UNUSED)) continue;
_addToValues(valueAliases[i],
alternateValueAliases != null ? alternateValueAliases[i] : null);
}
return this;
}
public SimpleProperty setValues(Collection valueAliases) {
this.valueAliases = new ArrayList(valueAliases);
for (Iterator it = this.valueAliases.iterator(); it.hasNext(); ) {
Object item = it.next();
List list = new ArrayList();
list.add(item);
toAlternates.put(item, list);
/**
 * Replaces the value list with a copy of the given list and registers each
 * entry, with no extra alias, through _addToValues.
 *
 * @param valueAliases the values of this property
 * @return this, for chaining
 */
public SimpleProperty setValues(List valueAliases) {
    List copy = new ArrayList(valueAliases);
    this.values = copy;
    Iterator items = copy.iterator();
    while (items.hasNext()) {
        _addToValues(items.next(), null);
    }
    return this;
}
public Collection _getAliases(Collection result) {
if (result == null) result = new ArrayList();
addUnique(getName(), result);
addUnique(shortAlias, result);
// Adds the registered property aliases to result via addAllUnique
// and returns that same list.
public List _getNameAliases(List result) {
addAllUnique(propertyAliases, result);
return result;
}
public Collection _getValueAliases(String valueAlias, Collection result) {
if (result == null) result = new ArrayList();
Collection a = (Collection) toAlternates.get(valueAlias);
/**
 * Adds the aliases recorded for a value to the result list.
 *
 * The alias table is filled lazily from the Unicode map when no values have
 * been registered yet. The guard also tests 'values', as _getAvailableValues
 * does, because toValueAliases is created eagerly and so is not null unless
 * something else clears it; testing it alone would leave the lazy fill unused.
 *
 * @param valueAlias the value whose aliases are wanted
 * @param result     the list to append to; must not be null
 * @return result
 */
public List _getValueAliases(String valueAlias, List result) {
    if (toValueAliases == null || values == null) {
        _fillValues();
    }
    List a = (List) toValueAliases.get(valueAlias);
    if (a != null) {
        addAllUnique(a, result);
    }
    return result;
}
public Collection _getAvailableValueAliases(Collection result) {
if (result == null) result = new ArrayList();
result.addAll(valueAliases);
/**
 * Appends all known values of this property to the result list, filling
 * the value list lazily from the Unicode map on first use.
 *
 * @param result the list to append to; must not be null
 * @return result
 */
public List _getAvailableValues(List result) {
    if (values == null) {
        _fillValues();
    }
    // _fillValues leaves 'values' null when it finds nothing to add;
    // in that case there is simply nothing to append.
    if (values != null) {
        result.addAll(values);
    }
    return result;
}
/**
 * Registers every value reported by the Unicode map's getAvailableValues,
 * each without an extra alias.
 */
private void _fillValues() {
    List found = (List) getUnicodeMap().getAvailableValues(new ArrayList());
    Iterator items = found.iterator();
    while (items.hasNext()) {
        _addToValues(items.next(), null);
    }
}
/**
 * Records a value and, optionally, one alias for it.
 *
 * @param item  the value; added to the value list if not already present
 * @param alias an alias for the value, passed to addUnique ahead of the value itself
 */
private void _addToValues(Object item, Object alias) {
    if (values == null) {
        values = new ArrayList(1);
    }
    addUnique(item, values);

    List known = (List) toValueAliases.get(item);
    if (known == null) {
        known = new ArrayList(1);
        toValueAliases.put(item, known);
    }
    // Order matters: the alias is offered before the value, so on a fresh
    // list a non-null alias ends up in front of the value.
    addUnique(alias, known);
    addUnique(item, known);
}
public String _getVersion() {
return version;
}
}
public final String getValue(int codepoint, boolean getShortest) {
String result = getValue(codepoint);
if (!getShortest || result == null) return result;
return getShortestValueAlias(result);
if (type >= MISC || result == null || !getShortest) return result;
return getFirstValueAlias(result);
}
public final String getShortestValueAlias(String value) {
if (valueToShortValue == null) getValueCache();
return (String)valueToShortValue.get(value);
}
public final String getShortestAlias() {
if (shortestPropertyAlias == null) {
shortestPropertyAlias = propertyAlias;
for (Iterator it = _getAliases(null).iterator(); it.hasNext();) {
String item = (String) it.next();
if (item.length() < shortestPropertyAlias.length()) {
shortestPropertyAlias = item;
}
}
public final String getFirstNameAlias() {
if (firstNameAlias == null) {
firstNameAlias = (String) getNameAliases().get(0);
}
return shortestPropertyAlias;
return firstNameAlias;
}
private void getValueCache() {
/**
 * Looks up the first alias cached for a value, building the cache on first use.
 *
 * @param value the value to look up
 * @return the cached first alias, or null if the cache has no entry for it
 */
public final String getFirstValueAlias(String value) {
    if (valueToFirstValueAlias == null) {
        _getFirstValueAliasCache();
    }
    Object first = valueToFirstValueAlias.get(value);
    return (String) first;
}
private void _getFirstValueAliasCache() {
maxValueWidth = 0;
maxShortestValueWidth = 0;
valueToShortValue = new HashMap();
Iterator it = getAvailableValueAliases(null).iterator();
maxFirstValueAliasWidth = 0;
valueToFirstValueAlias = new HashMap(1);
Iterator it = getAvailableValues().iterator();
while (it.hasNext()) {
String value = (String)it.next();
String shortest = value;
Iterator it2 = getValueAliases(value, null).iterator();
while (it2.hasNext()) {
String other = (String)it2.next();
if (shortest.length() > other.length()) shortest = other;
String first = (String) getValueAliases(value).get(0);
if (first == null) { // internal error
throw new IllegalArgumentException("Value not in value aliases: " + value);
}
valueToShortValue.put(value,shortest);
if (value.length() > maxValueWidth) maxValueWidth = value.length();
if (shortest.length() > maxShortestValueWidth) maxShortestValueWidth = shortest.length();
if (DEBUG && CHECK_NAME.equals(getName())) {
System.out.println("First Alias: " + getName() + ": " + value + " => "
+ first + new BagFormatter().join(getValueAliases(value)));
}
valueToFirstValueAlias.put(value,first);
if (value.length() > maxValueWidth) {
maxValueWidth = value.length();
}
if (first.length() > maxFirstValueAliasWidth) {
maxFirstValueAliasWidth = first.length();
}
}
}
private int maxValueWidth = -1;
private int maxShortestValueWidth = -1;
private int maxFirstValueAliasWidth = -1;
public final int getMaxWidth(boolean getShortest) {
if (maxValueWidth < 0) getValueCache();
if (getShortest) return maxShortestValueWidth;
/**
 * Returns the cached maximum width of this property's values, building the
 * cache first if the width has not been computed yet (it starts out negative).
 *
 * @param getShortest true for the width of the first value aliases,
 *                    false for the width of the values themselves
 * @return the requested maximum width
 */
public int getMaxWidth(boolean getShortest) {
    if (maxValueWidth < 0) {
        _getFirstValueAliasCache();
    }
    return getShortest ? maxFirstValueAliasWidth : maxValueWidth;
}
@ -450,17 +524,18 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
int type = getType();
return getSet(new SimpleMatcher(propertyValue,
type >= STRING ? null : new SkeletonComparator()),
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
result);
}
private UnicodeMap cacheValueToSet = null;
private UnicodeMap unicodeMap = null;
public static final String UNUSED = "??";
public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
if (result == null) result = new UnicodeSet();
if (type >= STRING) {
if (isType(STRING_OR_MISC_MASK)) {
for (int i = 0; i <= 0x10FFFF; ++i) {
String value = getValue(i);
if (matcher.matches(value)) {
@ -469,9 +544,9 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
return result;
}
if (cacheValueToSet == null) cacheValueToSet = _getUnicodeMap();
Collection temp = new HashSet(); // to avoid reallocating...
Iterator it = cacheValueToSet.getAvailableValues(null).iterator();
List temp = new ArrayList(1); // to avoid reallocating...
UnicodeMap um = getUnicodeMap();
Iterator it = um.getAvailableValues(null).iterator();
main:
while (it.hasNext()) {
String value = (String)it.next();
@ -479,10 +554,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
Iterator it2 = getValueAliases(value,temp).iterator();
while (it2.hasNext()) {
String value2 = (String)it2.next();
System.out.println("Values:" + value2);
//System.out.println("Values:" + value2);
if (matcher.matches(value2)
|| matcher.matches(toSkeleton(value2))) {
cacheValueToSet.getSet(value, result);
um.getSet(value, result);
continue main;
}
}
@ -490,21 +565,6 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return result;
}
protected UnicodeMap _getUnicodeMap() {
UnicodeMap result = new UnicodeMap();
for (int i = 0; i <= 0x10FFFF; ++i) {
if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
result.put(i, getValue(i));
}
if (DEBUG) {
System.out.println(getName() + ":\t" + getClass().getName()
+ "\t" + getVersion());
System.out.println(getStack());
System.out.println(result);
}
return result;
}
/*
public UnicodeSet getMatchSet(UnicodeSet result) {
if (result == null) result = new UnicodeSet();
@ -516,7 +576,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
matchIterator = new UnicodeSetIterator(set);
}
*/
/**
* Utility for debugging
*/
public static String getStack() {
Exception e = new Exception();
StringWriter sw = new StringWriter();
@ -526,32 +589,71 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return "Showing Stack with fake " + sw.getBuffer().toString();
}
public static Collection addUnique(Object obj, Collection result) {
if (obj != null && !result.contains(obj)) result.add(obj);
return result;
}
public static Collection addAllUnique(Collection source, Collection result) {
Iterator it = source.iterator();
while (it.hasNext()) {
Object obj = it.next();
if (obj != null && !result.contains(obj)) result.add(obj);
// TODO use this instead of plain strings
public static class Name implements Comparable {
private static Map skeletonCache;
private String skeleton;
private String pretty;
public final int RAW = 0, TITLE = 1, NORMAL = 2;
public Name(String name, int style) {
if (name == null) name = "";
if (style == RAW) {
skeleton = pretty = name;
} else {
pretty = regularize(name, style == TITLE);
skeleton = toSkeleton(pretty);
}
}
return result;
public int compareTo(Object o) {
return skeleton.compareTo(((Name)o).skeleton);
}
public boolean equals(Object o) {
return skeleton.equals(((Name)o).skeleton);
}
public int hashCode() {
return skeleton.hashCode();
}
public String toString() {
return pretty;
}
}
public static class SkeletonComparator implements Comparator {
/**
* Utility for managing property & non-string value aliases
*/
public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
public int compare(Object o1, Object o2) {
// TODO optimize
if (o1 == o2) return 0;
if (o1 == null) return -1;
if (o2 == null) return 1;
return toSkeleton((String)o1).compareTo(toSkeleton((String)o2));
return compareNames((String)o1, (String)o2);
}
};
/**
 * Utility for managing property and non-string value aliases: tests whether
 * two names are the same once both are reduced with toSkeleton.
 * @return true if a and b are the same reference, or their skeletons are
 *         equal; false if a is null and b is not
 */
// TODO optimize
public static boolean equalNames(String a, String b) {
    if (a != b) {
        if (a == null) {
            return false;
        }
        String left = toSkeleton(a);
        String right = toSkeleton(b);
        return left.equals(right);
    }
    return true;
}
private static String toSkeleton(String source) {
/**
 * Utility for managing property and non-string value aliases: orders two
 * names by their toSkeleton forms. A null name sorts before any non-null name.
 * @return 0 for identical references, -1 if only a is null, 1 if only b is
 *         null, otherwise the String comparison of the two skeletons
 */
// TODO optimize
public static int compareNames(String a, String b) {
    if (a != b) {
        if (a == null) {
            return -1;
        }
        if (b == null) {
            return 1;
        }
        String left = toSkeleton(a);
        String right = toSkeleton(b);
        return left.compareTo(right);
    }
    return 0;
}
/**
* Utility for managing property & non-string value aliases
*/
// TODO account for special names, tibetan, hangul
public static String toSkeleton(String source) {
if (source == null) return null;
StringBuffer skeletonBuffer = new StringBuffer();
boolean gotOne = false;
// remove spaces, '_', '-'
@ -574,6 +676,44 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return skeletonBuffer.toString();
}
/**
 * Puts a name into a regular form. Uses the java.lang.Character functions,
 * because the names only need ASCII handling.
 * <ul>
 * <li>space, '-' and '_' all become '_'</li>
 * <li>a '_' is inserted between a lowercase letter and a following
 *     uppercase letter</li>
 * <li>if titlecaseStart is true, the first cased letter of the string and
 *     the first cased letter after each separator or '=' is uppercased</li>
 * </ul>
 * @param source name to regularize; null is returned unchanged
 * @param titlecaseStart whether to uppercase the first cased letter of each part
 * @return the regularized name
 */
public static String regularize(String source, boolean titlecaseStart) {
    if (source == null) return source;
    StringBuffer out = new StringBuffer();
    int previousType = -1;
    // true while the next cased letter would be the first one of its part
    boolean atPartStart = true;
    final int length = source.length();
    for (int pos = 0; pos < length; ++pos) {
        char ch = source.charAt(pos);
        switch (ch) {
        case ' ':
        case '-':
        case '_':
            ch = '_';
            atPartStart = true;
            break;
        case '=':
            atPartStart = true;
            break;
        default:
            break;
        }
        // the type is taken before any uppercasing below, so previousType
        // reflects the original letter
        int type = Character.getType(ch);
        boolean cased = type == Character.LOWERCASE_LETTER
                || type == Character.TITLECASE_LETTER
                || type == Character.UPPERCASE_LETTER;
        if (previousType == Character.LOWERCASE_LETTER
                && type == Character.UPPERCASE_LETTER) {
            out.append('_');
        }
        if (atPartStart && cased) {
            if (titlecaseStart) {
                ch = Character.toUpperCase(ch);
            }
            atPartStart = false;
        }
        out.append(ch);
        previousType = type;
    }
    return out.toString();
}
/**
* Utility function for comparing codepoint to string without
* generating new string.
@ -605,5 +745,57 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
}
}
/**
 * Adds obj to result unless obj is null or result already contains it.
 * Really ought to be replaced by a Collection that forces uniqueness itself.
 * @param obj candidate element; null is ignored
 * @param result collection to add to
 * @return the result argument, for chaining
 */
public static Collection addUnique(Object obj, Collection result) {
    if (obj == null) {
        return result;
    }
    if (result.contains(obj)) {
        return result;
    }
    result.add(obj);
    return result;
}
/**
 * Passes every element of source to addUnique, in iteration order.
 * Really ought to be replaced by a Collection that forces uniqueness itself.
 * @param source elements to add
 * @param result collection to add to
 * @return the result argument, for chaining
 */
public static Collection addAllUnique(Collection source, Collection result) {
    Iterator items = source.iterator();
    while (items.hasNext()) {
        addUnique(items.next(), result);
    }
    return result;
}
/**
 * Passes every element of the source array to addUnique, in index order.
 * Really ought to be replaced by a Collection that forces uniqueness itself.
 * @param source elements to add
 * @param result collection to add to
 * @return the result argument, for chaining
 */
public static Collection addAllUnique(Object[] source, Collection result) {
    int index = 0;
    while (index < source.length) {
        addUnique(source[index], result);
        ++index;
    }
    return result;
}
/**
 * Returns the map held in unicodeMap, first filling that field from
 * _getUnicodeMap() if it is still null.
 * @return the value of unicodeMap after any needed initialization
 */
protected UnicodeMap getUnicodeMap() {
    if (unicodeMap != null) {
        return unicodeMap;
    }
    unicodeMap = _getUnicodeMap();
    return unicodeMap;
}
/**
 * Builds a new UnicodeMap holding getValue(cp) for every code point from
 * 0 through 0x10FFFF. When DEBUG is on and this property's name equals
 * CHECK_NAME, a summary line, a stack dump and the finished map are printed.
 * @return the freshly built map
 */
protected UnicodeMap _getUnicodeMap() {
    UnicodeMap map = new UnicodeMap();
    int cp = 0;
    while (cp <= 0x10FFFF) {
        map.put(cp, getValue(cp));
        ++cp;
    }
    boolean dump = DEBUG && CHECK_NAME.equals(getName());
    if (!dump) {
        return map;
    }
    System.out.println(getName() + ":\t" + getClass().getName()
            + "\t" + getVersion());
    System.out.println(getStack());
    System.out.println(map);
    return map;
}
}

View file

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Cyrillic_Latin.txt,v $
# $Date: 2001/12/01 00:14:55 $
# $Revision: 1.9 $
# $Date: 2004/02/18 03:08:58 $
# $Revision: 1.10 $
#--------------------------------------------------------------------
# TODO: add remaining characters
# Should add variants for Russian-English, Russian-German
@ -28,6 +28,75 @@ $dot = \u0307 ;
$caron = \u030C ;
$comma = \u0326 ;
$under = \u0331 ;
$hookbelow = \u0321;
$rhookbelow = \u0322;
$linebelow = \u0329;
$lineoverlay = \u20D3;
$bar = \u0335;
$horn = \u031B;
$titlo = \u0483;
# Simple decompositions
| ҷ <> | ч $linebelow ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
| Ҷ <> | Ч $linebelow ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
| ӌ <> | ч $hookbelow; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
| Ӌ <> | Ч $hookbelow; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
| ҹ <> | ч $lineoverlay ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
| Ҹ <> | Ч $lineoverlay; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
| җ <> | ж $linebelow; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
| Җ <> | Ж $linebelow; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
| қ <> | к $linebelow; # CYRILLIC SMALL LETTER KA WITH DESCENDER
| Қ <> | К $linebelow; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
| ӄ <> | к $hookbelow ; # CYRILLIC SMALL LETTER KA WITH HOOK
| Ӄ <> | К $hookbelow ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
| ҡ <> | к ; # CYRILLIC SMALL LETTER BASHKIR KA
| Ҡ <> | К ; # CYRILLIC CAPITAL LETTER BASHKIR KA
| ҟ <> | к $bar ; # CYRILLIC SMALL LETTER KA WITH STROKE
| Ҟ <> | К $bar ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
| ҝ <> | к $lineoverlay ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
| Ҝ <> | К $lineoverlay ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
| ң <> | н $linebelow; # CYRILLIC SMALL LETTER EN WITH DESCENDER
| Ң <> | Н $linebelow; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
| ӈ <> | н $hookbelow; # CYRILLIC SMALL LETTER EN WITH HOOK
| Ӈ <> | Н $hookbelow; # CYRILLIC CAPITAL LETTER EN WITH HOOK
| ҥ <> | н $horn; # CYRILLIC SMALL LIGATURE EN GHE
| Ҥ <> | Н $horn; # CYRILLIC CAPITAL LIGATURE EN GHE
| ҧ <> | п $hookbelow; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
| Ҧ <> | П $hookbelow; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
| ҁ <> | с $linebelow ; # CYRILLIC SMALL LETTER KOPPA
| Ҁ <> | С $linebelow; # CYRILLIC CAPITAL LETTER KOPPA
| ҏ <> | р $lineoverlay ; # CYRILLIC SMALL LETTER ER WITH TICK
| Ҏ <> | Р $lineoverlay ; # CYRILLIC CAPITAL LETTER ER WITH TICK
| ҫ <> | с $rhookbelow ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
| Ҫ <> | С $rhookbelow ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
| ҭ <> | т $linebelow ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
| Ҭ <> | Т $linebelow ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
| ҳ <> | х $linebelow ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
| Ҳ <> | Х $linebelow ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
| ѹ <> oу ; # CYRILLIC SMALL LETTER UK
| Ѹ <> У ; # CYRILLIC CAPITAL LETTER UK
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
| ѽ <> | ѡ $titlo ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
| Ѽ <> | Ѡ $titlo; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
# move up so not masked
@ -36,12 +105,6 @@ $under = \u0331 ;
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
э <> e $acute; # CYRILLIC SMALL LETTER E
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
@ -55,8 +118,8 @@ $under = \u0331 ;
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
ӡ <> ʒ ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
Ӡ <> Ʒ ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
@ -111,9 +174,6 @@ $under = \u0331 ;
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
з <> z ; # CYRILLIC SMALL LETTER ZE
З <> Z; # CYRILLIC CAPITAL LETTER ZE
@ -125,16 +185,6 @@ $under = \u0331 ;
к <> k ; # CYRILLIC SMALL LETTER KA
К <> K; # CYRILLIC CAPITAL LETTER KA
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
л <> l ; # CYRILLIC SMALL LETTER EL
Л <> L; # CYRILLIC CAPITAL LETTER EL
@ -142,64 +192,42 @@ $under = \u0331 ;
М <> M ; # CYRILLIC CAPITAL LETTER EM
н <> n ; # CYRILLIC SMALL LETTER EN
Н <> N; # CYRILLIC CAPITAL LETTER EN
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
ө <> o $bar ; # CYRILLIC SMALL LETTER BARRED O
Ө <> O $bar; # CYRILLIC CAPITAL LETTER BARRED O
о <> o ; # CYRILLIC SMALL LETTER O
О <> O ; # CYRILLIC CAPITAL LETTER O
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
п <> p ; # CYRILLIC SMALL LETTER PE
П <> P ; # CYRILLIC CAPITAL LETTER PE
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
р <> r ; # CYRILLIC SMALL LETTER ER
Р <> R ; # CYRILLIC CAPITAL LETTER ER
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
с <> s ; # CYRILLIC SMALL LETTER ES
С <> S ; # CYRILLIC CAPITAL LETTER ES
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
т <> t ; # CYRILLIC SMALL LETTER TE
Т <> T ; # CYRILLIC CAPITAL LETTER TE
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
ү <> u $circumflex ; # CYRILLIC SMALL LETTER STRAIGHT U
Ү <> U $circumflex; # CYRILLIC CAPITAL LETTER STRAIGHT U
у <> u ; # CYRILLIC SMALL LETTER U
У <> U ; # CYRILLIC CAPITAL LETTER U
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
ф <> f ; # CYRILLIC SMALL LETTER EF
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
х <> h ; # CYRILLIC SMALL LETTER HA
Х <> H; # CYRILLIC CAPITAL LETTER HA
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
ц <> c ; # CYRILLIC SMALL LETTER TSE
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
ҵ <> c $cedilla ; # CYRILLIC SMALL LIGATURE TE TSE
Ҵ <> C $cedilla ; # CYRILLIC CAPITAL LIGATURE TE TSE
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
@ -220,6 +248,10 @@ $under = \u0331 ;
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
ѱ <> ps $underline ; # CYRILLIC SMALL LETTER PSI
Ѱ } [:lower:] <> Ps $underline ; # CYRILLIC CAPITAL LETTER PSI
Ѱ <> PS $underline ; # CYRILLIC CAPITAL LETTER PSI
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
@ -230,10 +262,10 @@ $under = \u0331 ;
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
ѯ <> x $underline ; # CYRILLIC SMALL LETTER KSI
Ѯ <> X $underline ; # CYRILLIC CAPITAL LETTER KSI
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
@ -241,6 +273,7 @@ $under = \u0331 ;
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A

View file

@ -53,14 +53,13 @@ public class CheckICU {
public static void test() throws IOException {
//generateFile("4.0.0", "DerivedCombiningClass");
//generateFile("4.0.0", "DerivedCoreProperties");
if (true) return;
checkUCD();
itemFailures = new UnicodeSet();
icuFactory = ICUPropertyFactory.make();
toolFactory = ToolUnicodePropertySource.make("4.0.0");
String[] quickList = {
"Math",
"Canonical_Combining_Class",
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
//"Numeric_Value"
};
@ -69,9 +68,9 @@ public class CheckICU {
}
if (quickList.length > 0) return;
Collection availableTool = toolFactory.getAvailableAliases(new TreeSet());
Collection availableTool = toolFactory.getAvailableNames();
Collection availableICU = icuFactory.getAvailableAliases(new TreeSet());
Collection availableICU = icuFactory.getAvailableNames();
System.out.println(showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool));
Collection common = new TreeSet(availableICU);
common.retainAll(availableTool);
@ -140,12 +139,12 @@ public class CheckICU {
+ "\tTool: " + UnicodeProperty.getTypeName(toolType));
}
Collection icuAliases = icuProp.getAliases(new ArrayList());
Collection toolAliases = toolProp.getAliases(new ArrayList());
Collection icuAliases = icuProp.getNameAliases(new ArrayList());
Collection toolAliases = toolProp.getNameAliases(new ArrayList());
System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases));
icuAliases = icuProp.getAvailableValueAliases(new ArrayList());
toolAliases = toolProp.getAvailableValueAliases(new ArrayList());
icuAliases = icuProp.getAvailableValues(new ArrayList());
toolAliases = toolProp.getAvailableValues(new ArrayList());
System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases));
// TODO do property value aliases

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2004/02/07 01:01:16 $
* $Revision: 1.24 $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.25 $
*
*******************************************************************************
*/
@ -285,7 +285,7 @@ public final class DerivedProperty implements UCD_Types {
byte val;
CaseDProp (int i) {
type = DERIVED_CORE;
isStandard = false;
isStandard = false;
val = (i == Missing_Uppercase ? Lu : i == Missing_Lowercase ? Ll : Lt);
name = "Possible_Missing_" + CaseNames[i-Missing_Uppercase];
header = "# Derived Property: " + name
@ -306,7 +306,7 @@ public final class DerivedProperty implements UCD_Types {
String MAYBE;
Normalizer nfx;
QuickDProp (int i) {
// setValueType((i == NFC || i == NFKC) ? ENUMERATED : BINARY);
//setValueType((i == NFC || i == NFKC) ? ENUMERATED_PROP : BINARY_PROP);
setValueType(ENUMERATED_PROP);
type = DERIVED_NORMALIZATION;
nfx = nf[i];
@ -611,7 +611,7 @@ of characters, the first of which has a non-zero combining class.
+ "\r\n# Uses the full case folding from CaseFolding.txt, without the T option."
;
}
public String getValue(int cp, byte style) {
public String getValue(int cp, byte style) {
if (!ucdData.isRepresented(cp)) return "";
String b = nfkc.normalize(fold(cp));
String c = nfkc.normalize(fold(b));
@ -860,7 +860,7 @@ of characters, the first of which has a non-zero combining class.
{
type = DERIVED_CORE;
name = "Grapheme_Extend";
shortName = "GrExt";
shortName = "Gr_Ext";
header = header = "# Derived Property: " + name
+ "\r\n# Generated from: Me + Mn + Other_Grapheme_Extend"
+ "\r\n# Note: depending on an application's interpretation of Co (private use),"
@ -883,7 +883,7 @@ of characters, the first of which has a non-zero combining class.
{
type = DERIVED_CORE;
name = "Grapheme_Base";
shortName = "GrBase";
shortName = "Gr_Base";
header = header = "# Derived Property: " + name
+ "\r\n# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend"

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.10 $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -149,7 +149,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
result.append(ucd.getCodeAndName(cp));
for (int j = 0; j < props.length; ++j) {
result.append(", ");
result.append(props[j].getProperty(SHORT)).append('=').append(props[j].getValue(cp,SHORT));
result.append(props[j].getPropertyName(SHORT)).append('=').append(props[j].getValue(cp,SHORT));
}
}
return result.toString();

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
* $Date: 2004/02/12 08:23:15 $
* $Revision: 1.33 $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.34 $
*
*******************************************************************************
*/
@ -17,6 +17,7 @@ import java.util.*;
import java.io.*;
import com.ibm.text.utility.*;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
@ -391,8 +392,8 @@ public class GenerateData implements UCD_Types {
if (propAbb.equals("") || propAbb.equals(UCD_Names.YN_TABLE[1])) {
System.out.println("WHOOPS: " + Utility.hex(i));
}
propAbb = Utility.getUnskeleton(up.getProperty(SHORT), false);
prop = Utility.getUnskeleton(up.getProperty(LONG), true);
propAbb = Utility.getUnskeleton(up.getPropertyName(SHORT), false);
prop = Utility.getUnskeleton(up.getPropertyName(LONG), true);
addLine(sorted,
type == SCRIPT
? UCD_Names.PROP_TYPE_NAMES[CATALOG_PROP][1]
@ -407,7 +408,7 @@ public class GenerateData implements UCD_Types {
if (up.getValueType() < BINARY_PROP) continue;
value = up.getValue(LONG);
if (value.length() == 0) value = "none";
else if (value.equals("<unused>")) continue;
else if (value.equals(UnicodeProperty.UNUSED)) continue;
if (type != DECOMPOSITION_TYPE) {
value = Utility.getUnskeleton(value, true);
@ -429,7 +430,7 @@ public class GenerateData implements UCD_Types {
if (type == COMBINING_CLASS) {
if (value.startsWith("Fixed_")) { continue; }
if (value.charAt(0) <= '9') { continue; }
}
@ -502,7 +503,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
Utility.appendFile("PropertyAliasHeader.txt", Utility.LATIN1, log);
Utility.appendFile("PropertyAliasesHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
int count = Utility.print(log, sorted, "\r\n", new MyBreaker(true));
@ -522,7 +523,7 @@ public class GenerateData implements UCD_Types {
log.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
log.println(UnicodeDataFile.generateDateLine());
log.println("#");
Utility.appendFile("PropertyValueAliasHeader.txt", Utility.LATIN1, log);
Utility.appendFile("PropertyValueAliasesHeader.txt", Utility.LATIN1, log);
log.println(HORIZONTAL_LINE);
log.println();
Utility.print(log, sorted, "\r\n", new MyBreaker(false));

View file

@ -1,7 +1,10 @@
package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
@ -10,106 +13,194 @@ import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.Tabber;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.UnicodeDataFile;
import com.ibm.text.utility.Utility;
import com.ibm.icu.text.Collator;
public class MakeUnicodeFiles {
static boolean DEBUG = true;
/**
 * Driver for file generation. Takes no String[] argument, so it has to be
 * called from other code rather than started directly by the JVM.
 * @throws IOException propagated from generateFile
 */
public static void main() throws IOException {
// range form: only file names from "Scripts" through "z" are generated
generateFile("Scripts","z");
// "*" makes generateFile(String) run over the whole range of names
generateFile("*");
}
/**
 * Minimal map that also remembers the order in which keys were first added.
 * Values live in {@link #map}; {@link #keys} lists each key once, in
 * first-insertion order.
 */
static class OrderedMap {
    HashMap map = new HashMap();
    ArrayList keys = new ArrayList();

    /**
     * Associates t with o. A key that is already present keeps its original
     * position in the key list and only has its value replaced.
     */
    void put(Object o, Object t) {
        // record the key only on first insertion, so keyset() never
        // contains the same key twice
        if (!map.containsKey(o)) {
            keys.add(o);
        }
        map.put(o, t);
    }

    /**
     * @return the live list of keys in first-insertion order
     */
    List keyset() {
        return keys;
    }
}
static class PrintStyle {
boolean longForm = false;
static String[] FILE_OPTIONS = {
"Script nameStyle=none makeUppercase skipUnassigned=Common hackValues",
"Age nameStyle=none noLabel skipValue=unassigned",
"Numeric_Type nameStyle=none makeFirstLetterLowercase skipValue=None",
"General_Category nameStyle=none valueStyle=short noLabel",
"Line_Break nameStyle=none valueStyle=short skipUnassigned=Unknown",
"Joining_Type nameStyle=none valueStyle=short skipValue=Non_Joining",
"Joining_Group nameStyle=none skipValue=No_Joining_Group makeUppercase",
"East_Asian_Width nameStyle=none valueStyle=short skipUnassigned=Neutral",
"Decomposition_Type nameStyle=none skipValue=None makeFirstLetterLowercase hackValues",
"Bidi_Class nameStyle=none valueStyle=short skipUnassigned=Left_To_Right",
"Block nameStyle=none noLabel valueList",
"Canonical_Combining_Class nameStyle=none valueStyle=short skipUnassigned=Not_Reordered longValueHeading=ccc",
"Hangul_Syllable_Type nameStyle=none valueStyle=short skipValue=Not_Applicable",
"NFD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
"NFC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
"NFKC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
"NFKD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
"FC_NFKC_Closure nameStyle=short"
};
static String[] hackNameList = {
"noBreak", "Arabic_Presentation_Forms-A", "Arabic_Presentation_Forms-B",
"CJK_Symbols_and_Punctuation", "Combining_Diacritical_Marks_for_Symbols",
"Enclosed_CJK_Letters_and_Months", "Greek_and_Coptic",
"Halfwidth_and_Fullwidth_Forms", "Latin-1_Supplement", "Latin_Extended-A",
"Latin_Extended-B", "Miscellaneous_Mathematical_Symbols-A",
"Miscellaneous_Mathematical_Symbols-B", "Miscellaneous_Symbols_and_Arrows",
"Superscripts_and_Subscripts", "Supplemental_Arrows-A", "Supplemental_Arrows-B",
"Supplementary_Private_Use_Area-A", "Supplementary_Private_Use_Area-B",
"Canadian-Aboriginal", "Old-Italic"
};
static class PrintStyle {
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
static Map PRINT_STYLE_MAP = new TreeMap(UnicodeProperty.PROPERTY_COMPARATOR);
boolean noLabel = false;
boolean makeUppercase = false;
boolean makeFirstLetterLowercase = false;
String skipValue = null;
String skipUnassigned = null;
boolean orderByRangeStart = false;
boolean valueList = false;
boolean interleaveValues = false;
boolean hackValues = false;
String nameStyle = "none";
String valueStyle = "long";
String skipValue = null;
String skipUnassigned = null;
String longValueHeading = null;
/** Sets longForm and returns this style so calls can be chained. */
PrintStyle setLongForm(boolean value) {
longForm = value;
return this;
}
/** Sets skipUnassigned and returns this style so calls can be chained. */
PrintStyle setSkipUnassigned(String value) {
skipUnassigned = value;
return this;
}
/** Sets noLabel and returns this style so calls can be chained. */
PrintStyle setNoLabel(boolean value) {
noLabel = value;
return this;
}
/** Sets makeUppercase and returns this style so calls can be chained. */
PrintStyle setMakeUppercase(boolean value) {
makeUppercase = value;
return this;
}
/** Sets makeFirstLetterLowercase and returns this style so calls can be chained. */
PrintStyle setMakeFirstLetterLowercase(boolean value) {
makeFirstLetterLowercase = value;
return this;
}
/** Sets skipValue and returns this style so calls can be chained. */
PrintStyle setSkipValue(String value) {
skipValue = value;
return this;
}
/** Sets orderByRangeStart and returns this style so calls can be chained. */
PrintStyle setOrderByRangeStart(boolean value) {
orderByRangeStart = value;
return this;
}
/** Sets valueList and returns this style so calls can be chained. */
PrintStyle setValueList(boolean value) {
valueList = value;
return this;
}
/**
 * Creates a PrintStyle from one option line and registers it in
 * PRINT_STYLE_MAP under the name that parse returns for that line.
 * @param options a property name followed by space-separated options
 */
static void add(String options) {
    PrintStyle style = new PrintStyle();
    String propertyName = style.parse(options);
    PRINT_STYLE_MAP.put(propertyName, style);
}
/**
 * Looks up the style registered for a property name.
 * @param propname key to look up in PRINT_STYLE_MAP
 * @return the registered style, or DEFAULT_PRINT_STYLE when there is none
 */
static PrintStyle get(String propname) {
    Object found = PRINT_STYLE_MAP.get(propname);
    if (found == null) {
        if (DEBUG) {
            System.out.println("Using default style!");
        }
        return DEFAULT_PRINT_STYLE;
    }
    return (PrintStyle) found;
}
/**
 * Applies one option line to this style. Tabs are treated as spaces; the
 * first space-separated piece is the property name and every later piece is
 * either a bare flag (e.g. noLabel) or a key=value setting
 * (e.g. skipValue=None). Empty pieces are ignored.
 * @param options the option line
 * @return the first piece of the line (the property name)
 * @throws IllegalArgumentException if a piece is not a recognized option
 */
String parse(String options) {
    options = options.replace('\t', ' ');
    String[] tokens = Utility.split(options, ' ');
    for (int index = 1; index < tokens.length; ++index) {
        String token = tokens[index];
        if (token.length() == 0) {
            continue;
        }
        int eq = token.indexOf('=');
        if (eq < 0) {
            // binary
            if (token.equals("noLabel")) {
                noLabel = true;
                continue;
            }
            if (token.equals("makeUppercase")) {
                makeUppercase = true;
                continue;
            }
            if (token.equals("makeFirstLetterLowercase")) {
                makeFirstLetterLowercase = true;
                continue;
            }
            if (token.equals("orderByRangeStart")) {
                orderByRangeStart = true;
                continue;
            }
            if (token.equals("valueList")) {
                // the "valueList" option is stored in interleaveValues
                interleaveValues = true;
                continue;
            }
            if (token.equals("hackValues")) {
                hackValues = true;
                continue;
            }
        } else {
            // with parameter
            String key = token.substring(0, eq);
            String value = afterEquals(token);
            if (key.equals("valueStyle")) {
                valueStyle = value;
                continue;
            }
            if (key.equals("nameStyle")) {
                nameStyle = value;
                continue;
            }
            if (key.equals("longValueHeading")) {
                longValueHeading = value;
                continue;
            }
            if (key.equals("skipValue")) {
                skipValue = value;
                continue;
            }
            if (key.equals("skipUnassigned")) {
                skipUnassigned = value;
                continue;
            }
        }
        throw new IllegalArgumentException("Illegal PrintStyle Parameter: " + token + " in " + tokens[0]);
    }
    if (DEBUG && options.indexOf('=') >= 0) {
        System.out.println(tokens[0]);
        // NOTE(review): this line prints longValueHeading under the label
        // " name " and the value is printed again two lines below - confirm
        // whether a different field was intended
        if (longValueHeading != null) System.out.println(" name " + longValueHeading);
        if (nameStyle != null) System.out.println(" nameStyle " + nameStyle);
        if (longValueHeading != null) System.out.println(" longValueHeading " + longValueHeading);
        if (skipValue != null) System.out.println(" skipValue " + skipValue);
        if (skipUnassigned != null) System.out.println(" skipUnassigned " + skipUnassigned);
    }
    return tokens[0];
}
/**
 * Returns the text following the first '=' in source. When source has no
 * '=', the whole string is returned.
 */
String afterEquals(String source) {
    int eq = source.indexOf('=');
    return source.substring(eq + 1);
}
}
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();
static Map printStyles = new TreeMap(/*skeletonComparator*/);
static {
printStyles.put("Script", new PrintStyle().setLongForm(true)
.setMakeUppercase(true).setSkipUnassigned("Common"));
printStyles.put("Age", new PrintStyle().setNoLabel(true));
printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)
.setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));
printStyles.put("General_Category", new PrintStyle().setNoLabel(true)
//.setSkipUnassigned(true)
);
printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));
printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));
printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")
.setMakeUppercase(true));
printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));
printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)
.setSkipValue("None").setMakeFirstLetterLowercase(true));
printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));
printStyles.put("Block", new PrintStyle().setNoLabel(true)
.setValueList(true));
printStyles.put("Age", new PrintStyle().setSkipValue("unassigned"));
printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));
printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));
for (int i = 0; i < FILE_OPTIONS.length; ++i) {
PrintStyle.add(FILE_OPTIONS[i]);
}
}
//PropertyAliases
//PropertyValueAliases
/**
 * Maps the regularized (titlecased) form of each entry of hackNameList back
 * to the entry exactly as spelled in that list.
 */
static Map hackMap = new HashMap();
static {
    int index = 0;
    while (index < hackNameList.length) {
        String original = hackNameList[index];
        hackMap.put(UnicodeProperty.regularize(original, true), original);
        ++index;
    }
}
static UnicodeProperty.MapFilter hackMapFilter = new UnicodeProperty.MapFilter(hackMap);
/**
 * Two-level sorted lookup of comment text: property name, then value name,
 * then the comment string.
 */
static class ValueComments {
    TreeMap propertyToValueToComments = new TreeMap();

    /**
     * Stores comments for the given property and value, replacing any
     * earlier entry for that pair.
     * @return this object, so calls can be chained
     */
    ValueComments add(String property, String value, String comments) {
        Object existing = propertyToValueToComments.get(property);
        TreeMap perValue;
        if (existing != null) {
            perValue = (TreeMap) existing;
        } else {
            perValue = new TreeMap();
            propertyToValueToComments.put(property, perValue);
        }
        perValue.put(value, comments);
        return this;
    }

    /**
     * @return the comments stored for the property and value, or null when
     *         the property is unknown or has no entry for that value
     */
    String get(String property, String value) {
        TreeMap perValue = (TreeMap) propertyToValueToComments.get(property);
        return perValue == null ? null : (String) perValue.get(value);
    }
}
/**
 * Comment text to emit for properties and property values. Filled once at
 * class load: first from the UCD_Names header table (stored under the value
 * "*"), then from MakeUnicodeFiles.txt.
 */
static ValueComments valueComments = new ValueComments();
static {
    for (int i = 0; i < UCD_Names.UNIFIED_PROPERTIES.length; ++i) {
        String name = Utility.getUnskeleton(UCD_Names.UNIFIED_PROPERTIES[i], false);
        valueComments.add(name, "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]);
    }
    // HACK
    valueComments.add("Bidi_Mirroring", "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[9]);

    // File layout as read here: a line that does not start with '#' is a key
    // ("Property" or "Property=Value"); the '#' lines after it, up to the
    // next key line, are the comment text for that key.
    BufferedReader br = null;
    try {
        br = Utility.openReadFile("MakeUnicodeFiles.txt", Utility.UTF8);
        String key = null;
        String value = "";
        while (true) {
            String line = br.readLine();
            if (line == null) break;
            if (line.startsWith("#")) {
                value += line + "\n";
                continue;
            }
            if (key != null) { // store the entry that this key line ends
                storeValueComment(key, value);
                value = "";
            }
            key = line;
        }
        // The loop only stores an entry when the NEXT key line shows up, so
        // the final entry has to be stored here or it is silently lost.
        // A blank trailing key line carries no entry and is skipped.
        if (key != null && key.trim().length() != 0) {
            storeValueComment(key, value);
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        throw new IllegalArgumentException("File missing");
    } finally {
        // close on every path, including when reading failed part-way
        if (br != null) {
            try {
                br.close();
            } catch (IOException ignored) {
                // nothing useful can be done if close fails; data is already read
            }
        }
    }
}

/**
 * Stores one comment block from MakeUnicodeFiles.txt. A key of the form
 * "Property=Value" is stored under that value; a key without '=' is stored
 * under the value "*".
 */
private static void storeValueComment(String key, String value) {
    String[] pieces = Utility.split(key, '=');
    if (pieces.length == 1) {
        valueComments.add(pieces[0].trim(), "*", value);
    } else {
        valueComments.add(pieces[0].trim(), pieces[1].trim(), value);
    }
}
//CompositionExclusions
//SpecialCasing
//NormalizationTest
@ -119,15 +210,15 @@ public class MakeUnicodeFiles {
contents.put(name, properties);
}
static {
add("Blocks", new String[] {"Block"});
add("PropertyValueAliases", null);
add("PropertyAliases", null);
add("SpecialCasing", null);
add("NormalizationTest", null);
add("StandardizedVariants", null);
add("CaseFolding", null);
add("DerivedAge", new String[] {"Age"});
add("Scripts", new String[] {"Script"});
add("HangulSyllableType", new String[] {"HangulSyllableType"});
if (false) add("DerivedNormalizationProps", new String[] {
"FNC", "Full_Composition_Exclusion",
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});
add("DerivedBidiClass", new String[] {"BidiClass"});
add("DerivedBinaryProperties", new String[] {"BidiMirrored"});
add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});
@ -165,93 +256,390 @@ public class MakeUnicodeFiles {
"Default_Ignorable_Code_Point",
"Grapheme_Extend", "Grapheme_Base"
});
add("DerivedNormalizationProps", new String[] {
"FC_NFKC_Closure",
"Full_Composition_Exclusion",
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"
});
}
/**
 * Generates every file registered in {@code contents} whose name passes the
 * range checks against {@code atOrAfter} and {@code atOrBefore}, by calling
 * {@link #generateFile(String)} for each such name.
 */
// NOTE(review): the signature line appears twice, and both case-sensitive
// (compareTo) and case-insensitive (compareToIgnoreCase) range checks are present.
// This looks like old and new revisions merged together - confirm which pair is intended.
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
Iterator it = contents.keySet().iterator();
while (it.hasNext()) {
String propname = (String) it.next();
// skip names outside the requested range
if (propname.compareTo(atOrAfter) < 0) continue;
if (propname.compareTo(atOrBefore) > 0) continue;
if (propname.compareToIgnoreCase(atOrAfter) < 0) continue;
if (propname.compareToIgnoreCase(atOrBefore) > 0) continue;
generateFile(propname);
}
}
/**
 * Generates the output file called {@code filename} by dispatching to the
 * generator responsible for it.
 *
 * "*" generates every registered file; names ending in "Aliases" go to the
 * alias generators; a few files have dedicated generators; everything else
 * is treated as a property file.
 */
public static void generateFile(String filename) throws IOException {
    if (filename.equals("*")) {
        generateFile("", "\uFFFD");
        return;
    }
    if (filename.endsWith("Aliases")) {
        if (filename.endsWith("ValueAliases")) {
            generateValueAliasFile(filename);
        } else {
            generateAliasFile(filename);
        }
        return;
    }
    if (filename.equals("NormalizationTest")) {
        GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
        return;
    }
    if (filename.equals("CaseFolding")) {
        GenerateCaseFolding.makeCaseFold(false);
        return;
    }
    if (filename.equals("SpecialCasing")) {
        GenerateCaseFolding.generateSpecialCasing(false);
        return;
    }
    if (filename.equals("StandardizedVariants")) {
        GenerateStandardizedVariants.generate();
        return;
    }
    generatePropertyFile(filename);
}
public static void generateFile(String filename) throws IOException {
/** Comment rule line printed between sections of the generated files. */
static final String SEPARATOR = "# ================================================";
/**
 * Writes the property-alias listing into the "DerivedDataTest/" directory.
 *
 * For each property type, from LIMIT_TYPE - 1 down to BINARY (types with an
 * EXTENDED_MASK bit are skipped), the name aliases of every available property
 * are joined with "\t; ", aligned by the tabber, sorted caselessly and printed
 * under a "# &lt;type&gt; Properties" heading. A property with a single alias has
 * that alias repeated so the line still has two fields. The hand-maintained
 * entries in specialString and specialMisc are added to the STRING and MISC
 * sections. A total line count is printed at the end.
 *
 * @param filename name of the file to create
 * @throws IOException if the output file cannot be opened or closed
 */
public static void generateAliasFile(String filename) throws IOException {
    UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
    PrintWriter pw = udf.out;
    UnicodeProperty.Factory ups
        = ToolUnicodePropertySource.make(Default.ucdVersion());
    TreeSet sortedSet = new TreeSet(CASELESS_COMPARATOR);
    Tabber.MonoTabber mt = new Tabber.MonoTabber()
        .add(10,Tabber.LEFT);
    StringBuffer buffer = new StringBuffer(); // reused for every line
    int count = 0;

    for (int i = UnicodeProperty.LIMIT_TYPE - 1; i >= UnicodeProperty.BINARY; --i) {
        if ((i & UnicodeProperty.EXTENDED_MASK) != 0) continue;
        List list = ups.getAvailableNames(1<<i);
        sortedSet.clear();
        for (Iterator it = list.iterator(); it.hasNext();) {
            String propAlias = (String)it.next();
            UnicodeProperty up = ups.getProperty(propAlias);
            List aliases = up.getNameAliases();
            buffer.setLength(0);
            if (aliases.size() == 1) {
                // only one name: repeat it so the line still has two fields
                buffer.append(aliases.get(0)).append("\t; ").append(aliases.get(0));
            } else {
                boolean isFirst = true;
                for (Iterator it2 = aliases.iterator(); it2.hasNext();) {
                    if (isFirst) isFirst = false;
                    else buffer.append("\t; ");
                    buffer.append(it2.next());
                }
            }
            sortedSet.add(mt.process(buffer.toString()));
        }
        // entries that the property source does not supply
        if (i == UnicodeProperty.STRING) {
            for (int j = 0; j < specialString.length; ++j) {
                sortedSet.add(mt.process(specialString[j]));
            }
        } else if (i == UnicodeProperty.MISC) {
            for (int j = 0; j < specialMisc.length; ++j) {
                sortedSet.add(mt.process(specialMisc[j]));
            }
        }
        pw.println();
        pw.println(SEPARATOR);
        pw.println("# " + UnicodeProperty.getTypeName(i) + " Properties");
        pw.println(SEPARATOR);
        for (Iterator it = sortedSet.iterator(); it.hasNext();) {
            pw.println(it.next());
            count++;
        }
    }
    pw.println();
    pw.println(SEPARATOR);
    pw.println("#Total: " + count);
    pw.println();
    udf.close();
}
// Pre-formatted "short<TAB>; long" alias lines that generateAliasFile adds to the
// MISC section in addition to the aliases obtained from the property factory.
static String[] specialMisc = {
"isc\t; ISO_Comment",
"na1\t; Unicode_1_Name",
"URS\t; Unicode_Radical_Stroke"};
// Pre-formatted "short<TAB>; long" alias lines that generateAliasFile adds to the
// STRING section in addition to the aliases obtained from the property factory.
static String[] specialString = {
"dm\t; Decomposition_Mapping",
"lc\t; Lowercase_Mapping",
"scc\t; Special_Case_Condition",
"sfc\t; Simple_Case_Folding",
"slc\t; Simple_Lowercase_Mapping",
"stc\t; Simple_Titlecase_Mapping",
"suc\t; Simple_Uppercase_Mapping",
"tc\t; Titlecase_Mapping",
"uc\t; Uppercase_Mapping"};
// Pre-formatted lines for the grouped General_Category values; each trailing
// comment lists the categories the group stands for. generateValueAliasFile adds
// them when it processes the General_Category property.
static String[] specialGC = {
"gc\t;\tC\t;\tOther\t# Cc | Cf | Cn | Co | Cs",
"gc\t;\tL\t;\tLetter\t# Ll | Lm | Lo | Lt | Lu",
"gc\t;\tLC\t;\tCased_Letter\t# Ll | Lt | Lu",
"gc\t;\tM\t;\tMark\t# Mc | Me | Mn",
"gc\t;\tN\t;\tNumber\t# Nd | Nl | No",
"gc\t;\tP\t;\tPunctuation\t# Pc | Pd | Pe | Pf | Pi | Po | Ps",
"gc\t;\tS\t;\tSymbol\t# Sc | Sk | Sm | So",
"gc\t;\tZ\t;\tSeparator\t# Zl | Zp | Zs"};
/**
 * Writes the property-value-alias listing into the "DerivedDataTest/" directory.
 *
 * For every enumerated or catalog property, one line is built per available
 * value: the property's first name alias followed by the value's aliases,
 * each preceded by "\t; \t". Lines are aligned by a tabber, sorted caselessly
 * and printed in one group per property, each group preceded by a blank line.
 *
 * @param filename name of the file to create
 * @throws IOException if the output file cannot be opened or closed
 */
public static void generateValueAliasFile(String filename) throws IOException {
    UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
    PrintWriter pw = udf.out;
    UnicodeProperty.Factory toolFactory
        = ToolUnicodePropertySource.make(Default.ucdVersion());
    BagFormatter bf = new BagFormatter(toolFactory);
    StringBuffer lineBuffer = new StringBuffer();
    Set sortedLines = new TreeSet(CASELESS_COMPARATOR);

    // Column layout for ordinary lines, e.g.
    //   gc ; C         ; Other                            # Cc | Cf | Cn | Co | Cs
    //   sc ; Arab      ; Arabic
    Tabber.MonoTabber threeFieldTabber = new Tabber.MonoTabber()
        .add(3,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(10,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(33,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(33,Tabber.LEFT);
    // Column layout for lines with an extra numeric field, e.g.
    //   ccc; 216; ATAR ; Attached_Above_Right
    Tabber.MonoTabber fourFieldTabber = new Tabber.MonoTabber()
        .add(3,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(3,Tabber.RIGHT)
        .add(2,Tabber.LEFT) // ;
        .add(5,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(33,Tabber.LEFT)
        .add(2,Tabber.LEFT) // ;
        .add(33,Tabber.LEFT);

    Iterator propIt = toolFactory.getAvailableNames(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK).iterator();
    while (propIt.hasNext()) {
        String propName = (String) propIt.next();
        UnicodeProperty up = toolFactory.getProperty(propName);
        String shortProp = up.getFirstNameAlias();
        sortedLines.clear();

        Iterator valueIt = up.getAvailableValues().iterator();
        while (valueIt.hasNext()) {
            String value = (String) valueIt.next();
            List aliasList = up.getValueAliases(value);
            System.out.println(value + "\t" + bf.join(aliasList));

            // HACK: pad single-alias values to two fields; values with more
            // than two aliases use the wider layout.
            Tabber tabber = threeFieldTabber;
            int aliasCount = aliasList.size();
            if (aliasCount == 1) {
                if (propName.equals("Canonical_Combining_Class")) continue;
                boolean lacksShortName = propName.equals("Block")
                    || propName.equals("Joining_Group")
                    || propName.equals("Age");
                // either mark the short name as unavailable or double up the only name
                aliasList.add(0, lacksShortName ? "n/a" : aliasList.get(0));
            } else if (aliasCount > 2) {
                tabber = fourFieldTabber;
            }
            if (UnicodeProperty.equalNames(value,"Cyrillic_Supplement")) {
                aliasList.add("Cyrillic_Supplementary");
            }

            lineBuffer.setLength(0);
            lineBuffer.append(shortProp);
            Iterator aliasIt = aliasList.iterator();
            while (aliasIt.hasNext()) {
                lineBuffer.append("\t; \t").append(aliasIt.next());
            }
            sortedLines.add(tabber.process(lineBuffer.toString()));
        }

        // HACK: grouped General_Category values are maintained by hand
        if (propName.equals("General_Category")) {
            int k = 0;
            while (k < specialGC.length) {
                sortedLines.add(threeFieldTabber.process(specialGC[k]));
                ++k;
            }
        }

        pw.println();
        Iterator lineIt = sortedLines.iterator();
        while (lineIt.hasNext()) {
            String line = (String) lineIt.next();
            pw.println(line);
        }
    }
    udf.close();
}
public static void generatePropertyFile(String filename) throws IOException {
String[] propList = (String[]) contents.get(filename);
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
UnicodeProperty.Factory toolFactory
= ToolUnicodePropertySource.make(Default.ucdVersion());
BagFormatter bf2 = new BagFormatter(toolFactory);
UnicodeSet unassigned = toolFactory.getSet("gc=cn")
.addAll(toolFactory.getSet("gc=cs"));
//System.out.println(unassigned.toPattern(true));
// .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
String separator = bf2.getLineSeparator()
+ "# ================================================"
+ bf2.getLineSeparator() + bf2.getLineSeparator();
for (int i = 0; i < propList.length; ++i) {
BagFormatter bf = new BagFormatter(toolFactory);
UnicodeProperty prop = toolFactory.getProperty(propList[i]);
System.out.println(prop.getName());
pw.print(separator);
PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());
if (ps == null) {
ps = DEFAULT_PRINT_STYLE;
System.out.println("Using default style!");
}
if (ps.noLabel) bf2.setLabelSource(null);
if (ps.valueList) {
bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))
.setNameSource(null)
.setShowCount(false)
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
} else if (prop.getType() <= prop.EXTENDED_BINARY) {
UnicodeSet s = prop.getSet("True");
bf2.setValueSource(prop.getName());
bf2.showSetNames(pw, s);
} else {
bf2.setValueSource(prop);
Collection aliases = prop.getAvailableValueAliases();
if (ps.orderByRangeStart) {
System.out.println("Reordering");
TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
temp2.addAll(aliases);
aliases = temp2;
}
Iterator it = aliases.iterator();
while (it.hasNext()) {
String value = (String)it.next();
UnicodeSet s = prop.getSet(value);
System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);
System.out.println(s.toPattern(true));
if (skeletonComparator.compare(value, ps.skipValue) == 0) continue;
if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) {
s.removeAll(unassigned);
}
if (s.size() == 0) continue;
//if (unassigned.containsAll(s)) continue; // skip if all unassigned
//if (s.contains(0xD0000)) continue; // skip unassigned
pw.print(separator);
if (!ps.longForm) value = prop.getShortestValueAlias(value);
if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH);
if (ps.makeFirstLetterLowercase) {
// NOTE: this is ok since we are only working in ASCII
value = value.substring(0,1).toLowerCase(Locale.ENGLISH)
+ value.substring(1);
}
bf2.setValueSource(value);
bf2.showSetNames(pw, s);
}
String name = prop.getName();
System.out.println("Property: " + name + "; " + prop.getTypeName(prop.getType()));
pw.println("\n" + SEPARATOR + "\n");
String propComment = valueComments.get(name, "*");
if (propComment != null) {
pw.print(propComment);
}
pw.println();
PrintStyle ps = PrintStyle.get(name);
if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
if (DEBUG) System.out.println("Resetting Binary Values");
ps.skipValue = "False";
if (ps.nameStyle.equals("none")) ps.nameStyle = "long";
ps.valueStyle = "none";
}
if (ps.noLabel) bf.setLabelSource(null);
if (ps.nameStyle.equals("none")) bf.setPropName(null);
else if (ps.nameStyle.equals("short")) bf.setPropName(prop.getFirstNameAlias());
else bf.setPropName(name);
if (ps.interleaveValues) {
writeInterleavedValues(pw, bf, prop);
} else if (prop.isType(UnicodeProperty.STRING_OR_MISC_MASK)) {
writeStringValues(pw, bf, prop);
//} else if (prop.isType(UnicodeProperty.BINARY_MASK)) {
// writeBinaryValues(pw, bf, prop);
} else {
writeEnumeratedValues(pw, bf, unassigned, prop, ps);
}
pw.println();
}
udf.close();
}
/**
 * Writes one listing per available value of {@code prop}.
 *
 * Values whose set is empty, or whose name matches {@code ps.skipValue}, are
 * skipped. For the value matching {@code ps.skipUnassigned}, the code points in
 * {@code unassigned} are removed before listing. The label shown for each value
 * follows {@code ps.valueStyle} ("none", "short", anything else = as is) and the
 * case flags of the print style.
 *
 * @param pw destination writer
 * @param bf formatter used to print each set; its value source is changed here
 * @param unassigned code points removed from the skipUnassigned value
 * @param prop the property being listed
 * @param ps print style controlling ordering, skipping and labels
 */
private static void writeEnumeratedValues(
        PrintWriter pw,
        BagFormatter bf,
        UnicodeSet unassigned,
        UnicodeProperty prop,
        PrintStyle ps) {
    if (DEBUG) System.out.println("Writing Enumerated Values: " + prop.getName());
    bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, hackMapFilter));
    Collection aliases = prop.getAvailableValues();
    if (ps.orderByRangeStart) {
        System.out.println("Reordering");
        TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
        temp2.addAll(aliases);
        aliases = temp2;
    }
    for (Iterator it = aliases.iterator(); it.hasNext();) {
        String value = (String)it.next();
        UnicodeSet s = prop.getSet(value);
        if (DEBUG) System.out.println("Getting value " + value);
        String valueComment = valueComments.get(prop.getName(), value);
        if (DEBUG) {
            System.out.println(value + "\t" + prop.getFirstValueAlias(value) + "\tskip:" + ps.skipValue);
            System.out.println(s.toPattern(true));
        }
        int totalSize = s.size(); // size before any unassigned code points are removed
        if (s.size() == 0) continue;
        if (UnicodeProperty.compareNames(value, ps.skipValue) == 0) {
            System.out.println("Skipping: " + value);
            continue;
        }
        if (UnicodeProperty.compareNames(value, ps.skipUnassigned) == 0) {
            System.out.println("Removing Unassigneds: " + value);
            s.removeAll(unassigned);
        }

        pw.print("\n" + SEPARATOR + "\n\n");

        // Work out the label printed next to each range.
        String displayValue = value;
        if (ps.valueStyle.equals("none")) {
            displayValue = null;
        } else if (ps.valueStyle.equals("short")) {
            displayValue = prop.getFirstValueAlias(displayValue);
            if (DEBUG) System.out.println("Changing value " + displayValue);
        }
        if (ps.makeUppercase && displayValue != null) {
            displayValue = displayValue.toUpperCase(Locale.ENGLISH);
            if (DEBUG) System.out.println("Changing value2 " + displayValue);
        }
        if (ps.makeFirstLetterLowercase && displayValue != null) {
            // NOTE: this is ok since we are only working in ASCII
            displayValue = displayValue.substring(0,1).toLowerCase(Locale.ENGLISH)
                + displayValue.substring(1);
            if (DEBUG) System.out.println("Changing value2 " + displayValue);
        }
        if (DEBUG) System.out.println("Setting value " + displayValue);
        bf.setValueSource(displayValue);

        if (valueComment != null) {
            pw.println(valueComment);
            pw.println();
        }
        if (ps.longValueHeading != null) {
            String headingValue = value;
            // Compare by content: "==" only matched when both strings happened
            // to be the same object.
            if ("ccc".equals(ps.longValueHeading)) {
                headingValue = Utility.replace(value, "_", "");
                // purely numeric combining classes share one heading
                if (headingValue.length() > 0) {
                    char c = headingValue.charAt(0);
                    if ('0' <= c && c <= '9') headingValue = "Other Combining Class";
                }
            }
            pw.println("# " + headingValue);
            pw.println();
        }
        if (s.size() != 0) bf.showSetNames(pw, s);
        if (s.size() != totalSize) {
            pw.println();
            // NOTE(review): this prints the size before removal, not the number of
            // code points that were removed - confirm which figure is intended.
            pw.print("# Not Listed: " + totalSize);
        }
        pw.println();
    }
}
/*
private static void writeBinaryValues(
PrintWriter pw,
BagFormatter bf,
UnicodeProperty prop) {
if (DEBUG) System.out.println("Writing Binary Values: " + prop.getName());
UnicodeSet s = prop.getSet("True");
bf.setValueSource(prop.getName());
bf.showSetNames(pw, s);
}
*/
/**
 * Lists the whole code point range 0..10FFFF in one pass, showing for each
 * range the property value passed through RestoreSpacesFilter, with the name
 * source cleared and counts switched off.
 */
private static void writeInterleavedValues(
        PrintWriter pw,
        BagFormatter bf,
        UnicodeProperty prop) {
    if (DEBUG) {
        System.out.println("Writing Interleaved Values: " + prop.getName());
    }
    UnicodeProperty.FilteredProperty spacedValues =
        new UnicodeProperty.FilteredProperty(prop, new RestoreSpacesFilter());
    bf.setValueSource(spacedValues)
        .setNameSource(null)
        .setShowCount(false)
        .showSetNames(pw, new UnicodeSet(0, 0x10FFFF));
}
/**
 * Lists the whole code point range 0..10FFFF using the property itself as the
 * value source, with hex values switched on and range merging switched off.
 */
private static void writeStringValues(
        PrintWriter pw,
        BagFormatter bf,
        UnicodeProperty prop) {
    if (DEBUG) {
        System.out.println("Writing String Values: " + prop.getName());
    }
    bf.setValueSource(prop).setHexValue(true).setMergeRanges(false);
    UnicodeSet allCodePoints = new UnicodeSet(0, 0x10FFFF);
    bf.showSetNames(pw, allCodePoints);
}
static class RangeStartComparator implements Comparator {
UnicodeProperty prop;
CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();
@ -269,12 +657,35 @@ public class MakeUnicodeFiles {
}
public static class ReplaceFilter extends UnicodeProperty.StringFilter {
/**
 * String filter that first substitutes the hackMap entry for a value, when
 * there is one, and then turns every underscore into a space.
 */
static class RestoreSpacesFilter extends UnicodeProperty.StringFilter {
    public String remap(String original) {
        Object replacement = hackMap.get(original);
        String base = (replacement != null) ? (String) replacement : original;
        // a character-for-character swap, so the length stays the same
        return base.replace('_', ' ');
    }
}
/** Orders objects by their toString() text, ignoring case differences. */
static Comparator CASELESS_COMPARATOR = new Comparator() {
    public int compare(Object o1, Object o2) {
        return o1.toString().compareToIgnoreCase(o2.toString());
    }
};
}
}
/*
static class OrderedMap {
HashMap map = new HashMap();
ArrayList keys = new ArrayList();
void put(Object o, Object t) {
map.put(o,t);
keys.add(o);
}
List keyset() {
return keys;
}
}
*/

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
* $Date: 2004/02/06 18:30:20 $
* $Revision: 1.11 $
* $Date: 2004/02/18 03:08:59 $
* $Revision: 1.12 $
*
*******************************************************************************
*/
@ -39,7 +39,7 @@ final class MyPropertyLister extends PropertyLister {
int main = (propMask & 0xFF00);
if (main == COMBINING_CLASS) {
String s = UCD.getCombiningClassID_fromIndex((short)(propMask & 0xFF), LONG);
if (s.startsWith("Fixed")) s = "Other Combining Class";
if (s.charAt(0) <= '9') s = "Other Combining Class";
return "# " + s;
} else if (main == BINARY_PROPERTIES) {
return "";

View file

@ -1,33 +0,0 @@
# This file contains aliases for properties used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# For information on which properties are normative, see UCD.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line has two fields, separated by semicolons.
#
# First Field: The first field is an abbreviated name for the property.
#
# Second Field: The second field is a long name
#
# With loose matching of property names, the case distinctions, whitespace,
# and '_' are ignored.
#
# NOTE: Currently there is at most one abbreviated name and one long name for
# each property. However, in the future additional aliases may be added.
#
# NOTE: The property value names are NOT unique across properties, especially
# with loose matches. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Alpha_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
#
# The combination of property value and property name is, however, unique.
# For more information, see UTR #18: Regular Expression Guidelines

View file

@ -1,48 +0,0 @@
# This file contains aliases for property values used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
# For information on which properties are normative, see UCD.html.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line describes a property value name.
# This consists of three fields, separated by semicolons.
#
# First Field: The first field describes the property for which that
# property value name is used.
# There is one special pseudo-property: "qc" stands for any quick-check property
#
# Second Field: The second field is an abbreviated name.
# If there is no abbreviated name available, the field is marked with "n/a".
#
# Third Field: The third field is a long name.
#
# In the case of ccc, there are 4 fields. The second field is numeric, third
# is abbreviated, and fourth is long.
#
# With loose matching of property names, the case distinctions, whitespace,
# and '_' are ignored.
#
# NOTE: Currently there is at most one abbreviated name and one long name for
# each property value. However, in the future additional aliases may be added.
# In such a case, the first line for the property value would have
# the preferred alias for output.
#
# NOTE: The property value names are NOT unique across properties, especially
# with loose matches. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Alpha_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
# For example:
#
# cc means Combining_Class property, and
# cc means the General_Category property value Control (cc)
#
# The combination of property value and property name is, however, unique.
# For more information, see UTR #18: Regular Expression Guidelines

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.14 $
* $Date: 2004/02/18 03:09:00 $
* $Revision: 1.15 $
*
*******************************************************************************
*/
@ -138,8 +138,21 @@ public class TestData implements UCD_Types {
}
}
/**
 * UnicodeProperty.Matcher backed by java.util.regex: a value matches when the
 * whole value matches the pattern given to {@link #set(String)}.
 */
public static class RegexMatcher implements UnicodeProperty.Matcher {
    /** Reusable matcher for the current pattern; null until set() is called. */
    private Matcher matcher;

    /** Compiles {@code pattern} and makes it the pattern used by matches(). */
    public UnicodeProperty.Matcher set(String pattern) {
        Pattern compiled = Pattern.compile(pattern);
        matcher = compiled.matcher("");
        return this;
    }

    /** Returns true when the entire {@code value} matches the current pattern. */
    public boolean matches(String value) {
        return matcher.reset(value).matches();
    }
}
static BagFormatter bf = new BagFormatter();
static UnicodeProperty.Matcher matcher = new ICUPropertyFactory.RegexMatcher();
static UnicodeProperty.Matcher matcher = new RegexMatcher();
private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
System.out.println("Property Listing");

View file

@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TreeSet;
@ -13,7 +14,10 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.text.utility.Utility;
public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
static final boolean DEBUG = false;
private UCD ucd;
private Normalizer nfc, nfd, nfkd, nfkc;
private static boolean needAgeCache = true;
private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
@ -29,22 +33,22 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
private ToolUnicodePropertySource(String version) {
ucd = UCD.make(version);
version = ucd.getVersion();
TreeSet names = new TreeSet();
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
Iterator it = names.iterator();
while (it.hasNext()) {
String name = (String) it.next();
//System.out.println("Name: " + name);
add(new ToolUnicodeProperty(name));
}
nfc = new Normalizer(Normalizer.NFC, ucd.getVersion());
nfd = new Normalizer(Normalizer.NFD, ucd.getVersion());
nfkc = new Normalizer(Normalizer.NFKC, ucd.getVersion());
nfkd = new Normalizer(Normalizer.NFKD, ucd.getVersion());
version = ucd.getVersion(); // regularize
// first the special cases
if (DEBUG) System.out.println("Adding Simple Cases");
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
return ucd.getName(codepoint);
}
}.setMain("Name", "na", UnicodeProperty.STRING, version)
}.setMain("Name", "na", UnicodeProperty.MISC, version)
.setValues("<string>"));
add(new UnicodeProperty.SimpleProperty() {
@ -58,7 +62,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
protected UnicodeMap _getUnicodeMap() {
return ucd.blockData;
}
}.setMain("Block", "blk", UnicodeProperty.ENUMERATED, version)
}.setMain("Block", "blk", UnicodeProperty.CATALOG, version)
.setValues(ucd.getBlockNames(null)));
add(new UnicodeProperty.SimpleProperty() {
@ -83,9 +87,125 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
if (Double.isNaN(num)) return null;
return Double.toString(num);
}
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version)
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int cp) {
if (!ucd.isRepresented(cp)) return null;
String b = nfkc.normalize(ucd.getCase(cp, UCD_Types.FULL, UCD_Types.FOLD));
String c = nfkc.normalize(ucd.getCase(b, UCD_Types.FULL, UCD_Types.FOLD));
if (c.equals(b)) return null;
return c;
}
public int getMaxWidth(boolean isShort) {
return 14;
}
}.setMain("FC_NFKC_Closure", "FNC", UnicodeProperty.STRING, version)
.addName("FC_NFKC"));
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (!nfd.isNormalized(codepoint)) return "No";
else if (nfd.isTrailing(codepoint)) throw new IllegalArgumentException("Internal Error!");
else return "Yes";
}
public int getMaxWidth(boolean isShort) {
return 15;
}
}.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version)
.setValues(LONG_YES_NO, YES_NO));
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (!nfc.isNormalized(codepoint)) return "No";
else if (nfc.isTrailing(codepoint)) return "Maybe";
else return "Yes";
}
public int getMaxWidth(boolean isShort) {
return 15;
}
}.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version)
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (!nfkd.isNormalized(codepoint)) return "No";
else if (nfkd.isTrailing(codepoint)) throw new IllegalArgumentException("Internal Error!");
else return "Yes";
}
public int getMaxWidth(boolean isShort) {
return 15;
}
}.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version)
.setValues(LONG_YES_NO, YES_NO));
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (!nfkc.isNormalized(codepoint)) return "No";
else if (nfkc.isTrailing(codepoint)) return "Maybe";
else return "Yes";
}
public int getMaxWidth(boolean isShort) {
return 15;
}
}.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version)
.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
/*
add(new UnicodeProperty.SimpleProperty() {
public String _getValue(int codepoint) {
if (!nfx.isNormalized(codepoint)) return NO;
else if (nfx.isTrailing(codepoint)) return MAYBE;
else return "";
}
}.setMain("NFD_QuickCheck", "nv", UnicodeProperty.NUMERIC, version)
.setValues("<number>"));
*/
// Now the derived properties
if (DEBUG) System.out.println("Derived Properties");
for (int i = 0; i < DerivedProperty.DERIVED_PROPERTY_LIMIT; ++i) {
UCDProperty prop = DerivedProperty.make(i);
if (prop == null) continue;
if (!prop.isStandard()) continue;
String name = prop.getName();
if (getProperty(name) != null) {
if (DEBUG) System.out.println("Iterated Names: " + name + ", ALREADY PRESENT*");
continue; // skip if already there
}
int type = prop.getValueType();
if (i == UCD_Types.FC_NFKC_Closure) type = UnicodeProperty.STRING;
else if (i == UCD_Types.FullCompExclusion) type = UnicodeProperty.BINARY;
else type = remapUCDType(type);
if (DEBUG) System.out.println(prop.getName());
add(new UCDPropertyWrapper(prop,type,false));
}
// then the general stuff
if (DEBUG) System.out.println("Other Properties");
List names = new ArrayList();
UnifiedProperty.getAvailablePropertiesAliases(names,ucd);
Iterator it = names.iterator();
while (it.hasNext()) {
String name = (String) it.next();
if (getProperty(name) != null) {
if (DEBUG) System.out.println("Iterated Names: " + name + ", ALREADY PRESENT");
continue; // skip if already there
}
if (DEBUG) System.out.println("Iterated Names: " + name);
add(new ToolUnicodeProperty(name));
}
}
// Value-name tables handed to setValues(...) for the quick-check properties:
// the LONG_* arrays hold the full names and the plain arrays the one-letter
// forms, in the same order.
static String[] YES_NO_MAYBE = {"N", "M", "Y"};
static String[] LONG_YES_NO_MAYBE = {"No", "Maybe", "Yes"};
static String[] YES_NO = {"N", "Y"};
static String[] LONG_YES_NO = {"No", "Yes"};
/*
"Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
"Lowercase_Mapping", "Name", "Numeric_Value", "Simple_Case_Folding",
@ -105,6 +225,57 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
}
}
*/
/**
 * Adapts a UCDProperty to the UnicodeProperty interface.
 *
 * Value and alias information is only supplied for binary properties
 * (True/False) and, when {@code yes_no_maybe} is set, for Yes/No/Maybe.
 */
static class UCDPropertyWrapper extends UnicodeProperty {
    UCDProperty ucdProperty;
    boolean yes_no_maybe;

    /**
     * @param ucdProperty the property being wrapped
     * @param type the UnicodeProperty type to report
     * @param yes_no_maybe whether the values are Yes/No/Maybe
     */
    UCDPropertyWrapper(UCDProperty ucdProperty, int type, boolean yes_no_maybe) {
        this.ucdProperty = ucdProperty;
        setType(type);
        String name = ucdProperty.getName(UCDProperty.LONG);
        // Fall back to the short name; the result of this call used to be
        // discarded, which left the name null.
        if (name == null) name = ucdProperty.getName(UCDProperty.SHORT);
        setName(name);
        this.yes_no_maybe = yes_no_maybe;
    }

    protected String _getVersion() {
        return ucdProperty.getUCD().getVersion();
    }

    protected String _getValue(int codepoint) {
        return ucdProperty.getValue(codepoint, UCDProperty.LONG);
    }

    /** Adds the short name and then this property's name to {@code result}. */
    protected List _getNameAliases(List result) {
        addUnique(ucdProperty.getName(UCDProperty.SHORT), result);
        addUnique(getName(), result);
        return result;
    }

    /**
     * Adds the one-letter alias (where one is known) followed by
     * {@code valueAlias} itself.
     */
    // NOTE(review): nothing is added when the property is neither binary nor
    // yes/no/maybe - confirm that an empty alias list is intended in that case.
    protected List _getValueAliases(String valueAlias, List result) {
        if (isType(BINARY_MASK)) {
            if (valueAlias.equals("True")) addUnique("T", result);
            else if (valueAlias.equals("False")) addUnique("F", result);
            addUnique(valueAlias, result);
        }
        if (yes_no_maybe) {
            if (valueAlias.equals("Yes")) addUnique("Y", result);
            else if (valueAlias.equals("No")) addUnique("N", result);
            else if (valueAlias.equals("Maybe")) addUnique("M", result);
            addUnique(valueAlias, result);
        }
        return result;
    }

    /** Adds True/False for binary properties and No/Maybe/Yes when yes_no_maybe is set. */
    protected List _getAvailableValues(List result) {
        if (isType(BINARY_MASK)) {
            addUnique("True", result);
            addUnique("False", result);
        }
        if (yes_no_maybe) {
            addUnique("No",result);
            addUnique("Maybe",result);
            addUnique("Yes",result);
        }
        return result;
    }
}
// Bit mask combining the general categories Cn, Co, Cs and Cc (one bit per
// category constant); the Name property returns null for code points whose
// category mask intersects it.
static final int ODD_BALLS = (1<<UCD_Types.Cn) | (1<<UCD_Types.Co) | (1<<UCD_Types.Cs) | (1<<UCD.Cc);
/* (non-Javadoc)
@ -119,30 +290,37 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
/**
 * Creates the property for {@code propertyAlias} by looking up its mask and
 * the matching UnifiedProperty in the enclosing factory's UCD.
 *
 * @param propertyAlias name under which the property is registered
 * @throws IllegalArgumentException if no unified property exists for the alias
 */
private ToolUnicodeProperty(String propertyAlias) {
    propMask = UnifiedProperty.getPropmask(propertyAlias, ucd);
    up = UnifiedProperty.make(propMask, ucd);
    if (up == null) throw new IllegalArgumentException("Not found: " + propertyAlias);
    setType(getPropertyTypeInternal());
    setName(propertyAlias);
    // Debug trace for one property; printed after setType so that it shows the
    // type that was actually assigned rather than the not-yet-set value.
    if (propertyAlias.equals("Case_Fold_Turkish_I")) {
        System.out.println(propertyAlias + " " + getTypeName(getType()));
    }
}
public Collection _getAvailableValueAliases(Collection result) {
public List _getAvailableValues(List result) {
if (result == null) result = new ArrayList();
int type = getType() & ~EXTENDED_BIT;
if (type == STRING) result.add("<string>");
else if (type == NUMERIC) result.add("<string>");
int type = getType() & CORE_MASK;
if (type == STRING || type == MISC) result.add("<string>");
else if (type == NUMERIC) result.add("<number>");
else if (type == BINARY) {
result.add("True");
result.add("False");
} else if (type == ENUMERATED) {
} else if (type == ENUMERATED || type == CATALOG) {
byte style = UCD_Types.LONG;
int prop = propMask>>8;
String temp = null;
boolean titlecase = false;
for (int i = 0; i < 256; ++i) {
try {
boolean check = false;
try {
switch (prop) {
case UCD_Types.CATEGORY>>8: temp = (ucd.getCategoryID_fromIndex((byte)i, style)); break;
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((byte)i, style)); break;
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((short)i, style)); break;
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style)); break;
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style));
check = temp != null;
break;
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
titlecase = true;
break;
@ -151,7 +329,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
case UCD_Types.JOINING_TYPE>>8: temp = (ucd.getJoiningTypeID_fromIndex((byte)i, style)); break;
case UCD_Types.JOINING_GROUP>>8: temp = (ucd.getJoiningGroupID_fromIndex((byte)i, style)); break;
case UCD_Types.SCRIPT>>8: temp = (ucd.getScriptID_fromIndex((byte)i, style)); titlecase = true;
if ("<unused>".equals(temp)) continue;
if (UnicodeProperty.UNUSED.equals(temp)) continue;
if (temp != null) temp = UCharacter.toTitleCase(Locale.ENGLISH,temp,null);
break;
case UCD_Types.AGE>>8: temp = (ucd.getAgeID_fromIndex((byte)i, style)); break;
@ -162,7 +340,11 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
} catch (ArrayIndexOutOfBoundsException e) {
continue;
}
if (temp != null && temp.length() != 0) result.add(Utility.getUnskeleton(temp, titlecase));
if (check) System.out.println("Value: " + temp);
if (temp != null && temp.length() != 0 && !temp.equals(UNUSED)) {
result.add(Utility.getUnskeleton(temp, titlecase));
}
if (check) System.out.println("Value2: " + temp);
}
//if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
//if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
@ -171,24 +353,22 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
return result;
}
/**
 * Collects the names of the wrapped property {@code up} into {@code result}.
 * Each name is read with {@code up.getName(...)}, passed through
 * {@code Utility.getUnskeleton} (long names with the title-case flag set,
 * short names without it) and handed to {@code addUnique}.
 *
 * @param result list to append to; a new ArrayList is created when null
 * @return the list that received the names
 */
public Collection _getAliases(Collection result) {
public List _getNameAliases(List result) {
if (result == null) result = new ArrayList();
String longName = up.getName(UCD_Types.LONG);
addUnique(Utility.getUnskeleton(longName, true), result);
String shortName = up.getName(UCD_Types.SHORT);
addUnique(Utility.getUnskeleton(shortName, false), result);
// Short name is added ahead of the long name here.
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.SHORT), false), result);
addUnique(Utility.getUnskeleton(up.getName(UCD_Types.LONG), true), result);
return result;
}
public Collection _getValueAliases(String valueAlias, Collection result) {
public List _getValueAliases(String valueAlias, List result) {
if (result == null) result = new ArrayList();
int type = getType() & ~EXTENDED_BIT;
if (type == STRING) return result;
int type = getType() & CORE_MASK;
if (type == STRING || type == MISC) return result;
else if (type == NUMERIC) return result;
else if (type == BINARY) {
UnicodeProperty.addUnique(valueAlias, result);
return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, result);
} else if (type == ENUMERATED) {
} else if (type == ENUMERATED || type == CATALOG) {
byte style = UCD_Types.LONG;
int prop = propMask>>8;
boolean titlecase = false;
@ -198,7 +378,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
case UCD_Types.CATEGORY>>8:
return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, result);
case UCD_Types.COMBINING_CLASS>>8:
addUnique(""+i, result);
addUnique(String.valueOf(0xFF&Utility.lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, true)), result);
return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, result);
case UCD_Types.BIDI_CLASS>>8:
return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, result);
@ -262,7 +442,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
temp = (ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint),style)); break;
}
if (temp != null) return Utility.getUnskeleton(temp,titlecase);
if (getType() == BINARY) {
if (isType(BINARY_MASK)) {
return up.hasValue(codepoint) ? "True" : "False";
}
return "<unknown>";
@ -285,23 +465,21 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
* @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
*/
private int getPropertyTypeInternal() {
int result = 0;
String name = up.getName(UCD_Types.LONG);
if ("Age".equals(name)) return ENUMERATED;
switch (up.getValueType()) {
case UCD_Types.NUMERIC_PROP: result = NUMERIC; break;
case UCD_Types.STRING_PROP: result = STRING; break;
case UCD_Types.MISC_PROP: result = STRING; break;
case UCD_Types.CATALOG_PROP: result = ENUMERATED; break;
case UCD_Types.FLATTENED_BINARY_PROP:
case UCD_Types.ENUMERATED_PROP: result = ENUMERATED; break;
case UCD_Types.BINARY_PROP: result = BINARY; break;
case UCD_Types.UNKNOWN_PROP:
default:
throw new IllegalArgumentException("Type: UNKNOWN_PROP");
switch(propMask) {
case UCD_Types.BINARY_PROPERTIES | UCD_Types.CaseFoldTurkishI:
case UCD_Types.BINARY_PROPERTIES | UCD_Types.Non_break:
return EXTENDED_BINARY;
}
if (!up.isStandard()) result |= EXTENDED_BIT;
return result;
switch(propMask>>8) {
case UCD_Types.SCRIPT>>8:
case UCD_Types.AGE>>8:
return CATALOG;
}
int mask = 0;
if (!up.isStandard()) mask = EXTENDED_MASK;
return remapUCDType(up.getValueType()) | mask;
}
public String _getVersion() {
@ -309,13 +487,66 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
}
}
static Collection lookup(String valueAlias, String[] main, String[] aux, Collection result) {
//System.out.println(valueAlias + "=>");
int pos = 0xFF & Utility.lookup(valueAlias, main, true);
//System.out.println("=>" + aux[pos]);
UnicodeProperty.addUnique(valueAlias, result);
if (aux == null) return result;
return UnicodeProperty.addUnique(aux[pos], result);
/**
 * Translates a {@code UCD_Types} value-type code into the matching
 * {@code UnicodeProperty} type constant.
 *
 * @param result a {@code UCD_Types.*_PROP} value-type code
 * @return NUMERIC, BINARY, ENUMERATED or STRING from {@code UnicodeProperty};
 *         unknown or unrecognized codes are mapped to STRING rather than rejected
 */
private int remapUCDType(int result) {
    switch (result) {
        case UCD_Types.NUMERIC_PROP:
            return UnicodeProperty.NUMERIC;
        case UCD_Types.BINARY_PROP:
            return UnicodeProperty.BINARY;
        case UCD_Types.CATALOG_PROP:
        case UCD_Types.FLATTENED_BINARY_PROP:
        case UCD_Types.ENUMERATED_PROP:
            return UnicodeProperty.ENUMERATED;
        case UCD_Types.STRING_PROP:
        case UCD_Types.MISC_PROP:
        case UCD_Types.UNKNOWN_PROP:
        default:
            // Deliberately lenient: no IllegalArgumentException for unknown types.
            return UnicodeProperty.STRING;
    }
}
/**
 * Adds {@code valueAlias} to {@code result} and, when an auxiliary table is
 * supplied, first adds the auxiliary entry at the position that
 * {@code Utility.lookup} reports for {@code valueAlias} in {@code main}.
 *
 * @param valueAlias the alias to look up and record
 * @param main       table searched for {@code valueAlias}
 * @param aux        parallel table of alternate names, or null for none
 * @param result     list receiving the aliases
 * @return the value returned by the final {@code UnicodeProperty.addUnique} call, cast to List
 */
static List lookup(String valueAlias, String[] main, String[] aux, List result) {
    if (aux == null) {
        return (List) UnicodeProperty.addUnique(valueAlias, result);
    }
    // Mask to an unsigned byte before indexing the auxiliary table.
    int index = Utility.lookup(valueAlias, main, true) & 0xFF;
    UnicodeProperty.addUnique(aux[index], result);
    return (List) UnicodeProperty.addUnique(valueAlias, result);
}
/*
static class DerivedPropertyWrapper extends UnicodeProperty {
UCDProperty derivedProperty;
UCD ucd;
DerivedPropertyWrapper(int derivedPropertyID, UCD ucd) {
this.ucd = ucd;
derivedProperty = DerivedProperty.make(derivedPropertyID, ucd);
}
protected String _getVersion() {
return ucd.getVersion();
}
protected String _getValue(int codepoint) {
return derivedProperty.getValue(codepoint, UCD_Types.LONG);
}
protected List _getNameAliases(List result) {
if (result != null) result = new ArrayList(1);
addUnique(derivedProperty.getName(UCD_Types.SHORT), result);
addUnique(derivedProperty.getName(UCD_Types.LONG), result);
return null;
}
protected List _getValueAliases(String valueAlias, List result) {
// TODO Auto-generated method stub
return null;
}
protected List _getAvailableValues(List result) {
// TODO Auto-generated method stub
return null;
}
}
*/
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
* $Date: 2004/02/12 08:23:16 $
* $Revision: 1.31 $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.32 $
*
*******************************************************************************
*/
@ -865,10 +865,17 @@ public final class UCD implements UCD_Types {
}
/**
 * Returns the combining-class name stored for {@code index}.
 *
 * @param index position in the UCD_Names combining-class tables
 * @param style SHORT selects UCD_Names.COMBINING_CLASS; any other value
 *              selects UCD_Names.LONG_COMBINING_CLASS
 * @return the table entry, or null when {@code index} is negative or not
 *         below UCD_Names.COMBINING_CLASS.length
 */
static String getCombiningClassID_fromIndex (short index, byte style) {
return index < 0
|| index >= UCD_Names.COMBINING_CLASS.length
? null
: style == SHORT
? UCD_Names.COMBINING_CLASS[index]
: UCD_Names.LONG_COMBINING_CLASS[index];
// The block below is the earlier switch-based implementation, kept commented out; it never executes.
/*
if (index > 255) return null;
index &= 0xFF;
if (style == NORMAL || style == NUMBER) return String.valueOf(index);
String s = "Fixed";
String s = "";
switch (index) {
case 0: s = style < LONG ? "NR" : "NotReordered"; break;
case 1: s = style < LONG ? "OV" : "Overlay"; break;
@ -894,9 +901,10 @@ public final class UCD implements UCD_Types {
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
default: s += "_" + index;
default: s += "" + index;
}
return s;
*/
}
@ -1309,6 +1317,7 @@ to guarantee identifier closure.
isRemapped = true;
result.name = null; // clean this up, since we reuse UNASSIGNED
result.shortName = null;
result.decompositionType = NONE;
if (fixStrings) {
constructedName = "<reserved-" + Utility.hex(codePoint, 4) + ">";
//result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
@ -1570,13 +1579,13 @@ to guarantee identifier closure.
if (blockData == null) loadBlocks();
return (String)blockData.getValue(codePoint);
}
/**
 * Convenience overload: forwards to the one-argument getBlockNames with a
 * null accumulator and returns its result.
 */
public Collection getBlockNames() {
public List getBlockNames() {
return getBlockNames(null);
}
/**
 * Asks {@code blockData} for its available values, passing {@code result}
 * as the accumulator.
 *
 * @param result accumulator to pass along; a new ArrayList is created when null
 * @return whatever {@code blockData.getAvailableValues(result)} returns
 */
public Collection getBlockNames(Collection result) {
public List getBlockNames(List result) {
if (result == null) result = new ArrayList();
// Block data is loaded on first use.
if (blockData == null) loadBlocks();
return blockData.getAvailableValues(result);
return (List)blockData.getAvailableValues(result);
}
public UnicodeSet getBlockSet(String value, UnicodeSet result) {
if (result == null) result = new UnicodeSet();

View file

@ -70,7 +70,7 @@ public abstract class UCDProperty implements UCD_Types {
* Get the full name. Style is SHORT, NORMAL, LONG
*/
public String getFullName(byte style) {
return getProperty(style) + "=" + getValue(style);
return getPropertyName(style) + "=" + getValue(style);
}
public String getFullName() {
@ -79,7 +79,7 @@ public abstract class UCDProperty implements UCD_Types {
/**
* Get the property name. Style is SHORT, NORMAL, LONG
*/
public String getProperty(byte style) {
public String getPropertyName(byte style) {
if (style == NORMAL) style = defaultPropertyStyle;
switch (style) {
case LONG: return skeletonize ? Utility.getUnskeleton(name.toString(), false) : name.toString();
@ -89,9 +89,9 @@ public abstract class UCDProperty implements UCD_Types {
}
}
public String getProperty() { return getProperty(NORMAL); }
public String getPropertyName() { return getPropertyName(NORMAL); }
public void setProperty(byte style, String in) {
public void setPropertyName(byte style, String in) {
if (style == NORMAL) style = defaultPropertyStyle;
switch (style) {
case LONG: name = Utility.getUnskeleton(in, false); break;
@ -145,7 +145,7 @@ public abstract class UCDProperty implements UCD_Types {
*/
public String getListingValue(int cp) {
if (getValueType() != BINARY_PROP) return getValue(cp, LONG);
return getProperty(LONG);
return getPropertyName(LONG);
}
/**
@ -173,8 +173,8 @@ public abstract class UCDProperty implements UCD_Types {
// Old Name for compatibility
boolean isTest() { return isStandard(); }
String getName(byte style) { return getProperty(style); }
String getName() { return getProperty(); }
String getName(byte style) { return getPropertyName(style); }
String getName() { return getPropertyName(); }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
* $Date: 2004/02/12 08:23:17 $
* $Revision: 1.25 $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.26 $
*
*******************************************************************************
*/
@ -15,6 +15,7 @@ package com.ibm.text.UCD;
import java.util.Locale;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.text.utility.*;
@ -297,7 +298,7 @@ final class UCD_Names implements UCD_Types {
"TIBETAN", // TIBETAN
"MYANMAR", // MYANMAR
"GEORGIAN", // GEORGIAN
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
UnicodeProperty.UNUSED, // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
"HANGUL", // HANGUL
"ETHIOPIC", // ETHIOPIC
"CHEROKEE", // CHEROKEE
@ -355,7 +356,7 @@ final class UCD_Names implements UCD_Types {
"Tibt", // TIBETAN
"Mymr", // MYANMAR
"Geor", // GEORGIAN
"<unused>", // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
UnicodeProperty.UNUSED, // JAMO -- NOT SEPARATED FROM HANGUL IN 15924
"Hang", // HANGUL
"Ethi", // ETHIOPIC
"Cher", // CHEROKEE
@ -434,7 +435,7 @@ final class UCD_Names implements UCD_Types {
"Cc", // = Other, Control 15
"Cf", // = Other, Format 16
"<unused>", // missing
UnicodeProperty.UNUSED, // missing
"Co", // = Other, Private Use 18
"Cs", // = Other, Surrogate 19
@ -477,7 +478,7 @@ final class UCD_Names implements UCD_Types {
"Control", // = Other, Control 15
"Format", // = Other, Format 16
"<unused>", // missing
UnicodeProperty.UNUSED, // missing
"PrivateUse", // = Other, Private Use 18
"Surrogate", // = Other, Surrogate 19
@ -522,7 +523,7 @@ final class UCD_Names implements UCD_Types {
"S", // Segment Separator
"WS", // Whitespace
"ON", // Other Neutrals ; All other characters: punctuation, symbols
"<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
UnicodeProperty.UNUSED, "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
};
static String[] LONG_BIDI_CLASS = {
@ -537,7 +538,7 @@ final class UCD_Names implements UCD_Types {
"SegmentSeparator", // Segment Separator
"WhiteSpace", // Whitespace
"OtherNeutral", // Other Neutrals ; All other characters: punctuation, symbols
"<unused>",
UnicodeProperty.UNUSED,
"BoundaryNeutral", "NonspacingMark", "ArabicLetter",
"LeftToRightOverride",
"RightToLeftOverride", "LeftToRightEmbedding",
@ -590,7 +591,7 @@ final class UCD_Names implements UCD_Types {
};
static {
fixArray(LONG_DECOMPOSITION_TYPE);
fixArray(DECOMPOSITION_TYPE);
//fixArray(DECOMPOSITION_TYPE);
}
@ -659,7 +660,7 @@ final class UCD_Names implements UCD_Types {
case 233: s = style < LONG ? "DB" : "DoubleBelow"; break;
case 234: s = style < LONG ? "DA" : "DoubleAbove"; break;
case 240: s = style < LONG ? "IS" : "IotaSubscript"; break;
default: s = style < LONG ? "" + index : "Fixed_" + index;
default: s = "" + index;
}
if (style < LONG) COMBINING_CLASS[index] = s;
else LONG_COMBINING_CLASS[index] = s;
@ -812,6 +813,13 @@ final class UCD_Names implements UCD_Types {
}
}
/**
 * Uppercases the first character of every element of {@code array}, in place.
 * The remainder of each string is left unchanged. Null and empty elements
 * are skipped, since they have no first character to convert.
 *
 * @param array the strings to modify in place
 */
static void titlecase (String[] array) {
    for (int i = 0; i < array.length; ++i) {
        String item = array[i];
        if (item == null || item.length() == 0) {
            continue;
        }
        // Take the initial from the current element (not element 1) and use a
        // fixed locale so the result does not depend on the JVM default.
        array[i] = item.substring(0, 1).toUpperCase(Locale.ENGLISH)
                + item.substring(1);
    }
}
public static String[] OLD_JOINING_GROUP = {
"<no shaping>",
"AIN",

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
* $Date: 2004/02/06 18:30:19 $
* $Revision: 1.26 $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.27 $
*
*******************************************************************************
*/
@ -15,7 +15,7 @@ package com.ibm.text.UCD;
public interface UCD_Types {
public static final int dVersion = 5; // change to fix the generated file D version. If less than zero, no "d"
public static final int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.16 $
* $Date: 2004/02/18 03:09:01 $
* $Revision: 1.17 $
*
*******************************************************************************
*/
@ -63,9 +63,9 @@ public final class UnifiedBinaryProperty extends UCDProperty {
if (!up.isStandard()) continue;
if (up.getValueType() < BINARY_PROP) continue;
String shortValue = Utility.getSkeleton(up.getValue(SHORT));
String shortName = Utility.getSkeleton(up.getProperty(SHORT));
String shortName = Utility.getSkeleton(up.getPropertyName(SHORT));
String longValue = Utility.getSkeleton(up.getValue(LONG));
String longName = Utility.getSkeleton(up.getProperty(LONG));
String longName = Utility.getSkeleton(up.getPropertyName(LONG));
Integer result = new Integer(i);
propNameCache.put(longName + "=" + longValue, result);
propNameCache.put(longName + "=" + shortValue, result);
@ -313,8 +313,8 @@ public final class UnifiedBinaryProperty extends UCDProperty {
public String getFullName(byte style) {
String pre = "";
/*if ((majorProp) != BINARY_PROPERTIES>>8)*/ {
String preShort = getProperty(SHORT) + "=";
String preLong = getProperty(LONG) + "=";
String preShort = getPropertyName(SHORT) + "=";
String preLong = getPropertyName(LONG) + "=";
if (style < LONG) pre = preShort;
else if (style == LONG || preShort.equals(preLong)) pre = preLong;
else pre = preShort + "(" + preLong + ")";

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedProperty.java,v $
* $Date: 2004/02/07 01:01:12 $
* $Revision: 1.6 $
* $Date: 2004/02/18 03:09:02 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -27,10 +27,6 @@ public final class UnifiedProperty extends UCDProperty {
}
public static UCDProperty make(int propMask, UCD ucd) {
if (propMask == AGE) {
System.out.println();
}
if ((propMask & 0xFF00) == (BINARY_PROPERTIES & 0xFF00)) {
return UnifiedBinaryProperty.make(propMask, ucd);
}
@ -81,25 +77,29 @@ public final class UnifiedProperty extends UCDProperty {
}
/**
 * Rebuilds {@code propNameCache}: for every index below LIMIT_ENUM for which
 * {@code UnifiedProperty.make} yields a standard property, the skeletonized
 * long and short property names are mapped to that index. A name already in
 * the cache is not overwritten, so the lowest index registered for a name wins.
 *
 * @param ucd the UCD instance handed to {@code UnifiedProperty.make}
 */
private static void cacheNames(UCD ucd) {
System.out.println("Caching Property Names");
//System.out.println("Caching Property Names");
propNameCache = new HashMap();
for (int i = 0; i < LIMIT_ENUM; ++i) {
UCDProperty up = UnifiedProperty.make(i, ucd);
if (up == null) continue;
if (!up.isStandard()) continue;
if (up.getValueType() < BINARY_PROP) continue;
String shortRaw = up.getProperty(SHORT);
String shortName = Utility.getSkeleton(shortRaw);
String longRaw = up.getProperty(LONG);
String longName = Utility.getSkeleton(longRaw);
//if (up.getValueType() < BINARY_PROP) continue;
Integer result = new Integer(i);
if (!propNameCache.keySet().contains(longName)) propNameCache.put(longName, result);
if (!propNameCache.keySet().contains(shortName)) propNameCache.put(shortName, result);
String longRaw = up.getPropertyName(LONG);
String longName = Utility.getSkeleton(longRaw);
String shortRaw = up.getPropertyName(SHORT);
String shortName = Utility.getSkeleton(shortRaw);
//System.out.println("Caching Names: " + longRaw + ", " + shortRaw);
// Null skeletons are never used as cache keys.
if (longName != null && !propNameCache.keySet().contains(longName)) propNameCache.put(longName, result);
if (shortName != null && !propNameCache.keySet().contains(shortName)) propNameCache.put(shortName, result);
// The raw long name is recorded as available, falling back to the raw short name when the long one is null.
String key = longRaw != null ? longRaw : shortRaw;
availablePropNames.add(key);
}
System.out.println("Done Caching");
//System.out.println("Done Caching");
}
static Map cache = new HashMap();
@ -185,8 +185,8 @@ public final class UnifiedProperty extends UCDProperty {
public String getFullName(byte style) {
String pre = "";
String preShort = getProperty(SHORT);
String preLong = getProperty(LONG);
String preShort = getPropertyName(SHORT);
String preLong = getPropertyName(LONG);
if (style < LONG) pre = preShort;
else if (style == LONG || preShort.equals(preLong)) pre = preLong;
else pre = preShort + "(" + preLong + ")";

View file

@ -37,6 +37,7 @@ public class UnicodeDataFile {
result.out.println("# if they have default property values.");
result.out.println("# ================================================");
}
return result;
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2004/02/12 08:23:14 $
* $Revision: 1.39 $
* $Date: 2004/02/18 03:09:02 $
* $Revision: 1.40 $
*
*******************************************************************************
*/
@ -22,6 +22,7 @@ import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.Replaceable;
import com.ibm.icu.text.ReplaceableString;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.text.UCD.*;
@ -119,6 +120,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
*/
public static String getSkeleton(String source) {
// Delegates entirely to UnicodeProperty.toSkeleton; nothing after this return executes.
return UnicodeProperty.toSkeleton(source);
// Earlier StringBuffer-based implementation, kept commented out.
/*
skeletonBuffer.setLength(0);
boolean gotOne = false;
// remove spaces, '_', '-'
@ -139,6 +142,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
if (!gotOne) return source; // avoid string creation
return skeletonBuffer.toString();
*/
}
private static StringBuffer skeletonBuffer = new StringBuffer();
@ -149,6 +153,8 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
*/
public static String getUnskeleton(String source, boolean titlecaseStart) {
// Delegates entirely to UnicodeProperty.regularize; nothing after this return executes.
return UnicodeProperty.regularize(source, titlecaseStart);
// Earlier hand-written implementation, kept commented out.
/*
if (source == null) return source;
if (source.equals("noBreak")) return source; // HACK
StringBuffer result = new StringBuffer();
@ -176,6 +182,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
lastCat = cat;
}
return result.toString();
*/
}
public static String findSubstring(String source, Set target, boolean invert) {