ICU-8807 Add internal changes for use in unicode tools. Adds internal API, but doesn't change behavior if that is not called.

X-SVN-Rev: 30638
This commit is contained in:
Mark Davis 2011-09-08 22:28:40 +00:00
parent 1f56fddafb
commit e3546c39d6
12 changed files with 268 additions and 53 deletions

4
.gitattributes vendored
View file

@ -714,6 +714,10 @@ icu4j/main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch -
icu4j/main/tests/translit/.settings/org.eclipse.core.resources.prefs -text
icu4j/main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text
icu4j/main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java -text
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java -text
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java -text
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java -text
icu4j/main/tests/translit/translit-tests-build.launch -text
icu4j/manifest.stub -text
icu4j/tools/build/.settings/org.eclipse.core.resources.prefs -text

View file

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 2004-2010, International Business Machines Corporation and *
* Copyright (C) 2004-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -46,7 +46,7 @@ public class ImplicitCEGenerator {
// 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
// 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
CJK_BASE = 0x4E00,
CJK_LIMIT = 0x9FCB+1,
CJK_LIMIT = 0x9FCC+1,
CJK_COMPAT_USED_BASE = 0xFA0E,
CJK_COMPAT_USED_LIMIT = 0xFA2F+1,

View file

@ -1,6 +1,6 @@
/*
********************************************************************************
* Copyright (C) 2009-2010, Google, International Business Machines Corporation *
* Copyright (C) 2009-2011, Google, International Business Machines Corporation *
* and others. All Rights Reserved. *
********************************************************************************
*/
@ -24,6 +24,7 @@ import java.util.TreeMap;
import java.util.regex.Pattern;
import com.ibm.icu.text.StringTransform;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Freezable;
@ -37,6 +38,26 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
// Note: we don't currently have any state, but intend to in the future,
// particularly for the regex style supported.
private SymbolTable symbolTable;
private ParsePosition parsePosition = new ParsePosition(0);
/**
* Get the symbol table for internal processing.
* @return the symbol table currently held by this object; may be null
* @internal
*/
public SymbolTable getSymbolTable() {
return symbolTable;
}
/**
* Set the symbol table for internal processing.
* @param symbolTable the symbol table to store; it is stored as given, with no null check
* @return this object, so that calls can be chained
* @internal
*/
public UnicodeRegex setSymbolTable(SymbolTable symbolTable) {
this.symbolTable = symbolTable;
return this;
}
/**
* Adds full Unicode property support, with the latest version of Unicode,
* to Java Regex, bringing it up to Level 1 (see
@ -185,12 +206,12 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
// brute force replacement; do twice to allow for different order
// later on can optimize
for (int i = 0; i < 2; ++i) {
for (Iterator<String> it = variables.keySet().iterator(); it.hasNext();) {
String variable = it.next();
for (String variable : variables.keySet()) {
String definition = variables.get(variable);
for (Iterator<String> it2 = variables.keySet().iterator(); it2.hasNext();) {
String variable2 = it2.next();
if (variable.equals(variable2)) continue;
for (String variable2 : variables.keySet()) {
if (variable.equals(variable2)) {
continue;
}
String definition2 = variables.get(variable2);
String altered2 = definition2.replace(variable, definition);
if (!altered2.equals(definition2)) {
@ -303,7 +324,7 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) {
try {
pos.setIndex(i);
UnicodeSet x = temp.clear().applyPattern(regex, pos, null, 0);
UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0);
x.complement().complement(); // hack to fix toPattern
result.append(x.toPattern(false));
i = pos.getIndex() - 1; // allow for the loop increment
@ -335,8 +356,7 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
String variable = null;
StringBuffer definition = new StringBuffer();
int count = 0;
for (Iterator<String> it = lines.iterator(); it.hasNext();) {
String line = it.next();
for (String line : lines) {
++count;
// remove initial bom, comments
if (line.length() == 0) continue;

View file

@ -285,6 +285,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* @provisional This API might change or be removed in a future release.
*/
public static final UnicodeSet ALL_CODE_POINTS = new UnicodeSet(0, 0x10FFFF).freeze();
private static XSymbolTable XSYMBOL_TABLE = null; // for overriding the function processing
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
@ -3282,7 +3284,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) {
return applyPropertyAlias(propertyAlias, valueAlias, null);
}
/**
* Modifies this set to contain those code points which have the
* given value for the given property. Prior contents of this
@ -3306,6 +3308,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
&& ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) {
return this;
}
if (XSYMBOL_TABLE != null) {
if (XSYMBOL_TABLE.applyPropertyAlias(propertyAlias, valueAlias, this)) {
return this;
}
}
if (valueAlias.length() > 0) {
p = UCharacter.getPropertyEnum(propertyAlias);
@ -4540,5 +4548,30 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
*/
CONDITION_COUNT
}
/**
* Get the default symbol table. Null means ordinary processing. For internal use only.
* @return the current default symbol table, or null if none is set
* @internal
*/
public static XSymbolTable getDefaultXSymbolTable() {
return XSYMBOL_TABLE;
}
/**
* Set the default symbol table. Null means ordinary processing. For internal use only. Will affect all subsequent parsing
* of UnicodeSets.
* <p>
* WARNING: If this function is used with a {@code UnicodeProperty}, and the
* Unassigned characters (gc=Cn) are different from those in ICU, you MUST call
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
*
* @param xSymbolTable the new default symbol table, or null to restore ordinary processing.
* @internal
*/
public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
XSYMBOL_TABLE = xSymbolTable;
}
}
//eof

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -28,7 +28,8 @@ public class TestAll extends TestGroup {
"TransliteratorTest",
"RegexUtilitiesTest",
"UnicodeMapTest",
"ThreadTest"
"ThreadTest",
"TestUnicodeProperty"
});
}

View file

@ -0,0 +1 @@
/*
 *******************************************************************************
 * Copyright (C) 2011, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.test.translit;

import java.util.List;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodeProperty.Factory;
import com.ibm.icu.dev.test.util.UnicodePropertySymbolTable;
import com.ibm.icu.text.UnicodeSet;

/**
 * Tests for {@link UnicodeProperty} and for installing a
 * {@link UnicodePropertySymbolTable} as the default symbol table of
 * {@link UnicodeSet}.
 *
 * @author markdavis
 */
public class TestUnicodeProperty extends TestFmwk {

    public static void main(String[] args) {
        new TestUnicodeProperty().run(args);
    }

    // Both sets are built when this class is initialized, i.e. before any test
    // installs a default symbol table.
    static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]");
    static final UnicodeSet letter = new UnicodeSet("[:gc=L:]");

    /**
     * Checks that the ICU-backed "gc" property lists "Unassigned" among its
     * values and that its Lu set contains 'A'.
     */
    public void TestBasic() {
        Factory factory = ICUPropertyFactory.make();
        UnicodeProperty property = factory.getProperty("gc");
        List values = property.getAvailableValues();
        assertTrue("Values contain GC values", values.contains("Unassigned"));
        final UnicodeSet lu = property.getSet("Lu");
        if (!assertTrue("Gc=Lu contains 'A'", lu.contains('A'))) {
            errln("Contents:\t" + lu.complement().complement().toPattern(false));
        }
    }

    /**
     * Installs a symbol table backed by the ICU property factory and checks
     * that the parsed sets match what is expected without it.
     */
    public void TestSymbolTable() {
        Factory factory = ICUPropertyFactory.make();
        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
        UnicodeSet.setDefaultXSymbolTable(upst);
        try {
            final UnicodeSet letterSet = new UnicodeSet("[:gc=L:]");
            assertTrue("Gc=L contains 'A'", letterSet.contains('A'));
            assertTrue("Gc=L contains 'Z'", letterSet.contains('Z'));
            assertFalse("Gc=L contains '1'", letterSet.contains('1'));
            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
            assertEquals("gc=lc are equal", casedLetter, casedLetter2);
        } finally {
            // restore the world
            UnicodeSet.setDefaultXSymbolTable(null);
        }
    }

    /**
     * Installs a symbol table whose gc property reports A-M as Unassigned and
     * checks that parsed sets reflect the override.
     */
    public void TestSymbolTable2() {
        Factory factory = new MyUnicodePropertyFactory();
        UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
        UnicodeSet.setDefaultXSymbolTable(upst);
        try {
            final UnicodeSet letterSet = new UnicodeSet("[:gc=L:]");
            assertFalse("Gc=L contains 'A'", letterSet.contains('A'));
            if (!assertTrue("Gc=L contains 'Z'", letterSet.contains('Z'))) {
                errln("Contents:\t" + letterSet.complement().complement().toPattern(false));
            }
            assertFalse("Gc=L contains '1'", letterSet.contains('1'));
            UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
            assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2);
        } finally {
            // restore the world
            UnicodeSet.setDefaultXSymbolTable(null);
        }
    }

    /**
     * For testing, override to set A-M to Cn. Every other code point, and all
     * alias lookups, are delegated to the ICU "Gc" property.
     */
    static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty {
        UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc");
        {
            setName(icuProperty.getName());
            setType(icuProperty.getType());
        }

        @Override
        protected String _getValue(int codepoint) {
            if (codepoint >= 'A' && codepoint <= 'M') {
                return "Unassigned";
            } else {
                return icuProperty.getValue(codepoint);
            }
        }

        @Override
        protected List _getValueAliases(String valueAlias, List result) {
            return icuProperty.getValueAliases(valueAlias, result);
        }

        @Override
        public List _getNameAliases(List result) {
            // NOTE(review): 'result' is ignored here, unlike in _getValueAliases; confirm this is intended.
            return icuProperty.getNameAliases();
        }
    }

    /**
     * For testing: an ICU property factory that additionally registers
     * {@link MyUnicodeGCProperty}, which reports A-M as Cn.
     */
    static class MyUnicodePropertyFactory extends ICUPropertyFactory {
        private MyUnicodePropertyFactory() {
            add(new MyUnicodeGCProperty());
        }
    }

    /**
     * Symbol table subclass that adds nothing of its own; it only forwards the
     * factory to the superclass constructor.
     */
    static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable {
        public MyUnicodePropertySymbolTable(Factory factory) {
            super(factory);
        }
    }
}

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2002-2010, International Business Machines Corporation and *
* Copyright (C) 2002-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -437,7 +437,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
// NFKD = UProperty.STRING_LIMIT+3
;
private ICUPropertyFactory() {
protected ICUPropertyFactory() {
Collection c = getInternalAvailablePropertyAliases(new ArrayList());
Iterator it = c.iterator();
while (it.hasNext()) {

View file

@ -0,0 +1 @@
/* ******************************************************************************* * Copyright (C) 2011, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ package com.ibm.icu.dev.test.util; import com.ibm.icu.dev.test.util.UnicodeTransform.Type; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.Normalizer2.Mode; /** * @author markdavis * */ public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory { public UnicodeTransform getInstance(Type type) { switch (type) { case NFC: case NFKC: return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type.toString(), Mode.COMPOSE)); case NFD: case NFKD: return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, type == Type.NFD ? "NFC" : "NFKC", Mode.DECOMPOSE)); case CASEFOLD: return new CaseFolder(); default: throw new IllegalArgumentException(); } } private static class CaseFolder extends UnicodeTransform { @Override public String transform(String source) { return UCharacter.foldCase(source.toString(), true); } } private static class IcuUnicodeNormalizer extends UnicodeTransform { private Normalizer2 normalizer; private IcuUnicodeNormalizer(Normalizer2 normalizer) { this.normalizer = normalizer; } public String transform(String src) { return normalizer.normalize(src); } public boolean isTransformed(String s) { return normalizer.isNormalized(s); } } }

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -277,7 +277,7 @@ public final class UnicodeMap<T> implements Cloneable, Freezable, StringTransfor
throw new UnsupportedOperationException("Attempt to modify locked object");
}
if (errorOnReset && values[baseIndex] != null) {
throw new IllegalArgumentException("Attempt to reset value for " + Utility.hex(codepoint)
throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
+ " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
}

View file

@ -10,19 +10,25 @@ import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeLabel;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.CollectionUtilities.InverseMatcher;
import com.ibm.icu.dev.test.util.CollectionUtilities.ObjectMatcher;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UFormat;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
@ -31,17 +37,87 @@ import com.ibm.icu.text.UnicodeSetIterator;
public abstract class UnicodeProperty extends UnicodeLabel {
public static final UnicodeSet UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze();
public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze();
public static final UnicodeSet SPECIALS = new UnicodeSet(UNASSIGNED).addAll(PRIVATE_USE).addAll(SURROGATE).freeze();
public static final int SAMPLE_UNASSIGNED = UNASSIGNED.charAt(0);
public static final int SAMPLE_PRIVATE_USE = 0xE000;
public static final int SAMPLE_SURROGATE = 0xD800;
public static final UnicodeSet STUFF_TO_TEST = new UnicodeSet(SPECIALS).complement()
.add(SAMPLE_UNASSIGNED).add(SAMPLE_PRIVATE_USE).add(SAMPLE_SURROGATE).freeze();
public static final UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet("[:any:]").freeze();
public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze();
public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0);
public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = new UnicodeSet("[\\uDB80-\\uDBFF]").freeze();
public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = HIGH_PRIVATE_USE_SURROGATES.charAt(0);
public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze();
public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0);
public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze();
public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0);
public static final UnicodeSet PRIVATE_USE_AREA_A = new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze();
public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0);
public static final UnicodeSet PRIVATE_USE_AREA_B = new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze();
public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0);
// The following are special. They are used for performance, but must be changed if the version of Unicode for the UnicodeProperty changes.
// Each field is a cache that the matching synchronized get...() method fills in on first use and that
// ResetCacheProperties() clears again; null (0 for SAMPLE_UNASSIGNED) means "not computed yet".
private static UnicodeSet UNASSIGNED;
private static int SAMPLE_UNASSIGNED;
private static UnicodeSet SPECIALS;
private static UnicodeSet STUFF_TO_TEST;
private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED;
/**
 * Returns the cached, frozen set parsed from the pattern {@code [:gc=unassigned:]},
 * parsing it on the first call (or the first call after the cache was reset).
 */
public static synchronized UnicodeSet getUNASSIGNED() {
    if (UNASSIGNED != null) {
        return UNASSIGNED;
    }
    UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
    return UNASSIGNED;
}
/**
 * Returns the first code point of {@link #getUNASSIGNED()}, cached after the first call.
 * A cached value of 0 is treated as "not computed yet".
 */
public static synchronized int getSAMPLE_UNASSIGNED() {
    if (SAMPLE_UNASSIGNED != 0) {
        return SAMPLE_UNASSIGNED;
    }
    SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0);
    return SAMPLE_UNASSIGNED;
}
/**
 * Returns the cached, frozen union of {@link #getUNASSIGNED()}, PRIVATE_USE and
 * SURROGATE, building it on first use.
 */
public static synchronized UnicodeSet getSPECIALS() {
    if (SPECIALS != null) {
        return SPECIALS;
    }
    UnicodeSet specials = new UnicodeSet(getUNASSIGNED());
    specials.addAll(PRIVATE_USE);
    specials.addAll(SURROGATE);
    SPECIALS = specials.freeze();
    return SPECIALS;
}
/**
 * Returns the cached, frozen set of code points to iterate over when unassigned
 * code points are treated uniformly: the complement of {@link #getSPECIALS()},
 * plus NONCHARACTERS, plus one sample code point for the unassigned set and for
 * each of the surrogate and private-use ranges.
 */
public static synchronized UnicodeSet getSTUFF_TO_TEST() {
    if (STUFF_TO_TEST != null) {
        return STUFF_TO_TEST;
    }
    UnicodeSet candidates = new UnicodeSet(getSPECIALS()).complement();
    candidates.addAll(NONCHARACTERS);
    // One representative per range that the complement above removed.
    final int[] samples = {
        getSAMPLE_UNASSIGNED(),
        SAMPLE_HIGH_SURROGATE,
        SAMPLE_HIGH_PRIVATE_USE_SURROGATE,
        SAMPLE_LOW_SURROGATE,
        SAMPLE_PRIVATE_USE_AREA,
        SAMPLE_PRIVATE_USE_AREA_A,
        SAMPLE_PRIVATE_USE_AREA_B,
    };
    for (int sample : samples) {
        candidates.add(sample);
    }
    STUFF_TO_TEST = candidates.freeze();
    return STUFF_TO_TEST;
}
/**
 * Returns the cached, frozen union of {@link #getSTUFF_TO_TEST()} and
 * {@link #getUNASSIGNED()}, building it on first use.
 */
public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() {
    if (STUFF_TO_TEST_WITH_UNASSIGNED != null) {
        return STUFF_TO_TEST_WITH_UNASSIGNED;
    }
    UnicodeSet withUnassigned = new UnicodeSet(getSTUFF_TO_TEST());
    withUnassigned.addAll(getUNASSIGNED());
    STUFF_TO_TEST_WITH_UNASSIGNED = withUnassigned.freeze();
    return STUFF_TO_TEST_WITH_UNASSIGNED;
}
/**
 * Clears every lazily computed cache in this class so that the next call to the
 * corresponding getter recomputes it. Must be done if the version of Unicode is
 * different than the ICU one, AND any UnicodeProperty has already been instantiated.
 * TODO make this a bit more robust.
 * @internal
 */
public static synchronized void ResetCacheProperties() {
    // Derived caches first, then the values they were built from; the order has
    // no effect because the whole method runs under the class lock.
    STUFF_TO_TEST_WITH_UNASSIGNED = null;
    STUFF_TO_TEST = null;
    SPECIALS = null;
    SAMPLE_UNASSIGNED = 0;
    UNASSIGNED = null;
}
public static boolean DEBUG = false;
@ -57,7 +133,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
private Map valueToFirstValueAlias = null;
private boolean hasUniformUnassigned = false;
private boolean hasUniformUnassigned = true;
/*
* Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
@ -238,7 +314,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return maxFirstValueAliasWidth;
return maxValueWidth;
}
public final UnicodeSet getSet(String propertyValue) {
return getSet(propertyValue, null);
}
@ -247,6 +323,8 @@ public abstract class UnicodeProperty extends UnicodeLabel {
return getSet(matcher, null);
}
/** Adds the property value set to the result. Clear the result first if you don't want to keep the original contents.
*/
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
return getSet(new SimpleMatcher(propertyValue,
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
@ -257,7 +335,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
public static final String UNUSED = "??";
public final UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
if (result == null)
result = new UnicodeSet();
boolean uniformUnassigned = hasUniformUnassigned();
@ -422,7 +500,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) {
return new UnicodeSetIterator(uniformUnassigned ? STUFF_TO_TEST : STUFF_TO_TEST_WITH_UNASSIGNED);
return new UnicodeSetIterator(uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED());
}
/**
@ -654,7 +732,9 @@ public abstract class UnicodeProperty extends UnicodeLabel {
Map propertyCache = new HashMap(1);
public final Factory add(UnicodeProperty sp) {
canonicalNames.put(sp.getName(), sp);
String name2 = sp.getName();
canonicalNames.put(name2, sp);
skeletonNames.put(toSkeleton(name2), sp);
List c = sp.getNameAliases(new ArrayList(1));
Iterator it = c.iterator();
while (it.hasNext()) {
@ -1178,7 +1258,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
public static abstract class SimpleProperty extends BaseProperty {
List values;
LinkedHashSet values;
public UnicodeProperty addName(String alias) {
propertyAliases.add(alias);
@ -1209,7 +1289,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
}
public SimpleProperty setValues(List valueAliases) {
this.values = new ArrayList(valueAliases);
this.values = new LinkedHashSet(valueAliases);
for (Iterator it = this.values.iterator(); it.hasNext();) {
_addToValues((String) it.next(), null);
}
@ -1233,7 +1313,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
private void _addToValues(String item, String alias) {
if (values == null)
values = new ArrayList(1);
values = new LinkedHashSet();
if (toValueAliases == null)
_fixValueAliases();
addUnique(item, values);
@ -1328,32 +1408,57 @@ public abstract class UnicodeProperty extends UnicodeLabel {
public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) {
if (!uniformUnassigned) return result;
if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) {
result.addAll(UnicodeProperty.getUNASSIGNED());
}
if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) {
result.addAll(UnicodeProperty.HIGH_SURROGATES);
}
if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) {
result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES);
}
if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) {
result.addAll(UnicodeProperty.LOW_SURROGATES);
}
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) {
result.addAll(UnicodeProperty.PRIVATE_USE_AREA);
}
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) {
result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A);
}
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) {
result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B);
}
if (result.contains(UnicodeProperty.SAMPLE_UNASSIGNED)) {
result.addAll(UnicodeProperty.UNASSIGNED);
}
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE)) {
result.addAll(UnicodeProperty.PRIVATE_USE);
}
if (result.contains(UnicodeProperty.SAMPLE_SURROGATE)) {
result.addAll(UnicodeProperty.SURROGATE);
}
return result;
}
public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) {
if (!uniformUnassigned) return result;
Object temp;
if (null != (temp = result.get(UnicodeProperty.SAMPLE_UNASSIGNED))) {
result.putAll(UnicodeProperty.UNASSIGNED, temp);
if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) {
result.putAll(UnicodeProperty.getUNASSIGNED(), temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE))) {
result.putAll(UnicodeProperty.PRIVATE_USE, temp);
if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) {
result.putAll(UnicodeProperty.HIGH_SURROGATES, temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_SURROGATE))) {
result.putAll(UnicodeProperty.SURROGATE, temp);
if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) {
result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) {
result.putAll(UnicodeProperty.LOW_SURROGATES, temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) {
result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) {
result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp);
}
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) {
result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp);
}
return result;
}
@ -1363,7 +1468,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
if (isType(STRING_OR_MISC_MASK)) {
return equals(cp, value);
}
String defaultValue = getValue(SAMPLE_UNASSIGNED);
String defaultValue = getValue(getSAMPLE_UNASSIGNED());
return defaultValue == null ? value == null : defaultValue.equals(value);
}
@ -1374,5 +1479,53 @@ public abstract class UnicodeProperty extends UnicodeLabel {
this.hasUniformUnassigned = hasUniformUnassigned;
return this;
}
/**
 * Binary property defined by membership in a UnicodeSet: the value is "Yes"
 * for code points contained in the set and "No" for all others.
 */
public static class UnicodeSetProperty extends BaseProperty {
    protected UnicodeSet unicodeSet;
    private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"};
    private static final List YESNO = Arrays.asList(YESNO_ARRAY);

    /**
     * Uses the given set as the definition of this property; the stored value
     * is the result of {@code set.freeze()}.
     * @return this object, for chaining
     */
    public UnicodeSetProperty set(UnicodeSet set) {
        unicodeSet = set.freeze();
        return this;
    }

    /**
     * Parses the given UnicodeSet pattern and uses the resulting set as the
     * definition of this property.
     * @return this object, for chaining
     */
    public UnicodeSetProperty set(String string) {
        final UnicodeSet parsed = new UnicodeSet(string).freeze();
        return set(parsed);
    }

    /** Returns "Yes" when the code point is in the set, otherwise "No". */
    protected String _getValue(int codepoint) {
        if (unicodeSet.contains(codepoint)) {
            return YESNO_ARRAY[0];
        }
        return YESNO_ARRAY[1];
    }

    /** Returns the shared Yes/No list; the {@code result} argument is not used. */
    protected List _getAvailableValues(List result) {
        return YESNO;
    }
}
/**
 * Property whose value for a code point is obtained by applying a
 * string-to-string transform to that code point's string form.
 */
private static class StringTransformProperty extends SimpleProperty {
    Transform<String,String> transform;

    /**
     * @param transform the transform that produces the property value
     * @param hasUniformUnassigned forwarded to {@code setUniformUnassigned}
     */
    public StringTransformProperty(Transform<String,String> transform, boolean hasUniformUnassigned) {
        this.transform = transform;
        setUniformUnassigned(hasUniformUnassigned);
    }

    /** Converts the code point to a string and returns the transform's result for it. */
    protected String _getValue(int codepoint) {
        final String asString = UTF16.valueOf(codepoint);
        return transform.transform(asString);
    }
}
/**
 * Property whose value for a code point is obtained by applying a
 * code-point-to-string transform directly to that code point.
 */
private static class CodepointTransformProperty extends SimpleProperty {
    Transform<Integer,String> transform;

    /**
     * @param transform the transform that produces the property value
     * @param hasUniformUnassigned forwarded to {@code setUniformUnassigned}
     */
    public CodepointTransformProperty(Transform<Integer,String> transform, boolean hasUniformUnassigned) {
        this.transform = transform;
        setUniformUnassigned(hasUniformUnassigned);
    }

    /** Returns the transform's result for the (boxed) code point. */
    protected String _getValue(int codepoint) {
        final String value = transform.transform(codepoint);
        return value;
    }
}
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
/*
 *******************************************************************************
 * Copyright (C) 2011, Google, International Business Machines Corporation and *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.test.util;

import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UTF16;

/**
 * Simple wrapping for normalizer that allows for both the standard ICU normalizer,
 * and one built directly from the UCD. Instances are obtained through a
 * replaceable static {@link Factory}.
 */
public abstract class UnicodeTransform implements Transform<String,String> {

    /** The kinds of transform a {@link Factory} can be asked for. */
    public enum Type {
        NFD, NFC, NFKD, NFKC, CASEFOLD
    }

    /** Creates transform instances for a requested {@link Type}. */
    public interface Factory {
        public UnicodeTransform getInstance(Type type);
    }

    // Factory used by getInstance(Type); starts out as the ICU-backed one.
    private static Factory factory = new IcuUnicodeNormalizerFactory();

    /** Returns the factory currently used by {@link #getInstance(Type)}. */
    public static synchronized Factory getFactory() {
        return factory;
    }

    /** Replaces the factory used by {@link #getInstance(Type)}. */
    public static synchronized void setFactory(Factory factory) {
        UnicodeTransform.factory = factory;
    }

    /** Asks the current factory for a transform of the given type. */
    public static synchronized UnicodeTransform getInstance(Type type) {
        final Factory current = factory;
        return current.getInstance(type);
    }

    /** Transforms the source string and returns the result. */
    public abstract String transform(String source);

    /**
     * Reports whether transforming the source leaves it unchanged.
     * Can be overridden for performance.
     */
    public boolean isTransformed(String source) {
        final String transformed = transform(source);
        return source.equals(transformed);
    }

    /**
     * Transforms a single code point, given as an int.
     * Can be overridden for performance.
     */
    public String transform(int source) {
        final String asString = UTF16.valueOf(source);
        return transform(asString);
    }

    /**
     * Reports whether transforming a single code point leaves it unchanged.
     * Can be overridden for performance.
     */
    public boolean isTransformed(int source) {
        final String asString = UTF16.valueOf(source);
        return isTransformed(asString);
    }
}