mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-8807 Add internal changes for use in unicode tools. Adds internal API, but doesn't change behavior if that is not called.
X-SVN-Rev: 30638
This commit is contained in:
parent
1f56fddafb
commit
e3546c39d6
12 changed files with 268 additions and 53 deletions
4
.gitattributes
vendored
4
.gitattributes
vendored
|
@ -714,6 +714,10 @@ icu4j/main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch -
|
|||
icu4j/main/tests/translit/.settings/org.eclipse.core.resources.prefs -text
|
||||
icu4j/main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text
|
||||
icu4j/main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text
|
||||
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/TestUnicodeProperty.java -text
|
||||
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/IcuUnicodeNormalizerFactory.java -text
|
||||
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodePropertySymbolTable.java -text
|
||||
icu4j/main/tests/translit/src/com/ibm/icu/dev/test/util/UnicodeTransform.java -text
|
||||
icu4j/main/tests/translit/translit-tests-build.launch -text
|
||||
icu4j/manifest.stub -text
|
||||
icu4j/tools/build/.settings/org.eclipse.core.resources.prefs -text
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2004-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 2004-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -46,7 +46,7 @@ public class ImplicitCEGenerator {
|
|||
// 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
|
||||
// 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
|
||||
CJK_BASE = 0x4E00,
|
||||
CJK_LIMIT = 0x9FCB+1,
|
||||
CJK_LIMIT = 0x9FCC+1,
|
||||
|
||||
CJK_COMPAT_USED_BASE = 0xFA0E,
|
||||
CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 2009-2010, Google, International Business Machines Corporation *
|
||||
* Copyright (C) 2009-2011, Google, International Business Machines Corporation *
|
||||
* and others. All Rights Reserved. *
|
||||
********************************************************************************
|
||||
*/
|
||||
|
@ -24,6 +24,7 @@ import java.util.TreeMap;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.text.StringTransform;
|
||||
import com.ibm.icu.text.SymbolTable;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.Freezable;
|
||||
|
||||
|
@ -37,6 +38,26 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
|
|||
// Note: we don't currently have any state, but intend to in the future,
|
||||
// particularly for the regex style supported.
|
||||
|
||||
private SymbolTable symbolTable;
|
||||
private ParsePosition parsePosition = new ParsePosition(0);
|
||||
|
||||
/**
|
||||
* Get the symbol table for internal processing
|
||||
* @internal
|
||||
*/
|
||||
public SymbolTable getSymbolTable() {
|
||||
return symbolTable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the symbol table for internal processing
|
||||
* @internal
|
||||
*/
|
||||
public UnicodeRegex setSymbolTable(SymbolTable symbolTable) {
|
||||
this.symbolTable = symbolTable;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds full Unicode property support, with the latest version of Unicode,
|
||||
* to Java Regex, bringing it up to Level 1 (see
|
||||
|
@ -185,12 +206,12 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
|
|||
// brute force replacement; do twice to allow for different order
|
||||
// later on can optimize
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (Iterator<String> it = variables.keySet().iterator(); it.hasNext();) {
|
||||
String variable = it.next();
|
||||
for (String variable : variables.keySet()) {
|
||||
String definition = variables.get(variable);
|
||||
for (Iterator<String> it2 = variables.keySet().iterator(); it2.hasNext();) {
|
||||
String variable2 = it2.next();
|
||||
if (variable.equals(variable2)) continue;
|
||||
for (String variable2 : variables.keySet()) {
|
||||
if (variable.equals(variable2)) {
|
||||
continue;
|
||||
}
|
||||
String definition2 = variables.get(variable2);
|
||||
String altered2 = definition2.replace(variable, definition);
|
||||
if (!altered2.equals(definition2)) {
|
||||
|
@ -303,7 +324,7 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
|
|||
private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) {
|
||||
try {
|
||||
pos.setIndex(i);
|
||||
UnicodeSet x = temp.clear().applyPattern(regex, pos, null, 0);
|
||||
UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0);
|
||||
x.complement().complement(); // hack to fix toPattern
|
||||
result.append(x.toPattern(false));
|
||||
i = pos.getIndex() - 1; // allow for the loop increment
|
||||
|
@ -335,8 +356,7 @@ public class UnicodeRegex implements Cloneable, Freezable<UnicodeRegex>, StringT
|
|||
String variable = null;
|
||||
StringBuffer definition = new StringBuffer();
|
||||
int count = 0;
|
||||
for (Iterator<String> it = lines.iterator(); it.hasNext();) {
|
||||
String line = it.next();
|
||||
for (String line : lines) {
|
||||
++count;
|
||||
// remove initial bom, comments
|
||||
if (line.length() == 0) continue;
|
||||
|
|
|
@ -285,6 +285,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final UnicodeSet ALL_CODE_POINTS = new UnicodeSet(0, 0x10FFFF).freeze();
|
||||
|
||||
private static XSymbolTable XSYMBOL_TABLE = null; // for overriding the function processing
|
||||
|
||||
private static final int LOW = 0x000000; // LOW <= all valid values. ZERO for codepoints
|
||||
private static final int HIGH = 0x110000; // HIGH > all valid values. 10000 for code units.
|
||||
|
@ -3282,7 +3284,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
public UnicodeSet applyPropertyAlias(String propertyAlias, String valueAlias) {
|
||||
return applyPropertyAlias(propertyAlias, valueAlias, null);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Modifies this set to contain those code points which have the
|
||||
* given value for the given property. Prior contents of this
|
||||
|
@ -3306,6 +3308,12 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
&& ((XSymbolTable)symbols).applyPropertyAlias(propertyAlias, valueAlias, this)) {
|
||||
return this;
|
||||
}
|
||||
|
||||
if (XSYMBOL_TABLE != null) {
|
||||
if (XSYMBOL_TABLE.applyPropertyAlias(propertyAlias, valueAlias, this)) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
if (valueAlias.length() > 0) {
|
||||
p = UCharacter.getPropertyEnum(propertyAlias);
|
||||
|
@ -4540,5 +4548,30 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
*/
|
||||
CONDITION_COUNT
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default symbol table. Null means ordinary processing. For internal use only.
|
||||
* @return the default symbol table, or null if ordinary processing is in effect
|
||||
* @internal
|
||||
*/
|
||||
public static XSymbolTable getDefaultXSymbolTable() {
|
||||
return XSYMBOL_TABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default symbol table. Null means ordinary processing. For internal use only. Will affect all subsequent parsing
|
||||
* of UnicodeSets.
|
||||
* <p>
|
||||
* WARNING: If this function is used with a {@link UnicodeProperty}, and the
|
||||
* Unassigned characters (gc=Cn) are different than in ICU, you MUST call
|
||||
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
|
||||
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
|
||||
*
|
||||
* @param xSymbolTable the new default symbol table.
|
||||
* @internal
|
||||
*/
|
||||
public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
|
||||
XSYMBOL_TABLE = xSymbolTable;
|
||||
}
|
||||
}
|
||||
//eof
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -28,7 +28,8 @@ public class TestAll extends TestGroup {
|
|||
"TransliteratorTest",
|
||||
"RegexUtilitiesTest",
|
||||
"UnicodeMapTest",
|
||||
"ThreadTest"
|
||||
"ThreadTest",
|
||||
"TestUnicodeProperty"
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.translit;
import java.util.List;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodeProperty.Factory;
import com.ibm.icu.dev.test.util.UnicodePropertySymbolTable;
import com.ibm.icu.text.UnicodeSet;
/**
* @author markdavis
*
*/
public class TestUnicodeProperty extends TestFmwk{
public static void main(String[] args) {
new TestUnicodeProperty().run(args);
}
static final UnicodeSet casedLetter = new UnicodeSet("[:gc=cased letter:]");
static final UnicodeSet letter = new UnicodeSet("[:gc=L:]");
public void TestBasic() {
Factory factory = ICUPropertyFactory.make();
UnicodeProperty property = factory.getProperty("gc");
List values = property.getAvailableValues();
assertTrue("Values contain GC values", values.contains("Unassigned"));
final UnicodeSet lu = property.getSet("Lu");
if (!assertTrue("Gc=L contains 'A'", lu.contains('A'))) {
errln("Contents:\t" + lu.complement().complement().toPattern(false));
}
}
public void TestSymbolTable() {
Factory factory = ICUPropertyFactory.make();
UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
UnicodeSet.setDefaultXSymbolTable(upst);
try {
final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
assertTrue("Gc=L contains 'A'", luSet.contains('A'));
assertTrue("Gc=L contains 'Z'", luSet.contains('Z'));
assertFalse("Gc=L contains 'a'", luSet.contains('1'));
UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
assertEquals("gc=lc are equal", casedLetter, casedLetter2);
} finally {
// restore the world
UnicodeSet.setDefaultXSymbolTable(null);
}
}
public void TestSymbolTable2() {
Factory factory = new MyUnicodePropertyFactory();
UnicodePropertySymbolTable upst = new UnicodePropertySymbolTable(factory);
UnicodeSet.setDefaultXSymbolTable(upst);
try {
final UnicodeSet luSet = new UnicodeSet("[:gc=L:]");
assertFalse("Gc=L contains 'A'", luSet.contains('A'));
if (!assertTrue("Gc=L contains 'Z'", luSet.contains('Z'))) {
errln("Contents:\t" + luSet.complement().complement().toPattern(false));
}
assertFalse("Gc=L contains 'a'", luSet.contains('1'));
UnicodeSet casedLetter2 = new UnicodeSet("[:gc=cased letter:]");
assertNotEquals("gc=lc should not be equal", casedLetter, casedLetter2);
} finally {
// restore the world
UnicodeSet.setDefaultXSymbolTable(null);
}
}
/**
* For testing, override to set A-M to Cn.
*/
static class MyUnicodeGCProperty extends UnicodeProperty.SimpleProperty {
UnicodeProperty icuProperty = ICUPropertyFactory.make().getProperty("Gc");
{
setName(icuProperty.getName());
setType(icuProperty.getType());
}
@Override
protected String _getValue(int codepoint) {
if (codepoint >= 'A' && codepoint <= 'M') {
return "Unassigned";
} else {
return icuProperty.getValue(codepoint);
}
}
@Override
protected List _getValueAliases(String valueAlias, List result) {
return icuProperty.getValueAliases(valueAlias, result);
}
@Override
public List _getNameAliases(List result) {
return icuProperty.getNameAliases();
}
}
/**
* For testing, override to set A-Z to Cn.
*/
static class MyUnicodePropertyFactory extends ICUPropertyFactory {
private MyUnicodePropertyFactory() {
add(new MyUnicodeGCProperty());
}
}
static class MyUnicodePropertySymbolTable extends UnicodePropertySymbolTable {
public MyUnicodePropertySymbolTable(Factory factory) {
super(factory);
}
}
}
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 2002-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -437,7 +437,7 @@ public class ICUPropertyFactory extends UnicodeProperty.Factory {
|
|||
// NFKD = UProperty.STRING_LIMIT+3
|
||||
;
|
||||
|
||||
private ICUPropertyFactory() {
|
||||
protected ICUPropertyFactory() {
|
||||
Collection c = getInternalAvailablePropertyAliases(new ArrayList());
|
||||
Iterator it = c.iterator();
|
||||
while (it.hasNext()) {
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.dev.test.util.UnicodeTransform.Type;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.Normalizer2.Mode;
/**
 * {@link UnicodeTransform.Factory} whose transforms are backed by ICU itself:
 * {@link Normalizer2} for the normalization forms and
 * {@link UCharacter#foldCase(String, boolean)} for case folding.
 *
 * @author markdavis
 */
public class IcuUnicodeNormalizerFactory implements UnicodeTransform.Factory {
    // Names of ICU's built-in normalization data. Normalizer2.getInstance documents
    // these in lower case, so the upper-case enum names are not passed through.
    private static final String NFC_DATA = "nfc";
    private static final String NFKC_DATA = "nfkc";

    /**
     * Returns an ICU-backed transform for the requested type.
     *
     * @param type the normalization form, or case folding, that is wanted
     * @return a new transform of that type
     * @throws IllegalArgumentException if the type is not one of the handled values
     */
    public UnicodeTransform getInstance(Type type) {
        switch (type) {
        case NFC:
            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, NFC_DATA, Mode.COMPOSE));
        case NFKC:
            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, NFKC_DATA, Mode.COMPOSE));
        // The decomposed forms use the same data as their composed counterparts,
        // just in DECOMPOSE mode.
        case NFD:
            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, NFC_DATA, Mode.DECOMPOSE));
        case NFKD:
            return new IcuUnicodeNormalizer(Normalizer2.getInstance(null, NFKC_DATA, Mode.DECOMPOSE));
        case CASEFOLD:
            return new CaseFolder();
        default:
            throw new IllegalArgumentException("Unsupported transform type: " + type);
        }
    }

    /** Transform that applies {@code UCharacter.foldCase(source, true)}. */
    private static class CaseFolder extends UnicodeTransform {
        @Override
        public String transform(String source) {
            return UCharacter.foldCase(source, true);
        }
    }

    /** Transform that delegates to a {@link Normalizer2} instance. */
    private static class IcuUnicodeNormalizer extends UnicodeTransform {
        private final Normalizer2 normalizer;

        private IcuUnicodeNormalizer(Normalizer2 normalizer) {
            this.normalizer = normalizer;
        }

        @Override
        public String transform(String src) {
            return normalizer.normalize(src);
        }

        /** Asks the normalizer directly instead of normalizing and comparing. */
        @Override
        public boolean isTransformed(String s) {
            return normalizer.isNormalized(s);
        }
    }
}
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -277,7 +277,7 @@ public final class UnicodeMap<T> implements Cloneable, Freezable, StringTransfor
|
|||
throw new UnsupportedOperationException("Attempt to modify locked object");
|
||||
}
|
||||
if (errorOnReset && values[baseIndex] != null) {
|
||||
throw new IllegalArgumentException("Attempt to reset value for " + Utility.hex(codepoint)
|
||||
throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
|
||||
+ " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
|
||||
}
|
||||
|
||||
|
|
|
@ -10,19 +10,25 @@ import java.io.PrintWriter;
|
|||
import java.io.StringWriter;
|
||||
import java.text.ParsePosition;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeLabel;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.dev.test.util.CollectionUtilities.InverseMatcher;
|
||||
import com.ibm.icu.dev.test.util.CollectionUtilities.ObjectMatcher;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.SymbolTable;
|
||||
import com.ibm.icu.text.Transform;
|
||||
import com.ibm.icu.text.UFormat;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeMatcher;
|
||||
|
@ -31,17 +37,87 @@ import com.ibm.icu.text.UnicodeSetIterator;
|
|||
|
||||
public abstract class UnicodeProperty extends UnicodeLabel {
|
||||
|
||||
public static final UnicodeSet UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
|
||||
public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze();
|
||||
public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
|
||||
public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze();
|
||||
public static final UnicodeSet SPECIALS = new UnicodeSet(UNASSIGNED).addAll(PRIVATE_USE).addAll(SURROGATE).freeze();
|
||||
public static final int SAMPLE_UNASSIGNED = UNASSIGNED.charAt(0);
|
||||
public static final int SAMPLE_PRIVATE_USE = 0xE000;
|
||||
public static final int SAMPLE_SURROGATE = 0xD800;
|
||||
public static final UnicodeSet STUFF_TO_TEST = new UnicodeSet(SPECIALS).complement()
|
||||
.add(SAMPLE_UNASSIGNED).add(SAMPLE_PRIVATE_USE).add(SAMPLE_SURROGATE).freeze();
|
||||
public static final UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet("[:any:]").freeze();
|
||||
|
||||
public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze();
|
||||
public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0);
|
||||
public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = new UnicodeSet("[\\uDB80-\\uDBFF]").freeze();
|
||||
public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = HIGH_PRIVATE_USE_SURROGATES.charAt(0);
|
||||
public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze();
|
||||
public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0);
|
||||
|
||||
public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze();
|
||||
public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0);
|
||||
public static final UnicodeSet PRIVATE_USE_AREA_A = new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze();
|
||||
public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0);
|
||||
public static final UnicodeSet PRIVATE_USE_AREA_B = new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze();
|
||||
public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0);
|
||||
|
||||
// The following are special. They are used for performance, but must be changed if the version of Unicode for the UnicodeProperty changes.
|
||||
private static UnicodeSet UNASSIGNED;
|
||||
private static int SAMPLE_UNASSIGNED;
|
||||
private static UnicodeSet SPECIALS;
|
||||
private static UnicodeSet STUFF_TO_TEST;
|
||||
private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED;
|
||||
|
||||
public static synchronized UnicodeSet getUNASSIGNED() {
|
||||
if (UNASSIGNED == null) {
|
||||
UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze();
|
||||
}
|
||||
return UNASSIGNED;
|
||||
}
|
||||
|
||||
public static synchronized int getSAMPLE_UNASSIGNED() {
|
||||
if (SAMPLE_UNASSIGNED == 0) {
|
||||
SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0);
|
||||
}
|
||||
return SAMPLE_UNASSIGNED;
|
||||
}
|
||||
|
||||
public static synchronized UnicodeSet getSPECIALS() {
|
||||
if (SPECIALS == null) {
|
||||
SPECIALS = new UnicodeSet(getUNASSIGNED()).addAll(PRIVATE_USE).addAll(SURROGATE).freeze();
|
||||
}
|
||||
return SPECIALS;
|
||||
}
|
||||
|
||||
public static synchronized UnicodeSet getSTUFF_TO_TEST() {
|
||||
if (STUFF_TO_TEST == null) {
|
||||
STUFF_TO_TEST = new UnicodeSet(getSPECIALS()).complement()
|
||||
.addAll(NONCHARACTERS)
|
||||
.add(getSAMPLE_UNASSIGNED())
|
||||
.add(SAMPLE_HIGH_SURROGATE)
|
||||
.add(SAMPLE_HIGH_PRIVATE_USE_SURROGATE)
|
||||
.add(SAMPLE_LOW_SURROGATE)
|
||||
.add(SAMPLE_PRIVATE_USE_AREA)
|
||||
.add(SAMPLE_PRIVATE_USE_AREA_A)
|
||||
.add(SAMPLE_PRIVATE_USE_AREA_B)
|
||||
.freeze();
|
||||
}
|
||||
return STUFF_TO_TEST;
|
||||
}
|
||||
|
||||
public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() {
|
||||
if (STUFF_TO_TEST_WITH_UNASSIGNED == null) {
|
||||
STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet(getSTUFF_TO_TEST()).addAll(getUNASSIGNED()).freeze();
|
||||
}
|
||||
return STUFF_TO_TEST_WITH_UNASSIGNED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the cache properties. Must be done if the version of Unicode is different than the ICU one, AND any UnicodeProperty has already been instantiated.
|
||||
* TODO make this a bit more robust.
|
||||
* @internal
|
||||
*/
|
||||
public static synchronized void ResetCacheProperties() {
|
||||
UNASSIGNED = null;
|
||||
SAMPLE_UNASSIGNED = 0;
|
||||
SPECIALS = null;
|
||||
STUFF_TO_TEST = null;
|
||||
STUFF_TO_TEST_WITH_UNASSIGNED = null;
|
||||
}
|
||||
|
||||
public static boolean DEBUG = false;
|
||||
|
||||
|
@ -57,7 +133,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
|
||||
private Map valueToFirstValueAlias = null;
|
||||
|
||||
private boolean hasUniformUnassigned = false;
|
||||
private boolean hasUniformUnassigned = true;
|
||||
|
||||
/*
|
||||
* Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
|
||||
|
@ -238,7 +314,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return maxFirstValueAliasWidth;
|
||||
return maxValueWidth;
|
||||
}
|
||||
|
||||
|
||||
public final UnicodeSet getSet(String propertyValue) {
|
||||
return getSet(propertyValue, null);
|
||||
}
|
||||
|
@ -247,6 +323,8 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
return getSet(matcher, null);
|
||||
}
|
||||
|
||||
/** Adds the property value set to the result. Clear the result first if you don't want to keep the original contents.
|
||||
*/
|
||||
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
|
||||
return getSet(new SimpleMatcher(propertyValue,
|
||||
isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
|
||||
|
@ -257,7 +335,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
|
||||
public static final String UNUSED = "??";
|
||||
|
||||
public final UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
|
||||
public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
|
||||
if (result == null)
|
||||
result = new UnicodeSet();
|
||||
boolean uniformUnassigned = hasUniformUnassigned();
|
||||
|
@ -422,7 +500,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) {
|
||||
return new UnicodeSetIterator(uniformUnassigned ? STUFF_TO_TEST : STUFF_TO_TEST_WITH_UNASSIGNED);
|
||||
return new UnicodeSetIterator(uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -654,7 +732,9 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
Map propertyCache = new HashMap(1);
|
||||
|
||||
public final Factory add(UnicodeProperty sp) {
|
||||
canonicalNames.put(sp.getName(), sp);
|
||||
String name2 = sp.getName();
|
||||
canonicalNames.put(name2, sp);
|
||||
skeletonNames.put(toSkeleton(name2), sp);
|
||||
List c = sp.getNameAliases(new ArrayList(1));
|
||||
Iterator it = c.iterator();
|
||||
while (it.hasNext()) {
|
||||
|
@ -1178,7 +1258,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
public static abstract class SimpleProperty extends BaseProperty {
|
||||
List values;
|
||||
LinkedHashSet values;
|
||||
|
||||
public UnicodeProperty addName(String alias) {
|
||||
propertyAliases.add(alias);
|
||||
|
@ -1209,7 +1289,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
}
|
||||
|
||||
public SimpleProperty setValues(List valueAliases) {
|
||||
this.values = new ArrayList(valueAliases);
|
||||
this.values = new LinkedHashSet(valueAliases);
|
||||
for (Iterator it = this.values.iterator(); it.hasNext();) {
|
||||
_addToValues((String) it.next(), null);
|
||||
}
|
||||
|
@ -1233,7 +1313,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
|
||||
private void _addToValues(String item, String alias) {
|
||||
if (values == null)
|
||||
values = new ArrayList(1);
|
||||
values = new LinkedHashSet();
|
||||
if (toValueAliases == null)
|
||||
_fixValueAliases();
|
||||
addUnique(item, values);
|
||||
|
@ -1328,32 +1408,57 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
|
||||
|
||||
public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) {
|
||||
if (!uniformUnassigned) return result;
|
||||
if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) {
|
||||
result.addAll(UnicodeProperty.getUNASSIGNED());
|
||||
}
|
||||
|
||||
if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) {
|
||||
result.addAll(UnicodeProperty.HIGH_SURROGATES);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) {
|
||||
result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) {
|
||||
result.addAll(UnicodeProperty.LOW_SURROGATES);
|
||||
}
|
||||
|
||||
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) {
|
||||
result.addAll(UnicodeProperty.PRIVATE_USE_AREA);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) {
|
||||
result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) {
|
||||
result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B);
|
||||
}
|
||||
|
||||
if (result.contains(UnicodeProperty.SAMPLE_UNASSIGNED)) {
|
||||
result.addAll(UnicodeProperty.UNASSIGNED);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE)) {
|
||||
result.addAll(UnicodeProperty.PRIVATE_USE);
|
||||
}
|
||||
if (result.contains(UnicodeProperty.SAMPLE_SURROGATE)) {
|
||||
result.addAll(UnicodeProperty.SURROGATE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) {
|
||||
if (!uniformUnassigned) return result;
|
||||
|
||||
Object temp;
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_UNASSIGNED))) {
|
||||
result.putAll(UnicodeProperty.UNASSIGNED, temp);
|
||||
if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) {
|
||||
result.putAll(UnicodeProperty.getUNASSIGNED(), temp);
|
||||
}
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE))) {
|
||||
result.putAll(UnicodeProperty.PRIVATE_USE, temp);
|
||||
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) {
|
||||
result.putAll(UnicodeProperty.HIGH_SURROGATES, temp);
|
||||
}
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_SURROGATE))) {
|
||||
result.putAll(UnicodeProperty.SURROGATE, temp);
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) {
|
||||
result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp);
|
||||
}
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) {
|
||||
result.putAll(UnicodeProperty.LOW_SURROGATES, temp);
|
||||
}
|
||||
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) {
|
||||
result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp);
|
||||
}
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) {
|
||||
result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp);
|
||||
}
|
||||
if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) {
|
||||
result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -1363,7 +1468,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
if (isType(STRING_OR_MISC_MASK)) {
|
||||
return equals(cp, value);
|
||||
}
|
||||
String defaultValue = getValue(SAMPLE_UNASSIGNED);
|
||||
String defaultValue = getValue(getSAMPLE_UNASSIGNED());
|
||||
return defaultValue == null ? value == null : defaultValue.equals(value);
|
||||
}
|
||||
|
||||
|
@ -1374,5 +1479,53 @@ public abstract class UnicodeProperty extends UnicodeLabel {
|
|||
this.hasUniformUnassigned = hasUniformUnassigned;
|
||||
return this;
|
||||
}
|
||||
|
||||
public static class UnicodeSetProperty extends BaseProperty {
|
||||
protected UnicodeSet unicodeSet;
|
||||
private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"};
|
||||
private static final List YESNO = Arrays.asList(YESNO_ARRAY);
|
||||
|
||||
public UnicodeSetProperty set(UnicodeSet set) {
|
||||
unicodeSet = set.freeze();
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnicodeSetProperty set(String string) {
|
||||
// TODO Auto-generated method stub
|
||||
return set(new UnicodeSet(string).freeze());
|
||||
}
|
||||
|
||||
protected String _getValue(int codepoint) {
|
||||
return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1];
|
||||
}
|
||||
|
||||
protected List _getAvailableValues(List result) {
|
||||
return YESNO;
|
||||
}
|
||||
}
|
||||
|
||||
private static class StringTransformProperty extends SimpleProperty {
|
||||
Transform<String,String> transform;
|
||||
|
||||
public StringTransformProperty(Transform<String,String> transform, boolean hasUniformUnassigned) {
|
||||
this.transform = transform;
|
||||
setUniformUnassigned(hasUniformUnassigned);
|
||||
}
|
||||
protected String _getValue(int codepoint) {
|
||||
return transform.transform(UTF16.valueOf(codepoint));
|
||||
}
|
||||
}
|
||||
|
||||
private static class CodepointTransformProperty extends SimpleProperty {
|
||||
Transform<Integer,String> transform;
|
||||
|
||||
public CodepointTransformProperty(Transform<Integer,String> transform, boolean hasUniformUnassigned) {
|
||||
this.transform = transform;
|
||||
setUniformUnassigned(hasUniformUnassigned);
|
||||
}
|
||||
protected String _getValue(int codepoint) {
|
||||
return transform.transform(codepoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
/*
*******************************************************************************
* Copyright (C) 2011, Google, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UTF16;
/**
 * Thin wrapper around a string-to-string transform (normalization or case
 * folding) so that the implementation can come either from the standard ICU
 * normalizer or from one built directly from the UCD. The implementation is
 * chosen through a replaceable {@link Factory}.
 */
public abstract class UnicodeTransform implements Transform<String,String> {

    /** The kinds of transform a {@link Factory} can be asked for. */
    public enum Type {
        NFD, NFC, NFKD, NFKC, CASEFOLD
    }

    /** Creates transforms for a given {@link Type}. */
    public interface Factory {
        UnicodeTransform getInstance(Type type);
    }

    // Current factory; starts out as the ICU-backed one. All static accessors
    // below are synchronized on the class.
    private static Factory factory = new IcuUnicodeNormalizerFactory();

    /**
     * @return the factory currently used by {@link #getInstance(Type)}
     */
    public static synchronized Factory getFactory() {
        return factory;
    }

    /**
     * Replaces the factory used by {@link #getInstance(Type)}.
     *
     * @param factory the new factory
     */
    public static synchronized void setFactory(Factory factory) {
        UnicodeTransform.factory = factory;
    }

    /**
     * Asks the current factory for a transform.
     *
     * @param type the kind of transform wanted
     * @return whatever the current factory returns for that type
     */
    public static synchronized UnicodeTransform getInstance(Type type) {
        final Factory current = factory;
        return current.getInstance(type);
    }

    /**
     * Applies the transform.
     *
     * @param source the string to transform
     * @return the transformed string
     */
    public abstract String transform(String source);

    /**
     * Reports whether the source is unchanged by the transform. This default
     * transforms the source and compares; it can be overridden for performance.
     */
    public boolean isTransformed(String source) {
        final String transformed = transform(source);
        return source.equals(transformed);
    }

    /**
     * Code point variant of {@link #transform(String)}: converts the code point
     * to a string and transforms that. Can be overridden for performance.
     */
    public String transform(int source) {
        final String asString = UTF16.valueOf(source);
        return transform(asString);
    }

    /**
     * Code point variant of {@link #isTransformed(String)}: converts the code
     * point to a string and tests that. Can be overridden for performance.
     */
    public boolean isTransformed(int source) {
        final String asString = UTF16.valueOf(source);
        return isTransformed(asString);
    }
}
|
Loading…
Add table
Reference in a new issue