ICU-8295 API for collation reordering

X-SVN-Rev: 29672
This commit is contained in:
Stuart Gill 2011-03-18 23:12:00 +00:00
parent b3ba6c0bf3
commit 45dd32ab8a
4 changed files with 175 additions and 83 deletions

View file

@ -1,7 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
<classpathentry kind="src" path="src"/>
<classpathentry kind="src" path="/icu4j-charset-tests"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-localespi-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-packaging-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
<classpathentry kind="output" path="bin"/>
</classpath>

View file

@ -1,6 +1,6 @@
/**
*******************************************************************************
* Copyright (C) 1996-2010, International Business Machines Corporation and *
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -17,6 +17,7 @@ import java.util.Set;
import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.VersionInfo;
@ -230,50 +231,59 @@ public abstract class Collator implements Comparator<Object>, Cloneable
*
* @see #getReorderCodes
* @see #setReorderCodes
* @internal
* @deprecated This API is ICU internal only.
* @see #getEquivalentReorderCodes
* @draft ICU 4.8
*/
public static interface ReorderCodes {
// Layout of the constants below: three special codes (DEFAULT, NONE, OTHERS),
// then the character-group codes numbered consecutively from 0x1000 (SPACE, also
// exposed as FIRST) through 0x1004 (DIGIT); LIMIT (0x1005) is one past DIGIT.
/**
* A special reordering code that is used to specify the default reordering codes for a locale.
* @draft ICU 4.8
*/
public final static int DEFAULT = 1;
/**
* A special reordering code that is used to specify no reordering codes.
* @draft ICU 4.8
*/
public final static int NONE = UScript.UNKNOWN;
/**
* A special reordering code that is used to specify all other codes used for reordering except
* for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
* Note: as declared here this has the same value as {@link #NONE} (both are UScript.UNKNOWN).
* @draft ICU 4.8
*/
public final static int OTHERS = UScript.UNKNOWN;
/**
* Characters with the space property.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int SPACE = 0x1000;
/**
* The first entry in the enumeration. Has the same value as {@link #SPACE}.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int FIRST = SPACE;
/**
* Characters with the punctuation property.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int PUNCTUATION = 0x1001;
/**
* Characters with the symbol property.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int SYMBOL = 0x1002;
/**
* Characters with the currency property.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int CURRENCY = 0x1003;
/**
* Characters with the digit property.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int DIGIT = 0x1004;
/**
* The limit of the reorder codes: one greater than {@link #DIGIT}, the last code declared above.
* @internal
* @deprecated This API is ICU internal only.
* @draft ICU 4.8
*/
public final static int LIMIT = 0x1005;
}
@ -368,13 +378,15 @@ public abstract class Collator implements Comparator<Object>, Cloneable
}
/**
* Set the reordering codes for this collator.
* The reordering codes are a combination of UScript and ReorderingCodes. These
* allow the order of these groups to be changed as a group.
* @param order the reordering codes to apply to this collator, if null then clears the reordering
* Sets the reordering codes for this collator.
* Reordering codes allow the collation ordering for groups of characters to be changed.
* The reordering codes are a combination of UScript codes and ReorderCodes.
* These allow the ordering of characters belonging to these groups to be changed as a group.
* @param order the reordering codes to apply to this collator; if this is null or an empty array
* then this clears any existing reordering
* @see #getReorderCodes
* @internal
* @deprecated This API is ICU internal only.
* @see #getEquivalentReorderCodes
* @draft ICU 4.8
*/
public void setReorderCodes(int... order)
{
@ -1057,18 +1069,34 @@ public abstract class Collator implements Comparator<Object>, Cloneable
public abstract VersionInfo getUCAVersion();
/**
* Retrieve the reordering codes for this collator.
* These reordering codes are a combination of UScript and ReorderCodes.
* Retrieves the reordering codes for this collator.
* These reordering codes are a combination of UScript codes and ReorderCodes.
* @return a copy of the reordering codes for this collator;
* if none are set then returns an empty array
* @see #setReorderCodes
* @return the reordering codes for this collator if they have been set, null otherwise.
* @internal
* @deprecated This API is ICU internal only.
* @see #getEquivalentReorderCodes
* @draft ICU 4.8
*/
public int[] getReorderCodes()
{
// The base class provides no reordering support of its own, so this version
// always throws; RuleBasedCollator declares its own getReorderCodes().
throw new UnsupportedOperationException();
}
/**
* Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
* codes are grouped and must reorder together.
*
* @param reorderCode code for which equivalents are to be retrieved
* @return the set of all reorder codes in the same group as the given reorder code.
* @throws UnsupportedOperationException always, in this base-class version, which has no
* implementation of its own
* @see #setReorderCodes
* @see #getReorderCodes
* @draft ICU 4.8
*/
public static int[] getEquivalentReorderCodes(int reorderCode)
{
// No implementation at this level: every call fails with the exception below.
throw new UnsupportedOperationException();
}
// protected constructor -------------------------------------------------
/**

View file

@ -638,13 +638,15 @@ public final class RuleBasedCollator extends Collator {
}
/**
* Set the reordering codes for this collator.
* The reordering codes are a combination of UScript and ReorderingCodes. These
* allow the order of these groups to be changed as a group.
* @param order the reordering codes to apply to this collator, if null then clears the reordering
* Sets the reordering codes for this collator.
* Reordering codes allow the collation ordering for groups of characters to be changed.
* The reordering codes are a combination of UScript codes and ReorderCodes.
* These allow the ordering of characters belonging to these groups to be changed as a group.
* @param order the reordering codes to apply to this collator; if this is null or an empty array
* then this clears any existing reordering
* @see #getReorderCodes
* @internal
* @deprecated This API is ICU internal only.
* @see #getEquivalentReorderCodes
* @draft ICU 4.8
*/
public void setReorderCodes(int... order) {
if (order != null && order.length > 0) {
@ -1069,33 +1071,33 @@ public final class RuleBasedCollator extends Collator {
}
/**
* Retrieve the reordering codes for this collator.
* These reordering codes are a combination of UScript and ReorderCodes.
* Retrieves the reordering codes for this collator.
* These reordering codes are a combination of UScript codes and ReorderCodes.
* @return a copy of the reordering codes for this collator;
* if none are set then returns an empty array
* @see #setReorderCodes
* @return the reordering codes for this collator if they have been set, null otherwise.
* @internal
* @deprecated This API is ICU internal only.
* @see #getEquivalentReorderCodes
* @draft ICU 4.8
*/
public int[] getReorderCodes() {
if (m_reorderCodes_ != null) {
return m_reorderCodes_.clone();
} else {
return null;
return LeadByteConstants.EMPTY_INT_ARRAY;
}
}
/**
* Retrieve the reorder codes that are grouped with the given reorder code. Some reorder codes will
* be grouped and must reorder together.
* Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
* codes are grouped and must reorder together.
*
* @see #setReorderCodes
* @see #getReorderCodes
* @param reorderCode code for which equivalents to be retrieved
* @return the set of all reorder codes in the same group as the given reorder code.
* @internal
* @deprecated This API is ICU internal only.
* @see #setReorderCodes
* @see #getReorderCodes
* @draft ICU 4.8
*/
public static int[] getReorderingCodesGroup(int reorderCode) {
public static int[] getEquivalentReorderCodes(int reorderCode) {
Set<Integer> equivalentCodesSet = new HashSet<Integer>();
int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(reorderCode);
for (int leadByte : leadBytes) {
@ -1568,7 +1570,6 @@ public final class RuleBasedCollator extends Collator {
reorderCodes = new int[1];
reorderCodes[0] = offset & ~DATA_MASK_FOR_INDEX;
} else {
int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
offset++;
@ -1595,7 +1596,6 @@ public final class RuleBasedCollator extends Collator {
leadBytes = new int[1];
leadBytes[0] = offset & ~DATA_MASK_FOR_INDEX;
} else {
int length = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
offset++;
@ -1684,6 +1684,9 @@ public final class RuleBasedCollator extends Collator {
int m_defaultStrength_;
boolean m_defaultIsHiragana4_;
boolean m_defaultIsNumericCollation_;
/**
* Default script order - the one created at initial rule parse time
*/
int[] m_defaultReorderCodes_;
/**
@ -3795,11 +3798,19 @@ public final class RuleBasedCollator extends Collator {
* Builds the lead byte permutation table
*/
private void buildPermutationTable() {
if (m_reorderCodes_ == null) {
if (m_reorderCodes_ == null || m_reorderCodes_.length == 0 || (m_reorderCodes_.length == 1 && m_reorderCodes_[0] == ReorderCodes.NONE)) {
m_leadBytePermutationTable_ = null;
return;
}
if (m_reorderCodes_[0] == ReorderCodes.DEFAULT) {
// swap the reorder codes for those at build of the rules
if (m_defaultReorderCodes_ == null || m_defaultReorderCodes_.length == 0) {
m_leadBytePermutationTable_ = null;
}
m_reorderCodes_ = m_defaultReorderCodes_.clone();
}
// TODO - these need to be read in from the UCA data file
// The lowest byte that hasn't been assigned a mapping
int toBottom = 0x03;

View file

@ -3195,7 +3195,7 @@ public class CollationMiscTest extends TestFmwk {
/* clear the reordering */
myCollation.setReorderCodes(null);
retrievedReorderCodes = myCollation.getReorderCodes();
if (retrievedReorderCodes != null) {
if (retrievedReorderCodes.length != 0) {
errln("ERROR: retrieved reorder codes was not null.");
}
@ -3219,14 +3219,57 @@ public class CollationMiscTest extends TestFmwk {
/* clear the reordering */
myCollation.setReorderCodes(new int[]{});
retrievedReorderCodes = myCollation.getReorderCodes();
if (retrievedReorderCodes != null) {
if (retrievedReorderCodes.length != 0) {
errln("ERROR: retrieved reorder codes was not null.");
}
if (!(myCollation.compare(greekString, punctuationString) > 0)) {
errln("ERROR: collation result should have been greater.");
}
}
/*
 * Test the reordering API on a collator whose reordering comes from its rules.
 *
 * Checks, in order, that:
 *   1. the codes given in the "[reorder ...]" rule are reported by getReorderCodes();
 *   2. setReorderCodes(null) clears them, leaving an empty array;
 *   3. explicitly set codes are reported back unchanged;
 *   4. setReorderCodes(ReorderCodes.DEFAULT) restores the codes that came from the rules.
 */
public void TestReorderingAPIWithRuleCreatedCollator() throws Exception
{
    Collator myCollation;
    String rules = "[reorder Hani Grek]";
    int[] rulesReorderCodes = {UScript.HAN, UScript.GREEK};
    int[] reorderCodes = {UScript.GREEK, UScript.HAN, ReorderCodes.PUNCTUATION};
    int[] retrievedReorderCodes;

    /* build collator tertiary */
    myCollation = new RuleBasedCollator(rules);
    myCollation.setStrength(Collator.TERTIARY);

    /* the reordering specified in the rules must be visible through the API */
    retrievedReorderCodes = myCollation.getReorderCodes();
    if (!Arrays.equals(rulesReorderCodes, retrievedReorderCodes)) {
        errln("ERROR: retrieved reorder codes do not match the reorder codes given in the rules.");
    }

    /* clear the reordering */
    myCollation.setReorderCodes(null);
    retrievedReorderCodes = myCollation.getReorderCodes();
    // A cleared reordering is reported as an empty array, so the check is on length.
    if (retrievedReorderCodes.length != 0) {
        errln("ERROR: retrieved reorder codes were not empty after clearing the reordering.");
    }

    /* set the reordering */
    myCollation.setReorderCodes(reorderCodes);
    retrievedReorderCodes = myCollation.getReorderCodes();
    if (!Arrays.equals(reorderCodes, retrievedReorderCodes)) {
        errln("ERROR: retrieved reorder codes do not match set reorder codes.");
    }

    /* reset the reordering */
    myCollation.setReorderCodes(ReorderCodes.DEFAULT);
    retrievedReorderCodes = myCollation.getReorderCodes();
    // DEFAULT must bring back the rule-defined codes, not the ones set explicitly above.
    if (!Arrays.equals(rulesReorderCodes, retrievedReorderCodes)) {
        errln("ERROR: retrieved reorder codes do not match the rule-defined reorder codes after reset to DEFAULT.");
    }
}
public void TestSameLeadBytScriptReorder(){
@ -3245,6 +3288,31 @@ public class CollationMiscTest extends TestFmwk {
-1,
};
Collator myCollation;
String rules = "[reorder Goth Latn]";
try {
myCollation = new RuleBasedCollator(rules);
} catch (Exception e) {
warnln("ERROR: in creation of rule based collator");
return;
}
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
myCollation.setStrength(Collator.TERTIARY);
for (int i = 0; i < testSourceCases.length ; i++)
{
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
testSourceCases[i], testTargetCases[i],
results[i]);
}
// ensure that the non-reordered and reordered collation is the same
Collator nonReorderdCollator = RuleBasedCollator.getInstance();
int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
testSourceCases[0], testSourceCases[1], nonReorderedResults);
}
public void TestEquivalentReorderingScripts() {
int[] equivalentScriptsResult = {
UScript.BOPOMOFO, //Bopo
UScript.LISU, //Lisu
@ -3269,41 +3337,18 @@ public class CollationMiscTest extends TestFmwk {
UScript.CUNEIFORM, //Xsux
UScript.EGYPTIAN_HIEROGLYPHS //Egyp
};
Collator myCollation;
String rules = "[reorder Goth Latn]";
try {
myCollation = new RuleBasedCollator(rules);
} catch (Exception e) {
warnln("ERROR: in creation of rule based collator");
return;
}
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
myCollation.setStrength(Collator.TERTIARY);
for (int i = 0; i < testSourceCases.length ; i++)
{
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
testSourceCases[i], testTargetCases[i],
results[i]);
}
// ensure that the non-reordered and reordered collation is the same
Collator nonReorderdCollator = RuleBasedCollator.getInstance();
int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
testSourceCases[0], testSourceCases[1], nonReorderedResults);
Arrays.sort(equivalentScriptsResult);
int[] equivalentScripts = RuleBasedCollator.getReorderingCodesGroup(UScript.GOTHIC);
int[] equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.GOTHIC);
Arrays.sort(equivalentScripts);
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
equivalentScripts = RuleBasedCollator.getReorderingCodesGroup(UScript.SHAVIAN);
equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.SHAVIAN);
Arrays.sort(equivalentScripts);
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
}
public void TestGreekFirstReorderCloning(){
public void TestGreekFirstReorderCloning() {
String[] testSourceCases = {
"\u0041",
"\u03b1\u0041",
@ -3410,8 +3455,8 @@ public class CollationMiscTest extends TestFmwk {
};
OneTestCase[] privateUseCharacterStrings = {
//new OneTestCase("\u0391", "\u0391", 0),
//new OneTestCase("\u0041", "\u0391", -1),
new OneTestCase("\u0391", "\u0391", 0),
new OneTestCase("\u0041", "\u0391", -1),
new OneTestCase("\u03B1\u0041", "\u03B1\u0391", -1),
new OneTestCase("\u0060", "\u0391", -1),
new OneTestCase("\u0391", "\ue2dc", 1),
@ -3498,5 +3543,5 @@ public class CollationMiscTest extends TestFmwk {
/* Test collation reordering API */
doTestOneReorderingAPITestCase(collationTestCases, apiRules);
}
}
}