mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 18:25:57 +00:00
ICU-8295 API for collation reordering
X-SVN-Rev: 29672
This commit is contained in:
parent
b3ba6c0bf3
commit
45dd32ab8a
4 changed files with 175 additions and 83 deletions
|
@ -1,7 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="src" path="/icu4j-charset-tests"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
|
||||
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate-tests"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-localespi-tests"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-packaging-tests"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 1996-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,6 +17,7 @@ import java.util.Set;
|
|||
import com.ibm.icu.impl.ICUDebug;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.Norm2AllModes;
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
@ -230,50 +231,59 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||
*
|
||||
* @see #getReorderCodes
|
||||
* @see #setReorderCodes
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #getEquivalentReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public static interface ReorderCodes {
|
||||
/**
|
||||
* A special reordering code that is used to specify the default reordering codes for a locale.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int DEFAULT = 1;
|
||||
/**
|
||||
* A speical reordering code that is used to specify no reordering codes.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int NONE = UScript.UNKNOWN;
|
||||
/**
|
||||
* A special reordering code that is used to specify all other codes used for reordering except
|
||||
* for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int OTHERS = UScript.UNKNOWN;
|
||||
/**
|
||||
* Characters with the space property.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int SPACE = 0x1000;
|
||||
/**
|
||||
* The first entry in the enumeration.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int FIRST = SPACE;
|
||||
/**
|
||||
* Characters with the punctuation property.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int PUNCTUATION = 0x1001;
|
||||
/**
|
||||
* Characters with the symbol property.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int SYMBOL = 0x1002;
|
||||
/**
|
||||
* Characters with the currency property.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int CURRENCY = 0x1003;
|
||||
/**
|
||||
* Characters with the digit property.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int DIGIT = 0x1004;
|
||||
/**
|
||||
* The limit of the reorder codes..
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public final static int LIMIT = 0x1005;
|
||||
}
|
||||
|
@ -368,13 +378,15 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||
}
|
||||
|
||||
/**
|
||||
* Set the reordering codes for this collator.
|
||||
* The reordering codes are a combination of UScript and ReorderingCodes. These
|
||||
* allow the order of these groups to be changed as a group.
|
||||
* @param order the reordering codes to apply to this collator, if null then clears the reordering
|
||||
* Sets the reordering codes for this collator.
|
||||
* Reordering codes allow the collation ordering for groups of characters to be changed.
|
||||
* The reordering codes are a combination of UScript codes and ReorderCodes.
|
||||
* These allow the ordering of characters belonging to these groups to be changed as a group.
|
||||
* @param order the reordering codes to apply to this collator; if this is null or an empty array
|
||||
* then this clears any existing reordering
|
||||
* @see #getReorderCodes
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #getEquivalentReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public void setReorderCodes(int... order)
|
||||
{
|
||||
|
@ -1057,18 +1069,34 @@ public abstract class Collator implements Comparator<Object>, Cloneable
|
|||
public abstract VersionInfo getUCAVersion();
|
||||
|
||||
/**
|
||||
* Retrieve the reordering codes for this collator.
|
||||
* These reordering codes are a combination of UScript and ReorderCodes.
|
||||
* Retrieves the reordering codes for this collator.
|
||||
* These reordering codes are a combination of UScript codes and ReorderCodes.
|
||||
* @return a copy of the reordering codes for this collator;
|
||||
* if none are set then returns an empty array
|
||||
* @see #setReorderCodes
|
||||
* @return the reordering codes for this collator if they have been set, null otherwise.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #getEquivalentReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public int[] getReorderCodes()
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
|
||||
* codes are grouped and must reorder together.
|
||||
*
|
||||
* @param reorderCode code for which equivalents to be retrieved
|
||||
* @return the set of all reorder codes in the same group as the given reorder code.
|
||||
* @see #setReorderCodes
|
||||
* @see #getReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public static int[] getEquivalentReorderCodes(int reorderCode)
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
// protected constructor -------------------------------------------------
|
||||
|
||||
/**
|
||||
|
|
|
@ -638,13 +638,15 @@ public final class RuleBasedCollator extends Collator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Set the reordering codes for this collator.
|
||||
* The reordering codes are a combination of UScript and ReorderingCodes. These
|
||||
* allow the order of these groups to be changed as a group.
|
||||
* @param order the reordering codes to apply to this collator, if null then clears the reordering
|
||||
* Sets the reordering codes for this collator.
|
||||
* Reordering codes allow the collation ordering for groups of characters to be changed.
|
||||
* The reordering codes are a combination of UScript codes and ReorderCodes.
|
||||
* These allow the ordering of characters belonging to these groups to be changed as a group.
|
||||
* @param order the reordering codes to apply to this collator; if this is null or an empty array
|
||||
* then this clears any existing reordering
|
||||
* @see #getReorderCodes
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #getEquivalentReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public void setReorderCodes(int... order) {
|
||||
if (order != null && order.length > 0) {
|
||||
|
@ -1069,33 +1071,33 @@ public final class RuleBasedCollator extends Collator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Retrieve the reordering codes for this collator.
|
||||
* These reordering codes are a combination of UScript and ReorderCodes.
|
||||
* Retrieves the reordering codes for this collator.
|
||||
* These reordering codes are a combination of UScript codes and ReorderCodes.
|
||||
* @return a copy of the reordering codes for this collator;
|
||||
* if none are set then returns an empty array
|
||||
* @see #setReorderCodes
|
||||
* @return the reordering codes for this collator if they have been set, null otherwise.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #getEquivalentReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public int[] getReorderCodes() {
|
||||
if (m_reorderCodes_ != null) {
|
||||
return m_reorderCodes_.clone();
|
||||
} else {
|
||||
return null;
|
||||
return LeadByteConstants.EMPTY_INT_ARRAY;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the reorder codes that are grouped with the given reorder code. Some reorder codes will
|
||||
* be grouped and must reorder together.
|
||||
* Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
|
||||
* codes are grouped and must reorder together.
|
||||
*
|
||||
* @see #setReorderCodes
|
||||
* @see #getReorderCodes
|
||||
* @param reorderCode code for which equivalents to be retrieved
|
||||
* @return the set of all reorder codes in the same group as the given reorder code.
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @see #setReorderCodes
|
||||
* @see #getReorderCodes
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
public static int[] getReorderingCodesGroup(int reorderCode) {
|
||||
public static int[] getEquivalentReorderCodes(int reorderCode) {
|
||||
Set<Integer> equivalentCodesSet = new HashSet<Integer>();
|
||||
int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(reorderCode);
|
||||
for (int leadByte : leadBytes) {
|
||||
|
@ -1568,7 +1570,6 @@ public final class RuleBasedCollator extends Collator {
|
|||
reorderCodes = new int[1];
|
||||
reorderCodes[0] = offset & ~DATA_MASK_FOR_INDEX;
|
||||
} else {
|
||||
|
||||
int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
|
||||
offset++;
|
||||
|
||||
|
@ -1595,7 +1596,6 @@ public final class RuleBasedCollator extends Collator {
|
|||
leadBytes = new int[1];
|
||||
leadBytes[0] = offset & ~DATA_MASK_FOR_INDEX;
|
||||
} else {
|
||||
|
||||
int length = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
|
||||
offset++;
|
||||
|
||||
|
@ -1684,6 +1684,9 @@ public final class RuleBasedCollator extends Collator {
|
|||
int m_defaultStrength_;
|
||||
boolean m_defaultIsHiragana4_;
|
||||
boolean m_defaultIsNumericCollation_;
|
||||
/**
|
||||
* Default script order - the one created at initial rule parse time
|
||||
*/
|
||||
int[] m_defaultReorderCodes_;
|
||||
|
||||
/**
|
||||
|
@ -3795,11 +3798,19 @@ public final class RuleBasedCollator extends Collator {
|
|||
* Builds the lead byte permutation table
|
||||
*/
|
||||
private void buildPermutationTable() {
|
||||
if (m_reorderCodes_ == null) {
|
||||
if (m_reorderCodes_ == null || m_reorderCodes_.length == 0 || (m_reorderCodes_.length == 1 && m_reorderCodes_[0] == ReorderCodes.NONE)) {
|
||||
m_leadBytePermutationTable_ = null;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (m_reorderCodes_[0] == ReorderCodes.DEFAULT) {
|
||||
// swap the reorder codes for those at build of the rules
|
||||
if (m_defaultReorderCodes_ == null || m_defaultReorderCodes_.length == 0) {
|
||||
m_leadBytePermutationTable_ = null;
|
||||
}
|
||||
m_reorderCodes_ = m_defaultReorderCodes_.clone();
|
||||
}
|
||||
|
||||
// TODO - these need to be read in from the UCA data file
|
||||
// The lowest byte that hasn't been assigned a mapping
|
||||
int toBottom = 0x03;
|
||||
|
|
|
@ -3195,7 +3195,7 @@ public class CollationMiscTest extends TestFmwk {
|
|||
/* clear the reordering */
|
||||
myCollation.setReorderCodes(null);
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (retrievedReorderCodes != null) {
|
||||
if (retrievedReorderCodes.length != 0) {
|
||||
errln("ERROR: retrieved reorder codes was not null.");
|
||||
}
|
||||
|
||||
|
@ -3219,14 +3219,57 @@ public class CollationMiscTest extends TestFmwk {
|
|||
/* clear the reordering */
|
||||
myCollation.setReorderCodes(new int[]{});
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (retrievedReorderCodes != null) {
|
||||
if (retrievedReorderCodes.length != 0) {
|
||||
errln("ERROR: retrieved reorder codes was not null.");
|
||||
}
|
||||
|
||||
if (!(myCollation.compare(greekString, punctuationString) > 0)) {
|
||||
errln("ERROR: collation result should have been greater.");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Test reordering API.
|
||||
*/
|
||||
public void TestReorderingAPIWithRuleCreatedCollator() throws Exception
|
||||
{
|
||||
Collator myCollation;
|
||||
String rules = "[reorder Hani Grek]";
|
||||
int[] rulesReorderCodes = {UScript.HAN, UScript.GREEK};
|
||||
int[] reorderCodes = {UScript.GREEK, UScript.HAN, ReorderCodes.PUNCTUATION};
|
||||
int[] retrievedReorderCodes;
|
||||
|
||||
|
||||
/* build collator tertiary */
|
||||
myCollation = new RuleBasedCollator(rules);
|
||||
myCollation.setStrength(Collator.TERTIARY);
|
||||
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (!Arrays.equals(rulesReorderCodes, retrievedReorderCodes)) {
|
||||
errln("ERROR: retrieved reorder codes do not match set reorder codes.");
|
||||
}
|
||||
|
||||
/* clear the reordering */
|
||||
myCollation.setReorderCodes(null);
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (retrievedReorderCodes.length != 0) {
|
||||
errln("ERROR: retrieved reorder codes was not null.");
|
||||
}
|
||||
|
||||
/* set the reorderding */
|
||||
myCollation.setReorderCodes(reorderCodes);
|
||||
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (!Arrays.equals(reorderCodes, retrievedReorderCodes)) {
|
||||
errln("ERROR: retrieved reorder codes do not match set reorder codes.");
|
||||
}
|
||||
|
||||
/* reset the reordering */
|
||||
myCollation.setReorderCodes(ReorderCodes.DEFAULT);
|
||||
retrievedReorderCodes = myCollation.getReorderCodes();
|
||||
if (!Arrays.equals(rulesReorderCodes, retrievedReorderCodes)) {
|
||||
errln("ERROR: retrieved reorder codes do not match set reorder codes.");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestSameLeadBytScriptReorder(){
|
||||
|
@ -3245,6 +3288,31 @@ public class CollationMiscTest extends TestFmwk {
|
|||
-1,
|
||||
};
|
||||
|
||||
Collator myCollation;
|
||||
String rules = "[reorder Goth Latn]";
|
||||
try {
|
||||
myCollation = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
return;
|
||||
}
|
||||
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
myCollation.setStrength(Collator.TERTIARY);
|
||||
for (int i = 0; i < testSourceCases.length ; i++)
|
||||
{
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[i], testTargetCases[i],
|
||||
results[i]);
|
||||
}
|
||||
|
||||
// ensure that the non-reordered and reordered collation is the same
|
||||
Collator nonReorderdCollator = RuleBasedCollator.getInstance();
|
||||
int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[0], testSourceCases[1], nonReorderedResults);
|
||||
}
|
||||
|
||||
public void TestEquivalentReorderingScripts() {
|
||||
int[] equivalentScriptsResult = {
|
||||
UScript.BOPOMOFO, //Bopo
|
||||
UScript.LISU, //Lisu
|
||||
|
@ -3269,41 +3337,18 @@ public class CollationMiscTest extends TestFmwk {
|
|||
UScript.CUNEIFORM, //Xsux
|
||||
UScript.EGYPTIAN_HIEROGLYPHS //Egyp
|
||||
};
|
||||
|
||||
Collator myCollation;
|
||||
String rules = "[reorder Goth Latn]";
|
||||
try {
|
||||
myCollation = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
return;
|
||||
}
|
||||
myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
myCollation.setStrength(Collator.TERTIARY);
|
||||
for (int i = 0; i < testSourceCases.length ; i++)
|
||||
{
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[i], testTargetCases[i],
|
||||
results[i]);
|
||||
}
|
||||
|
||||
// ensure that the non-reordered and reordered collation is the same
|
||||
Collator nonReorderdCollator = RuleBasedCollator.getInstance();
|
||||
int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
|
||||
CollationTest.doTest(this, (RuleBasedCollator)myCollation,
|
||||
testSourceCases[0], testSourceCases[1], nonReorderedResults);
|
||||
|
||||
Arrays.sort(equivalentScriptsResult);
|
||||
int[] equivalentScripts = RuleBasedCollator.getReorderingCodesGroup(UScript.GOTHIC);
|
||||
|
||||
int[] equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.GOTHIC);
|
||||
Arrays.sort(equivalentScripts);
|
||||
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
|
||||
|
||||
equivalentScripts = RuleBasedCollator.getReorderingCodesGroup(UScript.SHAVIAN);
|
||||
equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.SHAVIAN);
|
||||
Arrays.sort(equivalentScripts);
|
||||
assertTrue("Script Equivalents for Reordering", Arrays.equals(equivalentScripts, equivalentScriptsResult));
|
||||
}
|
||||
|
||||
public void TestGreekFirstReorderCloning(){
|
||||
public void TestGreekFirstReorderCloning() {
|
||||
String[] testSourceCases = {
|
||||
"\u0041",
|
||||
"\u03b1\u0041",
|
||||
|
@ -3410,8 +3455,8 @@ public class CollationMiscTest extends TestFmwk {
|
|||
};
|
||||
|
||||
OneTestCase[] privateUseCharacterStrings = {
|
||||
//new OneTestCase("\u0391", "\u0391", 0),
|
||||
//new OneTestCase("\u0041", "\u0391", -1),
|
||||
new OneTestCase("\u0391", "\u0391", 0),
|
||||
new OneTestCase("\u0041", "\u0391", -1),
|
||||
new OneTestCase("\u03B1\u0041", "\u03B1\u0391", -1),
|
||||
new OneTestCase("\u0060", "\u0391", -1),
|
||||
new OneTestCase("\u0391", "\ue2dc", 1),
|
||||
|
@ -3498,5 +3543,5 @@ public class CollationMiscTest extends TestFmwk {
|
|||
|
||||
/* Test collation reordering API */
|
||||
doTestOneReorderingAPITestCase(collationTestCases, apiRules);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue