mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-7023 Supporting collation import syntax in ICU4J
X-SVN-Rev: 28970
This commit is contained in:
parent
685d7f1862
commit
86ce28acfb
2 changed files with 92 additions and 10 deletions
|
@ -11,6 +11,9 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
||||
|
@ -38,7 +41,7 @@ final class CollationRuleParser
|
|||
CollationRuleParser(String rules) throws ParseException
|
||||
{
|
||||
// Prepares m_copySet_ and m_removeSet_.
|
||||
extractSetsFromRules(rules);
|
||||
rules = preprocessRules(rules);
|
||||
|
||||
// Save the rules as a long string. The StringBuilder object is
|
||||
// used to store the result of token parsing as well.
|
||||
|
@ -557,7 +560,7 @@ final class CollationRuleParser
|
|||
INDIRECT_BOUNDARIES_[14].m_limitCE_
|
||||
= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24;
|
||||
|
||||
RULES_OPTIONS_ = new TokenOption[19];
|
||||
RULES_OPTIONS_ = new TokenOption[20];
|
||||
String option[] = {"non-ignorable", "shifted"};
|
||||
int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_,
|
||||
RuleBasedCollator.AttributeValue.SHIFTED_};
|
||||
|
@ -668,6 +671,9 @@ final class CollationRuleParser
|
|||
RULES_OPTIONS_[18] = new TokenOption("charset",
|
||||
RuleBasedCollator.Attribute.LIMIT_,
|
||||
null, null);
|
||||
RULES_OPTIONS_[19] = new TokenOption("import",
|
||||
RuleBasedCollator.Attribute.LIMIT_,
|
||||
null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1220,7 +1226,7 @@ final class CollationRuleParser
|
|||
if (m_lastRangeCp_ > 0 && m_lastRangeCp_ == m_previousCp_) {
|
||||
throw new ParseException("Chained range syntax", m_current_);
|
||||
}
|
||||
|
||||
|
||||
// The current token is the first character of the second code point of the range.
|
||||
// Process just that, and then proceed with the star.
|
||||
m_lastRangeCp_ = m_source_.codePointAt(this.m_parsedToken_.m_charsOffset_);
|
||||
|
@ -1696,7 +1702,7 @@ final class CollationRuleParser
|
|||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
*/
|
||||
private void initializeParsedToken() {
|
||||
m_parsedToken_.m_charsLen_ = 0;
|
||||
|
@ -2231,7 +2237,7 @@ final class CollationRuleParser
|
|||
return tailored;
|
||||
}
|
||||
|
||||
final private void extractSetsFromRules(String rules) throws ParseException {
|
||||
final private String preprocessRules(String rules) throws ParseException {
|
||||
int optionNumber = -1;
|
||||
int setStart = 0;
|
||||
int i = 0;
|
||||
|
@ -2248,14 +2254,32 @@ final class CollationRuleParser
|
|||
}
|
||||
} else if(optionNumber == 14) {
|
||||
UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
|
||||
if(m_removeSet_ == null) {
|
||||
m_removeSet_ = newSet;
|
||||
} else {
|
||||
m_removeSet_.addAll(newSet);
|
||||
}
|
||||
if(m_removeSet_ == null) {
|
||||
m_removeSet_ = newSet;
|
||||
} else {
|
||||
m_removeSet_.addAll(newSet);
|
||||
}
|
||||
} else if(optionNumber == 19) {
|
||||
int optionEndOffset = rules.indexOf(']', i) + 1;
|
||||
ULocale locale = ULocale.forLanguageTag(rules.substring(setStart, optionEndOffset-1));
|
||||
UResourceBundle bundle = UResourceBundle.getBundleInstance(
|
||||
ICUResourceBundle.ICU_BASE_NAME + "/coll", locale.getBaseName());
|
||||
|
||||
String type = locale.getKeywordValue("collation");
|
||||
if(type == null){
|
||||
type = "standard";
|
||||
}
|
||||
|
||||
String importRules = bundle.get("collations")
|
||||
.get(type)
|
||||
.get("Sequence")
|
||||
.getString();
|
||||
|
||||
rules = rules.substring(0, i) + importRules + rules.substring(optionEndOffset);
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,8 @@ import com.ibm.icu.text.Collator;
|
|||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
@ -3007,4 +3009,60 @@ public class CollationMiscTest extends TestFmwk {
|
|||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public void TestImport(){
|
||||
try{
|
||||
RuleBasedCollator vicoll = (RuleBasedCollator)Collator.getInstance(new ULocale("vi"));
|
||||
RuleBasedCollator escoll = (RuleBasedCollator)Collator.getInstance(new ULocale("es"));
|
||||
RuleBasedCollator viescoll = new RuleBasedCollator(vicoll.getRules() + escoll.getRules());
|
||||
RuleBasedCollator importviescoll = new RuleBasedCollator("[import vi][import es]");
|
||||
|
||||
UnicodeSet tailoredSet = viescoll.getTailoredSet();
|
||||
UnicodeSet importTailoredSet = importviescoll.getTailoredSet();
|
||||
|
||||
if(!tailoredSet.equals(importTailoredSet)){
|
||||
warnln("Tailored set not equal");
|
||||
}
|
||||
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(tailoredSet); it.next();) {
|
||||
String t = it.getString();
|
||||
CollationKey sk1 = viescoll.getCollationKey(t);
|
||||
CollationKey sk2 = importviescoll.getCollationKey(t);
|
||||
if(!sk1.equals(sk2)){
|
||||
warnln("Collation key's not equal for " + t);
|
||||
}
|
||||
}
|
||||
|
||||
}catch(Exception e){
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
}
|
||||
}
|
||||
|
||||
public void TestImportWithType(){
|
||||
try{
|
||||
RuleBasedCollator vicoll = (RuleBasedCollator)Collator.getInstance(new ULocale("vi"));
|
||||
RuleBasedCollator decoll = (RuleBasedCollator)Collator.getInstance(ULocale.forLanguageTag("de-u-co-phonebk"));
|
||||
RuleBasedCollator videcoll = new RuleBasedCollator(vicoll.getRules() + decoll.getRules());
|
||||
RuleBasedCollator importvidecoll = new RuleBasedCollator("[import vi][import de-u-co-phonebk]");
|
||||
|
||||
UnicodeSet tailoredSet = videcoll.getTailoredSet();
|
||||
UnicodeSet importTailoredSet = importvidecoll.getTailoredSet();
|
||||
|
||||
if(!tailoredSet.equals(importTailoredSet)){
|
||||
warnln("Tailored set not equal");
|
||||
}
|
||||
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(tailoredSet); it.next();) {
|
||||
String t = it.getString();
|
||||
CollationKey sk1 = videcoll.getCollationKey(t);
|
||||
CollationKey sk2 = importvidecoll.getCollationKey(t);
|
||||
if(!sk1.equals(sk2)){
|
||||
warnln("Collation key's not equal for " + t);
|
||||
}
|
||||
}
|
||||
|
||||
}catch(Exception e){
|
||||
warnln("ERROR: in creation of rule based collator");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue