ICU-7023 Supporting collation import syntax in ICU4J

X-SVN-Rev: 28970
This commit is contained in:
Umesh Nair 2010-11-01 23:54:10 +00:00
parent 685d7f1862
commit 86ce28acfb
2 changed files with 92 additions and 10 deletions

View file

@ -11,6 +11,9 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.lang.UCharacter;
@ -38,7 +41,7 @@ final class CollationRuleParser
CollationRuleParser(String rules) throws ParseException
{
// Prepares m_copySet_ and m_removeSet_.
extractSetsFromRules(rules);
rules = preprocessRules(rules);
// Save the rules as a long string. The StringBuilder object is
// used to store the result of token parsing as well.
@ -557,7 +560,7 @@ final class CollationRuleParser
INDIRECT_BOUNDARIES_[14].m_limitCE_
= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24;
RULES_OPTIONS_ = new TokenOption[19];
RULES_OPTIONS_ = new TokenOption[20];
String option[] = {"non-ignorable", "shifted"};
int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_,
RuleBasedCollator.AttributeValue.SHIFTED_};
@ -668,6 +671,9 @@ final class CollationRuleParser
RULES_OPTIONS_[18] = new TokenOption("charset",
RuleBasedCollator.Attribute.LIMIT_,
null, null);
RULES_OPTIONS_[19] = new TokenOption("import",
RuleBasedCollator.Attribute.LIMIT_,
null, null);
}
/**
@ -1220,7 +1226,7 @@ final class CollationRuleParser
if (m_lastRangeCp_ > 0 && m_lastRangeCp_ == m_previousCp_) {
throw new ParseException("Chained range syntax", m_current_);
}
// The current token is the first character of the second code point of the range.
// Process just that, and then proceed with the star.
m_lastRangeCp_ = m_source_.codePointAt(this.m_parsedToken_.m_charsOffset_);
@ -1696,7 +1702,7 @@ final class CollationRuleParser
/**
*
*
*/
private void initializeParsedToken() {
m_parsedToken_.m_charsLen_ = 0;
@ -2231,7 +2237,7 @@ final class CollationRuleParser
return tailored;
}
final private void extractSetsFromRules(String rules) throws ParseException {
final private String preprocessRules(String rules) throws ParseException {
int optionNumber = -1;
int setStart = 0;
int i = 0;
@ -2248,14 +2254,32 @@ final class CollationRuleParser
}
} else if(optionNumber == 14) {
UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
if(m_removeSet_ == null) {
m_removeSet_ = newSet;
} else {
m_removeSet_.addAll(newSet);
}
if(m_removeSet_ == null) {
m_removeSet_ = newSet;
} else {
m_removeSet_.addAll(newSet);
}
} else if(optionNumber == 19) {
int optionEndOffset = rules.indexOf(']', i) + 1;
ULocale locale = ULocale.forLanguageTag(rules.substring(setStart, optionEndOffset-1));
UResourceBundle bundle = UResourceBundle.getBundleInstance(
ICUResourceBundle.ICU_BASE_NAME + "/coll", locale.getBaseName());
String type = locale.getKeywordValue("collation");
if(type == null){
type = "standard";
}
String importRules = bundle.get("collations")
.get(type)
.get("Sequence")
.getString();
rules = rules.substring(0, i) + importRules + rules.substring(optionEndOffset);
}
}
i++;
}
return rules;
}
}

View file

@ -29,6 +29,8 @@ import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
@ -3007,4 +3009,60 @@ public class CollationMiscTest extends TestFmwk {
e.printStackTrace();
}
}
/**
 * Checks that a collator built from the rule string {@code "[import vi][import es]"}
 * matches a collator built from the concatenation of the rule strings returned by the
 * "vi" and "es" collators.
 *
 * <p>The two collators are compared by their tailored sets and by the collation key
 * each produces for every element of the concatenated collator's tailored set.
 * Mismatches and exceptions are reported through {@code warnln}.
 */
public void TestImport(){
    try{
        RuleBasedCollator vicoll = (RuleBasedCollator)Collator.getInstance(new ULocale("vi"));
        RuleBasedCollator escoll = (RuleBasedCollator)Collator.getInstance(new ULocale("es"));
        // Reference collator: the two rule strings simply appended to each other.
        RuleBasedCollator viescoll = new RuleBasedCollator(vicoll.getRules() + escoll.getRules());
        // Collator under test: the same two rule sets pulled in via the import option.
        RuleBasedCollator importviescoll = new RuleBasedCollator("[import vi][import es]");

        UnicodeSet tailoredSet = viescoll.getTailoredSet();
        UnicodeSet importTailoredSet = importviescoll.getTailoredSet();
        if(!tailoredSet.equals(importTailoredSet)){
            warnln("Tailored set not equal");
        }

        // Every tailored element must produce the same key in both collators.
        for (UnicodeSetIterator it = new UnicodeSetIterator(tailoredSet); it.next();) {
            String t = it.getString();
            CollationKey sk1 = viescoll.getCollationKey(t);
            CollationKey sk2 = importviescoll.getCollationKey(t);
            if(!sk1.equals(sk2)){
                warnln("Collation key's not equal for " + t);
            }
        }
    }catch(Exception e){
        // Still reported as a warning, but the exception is appended so the
        // underlying cause is not silently discarded.
        warnln("ERROR: in creation of rule based collator: " + e);
    }
}
/**
 * Checks that a collator built from the rule string
 * {@code "[import vi][import de-u-co-phonebk]"} matches a collator built from the
 * concatenation of the rule strings returned by the "vi" collator and the collator
 * obtained for the language tag {@code de-u-co-phonebk}.
 *
 * <p>The two collators are compared by their tailored sets and by the collation key
 * each produces for every element of the concatenated collator's tailored set.
 * Mismatches and exceptions are reported through {@code warnln}.
 */
public void TestImportWithType(){
    try{
        RuleBasedCollator vicoll = (RuleBasedCollator)Collator.getInstance(new ULocale("vi"));
        RuleBasedCollator decoll = (RuleBasedCollator)Collator.getInstance(ULocale.forLanguageTag("de-u-co-phonebk"));
        // Reference collator: the two rule strings simply appended to each other.
        RuleBasedCollator videcoll = new RuleBasedCollator(vicoll.getRules() + decoll.getRules());
        // Collator under test: the second import carries a collation type in its language tag.
        RuleBasedCollator importvidecoll = new RuleBasedCollator("[import vi][import de-u-co-phonebk]");

        UnicodeSet tailoredSet = videcoll.getTailoredSet();
        UnicodeSet importTailoredSet = importvidecoll.getTailoredSet();
        if(!tailoredSet.equals(importTailoredSet)){
            warnln("Tailored set not equal");
        }

        // Every tailored element must produce the same key in both collators.
        for (UnicodeSetIterator it = new UnicodeSetIterator(tailoredSet); it.next();) {
            String t = it.getString();
            CollationKey sk1 = videcoll.getCollationKey(t);
            CollationKey sk2 = importvidecoll.getCollationKey(t);
            if(!sk1.equals(sk2)){
                warnln("Collation key's not equal for " + t);
            }
        }
    }catch(Exception e){
        // Still reported as a warning, but the exception is appended so the
        // underlying cause is not silently discarded.
        warnln("ERROR: in creation of rule based collator: " + e);
    }
}
}