ICU-96 added Normalize flag to collations that need normalization

X-SVN-Rev: 3208
This commit is contained in:
Vladimir Weinstein 2000-12-13 01:26:07 +00:00
parent bb03b0b456
commit 07678fc629
9 changed files with 69 additions and 13 deletions

View file

@ -13,6 +13,7 @@ el {
CollationElements {
Version { "1.0" }
Override { "FALSE" }
Normalize { "TRUE" }
Sequence { "& \u0361 = \u0387 = \u03F3 & \u00B5 < \u0374 < \u0375 <"
" \u037A < \u037E < \u0384 < \u0385 & Z < \u03B1 , \u0391 ; \u03AC , \u0386 < \u03B2"
" , \u0392 ; \u03D0 < \u03B3 , \u0393 < \u03B4 , \u0394 < \u03B5 , \u0395 ; \u03AD"

View file

@ -9,6 +9,7 @@ vi {
CollationElements {
Version { "1.0" }
Override { "FALSE" }
Normalize { "TRUE" }
Sequence { "&\u009F,' ','_',\u00AF,\u00AD,'-',',',';',':','!',\u00A1"
",'?',\u00BF,'/','.','^','~',\u00B7,''','\u0022',\u00AB,\u00BB,'(',')','[',']','"
"{','}',\u00A7,\u00B6,\u00A9,\u00AE,'@',\u00A4,\u00A2,'$',\u00A3,\u00A5,'*','\u005C"

View file

@ -13,6 +13,7 @@ el {
CollationElements {
Version { "1.0" }
Override { "FALSE" }
Normalize { "TRUE" }
Sequence { "& \u0361 = \u0387 = \u03F3 & \u00B5 < \u0374 < \u0375 <"
" \u037A < \u037E < \u0384 < \u0385 & Z < \u03B1 , \u0391 ; \u03AC , \u0386 < \u03B2"
" , \u0392 ; \u03D0 < \u03B3 , \u0393 < \u03B4 , \u0394 < \u03B5 , \u0395 ; \u03AD"

View file

@ -9,6 +9,7 @@ vi {
CollationElements {
Version { "1.0" }
Override { "FALSE" }
Normalize { "TRUE" }
Sequence { "&\u009F,' ','_',\u00AF,\u00AD,'-',',',';',':','!',\u00A1"
",'?',\u00BF,'/','.','^','~',\u00B7,''','\u0022',\u00AB,\u00BB,'(',')','[',']','"
"{','}',\u00A7,\u00B6,\u00A9,\u00AE,'@',\u00A4,\u00A2,'$',\u00A3,\u00A5,'*','\u005C"

View file

@ -823,6 +823,19 @@ RuleBasedCollator::constructFromBundle(const Locale & name,
realName = binary.getName();
if(U_SUCCESS(status)) {
UErrorCode intStatus = U_ZERO_ERROR;
ResourceBundle colElem = rb.get("CollationElements", intStatus);
if(U_SUCCESS(intStatus)) {
UnicodeString norm = colElem.getStringEx("Normalize", intStatus);
if(U_SUCCESS(intStatus)) {
setDecomposition(Normalizer::DECOMP);
fDefaultDecomp = Normalizer::DECOMP;
} else {
setDecomposition(Normalizer::NO_OP);
fDefaultDecomp = Normalizer::NO_OP;
}
}
intStatus = U_ZERO_ERROR;
constructFromCache(realName, intStatus); // check whether we already have this data in cache
if(U_SUCCESS(intStatus)) {
return realName;
@ -911,9 +924,6 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
if (U_SUCCESS(status)) {
data->desiredLocale = desiredLocale;
data->realLocaleName = locName;
if(status != U_USING_DEFAULT_ERROR) {
setDecomposition(Normalizer::NO_OP);
}
} else {
UErrorCode intStatus = U_ZERO_ERROR;
constructFromCache(ResourceBundle::kDefaultFilename, intStatus);
@ -931,7 +941,6 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
}
}
data->realLocaleName = ResourceBundle::kDefaultFilename;
setDecomposition(Normalizer::NO_OP);
addToCache(ResourceBundle::kDefaultFilename);
}
return;
@ -2954,7 +2963,16 @@ void RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue valu
status = U_UNSUPPORTED_ERROR;
break;
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
status = U_UNSUPPORTED_ERROR;
if(value == UCOL_ON) {
setDecomposition(Normalizer::DECOMP);
} else if (value == UCOL_OFF) {
setDecomposition(Normalizer::NO_OP);
} else if (value == UCOL_DEFAULT) {
setDecomposition(fDefaultDecomp);
} else {
status = U_ILLEGAL_ARGUMENT_ERROR ;
}
break;
break;
case UCOL_STRENGTH: /* attribute for strength */
status = U_UNSUPPORTED_ERROR;
@ -2985,7 +3003,11 @@ UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCod
status = U_UNSUPPORTED_ERROR;
break;
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
status = U_UNSUPPORTED_ERROR;
if(getDecomposition() == Normalizer::DECOMP) {
return UCOL_ON;
} else {
return UCOL_OFF;
}
break;
case UCOL_STRENGTH: /* attribute for strength */
switch(getStrength()) {

View file

@ -1168,8 +1168,8 @@ ucol_strcoll( const UCollator *coll,
collIterate sColl, tColl;
if(cppColl->getDecomposition() == Normalizer::NO_OP) {
init_collIterate(source, sourceLength, &sColl, FALSE);
init_collIterate(target, targetLength, &tColl, FALSE);
init_collIterate(source, sourceLength == -1 ? u_strlen(source) : sourceLength, &sColl, FALSE);
init_collIterate(target, targetLength == -1 ? u_strlen(target) : targetLength, &tColl, FALSE);
} else { /* TODO: This is bad behaved if we're working with small buffers */
/* We really need the normalization quick check here*/
UNormalizationMode normMode = ucol_getNormalization(coll);

View file

@ -1024,6 +1024,7 @@ private:
NormalizerIterator *cursor2;
UBool dataIsOwned;
TableCollationData* data;
Normalizer::EMode fDefaultDecomp;
};
inline UBool

View file

@ -38,6 +38,7 @@ void addCollAPITest(TestNode** root)
addTest(root, &TestElemIter, "tscoll/capitst/TestElemIter");
addTest(root, &TestGetAll, "tscoll/capitst/TestGetAll");
addTest(root, &TestGetDefaultRules, "tscoll/capitst/TestGetDefaultRules");
addTest(root, &TestDecomposition, "tscoll/capitst/TestDecomposition");
}
@ -356,6 +357,38 @@ void TestCompare()
free(test1);
free(test2);
}
/*
---------------------------------------------
tests decomposition setting
*/
void TestDecomposition() {
UErrorCode status = U_ZERO_ERROR;
UCollator *en_US, *el_GR, *vi_VN;
en_US = ucol_open("en_US", &status);
el_GR = ucol_open("el_GR", &status);
vi_VN = ucol_open("vi_VN", &status);
/* there is no reason to have canonical decomposition in en_US OR default locale */
if(ucol_getNormalization(vi_VN) != UCOL_DECOMP_CAN)
{
log_err("ERROR: vi_VN collation did not have cannonical decomposition for normalization!\n");
}
if(ucol_getNormalization(el_GR) != UCOL_DECOMP_CAN)
{
log_err("ERROR: el_GR collation did not have cannonical decomposition for normalization!\n");
}
if(ucol_getNormalization(en_US) != UNORM_NONE)
{
log_err("ERROR: en_US collation had cannonical decomposition for normalization!\n");
}
ucol_close(en_US);
ucol_close(el_GR);
ucol_close(vi_VN);
}
/*
----------------------------------------------------------------------------
@ -384,11 +417,6 @@ void TestSortKey()
return;
}
if(ucol_getNormalization(col) != UCOL_DECOMP_CAN)
{
log_err("ERROR: default collation did not have cannonical decomposition for normalization!\n");
}
if(ucol_getStrength(col) != UCOL_DEFAULT_STRENGTH)
{

View file

@ -62,6 +62,7 @@
**/
void TestGetDefaultRules(void);
void TestDecomposition(void);