mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-96 added Normalize flag to collations that need normalization
X-SVN-Rev: 3208
This commit is contained in:
parent
bb03b0b456
commit
07678fc629
9 changed files with 69 additions and 13 deletions
|
@ -13,6 +13,7 @@ el {
|
|||
CollationElements {
|
||||
Version { "1.0" }
|
||||
Override { "FALSE" }
|
||||
Normalize { "TRUE" }
|
||||
Sequence { "& \u0361 = \u0387 = \u03F3 & \u00B5 < \u0374 < \u0375 <"
|
||||
" \u037A < \u037E < \u0384 < \u0385 & Z < \u03B1 , \u0391 ; \u03AC , \u0386 < \u03B2"
|
||||
" , \u0392 ; \u03D0 < \u03B3 , \u0393 < \u03B4 , \u0394 < \u03B5 , \u0395 ; \u03AD"
|
||||
|
|
|
@ -9,6 +9,7 @@ vi {
|
|||
CollationElements {
|
||||
Version { "1.0" }
|
||||
Override { "FALSE" }
|
||||
Normalize { "TRUE" }
|
||||
Sequence { "&\u009F,' ','_',\u00AF,\u00AD,'-',',',';',':','!',\u00A1"
|
||||
",'?',\u00BF,'/','.','^','~',\u00B7,''','\u0022',\u00AB,\u00BB,'(',')','[',']','"
|
||||
"{','}',\u00A7,\u00B6,\u00A9,\u00AE,'@',\u00A4,\u00A2,'$',\u00A3,\u00A5,'*','\u005C"
|
||||
|
|
|
@ -13,6 +13,7 @@ el {
|
|||
CollationElements {
|
||||
Version { "1.0" }
|
||||
Override { "FALSE" }
|
||||
Normalize { "TRUE" }
|
||||
Sequence { "& \u0361 = \u0387 = \u03F3 & \u00B5 < \u0374 < \u0375 <"
|
||||
" \u037A < \u037E < \u0384 < \u0385 & Z < \u03B1 , \u0391 ; \u03AC , \u0386 < \u03B2"
|
||||
" , \u0392 ; \u03D0 < \u03B3 , \u0393 < \u03B4 , \u0394 < \u03B5 , \u0395 ; \u03AD"
|
||||
|
|
|
@ -9,6 +9,7 @@ vi {
|
|||
CollationElements {
|
||||
Version { "1.0" }
|
||||
Override { "FALSE" }
|
||||
Normalize { "TRUE" }
|
||||
Sequence { "&\u009F,' ','_',\u00AF,\u00AD,'-',',',';',':','!',\u00A1"
|
||||
",'?',\u00BF,'/','.','^','~',\u00B7,''','\u0022',\u00AB,\u00BB,'(',')','[',']','"
|
||||
"{','}',\u00A7,\u00B6,\u00A9,\u00AE,'@',\u00A4,\u00A2,'$',\u00A3,\u00A5,'*','\u005C"
|
||||
|
|
|
@ -823,6 +823,19 @@ RuleBasedCollator::constructFromBundle(const Locale & name,
|
|||
realName = binary.getName();
|
||||
if(U_SUCCESS(status)) {
|
||||
UErrorCode intStatus = U_ZERO_ERROR;
|
||||
ResourceBundle colElem = rb.get("CollationElements", intStatus);
|
||||
if(U_SUCCESS(intStatus)) {
|
||||
UnicodeString norm = colElem.getStringEx("Normalize", intStatus);
|
||||
if(U_SUCCESS(intStatus)) {
|
||||
setDecomposition(Normalizer::DECOMP);
|
||||
fDefaultDecomp = Normalizer::DECOMP;
|
||||
} else {
|
||||
setDecomposition(Normalizer::NO_OP);
|
||||
fDefaultDecomp = Normalizer::NO_OP;
|
||||
}
|
||||
}
|
||||
intStatus = U_ZERO_ERROR;
|
||||
|
||||
constructFromCache(realName, intStatus); // check whether we already have this data in cache
|
||||
if(U_SUCCESS(intStatus)) {
|
||||
return realName;
|
||||
|
@ -911,9 +924,6 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
|
|||
if (U_SUCCESS(status)) {
|
||||
data->desiredLocale = desiredLocale;
|
||||
data->realLocaleName = locName;
|
||||
if(status != U_USING_DEFAULT_ERROR) {
|
||||
setDecomposition(Normalizer::NO_OP);
|
||||
}
|
||||
} else {
|
||||
UErrorCode intStatus = U_ZERO_ERROR;
|
||||
constructFromCache(ResourceBundle::kDefaultFilename, intStatus);
|
||||
|
@ -931,7 +941,6 @@ RuleBasedCollator::RuleBasedCollator( const Locale& desiredLocale,
|
|||
}
|
||||
}
|
||||
data->realLocaleName = ResourceBundle::kDefaultFilename;
|
||||
setDecomposition(Normalizer::NO_OP);
|
||||
addToCache(ResourceBundle::kDefaultFilename);
|
||||
}
|
||||
return;
|
||||
|
@ -2954,7 +2963,16 @@ void RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue valu
|
|||
status = U_UNSUPPORTED_ERROR;
|
||||
break;
|
||||
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
if(value == UCOL_ON) {
|
||||
setDecomposition(Normalizer::DECOMP);
|
||||
} else if (value == UCOL_OFF) {
|
||||
setDecomposition(Normalizer::NO_OP);
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
setDecomposition(fDefaultDecomp);
|
||||
} else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
break;
|
||||
break;
|
||||
case UCOL_STRENGTH: /* attribute for strength */
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
|
@ -2985,7 +3003,11 @@ UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCod
|
|||
status = U_UNSUPPORTED_ERROR;
|
||||
break;
|
||||
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
|
||||
status = U_UNSUPPORTED_ERROR;
|
||||
if(getDecomposition() == Normalizer::DECOMP) {
|
||||
return UCOL_ON;
|
||||
} else {
|
||||
return UCOL_OFF;
|
||||
}
|
||||
break;
|
||||
case UCOL_STRENGTH: /* attribute for strength */
|
||||
switch(getStrength()) {
|
||||
|
|
|
@ -1168,8 +1168,8 @@ ucol_strcoll( const UCollator *coll,
|
|||
collIterate sColl, tColl;
|
||||
|
||||
if(cppColl->getDecomposition() == Normalizer::NO_OP) {
|
||||
init_collIterate(source, sourceLength, &sColl, FALSE);
|
||||
init_collIterate(target, targetLength, &tColl, FALSE);
|
||||
init_collIterate(source, sourceLength == -1 ? u_strlen(source) : sourceLength, &sColl, FALSE);
|
||||
init_collIterate(target, targetLength == -1 ? u_strlen(target) : targetLength, &tColl, FALSE);
|
||||
} else { /* TODO: This is bad behaved if we're working with small buffers */
|
||||
/* We really need the normalization quick check here*/
|
||||
UNormalizationMode normMode = ucol_getNormalization(coll);
|
||||
|
|
|
@ -1024,6 +1024,7 @@ private:
|
|||
NormalizerIterator *cursor2;
|
||||
UBool dataIsOwned;
|
||||
TableCollationData* data;
|
||||
Normalizer::EMode fDefaultDecomp;
|
||||
};
|
||||
|
||||
inline UBool
|
||||
|
|
|
@ -38,6 +38,7 @@ void addCollAPITest(TestNode** root)
|
|||
addTest(root, &TestElemIter, "tscoll/capitst/TestElemIter");
|
||||
addTest(root, &TestGetAll, "tscoll/capitst/TestGetAll");
|
||||
addTest(root, &TestGetDefaultRules, "tscoll/capitst/TestGetDefaultRules");
|
||||
addTest(root, &TestDecomposition, "tscoll/capitst/TestDecomposition");
|
||||
|
||||
}
|
||||
|
||||
|
@ -356,6 +357,38 @@ void TestCompare()
|
|||
free(test1);
|
||||
free(test2);
|
||||
|
||||
}
|
||||
/*
|
||||
---------------------------------------------
|
||||
tests decomposition setting
|
||||
*/
|
||||
void TestDecomposition() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *en_US, *el_GR, *vi_VN;
|
||||
en_US = ucol_open("en_US", &status);
|
||||
el_GR = ucol_open("el_GR", &status);
|
||||
vi_VN = ucol_open("vi_VN", &status);
|
||||
|
||||
/* there is no reason to have canonical decomposition in en_US OR default locale */
|
||||
if(ucol_getNormalization(vi_VN) != UCOL_DECOMP_CAN)
|
||||
{
|
||||
log_err("ERROR: vi_VN collation did not have cannonical decomposition for normalization!\n");
|
||||
}
|
||||
|
||||
if(ucol_getNormalization(el_GR) != UCOL_DECOMP_CAN)
|
||||
{
|
||||
log_err("ERROR: el_GR collation did not have cannonical decomposition for normalization!\n");
|
||||
}
|
||||
|
||||
if(ucol_getNormalization(en_US) != UNORM_NONE)
|
||||
{
|
||||
log_err("ERROR: en_US collation had cannonical decomposition for normalization!\n");
|
||||
}
|
||||
|
||||
ucol_close(en_US);
|
||||
ucol_close(el_GR);
|
||||
ucol_close(vi_VN);
|
||||
|
||||
}
|
||||
/*
|
||||
----------------------------------------------------------------------------
|
||||
|
@ -384,11 +417,6 @@ void TestSortKey()
|
|||
return;
|
||||
}
|
||||
|
||||
if(ucol_getNormalization(col) != UCOL_DECOMP_CAN)
|
||||
{
|
||||
log_err("ERROR: default collation did not have cannonical decomposition for normalization!\n");
|
||||
}
|
||||
|
||||
|
||||
if(ucol_getStrength(col) != UCOL_DEFAULT_STRENGTH)
|
||||
{
|
||||
|
|
|
@ -62,6 +62,7 @@
|
|||
**/
|
||||
void TestGetDefaultRules(void);
|
||||
|
||||
void TestDecomposition(void);
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue