mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-21460 Changed the ULocale initializers to allow locale IDs that use BCP47 syntax, but with '_' as a field delimiter.
(APIs that specifically require BCP47 syntax are unaffected; they still require '-'.)
This commit is contained in:
parent
0d407fc616
commit
01e1adc9e4
7 changed files with 66 additions and 14 deletions
|
@ -1477,21 +1477,37 @@ _canonicalize(const char* localeID,
|
|||
ByteSink& sink,
|
||||
uint32_t options,
|
||||
UErrorCode* err) {
|
||||
if (U_FAILURE(*err)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
|
||||
PreflightingLocaleIDBuffer tempBuffer;
|
||||
PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
|
||||
const char* origLocaleID;
|
||||
const char* tmpLocaleID;
|
||||
const char* keywordAssign = NULL;
|
||||
const char* separatorIndicator = NULL;
|
||||
|
||||
if (U_FAILURE(*err)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
CharString localeIDWithHyphens;
|
||||
const char* localeIDPtr = localeID;
|
||||
|
||||
// convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
|
||||
if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
|
||||
localeIDWithHyphens.append(localeID, -1, *err);
|
||||
if (U_SUCCESS(*err)) {
|
||||
for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
|
||||
if (*p == '_') {
|
||||
*p = '-';
|
||||
}
|
||||
}
|
||||
localeIDPtr = localeIDWithHyphens.data();
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
|
||||
tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
|
||||
tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeIDPtr, tempBuffer.getBuffer(),
|
||||
tempBuffer.getCapacity(), err);
|
||||
} while (tempBuffer.needToTryAgain(err));
|
||||
} else {
|
||||
if (localeID==NULL) {
|
||||
|
|
|
@ -90,6 +90,7 @@ void addCollAPITest(TestNode** root)
|
|||
addTest(root, &TestBengaliSortKey, "tscoll/capitst/TestBengaliSortKey");
|
||||
addTest(root, &TestGetKeywordValuesForLocale, "tscoll/capitst/TestGetKeywordValuesForLocale");
|
||||
addTest(root, &TestStrcollNull, "tscoll/capitst/TestStrcollNull");
|
||||
addTest(root, &TestLocaleIDWithUnderscoreAndExtension, "tscoll/capitst/TestLocaleIDWithUnderscoreAndExtension");
|
||||
}
|
||||
|
||||
void TestGetSetAttr(void) {
|
||||
|
@ -2565,4 +2566,18 @@ static void TestStrcollNull(void) {
|
|||
ucol_close(coll);
|
||||
}
|
||||
|
||||
/*
 * Regression check for ICU-21460: opens one collator from "en-US-u-kn-true" and
 * one from "en_US-u-kn-true" (same ID, but with '_' between language and region)
 * and verifies that neither collator reports "2" as greater than "10".
 * NOTE(review): the "-u-kn-true" extension presumably requests numeric ordering,
 * which is what would make "2" sort before "10" -- confirm against the collation
 * keyword documentation.
 */
static void TestLocaleIDWithUnderscoreAndExtension(void) {
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
/* c1: locale ID written with '-' throughout. */
UCollator* c1 = ucol_open("en-US-u-kn-true", &err);
|
||||
/* c2: same ID with '_' before the region; both opens share one error code. */
UCollator* c2 = ucol_open("en_US-u-kn-true", &err);
|
||||
|
||||
/* Only run the comparisons when the shared error code still indicates success. */
if (assertSuccess("Failed to create collators", &err)) {
|
||||
assertTrue("Comparison using \"normal\" collator failed", !ucol_greater(c1, u"2", -1, u"10", -1));
|
||||
assertTrue("Comparison using \"bad\" collator failed", !ucol_greater(c2, u"2", -1, u"10", -1));
|
||||
}
|
||||
|
||||
/* Both collators are closed regardless of whether the check above passed. */
ucol_close(c1);
|
||||
ucol_close(c2);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -136,6 +136,11 @@
|
|||
* test strcoll with null arg
|
||||
*/
|
||||
static void TestStrcollNull(void);
|
||||
|
||||
/**
|
||||
* Simple test for ICU-21460. The issue affects all components, but was originally reported against collation.
|
||||
*/
|
||||
static void TestLocaleIDWithUnderscoreAndExtension(void);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
||||
|
|
|
@ -3723,13 +3723,13 @@ const char* const basic_maximize_data[][2] = {
|
|||
""
|
||||
}, {
|
||||
"de_u_co_phonebk",
|
||||
"de_Latn_DE_U_CO_PHONEBK"
|
||||
"de_Latn_DE@collation=phonebook"
|
||||
}, {
|
||||
"de_Latn_u_co_phonebk",
|
||||
"de_Latn_DE_U_CO_PHONEBK"
|
||||
"de_Latn_DE@collation=phonebook"
|
||||
}, {
|
||||
"de_Latn_DE_u_co_phonebk",
|
||||
"de_Latn_DE_U_CO_PHONEBK"
|
||||
"de_Latn_DE@collation=phonebook"
|
||||
}, {
|
||||
"_Arab@em=emoji",
|
||||
"ar_Arab_EG@em=emoji"
|
||||
|
@ -6377,7 +6377,7 @@ static const struct {
|
|||
{"hant-cmn-cn", "hant", 4},
|
||||
{"zh-cmn-TW", "cmn_TW", FULL_LENGTH},
|
||||
{"zh-x_t-ab", "zh", 2},
|
||||
{"zh-hans-cn-u-ca-x_t-u", "zh_Hans_CN@calendar=yes", 15},
|
||||
{"zh-hans-cn-u-ca-x_t-u", "zh_Hans_CN@calendar=yes", 15},
|
||||
/* #20140 dupe keys in U-extension */
|
||||
{"zh-u-ca-chinese-ca-gregory", "zh@calendar=chinese", FULL_LENGTH},
|
||||
{"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", FULL_LENGTH},
|
||||
|
|
|
@ -4805,7 +4805,7 @@ void LocaleTest::TestCanonicalization(void)
|
|||
{ "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" },
|
||||
{ "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" },
|
||||
{ "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "no_NO_B_NY" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "no_NO@b=ny" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
|
||||
/* fleshing out canonicalization */
|
||||
/* trim space and sort keywords, ';' is separator so not present at end in canonical form */
|
||||
|
|
|
@ -1131,10 +1131,13 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
* @stable ICU 3.0
|
||||
*/
|
||||
public static String getName(String localeID){
|
||||
String tmpLocaleID;
|
||||
String tmpLocaleID = localeID;
|
||||
// Convert BCP47 id if necessary
|
||||
if (localeID != null && !localeID.contains("@") && getShortestSubtagLength(localeID) == 1) {
|
||||
tmpLocaleID = forLanguageTag(localeID).getName();
|
||||
if (localeID.indexOf('_') >= 0 && localeID.charAt(1) != '_' && localeID.charAt(1) != '-') {
|
||||
tmpLocaleID = localeID.replace('_', '-');
|
||||
}
|
||||
tmpLocaleID = forLanguageTag(tmpLocaleID).getName();
|
||||
if (tmpLocaleID.length() == 0) {
|
||||
tmpLocaleID = localeID;
|
||||
}
|
||||
|
|
|
@ -1702,4 +1702,17 @@ public class CollationAPITest extends TestFmwk {
|
|||
errln("unexpected exception for tailoring many characters at the end of symbols: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates one collator from the locale ID "en-US-u-kn-true" and one from
 * "en_US-u-kn-true" (same ID, but with '_' between language and region) and
 * asserts that both order "2" before "10".
 * NOTE(review): the "-u-kn-true" extension presumably requests numeric
 * ordering -- confirm against the collation keyword documentation.
 */
@Test
|
||||
public void TestBogusLocaleID() {
|
||||
try {
|
||||
// c1: locale ID written with '-' throughout.
Collator c1 = Collator.getInstance(new ULocale("en-US-u-kn-true"));
|
||||
// c2: same ID with '_' before the region.
Collator c2 = Collator.getInstance(new ULocale("en_US-u-kn-true"));
|
||||
|
||||
// A negative compare() result means "2" sorts before "10".
assertTrue("Comparison using \"normal\" collator failed", c1.compare("2", "10") < 0);
|
||||
assertTrue("Comparison using \"bad\" collator failed", c2.compare("2", "10") < 0);
|
||||
} catch (Exception e) {
|
||||
// Any exception thrown inside the try block is reported as a test error.
errln("Exception creating collators: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue