mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 00:43:32 +00:00
ICU-20410 Fix grandfathered tag w/ extensions
This commit is contained in:
parent
0ef0629736
commit
94ff6b1024
7 changed files with 72 additions and 12 deletions
|
@ -2063,13 +2063,26 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
return t.orphan();
|
||||
}
|
||||
|
||||
size_t parsedLenDelta = 0;
|
||||
// Grandfathered tag will be considered together. Grandfathered tag with intervening
|
||||
// script and region such as art-DE-lojban or art-Latn-lojban won't be
|
||||
// matched.
|
||||
/* check if the tag is grandfathered */
|
||||
for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
|
||||
if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
|
||||
int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
|
||||
if (tagLen < checkGrandfatheredLen) {
|
||||
continue;
|
||||
}
|
||||
if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
|
||||
// make sure next char is '-'.
|
||||
continue;
|
||||
}
|
||||
if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
|
||||
int32_t newTagLength;
|
||||
|
||||
grandfatheredLen = tagLen; /* back up for output parsedLen */
|
||||
newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
|
||||
grandfatheredLen = checkGrandfatheredLen; /* back up for output parsedLen */
|
||||
int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
|
||||
newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
|
||||
if (tagLen < newTagLength) {
|
||||
uprv_free(tagBuf);
|
||||
tagBuf = (char*)uprv_malloc(newTagLength + 1);
|
||||
|
@ -2080,12 +2093,15 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
t->buf = tagBuf;
|
||||
tagLen = newTagLength;
|
||||
}
|
||||
parsedLenDelta = checkGrandfatheredLen - replacementLen;
|
||||
uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
|
||||
if (checkGrandfatheredLen != tagLen) {
|
||||
uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
size_t parsedLenDelta = 0;
|
||||
if (grandfatheredLen == 0) {
|
||||
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
|
||||
const char* redundantTag = REDUNDANT[i];
|
||||
|
@ -2400,8 +2416,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
}
|
||||
|
||||
if (parsedLen != NULL) {
|
||||
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
|
||||
(int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
|
||||
*parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
|
||||
}
|
||||
|
||||
return t.orphan();
|
||||
|
|
|
@ -6160,8 +6160,8 @@ static const struct {
|
|||
/* #9562 IANA language tag data update */
|
||||
{"en-gb-oed", "en_GB_OXENDICT", FULL_LENGTH},
|
||||
{"i-navajo", "nv", FULL_LENGTH},
|
||||
{"i-navajo-a-foo", "", 0},
|
||||
{"i-navajo-latn-us", "", 0},
|
||||
{"i-navajo-a-foo", "nv@a=foo", FULL_LENGTH},
|
||||
{"i-navajo-latn-us", "nv_Latn_US", FULL_LENGTH},
|
||||
{"sgn-br", "bzs", FULL_LENGTH},
|
||||
{"sgn-br-u-co-phonebk", "bzs@collation=phonebook", FULL_LENGTH},
|
||||
{"ja-latn-hepburn-heploc", "ja_Latn__ALALC97", FULL_LENGTH},
|
||||
|
|
|
@ -748,9 +748,12 @@ TestUCaseMap(void) {
|
|||
/* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
|
||||
ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
|
||||
locale=ucasemap_getLocale(csm);
|
||||
if(0!=strncmp(locale, "i-klingon", 9)) {
|
||||
// "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized
|
||||
// into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog"
|
||||
// and "the" will be treated as an extlang which replaces "tlh".
|
||||
if(0!=strncmp(locale, "the", 3)) {
|
||||
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
|
||||
" does not start with \"i-klingon\"\n", locale);
|
||||
" does not start with \"the\"\n", locale);
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
|
|
|
@ -248,6 +248,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
|||
TESTCASE_AUTO(TestIsRightToLeft);
|
||||
TESTCASE_AUTO(TestBug13277);
|
||||
TESTCASE_AUTO(TestBug13554);
|
||||
TESTCASE_AUTO(TestBug20410);
|
||||
TESTCASE_AUTO(TestForLanguageTag);
|
||||
TESTCASE_AUTO(TestToLanguageTag);
|
||||
TESTCASE_AUTO(TestMoveAssign);
|
||||
|
@ -2965,6 +2966,32 @@ void LocaleTest::TestBug13554() {
|
|||
}
|
||||
}
|
||||
|
||||
// Regression test for ICU-20410: a grandfathered language tag that is
// followed by extension / private-use subtags. Each case compares the
// resulting Locale's getName() against an expected locale ID.
void LocaleTest::TestBug20410() {
|
||||
IcuTestErrorCode status(*this, "TestBug20410()");
|
||||
|
||||
// Case 1: forLanguageTag() on "art-lojban" plus a private-use subtag;
// the expected name uses "jbo" and keeps the trailing part as "@x=0".
static const char tag1[] = "art-lojban-x-0";
|
||||
static const Locale expected1("jbo@x=0");
|
||||
Locale result1 = Locale::forLanguageTag(tag1, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag1);
|
||||
assertEquals(tag1, expected1.getName(), result1.getName());
|
||||
|
||||
// Case 2: forLanguageTag() on "zh-xiang" followed by a -u- extension and
// a private-use subtag; the expected name uses "hsn" with both keywords.
static const char tag2[] = "zh-xiang-u-nu-thai-x-0";
|
||||
static const Locale expected2("hsn@numbers=thai;x=0");
|
||||
Locale result2 = Locale::forLanguageTag(tag2, status);
|
||||
status.errIfFailureAndReset("\"%s\"", tag2);
|
||||
assertEquals(tag2, expected2.getName(), result2.getName());
|
||||
|
||||
// Case 3: createCanonical() on the ICU-style ID "art__lojban@x=0";
// the expected name keeps "art" and only upper-cases the variant.
static const char locid3[] = "art__lojban@x=0";
|
||||
Locale result3 = Locale::createCanonical(locid3);
|
||||
static const Locale expected3("art__LOJBAN@x=0");
|
||||
assertEquals(locid3, expected3.getName(), result3.getName());
|
||||
|
||||
// Case 4: createCanonical() on the same string as case 1; the expected
// name is again "jbo@x=0".
static const char locid4[] = "art-lojban-x-0";
|
||||
Locale result4 = Locale::createCanonical(locid4);
|
||||
static const Locale expected4("jbo@x=0");
|
||||
assertEquals(locid4, expected4.getName(), result4.getName());
|
||||
}
|
||||
|
||||
void LocaleTest::TestForLanguageTag() {
|
||||
IcuTestErrorCode status(*this, "TestForLanguageTag()");
|
||||
|
||||
|
|
|
@ -114,6 +114,7 @@ public:
|
|||
void TestBug11421();
|
||||
void TestBug13277();
|
||||
void TestBug13554();
|
||||
void TestBug20410();
|
||||
|
||||
void TestAddLikelySubtags();
|
||||
void TestMinimizeSubtags();
|
||||
|
|
|
@ -169,9 +169,20 @@ public class LanguageTag {
|
|||
|
||||
// Check if the tag is grandfathered
|
||||
String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
|
||||
// Language tag is at least 2 alpha so we can skip searching the first 2 chars.
|
||||
int dash = 2;
|
||||
while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
|
||||
gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
|
||||
}
|
||||
|
||||
if (gfmap != null) {
|
||||
// use preferred mapping
|
||||
itr = new StringTokenIterator(gfmap[1], SEP);
|
||||
if (gfmap[0].length() == languageTag.length()) {
|
||||
// use preferred mapping
|
||||
itr = new StringTokenIterator(gfmap[1], SEP);
|
||||
} else {
|
||||
// append the rest of the tag.
|
||||
itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
|
||||
}
|
||||
isGrandfathered = true;
|
||||
} else {
|
||||
itr = new StringTokenIterator(languageTag, SEP);
|
||||
|
|
|
@ -4187,6 +4187,9 @@ public class ULocaleTest extends TestFmwk {
|
|||
{"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", NOERROR},
|
||||
{"de-latn-DE-1901-u-co-phonebk-co-pinyin-ca-gregory", "de_Latn_DE_1901@calendar=gregorian;collation=phonebook", NOERROR},
|
||||
{"th-u-kf-nu-thai-kf-false", "th@colcasefirst=yes;numbers=thai", NOERROR},
|
||||
/* #20410 */
|
||||
{"art-lojban-x-0", "jbo@x=0", NOERROR},
|
||||
{"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
|
||||
};
|
||||
|
||||
for (int i = 0; i < langtag_to_locale.length; i++) {
|
||||
|
|
Loading…
Add table
Reference in a new issue