ICU-20410 Fix grandfathered tag w/ extensions

This commit is contained in:
Frank Tang 2019-02-11 13:47:07 -08:00 committed by Frank Yung-Fong Tang
parent 0ef0629736
commit 94ff6b1024
7 changed files with 72 additions and 12 deletions

View file

@ -2063,13 +2063,26 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
return t.orphan();
}
size_t parsedLenDelta = 0;
// A grandfathered tag is matched as a whole. A grandfathered tag with an intervening
// script or region, such as art-DE-lojban or art-Latn-lojban, won't be
// matched.
/* check if the tag is grandfathered */
for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
if (tagLen < checkGrandfatheredLen) {
continue;
}
if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
// make sure next char is '-'.
continue;
}
if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
int32_t newTagLength;
grandfatheredLen = tagLen; /* back up for output parsedLen */
newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
grandfatheredLen = checkGrandfatheredLen; /* back up for output parsedLen */
int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
if (tagLen < newTagLength) {
uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1);
@ -2080,12 +2093,15 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
t->buf = tagBuf;
tagLen = newTagLength;
}
parsedLenDelta = checkGrandfatheredLen - replacementLen;
uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
if (checkGrandfatheredLen != tagLen) {
uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
}
break;
}
}
size_t parsedLenDelta = 0;
if (grandfatheredLen == 0) {
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
const char* redundantTag = REDUNDANT[i];
@ -2400,8 +2416,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
if (parsedLen != NULL) {
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
(int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
*parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
}
return t.orphan();

View file

@ -6160,8 +6160,8 @@ static const struct {
/* #9562 IANA language tag data update */
{"en-gb-oed", "en_GB_OXENDICT", FULL_LENGTH},
{"i-navajo", "nv", FULL_LENGTH},
{"i-navajo-a-foo", "", 0},
{"i-navajo-latn-us", "", 0},
{"i-navajo-a-foo", "nv@a=foo", FULL_LENGTH},
{"i-navajo-latn-us", "nv_Latn_US", FULL_LENGTH},
{"sgn-br", "bzs", FULL_LENGTH},
{"sgn-br-u-co-phonebk", "bzs@collation=phonebook", FULL_LENGTH},
{"ja-latn-hepburn-heploc", "ja_Latn__ALALC97", FULL_LENGTH},

View file

@ -748,9 +748,12 @@ TestUCaseMap(void) {
/* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
locale=ucasemap_getLocale(csm);
if(0!=strncmp(locale, "i-klingon", 9)) {
// "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized
// into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog"
// and "the" will be treated as an extlang which replaces "tlh".
if(0!=strncmp(locale, "the", 3)) {
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
" does not start with \"i-klingon\"\n", locale);
" does not start with \"the\"\n", locale);
}
errorCode=U_ZERO_ERROR;

View file

@ -248,6 +248,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestIsRightToLeft);
TESTCASE_AUTO(TestBug13277);
TESTCASE_AUTO(TestBug13554);
TESTCASE_AUTO(TestBug20410);
TESTCASE_AUTO(TestForLanguageTag);
TESTCASE_AUTO(TestToLanguageTag);
TESTCASE_AUTO(TestMoveAssign);
@ -2965,6 +2966,32 @@ void LocaleTest::TestBug13554() {
}
}
void LocaleTest::TestBug20410() {
IcuTestErrorCode status(*this, "TestBug20410()");
static const char tag1[] = "art-lojban-x-0";
static const Locale expected1("jbo@x=0");
Locale result1 = Locale::forLanguageTag(tag1, status);
status.errIfFailureAndReset("\"%s\"", tag1);
assertEquals(tag1, expected1.getName(), result1.getName());
static const char tag2[] = "zh-xiang-u-nu-thai-x-0";
static const Locale expected2("hsn@numbers=thai;x=0");
Locale result2 = Locale::forLanguageTag(tag2, status);
status.errIfFailureAndReset("\"%s\"", tag2);
assertEquals(tag2, expected2.getName(), result2.getName());
static const char locid3[] = "art__lojban@x=0";
Locale result3 = Locale::createCanonical(locid3);
static const Locale expected3("art__LOJBAN@x=0");
assertEquals(locid3, expected3.getName(), result3.getName());
static const char locid4[] = "art-lojban-x-0";
Locale result4 = Locale::createCanonical(locid4);
static const Locale expected4("jbo@x=0");
assertEquals(locid4, expected4.getName(), result4.getName());
}
void LocaleTest::TestForLanguageTag() {
IcuTestErrorCode status(*this, "TestForLanguageTag()");

View file

@ -114,6 +114,7 @@ public:
void TestBug11421();
void TestBug13277();
void TestBug13554();
void TestBug20410();
void TestAddLikelySubtags();
void TestMinimizeSubtags();

View file

@ -169,9 +169,20 @@ public class LanguageTag {
// Check if the tag is grandfathered
String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
// Language tag is at least 2 alpha so we can skip searching the first 2 chars.
int dash = 2;
while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
}
if (gfmap != null) {
// use preferred mapping
itr = new StringTokenIterator(gfmap[1], SEP);
if (gfmap[0].length() == languageTag.length()) {
// use preferred mapping
itr = new StringTokenIterator(gfmap[1], SEP);
} else {
// append the rest of the tag.
itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
}
isGrandfathered = true;
} else {
itr = new StringTokenIterator(languageTag, SEP);

View file

@ -4187,6 +4187,9 @@ public class ULocaleTest extends TestFmwk {
{"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", NOERROR},
{"de-latn-DE-1901-u-co-phonebk-co-pinyin-ca-gregory", "de_Latn_DE_1901@calendar=gregorian;collation=phonebook", NOERROR},
{"th-u-kf-nu-thai-kf-false", "th@colcasefirst=yes;numbers=thai", NOERROR},
/* #20410 */
{"art-lojban-x-0", "jbo@x=0", NOERROR},
{"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
};
for (int i = 0; i < langtag_to_locale.length; i++) {