ICU-20447 fix uloc_getName(x) same as Locale(x).getName() etc. for ""/"und"/"root"

This commit is contained in:
Fredrik Roubert 2019-02-22 00:06:15 +01:00 committed by Fredrik Roubert
parent c0598f401d
commit c3abe48e1c
9 changed files with 285 additions and 86 deletions

View file

@ -26,7 +26,6 @@
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
@ -375,7 +374,12 @@ _getDisplayNameForComponent(const char *locale,
return 0;
}
if(length==0) {
return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
// For the display name, we treat this as unknown language (ICU-20273).
if (getter == uloc_getLanguage) {
uprv_strcpy(localeBuffer, "und");
} else {
return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
}
}
root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;
@ -507,22 +511,6 @@ uloc_getDisplayName(const char *locale,
return 0;
}
// For the display name, we treat this as unknown language (ICU-20273).
static const char UND[] = "und";
CharString und;
if (locale != NULL) {
if (*locale == '\0') {
locale = UND;
} else if (*locale == '_') {
und.append(UND, *pErrorCode);
und.append(locale, *pErrorCode);
if (U_FAILURE(*pErrorCode)) {
return 0;
}
locale = und.data();
}
}
{
UErrorCode status = U_ZERO_ERROR;

View file

@ -626,19 +626,6 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
variantBegin = (int32_t)(field[variantField] - fullName);
}
if (length == 4 && uprv_stricmp(fullName, "root") == 0) {
length = 0;
variantBegin = 0;
language[0] = '\0';
fullName[0] = '\0';
} else if (length >= 3 && uprv_strnicmp(fullName, "und", 3) == 0 &&
(length == 3 || fullName[3] == '_' || fullName[3] == '@')) {
length -= 3;
variantBegin -= 3;
language[0] = '\0';
uprv_memmove(fullName, fullName + 3, length + 1);
}
err = U_ZERO_ERROR;
initBaseName(err);
if (U_FAILURE(err)) {

View file

@ -34,6 +34,13 @@
#include "ulocimp.h"
#include "ustr_imp.h"
/**
* These are the canonical strings for unknown languages, scripts and regions.
**/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
/**
* This function looks for the localeID in the likelySubtags resource.
*
@ -55,6 +62,19 @@ findLikelySubtags(const char* localeID,
UErrorCode tmpErr = U_ZERO_ERROR;
icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
if (U_SUCCESS(tmpErr)) {
icu::CharString und;
if (localeID != NULL) {
if (*localeID == '\0') {
localeID = unknownLanguage;
} else if (*localeID == '_') {
und.append(unknownLanguage, *err);
und.append(localeID, *err);
if (U_FAILURE(*err)) {
return NULL;
}
localeID = und.data();
}
}
s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
if (U_FAILURE(tmpErr)) {
@ -72,6 +92,11 @@ findLikelySubtags(const char* localeID,
}
else {
u_UCharsToChars(s, buffer, resLen + 1);
if (resLen >= 3 &&
uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
(resLen == 3 || buffer[3] == '_')) {
uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
}
result = buffer;
}
} else {
@ -97,9 +122,10 @@ appendTag(
const char* tag,
int32_t tagLength,
char* buffer,
int32_t* bufferLength) {
int32_t* bufferLength,
UBool withSeparator) {
if (*bufferLength > 0) {
if (withSeparator) {
buffer[*bufferLength] = '_';
++(*bufferLength);
}
@ -112,13 +138,6 @@ appendTag(
*bufferLength += tagLength;
}
/**
* These are the canonical strings for unknown languages, scripts and regions.
**/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
/**
* Create a tag string from the supplied parameters. The lang, script and region
* parameters may be NULL pointers. If they are, their corresponding length parameters
@ -189,18 +208,14 @@ createTagStringWithAlternates(
lang,
langLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/FALSE);
}
else if (alternateTags == NULL) {
/*
* Append the value for an unknown language, if
* Use the empty string for an unknown language, if
* we found no language.
*/
appendTag(
unknownLanguage,
(int32_t)uprv_strlen(unknownLanguage),
tagBuffer,
&tagLength);
}
else {
/*
@ -221,21 +236,17 @@ createTagStringWithAlternates(
}
else if (alternateLangLength == 0) {
/*
* Append the value for an unknown language, if
* Use the empty string for an unknown language, if
* we found no language.
*/
appendTag(
unknownLanguage,
(int32_t)uprv_strlen(unknownLanguage),
tagBuffer,
&tagLength);
}
else {
appendTag(
alternateLang,
alternateLangLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/FALSE);
}
}
@ -244,7 +255,8 @@ createTagStringWithAlternates(
script,
scriptLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/TRUE);
}
else if (alternateTags != NULL) {
/*
@ -268,7 +280,8 @@ createTagStringWithAlternates(
alternateScript,
alternateScriptLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/TRUE);
}
}
@ -277,7 +290,8 @@ createTagStringWithAlternates(
region,
regionLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/TRUE);
regionAppended = TRUE;
}
@ -302,7 +316,8 @@ createTagStringWithAlternates(
alternateRegion,
alternateRegionLength,
tagBuffer,
&tagLength);
&tagLength,
/*withSeparator=*/TRUE);
regionAppended = TRUE;
}
@ -464,15 +479,9 @@ parseTagString(
*langLength = subtagLength;
/*
* If no language was present, use the value of unknownLanguage
* instead. Otherwise, move past any separator.
* If no language was present, use the empty string instead.
* Otherwise, move past any separator.
*/
if (*langLength == 0) {
uprv_strcpy(
lang,
unknownLanguage);
*langLength = (int32_t)uprv_strlen(lang);
}
if (_isIDSeparator(*position)) {
++position;
}
@ -1003,7 +1012,7 @@ _uloc_minimizeSubtags(const char* localeID,
if(U_FAILURE(*err)) {
goto error;
}
else if (uprv_strnicmp(
else if (!tagBuffer.isEmpty() && uprv_strnicmp(
maximizedTagBuffer.data(),
tagBuffer.data(),
tagBuffer.length()) == 0) {

View file

@ -1180,6 +1180,16 @@ ulocimp_getLanguage(const char *localeID,
int32_t offset;
char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
if (uprv_stricmp(localeID, "root") == 0) {
localeID += 4;
} else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
(localeID[3] == '\0' ||
localeID[3] == '-' ||
localeID[3] == '_' ||
localeID[3] == '@')) {
localeID += 3;
}
/* if it starts with i- or x- then copy that prefix */
if(_isIDPrefix(localeID)) {
if(i<languageCapacity) {
@ -1777,9 +1787,16 @@ uloc_getParent(const char* localeID,
i=0;
}
if(i>0 && parent != localeID) {
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
if (i > 0) {
if (uprv_strnicmp(localeID, "und_", 4) == 0) {
localeID += 3;
i -= 3;
uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
} else if (parent != localeID) {
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
}
}
return u_terminateChars(parent, parentCapacity, i, err);
}

View file

@ -622,8 +622,11 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
setParseError("expected language tag in [import langTag]", errorCode);
return;
}
if(length == 3 && uprv_memcmp(baseID, "und", 3) == 0) {
if(length == 0) {
uprv_strcpy(baseID, "root");
} else if(*baseID == '_') {
uprv_memmove(baseID + 3, baseID, length + 1);
uprv_memcpy(baseID, "und", 3);
}
// @collation=type, or length=0 if not specified
char collationType[ULOC_KEYWORDS_CAPACITY];

View file

@ -1554,11 +1554,7 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
"collation", locale,
NULL, &errorCode);
if(U_FAILURE(errorCode)) { return 0; }
if(length == 0) {
uprv_strcpy(resultLocale, "root");
} else {
resultLocale[length] = 0;
}
resultLocale[length] = 0;
// Append items in alphabetic order of their short definition letters.
CharString result;
@ -1585,7 +1581,11 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
appendSubtag(result, 'K', subtag, length, errorCode);
length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
appendSubtag(result, 'L', subtag, length, errorCode);
if (length == 0) {
appendSubtag(result, 'L', "root", 4, errorCode);
} else {
appendSubtag(result, 'L', subtag, length, errorCode);
}
if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
}

View file

@ -3563,7 +3563,7 @@ const char* const basic_minimize_data[][2] = {
"de_Latn_DE_POSIX_1901",
"de__POSIX_1901"
}, {
"und",
"",
""
}, {
"en_Latn_US@calendar=gregorian",
@ -5098,8 +5098,8 @@ const char* const full_data[][3] = {
"zh_HK"
}, {
"und_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Zzzz",
"en_Latn_US",
@ -5122,8 +5122,8 @@ const char* const full_data[][3] = {
"zh_HK"
}, {
"und_Zzzz_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Latn",
"en_Latn_US",
@ -5146,8 +5146,8 @@ const char* const full_data[][3] = {
"zh_Latn_HK"
}, {
"und_Latn_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Hans",
"zh_Hans_CN",
@ -5218,8 +5218,8 @@ const char* const full_data[][3] = {
"zh_Moon_HK"
}, {
"und_Moon_AQ",
"und_Moon_AQ",
"und_Moon_AQ"
"_Moon_AQ",
"_Moon_AQ"
}, {
"es",
"es_Latn_ES",
@ -6520,7 +6520,7 @@ typedef struct {
} BadLocaleItem;
static const BadLocaleItem badLocaleItems[] = {
{ "-9223372036854775808", "en", "9223372036854775808", U_USING_DEFAULT_WARNING },
{ "-9223372036854775808", "en", "Unknown language (9223372036854775808)", U_USING_DEFAULT_WARNING },
/* add more in the future */
{ NULL, NULL, NULL, U_ZERO_ERROR } /* terminator */
};

View file

@ -261,6 +261,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestUnd);
TESTCASE_AUTO(TestUndScript);
TESTCASE_AUTO(TestUndRegion);
TESTCASE_AUTO(TestUndCAPI);
TESTCASE_AUTO_END;
}
@ -3544,3 +3545,196 @@ void LocaleTest::TestUndRegion() {
assertEquals("getDisplayName()", displayName, locale_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_build.getDisplayName(displayLocale, tmp));
}
void LocaleTest::TestUndCAPI() {
IcuTestErrorCode status(*this, "TestUndCAPI()");
static const char empty[] = "";
static const char root[] = "root";
static const char und[] = "und";
static const char empty_script[] = "_Cyrl";
static const char empty_region[] = "_AQ";
static const char und_script[] = "und_Cyrl";
static const char und_region[] = "und_AQ";
char tmp[ULOC_FULLNAME_CAPACITY];
int32_t reslen;
// uloc_getName()
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(empty, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(root, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", root);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(und, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(empty_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty_script, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(empty_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty_region, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(und_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty_script, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getName(und_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getName()", empty_region, tmp);
// uloc_getBaseName()
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(empty, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(root, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", root);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(und, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(empty_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty_script, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(empty_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty_region, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(und_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty_script, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getBaseName(und_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getBaseName()", empty_region, tmp);
// uloc_getParent()
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(empty, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(root, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", root);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(und, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(empty_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(empty_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(und_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getParent(und_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getParent()", empty, tmp);
// uloc_getLanguage()
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(empty, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(root, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", root);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(und, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(empty_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(empty_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", empty_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(und_script, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_script);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
uprv_memset(tmp, '!', sizeof tmp);
reslen = uloc_getLanguage(und_region, tmp, sizeof tmp, status);
status.errIfFailureAndReset("\"%s\"", und_region);
assertTrue("reslen >= 0", reslen >= 0);
assertEquals("uloc_getLanguage()", empty, tmp);
}

View file

@ -135,6 +135,7 @@ public:
void TestUnd();
void TestUndScript();
void TestUndRegion();
void TestUndCAPI();
private:
void _checklocs(const char* label,