ICU-22520 Update all users of ulocimp_get*() to ulocimp_getSubtags().

This simplifies the code by removing the need to find the positions
of the subtags; all that logic now lives in one single place.
This commit is contained in:
Fredrik Roubert 2024-01-03 10:20:21 +09:00 committed by Fredrik Roubert
parent dc70b5a056
commit 1b768edbdf
3 changed files with 55 additions and 124 deletions

View file

@ -156,52 +156,27 @@ parseTagString(
icu::CharString& region,
UErrorCode* err)
{
icu::CharString variant;
const char* position = localeID;
if (U_FAILURE(*err) || localeID == nullptr) {
goto error;
}
lang = ulocimp_getLanguage(position, &position, *err);
ulocimp_getSubtags(localeID, &lang, &script, &region, &variant, &position, *err);
/*
* Note that we explicitly consider U_STRING_NOT_TERMINATED_WARNING
* to be an error, because it indicates the user-supplied tag is
* not well-formed.
*/
if(U_FAILURE(*err)) {
goto error;
}
/*
* If no language was present, use the empty string instead.
* Otherwise, move past any separator.
*/
if (!variant.isEmpty()) {
position -= 1 + variant.length();
}
if (_isIDSeparator(*position)) {
++position;
}
script = ulocimp_getScript(position, &position, *err);
if(U_FAILURE(*err)) {
goto error;
}
if (!script.isEmpty()) {
/*
* Move past any separator.
*/
if (_isIDSeparator(*position)) {
++position;
}
}
region = ulocimp_getCountry(position, &position, *err);
if(U_FAILURE(*err)) {
goto error;
}
if (region.isEmpty() && *position != 0 && *position != '@') {
/* back up over consumed trailing separator */
--position;

View file

@ -1569,32 +1569,18 @@ uloc_openKeywords(const char* localeID,
tmpLocaleID=localeID;
}
/* Skip the language */
ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
ulocimp_getSubtags(
tmpLocaleID,
nullptr,
nullptr,
nullptr,
nullptr,
&tmpLocaleID,
*status);
if (U_FAILURE(*status)) {
return 0;
}
if(_isIDSeparator(*tmpLocaleID)) {
const char *scriptID;
/* Skip the script if available */
ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
if (U_FAILURE(*status)) {
return 0;
}
if(scriptID != tmpLocaleID+1) {
/* Found optional script */
tmpLocaleID = scriptID;
}
/* Skip the Country */
if (_isIDSeparator(*tmpLocaleID)) {
ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
if (U_FAILURE(*status)) {
return 0;
}
}
}
/* keywords are located after '@' */
if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) {
CharString keywords;
@ -1634,7 +1620,7 @@ _canonicalize(const char* localeID,
return;
}
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
int32_t j, fieldCount=0;
CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
CharString localeIDWithHyphens; // if localeID has a BCP47 extension and has _, tmpLocaleID points to this
const char* origLocaleID;
@ -1671,57 +1657,41 @@ _canonicalize(const char* localeID,
origLocaleID=tmpLocaleID;
/* get all pieces, one after another, and separate with '_' */
CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
CharString tag;
CharString script;
CharString country;
CharString variant;
ulocimp_getSubtags(
tmpLocaleID,
&tag,
&script,
&country,
&variant,
&tmpLocaleID,
*err);
if (tag.length() == I_DEFAULT_LENGTH &&
uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
tag.clear();
tag.append(uloc_getDefault(), *err);
} else if(_isIDSeparator(*tmpLocaleID)) {
const char *scriptID;
++fieldCount;
tag.append('_', *err);
CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
tag.append(script, *err);
scriptSize = script.length();
if(scriptSize > 0) {
/* Found optional script */
tmpLocaleID = scriptID;
} else {
if (!script.isEmpty()) {
++fieldCount;
if (_isIDSeparator(*tmpLocaleID)) {
/* If there is something else, then we add the _ */
tag.append('_', *err);
tag.append(script, *err);
}
if (!country.isEmpty()) {
++fieldCount;
tag.append('_', *err);
tag.append(country, *err);
}
if (!variant.isEmpty()) {
++fieldCount;
if (country.isEmpty()) {
tag.append('_', *err);
}
}
if (_isIDSeparator(*tmpLocaleID)) {
const char *cntryID;
CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
tag.append(country, *err);
if (!country.isEmpty()) {
/* Found optional country */
tmpLocaleID = cntryID;
}
if(_isIDSeparator(*tmpLocaleID)) {
/* If there is something else, then we add the _ if we found country before. */
if (!_isIDSeparator(*(tmpLocaleID+1))) {
++fieldCount;
tag.append('_', *err);
}
variantSize = -tag.length();
{
CharStringByteSink s(&tag);
_getVariant(tmpLocaleID+1, *tmpLocaleID, &s, nullptr, false);
}
variantSize += tag.length();
if (variantSize > 0) {
tmpLocaleID += variantSize + 1; /* skip '_' and variant */
}
}
tag.append('_', *err);
tag.append(variant, *err);
}
}
@ -1767,22 +1737,15 @@ _canonicalize(const char* localeID,
/* Handle @FOO variant if @ is present and not followed by = */
if (tmpLocaleID!=nullptr && keywordAssign==nullptr) {
/* Add missing '_' if needed */
if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
do {
tag.append('_', *err);
++fieldCount;
} while(fieldCount<2);
}
int32_t posixVariantSize = -tag.length();
{
CharStringByteSink s(&tag);
_getVariant(tmpLocaleID+1, '@', &s, nullptr, (UBool)(variantSize > 0));
}
posixVariantSize += tag.length();
if (posixVariantSize > 0) {
variantSize += posixVariantSize;
}
CharStringByteSink s(&tag);
_getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty());
}
/* Look up the ID in the canonicalization map */

View file

@ -209,17 +209,11 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope
}
UErrorCode err = U_ZERO_ERROR;
const char* tempNamePtr = name;
CharString language = ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err);
if (*tempNamePtr == '_') {
++tempNamePtr;
}
CharString script = ulocimp_getScript(tempNamePtr, &tempNamePtr, err);
if (*tempNamePtr == '_') {
++tempNamePtr;
}
CharString region = ulocimp_getCountry(tempNamePtr, &tempNamePtr, err);
CharString workingLocale;
CharString language;
CharString script;
CharString region;
ulocimp_getSubtags(name, &language, &script, &region, nullptr, nullptr, err);
if (U_FAILURE(err)) {
// hopefully this never happens...
return chopLocale(name);
@ -238,6 +232,8 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope
}
}
CharString workingLocale;
// if it's not in the parent locale table, figure out the fallback script algorithmically
// (see CLDR-15265 for an explanation of the algorithm)
if (!script.isEmpty() && !region.isEmpty()) {
@ -254,12 +250,9 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope
// - if yes, replace the region with the script from the original locale ID
// - if no, replace the region with the default script for that language and region
UErrorCode err = U_ZERO_ERROR;
tempNamePtr = origName;
CharString origNameLanguage = ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err);
if (*tempNamePtr == '_') {
++tempNamePtr;
}
CharString origNameScript = ulocimp_getScript(origName, nullptr, err);
CharString origNameLanguage;
CharString origNameScript;
ulocimp_getSubtags(origName, &origNameLanguage, &origNameScript, nullptr, nullptr, nullptr, err);
if (!origNameScript.isEmpty()) {
workingLocale.append(language, err).append("_", err).append(origNameScript, err);
} else {