From 1b768edbdf9a8ffa7d27afec9913c9386e0b1bd5 Mon Sep 17 00:00:00 2001 From: Fredrik Roubert Date: Wed, 3 Jan 2024 10:20:21 +0900 Subject: [PATCH] ICU-22520 Update all users of ulocimp_get*() to ulocimp_getSubtags(). This simplifies the code by removing the need for finding the positions of the subtags, all that logic is now in just one single place. --- icu4c/source/common/loclikely.cpp | 37 ++-------- icu4c/source/common/uloc.cpp | 115 ++++++++++-------------------- icu4c/source/common/uresbund.cpp | 27 +++---- 3 files changed, 55 insertions(+), 124 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index bbcc0017380..783201437f2 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -156,52 +156,27 @@ parseTagString( icu::CharString& region, UErrorCode* err) { + icu::CharString variant; const char* position = localeID; if (U_FAILURE(*err) || localeID == nullptr) { goto error; } - lang = ulocimp_getLanguage(position, &position, *err); + ulocimp_getSubtags(localeID, &lang, &script, &region, &variant, &position, *err); - /* - * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING - * to be an error, because it indicates the user-supplied tag is - * not well-formed. - */ if(U_FAILURE(*err)) { goto error; } - /* - * If no language was present, use the empty string instead. - * Otherwise, move past any separator. - */ + if (!variant.isEmpty()) { + position -= 1 + variant.length(); + } + if (_isIDSeparator(*position)) { ++position; } - script = ulocimp_getScript(position, &position, *err); - - if(U_FAILURE(*err)) { - goto error; - } - - if (!script.isEmpty()) { - /* - * Move past any separator.
- */ - if (_isIDSeparator(*position)) { - ++position; - } - } - - region = ulocimp_getCountry(position, &position, *err); - - if(U_FAILURE(*err)) { - goto error; - } - if (region.isEmpty() && *position != 0 && *position != '@') { /* back up over consumed trailing separator */ --position; diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 97fe5d442f1..b7d7c858e78 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -1569,32 +1569,18 @@ uloc_openKeywords(const char* localeID, tmpLocaleID=localeID; } - /* Skip the language */ - ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status); + ulocimp_getSubtags( + tmpLocaleID, + nullptr, + nullptr, + nullptr, + nullptr, + &tmpLocaleID, + *status); if (U_FAILURE(*status)) { return 0; } - if(_isIDSeparator(*tmpLocaleID)) { - const char *scriptID; - /* Skip the script if available */ - ulocimp_getScript(tmpLocaleID+1, &scriptID, *status); - if (U_FAILURE(*status)) { - return 0; - } - if(scriptID != tmpLocaleID+1) { - /* Found optional script */ - tmpLocaleID = scriptID; - } - /* Skip the Country */ - if (_isIDSeparator(*tmpLocaleID)) { - ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status); - if (U_FAILURE(*status)) { - return 0; - } - } - } - /* keywords are located after '@' */ if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) { CharString keywords; @@ -1634,7 +1620,7 @@ _canonicalize(const char* localeID, return; } - int32_t j, fieldCount=0, scriptSize=0, variantSize=0; + int32_t j, fieldCount=0; CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this const char* origLocaleID; @@ -1671,57 +1657,41 @@ _canonicalize(const char* localeID, origLocaleID=tmpLocaleID; /* get all pieces, one after another, and separate with '_' */ - CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err); + CharString tag; 
+ CharString script; + CharString country; + CharString variant; + ulocimp_getSubtags( + tmpLocaleID, + &tag, + &script, + &country, + &variant, + &tmpLocaleID, + *err); if (tag.length() == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) { tag.clear(); tag.append(uloc_getDefault(), *err); - } else if(_isIDSeparator(*tmpLocaleID)) { - const char *scriptID; - - ++fieldCount; - tag.append('_', *err); - - CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err); - tag.append(script, *err); - scriptSize = script.length(); - if(scriptSize > 0) { - /* Found optional script */ - tmpLocaleID = scriptID; + } else { + if (!script.isEmpty()) { ++fieldCount; - if (_isIDSeparator(*tmpLocaleID)) { - /* If there is something else, then we add the _ */ + tag.append('_', *err); + tag.append(script, *err); + } + if (!country.isEmpty()) { + ++fieldCount; + tag.append('_', *err); + tag.append(country, *err); + } + if (!variant.isEmpty()) { + ++fieldCount; + if (country.isEmpty()) { tag.append('_', *err); } - } - - if (_isIDSeparator(*tmpLocaleID)) { - const char *cntryID; - - CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err); - tag.append(country, *err); - if (!country.isEmpty()) { - /* Found optional country */ - tmpLocaleID = cntryID; - } - if(_isIDSeparator(*tmpLocaleID)) { - /* If there is something else, then we add the _ if we found country before. 
*/ - if (!_isIDSeparator(*(tmpLocaleID+1))) { - ++fieldCount; - tag.append('_', *err); - } - - variantSize = -tag.length(); - { - CharStringByteSink s(&tag); - _getVariant(tmpLocaleID+1, *tmpLocaleID, &s, nullptr, false); - } - variantSize += tag.length(); - if (variantSize > 0) { - tmpLocaleID += variantSize + 1; /* skip '_' and variant */ - } - } + tag.append('_', *err); + tag.append(variant, *err); } } @@ -1767,22 +1737,15 @@ _canonicalize(const char* localeID, /* Handle @FOO variant if @ is present and not followed by = */ if (tmpLocaleID!=nullptr && keywordAssign==nullptr) { /* Add missing '_' if needed */ - if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { + if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) { do { tag.append('_', *err); ++fieldCount; } while(fieldCount<2); } - int32_t posixVariantSize = -tag.length(); - { - CharStringByteSink s(&tag); - _getVariant(tmpLocaleID+1, '@', &s, nullptr, (UBool)(variantSize > 0)); - } - posixVariantSize += tag.length(); - if (posixVariantSize > 0) { - variantSize += posixVariantSize; - } + CharStringByteSink s(&tag); + _getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty()); } /* Look up the ID in the canonicalization map */ diff --git a/icu4c/source/common/uresbund.cpp b/icu4c/source/common/uresbund.cpp index 6886e78e50b..1dc86358113 100644 --- a/icu4c/source/common/uresbund.cpp +++ b/icu4c/source/common/uresbund.cpp @@ -209,17 +209,11 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope } UErrorCode err = U_ZERO_ERROR; - const char* tempNamePtr = name; - CharString language = ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err); - if (*tempNamePtr == '_') { - ++tempNamePtr; - } - CharString script = ulocimp_getScript(tempNamePtr, &tempNamePtr, err); - if (*tempNamePtr == '_') { - ++tempNamePtr; - } - CharString region = ulocimp_getCountry(tempNamePtr, &tempNamePtr, err); - CharString workingLocale; + CharString language; + CharString script; + CharString 
region; + ulocimp_getSubtags(name, &language, &script, &region, nullptr, nullptr, err); + if (U_FAILURE(err)) { // hopefully this never happens... return chopLocale(name); @@ -238,6 +232,8 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope } } + CharString workingLocale; + // if it's not in the parent locale table, figure out the fallback script algorithmically // (see CLDR-15265 for an explanation of the algorithm) if (!script.isEmpty() && !region.isEmpty()) { @@ -254,12 +250,9 @@ static bool getParentLocaleID(char *name, const char *origName, UResOpenType ope // - if yes, replace the region with the script from the original locale ID // - if no, replace the region with the default script for that language and region UErrorCode err = U_ZERO_ERROR; - tempNamePtr = origName; - CharString origNameLanguage = ulocimp_getLanguage(tempNamePtr, &tempNamePtr, err); - if (*tempNamePtr == '_') { - ++tempNamePtr; - } - CharString origNameScript = ulocimp_getScript(origName, nullptr, err); + CharString origNameLanguage; + CharString origNameScript; + ulocimp_getSubtags(origName, &origNameLanguage, &origNameScript, nullptr, nullptr, nullptr, err); if (!origNameScript.isEmpty()) { workingLocale.append(language, err).append("_", err).append(origNameScript, err); } else {