diff --git a/icu4c/source/common/localebuilder.cpp b/icu4c/source/common/localebuilder.cpp index e53065a8a60..71969c847b1 100644 --- a/icu4c/source/common/localebuilder.cpp +++ b/icu4c/source/common/localebuilder.cpp @@ -1,6 +1,8 @@ // © 2019 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html +#include +#include #include #include "bytesinkutil.h" // StringByteSink @@ -162,12 +164,15 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len) // otherwise: unicode extension value // We need to convert from legacy key/value to unicode // key/value - const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key); - const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value); + std::optional unicode_locale_key = ulocimp_toBcpKeyWithFallback(key); + std::optional unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value); - return unicode_locale_key && unicode_locale_type && - ultag_isUnicodeLocaleKey(unicode_locale_key, -1) && - ultag_isUnicodeLocaleType(unicode_locale_type, -1); + return unicode_locale_key.has_value() && + unicode_locale_type.has_value() && + ultag_isUnicodeLocaleKey(unicode_locale_key->data(), + static_cast(unicode_locale_key->size())) && + ultag_isUnicodeLocaleType(unicode_locale_type->data(), + static_cast(unicode_locale_type->size())); } void diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index d608ba8a2ed..ff9ad3a4e3c 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -31,6 +31,8 @@ ****************************************************************************** */ +#include +#include #include #include "unicode/bytestream.h" @@ -1570,8 +1572,8 @@ AliasReplacer::replaceTransformedExtensions( // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue. *const_cast(tvalue++) = '\0'; // NUL terminate tkey output.append(tfield, status).append('-', status); - const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue); - output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status); + std::optional bcpTValue = ulocimp_toBcpType(tfield, tvalue); + output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status); } } if (U_FAILURE(status)) { @@ -2608,33 +2610,26 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, return; } - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - if (U_FAILURE(status)) { - return; - } - - const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); - if (legacy_key == nullptr) { + std::optional legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName); + if (!legacy_key.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - auto legacy_value = getKeywordValue(legacy_key, status); + auto legacy_value = getKeywordValue(*legacy_key, status); if (U_FAILURE(status)) { return; } - const char* unicode_value = uloc_toUnicodeLocaleType( - keywordName_nul.data(), legacy_value.data()); - - if (unicode_value == nullptr) { + std::optional unicode_value = + ulocimp_toBcpTypeWithFallback(keywordName, legacy_value.toStringPiece()); + if (!unicode_value.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - sink.Append(unicode_value, static_cast(uprv_strlen(unicode_value))); + sink.Append(unicode_value->data(), static_cast(unicode_value->size())); } void @@ -2699,32 +2694,25 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName, return; } - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - const CharString keywordValue_nul(keywordValue, status); - if (U_FAILURE(status)) { - return; - } - - const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); - if (legacy_key == nullptr) { + std::optional legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName); + if (!legacy_key.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - const char* legacy_value = nullptr; + std::string_view value; - if (!keywordValue_nul.isEmpty()) { - legacy_value = - uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data()); - - if (legacy_value == nullptr) { + if (!keywordValue.empty()) { + std::optional legacy_value = + ulocimp_toLegacyTypeWithFallback(keywordName, keywordValue); + if (!legacy_value.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } + value = *legacy_value; } - setKeywordValue(legacy_key, legacy_value, status); + setKeywordValue(*legacy_key, value, status); } const char * diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index a8621a8912c..6ce799442ef 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -30,6 +30,7 @@ l = lang, C = ctry, M = charmap, V = variant */ +#include #include #include @@ -2291,8 +2292,17 @@ uloc_getISOCountries() U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleKey(const char* keyword) { - const char* bcpKey = ulocimp_toBcpKey(keyword); - if (bcpKey == nullptr && ultag_isUnicodeLocaleKey(keyword, -1)) { + if (keyword == nullptr || *keyword == '\0') { return nullptr; } + std::optional result = ulocimp_toBcpKeyWithFallback(keyword); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional +ulocimp_toBcpKeyWithFallback(std::string_view keyword) +{ + std::optional bcpKey = ulocimp_toBcpKey(keyword); + if (!bcpKey.has_value() && + ultag_isUnicodeLocaleKey(keyword.data(), static_cast(keyword.size()))) { // unknown keyword, but syntax is fine.. return keyword; } @@ -2302,8 +2312,18 @@ uloc_toUnicodeLocaleKey(const char* keyword) U_CAPI const char* U_EXPORT2 uloc_toUnicodeLocaleType(const char* keyword, const char* value) { - const char* bcpType = ulocimp_toBcpType(keyword, value); - if (bcpType == nullptr && ultag_isUnicodeLocaleType(value, -1)) { + if (keyword == nullptr || *keyword == '\0' || + value == nullptr || *value == '\0') { return nullptr; } + std::optional result = ulocimp_toBcpTypeWithFallback(keyword, value); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional +ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value) +{ + std::optional bcpType = ulocimp_toBcpType(keyword, value); + if (!bcpType.has_value() && + ultag_isUnicodeLocaleType(value.data(), static_cast(value.size()))) { // unknown keyword, but syntax is fine.. return value; } @@ -2313,37 +2333,28 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value) namespace { bool -isWellFormedLegacyKey(const char* legacyKey) +isWellFormedLegacyKey(std::string_view key) { - const char* p = legacyKey; - while (*p) { - if (!UPRV_ISALPHANUM(*p)) { - return false; - } - p++; - } - return true; + return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM); } bool -isWellFormedLegacyType(const char* legacyType) +isWellFormedLegacyType(std::string_view legacyType) { - const char* p = legacyType; int32_t alphaNumLen = 0; - while (*p) { - if (*p == '_' || *p == '/' || *p == '-') { + for (char c : legacyType) { + if (c == '_' || c == '/' || c == '-') { if (alphaNumLen == 0) { return false; } alphaNumLen = 0; - } else if (UPRV_ISALPHANUM(*p)) { + } else if (UPRV_ISALPHANUM(c)) { alphaNumLen++; } else { return false; } - p++; } - return (alphaNumLen != 0); + return alphaNumLen != 0; } } // namespace @@ -2351,8 +2362,16 @@ isWellFormedLegacyType(const char* legacyType) U_CAPI const char* U_EXPORT2 uloc_toLegacyKey(const char* keyword) { - const char* legacyKey = ulocimp_toLegacyKey(keyword); - if (legacyKey == nullptr) { + if (keyword == nullptr || *keyword == '\0') { return nullptr; } + std::optional result = ulocimp_toLegacyKeyWithFallback(keyword); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional +ulocimp_toLegacyKeyWithFallback(std::string_view keyword) +{ + std::optional legacyKey = ulocimp_toLegacyKey(keyword); + if (!legacyKey.has_value() && isWellFormedLegacyKey(keyword)) { // Checks if the specified locale key is well-formed with the legacy locale syntax. // // Note: @@ -2360,9 +2379,7 @@ uloc_toLegacyKey(const char* keyword) // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax // Keys can only consist of [0-9a-zA-Z]. - if (isWellFormedLegacyKey(keyword)) { - return keyword; - } + return keyword; } return legacyKey; } @@ -2370,8 +2387,17 @@ uloc_toLegacyKey(const char* keyword) U_CAPI const char* U_EXPORT2 uloc_toLegacyType(const char* keyword, const char* value) { - const char* legacyType = ulocimp_toLegacyType(keyword, value); - if (legacyType == nullptr) { + if (keyword == nullptr || *keyword == '\0' || + value == nullptr || *value == '\0') { return nullptr; } + std::optional result = ulocimp_toLegacyTypeWithFallback(keyword, value); + return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated. +} + +U_EXPORT std::optional +ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value) +{ + std::optional legacyType = ulocimp_toLegacyType(keyword, value); + if (!legacyType.has_value() && isWellFormedLegacyType(value)) { // Checks if the specified locale type is well-formed with the legacy locale syntax. // // Note: @@ -2380,9 +2406,7 @@ uloc_toLegacyType(const char* keyword, const char* value) // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") - if (isWellFormedLegacyType(value)) { - return value; - } + return value; } return legacyType; } diff --git a/icu4c/source/common/uloc_keytype.cpp b/icu4c/source/common/uloc_keytype.cpp index 6ea886ca295..9dc392126ef 100644 --- a/icu4c/source/common/uloc_keytype.cpp +++ b/icu4c/source/common/uloc_keytype.cpp @@ -7,6 +7,8 @@ ********************************************************************** */ #include +#include +#include #include "unicode/utypes.h" #include "unicode/unistr.h" @@ -18,6 +20,7 @@ #include "uassert.h" #include "ucln_cmn.h" #include "uhash.h" +#include "ulocimp.h" #include "umutex.h" #include "uresimp.h" #include "uvector.h" @@ -35,20 +38,25 @@ typedef enum { } SpecialType; struct LocExtKeyData : public icu::UMemory { - const char* legacyId; - const char* bcpId; + std::string_view legacyId; + std::string_view bcpId; icu::LocalUHashtablePointer typeMap; uint32_t specialTypes; }; struct LocExtType : public icu::UMemory { - const char* legacyId; - const char* bcpId; + std::string_view legacyId; + std::string_view bcpId; +}; + +struct TypeAlias : public icu::UMemory { + std::string_view from; }; static icu::MemoryPool* gKeyTypeStringPool = nullptr; static icu::MemoryPool* gLocExtKeyDataEntries = nullptr; static icu::MemoryPool* gLocExtTypeEntries = nullptr; +static icu::MemoryPool* gTypeAliasEntries = nullptr; U_CDECL_BEGIN @@ -65,6 +73,9 @@ uloc_key_type_cleanup() { delete gLocExtTypeEntries; gLocExtTypeEntries = nullptr; + delete gTypeAliasEntries; + gTypeAliasEntries = nullptr; + delete gKeyTypeStringPool; gKeyTypeStringPool = nullptr; @@ -81,7 +92,7 @@ initFromResourceBundle(UErrorCode& sts) { U_NAMESPACE_USE ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup); - gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts); + gLocExtKeyMap = uhash_open(uhash_hashIStringView, uhash_compareIStringView, nullptr, &sts); LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(nullptr, "keyTypeData", &sts)); LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", nullptr, &sts)); @@ -112,6 +123,11 @@ initFromResourceBundle(UErrorCode& sts) { sts = U_MEMORY_ALLOCATION_ERROR; return; } + gTypeAliasEntries = new icu::MemoryPool; + if (gTypeAliasEntries == nullptr) { + sts = U_MEMORY_ALLOCATION_ERROR; + return; + } // iterate through keyMap resource LocalUResourceBundlePointer keyMapEntry; @@ -144,7 +160,7 @@ initFromResourceBundle(UErrorCode& sts) { bool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0; - UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts); + UHashtable* typeDataMap = uhash_open(uhash_hashIStringView, uhash_compareIStringView, nullptr, &sts); if (U_FAILURE(sts)) { break; } @@ -253,10 +269,10 @@ initFromResourceBundle(UErrorCode& sts) { t->bcpId = bcpTypeId; t->legacyId = legacyTypeId; - uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts); + uhash_put(typeDataMap, &t->legacyId, t, &sts); if (bcpTypeId != legacyTypeId) { // different type value - uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts); + uhash_put(typeDataMap, &t->bcpId, t, &sts); } if (U_FAILURE(sts)) { break; @@ -275,8 +291,14 @@ initFromResourceBundle(UErrorCode& sts) { break; } // check if this is an alias of canonical legacy type - if (uprv_compareInvWithUChar(nullptr, legacyTypeId, -1, to, toLen) == 0) { + if (uprv_compareInvWithUChar( + nullptr, + t->legacyId.data(), + static_cast(t->legacyId.size()), + to, + toLen) == 0) { const char* from = ures_getKey(typeAliasDataEntry.getAlias()); + TypeAlias* alias = gTypeAliasEntries->create(TypeAlias{{}, from}); if (isTZ) { // replace colon with slash if necessary if (uprv_strchr(from, ':') != nullptr) { @@ -293,10 +315,10 @@ initFromResourceBundle(UErrorCode& sts) { fromBuf->data(), fromBuf->data() + fromBuf->length(), ':', '/'); - from = fromBuf->data(); + alias->from = fromBuf->toStringPiece(); } } - uhash_put(typeDataMap, (void*)from, t, &sts); + uhash_put(typeDataMap, &alias->from, t, &sts); } } if (U_FAILURE(sts)) { @@ -316,9 +338,15 @@ initFromResourceBundle(UErrorCode& sts) { break; } // check if this is an alias of bcp type - if (uprv_compareInvWithUChar(nullptr, bcpTypeId, -1, to, toLen) == 0) { + if (uprv_compareInvWithUChar( + nullptr, + t->bcpId.data(), + static_cast(t->bcpId.size()), + to, + toLen) == 0) { const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias()); - uhash_put(typeDataMap, (void*)from, t, &sts); + TypeAlias* alias = gTypeAliasEntries->create(TypeAlias{{}, from}); + uhash_put(typeDataMap, &alias->from, t, &sts); } } if (U_FAILURE(sts)) { @@ -341,10 +369,10 @@ initFromResourceBundle(UErrorCode& sts) { keyData->specialTypes = specialTypes; keyData->typeMap.adoptInstead(typeDataMap); - uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts); + uhash_put(gLocExtKeyMap, &keyData->legacyId, keyData, &sts); if (legacyKeyId != bcpKeyId) { // different key value - uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts); + uhash_put(gLocExtKeyMap, &keyData->bcpId, keyData, &sts); } if (U_FAILURE(sts)) { break; @@ -363,100 +391,96 @@ init() { } bool -isSpecialTypeCodepoints(const char* val) { +isSpecialTypeCodepoints(std::string_view val) { int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if (*p == '-') { + for (char c : val) { + if (c == '-') { if (subtagLen < 4 || subtagLen > 6) { return false; } subtagLen = 0; - } else if ((*p >= '0' && *p <= '9') || - (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous - (*p >= 'a' && *p <= 'f')) { // also in EBCDIC + } else if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || // A-F/a-f are contiguous + (c >= 'a' && c <= 'f')) { // also in EBCDIC subtagLen++; } else { return false; } - p++; } - return (subtagLen >= 4 && subtagLen <= 6); + return subtagLen >= 4 && subtagLen <= 6; } bool -isSpecialTypeReorderCode(const char* val) { +isSpecialTypeReorderCode(std::string_view val) { int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if (*p == '-') { + for (char c : val) { + if (c == '-') { if (subtagLen < 3 || subtagLen > 8) { return false; } subtagLen = 0; - } else if (uprv_isASCIILetter(*p)) { + } else if (uprv_isASCIILetter(c)) { subtagLen++; } else { return false; } - p++; } - return (subtagLen >=3 && subtagLen <=8); + return subtagLen >= 3 && subtagLen <= 8; } bool -isSpecialTypeRgKeyValue(const char* val) { +isSpecialTypeRgKeyValue(std::string_view val) { int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) || - (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) { + for (char c : val) { + if ((subtagLen < 2 && uprv_isASCIILetter(c)) || + (subtagLen >= 2 && (c == 'Z' || c == 'z'))) { subtagLen++; } else { return false; } - p++; } - return (subtagLen == 6); + return subtagLen == 6; } } // namespace -U_EXPORT const char* -ulocimp_toBcpKey(const char* key) { +U_EXPORT std::optional +ulocimp_toBcpKey(std::string_view key) { if (!init()) { - return nullptr; + return std::nullopt; } - LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, key)); + LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, &key)); if (keyData != nullptr) { return keyData->bcpId; } - return nullptr; + + return std::nullopt; } -U_EXPORT const char* -ulocimp_toLegacyKey(const char* key) { +U_EXPORT std::optional +ulocimp_toLegacyKey(std::string_view key) { if (!init()) { - return nullptr; + return std::nullopt; } - LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, key)); + LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, &key)); if (keyData != nullptr) { return keyData->legacyId; } - return nullptr; + + return std::nullopt; } -U_EXPORT const char* -ulocimp_toBcpType(const char* key, const char* type) { +U_EXPORT std::optional +ulocimp_toBcpType(std::string_view key, std::string_view type) { if (!init()) { - return nullptr; + return std::nullopt; } - LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, key)); + LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, &key)); if (keyData != nullptr) { - LocExtType* t = static_cast(uhash_get(keyData->typeMap.getAlias(), type)); + LocExtType* t = static_cast(uhash_get(keyData->typeMap.getAlias(), &type)); if (t != nullptr) { return t->bcpId; } @@ -476,19 +500,20 @@ ulocimp_toBcpType(const char* key, const char* type) { } } } - return nullptr; + + return std::nullopt; } -U_EXPORT const char* -ulocimp_toLegacyType(const char* key, const char* type) { +U_EXPORT std::optional +ulocimp_toLegacyType(std::string_view key, std::string_view type) { if (!init()) { - return nullptr; + return std::nullopt; } - LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, key)); + LocExtKeyData* keyData = static_cast(uhash_get(gLocExtKeyMap, &key)); if (keyData != nullptr) { - LocExtType* t = static_cast(uhash_get(keyData->typeMap.getAlias(), type)); + LocExtType* t = static_cast(uhash_get(keyData->typeMap.getAlias(), &type)); if (t != nullptr) { return t->legacyId; } @@ -508,5 +533,6 @@ ulocimp_toLegacyType(const char* key, const char* type) { } } } - return nullptr; + + return std::nullopt; } diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp index 45c57ec84d8..7b3b1e73a37 100644 --- a/icu4c/source/common/uloc_tag.cpp +++ b/icu4c/source/common/uloc_tag.cpp @@ -7,6 +7,8 @@ ********************************************************************** */ +#include +#include #include #include "unicode/bytestream.h" @@ -1374,27 +1376,28 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str bcpValue = nullptr; } } else if (isBcpUExt) { - bcpKey = uloc_toUnicodeLocaleKey(key); - if (bcpKey == nullptr) { + std::optional optBcpKey = ulocimp_toBcpKeyWithFallback(key); + if (!optBcpKey.has_value()) { if (strict) { status = U_ILLEGAL_ARGUMENT_ERROR; break; } continue; } + bcpKey = optBcpKey->data(); - /* we've checked buf is null-terminated above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); - if (bcpValue == nullptr) { + std::optional optBcpValue = + ulocimp_toBcpTypeWithFallback(key, buf.toStringPiece()); + if (!optBcpValue.has_value()) { if (strict) { status = U_ILLEGAL_ARGUMENT_ERROR; break; } continue; } - if (bcpValue == buf.data()) { + if (optBcpValue->data() == buf.data()) { /* - When uloc_toUnicodeLocaleType(key, buf) returns the + When ulocimp_toBcpTypeWithFallback(key, buf) returns the input value as is, the value is well-formed, but has no known mapping. This implementation normalizes the value to lower case @@ -1412,6 +1415,8 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str T_CString_toLowerCase(extBuf->data()); bcpValue = extBuf->data(); + } else { + bcpValue = optBcpValue->data(); } } else { if (*key == PRIVATEUSE) { @@ -1669,33 +1674,28 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT const char *pKey = nullptr; /* LDML key */ const char *pType = nullptr; /* LDML type */ - char bcpKeyBuf[3]; /* BCP key length is always 2 for now */ - U_ASSERT(pBcpKey != nullptr); - if (bcpKeyLen >= static_cast(sizeof(bcpKeyBuf))) { + /* BCP key length is always 2 for now */ + if (bcpKeyLen != 2) { /* the BCP key is invalid */ status = U_ILLEGAL_ARGUMENT_ERROR; return; } - U_ASSERT(bcpKeyLen <= 2); - - uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); - bcpKeyBuf[bcpKeyLen] = 0; /* u extension key to LDML key */ - pKey = uloc_toLegacyKey(bcpKeyBuf); - if (pKey == nullptr) { + std::optional legacyKey = ulocimp_toLegacyKeyWithFallback( + {pBcpKey, static_cast(bcpKeyLen)}); + if (!legacyKey.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - if (pKey == bcpKeyBuf) { + if (legacyKey->data() == pBcpKey) { /* The key returned by toLegacyKey points to the input buffer. We normalize the result key to lower case. */ - T_CString_toLowerCase(bcpKeyBuf); - icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, status); + icu::CharString* key = kwdBuf.create(pBcpKey, bcpKeyLen, status); if (key == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; @@ -1703,36 +1703,37 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT if (U_FAILURE(status)) { return; } + T_CString_toLowerCase(key->data()); pKey = key->data(); + } else { + pKey = legacyKey->data(); } if (pBcpType) { - icu::CharString bcpTypeBuf(pBcpType, bcpTypeLen, status); - if (U_FAILURE(status)) { - return; - } - /* BCP type to locale type */ - pType = uloc_toLegacyType(pKey, bcpTypeBuf.data()); - if (pType == nullptr) { + std::optional legacyType = ulocimp_toLegacyTypeWithFallback( + pKey, {pBcpType, static_cast(bcpTypeLen)}); + if (!legacyType.has_value()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - if (pType == bcpTypeBuf.data()) { + if (legacyType->data() == pBcpType) { /* The type returned by toLegacyType points to the input buffer. We normalize the result type to lower case. */ - /* normalize to lower case */ - T_CString_toLowerCase(bcpTypeBuf.data()); - if (icu::CharString* type = - kwdBuf.create(std::move(bcpTypeBuf), status)) { - if (U_FAILURE(status)) { return; } - pType = type->data(); - } else { + icu::CharString* type = kwdBuf.create(pBcpType, bcpTypeLen, status); + if (type == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } + if (U_FAILURE(status)) { + return; + } + T_CString_toLowerCase(type->data()); + pType = type->data(); + } else { + pType = legacyType->data(); } } else { /* typeless - default type value is "yes" */ diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index 5eab0812cf5..536b6a6de60 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -11,6 +11,7 @@ #define ULOCIMP_H #include +#include #include #include "unicode/bytestream.h" @@ -54,6 +55,18 @@ uloc_getCurrentCountryID(const char* oldID); U_CFUNC const char* uloc_getCurrentLanguageID(const char* oldID); +U_EXPORT std::optional +ulocimp_toBcpKeyWithFallback(std::string_view keyword); + +U_EXPORT std::optional +ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value); + +U_EXPORT std::optional +ulocimp_toLegacyKeyWithFallback(std::string_view keyword); + +U_EXPORT std::optional +ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value); + U_EXPORT icu::CharString ulocimp_getKeywords(const char* localeID, char prev, @@ -392,17 +405,17 @@ ultag_isVariantSubtags(const char* s, int32_t len); const char* ultag_getTKeyStart(const char* localeID); -U_EXPORT const char* -ulocimp_toBcpKey(const char* key); +U_EXPORT std::optional +ulocimp_toBcpKey(std::string_view key); -U_EXPORT const char* -ulocimp_toLegacyKey(const char* key); +U_EXPORT std::optional +ulocimp_toLegacyKey(std::string_view key); -U_EXPORT const char* -ulocimp_toBcpType(const char* key, const char* type); +U_EXPORT std::optional +ulocimp_toBcpType(std::string_view key, std::string_view type); -U_EXPORT const char* -ulocimp_toLegacyType(const char* key, const char* type); +U_EXPORT std::optional +ulocimp_toLegacyType(std::string_view key, std::string_view type); /* Function for testing purpose */ U_EXPORT const char* const*