From ca1435c3ea7314d9636e59a9b87d95df6dd7f8a9 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Wed, 9 Aug 2023 18:04:49 +0000 Subject: [PATCH] ICU-22453 Fix non null terminated buffer issue. See #2543 --- icu4c/source/common/locid.cpp | 33 ++++++++++--------- icu4c/source/common/loclikelysubtags.cpp | 24 ++++++++------ icu4c/source/common/uniquecharstr.h | 42 ++++++++++++++++++------ 3 files changed, 63 insertions(+), 36 deletions(-) diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index 70a794ae076..057e68d9d64 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -563,7 +563,7 @@ private: LocalMemory& replacementIndexes, int32_t &length, void (*checkType)(const char* type), - void (*checkReplacement)(const UnicodeString& replacement), + void (*checkReplacement)(const UChar* replacement), UErrorCode &status); // Read the languageAlias data from alias to @@ -700,7 +700,7 @@ AliasDataBuilder::readAlias( LocalMemory& replacementIndexes, int32_t &length, void (*checkType)(const char* type), - void (*checkReplacement)(const UnicodeString& replacement), + void (*checkReplacement)(const UChar* replacement), UErrorCode &status) { if (U_FAILURE(status)) { return; @@ -720,8 +720,8 @@ AliasDataBuilder::readAlias( LocalUResourceBundlePointer res( ures_getNextResource(alias, nullptr, &status)); const char* aliasFrom = ures_getKey(res.getAlias()); - UnicodeString aliasTo = - ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status); + const UChar* aliasTo = + ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status); if (U_FAILURE(status)) return; checkType(aliasFrom); @@ -766,7 +766,7 @@ AliasDataBuilder::readLanguageAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) {}, status); + [](const UChar*) {}, status); } /** @@ -790,12 +790,12 @@ AliasDataBuilder::readScriptAlias( [](const char* type) { U_ASSERT(uprv_strlen(type) == 4); }, - [](const UnicodeString& replacement) { - U_ASSERT(replacement.length() == 4); + [](const UChar* replacement) { + U_ASSERT(u_strlen(replacement) == 4); }, #else [](const char*) {}, - [](const UnicodeString&) { }, + [](const UChar*) { }, #endif status); } @@ -824,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) { }, + [](const UChar*) { }, status); } @@ -851,15 +851,16 @@ AliasDataBuilder::readVariantAlias( U_ASSERT(uprv_strlen(type) != 4 || (type[0] >= '0' && type[0] <= '9')); }, - [](const UnicodeString& replacement) { - U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8); - U_ASSERT(replacement.length() != 4 || - (replacement.charAt(0) >= u'0' && - replacement.charAt(0) <= u'9')); + [](const UChar* replacement) { + int32_t len = u_strlen(replacement); + U_ASSERT(len >= 4 && len <= 8); + U_ASSERT(len != 4 || + (*replacement >= u'0' && + *replacement <= u'9')); }, #else [](const char*) {}, - [](const UnicodeString&) { }, + [](const UChar*) { }, #endif status); } @@ -888,7 +889,7 @@ AliasDataBuilder::readSubdivisionAlias( #else [](const char*) {}, #endif - [](const UnicodeString&) { }, + [](const UChar*) { }, status); } diff --git a/icu4c/source/common/loclikelysubtags.cpp b/icu4c/source/common/loclikelysubtags.cpp index 677ab0bda50..8511955da63 100644 --- a/icu4c/source/common/loclikelysubtags.cpp +++ b/icu4c/source/common/loclikelysubtags.cpp @@ -241,9 +241,11 @@ private: return false; } for (int i = 0; i < length; ++i) { - stringArray.getValue(i, value); // returns true because i < length - rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode); - if (U_FAILURE(errorCode)) { return false; } + if (stringArray.getValue(i, value)) { // returns true because i < length + int32_t strLength = 0; + rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode); + if (U_FAILURE(errorCode)) { return false; } + } } } return true; @@ -261,10 +263,10 @@ private: lang[0] = 'a' + ((encoded % 27) - 1); lang[1] = 'a' + (((encoded / 27 ) % 27) - 1); if (encoded / (27 * 27) == 0) { - return UnicodeString(lang, 2); + return UnicodeString(lang, 2, US_INV); } lang[2] = 'a' + ((encoded / (27 * 27)) - 1); - return UnicodeString(lang, 3); + return UnicodeString(lang, 3, US_INV); } UnicodeString toScript(int encoded) { if (encoded == 0) { @@ -278,7 +280,8 @@ private: if (script == nullptr) { return UNICODE_STRING_SIMPLE(""); } - return UnicodeString(script, 4); + U_ASSERT(uprv_strlen(script) == 4); + return UnicodeString(script, 4, US_INV); } UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { @@ -306,7 +309,7 @@ private: char region[2]; region[0] = 'A' + ((encoded % 27) - 1); region[1] = 'A' + (((encoded / 27) % 27) - 1); - return UnicodeString(region, 2); + return UnicodeString(region, 2, US_INV); } bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array, @@ -321,9 +324,10 @@ private: return false; } for (int i = 0; i < length; ++i) { - rawIndexes[i*3] = strings.add(toLanguage(vectors[i]), errorCode); - rawIndexes[i*3+1] = strings.add(toScript(vectors[i]), errorCode); - rawIndexes[i*3+2] = strings.add(toRegion(m49Array, value, vectors[i], errorCode), errorCode); + rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode); + rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode); + rawIndexes[i*3+2] = strings.addByValue( + toRegion(m49Array, value, vectors[i], errorCode), errorCode); if (U_FAILURE(errorCode)) { return false; } } length *= 3; diff --git a/icu4c/source/common/uniquecharstr.h b/icu4c/source/common/uniquecharstr.h index 10cc924f7f9..e5d3b734180 100644 --- a/icu4c/source/common/uniquecharstr.h +++ b/icu4c/source/common/uniquecharstr.h @@ -10,6 +10,7 @@ #include "charstr.h" #include "uassert.h" #include "uhash.h" +#include "cmemory.h" U_NAMESPACE_BEGIN @@ -47,22 +48,20 @@ public: } /** - * Adds a string and returns a unique number for it. - * The string's buffer contents must not change, nor move around in memory, + * Adds a NUL-terminated string and returns a unique number for it. + * The string must not change, nor move around in memory, * while this UniqueCharStrings is in use. - * The string contents must be NUL-terminated exactly at s.length(). * - * Best used with read-only-alias UnicodeString objects that point to - * stable storage, such as strings returned by resource bundle functions. + * Best used with string data in a stable storage, such as strings returned + * by resource bundle functions. */ - int32_t add(const UnicodeString &s, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return 0; } + int32_t add(const char16_t*p, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return -1; } if (isFrozen) { errorCode = U_NO_WRITE_PERMISSION; - return 0; + return -1; } // The string points into the resource bundle. - const char16_t *p = s.getBuffer(); int32_t oldIndex = uhash_geti(&map, p); if (oldIndex != 0) { // found duplicate return oldIndex; @@ -71,11 +70,33 @@ public: // The strings object is also terminated with one implicit NUL. strings->append(0, errorCode); int32_t newIndex = strings->length(); - strings->appendInvariantChars(s, errorCode); + strings->appendInvariantChars(p, u_strlen(p), errorCode); uhash_puti(&map, const_cast(p), newIndex, &errorCode); return newIndex; } + /** + * Adds a unicode string by value and returns a unique number for it. + */ + int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return -1; } + if (isFrozen) { + errorCode = U_NO_WRITE_PERMISSION; + return -1; + } + int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer()); + if (oldIndex != 0) { // found duplicate + return oldIndex; + } + // We need to store the string content of the UnicodeString. + UnicodeString *key = keyStore.create(s); + if (key == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return -1; + } + return add(key->getTerminatedBuffer(), errorCode); + } + void freeze() { isFrozen = true; } /** @@ -90,6 +111,7 @@ public: private: UHashtable map; CharString *strings; + MemoryPool keyStore; bool isFrozen = false; };