ICU-22453 Fix non null terminated buffer issue.

See #2543
This commit is contained in:
Frank Tang 2023-08-09 18:04:49 +00:00 committed by Frank Yung-Fong Tang
parent 56850c9a42
commit ca1435c3ea
3 changed files with 63 additions and 36 deletions

View file

@ -563,7 +563,7 @@ private:
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UnicodeString& replacement),
void (*checkReplacement)(const UChar* replacement),
UErrorCode &status);
// Read the languageAlias data from alias to
@ -700,7 +700,7 @@ AliasDataBuilder::readAlias(
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
void (*checkType)(const char* type),
void (*checkReplacement)(const UnicodeString& replacement),
void (*checkReplacement)(const UChar* replacement),
UErrorCode &status) {
if (U_FAILURE(status)) {
return;
@ -720,8 +720,8 @@ AliasDataBuilder::readAlias(
LocalUResourceBundlePointer res(
ures_getNextResource(alias, nullptr, &status));
const char* aliasFrom = ures_getKey(res.getAlias());
UnicodeString aliasTo =
ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
const UChar* aliasTo =
ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status);
if (U_FAILURE(status)) return;
checkType(aliasFrom);
@ -766,7 +766,7 @@ AliasDataBuilder::readLanguageAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) {}, status);
[](const UChar*) {}, status);
}
/**
@ -790,12 +790,12 @@ AliasDataBuilder::readScriptAlias(
[](const char* type) {
U_ASSERT(uprv_strlen(type) == 4);
},
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() == 4);
[](const UChar* replacement) {
U_ASSERT(u_strlen(replacement) == 4);
},
#else
[](const char*) {},
[](const UnicodeString&) { },
[](const UChar*) { },
#endif
status);
}
@ -824,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) { },
[](const UChar*) { },
status);
}
@ -851,15 +851,16 @@ AliasDataBuilder::readVariantAlias(
U_ASSERT(uprv_strlen(type) != 4 ||
(type[0] >= '0' && type[0] <= '9'));
},
[](const UnicodeString& replacement) {
U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
U_ASSERT(replacement.length() != 4 ||
(replacement.charAt(0) >= u'0' &&
replacement.charAt(0) <= u'9'));
[](const UChar* replacement) {
int32_t len = u_strlen(replacement);
U_ASSERT(len >= 4 && len <= 8);
U_ASSERT(len != 4 ||
(*replacement >= u'0' &&
*replacement <= u'9'));
},
#else
[](const char*) {},
[](const UnicodeString&) { },
[](const UChar*) { },
#endif
status);
}
@ -888,7 +889,7 @@ AliasDataBuilder::readSubdivisionAlias(
#else
[](const char*) {},
#endif
[](const UnicodeString&) { },
[](const UChar*) { },
status);
}

View file

@ -241,9 +241,11 @@ private:
return false;
}
for (int i = 0; i < length; ++i) {
stringArray.getValue(i, value); // returns true because i < length
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
if (stringArray.getValue(i, value)) { // returns true because i < length
int32_t strLength = 0;
rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
}
}
return true;
@ -261,10 +263,10 @@ private:
lang[0] = 'a' + ((encoded % 27) - 1);
lang[1] = 'a' + (((encoded / 27 ) % 27) - 1);
if (encoded / (27 * 27) == 0) {
return UnicodeString(lang, 2);
return UnicodeString(lang, 2, US_INV);
}
lang[2] = 'a' + ((encoded / (27 * 27)) - 1);
return UnicodeString(lang, 3);
return UnicodeString(lang, 3, US_INV);
}
UnicodeString toScript(int encoded) {
if (encoded == 0) {
@ -278,7 +280,8 @@ private:
if (script == nullptr) {
return UNICODE_STRING_SIMPLE("");
}
return UnicodeString(script, 4);
U_ASSERT(uprv_strlen(script) == 4);
return UnicodeString(script, 4, US_INV);
}
UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
@ -306,7 +309,7 @@ private:
char region[2];
region[0] = 'A' + ((encoded % 27) - 1);
region[1] = 'A' + (((encoded / 27) % 27) - 1);
return UnicodeString(region, 2);
return UnicodeString(region, 2, US_INV);
}
bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,
@ -321,9 +324,10 @@ private:
return false;
}
for (int i = 0; i < length; ++i) {
rawIndexes[i*3] = strings.add(toLanguage(vectors[i]), errorCode);
rawIndexes[i*3+1] = strings.add(toScript(vectors[i]), errorCode);
rawIndexes[i*3+2] = strings.add(toRegion(m49Array, value, vectors[i], errorCode), errorCode);
rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode);
rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode);
rawIndexes[i*3+2] = strings.addByValue(
toRegion(m49Array, value, vectors[i], errorCode), errorCode);
if (U_FAILURE(errorCode)) { return false; }
}
length *= 3;

View file

@ -10,6 +10,7 @@
#include "charstr.h"
#include "uassert.h"
#include "uhash.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
@ -47,22 +48,20 @@ public:
}
/**
* Adds a string and returns a unique number for it.
* The string's buffer contents must not change, nor move around in memory,
* Adds a NUL-terminated string and returns a unique number for it.
* The string must not change, nor move around in memory,
* while this UniqueCharStrings is in use.
* The string contents must be NUL-terminated exactly at s.length().
*
* Best used with read-only-alias UnicodeString objects that point to
* stable storage, such as strings returned by resource bundle functions.
* Best used with string data in a stable storage, such as strings returned
* by resource bundle functions.
*/
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return 0; }
int32_t add(const char16_t*p, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return -1; }
if (isFrozen) {
errorCode = U_NO_WRITE_PERMISSION;
return 0;
return -1;
}
// The string points into the resource bundle.
const char16_t *p = s.getBuffer();
int32_t oldIndex = uhash_geti(&map, p);
if (oldIndex != 0) { // found duplicate
return oldIndex;
@ -71,11 +70,33 @@ public:
// The strings object is also terminated with one implicit NUL.
strings->append(0, errorCode);
int32_t newIndex = strings->length();
strings->appendInvariantChars(s, errorCode);
strings->appendInvariantChars(p, u_strlen(p), errorCode);
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
return newIndex;
}
/**
* Adds a unicode string by value and returns a unique number for it.
*/
int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return -1; }
if (isFrozen) {
errorCode = U_NO_WRITE_PERMISSION;
return -1;
}
int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
if (oldIndex != 0) { // found duplicate
return oldIndex;
}
// We need to store the string content of the UnicodeString.
UnicodeString *key = keyStore.create(s);
if (key == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return -1;
}
return add(key->getTerminatedBuffer(), errorCode);
}
void freeze() { isFrozen = true; }
/**
@ -90,6 +111,7 @@ public:
private:
UHashtable map;
CharString *strings;
MemoryPool<UnicodeString> keyStore;
bool isFrozen = false;
};