mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-13707 Make UnicodeString safe when appended or inserted into itself. (#147)
This commit is contained in:
parent
db05e18f28
commit
a21523d7c8
4 changed files with 121 additions and 27 deletions
|
@ -243,6 +243,9 @@ class UnicodeStringAppendable; // unicode/appendable.h
|
|||
* than other ICU APIs. In particular:
|
||||
* - If indexes are out of bounds for a UnicodeString object
|
||||
* (<0 or >length()) then they are "pinned" to the nearest boundary.
|
||||
* - If the buffer passed to an insert/append/replace operation is owned by the
|
||||
* target object, e.g., calling str.append(str), an extra copy may take place
|
||||
* to ensure safety.
|
||||
* - If primitive string pointer values (e.g., const char16_t * or char *)
|
||||
* for input strings are NULL, then those input string parameters are treated
|
||||
* as if they pointed to an empty string.
|
||||
|
|
|
@ -1447,10 +1447,15 @@ UnicodeString::doReplace(int32_t start,
|
|||
}
|
||||
|
||||
if(srcChars == 0) {
|
||||
srcStart = srcLength = 0;
|
||||
} else if(srcLength < 0) {
|
||||
// get the srcLength if necessary
|
||||
srcLength = u_strlen(srcChars + srcStart);
|
||||
srcLength = 0;
|
||||
} else {
|
||||
// Perform all remaining operations relative to srcChars + srcStart.
|
||||
// From this point forward, do not use srcStart.
|
||||
srcChars += srcStart;
|
||||
if (srcLength < 0) {
|
||||
// get the srcLength if necessary
|
||||
srcLength = u_strlen(srcChars);
|
||||
}
|
||||
}
|
||||
|
||||
// pin the indices to legal values
|
||||
|
@ -1465,17 +1470,28 @@ UnicodeString::doReplace(int32_t start,
|
|||
}
|
||||
newLength += srcLength;
|
||||
|
||||
// Check for insertion into ourself
|
||||
const UChar *oldArray = getArrayStart();
|
||||
if (isBufferWritable() &&
|
||||
oldArray < srcChars + srcLength &&
|
||||
srcChars < oldArray + oldLength) {
|
||||
// Copy into a new UnicodeString and start over
|
||||
UnicodeString copy(srcChars, srcLength);
|
||||
if (copy.isBogus()) {
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
|
||||
}
|
||||
|
||||
// cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
|
||||
// therefore we need to keep the current fArray
|
||||
UChar oldStackBuffer[US_STACKBUF_SIZE];
|
||||
UChar *oldArray;
|
||||
if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
|
||||
// copy the stack buffer contents because it will be overwritten with
|
||||
// fUnion.fFields values
|
||||
u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
|
||||
u_memcpy(oldStackBuffer, oldArray, oldLength);
|
||||
oldArray = oldStackBuffer;
|
||||
} else {
|
||||
oldArray = getArrayStart();
|
||||
}
|
||||
|
||||
// clone our array and allocate a bigger array if needed
|
||||
|
@ -1503,7 +1519,7 @@ UnicodeString::doReplace(int32_t start,
|
|||
}
|
||||
|
||||
// now fill in the hole with the new string
|
||||
us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
|
||||
us_arrayCopy(srcChars, 0, newArray, start, srcLength);
|
||||
|
||||
setLength(newLength);
|
||||
|
||||
|
@ -1536,15 +1552,34 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Perform all remaining operations relative to srcChars + srcStart.
|
||||
// From this point forward, do not use srcStart.
|
||||
srcChars += srcStart;
|
||||
|
||||
if(srcLength < 0) {
|
||||
// get the srcLength if necessary
|
||||
if((srcLength = u_strlen(srcChars + srcStart)) == 0) {
|
||||
if((srcLength = u_strlen(srcChars)) == 0) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t oldLength = length();
|
||||
int32_t newLength = oldLength + srcLength;
|
||||
|
||||
// Check for append onto ourself
|
||||
const UChar* oldArray = getArrayStart();
|
||||
if (isBufferWritable() &&
|
||||
oldArray < srcChars + srcLength &&
|
||||
srcChars < oldArray + oldLength) {
|
||||
// Copy into a new UnicodeString and start over
|
||||
UnicodeString copy(srcChars, srcLength);
|
||||
if (copy.isBogus()) {
|
||||
setToBogus();
|
||||
return *this;
|
||||
}
|
||||
return doAppend(copy.getArrayStart(), 0, srcLength);
|
||||
}
|
||||
|
||||
// optimize append() onto a large-enough, owned string
|
||||
if((newLength <= getCapacity() && isBufferWritable()) ||
|
||||
cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
|
||||
|
@ -1556,8 +1591,8 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng
|
|||
// or
|
||||
// str.appendString(buffer, length)
|
||||
// or similar.
|
||||
if(srcChars + srcStart != newArray + oldLength) {
|
||||
us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength);
|
||||
if(srcChars != newArray + oldLength) {
|
||||
us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
|
||||
}
|
||||
setLength(newLength);
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &
|
|||
TESTCASE_AUTO(TestUInt16Pointers);
|
||||
TESTCASE_AUTO(TestWCharPointers);
|
||||
TESTCASE_AUTO(TestNullPointers);
|
||||
TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -1123,27 +1124,25 @@ UnicodeStringTest::TestMiscellaneous()
|
|||
errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
|
||||
}
|
||||
|
||||
test1=UNICODE_STRING("la", 2);
|
||||
test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
|
||||
if(test1!=UNICODE_STRING("la lila", 7)) {
|
||||
errln("UnicodeString::append(const UChar *, start, length) failed");
|
||||
}
|
||||
// NOTE: Some compilers will optimize u"la" to point to the same static memory
|
||||
// as u" lila", offset by 3 code units
|
||||
test1=UnicodeString(TRUE, u"la", 2);
|
||||
test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
|
||||
assertEquals("UnicodeString::append(const UChar *, start, length) failed",
|
||||
u"la lila", test1);
|
||||
|
||||
test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
|
||||
if(test1!=UNICODE_STRING("la dudum lila", 13)) {
|
||||
errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
|
||||
}
|
||||
test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
|
||||
assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
|
||||
u"la dudum lila", test1);
|
||||
|
||||
static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
|
||||
test1.insert(9, ucs, -1);
|
||||
if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
|
||||
errln("UnicodeString::insert(start, const UChar *, length) failed");
|
||||
}
|
||||
assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
|
||||
u"la dudum hm lila", test1);
|
||||
|
||||
test1.replace(9, 2, (UChar)0x2b);
|
||||
if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
|
||||
errln("UnicodeString::replace(start, length, UChar) failed");
|
||||
}
|
||||
assertEquals("UnicodeString::replace(start, length, UChar) failed",
|
||||
u"la dudum + lila", test1);
|
||||
|
||||
if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
|
||||
errln("UnicodeString::hasMetaData() returns TRUE");
|
||||
|
@ -2248,3 +2247,59 @@ UnicodeStringTest::TestNullPointers() {
|
|||
UnicodeString(u"def").extract(nullptr, 0, errorCode);
|
||||
assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
|
||||
}
|
||||
|
||||
void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
|
||||
IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
|
||||
|
||||
// Test append operation
|
||||
UnicodeString str(u"foo ");
|
||||
str.append(str);
|
||||
str.append(str);
|
||||
str.append(str);
|
||||
assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
|
||||
|
||||
// Test append operation with readonly alias to start
|
||||
str = UnicodeString(TRUE, u"foo ", 4);
|
||||
str.append(str);
|
||||
str.append(str);
|
||||
str.append(str);
|
||||
assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
|
||||
|
||||
// Test append operation with aliased substring
|
||||
str = u"abcde";
|
||||
UnicodeString sub = str.tempSubString(1, 2);
|
||||
str.append(sub);
|
||||
assertEquals("", u"abcdebc", str);
|
||||
|
||||
// Test append operation with double-aliased substring
|
||||
str = UnicodeString(TRUE, u"abcde", 5);
|
||||
sub = str.tempSubString(1, 2);
|
||||
str.append(sub);
|
||||
assertEquals("", u"abcdebc", str);
|
||||
|
||||
// Test insert operation
|
||||
str = u"a-*b";
|
||||
str.insert(2, str);
|
||||
str.insert(4, str);
|
||||
str.insert(8, str);
|
||||
assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
|
||||
|
||||
// Test insert operation with readonly alias to start
|
||||
str = UnicodeString(TRUE, u"a-*b", 4);
|
||||
str.insert(2, str);
|
||||
str.insert(4, str);
|
||||
str.insert(8, str);
|
||||
assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
|
||||
|
||||
// Test insert operation with aliased substring
|
||||
str = u"abcde";
|
||||
sub = str.tempSubString(1, 3);
|
||||
str.insert(2, sub);
|
||||
assertEquals("", u"abbcdcde", str);
|
||||
|
||||
// Test insert operation with double-aliased substring
|
||||
str = UnicodeString(TRUE, u"abcde", 5);
|
||||
sub = str.tempSubString(1, 3);
|
||||
str.insert(2, sub);
|
||||
assertEquals("", u"abbcdcde", str);
|
||||
}
|
||||
|
|
|
@ -96,6 +96,7 @@ public:
|
|||
void TestUInt16Pointers();
|
||||
void TestWCharPointers();
|
||||
void TestNullPointers();
|
||||
void TestUnicodeStringInsertAppendToSelf();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue