ICU-8091 fix UnicodeString::reverse() when the only lead surrogate is the middle code unit of an odd-length string

X-SVN-Rev: 28952
This commit is contained in:
Markus Scherer 2010-10-29 00:15:27 +00:00
parent 8f93a3e07a
commit 334ad85355
2 changed files with 21 additions and 7 deletions

View file

@ -1388,15 +1388,19 @@ UnicodeString::doReverse(int32_t start,
pinIndices(start, length);
UChar *left = getArrayStart() + start;
UChar *right = left + length;
UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
UChar swap;
UBool hasSupplementary = FALSE;
while(left < --right) {
hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
*right = swap;
}
// Before the loop we know left<right because length>=2.
do {
hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
*right-- = swap;
} while(left < right);
// Make sure to test the middle code unit of an odd-length string.
// Redundant if the length is even.
hasSupplementary |= (UBool)U16_IS_LEAD(*left);
/* if there are supplementary code points in the reversed range, then re-swap their surrogates */
if(hasSupplementary) {
@ -1405,7 +1409,7 @@ UnicodeString::doReverse(int32_t start,
left = getArrayStart() + start;
right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
while(left < right) {
if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
*left++ = swap2;
*left++ = swap;
} else {

View file

@ -1006,6 +1006,16 @@ UnicodeStringTest::TestReverse()
if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
errln("reverse() failed with supplementary characters");
}
// Test case for ticket #8091:
// UnicodeString::reverse() failed to see a lead surrogate in the middle of
// an odd-length string that contains no other lead surrogates.
test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
test.reverse();
if(test!=expected) {
errln("reverse() failed with only lead surrogate in the middle");
}
}
void