mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-13489 Merging #13510(r40714,r40715) UTF-8 to UTF-8 conversion overflow to maint-60 for 60.2.
X-SVN-Rev: 40716
This commit is contained in:
parent
e6d1cc2430
commit
42f3b2d844
3 changed files with 98 additions and 16 deletions
|
@ -28,6 +28,7 @@
|
|||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uassert.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -694,7 +695,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
// Use a single counter for source and target, counting the minimum of
|
||||
// the source length and the target capacity.
|
||||
// Let the standard converter handle edge cases.
|
||||
const uint8_t *limit=sourceLimit;
|
||||
if(count>targetCapacity) {
|
||||
limit-=(count-targetCapacity);
|
||||
count=targetCapacity;
|
||||
}
|
||||
|
||||
|
@ -707,11 +710,11 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
// sequence from the previous buffer.
|
||||
int32_t length=count-toULimit;
|
||||
if(length>0) {
|
||||
uint8_t b1=*(sourceLimit-1);
|
||||
uint8_t b1=*(limit-1);
|
||||
if(U8_IS_SINGLE(b1)) {
|
||||
// common ASCII character
|
||||
} else if(U8_IS_TRAIL(b1) && length>=2) {
|
||||
uint8_t b2=*(sourceLimit-2);
|
||||
uint8_t b2=*(limit-2);
|
||||
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
|
||||
// truncated 3-byte sequence
|
||||
count-=2;
|
||||
|
@ -811,7 +814,7 @@ moreBytes:
|
|||
}
|
||||
|
||||
/* copy the legal byte sequence to the target */
|
||||
{
|
||||
if(count>=toULength) {
|
||||
int8_t i;
|
||||
|
||||
for(i=0; i<oldToULength; ++i) {
|
||||
|
@ -822,9 +825,18 @@ moreBytes:
|
|||
*target++=*source++;
|
||||
}
|
||||
count-=toULength;
|
||||
} else {
|
||||
// A supplementary character that does not fit into the target.
|
||||
// Let the standard converter handle this.
|
||||
source-=(toULength-oldToULength);
|
||||
pToUArgs->source=(char *)source;
|
||||
pFromUArgs->target=(char *)target;
|
||||
*pErrorCode=U_USING_DEFAULT_WARNING;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
U_ASSERT(count>=0);
|
||||
|
||||
if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
|
||||
if(target==(const uint8_t *)pFromUArgs->targetLimit) {
|
||||
|
|
|
@ -68,21 +68,16 @@ ConversionTest::~ConversionTest() {
|
|||
void
|
||||
ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
|
||||
if (exec) logln("TestSuite ConversionTest: ");
|
||||
switch (index) {
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
#if !UCONFIG_NO_FILE_IO
|
||||
case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
|
||||
case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
|
||||
case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
|
||||
case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
|
||||
#else
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3: name="skip"; break;
|
||||
TESTCASE_AUTO(TestToUnicode);
|
||||
TESTCASE_AUTO(TestFromUnicode);
|
||||
TESTCASE_AUTO(TestGetUnicodeSet);
|
||||
#endif
|
||||
case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
|
||||
default: name=""; break; //needed to end loop
|
||||
}
|
||||
TESTCASE_AUTO(TestGetUnicodeSet2);
|
||||
TESTCASE_AUTO(TestDefaultIgnorableCallback);
|
||||
TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
// test data interface ----------------------------------------------------- ***
|
||||
|
@ -723,6 +718,80 @@ ConversionTest::TestDefaultIgnorableCallback() {
|
|||
delete set_ignorable;
|
||||
}
|
||||
|
||||
void
|
||||
ConversionTest::TestUTF8ToUTF8Overflow() {
|
||||
IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
|
||||
LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
|
||||
LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
|
||||
static const char *text = "aä"; // ä: 2 bytes
|
||||
const char *source = text;
|
||||
const char *sourceLimit = text + strlen(text);
|
||||
char result[20];
|
||||
char *target = result;
|
||||
const char *targetLimit = result + sizeof(result);
|
||||
UChar buffer16[20];
|
||||
UChar *pivotSource = buffer16;
|
||||
UChar *pivotTarget = buffer16;
|
||||
const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
|
||||
|
||||
// Convert with insufficient target capacity.
|
||||
result[2] = 5;
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, result + 2, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, FALSE, errorCode);
|
||||
assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
|
||||
int32_t length = (int32_t)(target - result);
|
||||
assertEquals("number of bytes written", 2, length);
|
||||
assertEquals("next byte not clobbered", 5, result[2]);
|
||||
|
||||
// Convert the rest and flush.
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, targetLimit, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, TRUE, errorCode);
|
||||
|
||||
assertSuccess("UTF-8->UTF-8", errorCode);
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("3 bytes", 3, length);
|
||||
if (length == 3) {
|
||||
assertTrue("result same as input", memcmp(text, result, length) == 0);
|
||||
}
|
||||
|
||||
ucnv_reset(cnv1.getAlias());
|
||||
ucnv_reset(cnv2.getAlias());
|
||||
memset(result, 0, sizeof(result));
|
||||
static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes
|
||||
source = text2;
|
||||
sourceLimit = text2 + strlen(text2);
|
||||
target = result;
|
||||
pivotSource = pivotTarget = buffer16;
|
||||
|
||||
// Convert with insufficient target capacity.
|
||||
result[3] = 5;
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, result + 3, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, FALSE, errorCode);
|
||||
assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("text2 number of bytes written", 3, length);
|
||||
assertEquals("text2 next byte not clobbered", 5, result[3]);
|
||||
|
||||
// Convert the rest and flush.
|
||||
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
|
||||
&target, targetLimit, &source, sourceLimit,
|
||||
buffer16, &pivotSource, &pivotTarget, pivotLimit,
|
||||
FALSE, TRUE, errorCode);
|
||||
|
||||
assertSuccess("text2 UTF-8->UTF-8", errorCode);
|
||||
length = (int32_t)(target - result);
|
||||
assertEquals("text2 5 bytes", 5, length);
|
||||
if (length == 5) {
|
||||
assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
// open testdata or ICU data converter ------------------------------------- ***
|
||||
|
||||
UConverter *
|
||||
|
|
|
@ -76,6 +76,7 @@ public:
|
|||
void TestGetUnicodeSet();
|
||||
void TestGetUnicodeSet2();
|
||||
void TestDefaultIgnorableCallback();
|
||||
void TestUTF8ToUTF8Overflow();
|
||||
|
||||
private:
|
||||
UBool
|
||||
|
|
Loading…
Add table
Reference in a new issue