mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-1939 fix UTF-16 and UTF-32 converters: handling of U_TRUNCATED_CHAR_FOUND
X-SVN-Rev: 8840
This commit is contained in:
parent
3e5346ffbf
commit
f0f2f037e3
1 changed files with 42 additions and 12 deletions
|
@ -882,7 +882,7 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
int32_t count;
|
||||
int32_t sourceIndex = 0;
|
||||
|
||||
if(length <= 0) {
|
||||
if(length <= 0 && cnv->toUnicodeStatus == 0) {
|
||||
/* no input, nothing to do */
|
||||
return;
|
||||
}
|
||||
|
@ -893,7 +893,7 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* complete a partial UChar from the last call */
|
||||
if(cnv->toUnicodeStatus != 0) {
|
||||
if(length != 0 && cnv->toUnicodeStatus != 0) {
|
||||
/*
|
||||
* copy the byte from the last call and the first one here into the target,
|
||||
* byte-wise to keep the platform endianness
|
||||
|
@ -942,7 +942,9 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
/* consume the last byte and store it, making sure that it will never set the status to 0 */
|
||||
cnv->toUnicodeStatus = *source++ | 0x100;
|
||||
}
|
||||
/* } else length==0 { nothing to do */
|
||||
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -1044,7 +1046,7 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
int32_t count;
|
||||
int32_t sourceIndex = 0;
|
||||
|
||||
if(length <= 0) {
|
||||
if(length <= 0 && cnv->toUnicodeStatus == 0) {
|
||||
/* no input, nothing to do */
|
||||
return;
|
||||
}
|
||||
|
@ -1055,7 +1057,7 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* complete a partial UChar from the last call */
|
||||
if(cnv->toUnicodeStatus != 0) {
|
||||
if(length != 0 && cnv->toUnicodeStatus != 0) {
|
||||
/*
|
||||
* copy the byte from the last call and the first one here into the target,
|
||||
* byte-wise, reversing the platform endianness
|
||||
|
@ -1112,7 +1114,9 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
/* consume the last byte and store it, making sure that it will never set the status to 0 */
|
||||
cnv->toUnicodeStatus = *source++ | 0x100;
|
||||
}
|
||||
/* } else length==0 { nothing to do */
|
||||
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -2503,14 +2507,27 @@ _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
}
|
||||
|
||||
if(source==sourceLimit && pArgs->flush && state!=0) {
|
||||
/* handle 0<state<8: call UTF-16BE with too-short input */
|
||||
if(state<8) {
|
||||
if(source==sourceLimit && pArgs->flush) {
|
||||
/* handle truncated input */
|
||||
switch(state) {
|
||||
case 0:
|
||||
break; /* no input at all, nothing to do */
|
||||
case 8:
|
||||
_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
|
||||
break;
|
||||
case 9:
|
||||
_UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
|
||||
break;
|
||||
default:
|
||||
/* handle 0<state<8: call UTF-16BE with too-short input */
|
||||
pArgs->source=utf16BOM+(state&4); /* select the correct BOM */
|
||||
pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
|
||||
|
||||
/* no offsets: not enough for output */
|
||||
_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
|
||||
/* pArgs->source restored below */
|
||||
pArgs->sourceLimit=sourceLimit;
|
||||
break;
|
||||
}
|
||||
cnv->mode=0; /* reset */
|
||||
} else {
|
||||
|
@ -2745,14 +2762,27 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
}
|
||||
|
||||
if(source==sourceLimit && pArgs->flush && state!=0) {
|
||||
/* handle 0<state<8: call UTF-32BE with too-short input */
|
||||
if(state<8) {
|
||||
if(source==sourceLimit && pArgs->flush) {
|
||||
/* handle truncated input */
|
||||
switch(state) {
|
||||
case 0:
|
||||
break; /* no input at all, nothing to do */
|
||||
case 8:
|
||||
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
|
||||
break;
|
||||
case 9:
|
||||
T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
|
||||
break;
|
||||
default:
|
||||
/* handle 0<state<8: call UTF-32BE with too-short input */
|
||||
pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
|
||||
pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
|
||||
|
||||
/* no offsets: not enough for output */
|
||||
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
|
||||
/* pArgs->source restored below */
|
||||
pArgs->sourceLimit=sourceLimit;
|
||||
break;
|
||||
}
|
||||
cnv->mode=0; /* reset */
|
||||
} else {
|
||||
|
|
Loading…
Add table
Reference in a new issue