diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index 4e33ec54813..4b0f89bd590 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -3168,6 +3168,9 @@ getTrail: } } else { /* no more input */ + if (pArgs->flush) { + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } break; } } else { @@ -3235,6 +3238,14 @@ getTrail: /* set offsets since the start or the last callback */ if(offsets!=NULL) { size_t count=source-lastSource; + if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) { + /* + Caller gave us a partial supplementary character, + which this function couldn't convert in any case. + The callback will handle the offset. + */ + count--; + } while(count>0) { *offsets++=sourceIndex++; --count; @@ -4799,8 +4810,7 @@ ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, /* reset the selector for the next code point */ cnv->useSubChar1=FALSE; - switch(cnv->sharedData->mbcs.outputType) { - case MBCS_OUTPUT_2_SISO: + if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) { p=buffer; /* fromUnicodeStatus contains prevLength */ @@ -4826,16 +4836,11 @@ ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return; } - ucnv_cbFromUWriteBytes(pArgs, - buffer, (int32_t)(p-buffer), - offsetIndex, pErrorCode); - break; - default: - ucnv_cbFromUWriteBytes(pArgs, - subchar, length, - offsetIndex, pErrorCode); - break; + subchar=buffer; + length=(int32_t)(p-buffer); } + + ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode); } U_CFUNC UConverterType diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt index 858fe03a7b4..12e4f52d330 100644 --- a/icu4c/source/test/testdata/conversion.txt +++ b/icu4c/source/test/testdata/conversion.txt @@ -517,6 +517,34 @@ conversion:table(nofallback) { :intvector{ 0,1,1,1,1,1,2 }, :int{1}, :int{0}, "", "?=@$!@$", "" } + { + "windows-1252", + "1\U00010001\u0085\U000500022\ud8003\udc014\ue001", + :bin{ 311a1a1a321a331a341a }, + :intvector{ 0,1,3,4,6,7,8,9,10,11 }, + :int{1}, :int{0}, "", "?", "" + } + { + "windows-1252", + "\uD87E", // lone surrogate can cause an offset overflow + :bin{ 1a }, + :intvector{ 0 }, + :int{1}, :int{0}, "", "?", "" + } + { + "windows-1252", + "\uD87E", // lone surrogate can cause an offset overflow + :bin{ 6875683f }, + :intvector{ 0,0,0,0 }, + :int{1}, :int{0}, "", "?=huh?", "" // Use a long substitution character + } + { + "*test4", + "\u30ab", // An incomplete multi-codepoint character + :bin{ ff }, + :intvector{ 0 }, + :int{1}, :int{0}, "", "?", "" + } { "ibm-930", // stateful MBCS "a\ufdd0\u4e00\ufdd0e",