mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-2856: Make EBCDIC_STATEFUL always emit SI at the end of a stream if the last character was DBCS
X-SVN-Rev: 11788
This commit is contained in:
parent
3a31e9598d
commit
0fcfe71911
4 changed files with 65 additions and 38 deletions
|
@ -2141,7 +2141,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
UChar32 c;
|
||||
|
||||
int32_t sourceIndex, nextSourceIndex;
|
||||
int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
uint32_t stage2Entry;
|
||||
|
@ -2184,6 +2184,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
prevLength=cnv->fromUnicodeStatus;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
prevSourceIndex=-1;
|
||||
sourceIndex= c==0 ? 0 : -1;
|
||||
nextSourceIndex=0;
|
||||
|
||||
|
@ -2525,7 +2526,10 @@ getTrail:
|
|||
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
c=0;
|
||||
sourceIndex=nextSourceIndex;
|
||||
if(offsets!=NULL) {
|
||||
prevSourceIndex=sourceIndex;
|
||||
sourceIndex=nextSourceIndex;
|
||||
}
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
@ -2551,6 +2555,7 @@ callback:
|
|||
* The above branch for MBCS_OUTPUT_2_SISO has saved the previous state already.
|
||||
* See comments there.
|
||||
*/
|
||||
prevSourceIndex=sourceIndex;
|
||||
|
||||
/* call the callback function */
|
||||
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
|
||||
|
@ -2596,20 +2601,42 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
if(pArgs->flush && source>=sourceLimit && U_SUCCESS(*pErrorCode)) {
|
||||
/* end of input stream */
|
||||
if(c!=0) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
/* the following may change with Jitterbug 2449: would prepare for callback instead of resetting */
|
||||
c=0;
|
||||
prevLength=1;
|
||||
} else if(outputType==MBCS_OUTPUT_2_SISO && prevLength==2) {
|
||||
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
|
||||
if(targetCapacity>0) {
|
||||
*target++=(uint8_t)UCNV_SI;
|
||||
if(offsets!=NULL) {
|
||||
/* set the last source character's index (sourceIndex points at sourceLimit now) */
|
||||
*offsets++=prevSourceIndex;
|
||||
}
|
||||
} else {
|
||||
/* target is full */
|
||||
cnv->charErrorBuffer[0]=(char)UCNV_SI;
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
prevLength=1; /* we switched into SBCS */
|
||||
}
|
||||
|
||||
/* reset the state for the next conversion */
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
c=0;
|
||||
prevLength=1;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=1;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
|
|
|
@ -449,13 +449,13 @@ static void TestConvert()
|
|||
else {
|
||||
for(i=0; i<targetCapacity; i++){
|
||||
if(target[i] != expectedTarget[i]){
|
||||
log_data_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
|
||||
log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
|
||||
}
|
||||
}
|
||||
|
||||
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
|
||||
if(U_FAILURE(err) || i!=6){
|
||||
log_data_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 6\n",
|
||||
if(U_FAILURE(err) || i!=7){
|
||||
log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
|
||||
u_errorName(err), i);
|
||||
}
|
||||
|
||||
|
@ -463,14 +463,14 @@ static void TestConvert()
|
|||
err=U_ZERO_ERROR;
|
||||
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
|
||||
if(i !=0){
|
||||
log_data_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
|
||||
log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
|
||||
}
|
||||
|
||||
err=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
sourceLimit=sizeof(source)/sizeof(source[0]);
|
||||
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
|
||||
if(i !=0 ){
|
||||
log_data_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
|
||||
log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
|
||||
}
|
||||
|
||||
err=U_ZERO_ERROR;
|
||||
|
@ -478,7 +478,7 @@ static void TestConvert()
|
|||
targetLimit=0;
|
||||
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
|
||||
if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
|
||||
log_data_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
|
||||
log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
|
||||
}
|
||||
err=U_ZERO_ERROR;
|
||||
free(target);
|
||||
|
|
|
@ -153,7 +153,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
|
|||
0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
|
||||
|
||||
static const uint8_t expskipIBM_930[] = {
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b };
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
|
||||
|
||||
gInBufferSize = inputsize;
|
||||
gOutBufferSize = outputsize;
|
||||
|
@ -165,9 +165,9 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
|
|||
static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
|
||||
static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
|
||||
|
||||
static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4};
|
||||
static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3};
|
||||
static const int32_t toIBM930Offsskip [] = { 0, 0, 0, 1, 1, 3, 3};
|
||||
static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
|
||||
static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
|
||||
static const int32_t toIBM930Offsskip [] = { 0, 0, 0, 1, 1, 3, 3, 3 };
|
||||
|
||||
if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
|
||||
expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
|
||||
|
@ -190,8 +190,8 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
|
|||
|
||||
{
|
||||
static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
|
||||
static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b };
|
||||
static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8 };
|
||||
static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
|
||||
static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
|
||||
|
||||
/* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
|
||||
if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
|
||||
|
@ -1338,19 +1338,19 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
|||
0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
|
||||
|
||||
static const uint8_t expsubIBM_930[] = {
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b };
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
|
||||
|
||||
static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
|
||||
static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
|
||||
static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
|
||||
|
||||
static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4};
|
||||
static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3};
|
||||
static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3};
|
||||
static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
|
||||
static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
|
||||
static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
|
||||
|
||||
static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7};
|
||||
static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6};
|
||||
static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7};
|
||||
static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
|
||||
static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
|
||||
static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
|
||||
|
||||
gInBufferSize = inputsize;
|
||||
gOutBufferSize = outputsize;
|
||||
|
@ -1823,11 +1823,11 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
|
|||
0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
|
||||
|
||||
const uint8_t expsubwvalIBM_930[] = {
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b };
|
||||
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
|
||||
|
||||
int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4};
|
||||
int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3};
|
||||
int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3}; /* last item: 3,3,3 because there's a shift plus a doublebyter .. */
|
||||
int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
|
||||
int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
|
||||
int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
|
||||
|
||||
gInBufferSize = inputsize;
|
||||
gOutBufferSize = outputsize;
|
||||
|
|
|
@ -727,11 +727,11 @@ static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
|
|||
/* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/
|
||||
{
|
||||
const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E };
|
||||
const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1};
|
||||
int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5};
|
||||
const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1};
|
||||
const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
|
||||
int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 };
|
||||
const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
|
||||
const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c };
|
||||
int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12};
|
||||
int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 };
|
||||
/*from Unicode*/
|
||||
if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
|
||||
expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs ))
|
||||
|
|
Loading…
Add table
Reference in a new issue