ICU-2856: make EBCDIC_STATEFUL always emit SI (Shift In) at the end of a stream if the last character was DBCS, so that the output returns to SBCS mode

X-SVN-Rev: 11788
This commit is contained in:
Markus Scherer 2003-05-02 22:38:41 +00:00
parent 3a31e9598d
commit 0fcfe71911
4 changed files with 65 additions and 38 deletions

View file

@ -2141,7 +2141,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
UChar32 c;
int32_t sourceIndex, nextSourceIndex;
int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
UConverterCallbackReason reason;
uint32_t stage2Entry;
@ -2184,6 +2184,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
prevLength=cnv->fromUnicodeStatus;
/* sourceIndex=-1 if the current character began in the previous buffer */
prevSourceIndex=-1;
sourceIndex= c==0 ? 0 : -1;
nextSourceIndex=0;
@ -2525,7 +2526,10 @@ getTrail:
/* normal end of conversion: prepare for a new character */
c=0;
sourceIndex=nextSourceIndex;
if(offsets!=NULL) {
prevSourceIndex=sourceIndex;
sourceIndex=nextSourceIndex;
}
continue;
/*
@ -2551,6 +2555,7 @@ callback:
* The above branch for MBCS_OUTPUT_2_SISO has saved the previous state already.
* See comments there.
*/
prevSourceIndex=sourceIndex;
/* call the callback function */
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
@ -2596,20 +2601,42 @@ callback:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
if(pArgs->flush && source>=sourceLimit && U_SUCCESS(*pErrorCode)) {
/* end of input stream */
if(c!=0) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
/* the following may change with Jitterbug 2449: would prepare for callback instead of resetting */
c=0;
prevLength=1;
} else if(outputType==MBCS_OUTPUT_2_SISO && prevLength==2) {
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
if(targetCapacity>0) {
*target++=(uint8_t)UCNV_SI;
if(offsets!=NULL) {
/* set the last source character's index (sourceIndex points at sourceLimit now) */
*offsets++=prevSourceIndex;
}
} else {
/* target is full */
cnv->charErrorBuffer[0]=(char)UCNV_SI;
cnv->charErrorBufferLength=1;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
prevLength=1; /* we switched into SBCS */
}
/* reset the state for the next conversion */
if(U_SUCCESS(*pErrorCode)) {
c=0;
prevLength=1;
}
cnv->fromUSurrogateLead=0;
cnv->fromUnicodeStatus=1;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
cnv->fromUnicodeStatus=prevLength;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
cnv->fromUnicodeStatus=prevLength;
/* write back the updated pointers */
pArgs->source=source;
pArgs->target=(char *)target;

View file

@ -449,13 +449,13 @@ static void TestConvert()
else {
for(i=0; i<targetCapacity; i++){
if(target[i] != expectedTarget[i]){
log_data_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
}
}
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
if(U_FAILURE(err) || i!=6){
log_data_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 6\n",
if(U_FAILURE(err) || i!=7){
log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
u_errorName(err), i);
}
@ -463,14 +463,14 @@ static void TestConvert()
err=U_ZERO_ERROR;
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
if(i !=0){
log_data_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
}
err=U_ILLEGAL_ARGUMENT_ERROR;
sourceLimit=sizeof(source)/sizeof(source[0]);
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
if(i !=0 ){
log_data_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
}
err=U_ZERO_ERROR;
@ -478,7 +478,7 @@ static void TestConvert()
targetLimit=0;
i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
log_data_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
}
err=U_ZERO_ERROR;
free(target);

View file

@ -153,7 +153,7 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
static const uint8_t expskipIBM_930[] = {
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b };
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
gInBufferSize = inputsize;
gOutBufferSize = outputsize;
@ -165,9 +165,9 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4};
static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3};
static const int32_t toIBM930Offsskip [] = { 0, 0, 0, 1, 1, 3, 3};
static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
static const int32_t toIBM930Offsskip [] = { 0, 0, 0, 1, 1, 3, 3, 3 };
if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
@ -190,8 +190,8 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
{
static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b };
static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8 };
static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
/* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
@ -1338,19 +1338,19 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
static const uint8_t expsubIBM_930[] = {
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b };
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4};
static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3};
static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3};
static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7};
static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6};
static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7};
static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
gInBufferSize = inputsize;
gOutBufferSize = outputsize;
@ -1823,11 +1823,11 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
const uint8_t expsubwvalIBM_930[] = {
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b };
0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4};
int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3};
int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3}; /* last item: 3,3,3 because there's a shift plus a doublebyter .. */
int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
gInBufferSize = inputsize;
gOutBufferSize = outputsize;

View file

@ -727,11 +727,11 @@ static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
/* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/
{
const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E };
const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1};
int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5};
const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1};
const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 };
const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c };
int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12};
int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 };
/*from Unicode*/
if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs ))