mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-2449 refactor conversion - call fromUnicode callbacks only from ucnv.c framework
X-SVN-Rev: 12667
This commit is contained in:
parent
cab6c351eb
commit
8ab9f23f3a
13 changed files with 319 additions and 741 deletions
|
@ -611,7 +611,7 @@ static void _reset(UConverter *converter, UConverterResetChoice choice,
|
|||
}
|
||||
if(choice!=UCNV_RESET_TO_UNICODE) {
|
||||
converter->fromUnicodeStatus = 0;
|
||||
converter->fromUSurrogateLead = 0;
|
||||
converter->fromUChar32 = 0;
|
||||
converter->invalidUCharLength = converter->charErrorBufferLength = 0;
|
||||
}
|
||||
|
||||
|
@ -864,7 +864,7 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
|
|||
converterSawEndOfInput=
|
||||
(UBool)(U_SUCCESS(*err) &&
|
||||
pArgs->flush && pArgs->source==pArgs->sourceLimit &&
|
||||
cnv->fromUSurrogateLead==0);
|
||||
cnv->fromUChar32==0);
|
||||
|
||||
/* no callback called yet for this iteration */
|
||||
calledCallback=FALSE;
|
||||
|
@ -911,13 +911,11 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
|
|||
* (continue converting by breaking out of only the inner loop)
|
||||
*/
|
||||
break;
|
||||
} else if(pArgs->flush && cnv->fromUSurrogateLead!=0) {
|
||||
} else if(pArgs->flush && cnv->fromUChar32!=0) {
|
||||
/*
|
||||
* the entire input stream is consumed
|
||||
* and there is a partial, truncated input sequence left
|
||||
*/
|
||||
cnv->invalidUCharBuffer[0]=(UChar)cnv->fromUSurrogateLead;
|
||||
cnv->invalidUCharLength=1;
|
||||
|
||||
/* inject an error and continue with callback handling */
|
||||
*err=U_TRUNCATED_CHAR_FOUND;
|
||||
|
@ -970,20 +968,15 @@ _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
|
|||
/* callback handling */
|
||||
{
|
||||
UChar32 codePoint;
|
||||
int32_t i;
|
||||
|
||||
/* get the first code point */
|
||||
i=0;
|
||||
errorInputLength=cnv->invalidUCharLength;
|
||||
if(errorInputLength>0) {
|
||||
U16_NEXT(cnv->invalidUCharBuffer, i, errorInputLength, codePoint);
|
||||
} else {
|
||||
/* should never occur because errors should be caused by some input */
|
||||
codePoint=U_SENTINEL;
|
||||
}
|
||||
/* get and write the code point */
|
||||
codePoint=cnv->fromUChar32;
|
||||
errorInputLength=0;
|
||||
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
|
||||
cnv->invalidUCharLength=(int8_t)errorInputLength;
|
||||
|
||||
/* set the converter state to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUChar32=0;
|
||||
|
||||
/* call the callback function */
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
|
||||
|
|
|
@ -1279,70 +1279,6 @@ getEndOfBuffer_2022(const char** source,
|
|||
return sourceLimit;
|
||||
}
|
||||
|
||||
/*
|
||||
* From Unicode Callback helper function
|
||||
*/
|
||||
/*
 * Invokes the converter's fromUnicode error callback for one code point and
 * writes the resulting target/offset positions back to the caller.
 *
 * args       - conversion arguments; source/target/offsets are temporarily
 *              replaced with the caller's working pointers and restored
 *              before returning
 * sourceChar - code point handed to the callback; it is also written into
 *              invalidUCharBuffer as one or two UTF-16 code units
 * pSource    - in: current source position (read only; never written back)
 * pTarget    - in/out: current target position, advanced past callback output
 * pOffsets   - in/out: current offsets position, advanced when offsets are filled
 * reason     - callback reason passed through unchanged
 * err        - error code passed through to the callback
 */
static void
|
||||
fromUnicodeCallback(UConverterFromUnicodeArgs* args,const UChar32 sourceChar,const UChar** pSource,
|
||||
unsigned char** pTarget,int32_t** pOffsets,UConverterCallbackReason reason, UErrorCode* err){
|
||||
|
||||
/*variables for callback */
|
||||
const UChar* saveSource =NULL;
|
||||
char* saveTarget =NULL;
|
||||
int32_t* saveOffsets =NULL;
|
||||
int currentOffset =0;
|
||||
int saveIndex =0;
|
||||
int32_t* offsets = *pOffsets;
|
||||
const UChar* source = *pSource;
|
||||
unsigned char* target = *pTarget;
|
||||
|
||||
/* rebuild the invalid-input buffer from scratch for this code point */
args->converter->invalidUCharLength = 0;
|
||||
|
||||
/* above U+FFFF: store as a lead/trail surrogate pair, otherwise as one unit */
if(sourceChar>0xffff){
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((sourceChar)>>10)+0xd7c0);
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((sourceChar)&0x3ff)|0xdc00);
|
||||
}
|
||||
else{
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(UChar)sourceChar;
|
||||
}
|
||||
/* the offset used for callback output is the previously written offset plus one */
/* NOTE(review): reads offsets[-1]; presumably at least one offset was already written - confirm */
if(offsets)
|
||||
currentOffset = *(offsets-1)+1;
|
||||
|
||||
/* save the caller-visible pointers, then expose the working pointers to the callback */
saveSource = args->source;
|
||||
saveTarget = args->target;
|
||||
saveOffsets = args->offsets;
|
||||
args->target = (char*)target;
|
||||
args->source = source;
|
||||
args->offsets = offsets;
|
||||
|
||||
/*copies current values for the ErrorFunctor to update */
|
||||
/*Calls the ErrorFunctor */
|
||||
args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (sourceChar),
|
||||
reason,
|
||||
err);
|
||||
|
||||
/* number of target bytes the callback produced */
saveIndex = args->target - (char*)target;
|
||||
/* assign currentOffset to every byte the callback wrote */
if(args->offsets){
|
||||
args->offsets = saveOffsets;
|
||||
while(saveIndex-->0){
|
||||
*offsets = currentOffset;
|
||||
offsets++;
|
||||
}
|
||||
}
|
||||
/* hand the advanced target/offsets back to the caller */
target = (unsigned char*)args->target;
|
||||
*pTarget=target;
|
||||
*pOffsets=offsets;
|
||||
/* restore args; *pSource is not updated, so a source advance made by the callback is discarded */
args->source=saveSource;
|
||||
args->target=saveTarget;
|
||||
args->offsets=saveOffsets;
|
||||
/* clear any pending lead surrogate */
args->converter->fromUSurrogateLead=0x00;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* To Unicode Callback helper function
|
||||
*/
|
||||
|
@ -1528,7 +1464,6 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
UChar32 sourceChar =0x0000;
|
||||
const char* escSeq = NULL;
|
||||
int len =0; /*length of escSeq chars*/
|
||||
UConverterCallbackReason reason;
|
||||
UConverterSharedData* sharedData=NULL;
|
||||
UBool useFallback;
|
||||
|
||||
|
@ -1556,7 +1491,7 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
currentType = &converterData->currentType;
|
||||
|
||||
/* check if the last codepoint of previous buffer was a lead surrogate*/
|
||||
if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
|
||||
if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
|
||||
goto getTrail;
|
||||
}
|
||||
|
||||
|
@ -1700,17 +1635,13 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
}
|
||||
}
|
||||
else{
|
||||
|
||||
/* if we cannot find the character after checking all codepages
|
||||
* then this is an error
|
||||
*/
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
args->converter->fromUSurrogateLead=(UChar)sourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
|
@ -1718,36 +1649,31 @@ getTrail:
|
|||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
reason =UCNV_UNASSIGNED;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
*err = U_ZERO_ERROR;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* callback(unassigned) for a BMP code point */
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
/* Call the callback function*/
|
||||
fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
|
||||
initIterState = *currentState;
|
||||
if (U_FAILURE (*err)){
|
||||
break;
|
||||
}
|
||||
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
} /* end if(myTargetIndex<myTargetLength) */
|
||||
else{
|
||||
|
@ -2045,7 +1971,6 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
UBool isTargetByteDBCS;
|
||||
UBool oldIsTargetByteDBCS;
|
||||
UConverterDataISO2022 *converterData;
|
||||
UConverterCallbackReason reason;
|
||||
UConverterSharedData* sharedData;
|
||||
UBool useFallback;
|
||||
int32_t length =0;
|
||||
|
@ -2070,7 +1995,7 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
}
|
||||
|
||||
isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;
|
||||
if(args->converter->fromUSurrogateLead!=0 && target <targetLimit) {
|
||||
if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
|
||||
goto getTrail;
|
||||
}
|
||||
while(source < sourceLimit){
|
||||
|
@ -2140,13 +2065,10 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
/* oops.. the code point is unassigned
|
||||
* set the error and reason
|
||||
*/
|
||||
reason =UCNV_UNASSIGNED;
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
args->converter->fromUSurrogateLead=(UChar)sourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
|
@ -2154,38 +2076,32 @@ getTrail:
|
|||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
reason =UCNV_UNASSIGNED;
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
*err = U_ZERO_ERROR;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* callback(unassigned) for a BMP code point */
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetByteDBCS;
|
||||
/* Call the callback function*/
|
||||
fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
|
||||
isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
|
||||
|
||||
if (U_FAILURE (*err)){
|
||||
break;
|
||||
}
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetByteDBCS;
|
||||
break;
|
||||
}
|
||||
} /* end if(myTargetIndex<myTargetLength) */
|
||||
else{
|
||||
|
@ -2542,7 +2458,6 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
int len =0; /*length of escSeq chars*/
|
||||
uint32_t targetValue=0;
|
||||
uint8_t planeVal=0;
|
||||
UConverterCallbackReason reason;
|
||||
UConverterSharedData* sharedData=NULL;
|
||||
UBool useFallback;
|
||||
|
||||
|
@ -2575,7 +2490,7 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
sharedData = (*currentConverter)->sharedData;
|
||||
|
||||
/* check if the last codepoint of previous buffer was a lead surrogate*/
|
||||
if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
|
||||
if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
|
||||
goto getTrail;
|
||||
}
|
||||
|
||||
|
@ -2591,7 +2506,6 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
|
|||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
args->converter->fromUSurrogateLead=(UChar)sourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
|
@ -2599,28 +2513,28 @@ getTrail:
|
|||
UChar trail=(UChar) *source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
source++;
|
||||
/*(((args->converter->fromUSurrogateLead)<<10L)+(trail)-((0xd800<<10L)+0xdc00-0x10000))*/
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
/* convert this surrogate code point */
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
args->converter->fromUChar32=0x00;
|
||||
/* convert this supplementary code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2755,20 +2669,12 @@ getTrail:
|
|||
|
||||
}
|
||||
else{
|
||||
|
||||
/* if we cannot find the character after checking all codepages
|
||||
* then this is an error
|
||||
*/
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
callback:
|
||||
|
||||
fromUnicodeCallback(args,sourceChar,&source,&target,&offsets,reason,err);
|
||||
initIterState = *currentState;
|
||||
|
||||
if (U_FAILURE (*err)){
|
||||
break;
|
||||
}
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
} /* end if(myTargetIndex<myTargetLength) */
|
||||
else{
|
||||
|
|
|
@ -148,7 +148,17 @@ struct UConverter {
|
|||
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
|
||||
int32_t mode;
|
||||
uint32_t fromUnicodeStatus;
|
||||
UChar fromUSurrogateLead; /* similar to toUBytes; keeps the lead surrogate of the current character */
|
||||
|
||||
/*
|
||||
* More fromUnicode() status. Serves 3 purposes:
|
||||
* - keeps a lead surrogate between buffers (similar to toUBytes[])
|
||||
* - keeps a lead surrogate at the end of the stream,
|
||||
* which the framework handles as truncated input
|
||||
* - if the fromUnicode() implementation returns to the framework
|
||||
* (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
|
||||
* for this code point
|
||||
*/
|
||||
UChar32 fromUChar32;
|
||||
|
||||
int8_t subCharLen; /* length of the codepage specific character sequence */
|
||||
int8_t invalidCharLength;
|
||||
|
|
|
@ -116,3 +116,43 @@ ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
|||
uset_addRange(set, 0, 0xd7ff);
|
||||
uset_addRange(set, 0xe000, 0x10ffff);
|
||||
}
|
||||
|
||||
/*
 * Copies converter output bytes to the target buffer and stores whatever
 * does not fit in the converter's overflow buffer.
 *
 * cnv         - converter whose charErrorBuffer receives bytes that do not
 *               fit; may be NULL, in which case those bytes are not stored
 * bytes       - bytes to write
 * length      - number of bytes to write
 * target      - in/out: current target position, advanced past written bytes
 * targetLimit - end of the target buffer
 * offsets     - may be NULL or point to NULL; otherwise in/out: one entry
 *               equal to sourceIndex is written per target byte
 * sourceIndex - offset value recorded for each written byte
 * pErrorCode  - set to U_BUFFER_OVERFLOW_ERROR if not all bytes fit into the
 *               target; not examined on entry
 */
U_CFUNC void
|
||||
ucnv_fromUWriteBytes(UConverter *cnv,
|
||||
const char *bytes, int32_t length,
|
||||
char **target, const char *targetLimit,
|
||||
int32_t **offsets,
|
||||
int32_t sourceIndex,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *t=*target;
|
||||
int32_t *o;
|
||||
|
||||
/* write bytes */
|
||||
/* o is only assigned when offsets is non-NULL; it is only used in the else branch */
if(offsets==NULL || (o=*offsets)==NULL) {
|
||||
while(length>0 && t<targetLimit) {
|
||||
*t++=*bytes++;
|
||||
--length;
|
||||
}
|
||||
} else {
|
||||
/* output with offsets */
|
||||
while(length>0 && t<targetLimit) {
|
||||
*t++=*bytes++;
|
||||
*o++=sourceIndex;
|
||||
--length;
|
||||
}
|
||||
*offsets=o;
|
||||
}
|
||||
*target=t;
|
||||
|
||||
/* write overflow */
|
||||
/* length now holds the number of bytes that did not fit into the target */
if(length>0) {
|
||||
if(cnv!=NULL) {
|
||||
/* NOTE(review): the capacity of charErrorBuffer is not checked here - confirm callers bound length */
t=(char *)cnv->charErrorBuffer;
|
||||
cnv->charErrorBufferLength=(int8_t)length;
|
||||
do {
|
||||
*t++=(uint8_t)*bytes++;
|
||||
} while(--length>0);
|
||||
}
|
||||
/* the overflow error is reported even when cnv is NULL and the bytes were dropped */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -229,4 +229,12 @@ ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
|
|||
UConverterUnicodeSet which,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_fromUWriteBytes(UConverter *cnv,
|
||||
const char *bytes, int32_t length,
|
||||
char **target, const char *targetLimit,
|
||||
int32_t **offsets,
|
||||
int32_t sourceIndex,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* UCNV_CNV */
|
||||
|
|
|
@ -311,10 +311,10 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[0] = 0;
|
||||
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
if (args->converter->fromUChar32)
|
||||
{
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
ch = args->converter->fromUChar32;
|
||||
args->converter->fromUChar32 = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -346,7 +346,7 @@ lowsurogate:
|
|||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
args->converter->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -396,10 +396,10 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[0] = 0;
|
||||
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
if (args->converter->fromUChar32)
|
||||
{
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
ch = args->converter->fromUChar32;
|
||||
args->converter->fromUChar32 = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -423,7 +423,7 @@ lowsurogate:
|
|||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
args->converter->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -790,10 +790,10 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[3] = 0;
|
||||
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
if (args->converter->fromUChar32)
|
||||
{
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
ch = args->converter->fromUChar32;
|
||||
args->converter->fromUChar32 = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -817,7 +817,7 @@ lowsurogate:
|
|||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
args->converter->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -867,10 +867,10 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[3] = 0;
|
||||
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
if (args->converter->fromUChar32)
|
||||
{
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
ch = args->converter->fromUChar32;
|
||||
args->converter->fromUChar32 = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -894,7 +894,7 @@ lowsurogate:
|
|||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
args->converter->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -443,14 +443,14 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
|
|||
const UChar *sourceLimit = args->sourceLimit;
|
||||
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
|
||||
UBool isCESU8 = (UBool)(args->converter->sharedData == &_CESU8Data);
|
||||
uint32_t ch, ch2;
|
||||
UChar32 ch, ch2;
|
||||
int16_t indexToWrite;
|
||||
char temp[4];
|
||||
|
||||
if (cnv->fromUSurrogateLead && myTarget < targetLimit)
|
||||
if (cnv->fromUChar32 && myTarget < targetLimit)
|
||||
{
|
||||
ch = cnv->fromUSurrogateLead;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
ch = cnv->fromUChar32;
|
||||
cnv->fromUChar32 = 0;
|
||||
goto lowsurrogate;
|
||||
}
|
||||
|
||||
|
@ -494,63 +494,21 @@ lowsurrogate:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
ch2 = ch;
|
||||
cnv->fromUChar32 = ch;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
cnv->fromUSurrogateLead = (UChar)ch;
|
||||
cnv->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
ch2 = ch;
|
||||
}
|
||||
|
||||
if(ch2 != 0) {
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
cnv->fromUChar32 = ch;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
|
||||
/* update the arguments structure */
|
||||
args->source=mySource;
|
||||
args->target=(char *)myTarget;
|
||||
|
||||
/* write the code point as code units */
|
||||
cnv->invalidUCharBuffer[0] = (UChar)ch2;
|
||||
cnv->invalidUCharLength = 1;
|
||||
|
||||
/* call the callback function */
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, args, cnv->invalidUCharBuffer, 1, ch2, UCNV_ILLEGAL, err);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
ch = cnv->fromUSurrogateLead;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
|
||||
myTarget=(uint8_t *)args->target;
|
||||
mySource=args->source;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*err==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*err)) {
|
||||
/* break on error */
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
/*
|
||||
* } else if(ch != 0) { ...
|
||||
* ### TODO 2002jul01 markus: It looks like this code (from ucnvmbcs.c)
|
||||
* does not handle the case where the callback leaves ch=fromUSurrogateLead!=0 .
|
||||
* We would have to check myTarget<targetLimit and goto lowsurrogate?!
|
||||
*/
|
||||
}
|
||||
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -602,15 +560,15 @@ U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeA
|
|||
const UChar *sourceLimit = args->sourceLimit;
|
||||
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
|
||||
UBool isCESU8 = (UBool)(args->converter->sharedData == &_CESU8Data);
|
||||
uint32_t ch, ch2;
|
||||
UChar32 ch, ch2;
|
||||
int32_t offsetNum, nextSourceIndex;
|
||||
int16_t indexToWrite;
|
||||
char temp[4];
|
||||
|
||||
if (cnv->fromUSurrogateLead && myTarget < targetLimit)
|
||||
if (cnv->fromUChar32 && myTarget < targetLimit)
|
||||
{
|
||||
ch = cnv->fromUSurrogateLead;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
ch = cnv->fromUChar32;
|
||||
cnv->fromUChar32 = 0;
|
||||
offsetNum = -1;
|
||||
nextSourceIndex = 0;
|
||||
goto lowsurrogate;
|
||||
|
@ -664,69 +622,21 @@ lowsurrogate:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
ch2 = ch;
|
||||
cnv->fromUChar32 = ch;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
cnv->fromUSurrogateLead = (UChar)ch;
|
||||
cnv->fromUChar32 = ch;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
ch2 = ch;
|
||||
}
|
||||
|
||||
if(ch2 != 0) {
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
cnv->fromUChar32 = ch;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
|
||||
/* update the arguments structure */
|
||||
args->source=mySource;
|
||||
args->target=(char *)myTarget;
|
||||
args->offsets=myOffsets;
|
||||
|
||||
/* write the code point as code units */
|
||||
cnv->invalidUCharBuffer[0] = (UChar)ch2;
|
||||
cnv->invalidUCharLength = 1;
|
||||
|
||||
/* call the callback function */
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, args, cnv->invalidUCharBuffer, 1, ch2, UCNV_ILLEGAL, err);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
ch = cnv->fromUSurrogateLead;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
myOffsets=ucnv_updateCallbackOffsets(myOffsets, ((uint8_t *)args->target)-myTarget, offsetNum);
|
||||
myTarget=(uint8_t *)args->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
offsetNum=nextSourceIndex+(args->source-mySource);
|
||||
mySource=args->source;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*err==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*err)) {
|
||||
/* break on error */
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*err=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
/*
|
||||
* } else if(ch != 0) { ...
|
||||
* ### TODO 2002jul01 markus: It looks like this code (from ucnvmbcs.c)
|
||||
* does not handle the case where the callback leaves ch=fromUSurrogateLead!=0 .
|
||||
* We would have to check myTarget<targetLimit and goto lowsurrogate?!
|
||||
*/
|
||||
}
|
||||
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -402,7 +402,7 @@ U_ALIGN_CODE(16)
|
|||
offsets=pArgs->offsets;
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
prev=(int32_t)cnv->fromUnicodeStatus;
|
||||
if(prev==0) {
|
||||
prev=BOCU1_ASCII_PREV;
|
||||
|
@ -667,7 +667,7 @@ getTrail:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUChar32= c<0 ? -c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -701,7 +701,7 @@ _Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
|
|||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
prev=(int32_t)cnv->fromUnicodeStatus;
|
||||
if(prev==0) {
|
||||
prev=BOCU1_ASCII_PREV;
|
||||
|
@ -888,7 +888,7 @@ getTrail:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUChar32= c<0 ? -c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
|
|
@ -69,7 +69,7 @@ _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, U
|
|||
cnv->toUnicodeStatus = 0;
|
||||
cnv->fromUnicodeStatus= 0;
|
||||
cnv->mode=0;
|
||||
cnv->fromUSurrogateLead=0x0000;
|
||||
cnv->fromUChar32=0x0000;
|
||||
cnv->extraInfo = uprv_malloc (sizeof (UConverterDataHZ));
|
||||
if(cnv->extraInfo != NULL){
|
||||
((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
|
||||
|
@ -108,7 +108,7 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){
|
|||
}
|
||||
if(choice!=UCNV_RESET_TO_UNICODE) {
|
||||
cnv->fromUnicodeStatus= 0;
|
||||
cnv->fromUSurrogateLead=0x0000;
|
||||
cnv->fromUChar32=0x0000;
|
||||
if(cnv->extraInfo != NULL){
|
||||
((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
|
||||
((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
|
||||
|
@ -347,7 +347,6 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
|
||||
UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
|
||||
UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
||||
UConverterCallbackReason reason;
|
||||
UBool isEscapeAppended =FALSE;
|
||||
int len =0;
|
||||
const char* escSeq=NULL;
|
||||
|
@ -356,7 +355,7 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) {
|
||||
if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
|
||||
goto getTrail;
|
||||
}
|
||||
/*writing the char to the output stream */
|
||||
|
@ -440,16 +439,12 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
|
||||
}
|
||||
else{
|
||||
/* oops.. the code point is unassingned
|
||||
* set the error and reason
|
||||
*/
|
||||
reason =UCNV_UNASSIGNED;
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
/* oops.. the code point is unassigned */
|
||||
/*Handle surrogates */
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(mySourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
|
||||
args->converter->fromUSurrogateLead=(UChar)mySourceChar;
|
||||
args->converter->fromUChar32=mySourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(mySourceIndex < mySourceLength) {
|
||||
|
@ -457,87 +452,32 @@ getTrail:
|
|||
UChar trail=(UChar) args->source[mySourceIndex];
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
++mySourceIndex;
|
||||
mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
|
||||
args->converter->fromUChar32=0x00;
|
||||
/* there are no surrogates in GB2312*/
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
reason=UCNV_UNASSIGNED;
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
*err = U_ZERO_ERROR;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* callback(unassigned) for a BMP code point */
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
|
||||
{
|
||||
int32_t saveIndex=0;
|
||||
int32_t currentOffset = (args->offsets) ? *(offsets-1)+1:0;
|
||||
char * saveTarget = args->target;
|
||||
const UChar* saveSource = args->source;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
|
||||
args->converter->invalidUCharLength = 0;
|
||||
|
||||
if(mySourceChar>0xffff){
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)>>10)+0xd7c0);
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)&0x3ff)|0xdc00);
|
||||
}
|
||||
else{
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(UChar)mySourceChar;
|
||||
}
|
||||
|
||||
myConverterData->isTargetUCharDBCS = (UBool)isTargetUCharDBCS;
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
args->offsets = args->offsets?offsets:0;
|
||||
|
||||
|
||||
saveIndex = myTargetIndex;
|
||||
/*copies current values for the ErrorFunctor to update */
|
||||
/*Calls the ErrorFunctor */
|
||||
args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (mySourceChar),
|
||||
reason,
|
||||
err);
|
||||
/*Update the local Indexes so that the conversion
|
||||
*can restart at the right points
|
||||
*/
|
||||
myTargetIndex = (int32_t)(args->target - (char*)myTarget);
|
||||
mySourceIndex = (int32_t)(args->source - mySource);
|
||||
args->offsets = saveOffsets;
|
||||
saveIndex = myTargetIndex - saveIndex;
|
||||
if(args->offsets){
|
||||
args->offsets = saveOffsets;
|
||||
while(saveIndex-->0){
|
||||
*offsets = currentOffset;
|
||||
offsets++;
|
||||
}
|
||||
}
|
||||
isTargetUCharDBCS=myConverterData->isTargetUCharDBCS;
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
if (U_FAILURE (*err))
|
||||
break;
|
||||
|
||||
}
|
||||
args->converter->fromUChar32=mySourceChar;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else{
|
||||
|
|
|
@ -116,7 +116,7 @@ typedef struct{
|
|||
MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
|
||||
MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */
|
||||
MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */
|
||||
UBool isFirstBuffer;
|
||||
UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
|
||||
char name[30];
|
||||
}UConverterDataISCII;
|
||||
|
||||
|
@ -197,13 +197,12 @@ _ISCIIReset(UConverter *cnv, UConverterResetChoice choice){
|
|||
data->contextCharToUnicode=NO_CHAR_MARKER;
|
||||
}
|
||||
if(choice!=UCNV_RESET_TO_UNICODE) {
|
||||
cnv->fromUSurrogateLead=0x0000;
|
||||
cnv->fromUChar32=0x0000;
|
||||
data->contextCharFromUnicode=0x00;
|
||||
data->currentMaskFromUnicode=data->defDeltaToUnicode;
|
||||
data->currentDeltaFromUnicode=data->defDeltaToUnicode;
|
||||
data->isFirstBuffer=TRUE;
|
||||
}
|
||||
data->isFirstBuffer=TRUE;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -811,7 +810,6 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
int32_t* offsets = args->offsets;
|
||||
uint32_t targetByteUnit = 0x0000;
|
||||
UChar32 sourceChar = 0x0000;
|
||||
UConverterCallbackReason reason;
|
||||
UBool useFallback;
|
||||
UConverterDataISCII *converterData;
|
||||
uint16_t newDelta=0;
|
||||
|
@ -828,7 +826,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
newDelta=converterData->currentDeltaFromUnicode;
|
||||
range = (uint16_t)(newDelta/DELTA);
|
||||
|
||||
if(args->converter->fromUSurrogateLead!=0 && target <targetLimit) {
|
||||
if((sourceChar = args->converter->fromUChar32)!=0) {
|
||||
goto getTrail;
|
||||
}
|
||||
|
||||
|
@ -946,16 +944,10 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
|||
}
|
||||
}
|
||||
else{
|
||||
/* oops.. the code point is unassingned
|
||||
* set the error and reason
|
||||
*/
|
||||
reason =UCNV_UNASSIGNED;
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
|
||||
/* oops.. the code point is unassigned */
|
||||
/*check if the char is a First surrogate*/
|
||||
if(UTF_IS_SURROGATE(sourceChar)) {
|
||||
if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
|
||||
args->converter->fromUSurrogateLead=(UChar)sourceChar;
|
||||
getTrail:
|
||||
/*look ahead to find the trail surrogate*/
|
||||
if(source < sourceLimit) {
|
||||
|
@ -963,94 +955,32 @@ getTrail:
|
|||
UChar trail= (*source);
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
source++;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
reason =UCNV_UNASSIGNED;
|
||||
sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
/* convert this surrogate code point */
|
||||
/* exit this condition tree */
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
sourceChar = args->converter->fromUSurrogateLead;
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
*err = U_ZERO_ERROR;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*err=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
} else {
|
||||
/* callback(unassigned) for a BMP code point */
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
{
|
||||
/*variables for callback */
|
||||
const UChar* saveSource =NULL;
|
||||
char* saveTarget =NULL;
|
||||
int32_t* saveOffsets =NULL;
|
||||
int currentOffset =0;
|
||||
int32_t saveIndex =0;
|
||||
|
||||
args->converter->invalidUCharLength = 0;
|
||||
|
||||
if(sourceChar>0xffff){
|
||||
/* we have got a surrogate pair... dissable and populate the invalidUCharBuffer */
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++]
|
||||
=(uint16_t)(((sourceChar)>>10)+0xd7c0);
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++]
|
||||
=(uint16_t)(((sourceChar)&0x3ff)|0xdc00);
|
||||
}
|
||||
else{
|
||||
args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++]
|
||||
=(UChar)sourceChar;
|
||||
}
|
||||
|
||||
if(offsets){
|
||||
currentOffset = *(offsets-1)+1;
|
||||
}
|
||||
saveSource = args->source;
|
||||
saveTarget = args->target;
|
||||
saveOffsets = args->offsets;
|
||||
args->target = (char*)target;
|
||||
args->source = source;
|
||||
args->offsets = offsets;
|
||||
|
||||
/*copies current values for the ErrorFunctor to update */
|
||||
/*Calls the ErrorFunctor */
|
||||
args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (sourceChar),
|
||||
reason,
|
||||
err);
|
||||
|
||||
saveIndex = (int32_t)(args->target - (char*)target);
|
||||
if(args->offsets){
|
||||
args->offsets = saveOffsets;
|
||||
while(saveIndex-->0){
|
||||
*offsets = currentOffset;
|
||||
offsets++;
|
||||
}
|
||||
}
|
||||
target = (unsigned char*)args->target;
|
||||
args->source=saveSource;
|
||||
args->target=saveTarget;
|
||||
args->offsets=saveOffsets;
|
||||
args->converter->fromUSurrogateLead=0x00;
|
||||
|
||||
if (U_FAILURE (*err)){
|
||||
break;
|
||||
}
|
||||
}
|
||||
args->converter->fromUChar32=sourceChar;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
/*save the state and return */
|
||||
|
|
|
@ -171,7 +171,7 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
cp=cnv->fromUSurrogateLead;
|
||||
cp=cnv->fromUChar32;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex= cp==0 ? 0 : -1;
|
||||
|
@ -299,7 +299,7 @@ getTrail:
|
|||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
cnv->fromUSurrogateLead=(UChar)cp;
|
||||
cnv->fromUChar32=cp;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -308,14 +308,7 @@ getTrail:
|
|||
}
|
||||
|
||||
*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
|
||||
|
||||
/* write the code point as code units */
|
||||
{
|
||||
int32_t i=0;
|
||||
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, i, cp);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
}
|
||||
|
||||
cnv->fromUChar32=cp;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -315,11 +315,14 @@ static void
|
|||
_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
fromUCallback(UConverter *cnv,
|
||||
const void *context, UConverterFromUnicodeArgs *pArgs,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason, UErrorCode *pErrorCode);
|
||||
static UChar32
|
||||
_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
|
||||
UChar32 cp,
|
||||
const UChar **source, const UChar *sourceLimit,
|
||||
char **target, const char *targetLimit,
|
||||
int32_t **offsets, int32_t sourceIndex,
|
||||
UBool useFallback, UBool flush,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
toUCallback(UConverter *cnv,
|
||||
|
@ -819,7 +822,7 @@ _MBCSOpen(UConverter *cnv,
|
|||
cnv->toULength=0; /* byteIndex */
|
||||
|
||||
/* fromUnicode */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUChar32=0;
|
||||
cnv->fromUnicodeStatus=1; /* prevLength */
|
||||
#endif
|
||||
}
|
||||
|
@ -2141,7 +2144,6 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
uint32_t stage2Entry;
|
||||
uint32_t value;
|
||||
int32_t length, prevLength;
|
||||
|
@ -2178,7 +2180,7 @@ _MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
prevLength=cnv->fromUnicodeStatus;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
|
@ -2246,9 +2248,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -2257,9 +2258,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2422,8 +2422,32 @@ getTrail:
|
|||
* There is no way with this data structure for fallback output
|
||||
* for other than U+0000 to be a zero byte.
|
||||
*/
|
||||
/* callback(unassigned) */
|
||||
goto unassigned;
|
||||
|
||||
unassigned:
|
||||
/* try an extension mapping */
|
||||
pArgs->source=source;
|
||||
c=_extFromU(cnv, cnv->sharedData,
|
||||
c, &source, sourceLimit,
|
||||
(char **)&target, (char *)target+targetCapacity,
|
||||
&offsets, sourceIndex,
|
||||
(UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
|
||||
pErrorCode);
|
||||
nextSourceIndex+=(int32_t)(source-pArgs->source);
|
||||
prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
} else {
|
||||
/* a mapping was written to the target, continue */
|
||||
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
if(offsets!=NULL) {
|
||||
prevSourceIndex=sourceIndex;
|
||||
sourceIndex=nextSourceIndex;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* write the output character bytes from value and length */
|
||||
|
@ -2529,69 +2553,6 @@ getTrail:
|
|||
sourceIndex=nextSourceIndex;
|
||||
}
|
||||
continue;
|
||||
|
||||
/*
|
||||
* This is the same ugly trick as in ToUnicode(), for the
|
||||
* same reasons...
|
||||
*/
|
||||
unassigned:
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
/*
|
||||
* Do not save the prevLength SISO state because prevLength is set for
|
||||
* the character that is now not output because it is unassigned or it is
|
||||
* a fallback that is not taken.
|
||||
* The above branch for MBCS_OUTPUT_2_SISO has saved the previous state already.
|
||||
* See comments there.
|
||||
*/
|
||||
prevSourceIndex=sourceIndex;
|
||||
|
||||
/* call the callback function */
|
||||
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
prevLength=cnv->fromUnicodeStatus;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=nextSourceIndex+(pArgs->source-source);
|
||||
source=pArgs->source;
|
||||
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
c=0;
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not need to repeat the statements from the normal
|
||||
* end of the conversion because we already updated all the
|
||||
* necessary variables.
|
||||
*/
|
||||
} else {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
|
@ -2630,7 +2591,7 @@ callback:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -2656,7 +2617,6 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
int32_t sourceIndex, nextSourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
uint32_t stage2Entry;
|
||||
uint32_t value;
|
||||
int32_t length, prevLength;
|
||||
|
@ -2681,7 +2641,7 @@ _MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
prevLength=cnv->fromUnicodeStatus;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
|
@ -2735,9 +2695,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -2746,9 +2705,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2774,8 +2732,28 @@ getTrail:
|
|||
* There is no way with this data structure for fallback output
|
||||
* for other than U+0000 to be a zero byte.
|
||||
*/
|
||||
/* callback(unassigned) */
|
||||
goto unassigned;
|
||||
|
||||
unassigned:
|
||||
/* try an extension mapping */
|
||||
pArgs->source=source;
|
||||
c=_extFromU(cnv, cnv->sharedData,
|
||||
c, &source, sourceLimit,
|
||||
(char **)&target, (char *)target+targetCapacity,
|
||||
&offsets, sourceIndex,
|
||||
(UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
|
||||
pErrorCode);
|
||||
nextSourceIndex+=(int32_t)(source-pArgs->source);
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
} else {
|
||||
/* a mapping was written to the target, continue */
|
||||
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
sourceIndex=nextSourceIndex;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* write the output character bytes from value and length */
|
||||
|
@ -2815,62 +2793,6 @@ getTrail:
|
|||
c=0;
|
||||
sourceIndex=nextSourceIndex;
|
||||
continue;
|
||||
|
||||
/*
|
||||
* This is the same ugly trick as in ToUnicode(), for the
|
||||
* same reasons...
|
||||
*/
|
||||
unassigned:
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
|
||||
/* call the callback function */
|
||||
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
prevLength=cnv->fromUnicodeStatus;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=nextSourceIndex+(pArgs->source-source);
|
||||
source=pArgs->source;
|
||||
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
c=0;
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not need to repeat the statements from the normal
|
||||
* end of the conversion because we already updated all the
|
||||
* necessary variables.
|
||||
*/
|
||||
} else {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
|
@ -2879,7 +2801,7 @@ callback:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -2905,7 +2827,6 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
int32_t sourceIndex, nextSourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
uint16_t value, minValue;
|
||||
UBool hasSupplementary;
|
||||
|
||||
|
@ -2934,7 +2855,7 @@ _MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
hasSupplementary=(UBool)(cnv->sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex= c==0 ? 0 : -1;
|
||||
|
@ -2982,9 +2903,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -2993,9 +2913,8 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto callback;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3016,65 +2935,28 @@ getTrail:
|
|||
/* normal end of conversion: prepare for a new character */
|
||||
c=0;
|
||||
sourceIndex=nextSourceIndex;
|
||||
continue;
|
||||
} else { /* unassigned */
|
||||
/*
|
||||
* We allow a 0 byte output if the Unicode code point is
|
||||
* U+0000 and also if the "assigned" bit is set for this entry.
|
||||
* There is no way with this data structure for fallback output
|
||||
* for other than U+0000 to be a zero byte.
|
||||
*/
|
||||
/* callback(unassigned) */
|
||||
}
|
||||
unassigned:
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
/* try an extension mapping */
|
||||
pArgs->source=source;
|
||||
c=_extFromU(cnv, cnv->sharedData,
|
||||
c, &source, sourceLimit,
|
||||
(char **)&target, (char *)target+targetCapacity,
|
||||
&offsets, sourceIndex,
|
||||
(UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
|
||||
pErrorCode);
|
||||
nextSourceIndex+=(int32_t)(source-pArgs->source);
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
} else {
|
||||
/* a mapping was written to the target, continue */
|
||||
|
||||
/* call the callback function */
|
||||
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=nextSourceIndex+(pArgs->source-source);
|
||||
source=pArgs->source;
|
||||
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
c=0;
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
sourceIndex=nextSourceIndex;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We do not need to repeat the statements from the normal
|
||||
* end of the conversion because we already updated all the
|
||||
* necessary variables.
|
||||
*/
|
||||
} else {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
|
@ -3083,7 +2965,7 @@ callback:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -3113,7 +2995,6 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
int32_t sourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
uint16_t value, minValue;
|
||||
|
||||
/* set up the local pointers */
|
||||
|
@ -3140,7 +3021,7 @@ _MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex= c==0 ? 0 : -1;
|
||||
|
@ -3237,15 +3118,6 @@ unrolled:
|
|||
continue;
|
||||
} else if(!UTF_IS_SURROGATE(c)) {
|
||||
/* normal, unassigned BMP character */
|
||||
/*
|
||||
* We allow a 0 byte output if the Unicode code point is
|
||||
* U+0000 and also if the "assigned" bit is set for this entry.
|
||||
* There is no way with this data structure for fallback output
|
||||
* for other than U+0000 to be a zero byte.
|
||||
*/
|
||||
/* callback(unassigned) */
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
} else if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
|
@ -3256,13 +3128,11 @@ getTrail:
|
|||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
/* this codepage does not map supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -3271,69 +3141,45 @@ getTrail:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
break;
|
||||
}
|
||||
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* get the number of code units for c to correctly advance sourceIndex after the callback call */
|
||||
length=UTF_CHAR_LENGTH(c);
|
||||
/* c does not have a mapping */
|
||||
|
||||
/* set offsets since the start or the last callback */
|
||||
/* get the number of code units for c to correctly advance sourceIndex */
|
||||
length=U16_LENGTH(c);
|
||||
|
||||
/* set offsets since the start or the last extension */
|
||||
if(offsets!=NULL) {
|
||||
int32_t count=(int32_t)(source-lastSource);
|
||||
|
||||
/* do not set the offset for the callback-causing character */
|
||||
/* do not set the offset for this character */
|
||||
count-=length;
|
||||
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
--count;
|
||||
}
|
||||
/* offset and sourceIndex are now set for the current character */
|
||||
/* offsets and sourceIndex are now set for the current character */
|
||||
}
|
||||
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
/* try an extension mapping */
|
||||
lastSource=source;
|
||||
c=_extFromU(cnv, cnv->sharedData,
|
||||
c, &source, sourceLimit,
|
||||
(char **)&target, (char *)target+targetCapacity,
|
||||
&offsets, sourceIndex,
|
||||
(UBool)UCNV_FROM_U_USE_FALLBACK(cnv, c), pArgs->flush,
|
||||
pErrorCode);
|
||||
sourceIndex+=length+(int32_t)(source-lastSource);
|
||||
lastSource=source;
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
|
||||
/* call the callback function */
|
||||
fromUCallback(cnv, cnv->fromUContext, pArgs, c, reason, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex+=length+(pArgs->source-source);
|
||||
source=lastSource=pArgs->source;
|
||||
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
|
||||
length=sourceLimit-source;
|
||||
if(length<targetCapacity) {
|
||||
targetCapacity=length;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
c=0;
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
/* not mappable or buffer overflow */
|
||||
break;
|
||||
} else {
|
||||
/* a mapping was written to the target, continue */
|
||||
}
|
||||
|
||||
#if MBCS_UNROLL_SINGLE_FROM_BMP
|
||||
|
@ -3357,7 +3203,7 @@ getTrail:
|
|||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -3672,35 +3518,53 @@ const UConverterSharedData _MBCSData={
|
|||
0
|
||||
};
|
||||
|
||||
/* GB 18030 special handling ------------------------------------------------ */
|
||||
/* conversion extensions for input not in the main table -------------------- */
|
||||
|
||||
/* definition of LINEAR macros and gb18030Ranges see near the beginning of the file */
|
||||
/*
|
||||
* Hardcoded extension handling for GB 18030.
|
||||
* Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
|
||||
*
|
||||
* In the future, conversion extensions may handle m:n mappings and delta tables,
|
||||
* see http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/conversion/conversion_extensions.html
|
||||
*
|
||||
* If an input character cannot be mapped, then these functions set an error
|
||||
* code. The framework will then call the callback function.
|
||||
*/
|
||||
|
||||
/* the callback functions handle GB 18030 specially */
|
||||
static void
|
||||
fromUCallback(UConverter *cnv,
|
||||
const void *context, UConverterFromUnicodeArgs *pArgs,
|
||||
UChar32 codePoint,
|
||||
UConverterCallbackReason reason, UErrorCode *pErrorCode) {
|
||||
int32_t i;
|
||||
/*
|
||||
* TODO when implementing real extensions, review whether the useFallback parameter
|
||||
* should get cnv->useFallback or the full resolution considering cp as well
|
||||
*/
|
||||
|
||||
if((cnv->options&_MBCS_OPTION_GB18030)!=0 && reason==UCNV_UNASSIGNED) {
|
||||
/*
|
||||
* @return if(U_FAILURE) return the code point for cnv->fromUChar32
|
||||
* else return 0 after output has been written to the target
|
||||
*/
|
||||
static UChar32
|
||||
_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
|
||||
UChar32 cp,
|
||||
const UChar **source, const UChar *sourceLimit,
|
||||
char **target, const char *targetLimit,
|
||||
int32_t **offsets, int32_t sourceIndex,
|
||||
UBool useFallback, UBool flush,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* GB 18030 */
|
||||
if(cnv!=NULL && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
|
||||
const uint32_t *range;
|
||||
int32_t i;
|
||||
|
||||
range=gb18030Ranges[0];
|
||||
for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
|
||||
if(range[0]<=(uint32_t)codePoint && (uint32_t)codePoint<=range[1]) {
|
||||
if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
|
||||
/* found the Unicode code point, output the four-byte sequence for it */
|
||||
uint32_t linear;
|
||||
char bytes[4];
|
||||
|
||||
/* found the Unicode code point, output the four-byte sequence for it */
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
|
||||
/* get the linear value of the first GB 18030 code in this range */
|
||||
linear=range[2]-LINEAR_18030_BASE;
|
||||
|
||||
/* add the offset from the beginning of the range */
|
||||
linear+=((uint32_t)codePoint-range[0]);
|
||||
linear+=((uint32_t)cp-range[0]);
|
||||
|
||||
/* turn this into a four-byte sequence */
|
||||
bytes[3]=(char)(0x30+linear%10); linear/=10;
|
||||
|
@ -3709,21 +3573,21 @@ fromUCallback(UConverter *cnv,
|
|||
bytes[0]=(char)(0x81+linear);
|
||||
|
||||
/* output this sequence */
|
||||
ucnv_cbFromUWriteBytes(pArgs, bytes, 4, 0, pErrorCode);
|
||||
return;
|
||||
ucnv_fromUWriteBytes(cnv,
|
||||
bytes, 4, target, targetLimit,
|
||||
offsets, sourceIndex, pErrorCode);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* write the code point as code units */
|
||||
i=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, codePoint);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
|
||||
/* call the normal callback function */
|
||||
cnv->fromUCharErrorBehaviour(context, pArgs, cnv->invalidUCharBuffer, i, codePoint, reason, pErrorCode);
|
||||
/* no mapping */
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return cp;
|
||||
}
|
||||
|
||||
/* GB 18030 special handling ------------------------------------------------ */
|
||||
|
||||
static void
|
||||
toUCallback(UConverter *cnv,
|
||||
const void *context, UConverterToUnicodeArgs *pArgs,
|
||||
|
|
|
@ -181,7 +181,7 @@ _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
|
|||
break;
|
||||
}
|
||||
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUChar32=0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -216,8 +216,6 @@ _SCSUClose(UConverter *cnv) {
|
|||
|
||||
/* SCSU-to-Unicode conversion functions ------------------------------------- */
|
||||
|
||||
/* ### TODO check operator precedence | << + < */
|
||||
|
||||
static void
|
||||
_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
@ -1059,7 +1057,7 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
dynamicWindow=scsu->fromUDynamicWindow;
|
||||
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
|
||||
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex= c==0 ? 0 : -1;
|
||||
|
@ -1386,18 +1384,11 @@ getTrailUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* c is an unpaired surrogate */
|
||||
cnv->invalidUCharBuffer[0]=(UChar)c;
|
||||
cnv->invalidUCharLength=1;
|
||||
c=0;
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -1553,7 +1544,7 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
|
|||
dynamicWindow=scsu->fromUDynamicWindow;
|
||||
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
|
||||
|
||||
c=cnv->fromUSurrogateLead;
|
||||
c=cnv->fromUChar32;
|
||||
|
||||
/* similar conversion "loop" as in toUnicode */
|
||||
loop:
|
||||
|
@ -1851,18 +1842,11 @@ getTrailUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* c is an unpaired surrogate */
|
||||
cnv->invalidUCharBuffer[0]=(UChar)c;
|
||||
cnv->invalidUCharLength=1;
|
||||
c=0;
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUChar32=c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
|
Loading…
Add table
Reference in a new issue