ICU-2449 new semantics of truncated sequences; move callback/truncated handling into ucnv.c API functions

X-SVN-Rev: 12649
This commit is contained in:
Markus Scherer 2003-07-22 04:22:57 +00:00
parent 305d4724a3
commit 39a2aed516
17 changed files with 1277 additions and 1448 deletions

File diff suppressed because it is too large Load diff

View file

@ -356,7 +356,7 @@ setInitialStateToUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConver
static void
setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData);
/*************** Converter implemenations ******************/
/*************** Converter implementations ******************/
static const UConverterImpl _ISO2022Impl={
UCNV_ISO_2022,
@ -1020,6 +1020,12 @@ T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
saveThis = args->converter;
args->offsets = NULL;
args->converter = myData->currentConverter;
/*
* ### TODO this does not maintain overflow and error buffers between
* the sub-converter and this one;
* idea: just copy those parts of the sub-UConverter into the 2022 UConverter
* after ucnv_toUnicode()
*/
ucnv_toUnicode(args->converter,
&args->target,
args->targetLimit,
@ -1079,10 +1085,6 @@ T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
}
myData->isFirstBuffer=FALSE;
if( (args->source == args->sourceLimit) && args->flush){
_ISO2022Reset(args->converter,UCNV_RESET_FROM_UNICODE);
}
}
static void
@ -1187,9 +1189,6 @@ T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
myOffset += args->source - sourceStart;
}
if( (args->source == args->sourceLimit) && args->flush){
_ISO2022Reset(args->converter,UCNV_RESET_TO_UNICODE);
}
}
static UCNV_TableStates_2022
@ -1758,20 +1757,6 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/*If at the end of conversion we are still carrying state information
*flush is TRUE, we can deduce that the input stream is truncated
*/
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
*err = U_TRUNCATED_CHAR_FOUND;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (source == sourceLimit) && args->flush){
setInitialStateFromUnicodeJPCN(args->converter,converterData);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -2009,19 +1994,6 @@ CALLBACK:
break;
}
}
if((args->flush==TRUE)
&& (mySource == mySourceLimit)
&& ( *toUnicodeStatus!=0x00)){
*err = U_TRUNCATED_CHAR_FOUND;
*toUnicodeStatus= 0x00;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (mySource == mySourceLimit) && args->flush){
setInitialStateToUnicodeJPCN(args->converter,myData);
}
args->target = myTarget;
args->source = mySource;
}
@ -2223,20 +2195,6 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/*If at the end of conversion we are still carrying state information
*flush is TRUE, we can deduce that the input stream is truncated
*/
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
*err = U_TRUNCATED_CHAR_FOUND;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (source == sourceLimit) && args->flush){
setInitialStateFromUnicodeKR(args->converter,converterData);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -2404,19 +2362,6 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
break;
}
}
if((args->flush==TRUE)
&& (mySource == mySourceLimit)
&& ( args->converter->toUnicodeStatus !=0x00)){
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0x00;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (mySource == mySourceLimit) && args->flush){
setInitialStateToUnicodeKR(args->converter,myData);
}
args->target = myTarget;
args->source = mySource;
}
@ -2833,20 +2778,6 @@ callback:
}/* end while(mySourceIndex<mySourceLength) */
/*If at the end of conversion we are still carrying state information
*flush is TRUE, we can deduce that the input stream is truncated
*/
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
*err = U_TRUNCATED_CHAR_FOUND;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (source == sourceLimit) && args->flush){
setInitialStateFromUnicodeJPCN(args->converter,converterData);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -3234,19 +3165,6 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
break;
}
}
if((args->flush==TRUE)
&& (mySource == mySourceLimit)
&& ( args->converter->toUnicodeStatus !=0x00)){
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0x00;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (mySource == mySourceLimit) && args->flush){
setInitialStateToUnicodeJPCN(args->converter,myData);
}
args->target = myTarget;
args->source = mySource;
}

View file

@ -768,7 +768,6 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter,
myUConverter->isExtraLocal = FALSE;
myUConverter->sharedData = mySharedConverterData;
myUConverter->options = options;
myUConverter->mode = UCNV_SI;
myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus;

View file

@ -25,8 +25,12 @@
/* size of the overflow buffers in UConverter, enough for escaping callbacks */
#define UCNV_ERROR_BUFFER_LENGTH 32
/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
#define UCNV_MAX_SUBCHAR_LEN 4
/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
#define UCNV_MAX_CHAR_LEN 8
/* converter options bits */
#define UCNV_OPTION_VERSION 0xf
#define UCNV_OPTION_SWAP_LFNL 0x10
@ -140,7 +144,7 @@ struct UConverter {
UBool useFallback;
int8_t toULength; /* number of bytes in toUBytes */
uint8_t toUBytes[7]; /* more "toU status"; keeps the bytes of the current character */
uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
int32_t mode;
uint32_t fromUnicodeStatus;
@ -155,12 +159,11 @@ struct UConverter {
uint8_t subChar1; /* single-byte substitution character if different from subChar */
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; /* bytes from last error/callback situation */
char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
UChar invalidUCharBuffer[3]; /* UChars from last error/callback situation */
UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
};
U_CDECL_END /* end of UConverter */

View file

@ -22,99 +22,6 @@
#include "ucnv_cnv.h"
#include "cmemory.h"
/**
 * Empties the converter's internal Unicode output buffer (UCharErrorBuffer)
 * into the caller's target buffer.
 *
 * As many pending UChars as fit are copied to myTarget. If some do not fit,
 * the remainder is moved to the front of UCharErrorBuffer, the stored length
 * is reduced accordingly and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * @param _this          converter whose UCharErrorBuffer is drained
 * @param myTarget       location where the pending UChars are written
 * @param myTargetIndex  advanced by the number of UChars written
 * @param targetLength   number of UChars that may be written at myTarget;
 *                       a negative value is treated as 0
 * @param offsets        may be NULL; otherwise every written unit gets the
 *                       offset -1 and *offsets is advanced past them
 * @param err            set to U_BUFFER_OVERFLOW_ERROR when not everything
 *                       fit; left untouched otherwise
 */
void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
UChar * myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err)
{
    int32_t pending = _this->UCharErrorBufferLength;
    /* number of units that are actually written in this call */
    int32_t count = (pending <= targetLength) ? pending : targetLength;
    int32_t i;

    if (count < 0)
    {
        /* never hand a negative length to memcpy */
        count = 0;
    }

    uprv_memcpy (myTarget,
                 _this->UCharErrorBuffer,
                 sizeof (UChar) * count);
    if (offsets)
    {
        /* buffered output has no corresponding source position */
        for (i = 0; i < count; i++) (*offsets)[i] = -1;
        *offsets += count;
    }
    /*
     * Advance the index in both cases; the overflow case used to overwrite it,
     * which only worked when the caller passed in an index of 0.
     */
    *myTargetIndex += count;

    if (count < pending)
    {
        /* keep what did not fit, shifted to the start of the buffer */
        uprv_memmove (_this->UCharErrorBuffer,
                      _this->UCharErrorBuffer + count,
                      sizeof (UChar) * (pending - count));
        _this->UCharErrorBufferLength = (int8_t) (pending - count);
        *err = U_BUFFER_OVERFLOW_ERROR;
    }
    else
    {
        _this->UCharErrorBufferLength = 0;
    }
}
/**
 * Empties the converter's internal codepage output buffer (charErrorBuffer)
 * into the caller's target buffer.
 *
 * As many pending bytes as fit are copied to myTarget. If some do not fit,
 * the remainder is moved to the front of charErrorBuffer, the stored length
 * is reduced accordingly and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * @param _this          converter whose charErrorBuffer is drained
 * @param myTarget       location where the pending bytes are written
 * @param myTargetIndex  advanced by the number of bytes written
 * @param targetLength   number of bytes that may be written at myTarget;
 *                       a negative value is treated as 0
 * @param offsets        may be NULL; otherwise every written byte gets the
 *                       offset -1 and *offsets is advanced past them
 * @param err            set to U_BUFFER_OVERFLOW_ERROR when not everything
 *                       fit; left untouched otherwise
 */
void ucnv_flushInternalCharBuffer (UConverter * _this,
char *myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err)
{
    int32_t pending = _this->charErrorBufferLength;
    /* number of bytes that are actually written in this call */
    int32_t count = (pending <= targetLength) ? pending : targetLength;
    int32_t i;

    if (count < 0)
    {
        /* never hand a negative length to memcpy */
        count = 0;
    }

    uprv_memcpy (myTarget, _this->charErrorBuffer, count);
    if (offsets)
    {
        /* buffered output has no corresponding source position */
        for (i = 0; i < count; i++) (*offsets)[i] = -1;
        *offsets += count;
    }
    /*
     * Advance the index in both cases; the overflow case used to overwrite it,
     * which only worked when the caller passed in an index of 0.
     */
    *myTargetIndex += count;

    if (count < pending)
    {
        /* keep what did not fit, shifted to the start of the buffer */
        uprv_memmove (_this->charErrorBuffer,
                      _this->charErrorBuffer + count,
                      (pending - count));
        _this->charErrorBufferLength = (int8_t) (pending - count);
        *err = U_BUFFER_OVERFLOW_ERROR;
    }
    else
    {
        _this->charErrorBufferLength = 0;
    }
}
/**
* This function is useful for implementations of getNextUChar().
* After a call to a callback function or to toUnicode(), an output buffer
@ -193,54 +100,6 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
}
}
/*
 * Generic ucnv_getNextUChar() implementation built on top of a converter's
 * toUnicode() function: the input is fed to toU() one byte at a time until
 * output appears. See ucnv_cnv.h for details.
 */
U_CFUNC UChar32
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
T_ToUnicodeFunction toU,
UBool collectPairs,
UErrorCode *pErrorCode) {
    UChar units[UTF_MAX_CHAR_LENGTH];
    const char *inputEnd=pArgs->sourceLimit;
    int32_t produced;

    /* all output goes into the small local buffer */
    pArgs->target=units;
    pArgs->targetLimit=units+UTF_MAX_CHAR_LENGTH;

    for(; pArgs->source<inputEnd; ) {
        /* expose exactly one more byte so that at most one character comes out */
        pArgs->sourceLimit=pArgs->source+1;
        pArgs->flush= (UBool)(pArgs->sourceLimit==inputEnd);

        toU(pArgs, pErrorCode);

        if(!U_SUCCESS(*pErrorCode)) {
            if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
                /* a real failure */
                return 0xffff;
            }
            /* the local buffer is full: clear the overflow and use what is in it */
            *pErrorCode=U_ZERO_ERROR;
            return ucnv_getUChar32KeepOverflow(pArgs->converter, units, UTF_MAX_CHAR_LENGTH);
        }

        produced=(int32_t)(pArgs->target-units);
        if(produced<=0) {
            /* nothing written yet, feed the next byte */
            continue;
        }

        /*
         * this test is UTF-16 specific:
         * keep going only if pairs are to be collected, more input is available,
         * and all we have so far is a single lead surrogate
         */
        if(!pArgs->flush && collectPairs && UTF_IS_FIRST_SURROGATE(units[0]) && produced!=2) {
            continue;
        }

        return ucnv_getUChar32KeepOverflow(pArgs->converter, units, produced);
    }

    /* no output because of empty input or only state changes and skipping callbacks */
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0xffff;
}
U_CFUNC void
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
USet *set,

View file

@ -59,11 +59,11 @@ typedef enum UConverterResetChoice {
typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
typedef void (*T_ToUnicodeFunction) (UConverterToUnicodeArgs *, UErrorCode *);
typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *);
typedef void (*T_FromUnicodeFunction) (UConverterFromUnicodeArgs *, UErrorCode *);
typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *);
typedef UChar32 (*T_GetNextUCharFunction) (UConverterToUnicodeArgs *, UErrorCode *);
typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *);
typedef void (*UConverterGetStarters)(const UConverter* converter,
UBool starters[256],
@ -116,20 +116,6 @@ typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
UBool CONVERSION_U_SUCCESS (UErrorCode err);
void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
UChar * myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
void ucnv_flushInternalCharBuffer (UConverter * _this,
char *myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
/**
* UConverterImpl contains all the data and functions for a converter type.
* Its function pointers work much like a C++ vtable.
@ -156,11 +142,11 @@ struct UConverterImpl {
UConverterClose close;
UConverterReset reset;
T_ToUnicodeFunction toUnicode;
T_ToUnicodeFunction toUnicodeWithOffsets;
T_FromUnicodeFunction fromUnicode;
T_FromUnicodeFunction fromUnicodeWithOffsets;
T_GetNextUCharFunction getNextUChar;
UConverterToUnicode toUnicode;
UConverterToUnicode toUnicodeWithOffsets;
UConverterFromUnicode fromUnicode;
UConverterFromUnicode fromUnicodeWithOffsets;
UConverterGetNextUChar getNextUChar;
UConverterGetStarters getStarters;
UConverterGetName getName;
@ -224,30 +210,12 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
/**
* This is a simple implementation of ucnv_getNextUChar() that uses the
* converter's toUnicode() function.
*
* \par
* A surrogate pair from a single byte sequence is always
* combined to a supplementary code point.
* A surrogate pair from consecutive byte sequences is only combined
* if collectPairs is set. This is necessary for SCSU
* but not allowed for most legacy codepages.
*
* @param pArgs The argument structure supplied by ucnv_getNextUChar()
* @param toU A function pointer to the converter's toUnicode() function
* @param collectPairs indicates whether separate surrogate results from
* consecutive byte sequences should be combined into
* a single code point
* @param pErrorCode An ICU error code parameter
* @return The Unicode code point as a result of a conversion of a minimal
* number of input bytes
* Magic number for ucnv_getNextUChar(), returned by a
* getNextUChar() implementation to indicate to use the converter's toUnicode()
* instead of the native function.
* @internal
*/
U_CFUNC UChar32
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
T_ToUnicodeFunction toU,
UBool collectPairs,
UErrorCode *pErrorCode);
#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9
U_CFUNC void
ucnv_getCompleteUnicodeSet(const UConverter *cnv,

View file

@ -1211,11 +1211,11 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
{
saveSource = args->source; /* beginning of current code point */
if (args->converter->invalidCharLength) /* reassemble char from previous call */
if (args->converter->toULength) /* reassemble char from previous call */
{
char LMBCS [ULMBCS_CHARSIZE_MAX];
const char *pLMBCS = LMBCS, *saveSourceLimit;
size_t size_old = args->converter->invalidCharLength;
size_t size_old = args->converter->toULength;
/* limit from source is either remainder of temp buffer, or user limit on source */
size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
@ -1223,7 +1223,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
uprv_memcpy(LMBCS, args->converter->invalidCharBuffer, size_old);
uprv_memcpy(LMBCS, args->converter->toUBytes, size_old);
uprv_memcpy(LMBCS + size_old, args->source, size_new);
saveSourceLimit = args->sourceLimit;
args->source = pLMBCS;
@ -1234,12 +1234,12 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
args->sourceLimit = saveSourceLimit;
args->source += (pLMBCS - LMBCS - size_old);
if (*err == U_TRUNCATED_CHAR_FOUND && !args->flush)
if (*err == U_TRUNCATED_CHAR_FOUND)
{
/* evil special case: source buffers so small a char spans more than 2 buffers */
int8_t savebytes = (int8_t)(size_old+size_new);
args->converter->invalidCharLength = savebytes;
uprv_memcpy(args->converter->invalidCharBuffer, LMBCS, savebytes);
args->converter->toULength = savebytes;
uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes);
args->source = args->sourceLimit;
*err = U_ZERO_ERROR;
return;
@ -1247,7 +1247,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
else
{
/* clear the partial-char marker */
args->converter->invalidCharLength = 0;
args->converter->toULength = 0;
}
}
else
@ -1313,11 +1313,10 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
if (*err == U_TRUNCATED_CHAR_FOUND)
{
args->source = args->sourceLimit;
if (!args->flush )
{
int8_t savebytes = (int8_t)(args->sourceLimit - saveSource);
args->converter->invalidCharLength = (int8_t)savebytes;
uprv_memcpy(args->converter->invalidCharBuffer, saveSource, savebytes);
args->converter->toULength = (int8_t)savebytes;
uprv_memcpy(args->converter->toUBytes, saveSource, savebytes);
*err = U_ZERO_ERROR;
}
}

View file

@ -35,7 +35,7 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
int32_t count;
int32_t sourceIndex = 0;
if(length <= 0 && cnv->toUnicodeStatus == 0) {
if(length <= 0) {
/* no input, nothing to do */
return;
}
@ -46,14 +46,14 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
}
/* complete a partial UChar from the last call */
if(length != 0 && cnv->toUnicodeStatus != 0) {
if(length != 0 && cnv->toULength != 0) {
/*
* copy the byte from the last call and the first one here into the target,
* byte-wise to keep the platform endianness
*/
uint8_t *p = (uint8_t *)target++;
*p++ = (uint8_t)cnv->toUnicodeStatus;
cnv->toUnicodeStatus = 0;
*p++ = cnv->toUBytes[0];
cnv->toULength = 0;
*p = *source++;
--length;
--targetCapacity;
@ -88,16 +88,9 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
/* it must be targetCapacity==0 because otherwise the above would have copied more */
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
} else if(length == 1) {
if(pArgs->flush) {
/* a UChar remains incomplete */
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
} else {
/* consume the last byte and store it, making sure that it will never set the status to 0 */
cnv->toUnicodeStatus = *source++ | 0x100;
}
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
/* a UChar remains incomplete */
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
/* consume the last byte and store it */
cnv->toUBytes[0]=*source++;
cnv->toULength=1;
}
/* write back the updated pointers */
@ -199,7 +192,7 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
int32_t count;
int32_t sourceIndex = 0;
if(length <= 0 && cnv->toUnicodeStatus == 0) {
if(length <= 0) {
/* no input, nothing to do */
return;
}
@ -210,14 +203,14 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
}
/* complete a partial UChar from the last call */
if(length != 0 && cnv->toUnicodeStatus != 0) {
if(length != 0 && cnv->toULength != 0) {
/*
* copy the byte from the last call and the first one here into the target,
* byte-wise, reversing the platform endianness
*/
*target8++ = *source++;
*target8++ = (uint8_t)cnv->toUnicodeStatus;
cnv->toUnicodeStatus = 0;
*target8++ = cnv->toUBytes[0];
cnv->toULength = 0;
++target;
--length;
--targetCapacity;
@ -260,16 +253,8 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
/* it must be targetCapacity==0 because otherwise the above would have copied more */
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
} else if(length == 1) {
if(pArgs->flush) {
/* a UChar remains incomplete */
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
} else {
/* consume the last byte and store it, making sure that it will never set the status to 0 */
cnv->toUnicodeStatus = *source++ | 0x100;
}
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
/* a UChar remains incomplete */
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
cnv->toUBytes[0]=*source++;
cnv->toULength=1;
}
/* write back the updated pointers */
@ -727,12 +712,12 @@ _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
pArgs->source=source;
pArgs->sourceLimit=sourceLimit;
state=8;
break;
}
cnv->mode=0; /* reset */
} else {
cnv->mode=state;
}
cnv->mode=state;
}
static UChar32
@ -744,7 +729,7 @@ _UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
case 9:
return T_UConverter_getNextUChar_UTF16_LE(pArgs, pErrorCode);
default:
return ucnv_getNextUCharFromToUImpl(pArgs, _UTF16ToUnicodeWithOffsets, TRUE, pErrorCode);
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
}
}

View file

@ -128,20 +128,10 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = MAXIMUM_UCS4;
}
}
else
{ /* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
}
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
goto donefornow;
}
}
@ -237,20 +227,10 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = MAXIMUM_UCS4;
}
}
else
{ /* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
}
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
goto donefornow;
}
}
@ -331,10 +311,10 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
temp[0] = 0;
if (args->converter->fromUnicodeStatus)
if (args->converter->fromUSurrogateLead)
{
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
ch = args->converter->fromUSurrogateLead;
args->converter->fromUSurrogateLead = 0;
goto lowsurogate;
}
@ -354,12 +334,22 @@ lowsurogate:
mySource++;
}
}
#if 0
/*
* ### TODO the old code used to convert unpaired surrogates in the middle
* of a stream but not at the end
* figure out which way to go definitely when discussing
* Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
*
* for now (j2449), unpaired surrogates are always converted
*/
else if (!args->flush)
{
/* ran out of source */
args->converter->fromUnicodeStatus = ch;
args->converter->fromUSurrogateLead = (UChar)ch;
break;
}
#endif
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
@ -406,10 +396,10 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
temp[0] = 0;
if (args->converter->fromUnicodeStatus)
if (args->converter->fromUSurrogateLead)
{
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
ch = args->converter->fromUSurrogateLead;
args->converter->fromUSurrogateLead = 0;
goto lowsurogate;
}
@ -429,12 +419,14 @@ lowsurogate:
mySource++;
}
}
#if 0
else if (!args->flush)
{
/* ran out of source */
args->converter->fromUnicodeStatus = ch;
args->converter->fromUSurrogateLead = (UChar)ch;
break;
}
#endif
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
@ -613,20 +605,10 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0;
}
}
else
{ /* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
}
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
goto donefornow;
}
}
@ -724,20 +706,10 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0;
}
}
else
{ /* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
}
/* stores a partially calculated target*/
/* + 1 to make 0 a valid character */
args->converter->toUnicodeStatus = ch + 1;
args->converter->toULength = (int8_t) i;
goto donefornow;
}
}
@ -818,10 +790,10 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
temp[3] = 0;
if (args->converter->fromUnicodeStatus)
if (args->converter->fromUSurrogateLead)
{
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
ch = args->converter->fromUSurrogateLead;
args->converter->fromUSurrogateLead = 0;
goto lowsurogate;
}
@ -841,12 +813,14 @@ lowsurogate:
mySource++;
}
}
#if 0
else if (!args->flush)
{
/* ran out of source */
args->converter->fromUnicodeStatus = ch;
args->converter->fromUSurrogateLead = (UChar)ch;
break;
}
#endif
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
@ -893,10 +867,10 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
temp[3] = 0;
if (args->converter->fromUnicodeStatus)
if (args->converter->fromUSurrogateLead)
{
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
ch = args->converter->fromUSurrogateLead;
args->converter->fromUSurrogateLead = 0;
goto lowsurogate;
}
@ -916,12 +890,14 @@ lowsurogate:
mySource++;
}
}
#if 0
else if (!args->flush)
{
/* ran out of source */
args->converter->fromUnicodeStatus = ch;
args->converter->fromUSurrogateLead = (UChar)ch;
break;
}
#endif
}
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
@ -1253,12 +1229,12 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
pArgs->source=source;
pArgs->sourceLimit=sourceLimit;
state=8;
break;
}
cnv->mode=0; /* reset */
} else {
cnv->mode=state;
}
cnv->mode=state;
}
static UChar32
@ -1270,7 +1246,7 @@ _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
case 9:
return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
default:
return ucnv_getNextUCharFromToUImpl(pArgs, _UTF32ToUnicodeWithOffsets, FALSE, pErrorCode);
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
}
}

View file

@ -416,20 +416,20 @@ unicodeMode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(!inDirectMode && bits!=0 && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
cnv->toULength=0;
} else {
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
cnv->toULength=byteIndex;
if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
/*
* if we are in Unicode mode, then the byteIndex might not be 0,
* but that is ok if bits==0
* -> we set byteIndex=0 at the end of the stream to avoid a truncated error
* (not true for IMAP-mailbox-name where we must end in direct mode)
*/
byteIndex=0;
}
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
cnv->toULength=byteIndex;
finish:
/* write back the updated pointers */
pArgs->source=(const char *)source;
@ -495,12 +495,6 @@ callback:
}
}
/*
 * getNextUChar() for this converter: delegates to the generic
 * toUnicode()-based implementation, passing the converter's own toUnicode()
 * function and collectPairs=TRUE.
 */
static UChar32
_UTF7GetNextUChar(UConverterToUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
    UChar32 c;

    c=ucnv_getNextUCharFromToUImpl(
        pArgs,
        pArgs->converter->sharedData->impl->toUnicode,
        TRUE,
        pErrorCode);
    return c;
}
static void
_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
@ -788,7 +782,7 @@ static const UConverterImpl _UTF7Impl={
_UTF7ToUnicodeWithOffsets,
_UTF7FromUnicodeWithOffsets,
_UTF7FromUnicodeWithOffsets,
_UTF7GetNextUChar,
NULL,
NULL,
_UTF7GetName,
@ -1001,7 +995,8 @@ directMode:
/* switch to Unicode mode */
nextSourceIndex=++sourceIndex;
inDirectMode=FALSE;
byteIndex=0;
bytes[0]=b;
byteIndex=1;
bits=0;
base64Counter=-1;
goto unicodeMode;
@ -1145,19 +1140,9 @@ unicodeMode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(!inDirectMode && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete - IMAP must end in ASCII/direct mode */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
cnv->toULength=0;
} else {
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
cnv->toULength=byteIndex;
}
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
cnv->toULength=byteIndex;
finish:
/* write back the updated pointers */
@ -1525,7 +1510,7 @@ static const UConverterImpl _IMAPImpl={
_IMAPToUnicodeWithOffsets,
_IMAPFromUnicodeWithOffsets,
_IMAPFromUnicodeWithOffsets,
_UTF7GetNextUChar,
NULL,
NULL,
NULL,

View file

@ -115,6 +115,7 @@ T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args,
converter->toUBytes,
converter->toULength);
converter->invalidCharLength = converter->toULength;
converter->toULength = 0;
/* Call the ErrorFunction */
args->converter->fromCharErrorBehaviour(converter->toUContext,
@ -200,19 +201,10 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
}
}
else
{ /* stores a partially calculated target*/
args->converter->toUnicodeStatus = ch;
args->converter->mode = inBytes;
args->converter->toULength = (int8_t) i;
}
/* stores a partially calculated target*/
args->converter->toUnicodeStatus = ch;
args->converter->mode = inBytes;
args->converter->toULength = (int8_t) i;
goto donefornow;
}
}
@ -236,6 +228,7 @@ morebytes:
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
{
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
args->converter->toULength = 0;
if (ch <= MAXIMUM_UCS2)
{
/* fits in 16 bits */
@ -350,20 +343,9 @@ morebytes:
}
else
{
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0;
}
}
else
{
args->converter->toUnicodeStatus = ch;
args->converter->mode = inBytes;
args->converter->toULength = (int8_t)i;
}
args->converter->toUnicodeStatus = ch;
args->converter->mode = inBytes;
args->converter->toULength = (int8_t)i;
goto donefornow;
}
}
@ -387,6 +369,7 @@ morebytes:
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
{
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
args->converter->toULength = 0;
if (ch <= MAXIMUM_UCS2)
{
/* fits in 16 bits */
@ -604,11 +587,6 @@ lowsurrogate:
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
if(args->flush && mySource >= sourceLimit && cnv->fromUSurrogateLead != 0 && U_SUCCESS(*err)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*err = U_TRUNCATED_CHAR_FOUND;
cnv->fromUSurrogateLead = 0;
}
args->target = (char *) myTarget;
args->source = mySource;
@ -787,11 +765,6 @@ lowsurrogate:
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
if(args->flush && mySource >= sourceLimit && cnv->fromUSurrogateLead != 0 && U_SUCCESS(*err)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*err = U_TRUNCATED_CHAR_FOUND;
cnv->fromUSurrogateLead = 0;
}
args->target = (char *) myTarget;
args->source = mySource;

View file

@ -666,19 +666,9 @@ getTrail:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c<0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
cnv->fromUnicodeStatus=BOCU1_ASCII_PREV;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
cnv->fromUnicodeStatus=(uint32_t)prev;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
cnv->fromUnicodeStatus=(uint32_t)prev;
/* write back the updated pointers */
pArgs->source=source;
@ -897,19 +887,9 @@ getTrail:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c<0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
cnv->fromUnicodeStatus=BOCU1_ASCII_PREV;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
cnv->fromUnicodeStatus=(uint32_t)prev;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
cnv->fromUnicodeStatus=(uint32_t)prev;
/* write back the updated pointers */
pArgs->source=source;
@ -1228,21 +1208,10 @@ getTrail:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
cnv->mode=0;
cnv->toULength=0;
} else {
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=(uint32_t)prev;
cnv->mode=(diff<<2)|count;
cnv->toULength=byteIndex;
}
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=(uint32_t)prev;
cnv->mode=(diff<<2)|count;
cnv->toULength=byteIndex;
finish:
/* write back the updated pointers */
@ -1495,21 +1464,10 @@ getTrail:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
cnv->mode=0;
cnv->toULength=0;
} else {
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=(uint32_t)prev;
cnv->mode=(diff<<2)|count;
cnv->toULength=byteIndex;
}
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=(uint32_t)prev;
cnv->mode=(diff<<2)|count;
cnv->toULength=byteIndex;
finish:
/* write back the updated pointers */

View file

@ -325,18 +325,6 @@ SAVE_STATE:
break;
}
}
if((args->flush==TRUE)
&& (mySource == mySourceLimit)
&& ( args->converter->toUnicodeStatus !=0x00)){
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0x00;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (mySource == mySourceLimit) && args->flush){
_HZReset(args->converter, UCNV_RESET_TO_UNICODE);
}
args->target = myTarget;
args->source = mySource;
@ -558,19 +546,6 @@ getTrail:
}
targetUniChar=missingCharMarker;
}
/* If at the end of conversion we are still carrying state information
 * and flush is TRUE, we can deduce that the input stream is truncated
 */
if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
*err = U_TRUNCATED_CHAR_FOUND;
args->converter->toUnicodeStatus = 0x00;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (mySourceIndex == mySourceLength) && args->flush){
_HZReset(args->converter, UCNV_RESET_FROM_UNICODE);
}
args->target += myTargetIndex;
args->source += mySourceIndex;

View file

@ -1053,21 +1053,6 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/* If at the end of conversion we are still carrying state information
 * and flush is TRUE, we can deduce that the input stream is truncated
 */
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
*err = U_TRUNCATED_CHAR_FOUND;
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (source == sourceLimit) && args->flush){
/*reset converter*/
_ISCIIReset(args->converter,UCNV_RESET_FROM_UNICODE);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -1396,26 +1381,30 @@ CALLBACK:
break;
}
}
if((args->flush==TRUE)
&& (source == sourceLimit)
&& data->contextCharToUnicode != NO_CHAR_MARKER){
/* if we have ATR in context it is an error */
if(data->contextCharToUnicode==ATR || data->contextCharToUnicode==EXT || *toUnicodeStatus == missingCharMarker){
*err = U_TRUNCATED_CHAR_FOUND;
if(args->flush && source == sourceLimit) {
/* end of the input stream */
UConverter *cnv = args->converter;
if(*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV){
/* set toUBytes[] */
cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
cnv->toULength = 1;
/* avoid looping on truncated sequences */
*contextCharToUnicode = NO_CHAR_MARKER;
}else{
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
*toUnicodeStatus,data->currentDeltaToUnicode,err);
*toUnicodeStatus = missingCharMarker;
cnv->toULength = 0;
}
if(*toUnicodeStatus != missingCharMarker) {
/* output a remaining target character */
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
*toUnicodeStatus,data->currentDeltaToUnicode,err);
*toUnicodeStatus = missingCharMarker;
}
}
/* Reset the state of converter if we consumed
* the source and flush is true
*/
if( (source == sourceLimit) && args->flush){
/*reset converter*/
_ISCIIReset(args->converter,UCNV_RESET_TO_UNICODE);
}
args->target = target;
args->source = source;
}

View file

@ -146,23 +146,21 @@ static void
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
UConverter *cnv;
const UChar *source, *sourceLimit, *lastSource;
uint8_t *target;
const UChar *source, *sourceLimit;
uint8_t *target, *oldTarget;
int32_t targetCapacity, length;
int32_t *offsets;
UChar32 c, max;
UChar32 cp;
UChar c, max;
int32_t sourceIndex;
UConverterCallbackReason reason;
int32_t i;
/* set up the local pointers */
cnv=pArgs->converter;
source=pArgs->source;
sourceLimit=pArgs->sourceLimit;
target=(uint8_t *)pArgs->target;
target=oldTarget=(uint8_t *)pArgs->target;
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
@ -173,11 +171,10 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
}
/* get the converter state from UConverter */
c=cnv->fromUSurrogateLead;
cp=cnv->fromUSurrogateLead;
/* sourceIndex=-1 if the current character began in the previous buffer */
sourceIndex= c==0 ? 0 : -1;
lastSource=source;
sourceIndex= cp==0 ? 0 : -1;
/*
* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
@ -189,13 +186,12 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
}
/* conversion loop */
if(c!=0 && targetCapacity>0) {
if(cp!=0 && targetCapacity>0) {
goto getTrail;
}
#if LATIN1_UNROLL_FROM_UNICODE
/* unroll the loop with the most common case */
unrolled:
if(targetCapacity>=16) {
int32_t count, loops;
UChar u, oredChars;
@ -247,7 +243,7 @@ unrolled:
targetCapacity-=16*count;
if(offsets!=NULL) {
lastSource+=16*count;
oldTarget+=16*count;
while(count>0) {
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
@ -268,156 +264,73 @@ unrolled:
--count;
}
}
c=0;
}
#endif
while(targetCapacity>0) {
/*
* Get a correct Unicode code point:
* a single UChar for a BMP code point or
* a matched surrogate pair for a "surrogate code point".
*/
c=*source++;
if(c<=max) {
/* convert the Unicode code point */
*target++=(uint8_t)c;
--targetCapacity;
/* conversion loop */
c=0;
while(targetCapacity>0 && (c=*source++)<=max) {
/* convert the Unicode code point */
*target++=(uint8_t)c;
--targetCapacity;
}
/* normal end of conversion: prepare for a new character */
c=0;
} else {
if(!UTF_IS_SURROGATE(c)) {
/* callback(unassigned) */
reason=UCNV_UNASSIGNED;
*pErrorCode=U_INVALID_CHAR_FOUND;
} else if(UTF_IS_SURROGATE_FIRST(c)) {
/*
* not a real loop: just using while() to use a break inside instead of goto
* logically, this is just if(c>max) ...
*/
while(c>max) {
cp=c;
if(!U_IS_SURROGATE(cp)) {
/* callback(unassigned) */
} else if(U_IS_SURROGATE_LEAD(cp)) {
getTrail:
if(source<sourceLimit) {
/* test the following code unit */
UChar trail=*source;
if(UTF_IS_SECOND_SURROGATE(trail)) {
++source;
c=UTF16_GET_PAIR_VALUE(c, trail);
/* this codepage does not map supplementary code points */
/* callback(unassigned) */
reason=UCNV_UNASSIGNED;
*pErrorCode=U_INVALID_CHAR_FOUND;
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
reason=UCNV_ILLEGAL;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
if(source<sourceLimit) {
/* test the following code unit */
UChar trail=*source;
if(U16_IS_TRAIL(trail)) {
++source;
cp=U16_GET_SUPPLEMENTARY(cp, trail);
/* this codepage does not map supplementary code points */
/* callback(unassigned) */
} else {
/* no more input */
break;
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
}
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
reason=UCNV_ILLEGAL;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
/* call the callback function with all the preparations and post-processing */
/* get the number of code units for c to correctly advance sourceIndex after the callback call */
length=UTF_CHAR_LENGTH(c);
/* set offsets since the start or the last callback */
if(offsets!=NULL) {
int32_t count=(int32_t)(source-lastSource);
/* do not set the offset for the callback-causing character */
count-=length;
while(count>0) {
*offsets++=sourceIndex++;
--count;
}
/* offset and sourceIndex are now set for the current character */
}
/* update the arguments structure */
pArgs->source=source;
pArgs->target=(char *)target;
pArgs->offsets=offsets;
/* set the converter state in UConverter to deal with the next character */
cnv->fromUSurrogateLead=0;
/* write the code point as code units */
i=0;
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
cnv->invalidUCharLength=(int8_t)i;
/* i==length */
/* call the callback function */
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
/* get the converter state from UConverter */
c=cnv->fromUSurrogateLead;
/* update target and deal with offsets if necessary */
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
target=(uint8_t *)pArgs->target;
/* update the source pointer and index */
sourceIndex+=length+(pArgs->source-source);
source=lastSource=pArgs->source;
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
break;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
c=0;
break;
} else if(cnv->charErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
/* no more input */
cnv->fromUSurrogateLead=(UChar)cp;
break;
}
#if LATIN1_UNROLL_FROM_UNICODE
goto unrolled;
#endif
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
}
*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
/* write the code point as code units */
{
int32_t i=0;
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, i, cp);
cnv->invalidUCharLength=(int8_t)i;
}
break;
}
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
/* set offsets since the start or the last callback */
/* set offsets since the start */
if(offsets!=NULL) {
size_t count=source-lastSource;
size_t count=target-oldTarget;
while(count>0) {
*offsets++=sourceIndex++;
--count;
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
/* write back the updated pointers */
@ -479,23 +392,24 @@ const UConverterSharedData _Latin1Data={
static void
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
const uint8_t *source, *sourceLimit, *lastSource;
UChar *target;
const uint8_t *source, *sourceLimit;
UChar *target, *oldTarget;
int32_t targetCapacity, length;
int32_t *offsets;
int32_t sourceIndex;
uint8_t c;
/* set up the local pointers */
source=(const uint8_t *)pArgs->source;
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
target=pArgs->target;
target=oldTarget=pArgs->target;
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
/* sourceIndex=-1 if the current character began in the previous buffer */
sourceIndex=0;
lastSource=source;
/*
* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
@ -508,7 +422,6 @@ _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
#if ASCII_UNROLL_TO_UNICODE
/* unroll the loop with the most common case */
unrolled:
if(targetCapacity>=16) {
int32_t count, loops;
UChar oredChars;
@ -544,7 +457,7 @@ unrolled:
targetCapacity-=16*count;
if(offsets!=NULL) {
lastSource+=16*count;
oldTarget+=16*count;
while(count>0) {
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
@ -569,86 +482,26 @@ unrolled:
#endif
/* conversion loop */
while(targetCapacity>0) {
if((*target++=*source++)<=0x7f) {
--targetCapacity;
} else {
UConverter *cnv;
/* back out the illegal character */
--target;
/* call the callback function with all the preparations and post-processing */
cnv=pArgs->converter;
/* callback(illegal) */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
/* set offsets since the start or the last callback */
if(offsets!=NULL) {
int32_t count=(int32_t)(source-lastSource);
/* predecrement: do not set the offset for the callback-causing character */
while(--count>0) {
*offsets++=sourceIndex++;
}
/* offset and sourceIndex are now set for the current character */
}
/* update the arguments structure */
pArgs->source=(const char *)source;
pArgs->target=target;
pArgs->offsets=offsets;
/* copy the current bytes to invalidCharBuffer */
cnv->invalidCharBuffer[0]=*(source-1);
cnv->invalidCharLength=1;
/* call the callback function */
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);
/* update target and deal with offsets if necessary */
offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
target=pArgs->target;
/* update the source pointer and index */
sourceIndex+=1+((const uint8_t *)pArgs->source-source);
source=lastSource=(const uint8_t *)pArgs->source;
targetCapacity=pArgs->targetLimit-target;
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
break;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
break;
} else if(cnv->UCharErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
break;
}
#if ASCII_UNROLL_TO_UNICODE
goto unrolled;
#endif
}
c=0;
while(targetCapacity>0 && (c=*source++)<=0x7f) {
*target++=c;
--targetCapacity;
}
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
if(c>0x7f) {
/* callback(illegal); copy the current bytes to invalidCharBuffer */
UConverter *cnv=pArgs->converter;
cnv->invalidCharBuffer[0]=c;
cnv->invalidCharLength=1;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
} else if(source<sourceLimit && target>=pArgs->targetLimit) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
/* set offsets since the start or the last callback */
/* set offsets since the start */
if(offsets!=NULL) {
size_t count=source-lastSource;
size_t count=target-oldTarget;
while(count>0) {
*offsets++=sourceIndex++;
--count;

View file

@ -777,21 +777,6 @@ _MBCSUnload(UConverterSharedData *sharedData) {
}
}
/*
 * Reset the MBCS conversion state that is stored in the UConverter fields.
 *
 * cnv    - converter whose status fields are rewritten
 * choice - selects which direction(s) to reset; the toUnicode fields are
 *          reset when choice<=UCNV_RESET_TO_UNICODE and the fromUnicode
 *          fields when choice!=UCNV_RESET_TO_UNICODE
 *          (NOTE(review): this presumably relies on the "both" value
 *          ordering below UCNV_RESET_TO_UNICODE - confirm against the enum)
 */
static void
_MBCSReset(UConverter *cnv, UConverterResetChoice choice) {
    if(choice!=UCNV_RESET_TO_UNICODE) {
        /* fromUnicode direction */
        cnv->fromUnicodeStatus=1;       /* prevLength */
        cnv->fromUSurrogateLead=0;
    }

    if(choice>UCNV_RESET_TO_UNICODE) {
        /* the toUnicode direction was not requested */
        return;
    }

    /* toUnicode direction */
    cnv->toULength=0;                   /* byteIndex */
    cnv->mode=0;                        /* state */
    cnv->toUnicodeStatus=0;             /* offset */
}
static void
_MBCSOpen(UConverter *cnv,
const char *name,
@ -822,7 +807,21 @@ _MBCSOpen(UConverter *cnv,
}
}
_MBCSReset(cnv, UCNV_RESET_BOTH);
#if 0
/*
* documentation of UConverter fields used for status
* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
*/
/* toUnicode */
cnv->toUnicodeStatus=0; /* offset */
cnv->mode=0; /* state */
cnv->toULength=0; /* byteIndex */
/* fromUnicode */
cnv->fromUSurrogateLead=0;
cnv->fromUnicodeStatus=1; /* prevLength */
#endif
}
static const char *
@ -1151,21 +1150,10 @@ callback:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->toUnicodeStatus=0;
cnv->mode=0;
cnv->toULength=0;
} else {
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=offset;
cnv->mode=state;
cnv->toULength=byteIndex;
}
/* set the converter state back into UConverter */
cnv->toUnicodeStatus=offset;
cnv->mode=state;
cnv->toULength=byteIndex;
/* write back the updated pointers */
pArgs->source=(const char *)source;
@ -1622,7 +1610,7 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
* with the rare case of a codepage that maps single surrogates
* without adding the complexity to this already complicated function here.
*/
return ucnv_getNextUCharFromToUImpl(pArgs, _MBCSToUnicodeWithOffsets, TRUE, pErrorCode);
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
} else if(cnv->sharedData->table->mbcs.countStates==1) {
return _MBCSSingleGetNextUChar(pArgs, pErrorCode);
}
@ -2335,7 +2323,7 @@ getTrail:
cnv->fromUnicodeStatus=prevLength; /* save the old state */
value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
if(value<=0xff) {
if(prevLength==1) {
if(prevLength<=1) {
length=1;
} else {
/* change from double-byte mode to single-byte */
@ -2611,36 +2599,34 @@ callback:
}
}
if(pArgs->flush && source>=sourceLimit && U_SUCCESS(*pErrorCode)) {
/* end of input stream */
if(c!=0) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
/* the following may change with Jitterbug 2449: would prepare for callback instead of resetting */
c=0;
prevLength=1;
} else if(outputType==MBCS_OUTPUT_2_SISO && prevLength==2) {
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
if(targetCapacity>0) {
*target++=(uint8_t)UCNV_SI;
if(offsets!=NULL) {
/* set the last source character's index (sourceIndex points at sourceLimit now) */
*offsets++=prevSourceIndex;
}
} else {
/* target is full */
cnv->charErrorBuffer[0]=(char)UCNV_SI;
cnv->charErrorBufferLength=1;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for EBCDIC_STATEFUL conversion
* we need to emit an SI at the very end
*
* conditions:
* successful
* EBCDIC_STATEFUL in DBCS mode
* end of input and no truncated input
*/
if( U_SUCCESS(*pErrorCode) &&
outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
pArgs->flush && source>=sourceLimit && c==0
) {
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
if(targetCapacity>0) {
*target++=(uint8_t)UCNV_SI;
if(offsets!=NULL) {
/* set the last source character's index (sourceIndex points at sourceLimit now) */
*offsets++=prevSourceIndex;
}
prevLength=1; /* we switched into SBCS */
}
/* reset the state for the next conversion */
if(U_SUCCESS(*pErrorCode)) {
c=0;
prevLength=1;
} else {
/* target is full */
cnv->charErrorBuffer[0]=(char)UCNV_SI;
cnv->charErrorBufferLength=1;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
prevLength=1; /* we switched into SBCS */
}
/* set the converter state back into UConverter */
@ -2892,19 +2878,9 @@ callback:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
cnv->fromUnicodeStatus=1;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
cnv->fromUnicodeStatus=prevLength;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
cnv->fromUnicodeStatus=prevLength;
/* write back the updated pointers */
pArgs->source=source;
@ -3106,17 +3082,8 @@ callback:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
/* write back the updated pointers */
pArgs->source=source;
@ -3389,17 +3356,8 @@ getTrail:
}
}
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a Unicode code point remains incomplete (only a first surrogate) */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
} else {
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
}
/* set the converter state back into UConverter */
cnv->fromUSurrogateLead=(UChar)c;
/* write back the updated pointers */
pArgs->source=source;
@ -3643,7 +3601,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
*p++=subchar[0];
break;
case 2:
if(cnv->fromUnicodeStatus==1) {
if(cnv->fromUnicodeStatus<=1) {
/* SBCS mode and DBCS sub char: change to DBCS */
cnv->fromUnicodeStatus=2;
*p++=UCNV_SO;
@ -3688,7 +3646,7 @@ static const UConverterImpl _MBCSImpl={
_MBCSOpen,
NULL,
_MBCSReset,
NULL,
_MBCSToUnicodeWithOffsets,
_MBCSToUnicodeWithOffsets,

View file

@ -272,11 +272,9 @@ _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
* The end of the input or output buffer is also handled by the slower loop.
* The slow loop jumps (goto) to the fast-path loop again as soon as possible.
*
* The callback handling is done by jumping (goto) to the callback section at the end
* of the function. From there, it either jumps to here to continue or to
* the endloop section to clean up and return.
* The callback handling is done by returning with an error code.
* The conversion framework actually calls the callback function.
*/
loop:
if(isSingleByteMode) {
/* fast path for single-byte mode */
if(state==readCommand) {
@ -367,13 +365,20 @@ singleByteMode:
goto fastUnicode;
} else /* Srs */ {
/* callback(illegal) */
cnv->invalidCharBuffer[0]=b;
cnv->invalidCharLength=1;
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->toUBytes[0]=b;
cnv->toULength=1;
goto endloop;
}
/* store the first byte of a multibyte sequence in toUBytes[] */
cnv->toUBytes[0]=b;
cnv->toULength=1;
break;
case quotePairOne:
byteOne=b;
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=quotePairTwo;
break;
case quotePairTwo:
@ -426,6 +431,8 @@ singleByteMode:
case definePairOne:
dynamicWindow=(int8_t)((b>>5)&7);
byteOne=(uint8_t)(b&0x1f);
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=definePairTwo;
break;
case definePairTwo:
@ -436,10 +443,9 @@ singleByteMode:
case defineOne:
if(b==0) {
/* callback(illegal): Reserved window offset value 0 */
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
cnv->invalidCharBuffer[1]=b;
cnv->invalidCharLength=2;
goto callback;
cnv->toUBytes[1]=b;
cnv->toULength=2;
goto endloop;
} else if(b<gapThreshold) {
scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
} else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
@ -448,10 +454,9 @@ singleByteMode:
scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
} else {
/* callback(illegal): Reserved window offset value 0xa8..0xf8 */
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
cnv->invalidCharBuffer[1]=b;
cnv->invalidCharLength=2;
goto callback;
cnv->toUBytes[1]=b;
cnv->toULength=2;
goto endloop;
}
sourceIndex=nextSourceIndex;
state=readCommand;
@ -487,6 +492,8 @@ fastUnicode:
case readCommand:
if((uint8_t)(b-UC0)>(Urs-UC0)) {
byteOne=b;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=quotePairTwo;
} else if(/* UC0<=b && */ b<=UC7) {
dynamicWindow=(int8_t)(b-UC0);
@ -496,23 +503,32 @@ fastUnicode:
} else if(/* UD0<=b && */ b<=UD7) {
dynamicWindow=(int8_t)(b-UD0);
isSingleByteMode=TRUE;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=defineOne;
goto singleByteMode;
} else if(b==UDX) {
isSingleByteMode=TRUE;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=definePairOne;
goto singleByteMode;
} else if(b==UQU) {
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=quotePairOne;
} else /* Urs */ {
/* callback(illegal) */
cnv->invalidCharBuffer[0]=b;
cnv->invalidCharLength=1;
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->toUBytes[0]=b;
cnv->toULength=1;
goto endloop;
}
break;
case quotePairOne:
byteOne=b;
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=quotePairTwo;
break;
case quotePairTwo:
@ -528,80 +544,35 @@ fastUnicode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
/* copy the input sequence into the error buffer */
int8_t i;
for(i=0; i<cnv->toULength; ++i) {
cnv->invalidCharBuffer[i]=(char)cnv->toUBytes[i];
}
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
} else {
/* set the converter state back into UConverter */
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=state;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=byteOne;
cnv->invalidCharLength=i;
/* reset to deal with the next character */
state=readCommand;
}
finish:
/* set the converter state back into UConverter */
if(state==readCommand) {
/* not in a multi-byte sequence, reset toULength */
cnv->toULength=0;
}
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=state;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=byteOne;
/* write back the updated pointers */
pArgs->source=(const char *)source;
pArgs->target=target;
pArgs->offsets=offsets;
return;
callback:
/* call the callback function with all the preparations and post-processing */
/* update the arguments structure */
pArgs->source=(const char *)source;
pArgs->target=target;
pArgs->offsets=offsets;
/* the current bytes were copied to invalidCharBuffer before the goto callback jump */
/* set the converter state in UConverter to deal with the next character */
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=readCommand;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=0;
/* call the callback function */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
/* get the converter state from UConverter */
isSingleByteMode=scsu->toUIsSingleByteMode;
state=scsu->toUState;
quoteWindow=scsu->toUQuoteWindow;
dynamicWindow=scsu->toUDynamicWindow;
byteOne=scsu->toUByteOne;
/* update target and deal with offsets if necessary */
offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(pArgs->target-target), sourceIndex);
target=pArgs->target;
/* update the source pointer and index */
sourceIndex=(int32_t)(nextSourceIndex+((const uint8_t *)pArgs->source-source));
source=(const uint8_t *)pArgs->source;
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
goto endloop;
} else if(cnv->UCharErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
goto endloop;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
goto finish;
} else {
goto loop;
}
}
/*
@ -619,7 +590,6 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
const uint8_t *source, *sourceLimit;
UChar *target;
const UChar *targetLimit;
UBool isSingleByteMode;
uint8_t state, byteOne;
int8_t quoteWindow, dynamicWindow;
@ -658,11 +628,9 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
* The end of the input or output buffer is also handled by the slower loop.
* The slow loop jumps (goto) to the fast-path loop again as soon as possible.
*
* The callback handling is done by jumping (goto) to the callback section at the end
* of the function. From there, it either jumps to here to continue or to
* the endloop section to clean up and return.
* The callback handling is done by returning with an error code.
* The conversion framework actually calls the callback function.
*/
loop:
if(isSingleByteMode) {
/* fast path for single-byte mode */
if(state==readCommand) {
@ -731,13 +699,20 @@ singleByteMode:
goto fastUnicode;
} else /* Srs */ {
/* callback(illegal) */
cnv->invalidCharBuffer[0]=b;
cnv->invalidCharLength=1;
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->toUBytes[0]=b;
cnv->toULength=1;
goto endloop;
}
/* store the first byte of a multibyte sequence in toUBytes[] */
cnv->toUBytes[0]=b;
cnv->toULength=1;
break;
case quotePairOne:
byteOne=b;
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=quotePairTwo;
break;
case quotePairTwo:
@ -772,6 +747,8 @@ singleByteMode:
case definePairOne:
dynamicWindow=(int8_t)((b>>5)&7);
byteOne=(uint8_t)(b&0x1f);
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=definePairTwo;
break;
case definePairTwo:
@ -781,10 +758,9 @@ singleByteMode:
case defineOne:
if(b==0) {
/* callback(illegal): Reserved window offset value 0 */
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
cnv->invalidCharBuffer[1]=b;
cnv->invalidCharLength=2;
goto callback;
cnv->toUBytes[1]=b;
cnv->toULength=2;
goto endloop;
} else if(b<gapThreshold) {
scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
} else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
@ -793,10 +769,9 @@ singleByteMode:
scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
} else {
/* callback(illegal): Reserved window offset value 0xa8..0xf8 */
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
cnv->invalidCharBuffer[1]=b;
cnv->invalidCharLength=2;
goto callback;
cnv->toUBytes[1]=b;
cnv->toULength=2;
goto endloop;
}
state=readCommand;
goto fastSingle;
@ -825,6 +800,8 @@ fastUnicode:
case readCommand:
if((uint8_t)(b-UC0)>(Urs-UC0)) {
byteOne=b;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=quotePairTwo;
} else if(/* UC0<=b && */ b<=UC7) {
dynamicWindow=(int8_t)(b-UC0);
@ -833,23 +810,32 @@ fastUnicode:
} else if(/* UD0<=b && */ b<=UD7) {
dynamicWindow=(int8_t)(b-UD0);
isSingleByteMode=TRUE;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=defineOne;
goto singleByteMode;
} else if(b==UDX) {
isSingleByteMode=TRUE;
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=definePairOne;
goto singleByteMode;
} else if(b==UQU) {
cnv->toUBytes[0]=b;
cnv->toULength=1;
state=quotePairOne;
} else /* Urs */ {
/* callback(illegal) */
cnv->invalidCharBuffer[0]=b;
cnv->invalidCharLength=1;
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->toUBytes[0]=b;
cnv->toULength=1;
goto endloop;
}
break;
case quotePairOne:
byteOne=b;
cnv->toUBytes[1]=b;
cnv->toULength=2;
state=quotePairTwo;
break;
case quotePairTwo:
@ -861,80 +847,34 @@ fastUnicode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
/* a character byte sequence remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
/* copy the input sequence into the error buffer */
int8_t i;
for(i=0; i<cnv->toULength; ++i) {
cnv->invalidCharBuffer[i]=(char)cnv->toUBytes[i];
}
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
} else {
/* set the converter state back into UConverter */
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=state;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=byteOne;
cnv->invalidCharLength=i;
/* reset to deal with the next character */
state=readCommand;
}
finish:
/* set the converter state back into UConverter */
if(state==readCommand) {
/* not in a multi-byte sequence, reset toULength */
cnv->toULength=0;
}
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=state;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=byteOne;
/* write back the updated pointers */
pArgs->source=(const char *)source;
pArgs->target=target;
return;
callback:
/* call the callback function with all the preparations and post-processing */
/* update the arguments structure */
pArgs->source=(const char *)source;
pArgs->target=target;
/* the current bytes were copied to invalidCharBuffer before the goto callback jump */
/* set the converter state in UConverter to deal with the next character */
scsu->toUIsSingleByteMode=isSingleByteMode;
scsu->toUState=readCommand;
scsu->toUQuoteWindow=quoteWindow;
scsu->toUDynamicWindow=dynamicWindow;
scsu->toUByteOne=0;
/* call the callback function */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
/* get the converter state from UConverter */
isSingleByteMode=scsu->toUIsSingleByteMode;
state=scsu->toUState;
quoteWindow=scsu->toUQuoteWindow;
dynamicWindow=scsu->toUDynamicWindow;
byteOne=scsu->toUByteOne;
target=pArgs->target;
source=(const uint8_t *)pArgs->source;
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
goto endloop;
} else if(cnv->UCharErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
goto endloop;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
goto finish;
} else {
goto loop;
}
}
/*
 * Get-next-UChar entry point for SCSU.
 *
 * Hands the work to the shared helper ucnv_getNextUCharFromToUImpl(),
 * supplying _SCSUToUnicode as the to-Unicode conversion function, and
 * returns whatever code point that helper produces. Errors are reported
 * through *pErrorCode by the helper.
 *
 * NOTE(review): the meaning of the TRUE argument is defined by
 * ucnv_getNextUCharFromToUImpl(), which is not visible here - confirm
 * against its declaration.
 */
static UChar32
_SCSUGetNextUChar(UConverterToUnicodeArgs *pArgs,
                  UErrorCode *pErrorCode) {
    UChar32 codePoint;

    codePoint=ucnv_getNextUCharFromToUImpl(pArgs, _SCSUToUnicode, TRUE, pErrorCode);
    return codePoint;
}
/* SCSU-from-Unicode conversion functions ----------------------------------- */
@ -1095,7 +1035,6 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
int32_t sourceIndex, nextSourceIndex;
uint32_t i;
int32_t length;
/* variables for compression heuristics */
@ -1188,7 +1127,8 @@ getTrailSingle:
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
} else {
/* no more input */
@ -1197,7 +1137,8 @@ getTrailSingle:
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
/* compress supplementary character U+10000..U+10ffff */
@ -1383,7 +1324,8 @@ getTrailUnicode:
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
} else {
/* no more input */
@ -1392,7 +1334,8 @@ getTrailUnicode:
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
/* compress supplementary character */
@ -1443,22 +1386,19 @@ getTrailUnicode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a lead surrogate is still pending: the surrogate pair remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
} else {
/* set the converter state back into UConverter */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=(UChar)c;
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
/* c is an unpaired surrogate */
cnv->invalidUCharBuffer[0]=(UChar)c;
cnv->invalidUCharLength=1;
c=0;
}
finish:
/* set the converter state back into UConverter */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=(UChar)c;
/* write back the updated pointers */
pArgs->source=source;
pArgs->target=(char *)target;
@ -1566,59 +1506,6 @@ outputBytes:
c=0;
goto endloop;
}
callback:
/* call the callback function with all the preparations and post-processing */
/* update the arguments structure */
pArgs->source=source;
pArgs->target=(char *)target;
pArgs->offsets=offsets;
/* set the converter state in UConverter to deal with the next character */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=0;
/* write the code point as code units */
i=0;
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
cnv->invalidUCharLength=(int8_t)i;
/* call the callback function */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
/* get the converter state from UConverter */
isSingleByteMode=scsu->fromUIsSingleByteMode;
dynamicWindow=scsu->fromUDynamicWindow;
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
c=cnv->fromUSurrogateLead;
/* update target and deal with offsets if necessary */
offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(((uint8_t *)pArgs->target)-target), sourceIndex);
target=(uint8_t *)pArgs->target;
/* update the source pointer and index */
sourceIndex=(int32_t)(nextSourceIndex+(pArgs->source-source));
source=pArgs->source;
targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
goto endloop;
} else if(cnv->charErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
goto endloop;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
goto finish;
} else {
goto loop;
}
}
/*
@ -1643,7 +1530,6 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
uint32_t c, delta;
uint32_t i;
int32_t length;
/* variables for compression heuristics */
@ -1720,7 +1606,8 @@ getTrailSingle:
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
} else {
/* no more input */
@ -1729,7 +1616,8 @@ getTrailSingle:
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
/* compress supplementary character U+10000..U+10ffff */
@ -1902,7 +1790,8 @@ getTrailUnicode:
} else {
/* this is an unmatched lead code unit (1st surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
} else {
/* no more input */
@ -1911,7 +1800,8 @@ getTrailUnicode:
} else {
/* this is an unmatched trail code unit (2nd surrogate) */
/* callback(illegal) */
goto callback;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
goto endloop;
}
/* compress supplementary character */
@ -1961,22 +1851,19 @@ getTrailUnicode:
}
endloop:
if(pArgs->flush && source>=sourceLimit) {
/* reset the state for the next conversion */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
/* a lead surrogate is still pending: the surrogate pair remains incomplete */
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
} else {
/* set the converter state back into UConverter */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=(UChar)c;
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
/* c is an unpaired surrogate */
cnv->invalidUCharBuffer[0]=(UChar)c;
cnv->invalidUCharLength=1;
c=0;
}
finish:
/* set the converter state back into UConverter */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=(UChar)c;
/* write back the updated pointers */
pArgs->source=source;
pArgs->target=(char *)target;
@ -2052,54 +1939,6 @@ outputBytes:
c=0;
goto endloop;
}
callback:
/* call the callback function with all the preparations and post-processing */
/* update the arguments structure */
pArgs->source=source;
pArgs->target=(char *)target;
/* set the converter state in UConverter to deal with the next character */
scsu->fromUIsSingleByteMode=isSingleByteMode;
scsu->fromUDynamicWindow=dynamicWindow;
cnv->fromUSurrogateLead=0;
/* write the code point as code units */
i=0;
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
cnv->invalidUCharLength=(int8_t)i;
/* call the callback function */
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
/* get the converter state from UConverter */
isSingleByteMode=scsu->fromUIsSingleByteMode;
dynamicWindow=scsu->fromUDynamicWindow;
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
c=cnv->fromUSurrogateLead;
target=(uint8_t *)pArgs->target;
source=pArgs->source;
targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
/*
* If the callback overflowed the target, then we need to
* stop here with an overflow indication.
*/
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
goto endloop;
} else if(cnv->charErrorBufferLength>0) {
/* target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
goto endloop;
} else if(U_FAILURE(*pErrorCode)) {
/* break on error */
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
goto finish;
} else {
goto loop;
}
}
/* miscellaneous ------------------------------------------------------------ */
@ -2194,7 +2033,7 @@ static const UConverterImpl _SCSUImpl={
_SCSUToUnicodeWithOffsets,
_SCSUFromUnicode,
_SCSUFromUnicodeWithOffsets,
_SCSUGetNextUChar,
NULL,
NULL,
_SCSUGetName,