mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-2449 new semantics of truncated sequences; move callback/truncated handling into ucnv.c API functions
X-SVN-Rev: 12649
This commit is contained in:
parent
305d4724a3
commit
39a2aed516
17 changed files with 1277 additions and 1448 deletions
File diff suppressed because it is too large
Load diff
|
@ -356,7 +356,7 @@ setInitialStateToUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConver
|
|||
static void
|
||||
setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData);
|
||||
|
||||
/*************** Converter implemenations ******************/
|
||||
/*************** Converter implementations ******************/
|
||||
static const UConverterImpl _ISO2022Impl={
|
||||
UCNV_ISO_2022,
|
||||
|
||||
|
@ -1020,6 +1020,12 @@ T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
|
|||
saveThis = args->converter;
|
||||
args->offsets = NULL;
|
||||
args->converter = myData->currentConverter;
|
||||
/*
|
||||
* ### TODO this does not maintain overflow and error buffers between
|
||||
* the sub-converter and this one;
|
||||
* idea: just copy those parts of the sub-UConverter into the 2022 UConverter
|
||||
* after ucnv_toUnicode()
|
||||
*/
|
||||
ucnv_toUnicode(args->converter,
|
||||
&args->target,
|
||||
args->targetLimit,
|
||||
|
@ -1079,10 +1085,6 @@ T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs *args,
|
|||
}
|
||||
|
||||
myData->isFirstBuffer=FALSE;
|
||||
if( (args->source == args->sourceLimit) && args->flush){
|
||||
_ISO2022Reset(args->converter,UCNV_RESET_FROM_UNICODE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1187,9 +1189,6 @@ T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
|
|||
myOffset += args->source - sourceStart;
|
||||
|
||||
}
|
||||
if( (args->source == args->sourceLimit) && args->flush){
|
||||
_ISO2022Reset(args->converter,UCNV_RESET_TO_UNICODE);
|
||||
}
|
||||
}
|
||||
|
||||
static UCNV_TableStates_2022
|
||||
|
@ -1758,20 +1757,6 @@ getTrail:
|
|||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (source == sourceLimit) && args->flush){
|
||||
setInitialStateFromUnicodeJPCN(args->converter,converterData);
|
||||
}
|
||||
|
||||
/*save the state and return */
|
||||
args->source = source;
|
||||
args->target = (char*)target;
|
||||
|
@ -2009,19 +1994,6 @@ CALLBACK:
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((args->flush==TRUE)
|
||||
&& (mySource == mySourceLimit)
|
||||
&& ( *toUnicodeStatus!=0x00)){
|
||||
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
*toUnicodeStatus= 0x00;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (mySource == mySourceLimit) && args->flush){
|
||||
setInitialStateToUnicodeJPCN(args->converter,myData);
|
||||
}
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
}
|
||||
|
@ -2223,20 +2195,6 @@ getTrail:
|
|||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (source == sourceLimit) && args->flush){
|
||||
setInitialStateFromUnicodeKR(args->converter,converterData);
|
||||
}
|
||||
|
||||
/*save the state and return */
|
||||
args->source = source;
|
||||
args->target = (char*)target;
|
||||
|
@ -2404,19 +2362,6 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((args->flush==TRUE)
|
||||
&& (mySource == mySourceLimit)
|
||||
&& ( args->converter->toUnicodeStatus !=0x00)){
|
||||
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (mySource == mySourceLimit) && args->flush){
|
||||
setInitialStateToUnicodeKR(args->converter,myData);
|
||||
}
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
}
|
||||
|
@ -2833,20 +2778,6 @@ callback:
|
|||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (source == sourceLimit) && args->flush){
|
||||
setInitialStateFromUnicodeJPCN(args->converter,converterData);
|
||||
}
|
||||
|
||||
/*save the state and return */
|
||||
args->source = source;
|
||||
args->target = (char*)target;
|
||||
|
@ -3234,19 +3165,6 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((args->flush==TRUE)
|
||||
&& (mySource == mySourceLimit)
|
||||
&& ( args->converter->toUnicodeStatus !=0x00)){
|
||||
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (mySource == mySourceLimit) && args->flush){
|
||||
setInitialStateToUnicodeJPCN(args->converter,myData);
|
||||
}
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
}
|
||||
|
|
|
@ -768,7 +768,6 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter,
|
|||
myUConverter->isExtraLocal = FALSE;
|
||||
myUConverter->sharedData = mySharedConverterData;
|
||||
myUConverter->options = options;
|
||||
myUConverter->mode = UCNV_SI;
|
||||
myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
||||
myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
||||
myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus;
|
||||
|
|
|
@ -25,8 +25,12 @@
|
|||
/* size of the overflow buffers in UConverter, enough for escaping callbacks */
|
||||
#define UCNV_ERROR_BUFFER_LENGTH 32
|
||||
|
||||
/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
|
||||
#define UCNV_MAX_SUBCHAR_LEN 4
|
||||
|
||||
/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
|
||||
#define UCNV_MAX_CHAR_LEN 8
|
||||
|
||||
/* converter options bits */
|
||||
#define UCNV_OPTION_VERSION 0xf
|
||||
#define UCNV_OPTION_SWAP_LFNL 0x10
|
||||
|
@ -140,7 +144,7 @@ struct UConverter {
|
|||
|
||||
UBool useFallback;
|
||||
int8_t toULength; /* number of bytes in toUBytes */
|
||||
uint8_t toUBytes[7]; /* more "toU status"; keeps the bytes of the current character */
|
||||
uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
|
||||
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
|
||||
int32_t mode;
|
||||
uint32_t fromUnicodeStatus;
|
||||
|
@ -155,12 +159,11 @@ struct UConverter {
|
|||
|
||||
uint8_t subChar1; /* single-byte substitution character if different from subChar */
|
||||
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; /* bytes from last error/callback situation */
|
||||
char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
|
||||
uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
|
||||
|
||||
UChar invalidUCharBuffer[3]; /* UChars from last error/callback situation */
|
||||
UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
|
||||
|
||||
};
|
||||
|
||||
U_CDECL_END /* end of UConverter */
|
||||
|
|
|
@ -22,99 +22,6 @@
|
|||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*Empties the internal unicode output buffer */
|
||||
void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
|
||||
UChar * myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err)
|
||||
{
|
||||
int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength;
|
||||
|
||||
if (myUCharErrorBufferLength <= targetLength)
|
||||
{
|
||||
/*we have enough space
|
||||
*So we just copy the whole Error Buffer in to the output stream
|
||||
*/
|
||||
uprv_memcpy (myTarget,
|
||||
_this->UCharErrorBuffer,
|
||||
sizeof (UChar) * myUCharErrorBufferLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i<myUCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += myUCharErrorBufferLength;
|
||||
}
|
||||
*myTargetIndex += myUCharErrorBufferLength;
|
||||
_this->UCharErrorBufferLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We don't have enough space so we copy as much as we can
|
||||
* on the output stream and update the object
|
||||
* by updating the internal buffer*/
|
||||
uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += targetLength;
|
||||
}
|
||||
uprv_memmove (_this->UCharErrorBuffer,
|
||||
_this->UCharErrorBuffer + targetLength,
|
||||
sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
|
||||
_this->UCharErrorBufferLength -= (int8_t) targetLength;
|
||||
*myTargetIndex = targetLength;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/*Empties the internal codepage output buffer */
|
||||
void ucnv_flushInternalCharBuffer (UConverter * _this,
|
||||
char *myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err)
|
||||
{
|
||||
int32_t myCharErrorBufferLength = _this->charErrorBufferLength;
|
||||
|
||||
/*we have enough space */
|
||||
if (myCharErrorBufferLength <= targetLength)
|
||||
{
|
||||
uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i<myCharErrorBufferLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += myCharErrorBufferLength;
|
||||
}
|
||||
|
||||
*myTargetIndex += myCharErrorBufferLength;
|
||||
_this->charErrorBufferLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We don't have enough space so we copy as much as we can
|
||||
* on the output stream and update the object
|
||||
*/
|
||||
uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += targetLength;
|
||||
}
|
||||
uprv_memmove (_this->charErrorBuffer,
|
||||
_this->charErrorBuffer + targetLength,
|
||||
(myCharErrorBufferLength - targetLength));
|
||||
_this->charErrorBufferLength -= (int8_t) targetLength;
|
||||
*myTargetIndex = targetLength;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function is useful for implementations of getNextUChar().
|
||||
* After a call to a callback function or to toUnicode(), an output buffer
|
||||
|
@ -193,54 +100,6 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a simple implementation of ucnv_getNextUChar() that uses the
|
||||
* converter's toUnicode() function. See ucnv_cnv.h for details.
|
||||
*/
|
||||
U_CFUNC UChar32
|
||||
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
||||
T_ToUnicodeFunction toU,
|
||||
UBool collectPairs,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar buffer[UTF_MAX_CHAR_LENGTH];
|
||||
const char *realLimit=pArgs->sourceLimit;
|
||||
|
||||
pArgs->target=buffer;
|
||||
pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
|
||||
|
||||
while(pArgs->source<realLimit) {
|
||||
/* feed in one byte at a time to make sure to get only one character out */
|
||||
pArgs->sourceLimit=pArgs->source+1;
|
||||
pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
|
||||
|
||||
/* convert this byte and check the result */
|
||||
toU(pArgs, pErrorCode);
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
int32_t length=(int32_t)(pArgs->target-buffer);
|
||||
|
||||
/* this test is UTF-16 specific */
|
||||
if(/* some output and
|
||||
(source consumed or don't collect surrogate pairs or not a surrogate or a surrogate pair) */
|
||||
length>0 &&
|
||||
(pArgs->flush || !collectPairs || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
|
||||
) {
|
||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
|
||||
}
|
||||
/* else continue with the loop */
|
||||
} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, UTF_MAX_CHAR_LENGTH);
|
||||
} else {
|
||||
/* U_FAILURE() */
|
||||
return 0xffff;
|
||||
}
|
||||
}
|
||||
|
||||
/* no output because of empty input or only state changes and skipping callbacks */
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
USet *set,
|
||||
|
|
|
@ -59,11 +59,11 @@ typedef enum UConverterResetChoice {
|
|||
|
||||
typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
|
||||
|
||||
typedef void (*T_ToUnicodeFunction) (UConverterToUnicodeArgs *, UErrorCode *);
|
||||
typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *);
|
||||
|
||||
typedef void (*T_FromUnicodeFunction) (UConverterFromUnicodeArgs *, UErrorCode *);
|
||||
typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *);
|
||||
|
||||
typedef UChar32 (*T_GetNextUCharFunction) (UConverterToUnicodeArgs *, UErrorCode *);
|
||||
typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *);
|
||||
|
||||
typedef void (*UConverterGetStarters)(const UConverter* converter,
|
||||
UBool starters[256],
|
||||
|
@ -116,20 +116,6 @@ typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
|
|||
|
||||
UBool CONVERSION_U_SUCCESS (UErrorCode err);
|
||||
|
||||
void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
|
||||
UChar * myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
void ucnv_flushInternalCharBuffer (UConverter * _this,
|
||||
char *myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* UConverterImpl contains all the data and functions for a converter type.
|
||||
* Its function pointers work much like a C++ vtable.
|
||||
|
@ -156,11 +142,11 @@ struct UConverterImpl {
|
|||
UConverterClose close;
|
||||
UConverterReset reset;
|
||||
|
||||
T_ToUnicodeFunction toUnicode;
|
||||
T_ToUnicodeFunction toUnicodeWithOffsets;
|
||||
T_FromUnicodeFunction fromUnicode;
|
||||
T_FromUnicodeFunction fromUnicodeWithOffsets;
|
||||
T_GetNextUCharFunction getNextUChar;
|
||||
UConverterToUnicode toUnicode;
|
||||
UConverterToUnicode toUnicodeWithOffsets;
|
||||
UConverterFromUnicode fromUnicode;
|
||||
UConverterFromUnicode fromUnicodeWithOffsets;
|
||||
UConverterGetNextUChar getNextUChar;
|
||||
|
||||
UConverterGetStarters getStarters;
|
||||
UConverterGetName getName;
|
||||
|
@ -224,30 +210,12 @@ ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex
|
|||
#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c)
|
||||
|
||||
/**
|
||||
* This is a simple implementation of ucnv_getNextUChar() that uses the
|
||||
* converter's toUnicode() function.
|
||||
*
|
||||
* \par
|
||||
* A surrogate pair from a single byte sequence is always
|
||||
* combined to a supplementary code point.
|
||||
* A surrogate pair from consecutive byte sequences is only combined
|
||||
* if collectPairs is set. This is necessary for SCSU
|
||||
* but not allowed for most legacy codepages.
|
||||
*
|
||||
* @param pArgs The argument structure supplied by ucnv_getNextUChar()
|
||||
* @param toU A function pointer to the converter's toUnicode() function
|
||||
* @param collectPairs indicates whether separate surrogate results from
|
||||
* consecutive byte sequences should be combined into
|
||||
* a single code point
|
||||
* @param pErrorCode An ICU error code parameter
|
||||
* @return The Unicode code point as a result of a conversion of a minimal
|
||||
* number of input bytes
|
||||
* Magic number for ucnv_getNextUChar(), returned by a
|
||||
* getNextUChar() implementation to indicate that the caller should use the converter's toUnicode()
|
||||
* instead of the native function.
|
||||
* @internal
|
||||
*/
|
||||
U_CFUNC UChar32
|
||||
ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
|
||||
T_ToUnicodeFunction toU,
|
||||
UBool collectPairs,
|
||||
UErrorCode *pErrorCode);
|
||||
#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9
|
||||
|
||||
U_CFUNC void
|
||||
ucnv_getCompleteUnicodeSet(const UConverter *cnv,
|
||||
|
|
|
@ -1211,11 +1211,11 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
|||
{
|
||||
saveSource = args->source; /* beginning of current code point */
|
||||
|
||||
if (args->converter->invalidCharLength) /* reassemble char from previous call */
|
||||
if (args->converter->toULength) /* reassemble char from previous call */
|
||||
{
|
||||
char LMBCS [ULMBCS_CHARSIZE_MAX];
|
||||
const char *pLMBCS = LMBCS, *saveSourceLimit;
|
||||
size_t size_old = args->converter->invalidCharLength;
|
||||
size_t size_old = args->converter->toULength;
|
||||
|
||||
/* limit from source is either remainder of temp buffer, or user limit on source */
|
||||
size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
|
||||
|
@ -1223,7 +1223,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
|||
size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
|
||||
|
||||
|
||||
uprv_memcpy(LMBCS, args->converter->invalidCharBuffer, size_old);
|
||||
uprv_memcpy(LMBCS, args->converter->toUBytes, size_old);
|
||||
uprv_memcpy(LMBCS + size_old, args->source, size_new);
|
||||
saveSourceLimit = args->sourceLimit;
|
||||
args->source = pLMBCS;
|
||||
|
@ -1234,12 +1234,12 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
|||
args->sourceLimit = saveSourceLimit;
|
||||
args->source += (pLMBCS - LMBCS - size_old);
|
||||
|
||||
if (*err == U_TRUNCATED_CHAR_FOUND && !args->flush)
|
||||
if (*err == U_TRUNCATED_CHAR_FOUND)
|
||||
{
|
||||
/* evil special case: source buffers so small a char spans more than 2 buffers */
|
||||
int8_t savebytes = (int8_t)(size_old+size_new);
|
||||
args->converter->invalidCharLength = savebytes;
|
||||
uprv_memcpy(args->converter->invalidCharBuffer, LMBCS, savebytes);
|
||||
args->converter->toULength = savebytes;
|
||||
uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes);
|
||||
args->source = args->sourceLimit;
|
||||
*err = U_ZERO_ERROR;
|
||||
return;
|
||||
|
@ -1247,7 +1247,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
|||
else
|
||||
{
|
||||
/* clear the partial-char marker */
|
||||
args->converter->invalidCharLength = 0;
|
||||
args->converter->toULength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1313,11 +1313,10 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
|
|||
if (*err == U_TRUNCATED_CHAR_FOUND)
|
||||
{
|
||||
args->source = args->sourceLimit;
|
||||
if (!args->flush )
|
||||
{
|
||||
int8_t savebytes = (int8_t)(args->sourceLimit - saveSource);
|
||||
args->converter->invalidCharLength = (int8_t)savebytes;
|
||||
uprv_memcpy(args->converter->invalidCharBuffer, saveSource, savebytes);
|
||||
args->converter->toULength = (int8_t)savebytes;
|
||||
uprv_memcpy(args->converter->toUBytes, saveSource, savebytes);
|
||||
*err = U_ZERO_ERROR;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
int32_t count;
|
||||
int32_t sourceIndex = 0;
|
||||
|
||||
if(length <= 0 && cnv->toUnicodeStatus == 0) {
|
||||
if(length <= 0) {
|
||||
/* no input, nothing to do */
|
||||
return;
|
||||
}
|
||||
|
@ -46,14 +46,14 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* complete a partial UChar from the last call */
|
||||
if(length != 0 && cnv->toUnicodeStatus != 0) {
|
||||
if(length != 0 && cnv->toULength != 0) {
|
||||
/*
|
||||
* copy the byte from the last call and the first one here into the target,
|
||||
* byte-wise to keep the platform endianness
|
||||
*/
|
||||
uint8_t *p = (uint8_t *)target++;
|
||||
*p++ = (uint8_t)cnv->toUnicodeStatus;
|
||||
cnv->toUnicodeStatus = 0;
|
||||
*p++ = cnv->toUBytes[0];
|
||||
cnv->toULength = 0;
|
||||
*p = *source++;
|
||||
--length;
|
||||
--targetCapacity;
|
||||
|
@ -88,16 +88,9 @@ _UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
/* it must be targetCapacity==0 because otherwise the above would have copied more */
|
||||
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||
} else if(length == 1) {
|
||||
if(pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
} else {
|
||||
/* consume the last byte and store it, making sure that it will never set the status to 0 */
|
||||
cnv->toUnicodeStatus = *source++ | 0x100;
|
||||
}
|
||||
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
/* consume the last byte and store it */
|
||||
cnv->toUBytes[0]=*source++;
|
||||
cnv->toULength=1;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -199,7 +192,7 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
int32_t count;
|
||||
int32_t sourceIndex = 0;
|
||||
|
||||
if(length <= 0 && cnv->toUnicodeStatus == 0) {
|
||||
if(length <= 0) {
|
||||
/* no input, nothing to do */
|
||||
return;
|
||||
}
|
||||
|
@ -210,14 +203,14 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* complete a partial UChar from the last call */
|
||||
if(length != 0 && cnv->toUnicodeStatus != 0) {
|
||||
if(length != 0 && cnv->toULength != 0) {
|
||||
/*
|
||||
* copy the byte from the last call and the first one here into the target,
|
||||
* byte-wise, reversing the platform endianness
|
||||
*/
|
||||
*target8++ = *source++;
|
||||
*target8++ = (uint8_t)cnv->toUnicodeStatus;
|
||||
cnv->toUnicodeStatus = 0;
|
||||
*target8++ = cnv->toUBytes[0];
|
||||
cnv->toULength = 0;
|
||||
++target;
|
||||
--length;
|
||||
--targetCapacity;
|
||||
|
@ -260,16 +253,8 @@ _UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
/* it must be targetCapacity==0 because otherwise the above would have copied more */
|
||||
*pErrorCode = U_BUFFER_OVERFLOW_ERROR;
|
||||
} else if(length == 1) {
|
||||
if(pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
} else {
|
||||
/* consume the last byte and store it, making sure that it will never set the status to 0 */
|
||||
cnv->toUnicodeStatus = *source++ | 0x100;
|
||||
}
|
||||
} else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
|
||||
/* a UChar remains incomplete */
|
||||
*pErrorCode = U_TRUNCATED_CHAR_FOUND;
|
||||
cnv->toUBytes[0]=*source++;
|
||||
cnv->toULength=1;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -727,12 +712,12 @@ _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
_UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
|
||||
pArgs->source=source;
|
||||
pArgs->sourceLimit=sourceLimit;
|
||||
state=8;
|
||||
break;
|
||||
}
|
||||
cnv->mode=0; /* reset */
|
||||
} else {
|
||||
cnv->mode=state;
|
||||
}
|
||||
|
||||
cnv->mode=state;
|
||||
}
|
||||
|
||||
static UChar32
|
||||
|
@ -744,7 +729,7 @@ _UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
case 9:
|
||||
return T_UConverter_getNextUChar_UTF16_LE(pArgs, pErrorCode);
|
||||
default:
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, _UTF16ToUnicodeWithOffsets, TRUE, pErrorCode);
|
||||
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -128,20 +128,10 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = MAXIMUM_UCS4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
}
|
||||
/* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -237,20 +227,10 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = MAXIMUM_UCS4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
}
|
||||
/* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -331,10 +311,10 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[0] = 0;
|
||||
|
||||
if (args->converter->fromUnicodeStatus)
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
{
|
||||
ch = args->converter->fromUnicodeStatus;
|
||||
args->converter->fromUnicodeStatus = 0;
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -354,12 +334,22 @@ lowsurogate:
|
|||
mySource++;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
/*
|
||||
* ### TODO the old code used to convert unpaired surrogates in the middle
|
||||
* of a stream but not at the end
|
||||
* figure out which way to go definitely when discussing
|
||||
* Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
|
||||
*
|
||||
* for now (j2449), unpaired surrogates are always converted
|
||||
*/
|
||||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUnicodeStatus = ch;
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
|
||||
|
@ -406,10 +396,10 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[0] = 0;
|
||||
|
||||
if (args->converter->fromUnicodeStatus)
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
{
|
||||
ch = args->converter->fromUnicodeStatus;
|
||||
args->converter->fromUnicodeStatus = 0;
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -429,12 +419,14 @@ lowsurogate:
|
|||
mySource++;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUnicodeStatus = ch;
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
|
||||
|
@ -613,20 +605,10 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
}
|
||||
/* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -724,20 +706,10 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
}
|
||||
/* stores a partially calculated target*/
|
||||
/* + 1 to make 0 a valid character */
|
||||
args->converter->toUnicodeStatus = ch + 1;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -818,10 +790,10 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[3] = 0;
|
||||
|
||||
if (args->converter->fromUnicodeStatus)
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
{
|
||||
ch = args->converter->fromUnicodeStatus;
|
||||
args->converter->fromUnicodeStatus = 0;
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -841,12 +813,14 @@ lowsurogate:
|
|||
mySource++;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUnicodeStatus = ch;
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
|
||||
|
@ -893,10 +867,10 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
temp[3] = 0;
|
||||
|
||||
if (args->converter->fromUnicodeStatus)
|
||||
if (args->converter->fromUSurrogateLead)
|
||||
{
|
||||
ch = args->converter->fromUnicodeStatus;
|
||||
args->converter->fromUnicodeStatus = 0;
|
||||
ch = args->converter->fromUSurrogateLead;
|
||||
args->converter->fromUSurrogateLead = 0;
|
||||
goto lowsurogate;
|
||||
}
|
||||
|
||||
|
@ -916,12 +890,14 @@ lowsurogate:
|
|||
mySource++;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (!args->flush)
|
||||
{
|
||||
/* ran out of source */
|
||||
args->converter->fromUnicodeStatus = ch;
|
||||
args->converter->fromUSurrogateLead = (UChar)ch;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
|
||||
|
@ -1253,12 +1229,12 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
|
||||
pArgs->source=source;
|
||||
pArgs->sourceLimit=sourceLimit;
|
||||
state=8;
|
||||
break;
|
||||
}
|
||||
cnv->mode=0; /* reset */
|
||||
} else {
|
||||
cnv->mode=state;
|
||||
}
|
||||
|
||||
cnv->mode=state;
|
||||
}
|
||||
|
||||
static UChar32
|
||||
|
@ -1270,7 +1246,7 @@ _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
case 9:
|
||||
return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
|
||||
default:
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, _UTF32ToUnicodeWithOffsets, FALSE, pErrorCode);
|
||||
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -416,20 +416,20 @@ unicodeMode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(!inDirectMode && bits!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
|
||||
cnv->toULength=byteIndex;
|
||||
if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
|
||||
/*
|
||||
* if we are in Unicode mode, then the byteIndex might not be 0,
|
||||
* but that is ok if bits==0
|
||||
* -> we set byteIndex=0 at the end of the stream to avoid a truncated error
|
||||
* (not true for IMAP-mailbox-name where we must end in direct mode)
|
||||
*/
|
||||
byteIndex=0;
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
|
||||
cnv->toULength=byteIndex;
|
||||
|
||||
finish:
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=(const char *)source;
|
||||
|
@ -495,12 +495,6 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
static UChar32
|
||||
_UTF7GetNextUChar(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, pArgs->converter->sharedData->impl->toUnicode, TRUE, pErrorCode);
|
||||
}
|
||||
|
||||
static void
|
||||
_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
@ -788,7 +782,7 @@ static const UConverterImpl _UTF7Impl={
|
|||
_UTF7ToUnicodeWithOffsets,
|
||||
_UTF7FromUnicodeWithOffsets,
|
||||
_UTF7FromUnicodeWithOffsets,
|
||||
_UTF7GetNextUChar,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
_UTF7GetName,
|
||||
|
@ -1001,7 +995,8 @@ directMode:
|
|||
/* switch to Unicode mode */
|
||||
nextSourceIndex=++sourceIndex;
|
||||
inDirectMode=FALSE;
|
||||
byteIndex=0;
|
||||
bytes[0]=b;
|
||||
byteIndex=1;
|
||||
bits=0;
|
||||
base64Counter=-1;
|
||||
goto unicodeMode;
|
||||
|
@ -1145,19 +1140,9 @@ unicodeMode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(!inDirectMode && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete - IMAP must end in ASCII/direct mode */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
|
||||
cnv->toULength=byteIndex;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
|
||||
cnv->toULength=byteIndex;
|
||||
|
||||
finish:
|
||||
/* write back the updated pointers */
|
||||
|
@ -1525,7 +1510,7 @@ static const UConverterImpl _IMAPImpl={
|
|||
_IMAPToUnicodeWithOffsets,
|
||||
_IMAPFromUnicodeWithOffsets,
|
||||
_IMAPFromUnicodeWithOffsets,
|
||||
_UTF7GetNextUChar,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
|
|
@ -115,6 +115,7 @@ T_UConverter_toUnicode_InvalidChar_Callback(UConverterToUnicodeArgs * args,
|
|||
converter->toUBytes,
|
||||
converter->toULength);
|
||||
converter->invalidCharLength = converter->toULength;
|
||||
converter->toULength = 0;
|
||||
|
||||
/* Call the ErrorFunction */
|
||||
args->converter->fromCharErrorBehaviour(converter->toUContext,
|
||||
|
@ -200,19 +201,10 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* stores a partially calculated target*/
|
||||
args->converter->toUnicodeStatus = ch;
|
||||
args->converter->mode = inBytes;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
}
|
||||
/* stores a partially calculated target*/
|
||||
args->converter->toUnicodeStatus = ch;
|
||||
args->converter->mode = inBytes;
|
||||
args->converter->toULength = (int8_t) i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -236,6 +228,7 @@ morebytes:
|
|||
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
|
||||
{
|
||||
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
|
||||
args->converter->toULength = 0;
|
||||
if (ch <= MAXIMUM_UCS2)
|
||||
{
|
||||
/* fits in 16 bits */
|
||||
|
@ -350,20 +343,9 @@ morebytes:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (args->flush)
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->toUnicodeStatus = ch;
|
||||
args->converter->mode = inBytes;
|
||||
args->converter->toULength = (int8_t)i;
|
||||
}
|
||||
args->converter->toUnicodeStatus = ch;
|
||||
args->converter->mode = inBytes;
|
||||
args->converter->toULength = (int8_t)i;
|
||||
goto donefornow;
|
||||
}
|
||||
}
|
||||
|
@ -387,6 +369,7 @@ morebytes:
|
|||
(isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
|
||||
{
|
||||
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
|
||||
args->converter->toULength = 0;
|
||||
if (ch <= MAXIMUM_UCS2)
|
||||
{
|
||||
/* fits in 16 bits */
|
||||
|
@ -604,11 +587,6 @@ lowsurrogate:
|
|||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
if(args->flush && mySource >= sourceLimit && cnv->fromUSurrogateLead != 0 && U_SUCCESS(*err)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
}
|
||||
|
||||
args->target = (char *) myTarget;
|
||||
args->source = mySource;
|
||||
|
@ -787,11 +765,6 @@ lowsurrogate:
|
|||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
if(args->flush && mySource >= sourceLimit && cnv->fromUSurrogateLead != 0 && U_SUCCESS(*err)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
cnv->fromUSurrogateLead = 0;
|
||||
}
|
||||
|
||||
args->target = (char *) myTarget;
|
||||
args->source = mySource;
|
||||
|
|
|
@ -666,19 +666,9 @@ getTrail:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c<0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=BOCU1_ASCII_PREV;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -897,19 +887,9 @@ getTrail:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c<0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=BOCU1_ASCII_PREV;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead= c<0 ? (UChar)-c : 0;
|
||||
cnv->fromUnicodeStatus=(uint32_t)prev;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -1228,21 +1208,10 @@ getTrail:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
|
||||
cnv->mode=0;
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=(uint32_t)prev;
|
||||
cnv->mode=(diff<<2)|count;
|
||||
cnv->toULength=byteIndex;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=(uint32_t)prev;
|
||||
cnv->mode=(diff<<2)|count;
|
||||
cnv->toULength=byteIndex;
|
||||
|
||||
finish:
|
||||
/* write back the updated pointers */
|
||||
|
@ -1495,21 +1464,10 @@ getTrail:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
|
||||
cnv->mode=0;
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=(uint32_t)prev;
|
||||
cnv->mode=(diff<<2)|count;
|
||||
cnv->toULength=byteIndex;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=(uint32_t)prev;
|
||||
cnv->mode=(diff<<2)|count;
|
||||
cnv->toULength=byteIndex;
|
||||
|
||||
finish:
|
||||
/* write back the updated pointers */
|
||||
|
|
|
@ -325,18 +325,6 @@ SAVE_STATE:
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((args->flush==TRUE)
|
||||
&& (mySource == mySourceLimit)
|
||||
&& ( args->converter->toUnicodeStatus !=0x00)){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (mySource == mySourceLimit) && args->flush){
|
||||
_HZReset(args->converter, UCNV_RESET_TO_UNICODE);
|
||||
}
|
||||
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
|
@ -558,19 +546,6 @@ getTrail:
|
|||
}
|
||||
targetUniChar=missingCharMarker;
|
||||
}
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (mySourceIndex == mySourceLength) && args->flush){
|
||||
_HZReset(args->converter, UCNV_RESET_FROM_UNICODE);
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
|
|
@ -1053,21 +1053,6 @@ getTrail:
|
|||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (source == sourceLimit) && args->flush){
|
||||
/*reset converter*/
|
||||
_ISCIIReset(args->converter,UCNV_RESET_FROM_UNICODE);
|
||||
}
|
||||
|
||||
/*save the state and return */
|
||||
args->source = source;
|
||||
args->target = (char*)target;
|
||||
|
@ -1396,26 +1381,30 @@ CALLBACK:
|
|||
break;
|
||||
}
|
||||
}
|
||||
if((args->flush==TRUE)
|
||||
&& (source == sourceLimit)
|
||||
&& data->contextCharToUnicode != NO_CHAR_MARKER){
|
||||
/* if we have ATR in context it is an error */
|
||||
if(data->contextCharToUnicode==ATR || data->contextCharToUnicode==EXT || *toUnicodeStatus == missingCharMarker){
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
|
||||
if(args->flush && source == sourceLimit) {
|
||||
/* end of the input stream */
|
||||
UConverter *cnv = args->converter;
|
||||
|
||||
if(*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV){
|
||||
/* set toUBytes[] */
|
||||
cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
|
||||
cnv->toULength = 1;
|
||||
|
||||
/* avoid looping on truncated sequences */
|
||||
*contextCharToUnicode = NO_CHAR_MARKER;
|
||||
}else{
|
||||
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
|
||||
*toUnicodeStatus,data->currentDeltaToUnicode,err);
|
||||
*toUnicodeStatus = missingCharMarker;
|
||||
cnv->toULength = 0;
|
||||
}
|
||||
|
||||
if(*toUnicodeStatus != missingCharMarker) {
|
||||
/* output a remaining target character */
|
||||
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),
|
||||
*toUnicodeStatus,data->currentDeltaToUnicode,err);
|
||||
*toUnicodeStatus = missingCharMarker;
|
||||
}
|
||||
}
|
||||
/* Reset the state of converter if we consumed
|
||||
* the source and flush is true
|
||||
*/
|
||||
if( (source == sourceLimit) && args->flush){
|
||||
/*reset converter*/
|
||||
_ISCIIReset(args->converter,UCNV_RESET_TO_UNICODE);
|
||||
}
|
||||
|
||||
args->target = target;
|
||||
args->source = source;
|
||||
}
|
||||
|
|
|
@ -146,23 +146,21 @@ static void
|
|||
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
UConverter *cnv;
|
||||
const UChar *source, *sourceLimit, *lastSource;
|
||||
uint8_t *target;
|
||||
const UChar *source, *sourceLimit;
|
||||
uint8_t *target, *oldTarget;
|
||||
int32_t targetCapacity, length;
|
||||
int32_t *offsets;
|
||||
|
||||
UChar32 c, max;
|
||||
UChar32 cp;
|
||||
UChar c, max;
|
||||
|
||||
int32_t sourceIndex;
|
||||
|
||||
UConverterCallbackReason reason;
|
||||
int32_t i;
|
||||
|
||||
/* set up the local pointers */
|
||||
cnv=pArgs->converter;
|
||||
source=pArgs->source;
|
||||
sourceLimit=pArgs->sourceLimit;
|
||||
target=(uint8_t *)pArgs->target;
|
||||
target=oldTarget=(uint8_t *)pArgs->target;
|
||||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
|
@ -173,11 +171,10 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
cp=cnv->fromUSurrogateLead;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex= c==0 ? 0 : -1;
|
||||
lastSource=source;
|
||||
sourceIndex= cp==0 ? 0 : -1;
|
||||
|
||||
/*
|
||||
* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
|
||||
|
@ -189,13 +186,12 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
}
|
||||
|
||||
/* conversion loop */
|
||||
if(c!=0 && targetCapacity>0) {
|
||||
if(cp!=0 && targetCapacity>0) {
|
||||
goto getTrail;
|
||||
}
|
||||
|
||||
#if LATIN1_UNROLL_FROM_UNICODE
|
||||
/* unroll the loop with the most common case */
|
||||
unrolled:
|
||||
if(targetCapacity>=16) {
|
||||
int32_t count, loops;
|
||||
UChar u, oredChars;
|
||||
|
@ -247,7 +243,7 @@ unrolled:
|
|||
targetCapacity-=16*count;
|
||||
|
||||
if(offsets!=NULL) {
|
||||
lastSource+=16*count;
|
||||
oldTarget+=16*count;
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
*offsets++=sourceIndex++;
|
||||
|
@ -268,156 +264,73 @@ unrolled:
|
|||
--count;
|
||||
}
|
||||
}
|
||||
|
||||
c=0;
|
||||
}
|
||||
#endif
|
||||
|
||||
while(targetCapacity>0) {
|
||||
/*
|
||||
* Get a correct Unicode code point:
|
||||
* a single UChar for a BMP code point or
|
||||
* a matched surrogate pair for a "surrogate code point".
|
||||
*/
|
||||
c=*source++;
|
||||
if(c<=max) {
|
||||
/* convert the Unicode code point */
|
||||
*target++=(uint8_t)c;
|
||||
--targetCapacity;
|
||||
/* conversion loop */
|
||||
c=0;
|
||||
while(targetCapacity>0 && (c=*source++)<=max) {
|
||||
/* convert the Unicode code point */
|
||||
*target++=(uint8_t)c;
|
||||
--targetCapacity;
|
||||
}
|
||||
|
||||
/* normal end of conversion: prepare for a new character */
|
||||
c=0;
|
||||
} else {
|
||||
if(!UTF_IS_SURROGATE(c)) {
|
||||
/* callback(unassigned) */
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
} else if(UTF_IS_SURROGATE_FIRST(c)) {
|
||||
/*
|
||||
* not a real loop: just using while() to use a break inside instead of goto
|
||||
* logically, this is just if(c>max) ...
|
||||
*/
|
||||
while(c>max) {
|
||||
cp=c;
|
||||
if(!U_IS_SURROGATE(cp)) {
|
||||
/* callback(unassigned) */
|
||||
} else if(U_IS_SURROGATE_LEAD(cp)) {
|
||||
getTrail:
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(UTF_IS_SECOND_SURROGATE(trail)) {
|
||||
++source;
|
||||
c=UTF16_GET_PAIR_VALUE(c, trail);
|
||||
/* this codepage does not map supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
reason=UCNV_UNASSIGNED;
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
if(source<sourceLimit) {
|
||||
/* test the following code unit */
|
||||
UChar trail=*source;
|
||||
if(U16_IS_TRAIL(trail)) {
|
||||
++source;
|
||||
cp=U16_GET_SUPPLEMENTARY(cp, trail);
|
||||
/* this codepage does not map supplementary code points */
|
||||
/* callback(unassigned) */
|
||||
} else {
|
||||
/* no more input */
|
||||
break;
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
}
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
reason=UCNV_ILLEGAL;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* get the number of code units for c to correctly advance sourceIndex after the callback call */
|
||||
length=UTF_CHAR_LENGTH(c);
|
||||
|
||||
/* set offsets since the start or the last callback */
|
||||
if(offsets!=NULL) {
|
||||
int32_t count=(int32_t)(source-lastSource);
|
||||
|
||||
/* do not set the offset for the callback-causing character */
|
||||
count-=length;
|
||||
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
--count;
|
||||
}
|
||||
/* offset and sourceIndex are now set for the current character */
|
||||
}
|
||||
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
|
||||
/* write the code point as code units */
|
||||
i=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
/* i==length */
|
||||
|
||||
/* call the callback function */
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
c=cnv->fromUSurrogateLead;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex+=length+(pArgs->source-source);
|
||||
source=lastSource=pArgs->source;
|
||||
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
|
||||
length=sourceLimit-source;
|
||||
if(length<targetCapacity) {
|
||||
targetCapacity=length;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
c=0;
|
||||
break;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
/* no more input */
|
||||
cnv->fromUSurrogateLead=(UChar)cp;
|
||||
break;
|
||||
}
|
||||
|
||||
#if LATIN1_UNROLL_FROM_UNICODE
|
||||
goto unrolled;
|
||||
#endif
|
||||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
}
|
||||
|
||||
*pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
|
||||
|
||||
/* write the code point as code units */
|
||||
{
|
||||
int32_t i=0;
|
||||
U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, i, cp);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
/* set offsets since the start or the last callback */
|
||||
/* set offsets since the start */
|
||||
if(offsets!=NULL) {
|
||||
size_t count=source-lastSource;
|
||||
size_t count=target-oldTarget;
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
--count;
|
||||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
|
@ -479,23 +392,24 @@ const UConverterSharedData _Latin1Data={
|
|||
static void
|
||||
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
const uint8_t *source, *sourceLimit, *lastSource;
|
||||
UChar *target;
|
||||
const uint8_t *source, *sourceLimit;
|
||||
UChar *target, *oldTarget;
|
||||
int32_t targetCapacity, length;
|
||||
int32_t *offsets;
|
||||
|
||||
int32_t sourceIndex;
|
||||
|
||||
uint8_t c;
|
||||
|
||||
/* set up the local pointers */
|
||||
source=(const uint8_t *)pArgs->source;
|
||||
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
|
||||
target=pArgs->target;
|
||||
target=oldTarget=pArgs->target;
|
||||
targetCapacity=pArgs->targetLimit-pArgs->target;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex=0;
|
||||
lastSource=source;
|
||||
|
||||
/*
|
||||
* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
|
||||
|
@ -508,7 +422,6 @@ _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
|
||||
#if ASCII_UNROLL_TO_UNICODE
|
||||
/* unroll the loop with the most common case */
|
||||
unrolled:
|
||||
if(targetCapacity>=16) {
|
||||
int32_t count, loops;
|
||||
UChar oredChars;
|
||||
|
@ -544,7 +457,7 @@ unrolled:
|
|||
targetCapacity-=16*count;
|
||||
|
||||
if(offsets!=NULL) {
|
||||
lastSource+=16*count;
|
||||
oldTarget+=16*count;
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
*offsets++=sourceIndex++;
|
||||
|
@ -569,86 +482,26 @@ unrolled:
|
|||
#endif
|
||||
|
||||
/* conversion loop */
|
||||
while(targetCapacity>0) {
|
||||
if((*target++=*source++)<=0x7f) {
|
||||
--targetCapacity;
|
||||
} else {
|
||||
UConverter *cnv;
|
||||
|
||||
/* back out the illegal character */
|
||||
--target;
|
||||
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
cnv=pArgs->converter;
|
||||
|
||||
/* callback(illegal) */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
|
||||
/* set offsets since the start or the last callback */
|
||||
if(offsets!=NULL) {
|
||||
int32_t count=(int32_t)(source-lastSource);
|
||||
|
||||
/* predecrement: do not set the offset for the callback-causing character */
|
||||
while(--count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
/* offset and sourceIndex are now set for the current character */
|
||||
}
|
||||
|
||||
/* update the arguments structure */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
pArgs->offsets=offsets;
|
||||
|
||||
/* copy the current bytes to invalidCharBuffer */
|
||||
cnv->invalidCharBuffer[0]=*(source-1);
|
||||
cnv->invalidCharLength=1;
|
||||
|
||||
/* call the callback function */
|
||||
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
|
||||
target=pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex+=1+((const uint8_t *)pArgs->source-source);
|
||||
source=lastSource=(const uint8_t *)pArgs->source;
|
||||
targetCapacity=pArgs->targetLimit-target;
|
||||
length=sourceLimit-source;
|
||||
if(length<targetCapacity) {
|
||||
targetCapacity=length;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
break;
|
||||
} else if(cnv->UCharErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#if ASCII_UNROLL_TO_UNICODE
|
||||
goto unrolled;
|
||||
#endif
|
||||
}
|
||||
c=0;
|
||||
while(targetCapacity>0 && (c=*source++)<=0x7f) {
|
||||
*target++=c;
|
||||
--targetCapacity;
|
||||
}
|
||||
|
||||
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
|
||||
if(c>0x7f) {
|
||||
/* callback(illegal); copy the current bytes to invalidCharBuffer */
|
||||
UConverter *cnv=pArgs->converter;
|
||||
cnv->invalidCharBuffer[0]=c;
|
||||
cnv->invalidCharLength=1;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
} else if(source<sourceLimit && target>=pArgs->targetLimit) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
/* set offsets since the start or the last callback */
|
||||
/* set offsets since the start */
|
||||
if(offsets!=NULL) {
|
||||
size_t count=source-lastSource;
|
||||
size_t count=target-oldTarget;
|
||||
while(count>0) {
|
||||
*offsets++=sourceIndex++;
|
||||
--count;
|
||||
|
|
|
@ -777,21 +777,6 @@ _MBCSUnload(UConverterSharedData *sharedData) {
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_MBCSReset(UConverter *cnv, UConverterResetChoice choice) {
|
||||
if(choice<=UCNV_RESET_TO_UNICODE) {
|
||||
/* toUnicode */
|
||||
cnv->toUnicodeStatus=0; /* offset */
|
||||
cnv->mode=0; /* state */
|
||||
cnv->toULength=0; /* byteIndex */
|
||||
}
|
||||
if(choice!=UCNV_RESET_TO_UNICODE) {
|
||||
/* fromUnicode */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=1; /* prevLength */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_MBCSOpen(UConverter *cnv,
|
||||
const char *name,
|
||||
|
@ -822,7 +807,21 @@ _MBCSOpen(UConverter *cnv,
|
|||
}
|
||||
}
|
||||
|
||||
_MBCSReset(cnv, UCNV_RESET_BOTH);
|
||||
#if 0
|
||||
/*
|
||||
* documentation of UConverter fields used for status
|
||||
* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
|
||||
*/
|
||||
|
||||
/* toUnicode */
|
||||
cnv->toUnicodeStatus=0; /* offset */
|
||||
cnv->mode=0; /* state */
|
||||
cnv->toULength=0; /* byteIndex */
|
||||
|
||||
/* fromUnicode */
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=1; /* prevLength */
|
||||
#endif
|
||||
}
|
||||
|
||||
static const char *
|
||||
|
@ -1151,21 +1150,10 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(byteIndex>0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=0;
|
||||
cnv->mode=0;
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=offset;
|
||||
cnv->mode=state;
|
||||
cnv->toULength=byteIndex;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=offset;
|
||||
cnv->mode=state;
|
||||
cnv->toULength=byteIndex;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=(const char *)source;
|
||||
|
@ -1622,7 +1610,7 @@ _MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
|||
* with the rare case of a codepage that maps single surrogates
|
||||
* without adding the complexity to this already complicated function here.
|
||||
*/
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, _MBCSToUnicodeWithOffsets, TRUE, pErrorCode);
|
||||
return UCNV_GET_NEXT_UCHAR_USE_TO_U;
|
||||
} else if(cnv->sharedData->table->mbcs.countStates==1) {
|
||||
return _MBCSSingleGetNextUChar(pArgs, pErrorCode);
|
||||
}
|
||||
|
@ -2335,7 +2323,7 @@ getTrail:
|
|||
cnv->fromUnicodeStatus=prevLength; /* save the old state */
|
||||
value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
|
||||
if(value<=0xff) {
|
||||
if(prevLength==1) {
|
||||
if(prevLength<=1) {
|
||||
length=1;
|
||||
} else {
|
||||
/* change from double-byte mode to single-byte */
|
||||
|
@ -2611,36 +2599,34 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit && U_SUCCESS(*pErrorCode)) {
|
||||
/* end of input stream */
|
||||
if(c!=0) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
/* the following may change with Jitterbug 2449: would prepare for callback instead of resetting */
|
||||
c=0;
|
||||
prevLength=1;
|
||||
} else if(outputType==MBCS_OUTPUT_2_SISO && prevLength==2) {
|
||||
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
|
||||
if(targetCapacity>0) {
|
||||
*target++=(uint8_t)UCNV_SI;
|
||||
if(offsets!=NULL) {
|
||||
/* set the last source character's index (sourceIndex points at sourceLimit now) */
|
||||
*offsets++=prevSourceIndex;
|
||||
}
|
||||
} else {
|
||||
/* target is full */
|
||||
cnv->charErrorBuffer[0]=(char)UCNV_SI;
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
/*
|
||||
* the end of the input stream and detection of truncated input
|
||||
* are handled by the framework, but for EBCDIC_STATEFUL conversion
|
||||
* we need to emit an SI at the very end
|
||||
*
|
||||
* conditions:
|
||||
* successful
|
||||
* EBCDIC_STATEFUL in DBCS mode
|
||||
* end of input and no truncated input
|
||||
*/
|
||||
if( U_SUCCESS(*pErrorCode) &&
|
||||
outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
|
||||
pArgs->flush && source>=sourceLimit && c==0
|
||||
) {
|
||||
/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
|
||||
if(targetCapacity>0) {
|
||||
*target++=(uint8_t)UCNV_SI;
|
||||
if(offsets!=NULL) {
|
||||
/* set the last source character's index (sourceIndex points at sourceLimit now) */
|
||||
*offsets++=prevSourceIndex;
|
||||
}
|
||||
prevLength=1; /* we switched into SBCS */
|
||||
}
|
||||
|
||||
/* reset the state for the next conversion */
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
c=0;
|
||||
prevLength=1;
|
||||
} else {
|
||||
/* target is full */
|
||||
cnv->charErrorBuffer[0]=(char)UCNV_SI;
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
prevLength=1; /* we switched into SBCS */
|
||||
}
|
||||
|
||||
/* set the converter state back into UConverter */
|
||||
|
@ -2892,19 +2878,9 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
cnv->fromUnicodeStatus=1;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
cnv->fromUnicodeStatus=prevLength;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -3106,17 +3082,8 @@ callback:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -3389,17 +3356,8 @@ getTrail:
|
|||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a Unicode code point remains incomplete (only a first surrogate) */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->fromUSurrogateLead=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
}
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
|
@ -3643,7 +3601,7 @@ _MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
|
|||
*p++=subchar[0];
|
||||
break;
|
||||
case 2:
|
||||
if(cnv->fromUnicodeStatus==1) {
|
||||
if(cnv->fromUnicodeStatus<=1) {
|
||||
/* SBCS mode and DBCS sub char: change to DBCS */
|
||||
cnv->fromUnicodeStatus=2;
|
||||
*p++=UCNV_SO;
|
||||
|
@ -3688,7 +3646,7 @@ static const UConverterImpl _MBCSImpl={
|
|||
|
||||
_MBCSOpen,
|
||||
NULL,
|
||||
_MBCSReset,
|
||||
NULL,
|
||||
|
||||
_MBCSToUnicodeWithOffsets,
|
||||
_MBCSToUnicodeWithOffsets,
|
||||
|
|
|
@ -272,11 +272,9 @@ _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
* The end of the input or output buffer is also handled by the slower loop.
|
||||
* The slow loop jumps (goto) to the fast-path loop again as soon as possible.
|
||||
*
|
||||
* The callback handling is done by jumping (goto) to the callback section at the end
|
||||
* of the function. From there, it either jumps to here to continue or to
|
||||
* the endloop section to clean up and return.
|
||||
* The callback handling is done by returning with an error code.
|
||||
* The conversion framework actually calls the callback function.
|
||||
*/
|
||||
loop:
|
||||
if(isSingleByteMode) {
|
||||
/* fast path for single-byte mode */
|
||||
if(state==readCommand) {
|
||||
|
@ -367,13 +365,20 @@ singleByteMode:
|
|||
goto fastUnicode;
|
||||
} else /* Srs */ {
|
||||
/* callback(illegal) */
|
||||
cnv->invalidCharBuffer[0]=b;
|
||||
cnv->invalidCharLength=1;
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* store the first byte of a multibyte sequence in toUBytes[] */
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
break;
|
||||
case quotePairOne:
|
||||
byteOne=b;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=quotePairTwo;
|
||||
break;
|
||||
case quotePairTwo:
|
||||
|
@ -426,6 +431,8 @@ singleByteMode:
|
|||
case definePairOne:
|
||||
dynamicWindow=(int8_t)((b>>5)&7);
|
||||
byteOne=(uint8_t)(b&0x1f);
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=definePairTwo;
|
||||
break;
|
||||
case definePairTwo:
|
||||
|
@ -436,10 +443,9 @@ singleByteMode:
|
|||
case defineOne:
|
||||
if(b==0) {
|
||||
/* callback(illegal): Reserved window offset value 0 */
|
||||
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
|
||||
cnv->invalidCharBuffer[1]=b;
|
||||
cnv->invalidCharLength=2;
|
||||
goto callback;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
goto endloop;
|
||||
} else if(b<gapThreshold) {
|
||||
scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
|
||||
} else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
|
||||
|
@ -448,10 +454,9 @@ singleByteMode:
|
|||
scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
|
||||
} else {
|
||||
/* callback(illegal): Reserved window offset value 0xa8..0xf8 */
|
||||
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
|
||||
cnv->invalidCharBuffer[1]=b;
|
||||
cnv->invalidCharLength=2;
|
||||
goto callback;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
goto endloop;
|
||||
}
|
||||
sourceIndex=nextSourceIndex;
|
||||
state=readCommand;
|
||||
|
@ -487,6 +492,8 @@ fastUnicode:
|
|||
case readCommand:
|
||||
if((uint8_t)(b-UC0)>(Urs-UC0)) {
|
||||
byteOne=b;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=quotePairTwo;
|
||||
} else if(/* UC0<=b && */ b<=UC7) {
|
||||
dynamicWindow=(int8_t)(b-UC0);
|
||||
|
@ -496,23 +503,32 @@ fastUnicode:
|
|||
} else if(/* UD0<=b && */ b<=UD7) {
|
||||
dynamicWindow=(int8_t)(b-UD0);
|
||||
isSingleByteMode=TRUE;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=defineOne;
|
||||
goto singleByteMode;
|
||||
} else if(b==UDX) {
|
||||
isSingleByteMode=TRUE;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=definePairOne;
|
||||
goto singleByteMode;
|
||||
} else if(b==UQU) {
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=quotePairOne;
|
||||
} else /* Urs */ {
|
||||
/* callback(illegal) */
|
||||
cnv->invalidCharBuffer[0]=b;
|
||||
cnv->invalidCharLength=1;
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
goto endloop;
|
||||
}
|
||||
break;
|
||||
case quotePairOne:
|
||||
byteOne=b;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=quotePairTwo;
|
||||
break;
|
||||
case quotePairTwo:
|
||||
|
@ -528,80 +544,35 @@ fastUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* copy the input sequence into the error buffer */
|
||||
int8_t i;
|
||||
|
||||
for(i=0; i<cnv->toULength; ++i) {
|
||||
cnv->invalidCharBuffer[i]=(char)cnv->toUBytes[i];
|
||||
}
|
||||
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=state;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=byteOne;
|
||||
cnv->invalidCharLength=i;
|
||||
|
||||
/* reset to deal with the next character */
|
||||
state=readCommand;
|
||||
}
|
||||
|
||||
finish:
|
||||
/* set the converter state back into UConverter */
|
||||
if(state==readCommand) {
|
||||
/* not in a multi-byte sequence, reset toULength */
|
||||
cnv->toULength=0;
|
||||
}
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=state;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=byteOne;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
pArgs->offsets=offsets;
|
||||
return;
|
||||
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
pArgs->offsets=offsets;
|
||||
/* the current bytes were copied to invalidCharBuffer before the goto callback jump */
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=readCommand;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=0;
|
||||
|
||||
/* call the callback function */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
isSingleByteMode=scsu->toUIsSingleByteMode;
|
||||
state=scsu->toUState;
|
||||
quoteWindow=scsu->toUQuoteWindow;
|
||||
dynamicWindow=scsu->toUDynamicWindow;
|
||||
byteOne=scsu->toUByteOne;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(pArgs->target-target), sourceIndex);
|
||||
target=pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=(int32_t)(nextSourceIndex+((const uint8_t *)pArgs->source-source));
|
||||
source=(const uint8_t *)pArgs->source;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
goto endloop;
|
||||
} else if(cnv->UCharErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
goto endloop;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
|
||||
goto finish;
|
||||
} else {
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -619,7 +590,6 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
|
|||
const uint8_t *source, *sourceLimit;
|
||||
UChar *target;
|
||||
const UChar *targetLimit;
|
||||
|
||||
UBool isSingleByteMode;
|
||||
uint8_t state, byteOne;
|
||||
int8_t quoteWindow, dynamicWindow;
|
||||
|
@ -658,11 +628,9 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
|
|||
* The end of the input or output buffer is also handled by the slower loop.
|
||||
* The slow loop jumps (goto) to the fast-path loop again as soon as possible.
|
||||
*
|
||||
* The callback handling is done by jumping (goto) to the callback section at the end
|
||||
* of the function. From there, it either jumps to here to continue or to
|
||||
* the endloop section to clean up and return.
|
||||
* The callback handling is done by returning with an error code.
|
||||
* The conversion framework actually calls the callback function.
|
||||
*/
|
||||
loop:
|
||||
if(isSingleByteMode) {
|
||||
/* fast path for single-byte mode */
|
||||
if(state==readCommand) {
|
||||
|
@ -731,13 +699,20 @@ singleByteMode:
|
|||
goto fastUnicode;
|
||||
} else /* Srs */ {
|
||||
/* callback(illegal) */
|
||||
cnv->invalidCharBuffer[0]=b;
|
||||
cnv->invalidCharLength=1;
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* store the first byte of a multibyte sequence in toUBytes[] */
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
break;
|
||||
case quotePairOne:
|
||||
byteOne=b;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=quotePairTwo;
|
||||
break;
|
||||
case quotePairTwo:
|
||||
|
@ -772,6 +747,8 @@ singleByteMode:
|
|||
case definePairOne:
|
||||
dynamicWindow=(int8_t)((b>>5)&7);
|
||||
byteOne=(uint8_t)(b&0x1f);
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=definePairTwo;
|
||||
break;
|
||||
case definePairTwo:
|
||||
|
@ -781,10 +758,9 @@ singleByteMode:
|
|||
case defineOne:
|
||||
if(b==0) {
|
||||
/* callback(illegal): Reserved window offset value 0 */
|
||||
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
|
||||
cnv->invalidCharBuffer[1]=b;
|
||||
cnv->invalidCharLength=2;
|
||||
goto callback;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
goto endloop;
|
||||
} else if(b<gapThreshold) {
|
||||
scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
|
||||
} else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
|
||||
|
@ -793,10 +769,9 @@ singleByteMode:
|
|||
scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
|
||||
} else {
|
||||
/* callback(illegal): Reserved window offset value 0xa8..0xf8 */
|
||||
cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow);
|
||||
cnv->invalidCharBuffer[1]=b;
|
||||
cnv->invalidCharLength=2;
|
||||
goto callback;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
goto endloop;
|
||||
}
|
||||
state=readCommand;
|
||||
goto fastSingle;
|
||||
|
@ -825,6 +800,8 @@ fastUnicode:
|
|||
case readCommand:
|
||||
if((uint8_t)(b-UC0)>(Urs-UC0)) {
|
||||
byteOne=b;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=quotePairTwo;
|
||||
} else if(/* UC0<=b && */ b<=UC7) {
|
||||
dynamicWindow=(int8_t)(b-UC0);
|
||||
|
@ -833,23 +810,32 @@ fastUnicode:
|
|||
} else if(/* UD0<=b && */ b<=UD7) {
|
||||
dynamicWindow=(int8_t)(b-UD0);
|
||||
isSingleByteMode=TRUE;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=defineOne;
|
||||
goto singleByteMode;
|
||||
} else if(b==UDX) {
|
||||
isSingleByteMode=TRUE;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=definePairOne;
|
||||
goto singleByteMode;
|
||||
} else if(b==UQU) {
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
state=quotePairOne;
|
||||
} else /* Urs */ {
|
||||
/* callback(illegal) */
|
||||
cnv->invalidCharBuffer[0]=b;
|
||||
cnv->invalidCharLength=1;
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->toUBytes[0]=b;
|
||||
cnv->toULength=1;
|
||||
goto endloop;
|
||||
}
|
||||
break;
|
||||
case quotePairOne:
|
||||
byteOne=b;
|
||||
cnv->toUBytes[1]=b;
|
||||
cnv->toULength=2;
|
||||
state=quotePairTwo;
|
||||
break;
|
||||
case quotePairTwo:
|
||||
|
@ -861,80 +847,34 @@ fastUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(state!=readCommand && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* copy the input sequence into the error buffer */
|
||||
int8_t i;
|
||||
|
||||
for(i=0; i<cnv->toULength; ++i) {
|
||||
cnv->invalidCharBuffer[i]=(char)cnv->toUBytes[i];
|
||||
}
|
||||
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=state;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=byteOne;
|
||||
cnv->invalidCharLength=i;
|
||||
|
||||
/* reset to deal with the next character */
|
||||
state=readCommand;
|
||||
}
|
||||
|
||||
finish:
|
||||
/* set the converter state back into UConverter */
|
||||
if(state==readCommand) {
|
||||
/* not in a multi-byte sequence, reset toULength */
|
||||
cnv->toULength=0;
|
||||
}
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=state;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=byteOne;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
return;
|
||||
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
/* the current bytes were copied to invalidCharBuffer before the goto callback jump */
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
scsu->toUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->toUState=readCommand;
|
||||
scsu->toUQuoteWindow=quoteWindow;
|
||||
scsu->toUDynamicWindow=dynamicWindow;
|
||||
scsu->toUByteOne=0;
|
||||
|
||||
/* call the callback function */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
isSingleByteMode=scsu->toUIsSingleByteMode;
|
||||
state=scsu->toUState;
|
||||
quoteWindow=scsu->toUQuoteWindow;
|
||||
dynamicWindow=scsu->toUDynamicWindow;
|
||||
byteOne=scsu->toUByteOne;
|
||||
|
||||
target=pArgs->target;
|
||||
|
||||
source=(const uint8_t *)pArgs->source;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
goto endloop;
|
||||
} else if(cnv->UCharErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
goto endloop;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
_SCSUReset(cnv, UCNV_RESET_TO_UNICODE);
|
||||
goto finish;
|
||||
} else {
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
static UChar32
|
||||
_SCSUGetNextUChar(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, _SCSUToUnicode, TRUE, pErrorCode);
|
||||
}
|
||||
|
||||
/* SCSU-from-Unicode conversion functions ----------------------------------- */
|
||||
|
@ -1095,7 +1035,6 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
int32_t sourceIndex, nextSourceIndex;
|
||||
|
||||
uint32_t i;
|
||||
int32_t length;
|
||||
|
||||
/* variables for compression heuristics */
|
||||
|
@ -1188,7 +1127,8 @@ getTrailSingle:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -1197,7 +1137,8 @@ getTrailSingle:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* compress supplementary character U+10000..U+10ffff */
|
||||
|
@ -1383,7 +1324,8 @@ getTrailUnicode:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -1392,7 +1334,8 @@ getTrailUnicode:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* compress supplementary character */
|
||||
|
@ -1443,22 +1386,19 @@ getTrailUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* c is an unpaired surrogate */
|
||||
cnv->invalidUCharBuffer[0]=(UChar)c;
|
||||
cnv->invalidUCharLength=1;
|
||||
c=0;
|
||||
}
|
||||
|
||||
finish:
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
|
@ -1566,59 +1506,6 @@ outputBytes:
|
|||
c=0;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
cnv->fromUSurrogateLead=0;
|
||||
|
||||
/* write the code point as code units */
|
||||
i=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
|
||||
/* call the callback function */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
isSingleByteMode=scsu->fromUIsSingleByteMode;
|
||||
dynamicWindow=scsu->fromUDynamicWindow;
|
||||
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
|
||||
c=cnv->fromUSurrogateLead;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(((uint8_t *)pArgs->target)-target), sourceIndex);
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=(int32_t)(nextSourceIndex+(pArgs->source-source));
|
||||
source=pArgs->source;
|
||||
targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
goto endloop;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
goto endloop;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
|
||||
goto finish;
|
||||
} else {
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1643,7 +1530,6 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
uint32_t c, delta;
|
||||
|
||||
uint32_t i;
|
||||
int32_t length;
|
||||
|
||||
/* variables for compression heuristics */
|
||||
|
@ -1720,7 +1606,8 @@ getTrailSingle:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -1729,7 +1616,8 @@ getTrailSingle:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* compress supplementary character U+10000..U+10ffff */
|
||||
|
@ -1902,7 +1790,8 @@ getTrailUnicode:
|
|||
} else {
|
||||
/* this is an unmatched lead code unit (1st surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
} else {
|
||||
/* no more input */
|
||||
|
@ -1911,7 +1800,8 @@ getTrailUnicode:
|
|||
} else {
|
||||
/* this is an unmatched trail code unit (2nd surrogate) */
|
||||
/* callback(illegal) */
|
||||
goto callback;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
/* compress supplementary character */
|
||||
|
@ -1961,22 +1851,19 @@ getTrailUnicode:
|
|||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(c!=0 && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
/* c is an unpaired surrogate */
|
||||
cnv->invalidUCharBuffer[0]=(UChar)c;
|
||||
cnv->invalidUCharLength=1;
|
||||
c=0;
|
||||
}
|
||||
|
||||
finish:
|
||||
/* set the converter state back into UConverter */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
|
||||
cnv->fromUSurrogateLead=(UChar)c;
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
|
@ -2052,54 +1939,6 @@ outputBytes:
|
|||
c=0;
|
||||
goto endloop;
|
||||
}
|
||||
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
scsu->fromUIsSingleByteMode=isSingleByteMode;
|
||||
scsu->fromUDynamicWindow=dynamicWindow;
|
||||
cnv->fromUSurrogateLead=0;
|
||||
|
||||
/* write the code point as code units */
|
||||
i=0;
|
||||
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
|
||||
cnv->invalidUCharLength=(int8_t)i;
|
||||
|
||||
/* call the callback function */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
isSingleByteMode=scsu->fromUIsSingleByteMode;
|
||||
dynamicWindow=scsu->fromUDynamicWindow;
|
||||
currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
|
||||
c=cnv->fromUSurrogateLead;
|
||||
|
||||
target=(uint8_t *)pArgs->target;
|
||||
|
||||
source=pArgs->source;
|
||||
targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target);
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
goto endloop;
|
||||
} else if(cnv->charErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
goto endloop;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
_SCSUReset(cnv, UCNV_RESET_FROM_UNICODE);
|
||||
goto finish;
|
||||
} else {
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
/* miscellaneous ------------------------------------------------------------ */
|
||||
|
@ -2194,7 +2033,7 @@ static const UConverterImpl _SCSUImpl={
|
|||
_SCSUToUnicodeWithOffsets,
|
||||
_SCSUFromUnicode,
|
||||
_SCSUFromUnicodeWithOffsets,
|
||||
_SCSUGetNextUChar,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
_SCSUGetName,
|
||||
|
|
Loading…
Add table
Reference in a new issue