ICU-311 Major rewrite of the internal codepath to use the UConverterToUnicodeArgs and UConverterFromUnicodeArgs.

X-SVN-Rev: 1777
This commit is contained in:
Helena Chapman 2000-07-10 20:51:54 +00:00
parent 4a043d3575
commit ccede1e5b9
8 changed files with 1085 additions and 1365 deletions

View file

@ -444,6 +444,7 @@ void ucnv_fromUnicode (UConverter * _this,
UBool flush,
UErrorCode * err)
{
UConverterFromUnicodeArgs args;
/*
* Check parameters in for all conversions
*/
@ -473,16 +474,19 @@ void ucnv_fromUnicode (UConverter * _this,
if (U_FAILURE (*err)) return;
}
args.converter = _this;
args.flush = flush;
args.offsets = offsets;
args.source = *source;
args.sourceLimit = sourceLimit;
args.target = *target;
args.targetLimit = targetLimit;
args.size = sizeof(args);
if (offsets) {
if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL) {
_this->sharedData->impl->fromUnicodeWithOffsets(_this,
target,
targetLimit,
source,
sourceLimit,
offsets,
flush,
err);
_this->sharedData->impl->fromUnicodeWithOffsets(&args, err);
*source = args.source;
*target = args.target;
return;
} else {
/* all code points are of the same length */
@ -491,18 +495,18 @@ void ucnv_fromUnicode (UConverter * _this,
if(bytesPerChar == 1) {
for (i=0; i<targetSize; i++) {
offsets[i] = i;
args.offsets[i] = i;
}
} else if(bytesPerChar == 2) {
for (i=0; i<targetSize; i++) {
offsets[i] = i>>1;
args.offsets[i] = i>>1;
}
} else {
int32_t j = 0, k = bytesPerChar;
for (i=0; i<targetSize; i++) {
/* offsets[i] = i/bytesPerChar; -- without division */
offsets[i] = j;
args.offsets[i] = j;
if(--k == 0) {
k = bytesPerChar;
++j;
@ -513,15 +517,9 @@ void ucnv_fromUnicode (UConverter * _this,
}
/*calls the specific conversion routines */
_this->sharedData->impl->fromUnicode(_this,
target,
targetLimit,
source,
sourceLimit,
offsets,
flush,
err);
_this->sharedData->impl->fromUnicode(&args, err);
*source = args.source;
*target = args.target;
return;
}
@ -536,6 +534,7 @@ void ucnv_toUnicode (UConverter * _this,
UBool flush,
UErrorCode * err)
{
UConverterToUnicodeArgs args;
/*
* Check parameters in for all conversions
*/
@ -565,16 +564,19 @@ void ucnv_toUnicode (UConverter * _this,
return;
}
args.converter = _this;
args.flush = flush;
args.offsets = offsets;
args.source = (char *) *source;
args.sourceLimit = sourceLimit;
args.target = *target;
args.targetLimit = targetLimit;
args.size = sizeof(args);
if (offsets) {
if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) {
_this->sharedData->impl->toUnicodeWithOffsets(_this,
target,
targetLimit,
source,
sourceLimit,
offsets,
flush,
err);
_this->sharedData->impl->toUnicodeWithOffsets(&args, err);
*source = args.source;
*target = args.target;
return;
} else {
/* all code points are of the same length */
@ -598,16 +600,10 @@ void ucnv_toUnicode (UConverter * _this,
}
/*calls the specific conversion routines */
_this->sharedData->impl->toUnicode(_this,
target,
targetLimit,
source,
sourceLimit,
offsets,
flush,
err);
_this->sharedData->impl->toUnicode(&args, err);
*source = args.source;
*target = args.target;
return;
}
@ -618,13 +614,12 @@ int32_t ucnv_fromUChars (const UConverter * converter,
int32_t sourceSize,
UErrorCode * err)
{
const UChar *mySource = source;
const UChar *mySource_limit;
int32_t mySourceLength = sourceSize;
UConverter myConverter;
char *myTarget = target;
char *myTarget_limit;
int32_t targetCapacity = 0;
UConverterFromUnicodeArgs args;
if (U_FAILURE (*err))
return 0;
@ -655,7 +650,7 @@ int32_t ucnv_fromUChars (const UConverter * converter,
return 0;
}
mySource_limit = mySource + mySourceLength;
mySource_limit = source + mySourceLength;
myTarget_limit = target + targetSize;
/* Pin the limit to U_MAX_PTR. NULL check is for AS/400. */
@ -663,17 +658,20 @@ int32_t ucnv_fromUChars (const UConverter * converter,
myTarget_limit = (char *)U_MAX_PTR;
}
args.converter = &myConverter;
args.flush = TRUE;
args.offsets = NULL;
args.source = source;
args.sourceLimit = mySource_limit;
args.target = target;
args.targetLimit = myTarget_limit;
args.size = sizeof(args);
if (targetSize > 0)
{
ucnv_fromUnicode (&myConverter,
&myTarget,
myTarget_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
targetCapacity = myTarget - target;
/*calls the specific conversion routines */
args.converter->sharedData->impl->fromUnicode(&args, err);
targetCapacity = args.target - target;
}
/*Updates targetCapacity to contain the number of bytes written to target */
@ -689,7 +687,6 @@ int32_t ucnv_fromUChars (const UConverter * converter,
if (*err == U_INDEX_OUTOFBOUNDS_ERROR)
{
char target2[CHUNK_SIZE];
char *target2_alias = target2;
const char *target2_limit = target2 + CHUNK_SIZE;
/*We use a stack allocated buffer around which we loop
@ -699,19 +696,12 @@ int32_t ucnv_fromUChars (const UConverter * converter,
while (*err == U_INDEX_OUTOFBOUNDS_ERROR)
{
*err = U_ZERO_ERROR;
target2_alias = target2;
ucnv_fromUnicode (&myConverter,
&target2_alias,
target2_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
args.target = target2;
args.targetLimit = target2_limit;
args.converter->sharedData->impl->fromUnicode(&args, err);
/*updates the output parameter to contain the number of char required */
targetCapacity += (target2_alias - target2) + 1;
}
/*updates the output parameter to contain the number of char required */
targetCapacity += (args.target - target2) + 1; }
/*We will set the error code to U_BUFFER_OVERFLOW_ERROR only if
*nothing graver happened in the previous loop*/
(targetCapacity)--;
@ -729,12 +719,11 @@ int32_t ucnv_toUChars (const UConverter * converter,
int32_t sourceSize,
UErrorCode * err)
{
const char *mySource = source;
const char *mySource_limit = source + sourceSize;
UConverter myConverter;
UChar *myTarget = target;
UChar *myTarget_limit;
int32_t targetCapacity;
UConverterToUnicodeArgs args;
if (U_FAILURE (*err))
return 0;
@ -766,6 +755,13 @@ int32_t ucnv_toUChars (const UConverter * converter,
/*Removes all state info on the UConverter */
ucnv_reset (&myConverter);
args.converter = &myConverter;
args.flush = TRUE;
args.offsets = NULL;
args.source = source;
args.sourceLimit = mySource_limit;
args.target = target;
args.size = sizeof(args);
if (targetSize > 0)
{
myTarget_limit = target + targetSize - 1;
@ -777,24 +773,18 @@ int32_t ucnv_toUChars (const UConverter * converter,
/*Not in pure pre-flight mode */
ucnv_toUnicode (&myConverter,
&myTarget,
myTarget_limit, /*Save a spot for the Null terminator */
&mySource,
mySource_limit,
NULL,
TRUE,
err);
args.targetLimit = myTarget_limit;
args.converter->sharedData->impl->toUnicode(&args, err);
/*Null terminates the string */
*(myTarget) = 0x0000;
*(args.target) = 0x0000;
}
/*Rigs targetCapacity to have at least one cell for zero termination */
/*Updates targetCapacity to contain the number of bytes written to target */
targetCapacity = 1;
targetCapacity += myTarget - target;
targetCapacity += args.target - target;
if (targetSize == 0)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
@ -807,7 +797,6 @@ int32_t ucnv_toUChars (const UConverter * converter,
if (*err == U_INDEX_OUTOFBOUNDS_ERROR)
{
UChar target2[CHUNK_SIZE];
UChar *target2_alias = target2;
const UChar *target2_limit = target2 + CHUNK_SIZE;
/*We use a stack allocated buffer around which we loop
@ -816,18 +805,12 @@ int32_t ucnv_toUChars (const UConverter * converter,
while (*err == U_INDEX_OUTOFBOUNDS_ERROR)
{
*err = U_ZERO_ERROR;
target2_alias = target2;
ucnv_toUnicode (&myConverter,
&target2_alias,
target2_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
args.target = target2;
args.targetLimit = target2_limit;
args.converter->sharedData->impl->toUnicode(&args, err);
/*updates the output parameter to contain the number of char required */
targetCapacity += target2_alias - target2 + 1;
targetCapacity += args.target - target2 + 1;
}
(targetCapacity)--; /*adjust for last one */
if (U_SUCCESS (*err))
@ -842,6 +825,9 @@ UChar32 ucnv_getNextUChar (UConverter * converter,
const char *sourceLimit,
UErrorCode * err)
{
UConverterToUnicodeArgs args;
UChar32 ch;
/* In case internal data had been stored
* we return the first UChar32 in the internal buffer,
* and update the internal state accordingly
@ -864,10 +850,18 @@ UChar32 ucnv_getNextUChar (UConverter * converter,
}
/*calls the specific conversion routines */
/*as dictated in a code review, avoids a switch statement */
return converter->sharedData->impl->getNextUChar(converter,
source,
sourceLimit,
args.converter = converter;
args.flush = TRUE;
args.offsets = NULL;
args.source = *source;
args.sourceLimit = sourceLimit;
args.target = NULL;
args.targetLimit = NULL;
args.size = sizeof(args);
ch = converter->sharedData->impl->getNextUChar(&args,
err);
*source = args.source;
return ch;
}

File diff suppressed because it is too large Load diff

View file

@ -67,76 +67,82 @@ U_CDECL_BEGIN
#define missingUCharMarker 0xFFFD
#define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
if (args.converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
if (args->converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
/*copies current values for the ErrorFunctor to update */ \
/*Calls the ErrorFunctor */ \
args.converter->fromUCharErrorBehaviour ( context, \
&args, \
args->converter->fromUCharErrorBehaviour ( context, \
args, \
codeUnits, \
length, \
codePoint, \
reason, \
err); \
myTargetIndex = args.target - (char*)myTarget; \
mySourceIndex = args.source - mySource; \
myTargetIndex = args->target - (char*)myTarget; \
mySourceIndex = args->source - mySource; \
}
/*
*/
#define ToU_CALLBACK_MACRO(context, args, codePoints, length, reason, err) \
if (args.converter->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
if (args->converter->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
/*Calls the ErrorFunctor */ \
args.converter->fromCharErrorBehaviour ( \
args->converter->fromCharErrorBehaviour ( \
context, \
&args, \
args, \
codePoints, \
length, \
reason, \
err); \
myTargetIndex = args.target - myTarget; \
mySourceIndex = args.source - (const char*)mySource; \
myTargetIndex = args->target - myTarget; \
mySourceIndex = args->source - (const char*)mySource; \
}
/*
*/
#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
if (args.converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
if (args->converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
int32_t My_i = myTargetIndex; \
/*copies current values for the ErrorFunctor to update */ \
/*Calls the ErrorFunctor */ \
args.converter->fromUCharErrorBehaviour ( \
args->converter->fromUCharErrorBehaviour ( \
context, \
&args, \
args, \
codeUnits, \
length, \
codePoint, \
reason, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
myTargetIndex = args.target - (char*)myTarget; \
mySourceIndex = args.source - mySource; \
for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \
myTargetIndex = args->target - (char*)myTarget; \
mySourceIndex = args->source - mySource; \
args->offsets = saveOffsets; \
for (;My_i < myTargetIndex;My_i++) args->offsets[My_i] += currentOffset ; \
}
/*
*/
#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(context, args, codePoints, length, reason, err) \
if (args.converter->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
if (args->converter->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
args.converter->fromCharErrorBehaviour ( \
args->converter->fromCharErrorBehaviour ( \
context, \
&args, \
args, \
codePoints, \
length, \
reason, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
myTargetIndex = args.target - myTarget; \
mySourceIndex = args.source - (const char*)mySource; \
for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \
myTargetIndex = args->target - myTarget; \
mySourceIndex = args->source - (const char*)mySource; \
args->offsets = saveOffsets; \
for (;My_i < myTargetIndex;My_i++) {args->offsets[My_i] += currentOffset ; } \
}
/*
*/
typedef void (*UConverterLoad) (UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
@ -146,28 +152,11 @@ typedef void (*UConverterClose) (UConverter *cnv);
typedef void (*UConverterReset) (UConverter *cnv);
typedef void (*T_ToUnicodeFunction) (UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
UBool,
UErrorCode *);
typedef void (*T_ToUnicodeFunction) (UConverterToUnicodeArgs *, UErrorCode *);
typedef void (*T_FromUnicodeFunction) (UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
UBool,
UErrorCode *);
typedef void (*T_FromUnicodeFunction) (UConverterFromUnicodeArgs *, UErrorCode *);
typedef UChar32 (*T_GetNextUCharFunction) (UConverter *,
const char **,
const char *,
UErrorCode *);
typedef UChar32 (*T_GetNextUCharFunction) (UConverterToUnicodeArgs *, UErrorCode *);
typedef void (*UConverterGetStarters)(const UConverter* converter,
UBool starters[256],

View file

@ -541,36 +541,17 @@ static void \
*/
void
_LMBCSToUnicodeWithOffsets(
UConverter* _this, /* the converter*/
UChar** target, /* updated ptr to target */
const UChar* targetLimit, /* exclusive target limit */
const char** source, /* updated ptr to source */
const char* sourceLimit, /* exclusive source limit*/
int32_t* offsets, /* pointer to array to be filled in with
source offsets */
UBool flush, /* boolean: true means client won't call
back with more data from same source */
UErrorCode* err); /* Std ICU err code */
_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *args,
UErrorCode* err); /* Std ICU err code */
void
_LMBCSFromUnicode(
UConverter* _this, /* same as above, but other direction */
char** target,
const char* targetLimit,
const UChar** source,
const UChar* sourceLimit,
int32_t * offsets,
UBool flush,
UErrorCode* err);
_LMBCSFromUnicode(UConverterFromUnicodeArgs *args,
UErrorCode* err);
UChar32
_LMBCSGetNextUChar(
UConverter* _this, /* also from Unicode, but 1 char */
const char** source,
const char* sourceLimit,
UErrorCode* err);
_LMBCSGetNextUChar(UConverterToUnicodeArgs *args,
UErrorCode* err);
/* Here's the open worker & the common close function */
@ -761,14 +742,8 @@ LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar)
/* The main Unicode to LMBCS conversion function */
void
_LMBCSFromUnicode(UConverter* _this,
char** target,
const char* targetLimit,
const UChar** source,
const UChar* sourceLimit,
int32_t * offsets,
UBool flush,
UErrorCode* err)
_LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
UErrorCode* err)
{
ulmbcs_byte_t lastConverterIndex = 0;
UChar uniChar;
@ -776,7 +751,7 @@ _LMBCSFromUnicode(UConverter* _this,
ulmbcs_byte_t * pLMBCS;
int bytes_written;
UBool groups_tried[ULMBCS_GRP_LAST];
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
int sourceIndex = 0;
@ -803,9 +778,9 @@ _LMBCSFromUnicode(UConverter* _this,
4. And as a grand fallback: Unicode
*/
while (*source < sourceLimit && !U_FAILURE(*err))
while (args->source < args->sourceLimit && !U_FAILURE(*err))
{
uniChar = **source;
uniChar = *(args->source);
bytes_written = 0;
pLMBCS = LMBCS;
@ -930,14 +905,14 @@ _LMBCSFromUnicode(UConverter* _this,
}
/* we have a translation. increment source and write as much as possible to target */
(*source)++;
args->source++;
pLMBCS = LMBCS;
while (*target < targetLimit && bytes_written--)
while (args->target < args->targetLimit && bytes_written--)
{
*(*target)++ = *pLMBCS++;
if (offsets)
*(args->target)++ = *pLMBCS++;
if (args->offsets)
{
*offsets++ = sourceIndex;
*(args->offsets)++ = sourceIndex;
}
}
sourceIndex++;
@ -947,9 +922,9 @@ _LMBCSFromUnicode(UConverter* _this,
common code will move this to target if we get called back with
enough target room
*/
uint8_t * pErrorBuffer = _this->charErrorBuffer;
uint8_t * pErrorBuffer = args->converter->charErrorBuffer;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
_this->charErrorBufferLength = bytes_written;
args->converter->charErrorBufferLength = bytes_written;
while (bytes_written--)
{
*pErrorBuffer++ = *pLMBCS++;
@ -964,9 +939,9 @@ _LMBCSFromUnicode(UConverter* _this,
/* A function to call when we are looking at the Unicode group byte in LMBCS */
UChar
GetUniFromLMBCSUni(char const * * ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */
GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */
{
uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/
uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/
uint8_t LowCh = *(*ppLMBCSin)++;
if (HighCh == ULMBCS_UNICOMPATZERO )
@ -984,9 +959,9 @@ GetUniFromLMBCSUni(char const * * ppLMBCSin) /* Called with LMBCS-style Unicode
*/
#define CHECK_SOURCE_LIMIT(index) \
if ((*source)+index > sourceLimit){\
if (args->source+index > args->sourceLimit){\
*err = U_TRUNCATED_CHAR_FOUND;\
*source = saveSource;\
args->source = saveSource;\
return missingUCharMarker;}
@ -998,25 +973,22 @@ GetUniFromLMBCSUni(char const * * ppLMBCSin) /* Called with LMBCS-style Unicode
*/
UChar32
_LMBCSGetNextUCharWorker(UConverter* _this,
const char** source,
const char* sourceLimit,
_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
UErrorCode* err,
UBool returnUTF32)
{
ulmbcs_byte_t CurByte; /* A byte from the input stream */
UChar32 uniChar; /* an output UNICODE char */
const char * saveSource;
UConverterToUnicodeArgs args;
/* error check */
if (*source >= sourceLimit)
if (args->source >= args->sourceLimit)
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return missingUCharMarker;
}
/* Grab first byte & save address for error recovery */
CurByte = *((ulmbcs_byte_t *) (saveSource = (*source)++));
CurByte = *((ulmbcs_byte_t *) (saveSource = args->source++));
/*
* at entry of each if clause:
@ -1049,7 +1021,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
{
ulmbcs_byte_t C0C1byte;
CHECK_SOURCE_LIMIT(1);
C0C1byte = *(*source)++;
C0C1byte = *(args->source)++;
uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
}
else
@ -1058,14 +1030,14 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
UChar second;
CHECK_SOURCE_LIMIT(2);
uniChar = GetUniFromLMBCSUni(source);
uniChar = GetUniFromLMBCSUni(&(args->source));
/* at this point we are usually done, but we need to make sure we are not in
a situation where we can successfully put together a surrogate pair */
if(returnUTF32 && UTF_IS_FIRST_SURROGATE(uniChar) && (*source+3 <= sourceLimit)
&& *(*source)++ == ULMBCS_GRP_UNICODE
&& UTF_IS_SECOND_SURROGATE(second = GetUniFromLMBCSUni(source)))
if(returnUTF32 && UTF_IS_FIRST_SURROGATE(uniChar) && (args->source+3 <= args->sourceLimit)
&& *(args->source)++ == ULMBCS_GRP_UNICODE
&& UTF_IS_SECOND_SURROGATE(second = GetUniFromLMBCSUni(&(args->source))))
{
uniChar = UTF16_GET_PAIR_VALUE(uniChar, second);
}
@ -1073,7 +1045,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
else if (CurByte <= ULMBCS_CTRLOFFSET)
{
group = CurByte; /* group byte is in the source */
extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
cnv = extraInfo->OptGrpConverter[group];
if (!cnv)
@ -1087,8 +1059,8 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
ulmbcs_byte_t HighCh, LowCh;
CHECK_SOURCE_LIMIT(2);
HighCh = *(*source)++;
LowCh = *(*source)++;
HighCh = *(args->source)++;
LowCh = *(args->source)++;
/* check for LMBCS doubled-group-byte case */
mbChar = (HighCh == group) ? LowCh : (HighCh<<8) | LowCh;
MyCArray = &cnv->sharedData->table->mbcs.toUnicode;
@ -1096,7 +1068,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
}
else { /* single byte conversion */
CHECK_SOURCE_LIMIT(1);
CurByte = *(*source)++;
CurByte = *(args->source)++;
if (CurByte >= ULMBCS_C1START)
{
@ -1107,7 +1079,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
/* The non-optimizable oddballs where there is an explicit byte
* AND the second byte is not in the upper ascii range
*/
extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT];
/* Lookup value must include opt group */
@ -1119,7 +1091,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
}
else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */
{
extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo;
group = extraInfo->OptGroup;
cnv = extraInfo->OptGrpConverter[group];
if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
@ -1135,7 +1107,7 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
{
CHECK_SOURCE_LIMIT(1);
HighCh = CurByte;
LowCh = *(*source)++;
LowCh = *(args->source)++;
mbChar = (HighCh<<8) | LowCh;
}
MyCArray = &cnv->sharedData->table->mbcs.toUnicode;
@ -1155,21 +1127,19 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
/* This code needs updating when new error callbacks are installed */
UChar * pUniChar = (UChar *)&uniChar;
args.converter = _this;
args.target = pUniChar;
args.targetLimit = pUniChar + 1;
args.source = saveSource;
args.sourceLimit = sourceLimit;
args.flush = TRUE;
args.offsets = NULL;
args.size = sizeof(args);
_this->fromCharErrorBehaviour(_this->toUContext,
&args,
args->target = pUniChar;
args->targetLimit = pUniChar + 1;
args->source = saveSource;
args->flush = TRUE;
args->offsets = NULL;
args->size = sizeof(args);
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
saveSource,
sourceLimit - saveSource,
args->sourceLimit - saveSource,
UCNV_UNASSIGNED,
err);
*source = saveSource;
args->source = saveSource;
}
return uniChar;
}
@ -1178,88 +1148,86 @@ _LMBCSGetNextUCharWorker(UConverter* _this,
/* The exported function that gets one UTF32 character from a LMBCS stream
*/
UChar32
_LMBCSGetNextUChar(UConverter* _this,
const char** source,
const char* sourceLimit,
_LMBCSGetNextUChar(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
return _LMBCSGetNextUCharWorker(_this, source, sourceLimit, err, TRUE);
return _LMBCSGetNextUCharWorker(args, err, TRUE);
}
/* The exported function that converts lmbcs to one or more
UChars - currently UTF-16
*/
void
_LMBCSToUnicodeWithOffsets(UConverter* _this,
UChar** target,
const UChar* targetLimit,
const char** source,
const char* sourceLimit,
int32_t* offsets,
UBool flush,
_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar uniChar; /* one output UNICODE char */
const char * saveSource;
const char * pStartLMBCS = *source; /* beginning of whole string */
const char * pStartLMBCS = args->source; /* beginning of whole string */
if (targetLimit == *target) /* error check may belong in common code */
if (args->targetLimit == args->target) /* error check may belong in common code */
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
/* Process from source to limit, or until error */
while (!*err && sourceLimit > *source && targetLimit > *target)
while (!*err && args->sourceLimit > args->source && args->targetLimit > args->target)
{
saveSource = *source; /* beginning of current code point */
saveSource = args->source; /* beginning of current code point */
if (_this->invalidCharLength) /* reassemble char from previous call */
if (args->converter->invalidCharLength) /* reassemble char from previous call */
{
char LMBCS [ULMBCS_CHARSIZE_MAX];
char *pLMBCS = LMBCS;
size_t size_old = _this->invalidCharLength;
char *pLMBCS = LMBCS, *saveSource, *saveSourceLimit;
size_t size_old = args->converter->invalidCharLength;
/* limit from source is either remainder of temp buffer, or user limit on source */
size_t size_new_maybe_1 = sizeof(LMBCS) - size_old;
size_t size_new_maybe_2 = sourceLimit - *source;
size_t size_new_maybe_2 = args->sourceLimit - args->source;
size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
uprv_memcpy(LMBCS, _this->invalidCharBuffer, size_old);
uprv_memcpy(LMBCS + size_old, *source, size_new);
uprv_memcpy(LMBCS, args->converter->invalidCharBuffer, size_old);
uprv_memcpy(LMBCS + size_old, args->source, size_new);
saveSource = (char*)args->source;
saveSourceLimit = (char*)args->sourceLimit;
args->source = pLMBCS;
args->sourceLimit = pLMBCS+size_old+size_new;
uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
pLMBCS = (char*)args->source;
args->source =saveSource;
args->sourceLimit = saveSourceLimit;
args->source += (pLMBCS - LMBCS - size_old);
uniChar = (UChar) _LMBCSGetNextUCharWorker(_this, &pLMBCS, pLMBCS+size_old+size_new, err, FALSE);
(*source) += (pLMBCS - LMBCS - size_old);
if (*err == U_TRUNCATED_CHAR_FOUND && !flush)
if (*err == U_TRUNCATED_CHAR_FOUND && !args->flush)
{
/* evil special case: source buffers so small a char spans more than 2 buffers */
size_t savebytes = size_old+size_new;
_this->invalidCharLength = savebytes;
uprv_memcpy(_this->invalidCharBuffer, LMBCS, savebytes);
(*source) = sourceLimit;
*err = 0;
args->converter->invalidCharLength = savebytes;
uprv_memcpy(args->converter->invalidCharBuffer, LMBCS, savebytes);
args->source = args->sourceLimit;
*err = U_ZERO_ERROR;
return;
}
else
{
/* clear the partial-char marker */
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
else
{
uniChar = (UChar) _LMBCSGetNextUCharWorker(_this, source, sourceLimit, err, FALSE);
uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err, FALSE);
}
if (U_SUCCESS(*err))
{
if (uniChar != missingUCharMarker)
{
*(*target)++ = uniChar;
if(offsets)
*(args->target)++ = uniChar;
if(args->offsets)
{
*offsets++ = saveSource - pStartLMBCS;
*(args->offsets)++ = saveSource - pStartLMBCS;
}
}
else
@ -1269,18 +1237,18 @@ _LMBCSToUnicodeWithOffsets(UConverter* _this,
}
}
/* if target ran out before source, return U_INDEX_OUTOFBOUNDS_ERROR */
if (U_SUCCESS(*err) && sourceLimit > *source && targetLimit <= *target)
if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
/* If character incomplete, store away partial char if more to come */
if ((*err == U_TRUNCATED_CHAR_FOUND) && !flush )
if ((*err == U_TRUNCATED_CHAR_FOUND) && !args->flush )
{
size_t savebytes = sourceLimit - saveSource;
_this->invalidCharLength = savebytes;
uprv_memcpy(_this->invalidCharBuffer, saveSource, savebytes);
(*source) = sourceLimit;
size_t savebytes = args->sourceLimit - saveSource;
args->converter->invalidCharLength = savebytes;
uprv_memcpy(args->converter->invalidCharBuffer, saveSource, savebytes);
args->source = args->sourceLimit;
*err = 0;
}
}

View file

@ -61,35 +61,28 @@ const int8_t bytesFromUTF8[256] = {
const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
void T_UConverter_toUnicode_UTF8 (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const unsigned char *mySource = (unsigned char *) *source;
UChar *myTarget = *target;
const unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
uint32_t ch = 0 ,
ch2 =0 ,
i =0; /* Index into the current # of bytes consumed in the current sequence */
uint32_t inBytes = 0; /* Total number of bytes in the current UTF8 sequence */
UConverterToUnicodeArgs args;
if (_this->toUnicodeStatus)
if (args->converter->toUnicodeStatus)
{
i = _this->invalidCharLength; /* restore # of bytes consumed */
inBytes = _this->toUnicodeStatus; /* Restore size of current sequence */
i = args->converter->invalidCharLength; /* restore # of bytes consumed */
inBytes = args->converter->toUnicodeStatus; /* Restore size of current sequence */
ch = _this->mode; /*Stores the previously calculated ch from a previous call*/
_this->toUnicodeStatus = 0;
_this->invalidCharLength = 0;
ch = args->converter->mode; /*Stores the previously calculated ch from a previous call*/
args->converter->toUnicodeStatus = 0;
args->converter->invalidCharLength = 0;
goto morebytes;
}
@ -109,7 +102,7 @@ void T_UConverter_toUnicode_UTF8 (UConverter * _this,
/* store the first char */
inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
_this->invalidCharBuffer[0] = (char)ch;
args->converter->invalidCharBuffer[0] = (char)ch;
i = 1;
morebytes:
@ -118,22 +111,22 @@ void T_UConverter_toUnicode_UTF8 (UConverter * _this,
{
if (mySourceIndex >= sourceLength)
{
if (flush)
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
else
{
_this->toUnicodeStatus = inBytes;
_this->invalidCharLength = (int8_t)i;
args->converter->toUnicodeStatus = inBytes;
args->converter->invalidCharLength = (int8_t)i;
}
goto donefornow;
}
_this->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF));
args->converter->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF));
if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */
break;
}
@ -161,37 +154,38 @@ void T_UConverter_toUnicode_UTF8 (UConverter * _this,
}
else
{
_this->invalidUCharBuffer[0] = (UChar) ch;
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar) ch;
args->converter->invalidUCharLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
else
{
UChar* saveTarget = args->target;
const char* saveSource = args->source;
*err = U_ILLEGAL_CHAR_FOUND;
_this->invalidCharLength = (int8_t)i;
args->converter->invalidCharLength = (int8_t)i;
#ifdef Debug
printf("inbytes %d\n, _this->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, _this->invalidCharLength, mySource[mySourceIndex]);
printf("inbytes %d\n, args->converter->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, args->converter->invalidCharLength, mySource[mySourceIndex]);
#endif
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = (const char*) mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
ToU_CALLBACK_MACRO(_this->toUContext,
args->target = myTarget + myTargetIndex;
args->source = (const char*) mySource + mySourceIndex;
ToU_CALLBACK_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
args->source = saveSource;
args->target = saveTarget;
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -204,36 +198,29 @@ void T_UConverter_toUnicode_UTF8 (UConverter * _this,
}
donefornow:
*target += myTargetIndex;
*source += mySourceIndex;
_this->mode = ch; /*stores a partially calculated target*/
args->target += myTargetIndex;
args->source += mySourceIndex;
args->converter->mode = ch; /*stores a partially calculated target*/
}
void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const unsigned char *mySource = (unsigned char *) *source;
UChar *myTarget = *target;
const unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
uint32_t ch = 0, ch2 = 0, i = 0;
uint32_t inBytes = 0;
UConverterToUnicodeArgs args;
if (_this->toUnicodeStatus)
if (args->converter->toUnicodeStatus)
{
i = _this->invalidCharLength;
inBytes = _this->toUnicodeStatus;
_this->toUnicodeStatus = 0;
ch = _this->mode;
i = args->converter->invalidCharLength;
inBytes = args->converter->toUnicodeStatus;
args->converter->toUnicodeStatus = 0;
ch = args->converter->mode;
goto morebytes;
}
@ -244,13 +231,13 @@ void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
ch = mySource[mySourceIndex++];
if (ch < 0x80) /* Simple case */
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (UChar) ch;
}
else
{
inBytes = bytesFromUTF8[ch];
_this->invalidCharBuffer[0] = (char)ch;
args->converter->invalidCharBuffer[0] = (char)ch;
i = 1;
morebytes:
@ -259,22 +246,22 @@ void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
{
if (mySourceIndex >= sourceLength)
{
if (flush)
if (args->flush)
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
else
{
_this->toUnicodeStatus = inBytes;
_this->invalidCharLength = (int8_t)i;
args->converter->toUnicodeStatus = inBytes;
args->converter->invalidCharLength = (int8_t)i;
}
goto donefornow;
}
_this->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]);
args->converter->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]);
if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */
break;
}
@ -287,57 +274,57 @@ void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
{
if (ch <= kMaximumUCS2) {
offsets[myTargetIndex] = mySourceIndex-3;
myTarget[myTargetIndex++] = (UChar) ch;
args->offsets[myTargetIndex] = mySourceIndex-3;
myTarget[myTargetIndex++] = (UChar) ch;
}
else
{
ch -= halfBase;
offsets[myTargetIndex] = mySourceIndex-4;
args->offsets[myTargetIndex] = mySourceIndex-4;
myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart);
ch = (ch & halfMask) + kSurrogateLowStart;
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-4;
args->offsets[myTargetIndex] = mySourceIndex-4;
/* ch now holds the low surrogate; store it as a full UChar, a (char) cast would drop its upper byte */
myTarget[myTargetIndex++] = (UChar) ch;
}
else
{
/* No room left in the target: park the low surrogate in the converter and report the overflow */
_this->invalidUCharBuffer[0] = (UChar) ch;
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar) ch;
args->converter->invalidUCharLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
else
{
int32_t currentOffset = offsets[myTargetIndex-1];
int32_t currentOffset = args->offsets[myTargetIndex-1];
int32_t My_i = myTargetIndex;
UChar* saveTarget = args->target;
const char* saveSource = args->source;
int32_t* saveOffsets = args->offsets;
*err = U_ILLEGAL_CHAR_FOUND;
_this->invalidCharLength = (int8_t)i;
args->converter->invalidCharLength = (int8_t)i;
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = (const char*)mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets?offsets+myTargetIndex:0;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = (const char*)mySource + mySourceIndex;
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
/* To do HSYS: more smarts here, including offsets */
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this->toUContext,
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */
/* Undo the temporary repositioning of the argument block that was done for the callback */
args->source = saveSource;
args->target = saveTarget;
/* offsets was advanced by myTargetIndex before the callback; restore it so that the
 * args->offsets[myTargetIndex] stores in this function index from the original base again */
args->offsets = saveOffsets;
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -350,36 +337,30 @@ void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
}
donefornow:
*target += myTargetIndex;
*source += mySourceIndex;
_this->mode = ch;
args->target += myTargetIndex;
args->source += mySourceIndex;
args->converter->mode = ch;
}
void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
uint32_t ch;
int16_t i, bytesToWrite = 0;
uint32_t ch2;
char temp[4];
if (_this->fromUnicodeStatus)
if (args->converter->fromUnicodeStatus)
{
ch = _this->fromUnicodeStatus;
_this->fromUnicodeStatus = 0;
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
goto lowsurogate;
}
while (mySourceIndex < sourceLength)
@ -402,8 +383,8 @@ void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -413,7 +394,7 @@ void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd))
{
lowsurogate:
if (mySourceIndex < sourceLength && !flush)
if (mySourceIndex < sourceLength && !args->flush)
{
ch2 = mySource[mySourceIndex];
if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd))
@ -446,7 +427,7 @@ void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
}
else
{
_this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i];
args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[i];
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -460,36 +441,30 @@ void T_UConverter_fromUnicode_UTF8 (UConverter * _this,
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
uint32_t ch;
int16_t i, bytesToWrite = 0;
uint32_t ch2;
char temp[4];
if (_this->fromUnicodeStatus)
if (args->converter->fromUnicodeStatus)
{
ch = _this->fromUnicodeStatus;
_this->fromUnicodeStatus = 0;
ch = args->converter->fromUnicodeStatus;
args->converter->fromUnicodeStatus = 0;
goto lowsurogate;
}
while (mySourceIndex < sourceLength)
@ -501,22 +476,22 @@ void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
if (ch < 0x80) /* Single byte */
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ch;
}
else if (ch < 0x800) /* Double byte */
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0);
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80);
}
else
{
_this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80);
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -526,7 +501,7 @@ void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd))
{
lowsurogate:
if (mySourceIndex < sourceLength && !flush)
if (mySourceIndex < sourceLength && !args->flush)
{
ch2 = mySource[mySourceIndex];
if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd))
@ -555,12 +530,12 @@ void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
{
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = temp[i];
}
else
{
_this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i];
args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[i];
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -574,33 +549,30 @@ void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this,
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
UChar32 T_UConverter_getNextUChar_UTF8(UConverter* converter,
const char** source,
const char* sourceLimit,
UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
UErrorCode* err)
{
/*safe keeps a ptr to the beginning in case we need to step back*/
char const *sourceInitial = *source;
char const *sourceInitial = args->source;
uint16_t extraBytesToWrite;
uint8_t myByte;
UChar32 ch;
int8_t isLegalSequence = 1;
UConverterToUnicodeArgs args;
/*Input boundary check*/
if ((*source) >= sourceLimit)
if (args->source >= args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
myByte = (uint8_t)*((*source)++);
myByte = (uint8_t)*(args->source++);
if(myByte < 0x80) {
return (UChar32)myByte;
}
@ -612,7 +584,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverter* converter,
/*The byte sequence is longer than the buffer area passed*/
if ((*source + extraBytesToWrite - 1) > sourceLimit)
if ((args->source + extraBytesToWrite - 1) > args->sourceLimit)
{
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
@ -623,31 +595,31 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverter* converter,
switch(extraBytesToWrite)
{
/* note: code falls through cases! (sic)*/
case 6: ch += (myByte = (uint8_t)*((*source)++)); ch <<= 6;
case 6: ch += (myByte = (uint8_t)*(args->source++)); ch <<= 6;
if ((myByte & 0xC0) != 0x80)
{
isLegalSequence = 0;
break;
}
case 5: ch += (myByte = *((*source)++)); ch <<= 6;
case 5: ch += (myByte = *(args->source++)); ch <<= 6;
if ((myByte & 0xC0) != 0x80)
{
isLegalSequence = 0;
break;
}
case 4: ch += (myByte = *((*source)++)); ch <<= 6;
case 4: ch += (myByte = *(args->source++)); ch <<= 6;
if ((myByte & 0xC0) != 0x80)
{
isLegalSequence = 0;
break;
}
case 3: ch += (myByte = *((*source)++)); ch <<= 6;
case 3: ch += (myByte = *(args->source++)); ch <<= 6;
if ((myByte & 0xC0) != 0x80)
{
isLegalSequence = 0;
break;
}
case 2: ch += (myByte = *((*source)++));
case 2: ch += (myByte = *(args->source++));
if ((myByte & 0xC0) != 0x80)
{
isLegalSequence = 0;
@ -664,27 +636,22 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverter* converter,
CALL_ERROR_FUNCTION:
{
/*rewinds source*/
const char* sourceFinal = *source;
const char* sourceFinal = args->source;
UChar myUChar = (UChar)ch; /* ### TODO: this is a hack until we prepare the callbacks for code points */
UChar* myUCharPtr = &myUChar;
*err = U_ILLEGAL_CHAR_FOUND;
*source = sourceInitial;
args->source = sourceInitial;
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
args.converter = converter;
args.target = myUCharPtr;
args.targetLimit = myUCharPtr + 1;
args.source = sourceFinal;
args.sourceLimit = sourceLimit;
args.flush = TRUE;
args.offsets = NULL;
args.size = sizeof(args);
converter->fromCharErrorBehaviour(converter->toUContext,
&args,
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
sourceLimit-sourceFinal,
args->sourceLimit-sourceFinal,
UCNV_UNASSIGNED,
err);
@ -732,21 +699,15 @@ const UConverterSharedData _UTF8Data={
/* UTF-16BE ----------------------------------------------------------------- */
void T_UConverter_toUnicode_UTF16_BE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_UTF16_BE (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const unsigned char *mySource = (unsigned char *) *source;
UChar *myTarget = *target;
const unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
UChar mySourceChar = 0x0000;
UChar oldmySourceChar = 0x0000;
@ -758,15 +719,15 @@ void T_UConverter_toUnicode_UTF16_BE (UConverter * _this,
/*gets the corresponding UChar */
mySourceChar = (unsigned char) mySource[mySourceIndex++];
oldmySourceChar = mySourceChar;
if (_this->toUnicodeStatus == 0)
if (args->converter->toUnicodeStatus == 0)
{
_this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
args->converter->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
}
else
{
if (_this->toUnicodeStatus != 0xFFFF)
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | mySourceChar);
_this->toUnicodeStatus = 0;
if (args->converter->toUnicodeStatus != 0xFFFF)
mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | mySourceChar);
args->converter->toUnicodeStatus = 0;
@ -781,38 +742,32 @@ void T_UConverter_toUnicode_UTF16_BE (UConverter * _this,
}
}
if (U_SUCCESS(*err) && flush
if (U_SUCCESS(*err) && args->flush
&& (mySourceIndex == sourceLength)
&& (_this->toUnicodeStatus != 0x00))
&& (args->converter->toUnicodeStatus != 0x00))
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_UTF16_BE (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
UChar mySourceChar;
/*writing the char to the output stream */
@ -829,8 +784,8 @@ void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) mySourceChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) mySourceChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -841,22 +796,20 @@ void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this,
}
}
*target += myTargetIndex;
*source += mySourceIndex;;
args->target += myTargetIndex;
args->source += mySourceIndex;;
return;
}
UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverter* converter,
const char** source,
const char* sourceLimit,
UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar32 myUChar;
/*Checks boundaries and set appropriate error codes*/
if ((*source)+2 > sourceLimit)
if (args->source+2 > args->sourceLimit)
{
if ((*source) >= sourceLimit)
if (args->source >= args->sourceLimit)
{
/*Either caller has reached the end of the byte stream*/
*err = U_INDEX_OUTOFBOUNDS_ERROR;
@ -873,25 +826,25 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverter* converter,
/*Gets the corresponding codepoint*/
myUChar = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1));
*source += 2;
/* Zero-extend the high byte before shifting: args->source points at plain char, and where
 * char is signed a byte >= 0x80 would sign-extend into the bits above the 16-bit code unit */
myUChar = ((uint16_t)(uint8_t)*(args->source) << 8) |((uint8_t)*((args->source)+1));
args->source += 2;
if(UTF_IS_FIRST_SURROGATE(myUChar)) {
uint16_t second;
if ((*source)+2 > sourceLimit) {
if (args->source+2 > args->sourceLimit) {
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
}
/* get the second surrogate and assemble the code point */
second = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1));
second = ((uint16_t)(*(args->source)) << 8) |((uint8_t)*(args->source+1));
/* ignore unmatched surrogates and just deliver the first one in such a case */
if(UTF_IS_SECOND_SURROGATE(second)) {
/* matched pair, get pair value */
myUChar = UTF16_GET_PAIR_VALUE(myUChar, second);
*source += 2;
args->source += 2;
}
}
@ -933,21 +886,15 @@ const UConverterSharedData _UTF16BEData={
/* UTF-16LE ----------------------------------------------------------------- */
void T_UConverter_toUnicode_UTF16_LE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_UTF16_LE (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const unsigned char *mySource = (unsigned char *) *source;
UChar *myTarget = *target;
const unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
UChar mySourceChar = 0x0000;
while (mySourceIndex < sourceLength)
@ -957,20 +904,20 @@ void T_UConverter_toUnicode_UTF16_LE (UConverter * _this,
/*gets the corresponding UniChar */
mySourceChar = (unsigned char) mySource[mySourceIndex++];
if (_this->toUnicodeStatus == 0x00)
if (args->converter->toUnicodeStatus == 0x00)
{
_this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
args->converter->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar;
}
else
{
if (_this->toUnicodeStatus == 0xFFFF)
if (args->converter->toUnicodeStatus == 0xFFFF)
mySourceChar = (UChar) (mySourceChar << 8);
else
{
mySourceChar <<= 8;
mySourceChar |= (UChar) (_this->toUnicodeStatus);
mySourceChar |= (UChar) (args->converter->toUnicodeStatus);
}
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
myTarget[myTargetIndex++] = mySourceChar;
}
}
@ -982,39 +929,33 @@ void T_UConverter_toUnicode_UTF16_LE (UConverter * _this,
}
if (U_SUCCESS(*err) && flush
if (U_SUCCESS(*err) && args->flush
&& (mySourceIndex == sourceLength)
&& (_this->toUnicodeStatus != 0x00))
&& (args->converter->toUnicodeStatus != 0x00))
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_UTF16_LE (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
UChar mySourceChar;
@ -1032,8 +973,8 @@ void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) (mySourceChar >> 8);
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) (mySourceChar >> 8);
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -1044,22 +985,20 @@ void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this,
}
}
*target += myTargetIndex;
*source += mySourceIndex;;
args->target += myTargetIndex;
args->source += mySourceIndex;;
return;
}
UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverter* converter,
const char** source,
const char* sourceLimit,
UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar32 myUChar;
/*Checks boundaries and set appropriate error codes*/
if ((*source)+2 > sourceLimit)
if (args->source+2 > args->sourceLimit)
{
if ((*source) >= sourceLimit)
if (args->source >= args->sourceLimit)
{
/*Either caller has reached the end of the byte stream*/
*err = U_INDEX_OUTOFBOUNDS_ERROR;
@ -1075,26 +1014,26 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverter* converter,
/*Gets the corresponding codepoint*/
myUChar = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source)));
/* Zero-extend the high (second) byte before shifting: args->source points at plain char, and
 * where char is signed a byte >= 0x80 would sign-extend into the bits above the 16-bit code unit */
myUChar = ((uint16_t)(uint8_t)*((args->source)+1) << 8) |((uint8_t)(*(args->source)));
/*updates the source*/
*source += 2;
args->source += 2;
if(UTF_IS_FIRST_SURROGATE(myUChar)) {
uint16_t second;
if ((*source)+2 > sourceLimit) {
if (args->source+2 > args->sourceLimit) {
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
}
/* get the second surrogate and assemble the code point */
second = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source)));
second = ((uint16_t)*(args->source+1) << 8) |((uint8_t)(*(args->source)));
/* ignore unmatched surrogates and just deliver the first one in such a case */
if(UTF_IS_SECOND_SURROGATE(second)) {
/* matched pair, get pair value */
myUChar = UTF16_GET_PAIR_VALUE(myUChar, second);
*source += 2;
args->source += 2;
}
}

View file

@ -1,4 +1,4 @@
/*
/*
**********************************************************************
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
@ -25,18 +25,12 @@
/* ISO 8859-1 --------------------------------------------------------------- */
static void T_UConverter_toUnicode_LATIN_1 (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_toUnicode_LATIN_1 (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
unsigned char *mySource = (unsigned char *) *source;
UChar *myTarget = *target;
int32_t sourceLength = sourceLimit - (char *) mySource;
unsigned char *mySource = (unsigned char *) args->source;
UChar *myTarget = args->target;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
int32_t readLen = 0;
int32_t i = 0;
@ -46,45 +40,37 @@ static void T_UConverter_toUnicode_LATIN_1 (UConverter * _this,
*in case we don't have enough buffer space
*we set the error flag accordingly
*/
if ((targetLimit - *target) < sourceLength)
if ((args->targetLimit - args->target) < sourceLength)
{
readLen = targetLimit - *target;
readLen = args->targetLimit - args->target;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
else
{
readLen = sourceLimit - (char *) mySource;
readLen = args->sourceLimit - (char *) mySource;
}
for (i = 0; i < readLen; i++) myTarget[i] = (UChar) mySource[i];
*target += i;
*source += i;
args->target += i;
args->source += i;
return;
}
static void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_fromUnicode_LATIN_1 (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
UConverterFromUnicodeArgs args;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
UConverterCallbackReason reason;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
{
if (myTargetIndex < targetLength)
{
if (mySource[mySourceIndex] < 0x0100)
@ -96,16 +82,16 @@ static void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this,
{
*err = U_INVALID_CHAR_FOUND;
reason = UCNV_UNASSIGNED;
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength = 1;
if (UTF_IS_LEAD(mySource[mySourceIndex++]))
{
if (mySourceIndex < sourceLength)
{
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
{
_this->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength++;
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength++;
mySourceIndex++;
}
else
@ -113,42 +99,47 @@ static void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this,
reason = UCNV_ILLEGAL;
}
}
else if (flush == TRUE)
else if (args->flush == TRUE)
{
reason = UCNV_ILLEGAL;
*err = U_TRUNCATED_CHAR_FOUND;
}
else
{
_this->fromUSurrogateLead = _this->invalidUCharBuffer[0];
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
/* do not call the callback */
}
}
if (_this->fromUSurrogateLead == 0)
if (args->converter->fromUSurrogateLead == 0)
{
int32_t currentOffset = myTargetIndex;
const UChar *saveSource = args->source;
char *saveTarget = args->target;
int32_t *saveOffset = args->offsets;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
args.converter = _this;
args.target = (char*)myTarget + myTargetIndex;;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
args->target = (char*)myTarget + myTargetIndex;;
args->source = mySource + mySourceIndex;
FromU_CALLBACK_MACRO(args.converter->fromUContext,
FromU_CALLBACK_MACRO(args->converter->fromUContext,
args,
_this->invalidUCharBuffer,
_this->invalidUCharLength,
(UChar32) (_this->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(_this->invalidUCharBuffer[0],
_this->invalidUCharBuffer[2])
: _this->invalidUCharBuffer[0]),
args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength,
/* a stored surrogate pair occupies slots 0 (lead) and 1 (trail) of invalidUCharBuffer */
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[1])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
if (U_FAILURE (*err)) break;
_this->invalidUCharLength = 0;
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffset;
if (U_FAILURE (*err))
{
break;
}
args->converter->invalidUCharLength = 0;
}
}
}
@ -159,16 +150,14 @@ static void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this,
}
}
*target += myTargetIndex;
*source += mySourceIndex;;
args->target += myTargetIndex;
args->source += mySourceIndex;;
return;
}
static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverter* converter,
const char** source,
const char* sourceLimit,
UErrorCode* err)
static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
/* Empties the internal buffers if need be
@ -176,14 +165,14 @@ static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverter* converter,
* (LATIN_1 is a subset of Unicode)
*/
if ((*source)+1 > sourceLimit)
if (args->source+1 > args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
/* make sure that we zero-extend, not sign-extend, the byte */
return (UChar)(uint8_t)*((*source)++);
return (UChar)(uint8_t)*(args->source++);
}
static const UConverterImpl _Latin1Impl={

View file

@ -69,30 +69,23 @@ _MBCSUnload(UConverterSharedData *sharedData) {
uprv_free (sharedData->table);
}
static void T_UConverter_toUnicode_MBCS (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_toUnicode_MBCS (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const char *mySource = *source;
UChar *myTarget = *target;
const char *mySource = args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UBool *myStarters = NULL;
UConverterToUnicodeArgs args;
myToUnicode = &_this->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback;
myStarters = _this->sharedData->table->mbcs.starters;
myToUnicode = &args->converter->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &args->converter->sharedData->table->mbcs.toUnicodeFallback;
myStarters = args->converter->sharedData->table->mbcs.starters;
while (mySourceIndex < sourceLength)
{
@ -103,9 +96,9 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
if (myStarters[(uint8_t) mySourceChar] &&
(_this->toUnicodeStatus == 0x00))
(args->converter->toUnicodeStatus == 0x00))
{
_this->toUnicodeStatus = (unsigned char) mySourceChar;
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
}
else
{
@ -114,11 +107,11 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
*one
*/
if (_this->toUnicodeStatus != 0x00)
if (args->converter->toUnicodeStatus != 0x00)
{
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
mySourceChar |= (UChar) (args->converter->toUnicodeStatus << 8);
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
/*gets the corresponding Unicode codepoint */
@ -129,8 +122,8 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
{
myTarget[myTargetIndex++] = targetUniChar;
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
@ -140,39 +133,39 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
}
if (targetUniChar == missingUCharMarker)
{
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
if (mySourceChar > 0xff)
{
_this->invalidCharLength = 2;
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
_this->invalidCharBuffer[1] = (char) mySourceChar;
args->converter->invalidCharLength = 2;
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
}
else
{
_this->invalidCharLength = 1;
_this->invalidCharBuffer[0] = (char) mySourceChar;
args->converter->invalidCharLength = 1;
args->converter->invalidCharBuffer[0] = (char) mySourceChar;
}
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
/* to do hsys: add more smarts to the codeUnits and length later */
ToU_CALLBACK_MACRO(_this->toUContext,
ToU_CALLBACK_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -186,48 +179,41 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
/*If at the end of conversion we are still carrying state information
*and flush is TRUE, we can deduce that the input stream is truncated
*/
if (_this->toUnicodeStatus
if (args->converter->toUnicodeStatus
&& (mySourceIndex == sourceLength)
&& (flush == TRUE))
&& (args->flush == TRUE))
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const char *mySource = *source;
UChar *myTarget = *target;
const char *mySource = args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UChar oldMySourceChar = 0x0000;
UBool *myStarters = NULL;
UConverterToUnicodeArgs args;
myToUnicode = &_this->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback;
myStarters = _this->sharedData->table->mbcs.starters;
myToUnicode = &args->converter->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &args->converter->sharedData->table->mbcs.toUnicodeFallback;
myStarters = args->converter->sharedData->table->mbcs.starters;
while (mySourceIndex < sourceLength)
{
@ -238,9 +224,9 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
if (myStarters[(uint8_t) mySourceChar] &&
(_this->toUnicodeStatus == 0x00))
(args->converter->toUnicodeStatus == 0x00))
{
_this->toUnicodeStatus = (unsigned char) mySourceChar;
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
}
else
{
@ -249,11 +235,11 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
*one
*/
if (_this->toUnicodeStatus != 0x00)
if (args->converter->toUnicodeStatus != 0x00)
{
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
mySourceChar |= (UChar) (args->converter->toUnicodeStatus << 8);
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
/*gets the corresponding Unicode codepoint */
@ -267,10 +253,10 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
{
if (targetUniChar > 0x00FF)
offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
else
offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
if (targetUniChar > 0x00FF)
args->offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
else
args->offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
}
@ -278,17 +264,17 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
oldMySourceChar = mySourceChar;
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myToUnicodeFallback, mySourceChar);
/*writes the UniChar to the output stream */
{
if (targetUniChar > 0x00FF)
offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
else
offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
if (targetUniChar > 0x00FF)
args->offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
else
args->offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
}
myTarget[myTargetIndex++] = targetUniChar;
@ -296,39 +282,40 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
}
if (targetUniChar == missingUCharMarker)
{
/* NOTE(review): myTargetIndex is initialised to 0 and, in the lines shown,
 * only advances when a UChar is stored in myTarget. If an unmappable
 * sequence is reached before anything has been written, the index below
 * is -1 and the read lands before the start of the offsets array.
 * TODO confirm whether a prior write is guaranteed on this path. */
int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
int32_t currentOffset = args->offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
int32_t My_i = myTargetIndex;
/* Remember the caller-visible positions; they are put back after the
 * callback so the final "+= index" adjustments stay relative to them. */
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
if (mySourceChar > 0xff)
{
_this->invalidCharLength = 2;
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
_this->invalidCharBuffer[1] = (char) mySourceChar;
args->converter->invalidCharLength = 2;
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
}
else
{
_this->invalidCharLength = 1;
_this->invalidCharBuffer[0] = (char) mySourceChar;
args->converter->invalidCharLength = 1;
args->converter->invalidCharBuffer[0] = (char) mySourceChar;
}
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets?offsets+myTargetIndex:0;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this->toUContext,
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -342,47 +329,40 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
/*If at the end of conversion we are still carrying state information
*and flush is TRUE, we can deduce that the input stream is truncated
*/
if (_this->toUnicodeStatus
if (args->converter->toUnicodeStatus
&& (mySourceIndex == sourceLength)
&& (flush == TRUE))
&& (args->flush == TRUE))
{
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_fromUnicode_MBCS (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
char *myTarget = *target;
const UChar *mySource = args->source;
char *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UConverterFromUnicodeArgs args;
UConverterCallbackReason reason;
myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback;
myFromUnicode = &args->converter->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &args->converter->sharedData->table->mbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -407,14 +387,14 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
@ -433,8 +413,8 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -442,19 +422,22 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
}
if (targetUniChar == missingCharMarker)
{
const UChar *saveSource = args->source;
char *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
args->converter->invalidUCharLength = 1;
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
{
/*if (mySource < sourceLimit)*/
/* if (mySource < args->sourceLimit) */
if(mySourceIndex < sourceLength)
{
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
{
_this->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength++;
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength++;
mySourceIndex++;
}
else
@ -462,41 +445,39 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
reason = UCNV_ILLEGAL;
}
}
else if (flush == TRUE)
else if (args->flush == TRUE)
{
reason = UCNV_ILLEGAL;
*err = U_TRUNCATED_CHAR_FOUND;
}
else
{
_this->fromUSurrogateLead = _this->invalidUCharBuffer[0];
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
/* do not call the callback */
}
}
if (_this->fromUSurrogateLead == 0)
if (args->converter->fromUSurrogateLead == 0)
{
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
/* HSYS: to do: more smarts */
/* NOTE(review): when invalidUCharLength == 2 the trail surrogate was stored
 * in invalidUCharBuffer[1] above; index [2] is never written in the lines
 * shown, so the pair value is presumably meant to be built from [0] and [1].
 * Also, no assignment of `reason` is visible on the plain-unassigned
 * (non-surrogate) path of this function, whereas the OFFSETS_LOGIC variant
 * sets UCNV_UNASSIGNED first -- TODO confirm both against the full file. */
FromU_CALLBACK_MACRO(args.converter->fromUContext,
FromU_CALLBACK_MACRO(args->converter->fromUContext,
args,
_this->invalidUCharBuffer,
_this->invalidUCharLength,
(UChar32) (_this->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(_this->invalidUCharBuffer[0],
_this->invalidUCharBuffer[2])
: _this->invalidUCharBuffer[0]),
args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
if (U_FAILURE (*err)) break;
_this->invalidUCharLength = 0;
args->converter->invalidUCharLength = 0;
}
}
}
@ -509,37 +490,30 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
}
*target += myTargetIndex;
*source += mySourceIndex;;
args->target += myTargetIndex;
args->source += mySourceIndex;;
return;
}
static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
char *myTarget = *target;
const UChar *mySource = args->source;
char *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UConverterFromUnicodeArgs args;
UConverterCallbackReason reason;
myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback;
myFromUnicode = &args->converter->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &args->converter->sharedData->table->mbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -553,52 +527,52 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
{
if (targetUniChar <= 0x00FF)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
if (targetUniChar != missingCharMarker)
{
if (targetUniChar <= 0x00FF)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-1;
args->offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -609,20 +583,24 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
{
int32_t currentOffset = mySourceIndex -1;
int32_t My_i = myTargetIndex;
const UChar *saveSource = args->source;
char *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
reason = UCNV_UNASSIGNED;
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
args->converter->invalidUCharLength = 1;
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
{
/*if (mySource < sourceLimit)*/
/* if (mySource < args->sourceLimit) */
if(mySourceIndex < sourceLength)
{
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
{
_this->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength++;
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength++;
mySourceIndex++;
}
else
@ -630,42 +608,39 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
reason = UCNV_ILLEGAL;
}
}
else if (flush == TRUE)
else if (args->flush == TRUE)
{
reason = UCNV_ILLEGAL;
*err = U_TRUNCATED_CHAR_FOUND;
}
else
{
_this->fromUSurrogateLead = _this->invalidUCharBuffer[0];
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
/* do not call the callback */
}
}
if (_this->fromUSurrogateLead == 0)
if (args->converter->fromUSurrogateLead == 0)
{
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets?offsets+myTargetIndex:0;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
/* HSYS: to do: more smarts including offsets*/
/* NOTE(review): when invalidUCharLength == 2 the trail surrogate was stored
 * in invalidUCharBuffer[1] above; index [2] is never written in the lines
 * shown, so the pair value is presumably meant to be built from [0] and [1]
 * -- TODO confirm against the full file. */
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args.converter->fromUContext,
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
args,
_this->invalidUCharBuffer,
_this->invalidUCharLength,
(UChar32) (_this->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(_this->invalidUCharBuffer[0],
_this->invalidUCharBuffer[2])
: _this->invalidUCharBuffer[0]),
args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
args->offsets = saveOffsets;
args->source = saveSource;
args->target = saveTarget;
if (U_FAILURE (*err)) break;
_this->invalidUCharLength = 0;
args->converter->invalidUCharLength = 0;
}
}
}
@ -678,91 +653,83 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
}
*target += myTargetIndex;
*source += mySourceIndex;;
args->target += myTargetIndex;
args->source += mySourceIndex;;
return;
}
static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter,
const char** source,
const char* sourceLimit,
static UChar32 T_UConverter_getNextUChar_MBCS(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar myUChar;
char const *sourceInitial = *source;
UConverterToUnicodeArgs args;
char const *sourceInitial = args->source;
/*safe keeps a ptr to the beginning in case we need to step back*/
/*Input boundary check*/
if ((*source)+1 > sourceLimit)
if (args->source+1 > args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
/*Checks to see if the byte is a lead*/
if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE)
if (args->converter->sharedData->table->mbcs.starters[(uint8_t)*(args->source)] == FALSE)
{
/*Not lead byte: we update the source ptr and get the codepoint*/
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode),
(UChar)(**source));
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
myUChar = ucmp16_getu((&args->converter->sharedData->table->mbcs.toUnicode),
(UChar)*(args->source));
if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
(myUChar == 0xFFFD))
{
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback),
(UChar)(**source));
myUChar = ucmp16_getu((&args->converter->sharedData->table->mbcs.toUnicodeFallback),
(UChar)*(args->source));
}
(*source)++;
args->source++;
}
else
{
/*Lead byte: we Build the codepoint and get the corresponding character
* and update the source ptr*/
if ((*source + 2) > sourceLimit)
if (args->source + 2 > args->sourceLimit)
{
*err = U_TRUNCATED_CHAR_FOUND;
return 0xFFFD;
}
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
myUChar = ucmp16_getu((&args->converter->sharedData->table->mbcs.toUnicode),
(uint16_t)(((UChar)(*(args->source)) << 8) |((uint8_t)*(args->source+1))));
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
(myUChar == 0xFFFD))
{
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
myUChar = ucmp16_getu((&args->converter->sharedData->table->mbcs.toUnicodeFallback),
(uint16_t)(((UChar)(*(args->source)) << 8) |((uint8_t)*(args->source+1))));
}
(*source) += 2;
args->source += 2;
}
if (myUChar != 0xFFFD) return myUChar;
else
{
/*rewinds source*/
const char* sourceFinal = *source;
const char* sourceFinal = args->source;
UChar* myUCharPtr = &myUChar;
*err = U_INVALID_CHAR_FOUND;
*source = sourceInitial;
args->source = sourceInitial;
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
args.converter = converter;
args.target = myUCharPtr;
args.targetLimit = myUCharPtr + 1;
args.source = sourceFinal;
args.sourceLimit = sourceLimit;
args.flush = TRUE;
args.offsets = NULL;
args.size = sizeof(args);
converter->fromCharErrorBehaviour(converter->toUContext,
&args,
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
sourceLimit-sourceFinal,
args->sourceLimit-sourceFinal,
UCNV_UNASSIGNED,
err);

View file

@ -59,27 +59,20 @@ _SBCSUnload(UConverterSharedData *sharedData) {
uprv_free (sharedData->table);
}
void T_UConverter_toUnicode_SBCS (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
char *mySource = (char *) *source;
UChar *myTarget = *target;
char *mySource = (char *) args->source;
UChar *myTarget = args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UConverterToUnicodeArgs args;
myToUnicode = _this->sharedData->table->sbcs.toUnicode;
myToUnicodeFallback = _this->sharedData->table->sbcs.toUnicodeFallback;
myToUnicode = args->converter->sharedData->table->sbcs.toUnicode;
myToUnicodeFallback = args->converter->sharedData->table->sbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
@ -96,8 +89,8 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this,
}
else
{
if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
@ -108,29 +101,30 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this,
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
_this->invalidCharLength = 1;
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
*err = U_INVALID_CHAR_FOUND;
args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
args->converter->invalidCharLength = 1;
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
/* to do hsys: add more smarts to the codeUnits and length later */
ToU_CALLBACK_MACRO(_this->toUContext,
ToU_CALLBACK_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
/* Hsys: calculate the source and target advancement */
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -141,38 +135,31 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this,
}
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
void T_UConverter_fromUnicode_SBCS (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
unsigned char targetChar = 0x00;
UConverterFromUnicodeArgs args;
UConverterCallbackReason reason;
myFromUnicode = &_this->sharedData->table->sbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->sbcs.fromUnicodeFallback;
myFromUnicode = &args->converter->sharedData->table->sbcs.fromUnicode;
myFromUnicodeFallback = &args->converter->sharedData->table->sbcs.fromUnicodeFallback;
/*writing the char to the output stream */
/* HSYS : to do : finish the combining of the surrogate characters later */
/*
if (_this->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex]))
if (args->converter->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex]))
{
}
*/
@ -188,8 +175,8 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
/*writes the char to the output stream */
myTarget[myTargetIndex++] = targetChar;
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
@ -204,17 +191,17 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
*err = U_INVALID_CHAR_FOUND;
reason = UCNV_UNASSIGNED;
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
args->converter->invalidUCharLength = 1;
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
{
/*if (mySource < sourceLimit)*/
/*if (mySource < args->sourceLimit)*/
if(mySourceIndex < sourceLength)
{
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
{
_this->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength++;
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength++;
mySourceIndex++;
}
else
@ -222,47 +209,47 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
reason = UCNV_ILLEGAL;
}
}
else if (flush == TRUE)
else if (args->flush == TRUE)
{
reason = UCNV_ILLEGAL;
*err = U_TRUNCATED_CHAR_FOUND;
}
else
{
_this->fromUSurrogateLead = _this->invalidUCharBuffer[0];
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
/* do not call the callback */
}
}
if (_this->fromUSurrogateLead == 0)
if (args->converter->fromUSurrogateLead == 0)
{
args.converter = _this;
args.target = (char *)myTarget+myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource+mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
const UChar *saveSource = args->source;
char *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
args->target = (char *)myTarget+myTargetIndex;
args->source = mySource+mySourceIndex;
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
/* Check if we have encountered a surrogate pair. If the first UChar is a lead
surrogate and the second UChar is a trail surrogate, it's a surrogate pair. If
the first UChar is a lead surrogate but the second UChar is not a trail
surrogate, it's an illegal sequence. If neither, it's a plain unassigned code
point.*/
/* NOTE(review): when invalidUCharLength == 2 the trail surrogate was stored
 * in invalidUCharBuffer[1] above; index [2] is never written in the lines
 * shown, so the pair value is presumably meant to be built from [0] and [1]
 * -- TODO confirm against the full file. */
FromU_CALLBACK_MACRO(args.converter->fromUContext,
FromU_CALLBACK_MACRO(args->converter->fromUContext,
args,
_this->invalidUCharBuffer,
_this->invalidUCharLength,
(UChar32) (_this->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(_this->invalidUCharBuffer[0],
_this->invalidUCharBuffer[2])
: _this->invalidUCharBuffer[0]),
args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[2])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
if (U_FAILURE (*err))
{
break;
}
_this->invalidUCharLength = 0;
args->converter->invalidUCharLength = 0;
}
}
}
@ -274,62 +261,54 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
}
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
UChar32 T_UConverter_getNextUChar_SBCS(UConverter* converter,
const char** source,
const char* sourceLimit,
UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar myUChar;
UConverterToUnicodeArgs args;
if (U_FAILURE(*err)) return 0xFFFD;
if ((*source)+1 > sourceLimit)
if (args->source+1 > args->sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
/*Gets the corresponding codepoint*/
myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)];
myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
if (myUChar != 0xFFFD) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
const char* sourceFinal = args->source;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE)&&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
if ((args->converter->useFallback == TRUE)&&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*((*source)-1)];
myUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
if (myUChar != 0xFFFD) return myUChar;
}
*err = U_INVALID_CHAR_FOUND;
/*Calls the ErrorFunctor after rewinding the input buffer*/
(*source)--;
args->source--;
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
args.converter = converter;
args.target = myUCharPtr;
args.targetLimit = myUCharPtr + 1;
args.source = sourceFinal;
args.sourceLimit = sourceLimit;
args.flush = TRUE;
args.offsets = NULL;
args.size = sizeof(args);
converter->fromCharErrorBehaviour(converter->toUContext,
&args,
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
1,
UCNV_UNASSIGNED,
@ -411,28 +390,21 @@ _DBCSUnload(UConverterSharedData *sharedData) {
uprv_free (sharedData->table);
}
/*
 * Converts double-byte (DBCS) input to Unicode.
 *
 * NOTE: this listing interleaves the pre-change lines (separate
 * _this/target/source/... parameters) with the post-change lines (a single
 * UConverterToUnicodeArgs pointer); "@ ... @" lines mark context that is not shown.
 *
 * Bytes are read from the source buffer up to its limit. The first byte of a
 * pair is parked in the converter's toUnicodeStatus; when the second byte
 * arrives the two are combined into a 16-bit value and looked up with
 * ucmp16_getu() in the dbcs.toUnicode table. If that yields
 * missingUCharMarker and fallbacks are enabled, dbcs.toUnicodeFallback is
 * consulted. If the value is still unmapped, *err is set to
 * U_INVALID_CHAR_FOUND, the two bytes are copied to invalidCharBuffer and the
 * to-Unicode callback is invoked through ToU_CALLBACK_MACRO.
 *
 * On exit the target and source pointers are advanced by the number of
 * UChars written and bytes consumed. If flush is TRUE, all input was
 * consumed and a first byte is still pending, *err becomes
 * U_TRUNCATED_CHAR_FOUND (only when no earlier error is set).
 */
void T_UConverter_toUnicode_DBCS (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
UErrorCode * err)
{
const char *mySource = ( char *) *source;
UChar *myTarget = *target;
const char *mySource = ( char *) args->source;
UChar *myTarget = args->target;
/* Progress is tracked with indices; the caller-visible pointers are only
   advanced once, at the end of the function. */
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
int32_t targetLength = args->targetLimit - myTarget;
int32_t sourceLength = args->sourceLimit - (char *) mySource;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UConverterToUnicodeArgs args;
myToUnicode = &_this->sharedData->table->dbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->dbcs.toUnicodeFallback;
myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode;
myToUnicodeFallback = &args->converter->sharedData->table->dbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
@ -442,16 +414,16 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
mySourceChar = (unsigned char) mySource[mySourceIndex++];
/* No first byte is pending: remember this byte as the first half of a
   pair and wait for the next one */
if (_this->toUnicodeStatus == 0x00)
if (args->converter->toUnicodeStatus == 0x00)
{
_this->toUnicodeStatus = (unsigned char) mySourceChar;
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
}
else
{
/* A first byte is pending: combine it with the current byte into one
   16-bit lookup value and clear the pending state */
if (_this->toUnicodeStatus != 0x00)
if (args->converter->toUnicodeStatus != 0x00)
{
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
_this->toUnicodeStatus = 0x00;
mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
args->converter->toUnicodeStatus = 0x00;
}
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
@ -462,8 +434,8 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
/*writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
}
/* Primary table had no usable mapping: try the fallback table, but only
   when the converter has fallbacks enabled and the table provides them */
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
@ -473,31 +445,31 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
}
if (targetUniChar == missingUCharMarker)
{
/* Remember the caller's pointers; they are temporarily replaced with the
   current positions for the callback and restored afterwards */
const char *saveSource = args->source;
UChar *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
*err = U_INVALID_CHAR_FOUND;
/* Record the offending byte pair (high byte first) for the callback */
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
_this->invalidCharBuffer[1] = (char) mySourceChar;
_this->invalidCharLength = 2;
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
args->converter->invalidCharLength = 2;
args.converter = _this;
args.target = myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
args->target = myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
/* to do hsys: add more smarts to the codeUnits and length later */
ToU_CALLBACK_MACRO(_this->toUContext,
ToU_CALLBACK_MACRO(args->converter->toUContext,
args,
_this->invalidCharBuffer,
_this->invalidCharLength,
args->converter->invalidCharBuffer,
args->converter->invalidCharLength,
UCNV_UNASSIGNED,
err);
/* Hsys: calculate the source and target advancement */
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
/* Stop converting if the callback left an error set */
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
args->converter->invalidCharLength = 0;
}
}
}
@ -511,47 +483,40 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
/*If at the end of conversion we are still carrying state information
*flush is TRUE, we can deduce that the input stream is truncated
*/
if ((flush == TRUE)
if ((args->flush == TRUE)
&& (mySourceIndex == sourceLength)
&& (_this->toUnicodeStatus != 0x00))
&& (args->converter->toUnicodeStatus != 0x00))
{
/* Do not overwrite an error that was reported earlier */
if (U_SUCCESS(*err))
{
*err = U_TRUNCATED_CHAR_FOUND;
_this->toUnicodeStatus = 0x00;
args->converter->toUnicodeStatus = 0x00;
}
}
/* Publish the progress made to the caller */
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
/*
 * Converts UTF-16 input to double-byte (DBCS) output.
 *
 * NOTE: this listing interleaves the pre-change lines (separate
 * _this/target/source/... parameters) with the post-change lines (a single
 * UConverterFromUnicodeArgs pointer); "@ ... @" lines mark context that is not shown.
 *
 * Each source UChar is looked up with ucmp16_getu() in the dbcs.fromUnicode
 * table and, when fallbacks are enabled, in dbcs.fromUnicodeFallback.
 * In the visible overflow branches one byte of the result is stored in the
 * converter's charErrorBuffer and *err is set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * For an unmapped UChar, *err is set to U_INVALID_CHAR_FOUND and the UChar is
 * copied to invalidUCharBuffer[0]. If it is a lead surrogate:
 *   - followed by a trail surrogate: the trail is stored in
 *     invalidUCharBuffer[1] and consumed as well;
 *   - followed by something else: the reason becomes UCNV_ILLEGAL;
 *   - at the end of input with flush set: the reason becomes UCNV_ILLEGAL and
 *     *err becomes U_TRUNCATED_CHAR_FOUND;
 *   - at the end of input without flush: it is kept in fromUSurrogateLead and
 *     no callback is made.
 * Otherwise the from-Unicode callback is invoked through FromU_CALLBACK_MACRO.
 *
 * On exit the target and source pointers are advanced by the number of bytes
 * written and UChars consumed.
 */
void T_UConverter_fromUnicode_DBCS (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
UBool flush,
void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
UErrorCode * err)
{
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
const UChar *mySource = args->source;
unsigned char *myTarget = (unsigned char *) args->target;
/* Progress is tracked with indices; the caller-visible pointers are only
   advanced once, at the end of the function. */
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
int32_t targetLength = args->targetLimit - (char *) myTarget;
int32_t sourceLength = args->sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UConverterFromUnicodeArgs args;
UConverterCallbackReason reason;
myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->dbcs.fromUnicodeFallback;
myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
myFromUnicodeFallback = &args->converter->sharedData->table->dbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -573,13 +538,13 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
}
else
{
/* Keep one byte of the result in the converter's error buffer and report
   the overflow */
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
/* Primary table had no usable mapping: try the fallback table, but only
   when the converter has fallbacks enabled and the table provides them */
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
else if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
@ -593,8 +558,8 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
args->converter->charErrorBuffer[0] = (char) targetUniChar;
args->converter->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
@ -604,17 +569,17 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
*err = U_INVALID_CHAR_FOUND;
reason = UCNV_UNASSIGNED;
/* Record the offending UChar (already consumed, hence index - 1) */
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
_this->invalidUCharLength = 1;
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
args->converter->invalidUCharLength = 1;
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
{
/*if (mySource < sourceLimit)*/
/*if (mySource < args->sourceLimit) */
if(mySourceIndex < sourceLength)
{
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
{
/* Complete pair: store the trail surrogate in slot [1] and consume it */
_this->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
_this->invalidUCharLength++;
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
args->converter->invalidUCharLength++;
mySourceIndex++;
}
else
@ -622,47 +587,47 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
reason = UCNV_ILLEGAL;
}
}
else if (flush == TRUE)
else if (args->flush == TRUE)
{
/* Lead surrogate is the last UChar of the final chunk */
reason = UCNV_ILLEGAL;
*err = U_TRUNCATED_CHAR_FOUND;
}
else
{
/* More input may follow: keep the lead surrogate in the converter */
_this->fromUSurrogateLead = _this->invalidUCharBuffer[0];
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
/* do not call the callback */
}
}
if (_this->fromUSurrogateLead == 0)
if (args->converter->fromUSurrogateLead == 0)
{
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
/* Check if we have encountered a surrogate pair. If the first UChar is a lead
surrogate and the second UChar is a trail surrogate, it's a surrogate pair. If the
first UChar is a lead surrogate but the second UChar is not a trail surrogate, it's
an illegal sequence. If neither, it's a plain unassigned code point.*/
args.converter = _this;
args.target = (char*)myTarget + myTargetIndex;
args.targetLimit = targetLimit;
args.source = mySource + mySourceIndex;
args.sourceLimit = sourceLimit;
args.flush = flush;
args.offsets = offsets;
args.size = sizeof(args);
FromU_CALLBACK_MACRO(args.converter->fromUContext,
/* Remember the caller's pointers; they are temporarily replaced with the
   current positions for the callback and restored afterwards */
const UChar *saveSource = args->source;
char *saveTarget = args->target;
int32_t *saveOffsets = args->offsets;
args->target = (char*)myTarget + myTargetIndex;
args->source = mySource + mySourceIndex;
/* The trail surrogate lives in invalidUCharBuffer[1] (stored above), so the
   pair value is built from elements [0] and [1] */
FromU_CALLBACK_MACRO(args->converter->fromUContext,
args,
_this->invalidUCharBuffer,
_this->invalidUCharLength,
(UChar32) (_this->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(_this->invalidUCharBuffer[0],
_this->invalidUCharBuffer[1])
: _this->invalidUCharBuffer[0]),
args->converter->invalidUCharBuffer,
args->converter->invalidUCharLength,
(UChar32) (args->converter->invalidUCharLength == 2 ?
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
args->converter->invalidUCharBuffer[1])
: args->converter->invalidUCharBuffer[0]),
reason,
err);
args->source = saveSource;
args->target = saveTarget;
args->offsets = saveOffsets;
/* Stop converting if the callback left an error set */
if (U_FAILURE (*err))
{
break;
}
_this->invalidUCharLength = 0;
args->converter->invalidUCharLength = 0;
}
}
}
@ -673,31 +638,28 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
}
}
/* Publish the progress made to the caller */
*target += myTargetIndex;
*source += mySourceIndex;
args->target += myTargetIndex;
args->source += mySourceIndex;
return;
}
UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter,
const char** source,
const char* sourceLimit,
UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
UErrorCode* err)
{
UChar myUChar;
UConverterToUnicodeArgs args;
if (U_FAILURE(*err)) return 0xFFFD;
/*Checks boundaries and set appropriate error codes*/
if ((*source)+2 > sourceLimit)
if (args->source+2 > args->sourceLimit)
{
if ((*source) >= sourceLimit)
if (args->source >= args->sourceLimit)
{
/*Either caller has reached the end of the byte stream*/
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
else if (((*source)+1) == sourceLimit)
else if ((args->source+1) == args->sourceLimit)
{
/* a character was cut in half*/
*err = U_TRUNCATED_CHAR_FOUND;
@ -707,46 +669,41 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter,
}
/*Gets the corresponding codepoint*/
myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicode),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicode),
(uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source+1))));
/*update the input pointer*/
*source += 2;
args->source += 2;
if (myUChar != 0xFFFD) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
const char* sourceFinal = args->source;
/* rewinding the input buffer*/
(*source) -= 2;
args->source -= 2;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
if ((args->converter->useFallback == TRUE) &&
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicodeFallback),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)-1))));
myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
(uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source-1))));
if (myUChar != 0xFFFD)
{
*source += 2;
args->source += 2;
return myUChar;
}
}
*err = U_INVALID_CHAR_FOUND;
args.converter = converter;
args.target = myUCharPtr;
args.targetLimit = myUCharPtr + 1;
args.source = sourceFinal;
args.sourceLimit = sourceLimit;
args.flush = TRUE;
args.offsets = NULL;
args.size = sizeof(args);
args->target = myUCharPtr;
args->targetLimit = myUCharPtr + 1;
args->source = sourceFinal;
/*It is very likely that the ErrorFunctor will write to the
*internal buffers */
converter->fromCharErrorBehaviour(converter->toUContext,
&args,
args->converter->fromCharErrorBehaviour(args->converter->toUContext,
args,
sourceFinal,
2,
UCNV_UNASSIGNED,