mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-484 remove old code for SBCS, DBCS, and EBCDIC_STATEFUL
X-SVN-Rev: 3353
This commit is contained in:
parent
eda9b41095
commit
8eb8473811
6 changed files with 6 additions and 1560 deletions
|
@ -69,7 +69,7 @@ uchar.o uchriter.o ucmp8.o ucmp16.o ucmp32.o ucnv.o ucnv_bld.o ucnv_cb.o \
|
|||
ucnv_cnv.o ucnv_err.o ucnv_io.o uhash.o uhash_us.o uloc.o unicode.o unistr.o \
|
||||
uresbund.o uresdata.o ustring.o rbdata.o ubidi.o ubidiwrt.o ubidiln.o \
|
||||
bidi.o ushape.o uvector.o udata.o unames.o utf_impl.o unorm.o \
|
||||
ucnv2022.o ucnvebdc.o ucnvlat1.o ucnv_utf.o ucnvhz.o ucnvsbcs.o ucnvmbcs.o ucnv_lmb.o ucnvscsu.o
|
||||
ucnv2022.o ucnvlat1.o ucnv_utf.o ucnvhz.o ucnvmbcs.o ucnv_lmb.o ucnvscsu.o
|
||||
|
||||
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
||||
|
|
|
@ -331,10 +331,6 @@ SOURCE=.\ucnv_utf.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnvebdc.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnvhz.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -347,10 +343,6 @@ SOURCE=.\ucnvmbcs.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnvsbcs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnvscsu.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
|
||||
#include "ucnv_io.h"
|
||||
#include "uhash.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
@ -45,9 +44,9 @@ extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
|
|||
|
||||
static const UConverterSharedData *
|
||||
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
||||
&_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
|
||||
NULL, NULL, &_MBCSData, &_Latin1Data,
|
||||
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
|
||||
&_EBCDICStatefulData, &_ISO2022Data,
|
||||
NULL, &_ISO2022Data,
|
||||
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
|
||||
&_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
|
||||
&_HZData, &_SCSUData, &_ASCIIData
|
||||
|
@ -263,19 +262,6 @@ UBool deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
if (deadSharedData->referenceCounter > 0)
|
||||
return FALSE;
|
||||
|
||||
/* Note: if we have a dataMemory, then that means that all ucmp's came
|
||||
from udata, and their tables will go away at the end
|
||||
of this function. So, we need to simply dealloc the UCMP8's themselves.
|
||||
We're guaranteed that they do not allocate any further memory.
|
||||
|
||||
When we have an API to simply 'init' a ucmp8, then no action at all will
|
||||
need to happen. --srl
|
||||
|
||||
This means that the compact arrays would have to be static fields in
|
||||
UConverterSharedData, not pointers to allocated structures.
|
||||
Markus
|
||||
*/
|
||||
|
||||
if (deadSharedData->impl->unload != NULL) {
|
||||
deadSharedData->impl->unload(deadSharedData);
|
||||
}
|
||||
|
@ -487,6 +473,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
|
|||
return NULL;
|
||||
|
||||
if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
|
||||
converterData[type] == NULL ||
|
||||
converterData[type]->referenceCounter != 1 ||
|
||||
source->structSize != sizeof(UConverterStaticData))
|
||||
{
|
||||
|
|
|
@ -22,35 +22,9 @@
|
|||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnvmbcs.h"
|
||||
/* SBCS needed: #include "ucmp8.h" */
|
||||
#include "ucmp16.h"
|
||||
|
||||
#if 0
|
||||
/* SBCS was: */
|
||||
/*Table Node Definitions */
|
||||
typedef struct
|
||||
{
|
||||
UChar *toUnicode; /* [256]; */
|
||||
CompactByteArray fromUnicode;
|
||||
UChar *toUnicodeFallback;
|
||||
CompactByteArray fromUnicodeFallback;
|
||||
}
|
||||
UConverterSBCSTable;
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CompactShortArray toUnicode;
|
||||
CompactShortArray fromUnicode;
|
||||
CompactShortArray toUnicodeFallback;
|
||||
CompactShortArray fromUnicodeFallback;
|
||||
}
|
||||
UConverterDBCSTable;
|
||||
|
||||
union UConverterTable
|
||||
{
|
||||
/* UConverterSBCSTable sbcs; */
|
||||
UConverterDBCSTable dbcs;
|
||||
UConverterMBCSTable mbcs;
|
||||
};
|
||||
|
||||
|
@ -230,9 +204,9 @@ struct UConverterImpl {
|
|||
};
|
||||
|
||||
extern const UConverterSharedData
|
||||
_SBCSData, _DBCSData, _MBCSData, _Latin1Data,
|
||||
_MBCSData, _Latin1Data,
|
||||
_UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
|
||||
_EBCDICStatefulData, _ISO2022Data,
|
||||
_ISO2022Data,
|
||||
_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
|
||||
_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
|
||||
_HZData, _SCSUData, _ASCIIData;
|
||||
|
|
|
@ -1,722 +0,0 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvebdc.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000Aug29
|
||||
* created by: Ram Viswanadha
|
||||
*
|
||||
* Change history:
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cstring.h"
|
||||
|
||||
|
||||
/* Protos */
|
||||
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL(UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err);
|
||||
|
||||
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err);
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL(UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err);
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
U_CFUNC UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Forward declaration */
|
||||
|
||||
U_CFUNC void
|
||||
_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
|
||||
|
||||
U_CFUNC void
|
||||
_DBCSUnload(UConverterSharedData *sharedData);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
char *mySource = (char *) args->source;
|
||||
UChar *myTarget = args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - args->target;
|
||||
int32_t sourceLength = args->sourceLimit - args->source;
|
||||
CompactShortArray *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
int32_t myMode = args->converter->mode;
|
||||
|
||||
myToUnicode = &(args->converter->sharedData->table->dbcs.toUnicode);
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) (args->source[mySourceIndex++]);
|
||||
if (mySourceChar == UCNV_SI) myMode = UCNV_SI;
|
||||
else if (mySourceChar == UCNV_SO) myMode = UCNV_SO;
|
||||
else if ((myMode == UCNV_SO) &&
|
||||
(args->converter->toUnicodeStatus == 0x00))
|
||||
{
|
||||
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*In case there is a state, we update the source char
|
||||
*by concatenating the previous char with the current
|
||||
*one
|
||||
*/
|
||||
if (args->converter->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar |= (UChar) (args->converter->toUnicodeStatus << 8);
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
else mySourceChar &= 0x00FF;
|
||||
|
||||
/*gets the corresponding Unicode codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
args->target[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
const char* saveSource = args->source;
|
||||
UChar* saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
if (mySourceChar > 0xff)
|
||||
{
|
||||
args->converter->invalidCharLength = 2;
|
||||
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->invalidCharLength = 1;
|
||||
args->converter->invalidCharBuffer[0] = (char) mySourceChar;
|
||||
}
|
||||
args->converter->mode = myMode;
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
reason,
|
||||
err);
|
||||
|
||||
myMode = args->converter->mode;
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
myMode = args->converter->mode;
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->toUnicodeStatus
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (args->flush == TRUE))
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
args->converter->mode = myMode;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
char *mySource = (char *) args->source;
|
||||
UChar *myTarget = args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - args->target;
|
||||
int32_t sourceLength = args->sourceLimit - args->source;
|
||||
CompactShortArray *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
int32_t myMode = args->converter->mode;
|
||||
|
||||
myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) (args->source[mySourceIndex++]);
|
||||
if (mySourceChar == UCNV_SI) myMode = UCNV_SI;
|
||||
else if (mySourceChar == UCNV_SO) myMode = UCNV_SO;
|
||||
else if ((myMode == UCNV_SO) &&
|
||||
(args->converter->toUnicodeStatus == 0x00))
|
||||
{
|
||||
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*In case there is a state, we update the source char
|
||||
*by concatenating the previous char with the current
|
||||
*one
|
||||
*/
|
||||
if (args->converter->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar |= (UChar) (args->converter->toUnicodeStatus << 8);
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
else mySourceChar &= 0x00FF;
|
||||
|
||||
/*gets the corresponding Unicode codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
{
|
||||
if(myMode == UCNV_SO)
|
||||
args->offsets[myTargetIndex] = mySourceIndex-2; /* double byte */
|
||||
else
|
||||
args->offsets[myTargetIndex] = mySourceIndex-1; /* single byte */
|
||||
}
|
||||
args->target[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t currentOffset = args->offsets[myTargetIndex-1] + 2;/* Because mySourceIndex was already incremented */
|
||||
int32_t My_i = myTargetIndex;
|
||||
const char* saveSource = args->source;
|
||||
UChar* saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
if (mySourceChar > 0xFF)
|
||||
{
|
||||
args->converter->invalidCharLength = 2;
|
||||
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->invalidCharLength = 1;
|
||||
args->converter->invalidCharBuffer[0] = (char) mySourceChar;
|
||||
}
|
||||
args->converter->mode = myMode;
|
||||
|
||||
args->target = args->target + myTargetIndex;
|
||||
args->source = args->source + mySourceIndex;
|
||||
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
|
||||
/* call back handles the offset array */
|
||||
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->toUContext,
|
||||
args,
|
||||
args->source,
|
||||
1,
|
||||
reason,
|
||||
err);
|
||||
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
myMode = args->converter->mode;
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidCharLength = 0;
|
||||
myMode = args->converter->mode;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (args->converter->toUnicodeStatus
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (args->flush == TRUE))
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
args->converter->mode = myMode;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
|
||||
{
|
||||
const UChar *mySource = args->source;
|
||||
unsigned char *myTarget = (unsigned char *) args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - args->target;
|
||||
int32_t sourceLength = args->sourceLimit - args->source;
|
||||
CompactShortArray *myFromUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
|
||||
UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
||||
|
||||
myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) args->source[mySourceIndex++];
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
||||
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
|
||||
if (oldIsTargetUCharDBCS != isTargetUCharDBCS)
|
||||
{
|
||||
if (isTargetUCharDBCS) args->target[myTargetIndex++] = UCNV_SO;
|
||||
else args->target[myTargetIndex++] = UCNV_SI;
|
||||
|
||||
|
||||
if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength))
|
||||
{
|
||||
args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
else if (myTargetIndex+1 >= targetLength)
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) (targetUniChar >> 8);
|
||||
args->converter->charErrorBuffer[1] = (char)(targetUniChar & 0x00FF);
|
||||
args->converter->charErrorBufferLength = 2;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!isTargetUCharDBCS)
|
||||
{
|
||||
args->target[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->target[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
args->target[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
||||
args->converter->charErrorBufferLength = 1;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const UChar* saveSource = args->source;
|
||||
char* saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
|
||||
isTargetUCharDBCS = oldIsTargetUCharDBCS;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
||||
args->converter->invalidUCharLength = 1;
|
||||
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
FromU_CALLBACK_MACRO(args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
1,
|
||||
(UChar32) mySourceChar,
|
||||
UCNV_UNASSIGNED,
|
||||
err);
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
isTargetUCharDBCS = (UBool) args->converter->fromUnicodeStatus;
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
|
||||
{
|
||||
const UChar *mySource = args->source;
|
||||
unsigned char *myTarget = (unsigned char *) args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - args->target;
|
||||
int32_t sourceLength = args->sourceLimit - args->source;
|
||||
CompactShortArray *myFromUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
|
||||
UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
||||
|
||||
myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) args->source[mySourceIndex++];
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
oldIsTargetUCharDBCS = isTargetUCharDBCS;
|
||||
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
isTargetUCharDBCS =(UBool) (targetUniChar>0x00FF);
|
||||
if (oldIsTargetUCharDBCS != isTargetUCharDBCS)
|
||||
{
|
||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
||||
if (isTargetUCharDBCS) args->target[myTargetIndex++] = UCNV_SO;
|
||||
else args->target[myTargetIndex++] = UCNV_SI;
|
||||
|
||||
|
||||
if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength))
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
||||
args->converter->charErrorBufferLength = 1;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
else if (myTargetIndex+1 >= targetLength)
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) (targetUniChar >> 8);
|
||||
args->converter->charErrorBuffer[1] = (char) (targetUniChar & 0x00FF);
|
||||
args->converter->charErrorBufferLength = 2;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isTargetUCharDBCS)
|
||||
{
|
||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
||||
args->target[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
||||
args->target[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
args->offsets[myTargetIndex] = mySourceIndex-1;
|
||||
args->target[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
||||
args->converter->charErrorBufferLength = 1;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t currentOffset = args->offsets[myTargetIndex-1]+1;
|
||||
char * saveTarget = args->target;
|
||||
const UChar* saveSource = args->source;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
args->converter->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
||||
args->converter->invalidUCharLength = 1;
|
||||
|
||||
/* Breaks out of the loop since behaviour was set to stop */
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
args->offsets = args->offsets?args->offsets+myTargetIndex:0;
|
||||
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
1,
|
||||
(UChar32)mySourceChar,
|
||||
UCNV_UNASSIGNED,
|
||||
err);
|
||||
isTargetUCharDBCS = (UBool)(args->converter->fromUnicodeStatus);
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
isTargetUCharDBCS = (UBool)(args->converter->fromUnicodeStatus);
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
||||
args->converter->fromUnicodeStatus = (int32_t)isTargetUCharDBCS;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Reads one character from args->source and returns its mapping.
 *
 * Leading UCNV_SI / UCNV_SO bytes are consumed and stored in
 * converter->mode.  In UCNV_SI mode one byte is looked up; otherwise two
 * bytes are combined (first byte in the high half) and looked up.
 *
 * Returns 0xffff with *err = U_INDEX_OUTOFBOUNDS_ERROR when no input is left
 * (also after consuming shift bytes), and 0xffff with
 * *err = U_TRUNCATED_CHAR_FOUND when fewer than two bytes remain in
 * double-byte mode.  For a lookup result >= 0xfffe the to-Unicode error
 * callback is invoked with a one-UChar target aliased onto the result, and
 * whatever that UChar then holds is returned.
 */
U_CFUNC UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
                                                          UErrorCode* err)
{
    UChar mapped;
    /* Start of the character proper; moved past any shift bytes below. */
    const char *charStart = args->source;

    if (args->source >= args->sourceLimit)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* Swallow shift bytes, tracking the mode they select. */
    for (;;)
    {
        const char shift = *(args->source);

        if ((shift != UCNV_SI) && (shift != UCNV_SO))
        {
            break;
        }

        args->converter->mode = shift;
        args->source++;
        charStart = args->source;

        /* The shift byte may have been the last byte available. */
        if (args->source >= args->sourceLimit)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0xffff;
        }
    }

    if (args->converter->mode == UCNV_SI)
    {
        /* Single-byte mode. */
        mapped = ucmp16_getu( (&(args->converter->sharedData->table->dbcs.toUnicode)),
                              ((UChar)(uint8_t)(*(args->source))));
        args->source++;
    }
    else
    {
        /* Double-byte mode: both bytes must be available. */
        if ((args->source + 2) > args->sourceLimit)
        {
            *err = U_TRUNCATED_CHAR_FOUND;
            return 0xffff;
        }

        mapped = ucmp16_getu( (&(args->converter->sharedData->table->dbcs.toUnicode)),
                              (((UChar)(uint8_t)((*(args->source))) << 8) |((uint8_t)*(args->source+1))) );
        args->source += 2;
    }

    if (mapped >= 0xfffe)
    {
        UConverterCallbackReason why;

        if (mapped == 0xfffe)
        {
            why = UCNV_UNASSIGNED;
            *err = U_INVALID_CHAR_FOUND;
        }
        else
        {
            why = UCNV_ILLEGAL;
            *err = U_ILLEGAL_CHAR_FOUND;
        }

        /* Give the callback exactly one UChar of output room, aliased onto
         * the value that is about to be returned. */
        args->target = &mapped;
        args->targetLimit = &mapped + 1;

        args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                                args,
                                                charStart,
                                                args->source - charStart,
                                                why,
                                                err);

        /* An overflow of the one-UChar target is not reported to the caller. */
        if (*err == U_BUFFER_OVERFLOW_ERROR)
        {
            *err = U_ZERO_ERROR;
        }
    }

    return mapped;
}
|
||||
|
||||
U_CFUNC void
|
||||
_EBCDIC_STATEFUL_WriteSub(UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode) {
|
||||
UConverter *cnv = pArgs->converter;
|
||||
char *p;
|
||||
char buffer[4];
|
||||
|
||||
p = buffer;
|
||||
|
||||
/* fromUnicodeStatus contains UBool "in DBCS mode" */
|
||||
switch(cnv->subCharLen) {
|
||||
case 1:
|
||||
if(cnv->fromUnicodeStatus) {
|
||||
/* DBCS mode and SBCS sub char: change to SBCS */
|
||||
cnv->fromUnicodeStatus = FALSE;
|
||||
*p++ = UCNV_SI;
|
||||
}
|
||||
*p++ = cnv->subChar[0];
|
||||
break;
|
||||
case 2:
|
||||
if(!cnv->fromUnicodeStatus) {
|
||||
/* SBCS mode and DBCS sub char: change to DBCS */
|
||||
cnv->fromUnicodeStatus = TRUE;
|
||||
*p++ = UCNV_SO;
|
||||
}
|
||||
*p++ = cnv->subChar[0];
|
||||
*p++ = cnv->subChar[1];
|
||||
break;
|
||||
default:
|
||||
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
ucnv_cbFromUWriteBytes(pArgs,
|
||||
buffer, (int32_t)(p - buffer),
|
||||
offsetIndex, pErrorCode);
|
||||
}
|
||||
|
||||
static const UConverterImpl _EBCDICStatefulImpl={
|
||||
UCNV_EBCDIC_STATEFUL,
|
||||
|
||||
_DBCSLoad,
|
||||
_DBCSUnload,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
T_UConverter_toUnicode_EBCDIC_STATEFUL,
|
||||
T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
|
||||
T_UConverter_fromUnicode_EBCDIC_STATEFUL,
|
||||
T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
|
||||
T_UConverter_getNextUChar_EBCDIC_STATEFUL,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
_EBCDIC_STATEFUL_WriteSub
|
||||
};
|
||||
|
||||
/* Static data is in tools/makeconv/ucnvstat.c for data-based
|
||||
* converters. Be sure to update it as well.
|
||||
*/
|
||||
|
||||
const UConverterSharedData _EBCDICStatefulData={
|
||||
sizeof(UConverterSharedData), 1,
|
||||
NULL, NULL, NULL, FALSE, &_EBCDICStatefulImpl,
|
||||
0
|
||||
};
|
|
@ -1,785 +0,0 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvsbcs.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000feb03
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Change history:
|
||||
*
|
||||
* 05/09/00 helena Added implementation to handle fallback mappings.
|
||||
* 06/20/2000 helena OS/400 port changes; mostly typecast.
|
||||
* 06/29/2000 helena Major rewrite of the callback APIs.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucmp16.h"
|
||||
/* SBCS needed: #include "ucmp8.h" */
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
#if 0
|
||||
/* SBCS replaced by MBCS 2000dec20 */
|
||||
|
||||
/* SBCS --------------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
_SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
|
||||
const uint8_t *oldraw = raw;
|
||||
sharedData->table->sbcs.toUnicode = (UChar *)raw;
|
||||
raw += sizeof(uint16_t)*256; oldraw = raw;
|
||||
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode);
|
||||
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
||||
{
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
||||
}
|
||||
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode);
|
||||
}
|
||||
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
||||
{
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
||||
}
|
||||
sharedData->table->sbcs.toUnicodeFallback = (UChar *)raw;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_SBCSUnload(UConverterSharedData *sharedData) {
|
||||
ucmp8_close (&sharedData->table->sbcs.fromUnicode);
|
||||
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
||||
ucmp8_close (&sharedData->table->sbcs.fromUnicodeFallback);
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
char *mySource = (char *) args->source;
|
||||
UChar *myTarget = args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - myTarget;
|
||||
int32_t sourceLength = args->sourceLimit - (char *) mySource;
|
||||
UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
|
||||
myToUnicode = args->converter->sharedData->table->sbcs.toUnicode;
|
||||
myToUnicodeFallback = args->converter->sharedData->table->sbcs.toUnicodeFallback;
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
|
||||
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/* writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
/* Look up in the fallback table first */
|
||||
UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
|
||||
if (fallbackUniChar < 0xfffe)
|
||||
{
|
||||
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
||||
}
|
||||
}
|
||||
if (targetUniChar >= 0xfffe)
|
||||
{
|
||||
const char *saveSource = args->source;
|
||||
UChar *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
|
||||
args->converter->invalidCharLength = 1;
|
||||
|
||||
args->target = myTarget + myTargetIndex;
|
||||
args->source = mySource + mySourceIndex;
|
||||
|
||||
/* to do hsys: add more smarts to the codeUnits and length later */
|
||||
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
reason,
|
||||
err);
|
||||
/* Hsys: calculate the source and target advancement */
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const UChar *mySource = args->source;
|
||||
unsigned char *myTarget = (unsigned char *) args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
||||
int32_t sourceLength = args->sourceLimit - mySource;
|
||||
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
|
||||
unsigned char targetChar = 0x00;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
myFromUnicode = &args->converter->sharedData->table->sbcs.fromUnicode;
|
||||
myFromUnicodeFallback = &args->converter->sharedData->table->sbcs.fromUnicodeFallback;
|
||||
/*writing the char to the output stream */
|
||||
/* HSYS : to do : finish the combining of the surrogate characters later */
|
||||
/*
|
||||
if (args->converter->fromUSurrogateLead != 0 && UTF_IS_TRAIL(mySource[mySourceIndex]))
|
||||
{
|
||||
}
|
||||
*/
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
|
||||
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceIndex++;
|
||||
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = targetChar;
|
||||
}
|
||||
else if (UCNV_FROM_U_USE_FALLBACK(args->converter, mySource[mySourceIndex-1]) &&
|
||||
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
|
||||
{
|
||||
/* Look up in the fallback table first */
|
||||
targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
|
||||
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = targetChar;
|
||||
}
|
||||
}
|
||||
if (targetChar == 0 && mySource[mySourceIndex-1] != 0)
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
reason = UCNV_UNASSIGNED;
|
||||
|
||||
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
||||
args->converter->invalidUCharLength = 1;
|
||||
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
|
||||
{
|
||||
/*if (mySource < args->sourceLimit)*/
|
||||
if(mySourceIndex < sourceLength)
|
||||
{
|
||||
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
|
||||
{
|
||||
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
|
||||
args->converter->invalidUCharLength++;
|
||||
mySourceIndex++;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
}
|
||||
}
|
||||
else if (args->flush == TRUE)
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
|
||||
/* do not call the callback */
|
||||
}
|
||||
}
|
||||
if (args->converter->fromUSurrogateLead == 0)
|
||||
{
|
||||
const UChar *saveSource = args->source;
|
||||
char *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
args->target = (char *)myTarget+myTargetIndex;
|
||||
args->source = mySource+mySourceIndex;
|
||||
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
||||
/* Check if we have encountered a surrogate pair. If first UChar is lead byte
|
||||
and second UChar is trail byte, it's a surrogate char. If UChar is lead byte
|
||||
but second UChar is not trail byte, it's illegal sequence. If neither, it's
|
||||
plain unassigned code point.*/
|
||||
FromU_CALLBACK_MACRO(args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
||||
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
||||
args->converter->invalidUCharBuffer[1])
|
||||
: args->converter->invalidUCharBuffer[0]),
|
||||
reason,
|
||||
err);
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
if (U_FAILURE (*err))
|
||||
{
|
||||
break;
|
||||
}
|
||||
args->converter->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Converts exactly one source byte and returns the resulting code unit.
 *
 * Returns 0xffff without touching the input when *err already indicates a
 * failure, or (with U_INDEX_OUTOFBOUNDS_ERROR) when no source byte is left.
 * Otherwise the byte is consumed and looked up in sbcs.toUnicode, then in
 * sbcs.toUnicodeFallback if fallbacks are enabled and present.  When neither
 * yields a value below 0xfffe, the to-Unicode callback is called with a
 * one-UChar target that aliases the return value, so whatever the callback
 * writes there is what the caller receives.
 */
U_CFUNC UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
                                               UErrorCode* err)
{
    UChar result;
    UConverterCallbackReason reason;

    if (U_FAILURE(*err)) {
        return 0xffff;
    }

    /* Nothing left to read. */
    if (args->source >= args->sourceLimit) {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* Primary lookup; the byte is consumed here. */
    result = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
    if (result < 0xfffe) {
        return result;
    }

    /* Second chance through the fallback table, if it is in use. */
    if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
        (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
    {
        UChar fallbackResult =
            args->converter->sharedData->table->sbcs.toUnicodeFallback[(unsigned char)*(args->source - 1)];

        if (fallbackResult < 0xfffe) {
            return fallbackResult;
        }
    }

    /* 0xfffe marks an unassigned byte, 0xffff an illegal one. */
    if (result == 0xfffe) {
        reason = UCNV_UNASSIGNED;
        *err = U_INVALID_CHAR_FOUND;
    } else {
        reason = UCNV_ILLEGAL;
        *err = U_ILLEGAL_CHAR_FOUND;
    }

    /* Point the callback's output at the return slot; it is very likely that
       the callback will also write to the converter's internal buffers. */
    args->target = &result;
    args->targetLimit = &result + 1;
    args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                            args,
                                            args->source - 1,
                                            1,
                                            reason,
                                            err);

    /* Makes the internal caching transparent to the user. */
    if (*err == U_BUFFER_OVERFLOW_ERROR) {
        *err = U_ZERO_ERROR;
    }

    return result;
}
|
||||
|
||||
static const UConverterImpl _SBCSImpl={
|
||||
UCNV_SBCS,
|
||||
|
||||
_SBCSLoad,
|
||||
_SBCSUnload,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
T_UConverter_toUnicode_SBCS,
|
||||
NULL,
|
||||
T_UConverter_fromUnicode_SBCS,
|
||||
NULL,
|
||||
T_UConverter_getNextUChar_SBCS,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* Static data is in tools/makeconv/ucnvstat.c for data-based
|
||||
* converters. Be sure to update it as well.
|
||||
*/
|
||||
|
||||
const UConverterSharedData _SBCSData={
|
||||
sizeof(UConverterSharedData), 0 /* ### 1 to be operational */,
|
||||
NULL, NULL, NULL, FALSE, NULL /* ### &_SBCSImpl */,
|
||||
0
|
||||
};
|
||||
|
||||
/* DBCS --------------------------------------------------------------------- */
|
||||
|
||||
U_CFUNC void
|
||||
_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
|
||||
const uint8_t *oldraw = raw;
|
||||
ucmp16_initFromData(&sharedData->table->dbcs.toUnicode,&raw, pErrorCode);
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
||||
}
|
||||
oldraw = raw;
|
||||
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode);
|
||||
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
||||
{
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
||||
}
|
||||
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode);
|
||||
oldraw = raw;
|
||||
}
|
||||
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
||||
{
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3); /* pad to 4 */
|
||||
}
|
||||
ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
U_CFUNC void
|
||||
_DBCSUnload(UConverterSharedData *sharedData) {
|
||||
ucmp16_close (&sharedData->table->dbcs.fromUnicode);
|
||||
ucmp16_close (&sharedData->table->dbcs.toUnicode);
|
||||
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
|
||||
ucmp16_close (&sharedData->table->dbcs.fromUnicodeFallback);
|
||||
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
|
||||
ucmp16_close (&sharedData->table->dbcs.toUnicodeFallback);
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const char *mySource = ( char *) args->source;
|
||||
UChar *myTarget = args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - myTarget;
|
||||
int32_t sourceLength = args->sourceLimit - (char *) mySource;
|
||||
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
|
||||
myToUnicode = &args->converter->sharedData->table->dbcs.toUnicode;
|
||||
myToUnicodeFallback = &args->converter->sharedData->table->dbcs.toUnicodeFallback;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
||||
|
||||
/*We have no internal state, we should */
|
||||
if (args->converter->toUnicodeStatus == 0x00)
|
||||
{
|
||||
args->converter->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (args->converter->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar = (UChar) ((args->converter->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar < 0xfffe)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
else if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
|
||||
(args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
|
||||
{
|
||||
UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
|
||||
if (fallbackUniChar < 0xfffe)
|
||||
{
|
||||
myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
|
||||
}
|
||||
}
|
||||
if (targetUniChar >= 0xfffe)
|
||||
{
|
||||
const char *saveSource = args->source;
|
||||
UChar *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
if (targetUniChar == 0xfffe)
|
||||
{
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_ILLEGAL_CHAR_FOUND;
|
||||
}
|
||||
|
||||
args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
args->converter->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
args->converter->invalidCharLength = 2;
|
||||
|
||||
args->target = myTarget + myTargetIndex;
|
||||
args->source = mySource + mySourceIndex;
|
||||
|
||||
/* to do hsys: add more smarts to the codeUnits and length later */
|
||||
ToU_CALLBACK_MACRO(args->converter->toUContext,
|
||||
args,
|
||||
args->converter->invalidCharBuffer,
|
||||
args->converter->invalidCharLength,
|
||||
reason,
|
||||
err);
|
||||
/* Hsys: calculate the source and target advancement */
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
if (U_FAILURE (*err)) break;
|
||||
args->converter->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if ((args->flush == TRUE)
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (args->converter->toUnicodeStatus != 0x00))
|
||||
{
|
||||
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
args->converter->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
U_CFUNC void T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const UChar *mySource = args->source;
|
||||
unsigned char *myTarget = (unsigned char *) args->target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = args->targetLimit - (char *) myTarget;
|
||||
int32_t sourceLength = args->sourceLimit - mySource;
|
||||
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
UConverterCallbackReason reason;
|
||||
|
||||
myFromUnicode = &args->converter->sharedData->table->dbcs.fromUnicode;
|
||||
myFromUnicodeFallback = &args->converter->sharedData->table->dbcs.fromUnicodeFallback;
|
||||
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) mySource[mySourceIndex++];
|
||||
|
||||
/*Gets the corresponding codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
||||
args->converter->charErrorBufferLength = 1;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
else if (UCNV_FROM_U_USE_FALLBACK(args->converter, mySourceChar) &&
|
||||
(args->converter->sharedData->staticData->hasFromUnicodeFallback == TRUE))
|
||||
{
|
||||
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->charErrorBuffer[0] = (char) targetUniChar;
|
||||
args->converter->charErrorBufferLength = 1;
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (targetUniChar == missingCharMarker)
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
reason = UCNV_UNASSIGNED;
|
||||
|
||||
args->converter->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
||||
args->converter->invalidUCharLength = 1;
|
||||
if (UTF_IS_LEAD(mySource[mySourceIndex-1]))
|
||||
{
|
||||
/*if (mySource < args->sourceLimit) */
|
||||
if(mySourceIndex < sourceLength)
|
||||
{
|
||||
if (UTF_IS_TRAIL(mySource[mySourceIndex]))
|
||||
{
|
||||
args->converter->invalidUCharBuffer[1] = (UChar)mySource[mySourceIndex];
|
||||
args->converter->invalidUCharLength++;
|
||||
mySourceIndex++;
|
||||
}
|
||||
else
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
}
|
||||
}
|
||||
else if (args->flush == TRUE)
|
||||
{
|
||||
reason = UCNV_ILLEGAL;
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
args->converter->fromUSurrogateLead = args->converter->invalidUCharBuffer[0];
|
||||
/* do not call the callback */
|
||||
}
|
||||
}
|
||||
if (args->converter->fromUSurrogateLead == 0)
|
||||
{
|
||||
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
||||
/* Check if we have encountered a surrogate pair. If first UChar is lead byte
|
||||
and second UChar is trail byte, it's a surrogate char. If UChar is lead byte
|
||||
but second UChar is not trail byte, it's illegal sequence. If neither, it's
|
||||
plain unassigned code point.*/
|
||||
const UChar *saveSource = args->source;
|
||||
char *saveTarget = args->target;
|
||||
int32_t *saveOffsets = args->offsets;
|
||||
args->target = (char*)myTarget + myTargetIndex;
|
||||
args->source = mySource + mySourceIndex;
|
||||
FromU_CALLBACK_MACRO(args->converter->fromUContext,
|
||||
args,
|
||||
args->converter->invalidUCharBuffer,
|
||||
args->converter->invalidUCharLength,
|
||||
(UChar32) (args->converter->invalidUCharLength == 2 ?
|
||||
UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0],
|
||||
args->converter->invalidUCharBuffer[1])
|
||||
: args->converter->invalidUCharBuffer[0]),
|
||||
reason,
|
||||
err);
|
||||
args->source = saveSource;
|
||||
args->target = saveTarget;
|
||||
args->offsets = saveOffsets;
|
||||
if (U_FAILURE (*err))
|
||||
{
|
||||
break;
|
||||
}
|
||||
args->converter->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Converts exactly one double-byte character and returns the resulting
 * code unit.
 *
 * Returns 0xffff without consuming input when *err already indicates a
 * failure, when no source byte is left (U_INDEX_OUTOFBOUNDS_ERROR), or when
 * only one byte is left (U_TRUNCATED_CHAR_FOUND).  Otherwise two bytes are
 * consumed, combined high byte first, and looked up in dbcs.toUnicode, then
 * in dbcs.toUnicodeFallback if fallbacks are enabled and present.  When
 * neither yields a value below 0xfffe, the to-Unicode callback is called
 * with a one-UChar target that aliases the return value, so whatever the
 * callback writes there is what the caller receives.
 *
 * Fix over the previous version: the fallback lookup used to be keyed on
 * the byte *after* the consumed pair plus the pair's second byte (it read
 * args->source[0] and args->source[-1] after the pointer had already been
 * advanced), which could read past sourceLimit, and a fallback hit advanced
 * args->source a second time, silently skipping the next character.  Both
 * lookups now use the same two consumed bytes and the input is advanced
 * exactly once.
 */
U_CFUNC UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
                                               UErrorCode* err)
{
    UChar myUChar;
    uint16_t sourcePair;
    UConverterCallbackReason reason;

    if (U_FAILURE(*err)) return 0xffff;

    /* Checks boundaries and sets the appropriate error code. */
    if (args->source+2 > args->sourceLimit)
    {
        if (args->source >= args->sourceLimit)
        {
            /* The caller has reached the end of the byte stream. */
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
        }
        else if ((args->source+1) == args->sourceLimit)
        {
            /* A character was cut in half. */
            *err = U_TRUNCATED_CHAR_FOUND;
        }

        return 0xffff;
    }

    /* Build the 16-bit lookup key once, from the two bytes about to be
       consumed; the casts keep a negative char from sign-extending. */
    sourcePair = (uint16_t)(((uint8_t)args->source[0] << 8) | (uint8_t)args->source[1]);

    /* Gets the corresponding code point. */
    myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicode),
                          sourcePair);

    /* Update the input pointer: both bytes are consumed from here on. */
    args->source += 2;

    if (myUChar < 0xfffe) return myUChar;

    /* Second chance through the fallback table, if it is in use. */
    if (UCNV_TO_U_USE_FALLBACK(args->converter) &&
        (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
    {
        UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
                                          sourcePair);

        if (fallbackUChar < 0xfffe)
        {
            return fallbackUChar;
        }
    }

    /* 0xfffe marks an unassigned pair, 0xffff an illegal one. */
    if (myUChar == 0xfffe)
    {
        reason = UCNV_UNASSIGNED;
        *err = U_INVALID_CHAR_FOUND;
    }
    else
    {
        reason = UCNV_ILLEGAL;
        *err = U_ILLEGAL_CHAR_FOUND;
    }

    /* Point the callback's output at the return slot; it is very likely that
       the callback will also write to the converter's internal buffers. */
    args->target = &myUChar;
    args->targetLimit = &myUChar + 1;
    args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                            args,
                                            args->source - 2,
                                            2,
                                            reason,
                                            err);

    /* Makes the internal caching transparent to the user. */
    if (*err == U_BUFFER_OVERFLOW_ERROR) *err = U_ZERO_ERROR;

    return myUChar;
}
|
||||
|
||||
static const UConverterImpl _DBCSImpl={
|
||||
UCNV_DBCS,
|
||||
|
||||
_DBCSLoad,
|
||||
_DBCSUnload,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
T_UConverter_toUnicode_DBCS,
|
||||
NULL,
|
||||
T_UConverter_fromUnicode_DBCS,
|
||||
NULL,
|
||||
T_UConverter_getNextUChar_DBCS,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
||||
/* Static data is in tools/makeconv/ucnvstat.c for data-based
|
||||
* converters. Be sure to update it as well.
|
||||
*/
|
||||
|
||||
const UConverterSharedData _DBCSData={
|
||||
sizeof(UConverterSharedData), 1,
|
||||
NULL, NULL, NULL, FALSE, &_DBCSImpl,
|
||||
0, /* tounicodestatus */
|
||||
};
|
Loading…
Add table
Reference in a new issue