ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable)

X-SVN-Rev: 721
This commit is contained in:
Markus Scherer 2000-02-05 00:19:15 +00:00
parent 01a8d20f2e
commit 7f7b2d90f3
13 changed files with 3999 additions and 4174 deletions

View file

@ -74,7 +74,8 @@ mutex.o normlzr.o putil.o rbcache.o resbund.o schriter.o scsu.o \
uchar.o uchriter.o ucmp8.o ucmp16.o ucmp32.o ucnv.o ucnv_bld.o \
ucnv_cnv.o ucnv_err.o ucnv_io.o uhash.o uloc.o unicode.o unistr.o \
ures.o ustring.o rbread.o rbdata.o ubidi.o ubidiln.o \
bidi.o uvector.o udata.o unames.o utf_impl.o
bidi.o uvector.o udata.o unames.o utf_impl.o \
ucnv_2022.o ucnv_utf.o ucnv_sbcs.o ucnv_mbcs.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -211,6 +211,10 @@ SOURCE=.\ucnv.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_2022.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_bld.c
# End Source File
# Begin Source File
@ -227,6 +231,18 @@ SOURCE=.\ucnv_io.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_mbcs.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_sbcs.c
# End Source File
# Begin Source File
SOURCE=.\ucnv_utf.c
# End Source File
# Begin Source File
SOURCE=.\udata.c
# ADD CPP /Ze
# End Source File

View file

@ -61,8 +61,12 @@ UnicodeConverterCPP& UnicodeConverterCPP::operator=(const UnicodeConverterCPP&
*Increments the assigner converter's ref count
*/
Mutex updateReferenceCounters;
myUnicodeConverter->sharedData->referenceCounter--;
that.myUnicodeConverter->sharedData->referenceCounter++;
if (myUnicodeConverter->sharedData->referenceCounter != 0 && myUnicodeConverter->sharedData->referenceCounter != ~0) {
myUnicodeConverter->sharedData->referenceCounter--;
}
if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) {
that.myUnicodeConverter->sharedData->referenceCounter++;
}
}
*myUnicodeConverter = *(that.myUnicodeConverter);
@ -98,7 +102,9 @@ UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeConverterCPP& that)
myUnicodeConverter = new UConverter;
{
Mutex updateReferenceCounter;
that.myUnicodeConverter->sharedData->referenceCounter++;
if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) {
that.myUnicodeConverter->sharedData->referenceCounter++;
}
}
*myUnicodeConverter = *(that.myUnicodeConverter);
}

View file

@ -38,112 +38,6 @@
static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv);
/* Internal function : end */
typedef void (*T_ToUnicodeFunction) (UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
typedef void (*T_FromUnicodeFunction) (UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
typedef UChar (*T_GetNextUCharFunction) (UConverter *,
const char **,
const char *,
UErrorCode *);
static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
{
T_UConverter_toUnicode_SBCS,
T_UConverter_toUnicode_DBCS,
T_UConverter_toUnicode_MBCS,
T_UConverter_toUnicode_LATIN_1,
T_UConverter_toUnicode_UTF8,
T_UConverter_toUnicode_UTF16_BE,
T_UConverter_toUnicode_UTF16_LE,
T_UConverter_toUnicode_EBCDIC_STATEFUL,
T_UConverter_toUnicode_ISO_2022
};
static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
{
NULL, /*UCNV_SBCS*/
NULL, /*UCNV_DBCS*/
T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC,
NULL, /*UCNV_LATIN_1*/
T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC,
NULL, /*UTF16_BE*/
NULL, /*UTF16_LE*/
T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC
};
static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
{
NULL, /*UCNV_SBCS*/
NULL, /*UCNV_DBCS*/
T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC,
NULL, /*UCNV_LATIN_1*/
T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC,
NULL, /*UTF16_BE*/
NULL, /*UTF16_LE*/
T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC
};
static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
{
T_UConverter_fromUnicode_SBCS,
T_UConverter_fromUnicode_DBCS,
T_UConverter_fromUnicode_MBCS,
T_UConverter_fromUnicode_LATIN_1,
T_UConverter_fromUnicode_UTF8,
T_UConverter_fromUnicode_UTF16_BE,
T_UConverter_fromUnicode_UTF16_LE,
T_UConverter_fromUnicode_EBCDIC_STATEFUL,
T_UConverter_fromUnicode_ISO_2022
};
static T_GetNextUCharFunction GET_NEXT_UChar_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
{
T_UConverter_getNextUChar_SBCS,
T_UConverter_getNextUChar_DBCS,
T_UConverter_getNextUChar_MBCS,
T_UConverter_getNextUChar_LATIN_1,
T_UConverter_getNextUChar_UTF8,
T_UConverter_getNextUChar_UTF16_BE,
T_UConverter_getNextUChar_UTF16_LE,
T_UConverter_getNextUChar_EBCDIC_STATEFUL,
T_UConverter_getNextUChar_ISO_2022
};
void flushInternalUnicodeBuffer (UConverter * _this,
UChar * myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
void flushInternalCharBuffer (UConverter * _this,
char *myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
static void T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
UConverter * inConverter,
char **target,
@ -219,6 +113,7 @@ void ucnv_close (UConverter * converter)
{
if (converter == NULL)
return;
/* ### this cleanup would be cleaner in a function in UConverterImpl */
if ((converter->sharedData->conversionType == UCNV_ISO_2022) &&
(converter->mode == UCNV_SO))
{
@ -226,9 +121,13 @@ void ucnv_close (UConverter * converter)
uprv_free (converter->extraInfo);
}
umtx_lock (NULL);
converter->sharedData->referenceCounter--;
umtx_unlock (NULL);
if (converter->sharedData->referenceCounter != ~0) {
umtx_lock (NULL);
if (converter->sharedData->referenceCounter != 0) {
converter->sharedData->referenceCounter--;
}
umtx_unlock (NULL);
}
uprv_free (converter);
return;
@ -597,8 +496,7 @@ void ucnv_fromUnicode (UConverter * _this,
}
default:
{
FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this,
_this->sharedData->impl->fromUnicodeWithOffsets(_this,
target,
targetLimit,
source,
@ -611,7 +509,7 @@ void ucnv_fromUnicode (UConverter * _this,
};
}
/*calls the specific conversion routines */
FROM_UNICODE_FUNCTIONS[(int)myConvType] (_this,
_this->sharedData->impl->fromUnicode(_this,
target,
targetLimit,
source,
@ -687,8 +585,7 @@ void ucnv_toUnicode (UConverter * _this,
}
default:
{
TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this,
_this->sharedData->impl->toUnicodeWithOffsets(_this,
target,
targetLimit,
source,
@ -701,7 +598,7 @@ void ucnv_toUnicode (UConverter * _this,
};
}
/*calls the specific conversion routines */
TO_UNICODE_FUNCTIONS[(int) myConvType] (_this,
_this->sharedData->impl->toUnicode(_this,
target,
targetLimit,
source,
@ -959,7 +856,7 @@ UChar ucnv_getNextUChar (UConverter * converter,
}
/*calls the specific conversion routines */
/*as dictated in a code review, avoids a switch statement */
return GET_NEXT_UChar_FUNCTIONS[(int) (converter->sharedData->conversionType)] (converter,
return converter->sharedData->impl->getNextUChar(converter,
source,
sourceLimit,
err);

File diff suppressed because it is too large Load diff

View file

@ -20,6 +20,7 @@
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "ucnv_imp.h"
#include "unicode/udata.h"
#include "unicode/ucnv.h"
@ -30,21 +31,29 @@
#include <stdio.h>
/*Array used to generate ALGORITHMIC_CONVERTERS_HASHTABLE
*should ALWAYS BE EMPTY STRING TERMINATED.
*/
static const char *algorithmicConverterNames[] = {
"LATIN_1",
"UTF8",
"UTF16_BigEndian",
"UTF16_LittleEndian",
"UTF16_PlatformEndian",
"UTF16_OppositeEndian",
"ISO_2022",
"JIS",
"EUC",
"GB",
""
/* Table of the built-in shared-data objects, indexed by a UConverterType value.
 * NOTE(review): the entry order presumably has to follow the UConverterType enum
 * (SBCS, DBCS, MBCS, LATIN_1, UTF8, UTF16 BE, UTF16 LE, EBCDIC stateful, ISO 2022)
 * - confirm against the enum declaration before adding or moving entries.
 */
static const UConverterSharedData *
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
&_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData,
&_ISO2022Data
};
/* Maps the name of each algorithmic converter to its converter type.
 * The "PlatformEndian"/"OppositeEndian" UTF-16 aliases are resolved at
 * compile time through U_IS_BIG_ENDIAN.
 * The table is only read (it is scanned linearly by name), so it is
 * declared const to keep it out of writable storage.
 */
static const struct {
    const char *name;       /* converter name compared with uprv_strcmp() */
    UConverterType type;    /* converter type associated with that name */
} cnvNameType[] = {
    { "LATIN_1", UCNV_LATIN_1 },
    { "UTF8", UCNV_UTF8 },
    { "UTF16_BigEndian", UCNV_UTF16_BigEndian },
    { "UTF16_LittleEndian", UCNV_UTF16_LittleEndian },
#if U_IS_BIG_ENDIAN
    { "UTF16_PlatformEndian", UCNV_UTF16_BigEndian },
    { "UTF16_OppositeEndian", UCNV_UTF16_LittleEndian },
#else
    { "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
    { "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
#endif
    { "ISO_2022", UCNV_ISO_2022 }
};
/*Takes an alias name gets an actual converter file name
@ -52,7 +61,6 @@ static const char *algorithmicConverterNames[] = {
*allocates the memory and returns a new UConverter object
*/
static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err);
static UConverter *createConverterFromAlgorithmicType (const char *realName, UErrorCode * err);
/*Given a file returns a newly allocated CompactByteArray based on the a serialized one */
static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err);
@ -70,7 +78,7 @@ static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile,
static UConverterPlatform getPlatformFromName (char *name);
static int32_t getCodepageNumberFromName (char *name);
static UConverterType getAlgorithmicTypeFromName (const char *realName);
static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realName);
/*these functions initialize the lightweight mutable part of the
@ -84,10 +92,38 @@ static void initializeAlgorithmicConverter (UConverter * myConverter);
static int32_t uhash_hashSharedData (void *sharedData);
/*Defines the struct of a UConverterSharedData, the immutable, shared part of
 *UConverter -
 * This is the definition from ICU 1.4, necessary to read converter data
 * version 1 because the structure is directly embedded in the data.
 * See udata.html for why this is bad (pointers, enums, padding...).
 *
 * Do not reorder, resize or retype any member: ucnv_data_unFlattenClone()
 * rejects data whose structSize differs from sizeof(UConverterSharedData_1_4)
 * and uses structSize as the offset from this header to the table data.
 */
typedef struct
{
uint32_t structSize; /* Size of this structure; validated on load and used as the offset to the table data */
void *dataMemory;
uint32_t referenceCounter; /*used to count number of clients */
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
UConverterPlatform platform; /*platform of the converter (only IBM now) */
int32_t codepage; /*codepage # (now IBM-$codepage) */
UConverterType conversionType; /*conversion type */
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
struct
{ /*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
int8_t subCharLen;
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
}
defaultConverterValues;
UConverterTable *table; /*Pointer to conversion data */
}
UConverterSharedData_1_4;
/**
* Unflatten shared data from a UDATA.
*/
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData *data, UErrorCode *status);
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *data, UErrorCode *status);
/*initializes some global variables */
@ -328,7 +364,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
}
/* clone it. OK to drop the original sharedData */
myConverter->sharedData = ucnv_data_unFlattenClone(myConverter->sharedData, err);
myConverter->sharedData = ucnv_data_unFlattenClone((UConverterSharedData_1_4 *)myConverter->sharedData, err);
myConverter->sharedData->dataMemory = (void*)data; /* for future use */
@ -372,39 +408,16 @@ void
/*returns a converter type from a string
*/
UConverterType
const UConverterSharedData *
getAlgorithmicTypeFromName (const char *realName)
{
if (uprv_strcmp (realName, "UTF8") == 0)
return UCNV_UTF8;
else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0)
return UCNV_UTF16_BigEndian;
else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0)
return UCNV_UTF16_LittleEndian;
else if (uprv_strcmp (realName, "LATIN_1") == 0)
return UCNV_LATIN_1;
else if (uprv_strcmp (realName, "JIS") == 0)
return UCNV_JIS;
else if (uprv_strcmp (realName, "EUC") == 0)
return UCNV_EUC;
else if (uprv_strcmp (realName, "GB") == 0)
return UCNV_GB;
else if (uprv_strcmp (realName, "ISO_2022") == 0)
return UCNV_ISO_2022;
else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0)
# if U_IS_BIG_ENDIAN
return UCNV_UTF16_BigEndian;
# else
return UCNV_UTF16_LittleEndian;
# endif
else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0)
# if U_IS_BIG_ENDIAN
return UCNV_UTF16_LittleEndian;
# else
return UCNV_UTF16_BigEndian;
# endif
else
return UCNV_UNSUPPORTED_CONVERTER;
int i;
for(i=0; i<sizeof(cnvNameType)/sizeof(cnvNameType[0]); ++i) {
if(uprv_strcmp(realName, cnvNameType[i].name)==0) {
return converterData[cnvNameType[i].type];
}
}
return NULL;
}
@ -460,6 +473,7 @@ void shareConverterData (UConverterSharedData * data)
}
umtx_lock (NULL);
/* ### check to see if the element is not already there! */
uhash_put(SHARED_DATA_HASHTABLE,
data,
&err);
@ -482,6 +496,7 @@ UConverterSharedData *getSharedConverterData (const char *name)
/*frees the string of memory blocks associated with a sharedConverter
*if and only if the referenceCounter == 0
* ### this cleanup would be cleaner in a function in UConverterImpl
*/
bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
@ -510,7 +525,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
ucmp16_close (deadSharedData->table->mbcs.toUnicode);
uprv_free (deadSharedData->table);
uprv_free (deadSharedData->table);
};
break;
@ -519,7 +534,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
ucmp16_close (deadSharedData->table->dbcs.toUnicode);
uprv_free (deadSharedData->table);
uprv_free (deadSharedData->table);
};
break;
@ -537,55 +552,6 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
return TRUE;
}
bool_t isDataBasedConverter (const char *name)
{
int32_t i = 0;
bool_t result = FALSE;
UErrorCode err = U_ZERO_ERROR;
/*Lazy evaluates the hashtable */
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL)
{
UHashtable* myHT;
{
myHT = uhash_open ((UHashFunction)uhash_hashIString, &err);
if (U_FAILURE (err)) return FALSE;
while (algorithmicConverterNames[i][0] != '\0')
{
/*Stores in the hashtable a pointer to the statically init'ed array containing
*the names
*/
uhash_put (myHT,
(void *) algorithmicConverterNames[i],
&err);
i++; /*Some Compilers (Solaris WSpro and MSVC-Release Mode
*don't differentiate between i++ and ++i
*so we have to increment in a line by itself
*/
}
}
umtx_lock (NULL);
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL) ALGORITHMIC_CONVERTERS_HASHTABLE = myHT;
else uhash_close(myHT);
umtx_unlock (NULL);
}
if (uhash_get (ALGORITHMIC_CONVERTERS_HASHTABLE,
uhash_hashIString (name)) == NULL)
{
result = TRUE;
}
return result;
}
/*Logic determines if the converter is Algorithmic AND/OR cached
*depending on that:
* -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
@ -636,7 +602,8 @@ UConverter *
}
}
if (isDataBasedConverter (realName))
mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName (realName);
if (mySharedConverterData == NULL)
{
mySharedConverterData = getSharedConverterData (realName);
@ -652,8 +619,6 @@ UConverter *
else
{
/*shared it with other library clients */
shareConverterData (myUConverter->sharedData);
return myUConverter;
}
@ -668,6 +633,7 @@ UConverter *
return NULL;
}
/* ### this is unsafe: the shared data could have been deleted since sharing or getting it - these operations should increase the counter! */
/*update the reference counter: one more client */
umtx_lock (NULL);
mySharedConverterData->referenceCounter++;
@ -681,45 +647,24 @@ UConverter *
}
else
{
/*with have an algorithmic converter */
mySharedConverterData = getSharedConverterData (realName);
/*Non cached */
if (mySharedConverterData == NULL)
/* ### we have an algorithmic converter, it does not need to be cached?! */
if (getSharedConverterData (realName) == NULL)
{
myUConverter = createConverterFromAlgorithmicType (realName, err);
if (U_FAILURE (*err) || (myUConverter == NULL))
{
uprv_free (myUConverter);
return NULL;
}
else
{
/* put the shared object in shared table */
shareConverterData (myUConverter->sharedData);
return myUConverter;
}
}
else
{
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myUConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/*Increase the reference counter */
umtx_lock (NULL);
mySharedConverterData->referenceCounter++;
umtx_unlock (NULL);
/*initializes the converter */
myUConverter->sharedData = mySharedConverterData;
initializeAlgorithmicConverter (myUConverter);
return myUConverter;
/* put the shared object in shared table */
shareConverterData (mySharedConverterData);
}
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myUConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/*initializes the converter */
uprv_memset(myUConverter, 0, sizeof(UConverter));
myUConverter->sharedData = mySharedConverterData;
initializeAlgorithmicConverter (myUConverter);
return myUConverter;
}
@ -751,21 +696,11 @@ void initializeDataConverter (UConverter * myUConverter)
}
/* This function initializes algorithmic converters
* based on there type
* based on their type
*/
void
initializeAlgorithmicConverter (UConverter * myConverter)
{
char UTF8_subChar[] = {(char) 0xFF, (char) 0xFF, (char) 0xFF};
char UTF16BE_subChar[] = {(char) 0xFF, (char) 0xFD};
char UTF16LE_subChar[] = {(char) 0xFD, (char) 0xFF};
char EUC_subChar[] = {(char) 0xAF, (char) 0xFE};
char GB_subChar[] = {(char) 0xFF, (char) 0xFF};
char JIS_subChar[] = {(char) 0xFF, (char) 0xFF};
char LATIN1_subChar = 0x1A;
myConverter->mode = UCNV_SI;
myConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
myConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
@ -774,216 +709,79 @@ void
myConverter->extraInfo = NULL;
myConverter->fromUnicodeStatus = 0;
myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen;
uprv_memcpy (myConverter->subChar, myConverter->sharedData->defaultConverterValues.subChar, UCNV_MAX_SUBCHAR_LEN);
/* ### it would be cleaner to have the following in a function in UConverterImpl, with a UErrorCode */
switch (myConverter->sharedData->conversionType)
{
case UCNV_UTF8:
{
myConverter->sharedData->minBytesPerChar = 1;
myConverter->sharedData->maxBytesPerChar = 4;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 3;
myConverter->subCharLen = 3;
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0; /* srl */
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1208;
uprv_strcpy(myConverter->sharedData->name, "UTF8");
uprv_memcpy (myConverter->subChar, UTF8_subChar, 3);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
break;
}
case UCNV_LATIN_1:
{
myConverter->sharedData->minBytesPerChar = 1;
myConverter->sharedData->maxBytesPerChar = 1;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
myConverter->subCharLen = 1;
myConverter->toUnicodeStatus = 0;
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 819;
uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
*(myConverter->subChar) = LATIN1_subChar;
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
break;
}
case UCNV_UTF16_BigEndian:
{
myConverter->sharedData->minBytesPerChar = 2;
myConverter->sharedData->maxBytesPerChar = 2;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0;
uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1200;
uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
break;
}
case UCNV_UTF16_LittleEndian:
{
myConverter->sharedData->minBytesPerChar = 2;
myConverter->sharedData->maxBytesPerChar = 2;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0;
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1200;
uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
break;
}
case UCNV_EUC:
{
myConverter->sharedData->minBytesPerChar = 1;
myConverter->sharedData->maxBytesPerChar = 2;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
uprv_memcpy (myConverter->subChar, EUC_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
break;
}
case UCNV_ISO_2022:
{
myConverter->charErrorBuffer[0] = 0x1b;
myConverter->charErrorBuffer[1] = 0x25;
myConverter->charErrorBuffer[2] = 0x42;
myConverter->charErrorBufferLength = 3;
myConverter->sharedData->minBytesPerChar = 1;
myConverter->sharedData->maxBytesPerChar = 3;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
myConverter->subCharLen = 1;
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0; /* srl */
myConverter->sharedData->codepage = 2022;
uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
*(myConverter->subChar) = LATIN1_subChar;
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
/* ### check for extraInfo==NULL !! does this need to be allocated at all? */
((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
break;
}
case UCNV_GB:
{
myConverter->sharedData->minBytesPerChar = 2;
myConverter->sharedData->maxBytesPerChar = 2;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
uprv_memcpy (myConverter->subChar, GB_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
break;
}
case UCNV_JIS:
{
myConverter->sharedData->minBytesPerChar = 2;
myConverter->sharedData->maxBytesPerChar = 2;
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
uprv_memcpy (myConverter->subChar, JIS_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
break;
}
default:
break;
};
myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
}
/*This function creates an algorithmic converter
*Note That even algorithmic converters are shared
* (The UConverterSharedData->table == NULL since
* there are no tables)
*for uniformity of design and control flow
*/
UConverter *
createConverterFromAlgorithmicType (const char *actualName, UErrorCode * err)
{
int32_t i = 0;
UConverter *myConverter = NULL;
UConverterSharedData *mySharedData = NULL;
UConverterType myType = getAlgorithmicTypeFromName (actualName);
if (U_FAILURE (*err))
return NULL;
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
myConverter->sharedData = NULL;
mySharedData = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
if (mySharedData == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
uprv_free (myConverter);
return NULL;
}
mySharedData->structSize = sizeof(UConverterSharedData);
mySharedData->table = NULL;
mySharedData->dataMemory = NULL;
uprv_strcpy (mySharedData->name, actualName);
/*Initializes the referenceCounter to 1 */
mySharedData->referenceCounter = 1;
mySharedData->platform = UCNV_UNKNOWN;
mySharedData->codepage = 0;
mySharedData->conversionType = myType;
myConverter->sharedData = mySharedData;
initializeAlgorithmicConverter (myConverter);
return myConverter;
}
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *source, UErrorCode *status)
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *source, UErrorCode *status)
{
const uint8_t *raw, *oldraw;
UConverterSharedData *data = NULL;
UConverterType type = source->conversionType;
if(U_FAILURE(*status))
return NULL;
if(source->structSize != sizeof(UConverterSharedData))
if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
converterData[type]->referenceCounter != 1 ||
source->structSize != sizeof(UConverterSharedData_1_4))
{
*status = U_INVALID_TABLE_FORMAT;
return NULL;
}
data = (UConverterSharedData*) malloc(sizeof(UConverterSharedData));
raw = (uint8_t*)source;
uprv_memcpy(data,source,sizeof(UConverterSharedData));
raw += data->structSize;
data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
if(data == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/* data->table = (UConverterTable*)raw; */
/* copy initial values from the static structure for this type */
uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
/* ### it would be much more efficient if the table were a direct member, not a pointer */
data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
if(data->table == NULL) {
uprv_free(data);
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/* fill in fields from the loaded data */
data->name = source->name; /* ### this could/should come from the caller - should be the same as the canonical name?!! */
data->codepage = source->codepage;
data->platform = source->platform;
data->minBytesPerChar = source->minBytesPerChar;
data->maxBytesPerChar = source->maxBytesPerChar;
uprv_memcpy(&data->defaultConverterValues, &source->defaultConverterValues, sizeof(data->defaultConverterValues));
raw = (uint8_t*)source + source->structSize;
/* the checks above made sure that the type is valid for a data-based converter */
switch (data->conversionType)
{
case UCNV_SBCS:
data->table = malloc(sizeof(UConverterSBCSTable));
data->table->sbcs.toUnicode = (UChar*)raw;
raw += sizeof(UChar)*256;
@ -993,22 +791,20 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
case UCNV_EBCDIC_STATEFUL:
case UCNV_DBCS:
data->table = uprv_malloc(sizeof(UConverterDBCSTable));
oldraw = raw;
data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status);
while((raw-oldraw)%4) /* pad to 4 */
raw++;
/* pad to 4 */
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3);
}
data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status);
break;
case UCNV_MBCS:
data->table = uprv_malloc(sizeof(UConverterMBCSTable));
data->table->mbcs.starters = (bool_t*)raw;
raw += sizeof(bool_t)*256;
@ -1016,24 +812,15 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
data->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, status);
while((raw-oldraw)%4) /* pad to 4 */
raw++;
/* pad to 4 */
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3);
}
data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status);
break;
default:
*status = U_INVALID_TABLE_FORMAT;
return NULL;
}
return data;
}

File diff suppressed because it is too large Load diff

View file

@ -15,286 +15,150 @@
#include "unicode/utypes.h"
#include "unicode/ucnv_bld.h"
U_CDECL_BEGIN
#define missingCharMarker 0xFFFF
#define missingUCharMarker 0xFFFD
/*
 * Runs the converter's from-Unicode error callback (fromUCharErrorBehaviour).
 * If that callback is UCNV_FROM_U_CALLBACK_STOP the macro executes `break`
 * instead of calling it. Otherwise the callback receives pointers to local
 * copies of the current target and source positions, and afterwards
 * myTargetIndex and mySourceIndex are recomputed from those copies.
 *
 * Caveats:
 *  - expands to a bare if/else whose `break` is meant to leave the caller's
 *    enclosing loop or switch, so it cannot be wrapped in do { } while (0)
 *    and must not be used as the body of an unbraced if;
 *  - arguments are not parenthesized and several are evaluated more than
 *    once; pass plain variables only.
 */
#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
char *myTargetCopy = myTarget + myTargetIndex; \
const UChar *mySourceCopy = mySource + mySourceIndex; \
/*copies current values for the ErrorFunctor to update */ \
/*Calls the ErrorFunctor */ \
_this->fromUCharErrorBehaviour (_this, \
(char **) &myTargetCopy, \
targetLimit, \
(const UChar **) &mySourceCopy, \
sourceLimit, \
offsets, \
flush, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
mySourceIndex = (mySourceCopy - mySource) ; \
myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
}
/*
 * Runs the converter's to-Unicode error callback (fromCharErrorBehaviour).
 * If that callback is UCNV_TO_U_CALLBACK_STOP the macro executes `break`
 * instead of calling it. Otherwise the callback receives pointers to local
 * copies of the current target and source positions, and afterwards
 * myTargetIndex and mySourceIndex are recomputed from those copies.
 *
 * Caveats:
 *  - expands to a bare if/else whose `break` is meant to leave the caller's
 *    enclosing loop or switch, so it cannot be wrapped in do { } while (0)
 *    and must not be used as the body of an unbraced if;
 *  - arguments are not parenthesized and several are evaluated more than
 *    once; pass plain variables only.
 */
#define ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
UChar *myTargetCopy = myTarget + myTargetIndex; \
const char *mySourceCopy = mySource + mySourceIndex; \
/*Calls the ErrorFunctor */ \
_this->fromCharErrorBehaviour (_this, \
&myTargetCopy, \
targetLimit, \
(const char **) &mySourceCopy, \
sourceLimit, \
offsets, \
flush, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
mySourceIndex = ((char*)mySourceCopy - (char*)mySource); \
myTargetIndex = (myTargetCopy - myTarget); \
}
/*
 * Offsets-tracking variant of the from-Unicode error-callback invocation.
 * If the callback is UCNV_FROM_U_CALLBACK_STOP the macro executes `break`.
 * Otherwise the callback is called with `offsets + myTargetIndex` as its
 * offsets argument; afterwards the source/target indexes are recomputed and
 * `currentOffset` is added to every offsets[] entry between the old and the
 * new myTargetIndex.
 *
 * Caveats:
 *  - `currentOffset` is NOT a macro parameter; a variable of that name must
 *    be in scope at the expansion site;
 *  - expands to a bare if/else whose `break` is meant to leave the caller's
 *    enclosing loop or switch, so it cannot be wrapped in do { } while (0);
 *  - arguments are not parenthesized and several are evaluated more than
 *    once; pass plain variables only.
 */
#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
else \
{ \
char *myTargetCopy = myTarget + myTargetIndex; \
const UChar *mySourceCopy = mySource + mySourceIndex; \
int32_t My_i = myTargetIndex; \
/*copies current values for the ErrorFunctor to update */ \
/*Calls the ErrorFunctor */ \
_this->fromUCharErrorBehaviour (_this, \
(char **) &myTargetCopy, \
targetLimit, \
(const UChar **) &mySourceCopy, \
sourceLimit, \
offsets + myTargetIndex, \
flush, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
mySourceIndex = mySourceCopy - mySource ; \
myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \
}
/*
 * Offsets-tracking variant of the to-Unicode error-callback invocation.
 * If the callback is UCNV_TO_U_CALLBACK_STOP the macro executes `break`.
 * Otherwise the callback is called with `offsets + myTargetIndex` as its
 * offsets argument; afterwards the source/target indexes are recomputed and
 * `currentOffset` is added to every offsets[] entry between the old and the
 * new myTargetIndex.
 *
 * Caveats:
 *  - `currentOffset` is NOT a macro parameter; a variable of that name must
 *    be in scope at the expansion site;
 *  - expands to a bare if/else whose `break` is meant to leave the caller's
 *    enclosing loop or switch, so it cannot be wrapped in do { } while (0);
 *  - arguments are not parenthesized and several are evaluated more than
 *    once; pass plain variables only.
 */
#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
else \
{ \
UChar *myTargetCopy = myTarget + myTargetIndex; \
const char *mySourceCopy = mySource + mySourceIndex; \
int32_t My_i = myTargetIndex; \
_this->fromCharErrorBehaviour (_this, \
&myTargetCopy, \
targetLimit, \
(const char **) &mySourceCopy, \
sourceLimit, \
offsets + myTargetIndex, \
flush, \
err); \
/*Update the local Indexes so that the conversion can restart at the right points */ \
mySourceIndex = (char *)mySourceCopy - (char*)mySource; \
myTargetIndex = ((UChar*)myTargetCopy - (UChar*)myTarget); \
for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \
}
/* Signature of a codepage-to-Unicode conversion routine.
 * Arguments, in order: converter, target, targetLimit, source, sourceLimit,
 * offsets, flush, err.
 * NOTE(review): target and source are passed by address, presumably so the
 * routine can advance them - confirm against the implementations.
 */
typedef void (*T_ToUnicodeFunction) (UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
/* Signature of a Unicode-to-codepage conversion routine.
 * Arguments, in order: converter, target, targetLimit, source, sourceLimit,
 * offsets, flush, err.
 * NOTE(review): target and source are passed by address, presumably so the
 * routine can advance them - confirm against the implementations.
 */
typedef void (*T_FromUnicodeFunction) (UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/* Signature of a routine that returns one UChar converted from the source.
 * Arguments, in order: converter, source, sourceLimit, err.
 */
typedef UChar (*T_GetNextUCharFunction) (UConverter *,
const char **,
const char *,
UErrorCode *);
/* NOTE(review): implementation not visible in this header; presumably a
 * success test on an error code -- confirm against the definition. */
bool_t CONVERSION_U_SUCCESS (UErrorCode err);
/* SBCS: codepage bytes -> Unicode */
void T_UConverter_toUnicode_SBCS (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* NOTE(review): implementation not visible here; by its name and parameters it
 * appears to move buffered UChars held by the converter into myTarget --
 * confirm against the definition. */
void flushInternalUnicodeBuffer (UConverter * _this,
UChar * myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
/* SBCS: Unicode -> codepage bytes */
void T_UConverter_fromUnicode_SBCS (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* NOTE(review): char-buffer counterpart of flushInternalUnicodeBuffer;
 * implementation not visible here -- confirm against the definition. */
void flushInternalCharBuffer (UConverter * _this,
char *myTarget,
int32_t * myTargetIndex,
int32_t targetLength,
int32_t** offsets,
UErrorCode * err);
/* MBCS: codepage bytes -> Unicode */
void T_UConverter_toUnicode_MBCS (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
struct UConverterImpl {
UConverterType type;
void T_UConverter_fromUnicode_MBCS (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
T_ToUnicodeFunction toUnicode;
T_ToUnicodeFunction toUnicodeWithOffsets;
T_FromUnicodeFunction fromUnicode;
T_FromUnicodeFunction fromUnicodeWithOffsets;
T_GetNextUCharFunction getNextUChar;
};
/* MBCS: Unicode -> codepage bytes, also filling the offsets array */
void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Shared-data objects for the individual converter types; each is defined in
 * the implementation file of its converter (e.g. _MBCSData next to the MBCS
 * routines, _SBCSData and _DBCSData next to the SBCS/DBCS routines). */
extern UConverterSharedData
_SBCSData, _DBCSData, _MBCSData, _Latin1Data,
_UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData,
_ISO2022Data;
/* ---- DBCS ---- */
void T_UConverter_toUnicode_DBCS (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_DBCS (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- UTF-16 big endian, plus fromUnicode for little endian ---- */
void T_UConverter_fromUnicode_UTF16_BE (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_UTF16_BE (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_UTF16_LE (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- EBCDIC stateful (plain and offsets-filling variants) ---- */
void T_UConverter_toUnicode_EBCDIC_STATEFUL(UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_EBCDIC_STATEFUL(UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- ISO 2022 (plain and offsets-filling variants) ---- */
void T_UConverter_toUnicode_ISO_2022(UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_ISO_2022(UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- UTF-16 little endian: toUnicode (its fromUnicode is declared with the UTF-16 BE group) ---- */
void T_UConverter_toUnicode_UTF16_LE (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- UTF-8 (plain and offsets-filling variants) ---- */
void T_UConverter_fromUnicode_UTF8 (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_UTF8 (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* ---- Latin-1 ---- */
void T_UConverter_fromUnicode_LATIN_1 (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
void T_UConverter_toUnicode_LATIN_1 (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/*
 * Single-character readers, one per converter type.  All share the
 * T_GetNextUCharFunction signature: converter, in/out source pointer,
 * source limit, in/out error code.
 */
UChar T_UConverter_getNextUChar_LATIN_1 (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_SBCS (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_DBCS (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_MBCS (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_UTF8 (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_UTF16_BE (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_UTF16_LE (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
UChar T_UConverter_getNextUChar_ISO_2022 (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
U_CDECL_END
#endif /* UCNV_CNV */

View file

@ -0,0 +1,552 @@
/*
**********************************************************************
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_mbcs.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2000feb03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
/* MBCS --------------------------------------------------------------------- */
/*
 * Converts MBCS codepage bytes at *source into UChars at *target.
 *
 * A byte flagged in the shared `starters` table, seen while no lead byte is
 * pending, is parked in _this->toUnicodeStatus; the following byte is then
 * combined with it into a 16-bit value for the toUnicode table lookup.
 *
 * Unmappable input sets U_INVALID_CHAR_FOUND, records the offending byte(s)
 * in invalidCharBuffer and goes through ToU_CALLBACK_MACRO.  When the target
 * is full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set.  If flush is
 * TRUE, all input was consumed and a lead byte is still pending,
 * U_TRUNCATED_CHAR_FOUND is set (unless an error is already recorded) and the
 * pending state is dropped.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` is only handed on to the callback macro.
 */
void T_UConverter_toUnicode_MBCS (UConverter * _this,
                                  UChar ** target,
                                  const UChar * targetLimit,
                                  const char **source,
                                  const char *sourceLimit,
                                  int32_t *offsets,
                                  bool_t flush,
                                  UErrorCode * err)
{
    const char *mySource = *source;
    UChar *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    const int32_t targetLength = targetLimit - myTarget;
    const int32_t sourceLength = sourceLimit - mySource;
    CompactShortArray *toUnicodeTable = _this->sharedData->table->mbcs.toUnicode;
    bool_t *leadByteFlags = _this->sharedData->table->mbcs.starters;
    UChar codeUnit = 0x0000;      /* one or two source bytes packed into 16 bits */
    UChar unicodeValue = 0x0000;  /* result of the table lookup */

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        codeUnit = (unsigned char) (mySource[mySourceIndex++]);

        /* a fresh lead byte: remember it and wait for its trail byte */
        if (leadByteFlags[(uint8_t) codeUnit] &&
            (_this->toUnicodeStatus == 0x00))
        {
            _this->toUnicodeStatus = (unsigned char) codeUnit;
            continue;
        }

        /* a lead byte is pending: prepend it to the current byte */
        if (_this->toUnicodeStatus != 0x00)
        {
            codeUnit |= (UChar) (_this->toUnicodeStatus << 8);
            _this->toUnicodeStatus = 0x00;
        }

        unicodeValue = (UChar) ucmp16_getu (toUnicodeTable, codeUnit);
        if (unicodeValue != missingUCharMarker)
        {
            myTarget[myTargetIndex++] = unicodeValue;
            continue;
        }

        /* no mapping: record the bad sequence and let the callback decide */
        *err = U_INVALID_CHAR_FOUND;
        if (codeUnit > 0xff)
        {
            _this->invalidCharLength = 2;
            _this->invalidCharBuffer[0] = (char) (codeUnit >> 8);
            _this->invalidCharBuffer[1] = (char) codeUnit;
        }
        else
        {
            _this->invalidCharLength = 1;
            _this->invalidCharBuffer[0] = (char) codeUnit;
        }

        /* expands to a statement that may `break` out of this loop */
        ToU_CALLBACK_MACRO(_this,
                           myTarget,
                           myTargetIndex,
                           targetLimit,
                           mySource,
                           mySourceIndex,
                           sourceLimit,
                           offsets,
                           flush,
                           err);

        if (U_FAILURE (*err)) break;
        _this->invalidCharLength = 0;
    }

    /* A lead byte still pending at the very end of a flushed stream means the
     * input was cut off in the middle of a character. */
    if (_this->toUnicodeStatus
        && (mySourceIndex == sourceLength)
        && (flush == TRUE)
        && U_SUCCESS(*err))
    {
        *err = U_TRUNCATED_CHAR_FOUND;
        _this->toUnicodeStatus = 0x00;
    }

    *target += myTargetIndex;
    *source += mySourceIndex;
}
/*
 * Same conversion as T_UConverter_toUnicode_MBCS, but additionally records in
 * offsets[i] the source index at which the character that produced target[i]
 * starts.
 *
 * Fixes relative to the previous revision:
 *  - The offset of a converted character is derived from the number of source
 *    bytes it occupied (one or two), not from the magnitude of the resulting
 *    Unicode value.  A single byte mapping above U+00FF used to be reported
 *    one position too early.
 *  - The offset handed to the error-callback macro (currentOffset) is the
 *    start of the offending byte sequence, computed from the source index.
 *    The previous code read offsets[myTargetIndex - 1] and a "previous source
 *    char" variable, both of which are undefined when the very first
 *    character is invalid.
 *
 * Error handling, flush handling and the advancing of *target / *source are
 * unchanged: U_INVALID_CHAR_FOUND plus callback for unmappable input,
 * U_INDEX_OUTOFBOUNDS_ERROR when the target is full, U_TRUNCATED_CHAR_FOUND
 * when a flushed stream ends on a pending lead byte.
 *
 * NOTE(review): when the lead byte of a character was consumed by an earlier
 * call, the computed start offset is -1 (relative to this source buffer) --
 * confirm that callers accept that.
 */
void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
                                                UChar ** target,
                                                const UChar * targetLimit,
                                                const char **source,
                                                const char *sourceLimit,
                                                int32_t *offsets,
                                                bool_t flush,
                                                UErrorCode * err)
{
    const char *mySource = *source;
    UChar *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    int32_t targetLength = targetLimit - myTarget;
    int32_t sourceLength = sourceLimit - mySource;
    CompactShortArray *myToUnicode = NULL;
    UChar targetUniChar = 0x0000;
    UChar mySourceChar = 0x0000;
    bool_t *myStarters = NULL;

    myToUnicode = _this->sharedData->table->mbcs.toUnicode;
    myStarters = _this->sharedData->table->mbcs.starters;

    while (mySourceIndex < sourceLength)
    {
        if (myTargetIndex < targetLength)
        {
            /* fetch the next source byte */
            mySourceChar = (unsigned char) (mySource[mySourceIndex++]);

            if (myStarters[(uint8_t) mySourceChar] &&
                (_this->toUnicodeStatus == 0x00))
            {
                /* lead byte: park it until the trail byte arrives */
                _this->toUnicodeStatus = (unsigned char) mySourceChar;
            }
            else
            {
                int32_t sourceCharLength;  /* source bytes making up this character */

                /* a lead byte is pending: prepend it to the current byte */
                if (_this->toUnicodeStatus != 0x00)
                {
                    mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
                    _this->toUnicodeStatus = 0x00;
                }
                sourceCharLength = (mySourceChar > 0x00FF) ? 2 : 1;

                targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);

                if (targetUniChar != missingUCharMarker)
                {
                    /* offset points at the first byte of the source character */
                    offsets[myTargetIndex] = mySourceIndex - sourceCharLength;
                    myTarget[myTargetIndex++] = targetUniChar;
                }
                else
                {
                    /* read implicitly by ToU_CALLBACK_OFFSETS_LOGIC_MACRO */
                    int32_t currentOffset = mySourceIndex - sourceCharLength;

                    *err = U_INVALID_CHAR_FOUND;
                    if (mySourceChar > 0xff)
                    {
                        _this->invalidCharLength = 2;
                        _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
                        _this->invalidCharBuffer[1] = (char) mySourceChar;
                    }
                    else
                    {
                        _this->invalidCharLength = 1;
                        _this->invalidCharBuffer[0] = (char) mySourceChar;
                    }

                    /* expands to a statement that may `break` out of the loop */
                    ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
                                                     myTarget,
                                                     myTargetIndex,
                                                     targetLimit,
                                                     mySource,
                                                     mySourceIndex,
                                                     sourceLimit,
                                                     offsets,
                                                     flush,
                                                     err);

                    if (U_FAILURE (*err)) break;
                    _this->invalidCharLength = 0;
                }
            }
        }
        else
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }
    }

    /* A lead byte still pending at the end of a flushed stream means the
     * input was truncated in the middle of a character. */
    if (_this->toUnicodeStatus
        && (mySourceIndex == sourceLength)
        && (flush == TRUE))
    {
        if (U_SUCCESS(*err))
        {
            *err = U_TRUNCATED_CHAR_FOUND;
            _this->toUnicodeStatus = 0x00;
        }
    }

    *target += myTargetIndex;
    *source += mySourceIndex;

    return;
}
/*
 * Converts UChars at *source into MBCS codepage bytes at *target.
 *
 * Each UChar is looked up in the shared fromUnicode table.  A result up to
 * 0x00FF is written as one byte, anything larger as two bytes (high byte
 * first).  If the target fills up between the two bytes, the second byte is
 * stored in _this->charErrorBuffer and U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * Unmappable UChars set U_INVALID_CHAR_FOUND, are recorded in
 * invalidUCharBuffer and go through FromU_CALLBACK_MACRO.  When the target is
 * full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` and `flush` are only handed on to the callback macro.
 *
 * Cleanup relative to the previous revision: the never-used local
 * targetUniCharByteNum and a stray empty statement were removed; behaviour is
 * unchanged.
 */
void T_UConverter_fromUnicode_MBCS (UConverter * _this,
                                    char **target,
                                    const char *targetLimit,
                                    const UChar ** source,
                                    const UChar * sourceLimit,
                                    int32_t *offsets,
                                    bool_t flush,
                                    UErrorCode * err)
{
    const UChar *mySource = *source;
    char *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    int32_t targetLength = targetLimit - myTarget;
    int32_t sourceLength = sourceLimit - mySource;
    CompactShortArray *myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
    UChar targetUniChar = 0x0000;
    UChar mySourceChar = 0x0000;

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        mySourceChar = (UChar) mySource[mySourceIndex++];
        targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);

        if (targetUniChar == missingCharMarker)
        {
            /* no mapping: record the UChar and let the callback decide */
            *err = U_INVALID_CHAR_FOUND;
            _this->invalidUCharBuffer[0] = (UChar) mySourceChar;
            _this->invalidUCharLength = 1;

            /* expands to a statement that may `break` out of this loop */
            FromU_CALLBACK_MACRO(_this,
                                 myTarget,
                                 myTargetIndex,
                                 targetLimit,
                                 mySource,
                                 mySourceIndex,
                                 sourceLimit,
                                 offsets,
                                 flush,
                                 err);

            if (U_FAILURE (*err)) break;
            _this->invalidUCharLength = 0;
        }
        else if (targetUniChar <= 0x00FF)
        {
            /* single-byte result */
            myTarget[myTargetIndex++] = (char) targetUniChar;
        }
        else
        {
            /* double-byte result, high byte first */
            myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
            if (myTargetIndex < targetLength)
            {
                myTarget[myTargetIndex++] = (char) targetUniChar;
            }
            else
            {
                /* target full mid-character: keep the trail byte for later;
                 * the loop ends on its next pass (or right away if the source
                 * is exhausted) */
                _this->charErrorBuffer[0] = (char) targetUniChar;
                _this->charErrorBufferLength = 1;
                *err = U_INDEX_OUTOFBOUNDS_ERROR;
            }
        }
    }

    *target += myTargetIndex;
    *source += mySourceIndex;

    return;
}
/*
 * Same conversion as T_UConverter_fromUnicode_MBCS, but additionally records
 * in offsets[i] the index of the source UChar that produced target byte i
 * (both bytes of a double-byte result get the same offset).
 *
 * Unmappable UChars set U_INVALID_CHAR_FOUND, are recorded in
 * invalidUCharBuffer and go through FromU_CALLBACK_OFFSETS_LOGIC_MACRO, which
 * implicitly reads the local `currentOffset` (index of the offending UChar).
 * When the target is full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is
 * set; if it fills up between the two bytes of a character, the trail byte is
 * stored in _this->charErrorBuffer.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.
 *
 * Cleanup relative to the previous revision: the never-used locals
 * targetUniCharByteNum, originalOffsets and offsetsAnchor and a stray empty
 * statement were removed; behaviour is unchanged.
 */
void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
                                                  char **target,
                                                  const char *targetLimit,
                                                  const UChar ** source,
                                                  const UChar * sourceLimit,
                                                  int32_t *offsets,
                                                  bool_t flush,
                                                  UErrorCode * err)
{
    const UChar *mySource = *source;
    char *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    int32_t targetLength = targetLimit - myTarget;
    int32_t sourceLength = sourceLimit - mySource;
    CompactShortArray *myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
    UChar targetUniChar = 0x0000;
    UChar mySourceChar = 0x0000;

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        mySourceChar = (UChar) mySource[mySourceIndex++];
        targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);

        if (targetUniChar == missingCharMarker)
        {
            /* read implicitly by FromU_CALLBACK_OFFSETS_LOGIC_MACRO */
            int32_t currentOffset = mySourceIndex - 1;

            *err = U_INVALID_CHAR_FOUND;
            _this->invalidUCharBuffer[0] = (UChar) mySourceChar;
            _this->invalidUCharLength = 1;

            /* expands to a statement that may `break` out of this loop */
            FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
                                               myTarget,
                                               myTargetIndex,
                                               targetLimit,
                                               mySource,
                                               mySourceIndex,
                                               sourceLimit,
                                               offsets,
                                               flush,
                                               err);

            if (U_FAILURE (*err)) break;
            _this->invalidUCharLength = 0;
        }
        else
        {
            /* the first (or only) output byte maps back to the current UChar */
            offsets[myTargetIndex] = mySourceIndex - 1;

            if (targetUniChar <= 0x00FF)
            {
                myTarget[myTargetIndex++] = (char) targetUniChar;
            }
            else
            {
                myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
                if (myTargetIndex < targetLength)
                {
                    offsets[myTargetIndex] = mySourceIndex - 1;
                    myTarget[myTargetIndex++] = (char) targetUniChar;
                }
                else
                {
                    /* target full mid-character: keep the trail byte for
                     * later; the loop ends on its next pass (or right away if
                     * the source is exhausted) */
                    _this->charErrorBuffer[0] = (char) targetUniChar;
                    _this->charErrorBufferLength = 1;
                    *err = U_INDEX_OUTOFBOUNDS_ERROR;
                }
            }
        }
    }

    *target += myTargetIndex;
    *source += mySourceIndex;

    return;
}
/*
 * Reads one MBCS character starting at *source and returns its UChar.
 *
 * - No byte available: sets U_INDEX_OUTOFBOUNDS_ERROR, returns 0xFFFD.
 * - First byte is not flagged in the `starters` table: it is looked up on its
 *   own and *source advances by one.
 * - First byte is a lead byte: two bytes are combined (lead byte in the high
 *   half) for the lookup and *source advances by two; if only one byte is
 *   left, U_TRUNCATED_CHAR_FOUND is set and 0xFFFD returned.
 * - Lookup yields 0xFFFD: U_INVALID_CHAR_FOUND is set, *source is reset to
 *   where it was on entry, and the converter's fromCharErrorBehaviour
 *   callback is invoked with a one-UChar target (myUChar) and a private copy
 *   of the advanced source position.  A U_INDEX_OUTOFBOUNDS_ERROR left by the
 *   callback is turned back into U_ZERO_ERROR.
 *
 * Fix relative to the previous revision: the single-byte lookup index is now
 * built from the byte as an unsigned value.  Converting plain `char` straight
 * to UChar sign-extends bytes >= 0x80 to 0xFFxx where `char` is signed, which
 * indexed the wrong table entry.
 */
UChar T_UConverter_getNextUChar_MBCS(UConverter* converter,
                                     const char** source,
                                     const char* sourceLimit,
                                     UErrorCode* err)
{
    UChar myUChar;
    UChar codeUnit;   /* lookup index; kept in a plain variable for the ucmp16_getu lookup */
    /* remembered so the source can be rewound on an invalid character */
    char const *sourceInitial = *source;

    /* input boundary check */
    if ((*source)+1 > sourceLimit)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xFFFD;
    }

    if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE)
    {
        /* not a lead byte: single-byte character */
        codeUnit = (UChar)(uint8_t)(**source);
        myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
                              codeUnit);
        (*source)++;
    }
    else
    {
        /* lead byte: the character needs a second byte */
        if ((*source + 2) > sourceLimit)
        {
            *err = U_TRUNCATED_CHAR_FOUND;
            return 0xFFFD;
        }

        codeUnit = (UChar)((((uint8_t)(**source)) << 8) | ((uint8_t)*((*source)+1)));
        myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
                              codeUnit);
        (*source) += 2;
    }

    if (myUChar != 0xFFFD) return myUChar;
    else
    {
        const char* sourceFinal = *source;
        UChar* myUCharPtr = &myUChar;

        *err = U_INVALID_CHAR_FOUND;
        /* rewind the caller's source pointer to the start of the character */
        *source = sourceInitial;

        /* the callback may write to myUChar and to the converter's internal
         * buffers */
        converter->fromCharErrorBehaviour(converter,
                                          &myUCharPtr,
                                          myUCharPtr + 1,
                                          &sourceFinal,
                                          sourceLimit,
                                          NULL,
                                          TRUE,
                                          err);

        /* makes the internal caching transparent to the user */
        if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;

        return myUChar;
    }
}
/* Function table for the MBCS converter.  Entries follow the member order of
 * UConverterImpl: type, toUnicode, toUnicodeWithOffsets, fromUnicode,
 * fromUnicodeWithOffsets, getNextUChar. */
static UConverterImpl _MBCSImpl={
UCNV_MBCS,
T_UConverter_toUnicode_MBCS,
T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC,
T_UConverter_fromUnicode_MBCS,
T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC,
T_UConverter_getNextUChar_MBCS
};
/* Shared data object for MBCS converters.  Positional initializers, matching
 * the revised UConverterSharedData layout: structSize, referenceCounter,
 * dataMemory, table, impl, name, codepage, platform, conversionType,
 * minBytesPerChar, maxBytesPerChar, then defaultConverterValues
 * { toUnicodeStatus, subCharLen, subChar[] }. */
extern UConverterSharedData _MBCSData={
sizeof(UConverterSharedData), 1,
NULL, NULL, &_MBCSImpl, "MBCS",
0, UCNV_IBM, UCNV_MBCS, 1, 1,
{ 0, 1, 0, 0, 0, 0 }
};

View file

@ -0,0 +1,486 @@
/*
**********************************************************************
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_sbcs.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2000feb03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
/* SBCS --------------------------------------------------------------------- */
/*
 * Converts single-byte codepage data at *source into UChars at *target.
 *
 * Every source byte indexes the shared sbcs.toUnicode array directly.  A
 * result equal to missingUCharMarker sets U_INVALID_CHAR_FOUND, records the
 * byte in invalidCharBuffer and goes through ToU_CALLBACK_MACRO.  When the
 * target is full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` and `flush` are only handed on to the callback macro.
 */
void T_UConverter_toUnicode_SBCS (UConverter * _this,
                                  UChar ** target,
                                  const UChar * targetLimit,
                                  const char **source,
                                  const char *sourceLimit,
                                  int32_t *offsets,
                                  bool_t flush,
                                  UErrorCode * err)
{
    char *mySource = (char *) *source;
    UChar *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    const int32_t targetLength = targetLimit - myTarget;
    const int32_t sourceLength = sourceLimit - (char *) mySource;
    UChar *byteToUnicode = _this->sharedData->table->sbcs.toUnicode;
    UChar mapped = 0x0000;

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        mapped = byteToUnicode[(unsigned char) mySource[mySourceIndex++]];
        if (mapped != missingUCharMarker)
        {
            myTarget[myTargetIndex++] = mapped;
            continue;
        }

        /* unmapped byte: record it and let the callback decide */
        *err = U_INVALID_CHAR_FOUND;
        _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
        _this->invalidCharLength = 1;

        /* expands to a statement that may `break` out of this loop */
        ToU_CALLBACK_MACRO(_this,
                           myTarget,
                           myTargetIndex,
                           targetLimit,
                           mySource,
                           mySourceIndex,
                           sourceLimit,
                           offsets,
                           flush,
                           err);

        if (U_FAILURE (*err)) break;
        _this->invalidCharLength = 0;
    }

    *target += myTargetIndex;
    *source += mySourceIndex;
}
/*
 * Converts UChars at *source into single-byte codepage data at *target.
 *
 * Each UChar is looked up in the shared sbcs.fromUnicode compact array.  A
 * zero result counts as "unmapped" unless the source UChar itself is zero;
 * unmapped UChars set U_INVALID_CHAR_FOUND, are recorded in
 * invalidUCharBuffer and go through FromU_CALLBACK_MACRO.  When the target is
 * full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set and the current
 * UChar is left unconsumed.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` and `flush` are only handed on to the callback macro.
 */
void T_UConverter_fromUnicode_SBCS (UConverter * _this,
                                    char **target,
                                    const char *targetLimit,
                                    const UChar ** source,
                                    const UChar * sourceLimit,
                                    int32_t *offsets,
                                    bool_t flush,
                                    UErrorCode * err)
{
    const UChar *mySource = *source;
    unsigned char *myTarget = (unsigned char *) *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    const int32_t targetLength = targetLimit - (char *) myTarget;
    const int32_t sourceLength = sourceLimit - mySource;
    CompactByteArray *unicodeToByte = _this->sharedData->table->sbcs.fromUnicode;
    unsigned char mappedByte = 0x00;

    while (mySourceIndex < sourceLength)
    {
        /* the lookup happens before the room check, as before */
        mappedByte = ucmp8_getu (unicodeToByte, mySource[mySourceIndex]);

        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        mySourceIndex++;

        /* a zero byte is only a valid result for a zero UChar */
        if (mappedByte != 0 || !mySource[mySourceIndex - 1])
        {
            myTarget[myTargetIndex++] = mappedByte;
            continue;
        }

        *err = U_INVALID_CHAR_FOUND;
        _this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
        _this->invalidUCharLength = 1;

        /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
        /* expands to a statement that may `break` out of this loop */
        FromU_CALLBACK_MACRO(_this,
                             (char *)myTarget,
                             myTargetIndex,
                             targetLimit,
                             mySource,
                             mySourceIndex,
                             sourceLimit,
                             offsets,
                             flush,
                             err);

        if (U_FAILURE (*err)) break;
        _this->invalidUCharLength = 0;
    }

    *target += myTargetIndex;
    *source += mySourceIndex;
}
/*
 * Reads one single-byte character at *source and returns its UChar.
 *
 * With no byte available, U_INDEX_OUTOFBOUNDS_ERROR is set and 0xFFFD is
 * returned.  Otherwise the byte indexes sbcs.toUnicode and *source advances
 * by one.  If the table yields 0xFFFD, U_INVALID_CHAR_FOUND is set, *source
 * is stepped back onto the offending byte, and the converter's
 * fromCharErrorBehaviour callback is invoked with a one-UChar target
 * (myUChar) and a private copy of the advanced source position.  A
 * U_INDEX_OUTOFBOUNDS_ERROR left by the callback is turned back into
 * U_ZERO_ERROR.
 */
UChar T_UConverter_getNextUChar_SBCS(UConverter* converter,
                                     const char** source,
                                     const char* sourceLimit,
                                     UErrorCode* err)
{
    UChar myUChar;
    UChar* callbackTarget;
    const char* positionAfterChar;

    if ((*source)+1 > sourceLimit)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xFFFD;
    }

    /* look the byte up, then step past it */
    myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)**source];
    (*source)++;

    if (myUChar != 0xFFFD)
    {
        return myUChar;
    }

    callbackTarget = &myUChar;
    positionAfterChar = *source;
    *err = U_INVALID_CHAR_FOUND;

    /* rewind the caller's source pointer before calling the callback */
    (*source)--;

    /* the callback may write to myUChar and to the converter's internal
     * buffers */
    converter->fromCharErrorBehaviour(converter,
                                      &callbackTarget,
                                      callbackTarget + 1,
                                      &positionAfterChar,
                                      sourceLimit,
                                      NULL,
                                      TRUE,
                                      err);

    /* makes the internal caching transparent to the user */
    if (*err == U_INDEX_OUTOFBOUNDS_ERROR)
    {
        *err = U_ZERO_ERROR;
    }

    return myUChar;
}
/* Function table for the SBCS converter.  Entries follow the member order of
 * UConverterImpl; the two ...WithOffsets slots are NULL because no
 * offsets-specific routines are supplied here. */
static UConverterImpl _SBCSImpl={
UCNV_SBCS,
T_UConverter_toUnicode_SBCS,
NULL,
T_UConverter_fromUnicode_SBCS,
NULL,
T_UConverter_getNextUChar_SBCS
};
/* Shared data object for SBCS converters.  Positional initializers, matching
 * the revised UConverterSharedData layout: structSize, referenceCounter,
 * dataMemory, table, impl, name, codepage, platform, conversionType,
 * minBytesPerChar, maxBytesPerChar, then defaultConverterValues
 * { toUnicodeStatus, subCharLen, subChar[] }. */
extern UConverterSharedData _SBCSData={
sizeof(UConverterSharedData), 1,
NULL, NULL, &_SBCSImpl, "SBCS",
0, UCNV_IBM, UCNV_SBCS, 1, 1,
{ 0, 1, 0, 0, 0, 0 }
};
/* DBCS --------------------------------------------------------------------- */
/*
 * Converts double-byte codepage data at *source into UChars at *target.
 *
 * Bytes are consumed one at a time.  While _this->toUnicodeStatus is zero the
 * byte is stored there as the first half of a pair; otherwise it is combined
 * with the stored byte (stored byte in the high half) and looked up in the
 * shared dbcs.toUnicode table.
 *
 * Unmappable pairs set U_INVALID_CHAR_FOUND, are recorded in
 * invalidCharBuffer and go through ToU_CALLBACK_MACRO.  When the target is
 * full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set.  If flush is
 * TRUE, all input was consumed and a first byte is still stored,
 * U_TRUNCATED_CHAR_FOUND is set (unless an error is already recorded) and the
 * stored byte is dropped.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` is only handed on to the callback macro.
 */
void T_UConverter_toUnicode_DBCS (UConverter * _this,
                                  UChar ** target,
                                  const UChar * targetLimit,
                                  const char **source,
                                  const char *sourceLimit,
                                  int32_t *offsets,
                                  bool_t flush,
                                  UErrorCode * err)
{
    const char *mySource = ( char *) *source;
    UChar *myTarget = *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    const int32_t targetLength = targetLimit - myTarget;
    const int32_t sourceLength = sourceLimit - (char *) mySource;
    CompactShortArray *pairToUnicode = _this->sharedData->table->dbcs.toUnicode;
    UChar unicodeValue = 0x0000;
    UChar codeUnit = 0x0000;

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        codeUnit = (unsigned char) mySource[mySourceIndex++];

        /* nothing stored yet: this byte becomes the first half of a pair */
        if (_this->toUnicodeStatus == 0x00)
        {
            _this->toUnicodeStatus = (unsigned char) codeUnit;
            continue;
        }

        /* a first byte is stored (status is known to be non-zero here):
         * build the 16-bit pair and clear the state */
        codeUnit = (UChar) ((_this->toUnicodeStatus << 8) | (codeUnit & 0x00FF));
        _this->toUnicodeStatus = 0x00;

        unicodeValue = (UChar) ucmp16_getu (pairToUnicode, codeUnit);
        if (unicodeValue != missingUCharMarker)
        {
            myTarget[myTargetIndex++] = unicodeValue;
            continue;
        }

        /* no mapping: record both bytes and let the callback decide */
        *err = U_INVALID_CHAR_FOUND;
        _this->invalidCharBuffer[0] = (char) (codeUnit >> 8);
        _this->invalidCharBuffer[1] = (char) codeUnit;
        _this->invalidCharLength = 2;

        /* expands to a statement that may `break` out of this loop */
        ToU_CALLBACK_MACRO(_this,
                           myTarget,
                           myTargetIndex,
                           targetLimit,
                           mySource,
                           mySourceIndex,
                           sourceLimit,
                           offsets,
                           flush,
                           err);

        if (U_FAILURE (*err)) break;
        _this->invalidCharLength = 0;
    }

    /* A stored first byte at the end of a flushed stream means the input was
     * cut off in the middle of a character. */
    if ((flush == TRUE)
        && (mySourceIndex == sourceLength)
        && (_this->toUnicodeStatus != 0x00)
        && U_SUCCESS(*err))
    {
        *err = U_TRUNCATED_CHAR_FOUND;
        _this->toUnicodeStatus = 0x00;
    }

    *target += myTargetIndex;
    *source += mySourceIndex;
}
/*
 * Converts UChars at *source into double-byte codepage data at *target.
 *
 * Each UChar is looked up in the shared dbcs.fromUnicode table and written as
 * two bytes, high byte first.  If the target fills up between the two bytes,
 * the second byte is stored in _this->charErrorBuffer and
 * U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * Unmappable UChars set U_INVALID_CHAR_FOUND, are recorded in
 * invalidUCharBuffer and go through FromU_CALLBACK_MACRO.  When the target is
 * full while input remains, U_INDEX_OUTOFBOUNDS_ERROR is set.
 *
 * On return *target and *source have been advanced past what was written and
 * consumed.  `offsets` and `flush` are only handed on to the callback macro.
 */
void T_UConverter_fromUnicode_DBCS (UConverter * _this,
                                    char **target,
                                    const char *targetLimit,
                                    const UChar ** source,
                                    const UChar * sourceLimit,
                                    int32_t *offsets,
                                    bool_t flush,
                                    UErrorCode * err)
{
    const UChar *mySource = *source;
    unsigned char *myTarget = (unsigned char *) *target;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    const int32_t targetLength = targetLimit - (char *) myTarget;
    const int32_t sourceLength = sourceLimit - mySource;
    CompactShortArray *unicodeToPair = _this->sharedData->table->dbcs.fromUnicode;
    UChar bytePair = 0x0000;
    UChar inputUnit = 0x0000;

    while (mySourceIndex < sourceLength)
    {
        /* no room left for output */
        if (myTargetIndex >= targetLength)
        {
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
            break;
        }

        inputUnit = (UChar) mySource[mySourceIndex++];
        bytePair = (UChar) ucmp16_getu (unicodeToPair, inputUnit);

        if (bytePair == missingCharMarker)
        {
            /* no mapping: record the UChar and let the callback decide */
            *err = U_INVALID_CHAR_FOUND;
            _this->invalidUCharBuffer[0] = (UChar) inputUnit;
            _this->invalidUCharLength = 1;

            /* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
            /* expands to a statement that may `break` out of this loop */
            FromU_CALLBACK_MACRO(_this,
                                 (char *)myTarget,
                                 myTargetIndex,
                                 targetLimit,
                                 mySource,
                                 mySourceIndex,
                                 sourceLimit,
                                 offsets,
                                 flush,
                                 err);

            if (U_FAILURE (*err)) break;
            _this->invalidUCharLength = 0;
            continue;
        }

        /* high byte first */
        myTarget[myTargetIndex++] = (char) (bytePair >> 8);

        if (myTargetIndex < targetLength)
        {
            myTarget[myTargetIndex++] = (char) bytePair;
        }
        else
        {
            /* target full mid-character: keep the low byte for later */
            _this->charErrorBuffer[0] = (char) bytePair;
            _this->charErrorBufferLength = 1;
            *err = U_INDEX_OUTOFBOUNDS_ERROR;
        }
    }

    *target += myTargetIndex;
    *source += mySourceIndex;
}
/*
 * Reads one double-byte character at *source and returns its UChar.
 *
 * - No byte left: sets U_INDEX_OUTOFBOUNDS_ERROR, returns 0xFFFD.
 * - Exactly one byte left: sets U_TRUNCATED_CHAR_FOUND, returns 0xFFFD.
 * - Otherwise the two bytes are combined (first byte in the high half),
 *   looked up in dbcs.toUnicode, and *source advances by two.
 * - Lookup yields 0xFFFD: *source is stepped back by two,
 *   U_INVALID_CHAR_FOUND is set, and the converter's fromCharErrorBehaviour
 *   callback is invoked with a one-UChar target (myUChar) and a private copy
 *   of the advanced source position.  A U_INDEX_OUTOFBOUNDS_ERROR left by the
 *   callback is turned back into U_ZERO_ERROR.
 */
UChar T_UConverter_getNextUChar_DBCS(UConverter* converter,
                                     const char** source,
                                     const char* sourceLimit,
                                     UErrorCode* err)
{
    UChar myUChar;
    UChar bytePair;
    UChar* callbackTarget;
    const char* positionAfterChar;

    /* the caller has reached the end of the byte stream */
    if ((*source) >= sourceLimit)
    {
        *err = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xFFFD;
    }

    /* a character was cut in half */
    if ((*source)+2 > sourceLimit)
    {
        *err = U_TRUNCATED_CHAR_FOUND;
        return 0xFFFD;
    }

    /* pack both bytes into the 16-bit lookup index */
    bytePair = (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)));
    myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode,
                          bytePair);

    *source += 2;

    if (myUChar != 0xFFFD)
    {
        return myUChar;
    }

    callbackTarget = &myUChar;
    positionAfterChar = *source;

    /* rewind the caller's source pointer before calling the callback */
    (*source) -= 2;
    *err = U_INVALID_CHAR_FOUND;

    /* the callback may write to myUChar and to the converter's internal
     * buffers */
    converter->fromCharErrorBehaviour(converter,
                                      &callbackTarget,
                                      callbackTarget + 1,
                                      &positionAfterChar,
                                      sourceLimit,
                                      NULL,
                                      TRUE,
                                      err);

    /* makes the internal caching transparent to the user */
    if (*err == U_INDEX_OUTOFBOUNDS_ERROR)
    {
        *err = U_ZERO_ERROR;
    }

    return myUChar;
}
/* Function table for the DBCS converter.  Entries follow the member order of
 * UConverterImpl; the two ...WithOffsets slots are NULL because no
 * offsets-specific routines are supplied here. */
static UConverterImpl _DBCSImpl={
UCNV_DBCS,
T_UConverter_toUnicode_DBCS,
NULL,
T_UConverter_fromUnicode_DBCS,
NULL,
T_UConverter_getNextUChar_DBCS
};
/* Shared data object for DBCS converters.  Positional initializers, matching
 * the revised UConverterSharedData layout; min and max bytes per character
 * are both 2. */
extern UConverterSharedData _DBCSData={
sizeof(UConverterSharedData), 1,
NULL, NULL, &_DBCSImpl, "DBCS",
0, UCNV_IBM, UCNV_DBCS, 2, 2,
{ 0, 1, 0, 0, 0, 0 }
};

File diff suppressed because it is too large Load diff

View file

@ -46,8 +46,8 @@ typedef struct _CompactByteArray CompactByteArray;
/*Pointer to the aforementioned file */
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
#define UCNV_SI 0x0F /* Shift in for EBCDIC_STATEFUL and ISO 2022 states */
#define UCNV_SO 0x0E /* Shift out for EBCDIC_STATEFUL and ISO 2022 states */
#define UCNV_SI 0x0F /* Shift in for EBCDIC_STATEFUL and ISO 2022 states */
#define UCNV_SO 0x0E /* Shift out for EBCDIC_STATEFUL and ISO 2022 states */
typedef enum {
UCNV_UNSUPPORTED_CONVERTER = -1,
@ -61,12 +61,10 @@ typedef enum {
UCNV_EBCDIC_STATEFUL = 7,
UCNV_ISO_2022 = 8,
/* Number of converter types for which we have conversion routines. */
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
UCNV_JIS = 9,
UCNV_EUC = 10,
UCNV_GB = 11
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9
} UConverterType;
/* ### move the following typedef and array into implementation files! */
typedef struct
{
int32_t ccsid;
@ -121,95 +119,125 @@ typedef union
UConverterTable;
/*Defines the struct of a UConverterSharedData the immutable, shared part of
*UConverter
*/
typedef struct
{
uint32_t structSize; /* Size of this structure */
void *dataMemory; /* NOTE(review): undocumented in this layout; the revised layout describes it as coming from udata_openChoice() -- confirm */
uint32_t referenceCounter; /*used to count number of clients */
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
UConverterPlatform platform; /*platform of the converter (only IBM now) */
int32_t codepage; /*codepage # (now IBM-$codepage) */
UConverterType conversionType; /*conversion type */
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
struct
{ /*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus; /* initial value for UConverter.toUnicodeStatus */
int8_t subCharLen; /* initial value for UConverter.subCharLen */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /* initial value for UConverter.subChar */
}
defaultConverterValues;
UConverterTable *table; /*Pointer to conversion data */
}
UConverterSharedData;
/*Defines a UConverter, the lightweight mutable part the user sees */
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
itself is compiled under C++, the linkage of the funcptrs will
work.
*/
*/
struct UConverter
{
int32_t toUnicodeStatus; /*Used to internalize stream status information */
int32_t fromUnicodeStatus;
struct UConverterImpl;
typedef struct UConverterImpl UConverterImpl;
/* ###
* Markus Scherer on 2000feb04:
* I have changed UConverter and UConverterSharedData; there may be more changes,
* or we may decide to roll back the structure definitions to what they were
* before, with the additional UConverterImpl field and the new semantics for
* referenceCounter.
*
* Reasons for changes: Attempt at performance improvements, especially
* a) decrease amount of internal, implicit padding by reordering the fields
* b) save space by storing the internal name of the converter only with a
* pointer instead of an array
*
* In addition to that, I added the UConverterImpl field for better
* modularizing the code and making it more maintainable. It may actually
* become slightly faster by doing this.
*
* I changed the UConverter.to|fromUnicodeStatus to be unsigned because
* the defaultValues.toUnicodeStatus is unsigned, and it seemed to be a safer choice.
*
* Ultimately, I would prefer not to expose these definitions any more at all,
* but this is subject to discussions, proposals and design reviews.
*
* I would personally like to see more information hiding (with helper APIs),
* useful state fields in UConverter that are reserved for the callbacks,
* and directly included structures instead of pointers to allocated
* memory, like for UConverterTable and its variant fields.
*/
/*
* Defines the UConverterSharedData struct,
* the immutable, shared part of UConverter.
*/
/* Member order matters: the per-converter shared-data objects (_MBCSData,
 * _SBCSData, _DBCSData, ...) are filled in with positional initializers. */
typedef struct {
uint32_t structSize; /* Size of this structure */
uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */
const void *dataMemory; /* from udata_openChoice() */
UConverterTable *table; /* Pointer to conversion data */
const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
const char *name; /* internal name of the converter */
int32_t codepage; /* codepage # (now IBM-$codepage) */
int8_t platform; /* platform of the converter (only IBM now) */
int8_t conversionType; /* conversion type */
int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */
/*initial values of some members of the mutable part of object */
struct {
uint32_t toUnicodeStatus; /* initial value for UConverter.toUnicodeStatus */
int8_t subCharLen; /* initial value for UConverter.subCharLen */
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* initial value for UConverter.subChar */
} defaultConverterValues;
} UConverterSharedData;
/* Defines a UConverter, the lightweight mutable part the user sees */
struct UConverter {
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
uint32_t fromUnicodeStatus;
int32_t mode;
int8_t subCharLen; /* length of the codepage specific character sequence */
int8_t invalidCharLength;
int8_t invalidUCharLength;
int8_t pad;
int32_t mode;
int8_t subCharLen; /*length of the codepage specific character sequence */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
*output stream by the Error function pointers
*/
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
* output stream by the Error function pointers
*/
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
*in charErrorBuffer
*/
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
*in charErrorBuffer
*/
int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
UChar invalidUCharBuffer[3];
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
/*Error function pointer called when conversion issues
*occur during a T_UConverter_fromUnicode call
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
/*
* Error function pointer called when conversion issues
* occur during a T_UConverter_fromUnicode call
*/
void (*fromUCharErrorBehaviour) (struct UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/*Error function pointer called when conversion issues
*occur during a T_UConverter_toUnicode call
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/*
* Error function pointer called when conversion issues
* occur during a T_UConverter_toUnicode call
*/
void (*fromCharErrorBehaviour) (struct UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
*converter object
*/
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
Could be used by clients writing their own call back function to
pass context to them
*/
};
UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
/*
* currently only used to point to a struct containing UConverter used by iso 2022;
* could be used by clients writing their own call back function to pass context to them
*/
void *extraInfo;
};
U_CDECL_END /* end of UConverter */
@ -219,7 +247,7 @@ typedef struct UConverter UConverter;
typedef struct
{
UConverter *currentConverter;
unsigned char escSeq2022[10];
uint8_t escSeq2022[10];
int8_t escSeq2022Length;
}
UConverterDataISO2022;

View file

@ -28,28 +28,115 @@
#include "unewdata.h"
#include "ucmpwrit.h"
/*
* Defines UConverterSharedData_1_4, the immutable, shared part of a converter
* as it was laid out in ICU 1.4.
*
* This is the definition from ICU 1.4, necessary to read converter data
* version 1 because the structure is directly embedded in the data.
* See udata.html for why this is bad (pointers, enums, padding...).
*
* WriteConverterSharedData() in this file writes the struct as one block of
* sizeof(UConverterSharedData_1_4) bytes, so the order, types and padding of
* the members are part of the data format. Do not reorder or retype them.
*/
typedef struct
{
uint32_t structSize; /* Size of this structure */
void *dataMemory; /* set to NULL when the struct is created by this tool */
uint32_t referenceCounter; /* used to count number of clients */
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /* internal name of the converter, stored inline */
UConverterPlatform platform; /* platform of the converter (only IBM now) */
int32_t codepage; /* codepage # (now IBM-$codepage) */
UConverterType conversionType; /* conversion type */
int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */
struct
{ /* initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
int8_t subCharLen;
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
}
defaultConverterValues;
/* Last member; the writer relies on the struct ending with a pointer for its alignment remark. */
UConverterTable *table; /* Pointer to conversion data */
}
UConverterSharedData_1_4;
/*
* UConverter_1_4: the mutable, per-client converter object in the layout that
* pairs with UConverterSharedData_1_4 (see the sharedData member).
* The table-loading functions in this file take a UConverter_1_4 * and fill in
* the shared data through it.
*/
struct UConverter_1_4
{
int32_t toUnicodeStatus; /* Used to internalize stream status information */
int32_t fromUnicodeStatus; /* presumably the same, for the from-Unicode direction - TODO confirm */
int8_t invalidCharLength; /* presumably the number of valid bytes in invalidCharBuffer - TODO confirm */
int8_t invalidUCharLength; /* presumably the number of valid UChars in invalidUCharBuffer - TODO confirm */
int8_t pad; /* NOTE(review): looks like explicit padding; no use is visible here */
int32_t mode;
int8_t subCharLen; /* length of the codepage specific character sequence */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* used to store unicode data meant for
* output stream by the Error function pointers
*/
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* used to store codepage data meant for
* output stream by the Error function pointers
*/
int8_t UCharErrorBufferLength; /* used to indicate the number of valid UChars;
* the original comment named charErrorBuffer here,
* presumably meaning UCharErrorBuffer
*/
int8_t charErrorBufferLength; /* used to indicate the number of valid bytes
* in charErrorBuffer
*/
UChar invalidUCharBuffer[3];
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
/*
* Error function pointer called when conversion issues
* occur during a T_UConverter_fromUnicode call.
* Takes the converter, the target (char) pointer and limit, the source
* (UChar) pointer and limit, an offsets array, a bool_t flag and the
* UErrorCode; the parameter roles are inferred from the types - TODO confirm.
*/
void (*fromUCharErrorBehaviour) (struct UConverter_1_4 *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/*
* Error function pointer called when conversion issues
* occur during a T_UConverter_toUnicode call.
* Mirror image of fromUCharErrorBehaviour with the UChar and char
* pointer roles exchanged.
*/
void (*fromCharErrorBehaviour) (struct UConverter_1_4 *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
UConverterSharedData_1_4 *sharedData; /* Pointer to the shared immutable part of the
* converter object
*/
void *extraInfo; /* currently only used to point to a struct containing UConverter_1_4 used by iso 2022.
Could be used by clients writing their own call back function to
pass context to them
*/
};
/* Allows the struct to be named without the "struct" keyword. */
typedef struct UConverter_1_4 UConverter_1_4;
/*Reads the header of the table file and fills in basic knowledge about the converter
*in "converter"
*/
static void readHeaderFromFile(UConverter* myConverter, FileStream* convFile, UErrorCode* err);
static void readHeaderFromFile(UConverter_1_4* myConverter, FileStream* convFile, UErrorCode* err);
/*Reads the rest of the file, and fills up the shared objects if necessary*/
static void loadMBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
static void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
/*Reads the rest of the file, and fills up the shared objects if necessary*/
static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
/*Reads the rest of the file, and fills up the shared objects if necessary*/
static void loadSBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
static void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
/*Reads the rest of the file, and fills up the shared objects if necessary*/
static void loadDBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
static void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
/* creates a UConverterSharedData from a mapping file, fills in necessary links to it the
/* creates a UConverterSharedData_1_4 from a mapping file, fills in necessary links to it the
* appropriate function pointers
* if the data tables are already in memory
*/
static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err);
static UConverterSharedData_1_4* createConverterFromTableFile(const char* realName, UErrorCode* err);
/*writes a CompactShortArray to a file*/
@ -60,11 +147,13 @@ static void writeCompactByteArrayToFile(FileStream* outfile, const CompactByteAr
/*writes a binary to a file*/
static void writeUConverterSharedDataToFile(const char* filename,
UConverterSharedData* mySharedData,
UConverterSharedData_1_4* mySharedData,
UErrorCode* err);
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data);
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data);
bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData);
static UConverterPlatform getPlatformFromName(char* name);
static int32_t getCodepageNumberFromName(char* name);
@ -158,7 +247,7 @@ static const UDataInfo dataInfo={
};
void writeConverterData(UConverterSharedData *mySharedData, const char *cName, UErrorCode *status)
void writeConverterData(UConverterSharedData_1_4 *mySharedData, const char *cName, UErrorCode *status)
{
UNewDataMemory *mem;
const char *cnvName, *cnvName2;
@ -192,7 +281,7 @@ void writeConverterData(UConverterSharedData *mySharedData, const char *cName, U
int main(int argc, char** argv)
{
UConverterSharedData* mySharedData = NULL;
UConverterSharedData_1_4* mySharedData = NULL;
UErrorCode err = U_ZERO_ERROR;
char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
char* dot = NULL, *arg;
@ -232,7 +321,7 @@ int main(int argc, char** argv)
{
/* writeUConverterSharedDataToFile(outFileName, mySharedData, &err); */
writeConverterData(mySharedData, cnvName, &err);
deleteSharedConverterData(mySharedData);
makeconv_deleteSharedConverterData(mySharedData);
if(U_FAILURE(err))
{
@ -289,7 +378,7 @@ int32_t getCodepageNumberFromName(char* name)
}
/*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/
void readHeaderFromFile(UConverter* myConverter,
void readHeaderFromFile(UConverter_1_4* myConverter,
FileStream* convFile,
UErrorCode* err)
{
@ -412,7 +501,7 @@ void readHeaderFromFile(UConverter* myConverter,
void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
{
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
@ -479,7 +568,7 @@ void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
return;
}
void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
{
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
@ -560,7 +649,7 @@ void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
return;
}
void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
{
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
@ -627,7 +716,7 @@ void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConver
}
void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
{
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
@ -688,7 +777,7 @@ void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
}
/*deletes the "shared" type object*/
bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData)
bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData)
{
if (deadSharedData->conversionType == UCNV_SBCS)
{
@ -719,13 +808,13 @@ bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData)
/*creates a UConverter, fills in necessary links to it the appropriate function pointers*/
UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err)
/*creates a UConverter_1_4, fills in necessary links to it the appropriate function pointers*/
UConverterSharedData_1_4* createConverterFromTableFile(const char* converterName, UErrorCode* err)
{
FileStream* convFile = NULL;
int32_t i = 0;
UConverterSharedData* mySharedData = NULL;
UConverter myConverter;
UConverterSharedData_1_4* mySharedData = NULL;
UConverter_1_4 myConverter;
if (U_FAILURE(*err)) return NULL;
@ -738,14 +827,14 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE
}
mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
mySharedData = (UConverterSharedData_1_4*) uprv_malloc(sizeof(UConverterSharedData_1_4));
if (mySharedData == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
T_FileStream_close(convFile);
}
mySharedData->structSize = sizeof(UConverterSharedData);
mySharedData->structSize = sizeof(UConverterSharedData_1_4);
mySharedData->dataMemory = NULL; /* for init */
myConverter.sharedData = mySharedData;
@ -787,13 +876,13 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data)
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data)
{
uint32_t size = 0;
udata_writeBlock(pData, data, sizeof(UConverterSharedData));
udata_writeBlock(pData, data, sizeof(UConverterSharedData_1_4));
size += sizeof(UConverterSharedData); /* Is 4-aligned- it ends with a pointer */
size += sizeof(UConverterSharedData_1_4); /* Is 4-aligned- it ends with a pointer */
switch (data->conversionType)
{