mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable)
X-SVN-Rev: 721
This commit is contained in:
parent
01a8d20f2e
commit
7f7b2d90f3
13 changed files with 3999 additions and 4174 deletions
|
@ -74,7 +74,8 @@ mutex.o normlzr.o putil.o rbcache.o resbund.o schriter.o scsu.o \
|
|||
uchar.o uchriter.o ucmp8.o ucmp16.o ucmp32.o ucnv.o ucnv_bld.o \
|
||||
ucnv_cnv.o ucnv_err.o ucnv_io.o uhash.o uloc.o unicode.o unistr.o \
|
||||
ures.o ustring.o rbread.o rbdata.o ubidi.o ubidiln.o \
|
||||
bidi.o uvector.o udata.o unames.o utf_impl.o
|
||||
bidi.o uvector.o udata.o unames.o utf_impl.o \
|
||||
ucnv_2022.o ucnv_utf.o ucnv_sbcs.o ucnv_mbcs.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -211,6 +211,10 @@ SOURCE=.\ucnv.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_2022.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_bld.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
@ -227,6 +231,18 @@ SOURCE=.\ucnv_io.c
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_mbcs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_sbcs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_utf.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\udata.c
|
||||
# ADD CPP /Ze
|
||||
# End Source File
|
||||
|
|
|
@ -61,8 +61,12 @@ UnicodeConverterCPP& UnicodeConverterCPP::operator=(const UnicodeConverterCPP&
|
|||
*Increments the assigner converter's ref count
|
||||
*/
|
||||
Mutex updateReferenceCounters;
|
||||
myUnicodeConverter->sharedData->referenceCounter--;
|
||||
that.myUnicodeConverter->sharedData->referenceCounter++;
|
||||
if (myUnicodeConverter->sharedData->referenceCounter != 0 && myUnicodeConverter->sharedData->referenceCounter != ~0) {
|
||||
myUnicodeConverter->sharedData->referenceCounter--;
|
||||
}
|
||||
if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) {
|
||||
that.myUnicodeConverter->sharedData->referenceCounter++;
|
||||
}
|
||||
}
|
||||
|
||||
*myUnicodeConverter = *(that.myUnicodeConverter);
|
||||
|
@ -98,7 +102,9 @@ UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeConverterCPP& that)
|
|||
myUnicodeConverter = new UConverter;
|
||||
{
|
||||
Mutex updateReferenceCounter;
|
||||
that.myUnicodeConverter->sharedData->referenceCounter++;
|
||||
if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) {
|
||||
that.myUnicodeConverter->sharedData->referenceCounter++;
|
||||
}
|
||||
}
|
||||
*myUnicodeConverter = *(that.myUnicodeConverter);
|
||||
}
|
||||
|
|
|
@ -38,112 +38,6 @@
|
|||
static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv);
|
||||
/* Internal function : end */
|
||||
|
||||
typedef void (*T_ToUnicodeFunction) (UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
typedef void (*T_FromUnicodeFunction) (UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
typedef UChar (*T_GetNextUCharFunction) (UConverter *,
|
||||
const char **,
|
||||
const char *,
|
||||
UErrorCode *);
|
||||
|
||||
static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
|
||||
|
||||
{
|
||||
T_UConverter_toUnicode_SBCS,
|
||||
T_UConverter_toUnicode_DBCS,
|
||||
T_UConverter_toUnicode_MBCS,
|
||||
T_UConverter_toUnicode_LATIN_1,
|
||||
T_UConverter_toUnicode_UTF8,
|
||||
T_UConverter_toUnicode_UTF16_BE,
|
||||
T_UConverter_toUnicode_UTF16_LE,
|
||||
T_UConverter_toUnicode_EBCDIC_STATEFUL,
|
||||
T_UConverter_toUnicode_ISO_2022
|
||||
};
|
||||
|
||||
static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
|
||||
|
||||
{
|
||||
NULL, /*UCNV_SBCS*/
|
||||
NULL, /*UCNV_DBCS*/
|
||||
T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC,
|
||||
NULL, /*UCNV_LATIN_1*/
|
||||
T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC,
|
||||
NULL, /*UTF16_BE*/
|
||||
NULL, /*UTF16_LE*/
|
||||
T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
|
||||
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC
|
||||
};
|
||||
|
||||
static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
|
||||
|
||||
{
|
||||
NULL, /*UCNV_SBCS*/
|
||||
NULL, /*UCNV_DBCS*/
|
||||
T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC,
|
||||
NULL, /*UCNV_LATIN_1*/
|
||||
T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC,
|
||||
NULL, /*UTF16_BE*/
|
||||
NULL, /*UTF16_LE*/
|
||||
T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
|
||||
T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC
|
||||
};
|
||||
|
||||
static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
|
||||
{
|
||||
T_UConverter_fromUnicode_SBCS,
|
||||
T_UConverter_fromUnicode_DBCS,
|
||||
T_UConverter_fromUnicode_MBCS,
|
||||
T_UConverter_fromUnicode_LATIN_1,
|
||||
T_UConverter_fromUnicode_UTF8,
|
||||
T_UConverter_fromUnicode_UTF16_BE,
|
||||
T_UConverter_fromUnicode_UTF16_LE,
|
||||
T_UConverter_fromUnicode_EBCDIC_STATEFUL,
|
||||
T_UConverter_fromUnicode_ISO_2022
|
||||
};
|
||||
|
||||
static T_GetNextUCharFunction GET_NEXT_UChar_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] =
|
||||
{
|
||||
T_UConverter_getNextUChar_SBCS,
|
||||
T_UConverter_getNextUChar_DBCS,
|
||||
T_UConverter_getNextUChar_MBCS,
|
||||
T_UConverter_getNextUChar_LATIN_1,
|
||||
T_UConverter_getNextUChar_UTF8,
|
||||
T_UConverter_getNextUChar_UTF16_BE,
|
||||
T_UConverter_getNextUChar_UTF16_LE,
|
||||
T_UConverter_getNextUChar_EBCDIC_STATEFUL,
|
||||
T_UConverter_getNextUChar_ISO_2022
|
||||
};
|
||||
|
||||
void flushInternalUnicodeBuffer (UConverter * _this,
|
||||
UChar * myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
void flushInternalCharBuffer (UConverter * _this,
|
||||
char *myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
static void T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
|
||||
UConverter * inConverter,
|
||||
char **target,
|
||||
|
@ -219,6 +113,7 @@ void ucnv_close (UConverter * converter)
|
|||
{
|
||||
if (converter == NULL)
|
||||
return;
|
||||
/* ### this cleanup would be cleaner in a function in UConverterImpl */
|
||||
if ((converter->sharedData->conversionType == UCNV_ISO_2022) &&
|
||||
(converter->mode == UCNV_SO))
|
||||
{
|
||||
|
@ -226,9 +121,13 @@ void ucnv_close (UConverter * converter)
|
|||
uprv_free (converter->extraInfo);
|
||||
}
|
||||
|
||||
umtx_lock (NULL);
|
||||
converter->sharedData->referenceCounter--;
|
||||
umtx_unlock (NULL);
|
||||
if (converter->sharedData->referenceCounter != ~0) {
|
||||
umtx_lock (NULL);
|
||||
if (converter->sharedData->referenceCounter != 0) {
|
||||
converter->sharedData->referenceCounter--;
|
||||
}
|
||||
umtx_unlock (NULL);
|
||||
}
|
||||
uprv_free (converter);
|
||||
|
||||
return;
|
||||
|
@ -597,8 +496,7 @@ void ucnv_fromUnicode (UConverter * _this,
|
|||
}
|
||||
default:
|
||||
{
|
||||
|
||||
FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this,
|
||||
_this->sharedData->impl->fromUnicodeWithOffsets(_this,
|
||||
target,
|
||||
targetLimit,
|
||||
source,
|
||||
|
@ -611,7 +509,7 @@ void ucnv_fromUnicode (UConverter * _this,
|
|||
};
|
||||
}
|
||||
/*calls the specific conversion routines */
|
||||
FROM_UNICODE_FUNCTIONS[(int)myConvType] (_this,
|
||||
_this->sharedData->impl->fromUnicode(_this,
|
||||
target,
|
||||
targetLimit,
|
||||
source,
|
||||
|
@ -687,8 +585,7 @@ void ucnv_toUnicode (UConverter * _this,
|
|||
}
|
||||
default:
|
||||
{
|
||||
|
||||
TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this,
|
||||
_this->sharedData->impl->toUnicodeWithOffsets(_this,
|
||||
target,
|
||||
targetLimit,
|
||||
source,
|
||||
|
@ -701,7 +598,7 @@ void ucnv_toUnicode (UConverter * _this,
|
|||
};
|
||||
}
|
||||
/*calls the specific conversion routines */
|
||||
TO_UNICODE_FUNCTIONS[(int) myConvType] (_this,
|
||||
_this->sharedData->impl->toUnicode(_this,
|
||||
target,
|
||||
targetLimit,
|
||||
source,
|
||||
|
@ -959,7 +856,7 @@ UChar ucnv_getNextUChar (UConverter * converter,
|
|||
}
|
||||
/*calls the specific conversion routines */
|
||||
/*as dictated in a code review, avoids a switch statement */
|
||||
return GET_NEXT_UChar_FUNCTIONS[(int) (converter->sharedData->conversionType)] (converter,
|
||||
return converter->sharedData->impl->getNextUChar(converter,
|
||||
source,
|
||||
sourceLimit,
|
||||
err);
|
||||
|
|
1238
icu4c/source/common/ucnv_2022.c
Normal file
1238
icu4c/source/common/ucnv_2022.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -20,6 +20,7 @@
|
|||
#include "ucmp8.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnv_imp.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
@ -30,21 +31,29 @@
|
|||
|
||||
#include <stdio.h>
|
||||
|
||||
/*Array used to generate ALGORITHMIC_CONVERTERS_HASHTABLE
|
||||
*should ALWAYS BE EMPTY STRING TERMINATED.
|
||||
*/
|
||||
static const char *algorithmicConverterNames[] = {
|
||||
"LATIN_1",
|
||||
"UTF8",
|
||||
"UTF16_BigEndian",
|
||||
"UTF16_LittleEndian",
|
||||
"UTF16_PlatformEndian",
|
||||
"UTF16_OppositeEndian",
|
||||
"ISO_2022",
|
||||
"JIS",
|
||||
"EUC",
|
||||
"GB",
|
||||
""
|
||||
static const UConverterSharedData *
|
||||
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
||||
&_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data,
|
||||
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData,
|
||||
&_ISO2022Data
|
||||
};
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
UConverterType type;
|
||||
} cnvNameType[] = {
|
||||
{ "LATIN_1", UCNV_LATIN_1 },
|
||||
{ "UTF8", UCNV_UTF8 },
|
||||
{ "UTF16_BigEndian", UCNV_UTF16_BigEndian },
|
||||
{ "UTF16_LittleEndian", UCNV_UTF16_LittleEndian },
|
||||
#if U_IS_BIG_ENDIAN
|
||||
{ "UTF16_PlatformEndian", UCNV_UTF16_BigEndian },
|
||||
{ "UTF16_OppositeEndian", UCNV_UTF16_LittleEndian },
|
||||
#else
|
||||
{ "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian },
|
||||
{ "UTF16_OppositeEndian", UCNV_UTF16_BigEndian},
|
||||
#endif
|
||||
{ "ISO_2022", UCNV_ISO_2022 }
|
||||
};
|
||||
|
||||
/*Takes an alias name gets an actual converter file name
|
||||
|
@ -52,7 +61,6 @@ static const char *algorithmicConverterNames[] = {
|
|||
*allocates the memory and returns a new UConverter object
|
||||
*/
|
||||
static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err);
|
||||
static UConverter *createConverterFromAlgorithmicType (const char *realName, UErrorCode * err);
|
||||
|
||||
/*Given a file returns a newly allocated CompactByteArray based on the a serialized one */
|
||||
static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err);
|
||||
|
@ -70,7 +78,7 @@ static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile,
|
|||
static UConverterPlatform getPlatformFromName (char *name);
|
||||
static int32_t getCodepageNumberFromName (char *name);
|
||||
|
||||
static UConverterType getAlgorithmicTypeFromName (const char *realName);
|
||||
static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realName);
|
||||
|
||||
|
||||
/*these functions initialize the lightweight mutable part of the
|
||||
|
@ -84,10 +92,38 @@ static void initializeAlgorithmicConverter (UConverter * myConverter);
|
|||
|
||||
static int32_t uhash_hashSharedData (void *sharedData);
|
||||
|
||||
/*Defines the struct of a UConverterSharedData the immutable, shared part of
|
||||
*UConverter -
|
||||
* This is the definition from ICU 1.4, necessary to read converter data
|
||||
* version 1 because the structure is directly embedded in the data.
|
||||
* See udata.html for why this is bad (pointers, enums, padding...).
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
void *dataMemory;
|
||||
uint32_t referenceCounter; /*used to count number of clients */
|
||||
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
|
||||
UConverterPlatform platform; /*platform of the converter (only IBM now) */
|
||||
int32_t codepage; /*codepage # (now IBM-$codepage) */
|
||||
UConverterType conversionType; /*conversion type */
|
||||
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
|
||||
struct
|
||||
{ /*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
}
|
||||
defaultConverterValues;
|
||||
UConverterTable *table; /*Pointer to conversion data */
|
||||
}
|
||||
UConverterSharedData_1_4;
|
||||
|
||||
/**
|
||||
* Un flatten shared data from a UDATA..
|
||||
*/
|
||||
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData *data, UErrorCode *status);
|
||||
U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *data, UErrorCode *status);
|
||||
|
||||
|
||||
/*initializes some global variables */
|
||||
|
@ -328,7 +364,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
|
|||
}
|
||||
|
||||
/* clone it. OK to drop the original sharedData */
|
||||
myConverter->sharedData = ucnv_data_unFlattenClone(myConverter->sharedData, err);
|
||||
myConverter->sharedData = ucnv_data_unFlattenClone((UConverterSharedData_1_4 *)myConverter->sharedData, err);
|
||||
|
||||
myConverter->sharedData->dataMemory = (void*)data; /* for future use */
|
||||
|
||||
|
@ -372,39 +408,16 @@ void
|
|||
|
||||
/*returns a converter type from a string
|
||||
*/
|
||||
UConverterType
|
||||
const UConverterSharedData *
|
||||
getAlgorithmicTypeFromName (const char *realName)
|
||||
{
|
||||
if (uprv_strcmp (realName, "UTF8") == 0)
|
||||
return UCNV_UTF8;
|
||||
else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0)
|
||||
return UCNV_UTF16_BigEndian;
|
||||
else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0)
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
else if (uprv_strcmp (realName, "LATIN_1") == 0)
|
||||
return UCNV_LATIN_1;
|
||||
else if (uprv_strcmp (realName, "JIS") == 0)
|
||||
return UCNV_JIS;
|
||||
else if (uprv_strcmp (realName, "EUC") == 0)
|
||||
return UCNV_EUC;
|
||||
else if (uprv_strcmp (realName, "GB") == 0)
|
||||
return UCNV_GB;
|
||||
else if (uprv_strcmp (realName, "ISO_2022") == 0)
|
||||
return UCNV_ISO_2022;
|
||||
else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0)
|
||||
# if U_IS_BIG_ENDIAN
|
||||
return UCNV_UTF16_BigEndian;
|
||||
# else
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
# endif
|
||||
else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0)
|
||||
# if U_IS_BIG_ENDIAN
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
# else
|
||||
return UCNV_UTF16_BigEndian;
|
||||
# endif
|
||||
else
|
||||
return UCNV_UNSUPPORTED_CONVERTER;
|
||||
int i;
|
||||
for(i=0; i<sizeof(cnvNameType)/sizeof(cnvNameType[0]); ++i) {
|
||||
if(uprv_strcmp(realName, cnvNameType[i].name)==0) {
|
||||
return converterData[cnvNameType[i].type];
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -460,6 +473,7 @@ void shareConverterData (UConverterSharedData * data)
|
|||
|
||||
}
|
||||
umtx_lock (NULL);
|
||||
/* ### check to see if the element is not already there! */
|
||||
uhash_put(SHARED_DATA_HASHTABLE,
|
||||
data,
|
||||
&err);
|
||||
|
@ -482,6 +496,7 @@ UConverterSharedData *getSharedConverterData (const char *name)
|
|||
|
||||
/*frees the string of memory blocks associates with a sharedConverter
|
||||
*if and only if the referenceCounter == 0
|
||||
* ### this cleanup would be cleaner in a function in UConverterImpl
|
||||
*/
|
||||
bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
||||
{
|
||||
|
@ -510,7 +525,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
{
|
||||
ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
|
||||
ucmp16_close (deadSharedData->table->mbcs.toUnicode);
|
||||
uprv_free (deadSharedData->table);
|
||||
uprv_free (deadSharedData->table);
|
||||
};
|
||||
break;
|
||||
|
||||
|
@ -519,7 +534,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
{
|
||||
ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
|
||||
ucmp16_close (deadSharedData->table->dbcs.toUnicode);
|
||||
uprv_free (deadSharedData->table);
|
||||
uprv_free (deadSharedData->table);
|
||||
};
|
||||
break;
|
||||
|
||||
|
@ -537,55 +552,6 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
bool_t isDataBasedConverter (const char *name)
|
||||
{
|
||||
int32_t i = 0;
|
||||
bool_t result = FALSE;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
|
||||
/*Lazy evaluates the hashtable */
|
||||
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL)
|
||||
{
|
||||
UHashtable* myHT;
|
||||
|
||||
{
|
||||
myHT = uhash_open ((UHashFunction)uhash_hashIString, &err);
|
||||
|
||||
if (U_FAILURE (err)) return FALSE;
|
||||
while (algorithmicConverterNames[i][0] != '\0')
|
||||
{
|
||||
/*Stores in the hashtable a pointer to the statically init'ed array containing
|
||||
*the names
|
||||
*/
|
||||
|
||||
uhash_put (myHT,
|
||||
(void *) algorithmicConverterNames[i],
|
||||
&err);
|
||||
i++; /*Some Compilers (Solaris WSpro and MSVC-Release Mode
|
||||
*don't differentiate between i++ and ++i
|
||||
*so we have to increment in a line by itself
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
umtx_lock (NULL);
|
||||
if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL) ALGORITHMIC_CONVERTERS_HASHTABLE = myHT;
|
||||
else uhash_close(myHT);
|
||||
umtx_unlock (NULL);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (uhash_get (ALGORITHMIC_CONVERTERS_HASHTABLE,
|
||||
uhash_hashIString (name)) == NULL)
|
||||
{
|
||||
result = TRUE;
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
/*Logic determines if the converter is Algorithmic AND/OR cached
|
||||
*depending on that:
|
||||
* -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
|
||||
|
@ -636,7 +602,8 @@ UConverter *
|
|||
}
|
||||
}
|
||||
|
||||
if (isDataBasedConverter (realName))
|
||||
mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName (realName);
|
||||
if (mySharedConverterData == NULL)
|
||||
{
|
||||
mySharedConverterData = getSharedConverterData (realName);
|
||||
|
||||
|
@ -652,8 +619,6 @@ UConverter *
|
|||
else
|
||||
{
|
||||
/*shared it with other library clients */
|
||||
|
||||
|
||||
shareConverterData (myUConverter->sharedData);
|
||||
return myUConverter;
|
||||
}
|
||||
|
@ -668,6 +633,7 @@ UConverter *
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* ### this is unsafe: the shared data could have been deleted since sharing or getting it - these operations should increase the counter! */
|
||||
/*update the reference counter: one more client */
|
||||
umtx_lock (NULL);
|
||||
mySharedConverterData->referenceCounter++;
|
||||
|
@ -681,45 +647,24 @@ UConverter *
|
|||
}
|
||||
else
|
||||
{
|
||||
/*with have an algorithmic converter */
|
||||
mySharedConverterData = getSharedConverterData (realName);
|
||||
|
||||
/*Non cached */
|
||||
if (mySharedConverterData == NULL)
|
||||
/* ### we have an algorithmic converter, it does not need to be cached?! */
|
||||
if (getSharedConverterData (realName) == NULL)
|
||||
{
|
||||
myUConverter = createConverterFromAlgorithmicType (realName, err);
|
||||
if (U_FAILURE (*err) || (myUConverter == NULL))
|
||||
{
|
||||
uprv_free (myUConverter);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* put the shared object in shared table */
|
||||
shareConverterData (myUConverter->sharedData);
|
||||
return myUConverter;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myUConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*Increase the reference counter */
|
||||
umtx_lock (NULL);
|
||||
mySharedConverterData->referenceCounter++;
|
||||
umtx_unlock (NULL);
|
||||
|
||||
/*initializes the converter */
|
||||
myUConverter->sharedData = mySharedConverterData;
|
||||
initializeAlgorithmicConverter (myUConverter);
|
||||
return myUConverter;
|
||||
/* put the shared object in shared table */
|
||||
shareConverterData (mySharedConverterData);
|
||||
}
|
||||
|
||||
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myUConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*initializes the converter */
|
||||
uprv_memset(myUConverter, 0, sizeof(UConverter));
|
||||
myUConverter->sharedData = mySharedConverterData;
|
||||
initializeAlgorithmicConverter (myUConverter);
|
||||
return myUConverter;
|
||||
}
|
||||
|
||||
|
@ -751,21 +696,11 @@ void initializeDataConverter (UConverter * myUConverter)
|
|||
}
|
||||
|
||||
/* This function initializes algorithmic converters
|
||||
* based on there type
|
||||
* based on their type
|
||||
*/
|
||||
void
|
||||
initializeAlgorithmicConverter (UConverter * myConverter)
|
||||
{
|
||||
char UTF8_subChar[] = {(char) 0xFF, (char) 0xFF, (char) 0xFF};
|
||||
char UTF16BE_subChar[] = {(char) 0xFF, (char) 0xFD};
|
||||
char UTF16LE_subChar[] = {(char) 0xFD, (char) 0xFF};
|
||||
char EUC_subChar[] = {(char) 0xAF, (char) 0xFE};
|
||||
char GB_subChar[] = {(char) 0xFF, (char) 0xFF};
|
||||
char JIS_subChar[] = {(char) 0xFF, (char) 0xFF};
|
||||
char LATIN1_subChar = 0x1A;
|
||||
|
||||
|
||||
|
||||
myConverter->mode = UCNV_SI;
|
||||
myConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
||||
myConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
||||
|
@ -774,216 +709,79 @@ void
|
|||
|
||||
myConverter->extraInfo = NULL;
|
||||
|
||||
myConverter->fromUnicodeStatus = 0;
|
||||
myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
|
||||
myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen;
|
||||
uprv_memcpy (myConverter->subChar, myConverter->sharedData->defaultConverterValues.subChar, UCNV_MAX_SUBCHAR_LEN);
|
||||
|
||||
/* ### it would be cleaner to have the following in a function in UConverterImpl, with a UErrorCode */
|
||||
switch (myConverter->sharedData->conversionType)
|
||||
{
|
||||
case UCNV_UTF8:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 1;
|
||||
myConverter->sharedData->maxBytesPerChar = 4;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 3;
|
||||
myConverter->subCharLen = 3;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0; /* srl */
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1208;
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF8");
|
||||
uprv_memcpy (myConverter->subChar, UTF8_subChar, 3);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
|
||||
|
||||
break;
|
||||
}
|
||||
case UCNV_LATIN_1:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 1;
|
||||
myConverter->sharedData->maxBytesPerChar = 1;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
|
||||
myConverter->subCharLen = 1;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 819;
|
||||
uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
|
||||
*(myConverter->subChar) = LATIN1_subChar;
|
||||
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
||||
break;
|
||||
}
|
||||
|
||||
case UCNV_UTF16_BigEndian:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 2;
|
||||
myConverter->sharedData->maxBytesPerChar = 2;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0;
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1200;
|
||||
uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case UCNV_UTF16_LittleEndian:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 2;
|
||||
myConverter->sharedData->maxBytesPerChar = 2;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0;
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1200;
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
|
||||
uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_EUC:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 1;
|
||||
myConverter->sharedData->maxBytesPerChar = 2;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
uprv_memcpy (myConverter->subChar, EUC_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_ISO_2022:
|
||||
{
|
||||
myConverter->charErrorBuffer[0] = 0x1b;
|
||||
myConverter->charErrorBuffer[1] = 0x25;
|
||||
myConverter->charErrorBuffer[2] = 0x42;
|
||||
myConverter->charErrorBufferLength = 3;
|
||||
myConverter->sharedData->minBytesPerChar = 1;
|
||||
myConverter->sharedData->maxBytesPerChar = 3;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 1;
|
||||
myConverter->subCharLen = 1;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0; /* srl */
|
||||
myConverter->sharedData->codepage = 2022;
|
||||
uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
|
||||
*(myConverter->subChar) = LATIN1_subChar;
|
||||
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
||||
myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
|
||||
/* ### check for extraInfo==NULL !! does this need to be allocated at all? */
|
||||
((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
|
||||
((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
|
||||
break;
|
||||
}
|
||||
case UCNV_GB:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 2;
|
||||
myConverter->sharedData->maxBytesPerChar = 2;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
uprv_memcpy (myConverter->subChar, GB_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_JIS:
|
||||
{
|
||||
myConverter->sharedData->minBytesPerChar = 2;
|
||||
myConverter->sharedData->maxBytesPerChar = 2;
|
||||
myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0;
|
||||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
uprv_memcpy (myConverter->subChar, JIS_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
};
|
||||
|
||||
myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus;
|
||||
}
|
||||
|
||||
|
||||
/*This function creates an algorithmic converter
|
||||
*Note That even algorithmic converters are shared
|
||||
* (The UConverterSharedData->table == NULL since
|
||||
* there are no tables)
|
||||
*for uniformity of design and control flow
|
||||
*/
|
||||
UConverter *
|
||||
createConverterFromAlgorithmicType (const char *actualName, UErrorCode * err)
|
||||
{
|
||||
int32_t i = 0;
|
||||
UConverter *myConverter = NULL;
|
||||
UConverterSharedData *mySharedData = NULL;
|
||||
UConverterType myType = getAlgorithmicTypeFromName (actualName);
|
||||
|
||||
if (U_FAILURE (*err))
|
||||
return NULL;
|
||||
|
||||
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
myConverter->sharedData = NULL;
|
||||
mySharedData = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
|
||||
if (mySharedData == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free (myConverter);
|
||||
return NULL;
|
||||
}
|
||||
mySharedData->structSize = sizeof(UConverterSharedData);
|
||||
mySharedData->table = NULL;
|
||||
mySharedData->dataMemory = NULL;
|
||||
uprv_strcpy (mySharedData->name, actualName);
|
||||
/*Initializes the referenceCounter to 1 */
|
||||
mySharedData->referenceCounter = 1;
|
||||
mySharedData->platform = UCNV_UNKNOWN;
|
||||
mySharedData->codepage = 0;
|
||||
mySharedData->conversionType = myType;
|
||||
myConverter->sharedData = mySharedData;
|
||||
|
||||
initializeAlgorithmicConverter (myConverter);
|
||||
return myConverter;
|
||||
}
|
||||
|
||||
|
||||
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *source, UErrorCode *status)
|
||||
UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *source, UErrorCode *status)
|
||||
{
|
||||
const uint8_t *raw, *oldraw;
|
||||
UConverterSharedData *data = NULL;
|
||||
|
||||
UConverterType type = source->conversionType;
|
||||
|
||||
if(U_FAILURE(*status))
|
||||
return NULL;
|
||||
|
||||
if(source->structSize != sizeof(UConverterSharedData))
|
||||
if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
|
||||
converterData[type]->referenceCounter != 1 ||
|
||||
source->structSize != sizeof(UConverterSharedData_1_4))
|
||||
{
|
||||
*status = U_INVALID_TABLE_FORMAT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
data = (UConverterSharedData*) malloc(sizeof(UConverterSharedData));
|
||||
raw = (uint8_t*)source;
|
||||
uprv_memcpy(data,source,sizeof(UConverterSharedData));
|
||||
|
||||
raw += data->structSize;
|
||||
data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
|
||||
if(data == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* data->table = (UConverterTable*)raw; */
|
||||
|
||||
/* copy initial values from the static structure for this type */
|
||||
uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
|
||||
|
||||
/* ### it would be much more efficient if the table were a direct member, not a pointer */
|
||||
data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable));
|
||||
if(data->table == NULL) {
|
||||
uprv_free(data);
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* fill in fields from the loaded data */
|
||||
data->name = source->name; /* ### this could/should come from the caller - should be the same as the canonical name?!! */
|
||||
data->codepage = source->codepage;
|
||||
data->platform = source->platform;
|
||||
data->minBytesPerChar = source->minBytesPerChar;
|
||||
data->maxBytesPerChar = source->maxBytesPerChar;
|
||||
uprv_memcpy(&data->defaultConverterValues, &source->defaultConverterValues, sizeof(data->defaultConverterValues));
|
||||
|
||||
raw = (uint8_t*)source + source->structSize;
|
||||
|
||||
/* the checks above made sure that the type is valid for a data-based converter */
|
||||
switch (data->conversionType)
|
||||
{
|
||||
case UCNV_SBCS:
|
||||
data->table = malloc(sizeof(UConverterSBCSTable));
|
||||
data->table->sbcs.toUnicode = (UChar*)raw;
|
||||
raw += sizeof(UChar)*256;
|
||||
|
||||
|
@ -993,22 +791,20 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
|
|||
|
||||
case UCNV_EBCDIC_STATEFUL:
|
||||
case UCNV_DBCS:
|
||||
data->table = uprv_malloc(sizeof(UConverterDBCSTable));
|
||||
|
||||
oldraw = raw;
|
||||
|
||||
data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status);
|
||||
|
||||
while((raw-oldraw)%4) /* pad to 4 */
|
||||
raw++;
|
||||
/* pad to 4 */
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3);
|
||||
}
|
||||
|
||||
data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status);
|
||||
|
||||
break;
|
||||
|
||||
case UCNV_MBCS:
|
||||
data->table = uprv_malloc(sizeof(UConverterMBCSTable));
|
||||
|
||||
data->table->mbcs.starters = (bool_t*)raw;
|
||||
raw += sizeof(bool_t)*256;
|
||||
|
||||
|
@ -1016,24 +812,15 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
|
|||
|
||||
data->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, status);
|
||||
|
||||
while((raw-oldraw)%4) /* pad to 4 */
|
||||
raw++;
|
||||
/* pad to 4 */
|
||||
if(((raw-oldraw)&3)!=0) {
|
||||
raw+=4-((raw-oldraw)&3);
|
||||
}
|
||||
|
||||
data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status);
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
*status = U_INVALID_TABLE_FORMAT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -15,286 +15,150 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
#define missingCharMarker 0xFFFF
|
||||
#define missingUCharMarker 0xFFFD
|
||||
|
||||
/*
 * Invokes the converter's from-Unicode error callback from inside a
 * conversion loop that works with base pointers plus indexes.
 *
 * If the installed callback is UCNV_FROM_U_CALLBACK_STOP the macro executes a
 * bare "break", so it must be expanded directly inside the loop that is
 * supposed to terminate (not inside a nested loop or switch).
 * Otherwise the callback is called with pointers to temporary copies of the
 * current target and source positions, and mySourceIndex / myTargetIndex are
 * recomputed from wherever the callback left those copies.
 * The offsets argument is handed to the callback unchanged.
 */
#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
|
||||
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
|
||||
else \
|
||||
{ \
|
||||
char *myTargetCopy = myTarget + myTargetIndex; \
|
||||
const UChar *mySourceCopy = mySource + mySourceIndex; \
|
||||
/*copies current values for the ErrorFunctor to update */ \
|
||||
/*Calls the ErrorFunctor */ \
|
||||
_this->fromUCharErrorBehaviour (_this, \
|
||||
(char **) &myTargetCopy, \
|
||||
targetLimit, \
|
||||
(const UChar **) &mySourceCopy, \
|
||||
sourceLimit, \
|
||||
offsets, \
|
||||
flush, \
|
||||
err); \
|
||||
/*Update the local Indexes so that the conversion can restart at the right points */ \
|
||||
mySourceIndex = (mySourceCopy - mySource) ; \
|
||||
myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
|
||||
}
|
||||
|
||||
/*
 * Invokes the converter's to-Unicode error callback from inside a conversion
 * loop that works with base pointers plus indexes.
 *
 * If the installed callback is UCNV_TO_U_CALLBACK_STOP the macro executes a
 * bare "break", so it must be expanded directly inside the loop that is
 * supposed to terminate (not inside a nested loop or switch).
 * Otherwise the callback is called with pointers to temporary copies of the
 * current target and source positions, and mySourceIndex / myTargetIndex are
 * recomputed from wherever the callback left those copies.
 * The offsets argument is handed to the callback unchanged.
 */
#define ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
|
||||
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
|
||||
else \
|
||||
{ \
|
||||
UChar *myTargetCopy = myTarget + myTargetIndex; \
|
||||
const char *mySourceCopy = mySource + mySourceIndex; \
|
||||
/*Calls the ErrorFunctor */ \
|
||||
_this->fromCharErrorBehaviour (_this, \
|
||||
&myTargetCopy, \
|
||||
targetLimit, \
|
||||
(const char **) &mySourceCopy, \
|
||||
sourceLimit, \
|
||||
offsets, \
|
||||
flush, \
|
||||
err); \
|
||||
/*Update the local Indexes so that the conversion can restart at the right points */ \
|
||||
mySourceIndex = ((char*)mySourceCopy - (char*)mySource); \
|
||||
myTargetIndex = (myTargetCopy - myTarget); \
|
||||
}
|
||||
|
||||
/*
 * Offset-tracking variant of the from-Unicode error-callback invocation.
 *
 * If the installed callback is UCNV_FROM_U_CALLBACK_STOP the macro executes a
 * bare "break", so it must be expanded directly inside the loop that is
 * supposed to terminate (not inside a nested loop or switch).
 * Otherwise the callback is called with offsets + myTargetIndex, i.e. the
 * offsets slot that belongs to the next target position, and the indexes are
 * recomputed from wherever the callback left the position copies.
 * Afterwards every offsets entry between the old and the new target index is
 * incremented by currentOffset.
 *
 * NOTE: currentOffset is not a macro parameter; a variable of that name must
 * be in scope where the macro is expanded.
 */
#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
|
||||
if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
|
||||
else \
|
||||
{ \
|
||||
char *myTargetCopy = myTarget + myTargetIndex; \
|
||||
const UChar *mySourceCopy = mySource + mySourceIndex; \
|
||||
int32_t My_i = myTargetIndex; \
|
||||
/*copies current values for the ErrorFunctor to update */ \
|
||||
/*Calls the ErrorFunctor */ \
|
||||
_this->fromUCharErrorBehaviour (_this, \
|
||||
(char **) &myTargetCopy, \
|
||||
targetLimit, \
|
||||
(const UChar **) &mySourceCopy, \
|
||||
sourceLimit, \
|
||||
offsets + myTargetIndex, \
|
||||
flush, \
|
||||
err); \
|
||||
/*Update the local Indexes so that the conversion can restart at the right points */ \
|
||||
mySourceIndex = mySourceCopy - mySource ; \
|
||||
myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \
|
||||
for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \
|
||||
}
|
||||
|
||||
/*
 * Offset-tracking variant of the to-Unicode error-callback invocation.
 *
 * If the installed callback is UCNV_TO_U_CALLBACK_STOP the macro executes a
 * bare "break", so it must be expanded directly inside the loop that is
 * supposed to terminate (not inside a nested loop or switch).
 * Otherwise the callback is called with offsets + myTargetIndex, i.e. the
 * offsets slot that belongs to the next target position, and the indexes are
 * recomputed from wherever the callback left the position copies.
 * Afterwards every offsets entry between the old and the new target index is
 * incremented by currentOffset.
 *
 * NOTE: currentOffset is not a macro parameter; a variable of that name must
 * be in scope where the macro is expanded.
 */
#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \
|
||||
if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \
|
||||
else \
|
||||
{ \
|
||||
UChar *myTargetCopy = myTarget + myTargetIndex; \
|
||||
const char *mySourceCopy = mySource + mySourceIndex; \
|
||||
int32_t My_i = myTargetIndex; \
|
||||
_this->fromCharErrorBehaviour (_this, \
|
||||
&myTargetCopy, \
|
||||
targetLimit, \
|
||||
(const char **) &mySourceCopy, \
|
||||
sourceLimit, \
|
||||
offsets + myTargetIndex, \
|
||||
flush, \
|
||||
err); \
|
||||
/*Update the local Indexes so that the conversion can restart at the right points */ \
|
||||
mySourceIndex = (char *)mySourceCopy - (char*)mySource; \
|
||||
myTargetIndex = ((UChar*)myTargetCopy - (UChar*)myTarget); \
|
||||
for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \
|
||||
}
|
||||
|
||||
/*
 * Signature of a codepage-to-Unicode conversion routine.
 * Argument order, as used by the T_UConverter_toUnicode_* implementations:
 * converter, in/out target pointer, target limit, in/out source pointer,
 * source limit, offsets array, flush flag, in/out error code.
 */
typedef void (*T_ToUnicodeFunction) (UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
/*
 * Signature of a Unicode-to-codepage conversion routine.
 * Argument order, as used by the T_UConverter_fromUnicode_* implementations:
 * converter, in/out target pointer, target limit, in/out source pointer,
 * source limit, offsets array, flush flag, in/out error code.
 */
typedef void (*T_FromUnicodeFunction) (UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
/*
 * Signature of a routine that converts and returns a single character.
 * Argument order, as used by the T_UConverter_getNextUChar_* implementations:
 * converter, in/out source pointer, source limit, in/out error code.
 */
typedef UChar (*T_GetNextUCharFunction) (UConverter *,
|
||||
const char **,
|
||||
const char *,
|
||||
UErrorCode *);
|
||||
|
||||
bool_t CONVERSION_U_SUCCESS (UErrorCode err);
|
||||
|
||||
void T_UConverter_toUnicode_SBCS (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
void flushInternalUnicodeBuffer (UConverter * _this,
|
||||
UChar * myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_SBCS (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
void flushInternalCharBuffer (UConverter * _this,
|
||||
char *myTarget,
|
||||
int32_t * myTargetIndex,
|
||||
int32_t targetLength,
|
||||
int32_t** offsets,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_MBCS (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
struct UConverterImpl {
|
||||
UConverterType type;
|
||||
|
||||
void T_UConverter_fromUnicode_MBCS (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
T_ToUnicodeFunction toUnicode;
|
||||
T_ToUnicodeFunction toUnicodeWithOffsets;
|
||||
T_FromUnicodeFunction fromUnicode;
|
||||
T_FromUnicodeFunction fromUnicodeWithOffsets;
|
||||
T_GetNextUCharFunction getNextUChar;
|
||||
};
|
||||
|
||||
void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
extern UConverterSharedData
|
||||
_SBCSData, _DBCSData, _MBCSData, _Latin1Data,
|
||||
_UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData,
|
||||
_ISO2022Data;
|
||||
|
||||
void T_UConverter_toUnicode_DBCS (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_DBCS (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_UTF16_BE (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_UTF16_BE (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_UTF16_LE (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_EBCDIC_STATEFUL(UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_EBCDIC_STATEFUL(UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_ISO_2022(UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_ISO_2022(UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
void T_UConverter_toUnicode_UTF16_LE (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_UTF8 (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_UTF8 (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_fromUnicode_LATIN_1 (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
void T_UConverter_toUnicode_LATIN_1 (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_LATIN_1 (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_SBCS (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_DBCS (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_MBCS (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_UTF8 (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_UTF16_BE (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_UTF16_LE (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
UChar T_UConverter_getNextUChar_ISO_2022 (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
U_CDECL_END
|
||||
|
||||
#endif /* UCNV_CNV */
|
||||
|
|
552
icu4c/source/common/ucnv_mbcs.c
Normal file
552
icu4c/source/common/ucnv_mbcs.c
Normal file
|
@ -0,0 +1,552 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_mbcs.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000feb03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
/* MBCS --------------------------------------------------------------------- */
|
||||
|
||||
void T_UConverter_toUnicode_MBCS (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const char *mySource = *source;
|
||||
UChar *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactShortArray *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
bool_t *myStarters = NULL;
|
||||
|
||||
|
||||
|
||||
|
||||
myToUnicode = _this->sharedData->table->mbcs.toUnicode;
|
||||
myStarters = _this->sharedData->table->mbcs.starters;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
||||
|
||||
|
||||
if (myStarters[(uint8_t) mySourceChar] &&
|
||||
(_this->toUnicodeStatus == 0x00))
|
||||
{
|
||||
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*In case there is a state, we update the source char
|
||||
*by concatenating the previous char with the current
|
||||
*one
|
||||
*/
|
||||
|
||||
if (_this->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
||||
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
|
||||
/*gets the corresponding Unicode codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
{
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
if (mySourceChar > 0xff)
|
||||
{
|
||||
_this->invalidCharLength = 2;
|
||||
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
_this->invalidCharLength = 1;
|
||||
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
||||
}
|
||||
|
||||
ToU_CALLBACK_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (_this->toUnicodeStatus
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (flush == TRUE))
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const char *mySource = *source;
|
||||
UChar *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactShortArray *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
UChar oldMySourceChar;
|
||||
bool_t *myStarters = NULL;
|
||||
int32_t* originalOffsets = offsets;
|
||||
|
||||
|
||||
|
||||
myToUnicode = _this->sharedData->table->mbcs.toUnicode;
|
||||
myStarters = _this->sharedData->table->mbcs.starters;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) (mySource[mySourceIndex++]);
|
||||
|
||||
|
||||
if (myStarters[(uint8_t) mySourceChar] &&
|
||||
(_this->toUnicodeStatus == 0x00))
|
||||
{
|
||||
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*In case there is a state, we update the source char
|
||||
*by concatenating the previous char with the current
|
||||
*one
|
||||
*/
|
||||
|
||||
if (_this->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar |= (UChar) (_this->toUnicodeStatus << 8);
|
||||
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
|
||||
/*gets the corresponding Unicode codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
{
|
||||
|
||||
|
||||
if (targetUniChar > 0x00FF)
|
||||
offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
|
||||
else
|
||||
offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
|
||||
|
||||
|
||||
}
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
oldMySourceChar = mySourceChar;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
if (mySourceChar > 0xff)
|
||||
{
|
||||
_this->invalidCharLength = 2;
|
||||
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
_this->invalidCharLength = 1;
|
||||
_this->invalidCharBuffer[0] = (char) mySourceChar;
|
||||
}
|
||||
|
||||
ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if (_this->toUnicodeStatus
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (flush == TRUE))
|
||||
{
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void T_UConverter_fromUnicode_MBCS (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
|
||||
{
|
||||
const UChar *mySource = *source;
|
||||
char *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactShortArray *myFromUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
int8_t targetUniCharByteNum = 0;
|
||||
UChar mySourceChar = 0x0000;
|
||||
|
||||
myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
|
||||
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) mySource[mySourceIndex++];
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
|
||||
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
if (targetUniChar <= 0x00FF)
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
_this->charErrorBuffer[0] = (char) targetUniChar;
|
||||
_this->charErrorBufferLength = 1;
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
||||
_this->invalidUCharLength = 1;
|
||||
|
||||
FromU_CALLBACK_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
|
||||
{
|
||||
const UChar *mySource = *source;
|
||||
char *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactShortArray *myFromUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
int8_t targetUniCharByteNum = 0;
|
||||
UChar mySourceChar = 0x0000;
|
||||
int32_t* originalOffsets = offsets;
|
||||
|
||||
myFromUnicode = _this->sharedData->table->mbcs.fromUnicode;
|
||||
|
||||
|
||||
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) mySource[mySourceIndex++];
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
if (targetUniChar <= 0x00FF)
|
||||
{
|
||||
offsets[myTargetIndex] = mySourceIndex-1;
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
offsets[myTargetIndex] = mySourceIndex-1;
|
||||
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
offsets[myTargetIndex] = mySourceIndex-1;
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
_this->charErrorBuffer[0] = (char) targetUniChar;
|
||||
_this->charErrorBufferLength = 1;
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t currentOffset = mySourceIndex -1;
|
||||
int32_t* offsetsAnchor = offsets;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
||||
_this->invalidUCharLength = 1;
|
||||
|
||||
FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UChar T_UConverter_getNextUChar_MBCS(UConverter* converter,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
UErrorCode* err)
|
||||
{
|
||||
UChar myUChar;
|
||||
char const *sourceInitial = *source;
|
||||
/*safe keeps a ptr to the beginning in case we need to step back*/
|
||||
|
||||
/*Input boundary check*/
|
||||
if ((*source)+1 > sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
/*Checks to see if the byte is a lead*/
|
||||
if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE)
|
||||
{
|
||||
/*Not lead byte: we update the source ptr and get the codepoint*/
|
||||
myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
|
||||
(UChar)(**source));
|
||||
(*source)++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*Lead byte: we Build the codepoint and get the corresponding character
|
||||
* and update the source ptr*/
|
||||
if ((*source + 2) > sourceLimit)
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode,
|
||||
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
|
||||
|
||||
(*source) += 2;
|
||||
}
|
||||
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
else
|
||||
{
|
||||
/*rewinds source*/
|
||||
const char* sourceFinal = *source;
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
*source = sourceInitial;
|
||||
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
converter->fromCharErrorBehaviour(converter,
|
||||
&myUCharPtr,
|
||||
myUCharPtr + 1,
|
||||
&sourceFinal,
|
||||
sourceLimit,
|
||||
NULL,
|
||||
TRUE,
|
||||
err);
|
||||
|
||||
/*makes the internal caching transparent to the user*/
|
||||
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
||||
|
||||
return myUChar;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Function table for the MBCS converter type.  The initializer is positional
 * and follows the member order of struct UConverterImpl: type, toUnicode,
 * toUnicodeWithOffsets, fromUnicode, fromUnicodeWithOffsets, getNextUChar.
 */
static UConverterImpl _MBCSImpl={
|
||||
UCNV_MBCS, /* type */
|
||||
|
||||
T_UConverter_toUnicode_MBCS, /* toUnicode */
|
||||
T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC, /* toUnicodeWithOffsets */
|
||||
T_UConverter_fromUnicode_MBCS, /* fromUnicode */
|
||||
T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC, /* fromUnicodeWithOffsets */
|
||||
T_UConverter_getNextUChar_MBCS /* getNextUChar */
|
||||
};
|
||||
|
||||
extern UConverterSharedData _MBCSData={
|
||||
sizeof(UConverterSharedData), 1,
|
||||
NULL, NULL, &_MBCSImpl, "MBCS",
|
||||
0, UCNV_IBM, UCNV_MBCS, 1, 1,
|
||||
{ 0, 1, 0, 0, 0, 0 }
|
||||
};
|
486
icu4c/source/common/ucnv_sbcs.c
Normal file
486
icu4c/source/common/ucnv_sbcs.c
Normal file
|
@ -0,0 +1,486 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_sbcs.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2000feb03
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_cnv.h"
|
||||
|
||||
/* SBCS --------------------------------------------------------------------- */
|
||||
|
||||
void T_UConverter_toUnicode_SBCS (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
char *mySource = (char *) *source;
|
||||
UChar *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - (char *) mySource;
|
||||
UChar *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
|
||||
myToUnicode = _this->sharedData->table->sbcs.toUnicode;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];
|
||||
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
{
|
||||
/* writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
|
||||
_this->invalidCharLength = 1;
|
||||
|
||||
ToU_CALLBACK_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void T_UConverter_fromUnicode_SBCS (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const UChar *mySource = *source;
|
||||
unsigned char *myTarget = (unsigned char *) *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - (char *) myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactByteArray *myFromUnicode;
|
||||
unsigned char targetChar = 0x00;
|
||||
|
||||
myFromUnicode = _this->sharedData->table->sbcs.fromUnicode;
|
||||
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]);
|
||||
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceIndex++;
|
||||
if (targetChar != 0 || !mySource[mySourceIndex - 1])
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = targetChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
|
||||
_this->invalidUCharLength = 1;
|
||||
|
||||
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
||||
FromU_CALLBACK_MACRO(_this,
|
||||
(char *)myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
if (U_FAILURE (*err))
|
||||
{
|
||||
break;
|
||||
}
|
||||
_this->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UChar T_UConverter_getNextUChar_SBCS(UConverter* converter,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
UErrorCode* err)
|
||||
{
|
||||
UChar myUChar;
|
||||
|
||||
|
||||
if ((*source)+1 > sourceLimit)
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
|
||||
/*Gets the corresponding codepoint*/
|
||||
myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)];
|
||||
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
else
|
||||
{
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
const char* sourceFinal = *source;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
/*Calls the ErrorFunctor after rewinding the input buffer*/
|
||||
(*source)--;
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
converter->fromCharErrorBehaviour(converter,
|
||||
&myUCharPtr,
|
||||
myUCharPtr + 1,
|
||||
&sourceFinal,
|
||||
sourceLimit,
|
||||
NULL,
|
||||
TRUE,
|
||||
err);
|
||||
|
||||
/*makes the internal caching transparent to the user*/
|
||||
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
||||
|
||||
return myUChar;
|
||||
}
|
||||
}
|
||||
|
||||
static UConverterImpl _SBCSImpl={
|
||||
UCNV_SBCS,
|
||||
|
||||
T_UConverter_toUnicode_SBCS,
|
||||
NULL,
|
||||
T_UConverter_fromUnicode_SBCS,
|
||||
NULL,
|
||||
T_UConverter_getNextUChar_SBCS
|
||||
};
|
||||
|
||||
extern UConverterSharedData _SBCSData={
|
||||
sizeof(UConverterSharedData), 1,
|
||||
NULL, NULL, &_SBCSImpl, "SBCS",
|
||||
0, UCNV_IBM, UCNV_SBCS, 1, 1,
|
||||
{ 0, 1, 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
/* DBCS --------------------------------------------------------------------- */
|
||||
|
||||
void T_UConverter_toUnicode_DBCS (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const char *mySource = ( char *) *source;
|
||||
UChar *myTarget = *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - myTarget;
|
||||
int32_t sourceLength = sourceLimit - (char *) mySource;
|
||||
CompactShortArray *myToUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
|
||||
myToUnicode = _this->sharedData->table->dbcs.toUnicode;
|
||||
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
/*gets the corresponding UniChar */
|
||||
mySourceChar = (unsigned char) mySource[mySourceIndex++];
|
||||
|
||||
/*We have no internal state, we should */
|
||||
if (_this->toUnicodeStatus == 0x00)
|
||||
{
|
||||
_this->toUnicodeStatus = (unsigned char) mySourceChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (_this->toUnicodeStatus != 0x00)
|
||||
{
|
||||
mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF));
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
|
||||
targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);
|
||||
|
||||
/*writing the UniChar to the output stream */
|
||||
if (targetUniChar != missingUCharMarker)
|
||||
{
|
||||
/*writes the UniChar to the output stream */
|
||||
myTarget[myTargetIndex++] = targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
|
||||
_this->invalidCharBuffer[1] = (char) mySourceChar;
|
||||
_this->invalidCharLength = 2;
|
||||
|
||||
ToU_CALLBACK_MACRO(_this,
|
||||
myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidCharLength = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*If at the end of conversion we are still carrying state information
|
||||
*flush is TRUE, we can deduce that the input stream is truncated
|
||||
*/
|
||||
if ((flush == TRUE)
|
||||
&& (mySourceIndex == sourceLength)
|
||||
&& (_this->toUnicodeStatus != 0x00))
|
||||
{
|
||||
|
||||
if (U_SUCCESS(*err))
|
||||
{
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
_this->toUnicodeStatus = 0x00;
|
||||
}
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void T_UConverter_fromUnicode_DBCS (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t *offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err)
|
||||
{
|
||||
const UChar *mySource = *source;
|
||||
unsigned char *myTarget = (unsigned char *) *target;
|
||||
int32_t mySourceIndex = 0;
|
||||
int32_t myTargetIndex = 0;
|
||||
int32_t targetLength = targetLimit - (char *) myTarget;
|
||||
int32_t sourceLength = sourceLimit - mySource;
|
||||
CompactShortArray *myFromUnicode = NULL;
|
||||
UChar targetUniChar = 0x0000;
|
||||
UChar mySourceChar = 0x0000;
|
||||
|
||||
myFromUnicode = _this->sharedData->table->dbcs.fromUnicode;
|
||||
|
||||
/*writing the char to the output stream */
|
||||
while (mySourceIndex < sourceLength)
|
||||
{
|
||||
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
mySourceChar = (UChar) mySource[mySourceIndex++];
|
||||
|
||||
/*Gets the corresponding codepoint */
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
|
||||
if (targetUniChar != missingCharMarker)
|
||||
{
|
||||
/*writes the char to the output stream */
|
||||
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
|
||||
if (myTargetIndex < targetLength)
|
||||
{
|
||||
myTarget[myTargetIndex++] = (char) targetUniChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
_this->charErrorBuffer[0] = (char) targetUniChar;
|
||||
_this->charErrorBufferLength = 1;
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
|
||||
_this->invalidUCharLength = 1;
|
||||
|
||||
|
||||
/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */
|
||||
FromU_CALLBACK_MACRO(_this,
|
||||
(char *)myTarget,
|
||||
myTargetIndex,
|
||||
targetLimit,
|
||||
mySource,
|
||||
mySourceIndex,
|
||||
sourceLimit,
|
||||
offsets,
|
||||
flush,
|
||||
err);
|
||||
|
||||
if (U_FAILURE (*err)) break;
|
||||
_this->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*target += myTargetIndex;
|
||||
*source += mySourceIndex;;
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
UChar T_UConverter_getNextUChar_DBCS(UConverter* converter,
|
||||
const char** source,
|
||||
const char* sourceLimit,
|
||||
UErrorCode* err)
|
||||
{
|
||||
UChar myUChar;
|
||||
|
||||
/*Checks boundaries and set appropriate error codes*/
|
||||
if ((*source)+2 > sourceLimit)
|
||||
{
|
||||
if ((*source) >= sourceLimit)
|
||||
{
|
||||
/*Either caller has reached the end of the byte stream*/
|
||||
*err = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
}
|
||||
else if (((*source)+1) == sourceLimit)
|
||||
{
|
||||
/* a character was cut in half*/
|
||||
*err = U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
/*Gets the corresponding codepoint*/
|
||||
myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode,
|
||||
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
|
||||
|
||||
/*update the input pointer*/
|
||||
*source += 2;
|
||||
if (myUChar != 0xFFFD) return myUChar;
|
||||
else
|
||||
{
|
||||
UChar* myUCharPtr = &myUChar;
|
||||
const char* sourceFinal = *source;
|
||||
|
||||
/*Calls the ErrorFunctor after rewinding the input buffer*/
|
||||
(*source) -= 2;
|
||||
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
/*It's is very likely that the ErrorFunctor will write to the
|
||||
*internal buffers */
|
||||
converter->fromCharErrorBehaviour(converter,
|
||||
&myUCharPtr,
|
||||
myUCharPtr + 1,
|
||||
&sourceFinal,
|
||||
sourceLimit,
|
||||
NULL,
|
||||
TRUE,
|
||||
err);
|
||||
/*makes the internal caching transparent to the user*/
|
||||
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
|
||||
|
||||
return myUChar;
|
||||
}
|
||||
}
|
||||
|
||||
static UConverterImpl _DBCSImpl={
|
||||
UCNV_DBCS,
|
||||
|
||||
T_UConverter_toUnicode_DBCS,
|
||||
NULL,
|
||||
T_UConverter_fromUnicode_DBCS,
|
||||
NULL,
|
||||
T_UConverter_getNextUChar_DBCS
|
||||
};
|
||||
|
||||
extern UConverterSharedData _DBCSData={
|
||||
sizeof(UConverterSharedData), 1,
|
||||
NULL, NULL, &_DBCSImpl, "DBCS",
|
||||
0, UCNV_IBM, UCNV_DBCS, 2, 2,
|
||||
{ 0, 1, 0, 0, 0, 0 }
|
||||
};
|
1184
icu4c/source/common/ucnv_utf.c
Normal file
1184
icu4c/source/common/ucnv_utf.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -46,8 +46,8 @@ typedef struct _CompactByteArray CompactByteArray;
|
|||
/*Pointer to the aforementioned file */
|
||||
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
|
||||
|
||||
#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */
|
||||
#define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */
|
||||
#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */
|
||||
#define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNSUPPORTED_CONVERTER = -1,
|
||||
|
@ -61,12 +61,10 @@ typedef enum {
|
|||
UCNV_EBCDIC_STATEFUL = 7,
|
||||
UCNV_ISO_2022 = 8,
|
||||
/* Number of converter types for which we have conversion routines. */
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
|
||||
UCNV_JIS = 9,
|
||||
UCNV_EUC = 10,
|
||||
UCNV_GB = 11
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9
|
||||
} UConverterType;
|
||||
|
||||
/* ### move the following typedef and array into implementation files! */
|
||||
typedef struct
|
||||
{
|
||||
int32_t ccsid;
|
||||
|
@ -121,95 +119,125 @@ typedef union
|
|||
UConverterTable;
|
||||
|
||||
|
||||
/*Defines the struct of a UConverterSharedData the immutable, shared part of
|
||||
*UConverter
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
void *dataMemory;
|
||||
uint32_t referenceCounter; /*used to count number of clients */
|
||||
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
|
||||
UConverterPlatform platform; /*platform of the converter (only IBM now) */
|
||||
int32_t codepage; /*codepage # (now IBM-$codepage) */
|
||||
UConverterType conversionType; /*conversion type */
|
||||
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
|
||||
struct
|
||||
{ /*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
}
|
||||
defaultConverterValues;
|
||||
UConverterTable *table; /*Pointer to conversion data */
|
||||
}
|
||||
UConverterSharedData;
|
||||
|
||||
|
||||
/*Defines a UConverter, the lightweight mutable part the user sees */
|
||||
|
||||
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
|
||||
itself is compiled under C++, the linkage of the funcptrs will
|
||||
work.
|
||||
*/
|
||||
*/
|
||||
|
||||
struct UConverter
|
||||
{
|
||||
int32_t toUnicodeStatus; /*Used to internalize stream status information */
|
||||
int32_t fromUnicodeStatus;
|
||||
struct UConverterImpl;
|
||||
typedef struct UConverterImpl UConverterImpl;
|
||||
|
||||
/* ###
|
||||
* Markus Scherer on 2000feb04:
|
||||
* I have changed UConverter and UConverterSharedData; there may be more changes,
|
||||
* or we may decide to roll back the structure definitions to what they were
|
||||
* before, with the additional UConverterImpl field and the new semantics for
|
||||
* referenceCounter.
|
||||
*
|
||||
* Reasons for changes: Attempt at performance improvements, especially
|
||||
* a) decrease amount of internal, implicit padding by reordering the fields
|
||||
* b) save space by storing the internal name of the converter only with a
|
||||
* pointer instead of an array
|
||||
*
|
||||
* In addition to that, I added the UConverterImpl field for better
|
||||
* modularizing the code and making it more maintainable. It may actually
|
||||
* become slightly faster by doing this.
|
||||
*
|
||||
* I changed the UConverter.to|fromUnicodeStatus to be unsigned because
|
||||
* the defaultValues.toUnicodeStatus is unsigned, and it seemed to be a safer choice.
|
||||
*
|
||||
* Ultimately, I would prefer not to expose these definitions any more at all,
|
||||
* but this is subject to discussion, proposals and design reviews.
|
||||
*
|
||||
* I would personally like to see more information hiding (with helper APIs),
|
||||
* useful state fields in UConverter that are reserved for the callbacks,
|
||||
* and directly included structures instead of pointers to allocated
|
||||
* memory, like for UConverterTable and its variant fields.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Defines the UConverterSharedData struct,
|
||||
* the immutable, shared part of UConverter.
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */
|
||||
|
||||
const void *dataMemory; /* from udata_openChoice() */
|
||||
UConverterTable *table; /* Pointer to conversion data */
|
||||
const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
|
||||
const char *name; /* internal name of the converter */
|
||||
|
||||
int32_t codepage; /* codepage # (now IBM-$codepage) */
|
||||
|
||||
int8_t platform; /* platform of the converter (only IBM now) */
|
||||
int8_t conversionType; /* conversion type */
|
||||
|
||||
int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */
|
||||
|
||||
/*initial values of some members of the mutable part of object */
|
||||
struct {
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
} defaultConverterValues;
|
||||
} UConverterSharedData;
|
||||
|
||||
|
||||
/* Defines a UConverter, the lightweight mutable part the user sees */
|
||||
|
||||
struct UConverter {
|
||||
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
|
||||
uint32_t fromUnicodeStatus;
|
||||
int32_t mode;
|
||||
|
||||
int8_t subCharLen; /* length of the codepage specific character sequence */
|
||||
int8_t invalidCharLength;
|
||||
int8_t invalidUCharLength;
|
||||
int8_t pad;
|
||||
int32_t mode;
|
||||
int8_t subCharLen; /*length of the codepage specific character sequence */
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
|
||||
*output stream by the Error function pointers
|
||||
*/
|
||||
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
|
||||
* output stream by the Error function pointers
|
||||
*/
|
||||
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
|
||||
int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */
|
||||
|
||||
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
|
||||
uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
|
||||
|
||||
UChar invalidUCharBuffer[3];
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_fromUnicode call
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
|
||||
|
||||
/*
|
||||
* Error function pointer called when conversion issues
|
||||
* occur during a T_UConverter_fromUnicode call
|
||||
*/
|
||||
void (*fromUCharErrorBehaviour) (struct UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_toUnicode call
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
/*
|
||||
* Error function pointer called when conversion issues
|
||||
* occur during a T_UConverter_toUnicode call
|
||||
*/
|
||||
void (*fromCharErrorBehaviour) (struct UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
|
||||
*converter object
|
||||
*/
|
||||
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
|
||||
Could be used by clients writing their own call back function to
|
||||
pass context to them
|
||||
*/
|
||||
};
|
||||
UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
|
||||
|
||||
/*
|
||||
* currently only used to point to a struct containing UConverter used by iso 2022;
|
||||
* could be used by clients writing their own call back function to pass context to them
|
||||
*/
|
||||
void *extraInfo;
|
||||
};
|
||||
|
||||
U_CDECL_END /* end of UConverter */
|
||||
|
||||
|
@ -219,7 +247,7 @@ typedef struct UConverter UConverter;
|
|||
typedef struct
|
||||
{
|
||||
UConverter *currentConverter;
|
||||
unsigned char escSeq2022[10];
|
||||
uint8_t escSeq2022[10];
|
||||
int8_t escSeq2022Length;
|
||||
}
|
||||
UConverterDataISO2022;
|
||||
|
|
|
@ -28,28 +28,115 @@
|
|||
#include "unewdata.h"
|
||||
#include "ucmpwrit.h"
|
||||
|
||||
/*Defines the struct of a UConverterSharedData the immutable, shared part of
|
||||
*UConverter -
|
||||
* This is the definition from ICU 1.4, necessary to read converter data
|
||||
* version 1 because the structure is directly embedded in the data.
|
||||
* See udata.html for why this is bad (pointers, enums, padding...).
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
void *dataMemory;
|
||||
uint32_t referenceCounter; /*used to count number of clients */
|
||||
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
|
||||
UConverterPlatform platform; /*platform of the converter (only IBM now) */
|
||||
int32_t codepage; /*codepage # (now IBM-$codepage) */
|
||||
UConverterType conversionType; /*conversion type */
|
||||
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
|
||||
struct
|
||||
{ /*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
}
|
||||
defaultConverterValues;
|
||||
UConverterTable *table; /*Pointer to conversion data */
|
||||
}
|
||||
UConverterSharedData_1_4;
|
||||
|
||||
struct UConverter_1_4
|
||||
{
|
||||
int32_t toUnicodeStatus; /*Used to internalize stream status information */
|
||||
int32_t fromUnicodeStatus;
|
||||
int8_t invalidCharLength;
|
||||
int8_t invalidUCharLength;
|
||||
int8_t pad;
|
||||
int32_t mode;
|
||||
int8_t subCharLen; /*length of the codepage specific character sequence */
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
|
||||
*output stream by the Error function pointers
|
||||
*/
|
||||
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
|
||||
* output stream by the Error function pointers
|
||||
*/
|
||||
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
|
||||
UChar invalidUCharBuffer[3];
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_fromUnicode call
|
||||
*/
|
||||
void (*fromUCharErrorBehaviour) (struct UConverter_1_4 *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_toUnicode call
|
||||
*/
|
||||
void (*fromCharErrorBehaviour) (struct UConverter_1_4 *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
UConverterSharedData_1_4 *sharedData; /*Pointer to the shared immutable part of the
|
||||
*converter object
|
||||
*/
|
||||
void *extraInfo; /*currently only used to point to a struct containing UConverter_1_4 used by iso 2022
|
||||
Could be used by clients writing their own call back function to
|
||||
pass context to them
|
||||
*/
|
||||
};
|
||||
|
||||
typedef struct UConverter_1_4 UConverter_1_4;
|
||||
|
||||
/*Reads the header of the table file and fills in basic knowledge about the converter
|
||||
*in "converter"
|
||||
*/
|
||||
static void readHeaderFromFile(UConverter* myConverter, FileStream* convFile, UErrorCode* err);
|
||||
static void readHeaderFromFile(UConverter_1_4* myConverter, FileStream* convFile, UErrorCode* err);
|
||||
|
||||
/*Reads the rest of the file, and fills up the shared objects if necessary*/
|
||||
static void loadMBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
|
||||
static void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
|
||||
|
||||
/*Reads the rest of the file, and fills up the shared objects if necessary*/
|
||||
static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
|
||||
static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
|
||||
|
||||
/*Reads the rest of the file, and fills up the shared objects if necessary*/
|
||||
static void loadSBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
|
||||
static void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
|
||||
|
||||
/*Reads the rest of the file, and fills up the shared objects if necessary*/
|
||||
static void loadDBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err);
|
||||
static void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err);
|
||||
|
||||
/* creates a UConverterSharedData from a mapping file, fills in necessary links to it the
|
||||
/* creates a UConverterSharedData_1_4 from a mapping file, fills in necessary links to it the
|
||||
* appropriate function pointers
|
||||
* if the data tables are already in memory
|
||||
*/
|
||||
static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err);
|
||||
static UConverterSharedData_1_4* createConverterFromTableFile(const char* realName, UErrorCode* err);
|
||||
|
||||
|
||||
/*writes a CompactShortArray to a file*/
|
||||
|
@ -60,11 +147,13 @@ static void writeCompactByteArrayToFile(FileStream* outfile, const CompactByteAr
|
|||
|
||||
/*writes a binary to a file*/
|
||||
static void writeUConverterSharedDataToFile(const char* filename,
|
||||
UConverterSharedData* mySharedData,
|
||||
UConverterSharedData_1_4* mySharedData,
|
||||
UErrorCode* err);
|
||||
|
||||
|
||||
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data);
|
||||
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data);
|
||||
|
||||
bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData);
|
||||
|
||||
static UConverterPlatform getPlatformFromName(char* name);
|
||||
static int32_t getCodepageNumberFromName(char* name);
|
||||
|
@ -158,7 +247,7 @@ static const UDataInfo dataInfo={
|
|||
};
|
||||
|
||||
|
||||
void writeConverterData(UConverterSharedData *mySharedData, const char *cName, UErrorCode *status)
|
||||
void writeConverterData(UConverterSharedData_1_4 *mySharedData, const char *cName, UErrorCode *status)
|
||||
{
|
||||
UNewDataMemory *mem;
|
||||
const char *cnvName, *cnvName2;
|
||||
|
@ -192,7 +281,7 @@ void writeConverterData(UConverterSharedData *mySharedData, const char *cName, U
|
|||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
UConverterSharedData* mySharedData = NULL;
|
||||
UConverterSharedData_1_4* mySharedData = NULL;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
|
||||
char* dot = NULL, *arg;
|
||||
|
@ -232,7 +321,7 @@ int main(int argc, char** argv)
|
|||
{
|
||||
/* writeUConverterSharedDataToFile(outFileName, mySharedData, &err); */
|
||||
writeConverterData(mySharedData, cnvName, &err);
|
||||
deleteSharedConverterData(mySharedData);
|
||||
makeconv_deleteSharedConverterData(mySharedData);
|
||||
|
||||
if(U_FAILURE(err))
|
||||
{
|
||||
|
@ -289,7 +378,7 @@ int32_t getCodepageNumberFromName(char* name)
|
|||
}
|
||||
|
||||
/*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/
|
||||
void readHeaderFromFile(UConverter* myConverter,
|
||||
void readHeaderFromFile(UConverter_1_4* myConverter,
|
||||
FileStream* convFile,
|
||||
UErrorCode* err)
|
||||
{
|
||||
|
@ -412,7 +501,7 @@ void readHeaderFromFile(UConverter* myConverter,
|
|||
|
||||
|
||||
|
||||
void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
|
||||
void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
|
||||
{
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
|
@ -479,7 +568,7 @@ void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
|
|||
return;
|
||||
}
|
||||
|
||||
void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
|
||||
void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
|
||||
{
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
|
@ -560,7 +649,7 @@ void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
|
|||
return;
|
||||
}
|
||||
|
||||
void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
|
||||
void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
|
||||
{
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
|
@ -627,7 +716,7 @@ void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConver
|
|||
}
|
||||
|
||||
|
||||
void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err)
|
||||
void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err)
|
||||
{
|
||||
char storageLine[UCNV_MAX_LINE_TEXT];
|
||||
char* line = NULL;
|
||||
|
@ -688,7 +777,7 @@ void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError
|
|||
}
|
||||
|
||||
/*deletes the "shared" type object*/
|
||||
bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData)
|
||||
bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData)
|
||||
{
|
||||
if (deadSharedData->conversionType == UCNV_SBCS)
|
||||
{
|
||||
|
@ -719,13 +808,13 @@ bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData)
|
|||
|
||||
|
||||
|
||||
/*creates a UConverter, fills in necessary links to it the appropriate function pointers*/
|
||||
UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err)
|
||||
/*creates a UConverter_1_4, fills in necessary links to it the appropriate function pointers*/
|
||||
UConverterSharedData_1_4* createConverterFromTableFile(const char* converterName, UErrorCode* err)
|
||||
{
|
||||
FileStream* convFile = NULL;
|
||||
int32_t i = 0;
|
||||
UConverterSharedData* mySharedData = NULL;
|
||||
UConverter myConverter;
|
||||
UConverterSharedData_1_4* mySharedData = NULL;
|
||||
UConverter_1_4 myConverter;
|
||||
|
||||
|
||||
if (U_FAILURE(*err)) return NULL;
|
||||
|
@ -738,14 +827,14 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE
|
|||
}
|
||||
|
||||
|
||||
mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData));
|
||||
mySharedData = (UConverterSharedData_1_4*) uprv_malloc(sizeof(UConverterSharedData_1_4));
|
||||
if (mySharedData == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
T_FileStream_close(convFile);
|
||||
}
|
||||
|
||||
mySharedData->structSize = sizeof(UConverterSharedData);
|
||||
mySharedData->structSize = sizeof(UConverterSharedData_1_4);
|
||||
mySharedData->dataMemory = NULL; /* for init */
|
||||
|
||||
myConverter.sharedData = mySharedData;
|
||||
|
@ -787,13 +876,13 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE
|
|||
|
||||
|
||||
|
||||
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data)
|
||||
static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data)
|
||||
{
|
||||
uint32_t size = 0;
|
||||
|
||||
udata_writeBlock(pData, data, sizeof(UConverterSharedData));
|
||||
udata_writeBlock(pData, data, sizeof(UConverterSharedData_1_4));
|
||||
|
||||
size += sizeof(UConverterSharedData); /* Is 4-aligned- it ends with a pointer */
|
||||
size += sizeof(UConverterSharedData_1_4); /* Is 4-aligned- it ends with a pointer */
|
||||
|
||||
switch (data->conversionType)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue