ICU-43 Added fallback implementation to handle to/fromUnicode

mapping correctly.

X-SVN-Rev: 1359
This commit is contained in:
Helena Chapman 2000-05-12 19:59:03 +00:00
parent 6c92553260
commit 4a4bbe2373
14 changed files with 704 additions and 114 deletions

View file

@ -130,6 +130,20 @@ CompactShortArray* ucmp16_open(int16_t defaultValue)
return this_obj;
}
/*
 * Resets a caller-supplied CompactShortArray to the "bogus" state: marked
 * bogus, not compacted, and with no value, index or hash storage attached.
 * A NULL argument is ignored.
 */
void ucmp16_initBogus(CompactShortArray *this_obj)
{
    if (this_obj != NULL)
    {
        /* size / count bookkeeping */
        this_obj->fStructSize = sizeof(CompactShortArray);
        this_obj->fCount = UCMP16_kUnicodeCount;
        this_obj->fDefaultValue = 0;

        /* no backing storage yet */
        this_obj->fArray = NULL;
        this_obj->fIndex = NULL;
        this_obj->fHashes = NULL;

        /* state flags */
        this_obj->fBogus = TRUE;
        this_obj->fCompact = FALSE;
        this_obj->fAlias = FALSE;
        this_obj->fIAmOwned = TRUE;
    }
}
void ucmp16_init(CompactShortArray *this_obj, int16_t defaultValue)
{

View file

@ -70,7 +70,7 @@
* @see CompactIntArray
* @see CompactCharArray
* @see CompactStringArray
* @version $Revision: 1.9 $ 8/25/98
* @version $Revision: 1.10 $ 8/25/98
* @author Helena Shih
*/
@ -99,6 +99,7 @@ U_CAPI int32_t U_EXPORT2 ucmp16_getkBlockCount(void);
*/
U_CAPI CompactShortArray* U_EXPORT2 ucmp16_open(int16_t defaultValue);
U_CAPI void U_EXPORT2 ucmp16_init(CompactShortArray* array, int16_t defaultValue);
U_CAPI void U_EXPORT2 ucmp16_initBogus(CompactShortArray* array);
/**
* Construct a CompactShortArray from a pre-computed index and values array. The values

View file

@ -29,6 +29,22 @@ static int32_t findOverlappingPosition(CompactByteArray* this_obj,
int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;}
int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;}
/*
 * Resets a caller-supplied CompactByteArray to the "bogus" state: marked
 * bogus, not compacted, and with no value or index storage attached.
 * A NULL argument is ignored.
 */
void ucmp8_initBogus(CompactByteArray* array)
{
    if (array != NULL)
    {
        /* size / count bookkeeping */
        array->fStructSize = sizeof(CompactByteArray);
        array->fCount = UCMP8_kUnicodeCount;

        /* no backing storage yet */
        array->fArray = NULL;
        array->fIndex = NULL;

        /* state flags */
        array->fBogus = TRUE;
        array->fCompact = FALSE;
        array->fAlias = FALSE;
        array->fIAmOwned = TRUE;
    }
}
/* debug flags*/
/*=======================================================*/
void ucmp8_init(CompactByteArray* array, int8_t defaultValue)

View file

@ -51,6 +51,7 @@ typedef struct CompactByteArray {
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_open(int8_t defaultValue);
U_CAPI void U_EXPORT2 ucmp8_init(CompactByteArray* array, int8_t defaultValue);
U_CAPI void U_EXPORT2 ucmp8_initBogus(CompactByteArray* array);
U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAdopt(uint16_t* indexArray,
int8_t* newValues,

View file

@ -15,6 +15,7 @@
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
* 05/09/00 helena Added implementation to handle fallback mappings.
*/
#include "umutex.h"
#include "unicode/ures.h"
@ -1113,3 +1114,13 @@ bool_t ucnv_isAmbiguous(const UConverter *cnv)
{
return (ucnv_getAmbiguousCCSID(cnv) == -1 ? FALSE : TRUE);
}
/*
 * Records on the converter whether fallback mappings may be used.
 * cnv must not be NULL; no check is made here.
 */
void
ucnv_setFallback(UConverter *cnv, bool_t usesFallback)
{
    /* Stored verbatim; ucnv_usesFallback() returns this field. */
    cnv->useFallback = usesFallback;
}
/*
 * Reports the converter's current fallback setting (its useFallback field).
 * cnv must not be NULL; no check is made here.
 */
bool_t
ucnv_usesFallback(const UConverter *cnv)
{
    const bool_t fallbackEnabled = cnv->useFallback;

    return fallbackEnabled;
}

View file

@ -691,8 +691,8 @@ const UConverterStaticData _ISO2022StaticData={
sizeof(UConverterStaticData),
"ISO_2022",
2022, UCNV_IBM, UCNV_ISO_2022, 1, 4,
1, { 0x1a, 0, 0, 0 },
{ 0,0,0} /* reserved */
1, { 0x1a, 0, 0, 0 }, FALSE, FALSE,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* reserved */
};

View file

@ -100,7 +100,7 @@ isCnvAcceptable(void *context,
pInfo->dataFormat[1]==0x6e &&
pInfo->dataFormat[2]==0x76 &&
pInfo->dataFormat[3]==0x74 &&
pInfo->formatVersion[0]==3;
pInfo->formatVersion[0]==4;
}
#define DATA_TYPE "cnv"

View file

@ -3,11 +3,16 @@
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* uconv_cnv.h:
* defines all the low level conversion functions
* T_UnicodeConverter_{to,from}Unicode_$ConversionType
*/
*
* uconv_cnv.h:
* defines all the low level conversion functions
* T_UnicodeConverter_{to,from}Unicode_$ConversionType
*
* Modification History:
*
* Date Name Description
* 05/09/00 helena Added implementation to handle fallback mappings.
*/
#ifndef UCNV_CNV_H
#define UCNV_CNV_H
@ -23,6 +28,8 @@ typedef struct
{
/* Primary single-byte mapping tables. */
UChar *toUnicode; /* [256]; */
CompactByteArray fromUnicode;
/*
 * Fallback tables. The SBCS loader only sets these up when the matching
 * hasToUnicodeFallback / hasFromUnicodeFallback flag in the static data
 * is TRUE, and the converters only consult them when the converter's
 * useFallback flag is also TRUE.
 */
UChar *toUnicodeFallback; /* indexed by the source byte, like toUnicode */
CompactByteArray fromUnicodeFallback;
}
UConverterSBCSTable;
@ -30,6 +37,8 @@ typedef struct
{
/* Primary double-byte mapping tables. */
CompactShortArray toUnicode;
CompactShortArray fromUnicode;
/*
 * Fallback tables. The DBCS loader only initializes these when the matching
 * hasToUnicodeFallback / hasFromUnicodeFallback flag in the static data
 * is TRUE; the unload routine closes them under the same conditions.
 */
CompactShortArray toUnicodeFallback;
CompactShortArray fromUnicodeFallback;
}
UConverterDBCSTable;
@ -38,6 +47,8 @@ typedef struct
bool_t *starters; /* [256]; */
CompactShortArray toUnicode;
CompactShortArray fromUnicode;
CompactShortArray toUnicodeFallback;
CompactShortArray fromUnicodeFallback;
}
UConverterMBCSTable;

View file

@ -10,6 +10,10 @@
*
* created on: 2000feb03
* created by: Markus W. Scherer
*
* Change history:
*
* 05/09/00 helena Added implementation to handle fallback mappings.
*/
#include "unicode/utypes.h"
@ -33,13 +37,33 @@ _MBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErr
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
oldraw = raw;
ucmp16_initFromData(&sharedData->table->mbcs.fromUnicode, &raw, pErrorCode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
oldraw = raw;
ucmp16_initFromData(&sharedData->table->mbcs.fromUnicodeFallback, &raw, pErrorCode);
}
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp16_initFromData(&sharedData->table->mbcs.toUnicodeFallback, &raw, pErrorCode);
}
}
/*
 * Closes the MBCS compact arrays held in the shared table and then frees
 * the table itself. The fallback arrays are closed only when the static
 * data says they are present.
 */
static void
_MBCSUnload(UConverterSharedData *sharedData)
{
    /* primary tables */
    ucmp16_close(&sharedData->table->mbcs.fromUnicode);
    ucmp16_close(&sharedData->table->mbcs.toUnicode);

    /* optional fallback tables */
    if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
    {
        ucmp16_close(&sharedData->table->mbcs.fromUnicodeFallback);
    }
    if (sharedData->staticData->hasToUnicodeFallback == TRUE)
    {
        ucmp16_close(&sharedData->table->mbcs.toUnicodeFallback);
    }

    uprv_free(sharedData->table);
}
@ -58,7 +82,7 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myToUnicode = NULL;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
bool_t *myStarters = NULL;
@ -67,6 +91,7 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
myToUnicode = &_this->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback;
myStarters = _this->sharedData->table->mbcs.starters;
while (mySourceIndex < sourceLength)
@ -102,11 +127,19 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this,
/*writing the UniChar to the output stream */
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
myTarget[myTargetIndex++] = targetUniChar;
}
else
{
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
}
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
if (mySourceChar > 0xff)
{
@ -178,13 +211,14 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myToUnicode = NULL;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UChar oldMySourceChar = 0x0000;
bool_t *myStarters = NULL;
myToUnicode = &_this->sharedData->table->mbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback;
myStarters = _this->sharedData->table->mbcs.starters;
while (mySourceIndex < sourceLength)
@ -236,9 +270,25 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
oldMySourceChar = mySourceChar;
}
else
{
int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myToUnicodeFallback, mySourceChar);
/*writes the UniChar to the output stream */
{
if (targetUniChar > 0x00FF)
offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */
else
offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */
}
myTarget[myTargetIndex++] = targetUniChar;
oldMySourceChar = mySourceChar;
}
if (targetUniChar == missingUCharMarker)
{
int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1);
*err = U_INVALID_CHAR_FOUND;
if (mySourceChar > 0xff)
@ -312,11 +362,12 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -326,7 +377,6 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
mySourceChar = (UChar) mySource[mySourceIndex++];
targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar);
if (targetUniChar != missingCharMarker)
{
if (targetUniChar <= 0x00FF)
@ -347,8 +397,35 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this,
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
else
}
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
if (targetUniChar != missingCharMarker)
{
if (targetUniChar <= 0x00FF)
{
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
}
if (targetUniChar == missingCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
@ -401,11 +478,12 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -440,7 +518,38 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this,
}
}
}
else
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
if (targetUniChar != missingCharMarker)
{
if (targetUniChar <= 0x00FF)
{
offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
offsets[myTargetIndex] = mySourceIndex-1;
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
}
if (targetUniChar == missingCharMarker)
{
int32_t currentOffset = mySourceIndex -1;
@ -501,6 +610,13 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter,
/*Not lead byte: we update the source ptr and get the codepoint*/
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode),
(UChar)(**source));
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
(myUChar == 0xFFFD))
{
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback),
(UChar)(**source));
}
(*source)++;
}
else
@ -516,6 +632,13 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter,
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE) &&
(myUChar == 0xFFFD))
{
myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))));
}
(*source) += 2;
}

View file

@ -10,6 +10,10 @@
*
* created on: 2000feb03
* created by: Markus W. Scherer
*
* Change history:
*
* 05/09/00 helena Added implementation to handle fallback mappings.
*/
#include "unicode/utypes.h"
@ -24,14 +28,31 @@
static void
_SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
const uint8_t *oldraw = raw;
sharedData->table->sbcs.toUnicode = (uint16_t*)raw;
raw += sizeof(uint16_t)*256;
raw += sizeof(uint16_t)*256; oldraw = raw;
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode);
}
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
sharedData->table->sbcs.toUnicodeFallback = (uint16_t*)raw;
}
}
/*
 * Closes the SBCS fromUnicode compact array (and its fallback counterpart
 * when the static data says one is present), then frees the table.
 * The toUnicode / toUnicodeFallback pointers are not released here.
 */
static void
_SBCSUnload(UConverterSharedData *sharedData)
{
    ucmp8_close(&sharedData->table->sbcs.fromUnicode);

    /* optional fallback table */
    if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
    {
        ucmp8_close(&sharedData->table->sbcs.fromUnicodeFallback);
    }

    uprv_free(sharedData->table);
}
@ -50,11 +71,11 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
UChar *myToUnicode = NULL;
UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
myToUnicode = _this->sharedData->table->sbcs.toUnicode;
myToUnicodeFallback = _this->sharedData->table->sbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
@ -71,23 +92,36 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this,
}
else
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
_this->invalidCharLength = 1;
if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
}
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
_this->invalidCharLength = 1;
ToU_CALLBACK_MACRO(_this,
myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
ToU_CALLBACK_MACRO(_this,
myTarget,
myTargetIndex,
targetLimit,
mySource,
mySourceIndex,
sourceLimit,
offsets,
flush,
err);
if (U_FAILURE (*err)) break;
_this->invalidCharLength = 0;
}
}
}
else
@ -118,10 +152,11 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactByteArray *myFromUnicode;
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
unsigned char targetChar = 0x00;
myFromUnicode = &_this->sharedData->table->sbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->sbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -136,9 +171,19 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
/*writes the char to the output stream */
myTarget[myTargetIndex++] = targetChar;
}
else
{
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
/* Look up in the fallback table first */
targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]);
if (targetChar != 0 || !mySource[mySourceIndex - 1])
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = targetChar;
}
}
if (targetChar == 0 && !mySource[mySourceIndex-1])
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1];
_this->invalidUCharLength = 1;
@ -159,7 +204,7 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this,
break;
}
_this->invalidUCharLength = 0;
}
}
}
else
{
@ -199,7 +244,15 @@ UChar32 T_UConverter_getNextUChar_SBCS(UConverter* converter,
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE)&&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*((*source)-1)];
if (myUChar != 0xFFFD) return myUChar;
}
*err = U_INVALID_CHAR_FOUND;
/*Calls the ErrorFunctor after rewinding the input buffer*/
@ -261,13 +314,33 @@ _DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErr
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
oldraw = raw;
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode);
if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode);
oldraw = raw;
}
if (sharedData->staticData->hasToUnicodeFallback == TRUE)
{
if(((raw-oldraw)&3)!=0) {
raw+=4-((raw-oldraw)&3); /* pad to 4 */
}
ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode);
}
}
/*
 * Closes the DBCS compact arrays held in the shared table and then frees
 * the table itself. The fallback arrays are closed only when the static
 * data says they are present.
 */
U_CFUNC void
_DBCSUnload(UConverterSharedData *sharedData)
{
    /* primary tables */
    ucmp16_close(&sharedData->table->dbcs.fromUnicode);
    ucmp16_close(&sharedData->table->dbcs.toUnicode);

    /* optional fallback tables */
    if (sharedData->staticData->hasFromUnicodeFallback == TRUE)
    {
        ucmp16_close(&sharedData->table->dbcs.fromUnicodeFallback);
    }
    if (sharedData->staticData->hasToUnicodeFallback == TRUE)
    {
        ucmp16_close(&sharedData->table->dbcs.toUnicodeFallback);
    }

    uprv_free(sharedData->table);
}
@ -286,11 +359,12 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - myTarget;
int32_t sourceLength = sourceLimit - (char *) mySource;
CompactShortArray *myToUnicode = NULL;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myToUnicode = &_this->sharedData->table->dbcs.toUnicode;
myToUnicodeFallback = &_this->sharedData->table->dbcs.toUnicodeFallback;
while (mySourceIndex < sourceLength)
{
@ -320,8 +394,18 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this,
/*writes the UniChar to the output stream */
myTarget[myTargetIndex++] = targetUniChar;
}
else
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
if (targetUniChar != missingUCharMarker)
{
myTarget[myTargetIndex++] = targetUniChar;
}
}
if (targetUniChar == missingUCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
_this->invalidCharBuffer[1] = (char) mySourceChar;
@ -386,11 +470,12 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
int32_t myTargetIndex = 0;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = sourceLimit - mySource;
CompactShortArray *myFromUnicode = NULL;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
UChar targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode;
myFromUnicodeFallback = &_this->sharedData->table->dbcs.fromUnicodeFallback;
/*writing the char to the output stream */
while (mySourceIndex < sourceLength)
@ -417,8 +502,29 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this,
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
else
{
else if ((_this->useFallback == TRUE) &&
(_this->sharedData->staticData->hasFromUnicodeFallback == TRUE))
{
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar);
if (targetUniChar != missingCharMarker)
{
/*writes the char to the output stream */
myTarget[myTargetIndex++] = (char) (targetUniChar >> 8);
if (myTargetIndex < targetLength)
{
myTarget[myTargetIndex++] = (char) targetUniChar;
}
else
{
_this->charErrorBuffer[0] = (char) targetUniChar;
_this->charErrorBufferLength = 1;
*err = U_INDEX_OUTOFBOUNDS_ERROR;
}
}
}
if (targetUniChar == missingCharMarker)
{
*err = U_INVALID_CHAR_FOUND;
_this->invalidUCharBuffer[0] = (UChar) mySourceChar;
_this->invalidUCharLength = 1;
@ -490,8 +596,20 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter,
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
/*Calls the ErrorFunctor after rewinding the input buffer*/
/* rewinding the input buffer*/
(*source) -= 2;
/* Do the fallback stuff */
if ((converter->useFallback == TRUE) &&
(converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
{
myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicodeFallback),
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)-1))));
if (myUChar != 0xFFFD)
{
*source += 2;
return myUChar;
}
}
*err = U_INVALID_CHAR_FOUND;

View file

@ -11,6 +11,7 @@
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
* 05/11/00 helena Added setFallback and usesFallback APIs.
*/
/**
@ -65,8 +66,7 @@ U_CDECL_END
*/
U_CAPI
UConverter* U_EXPORT2 ucnv_open (const char *converterName,
UErrorCode * err);
UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err);
/**
@ -83,7 +83,7 @@ UConverter* U_EXPORT2 ucnv_open (const char *converterName,
* @stable
*/
U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
UErrorCode * err);
UErrorCode * err);
@ -102,7 +102,7 @@ U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
UConverterPlatform platform,
UErrorCode * err);
UErrorCode * err);
/**
@ -692,6 +692,23 @@ U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source
*/
U_CAPI bool_t U_EXPORT2 ucnv_isAmbiguous(const UConverter *cnv);
/**
* Sets the converter to use fallback mapping or not.
* @param cnv The converter to set the fallback mapping usage for.
* @param usesFallback TRUE if the user wants the converter to take advantage of the fallback
* mapping, FALSE otherwise.
* @draft
*/
U_CAPI void U_EXPORT2 ucnv_setFallback(UConverter *cnv, bool_t usesFallback);
/**
* Determines if the converter uses fallback mappings or not.
* @param cnv The converter whose fallback setting is queried.
* @return TRUE if the converter uses fallback, FALSE otherwise.
* @draft
*/
U_CAPI bool_t U_EXPORT2 ucnv_usesFallback(const UConverter *cnv);
#endif
/*_UCNV*/

View file

@ -153,8 +153,9 @@ typedef struct {
int8_t subCharLen;
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN];
uint8_t reserved[3]; /* to round out the structure */
uint8_t hasToUnicodeFallback; /* bool_t needs to be changed to UBool to be consistent across platform */
uint8_t hasFromUnicodeFallback;
uint8_t reserved[19]; /* to round out the structure */
} UConverterStaticData;
@ -173,8 +174,8 @@ typedef struct {
bool_t staticDataOwned; /* T if we own the staticData */
const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
/*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
/*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
} UConverterSharedData;
@ -184,6 +185,7 @@ struct UConverter {
uint32_t toUnicodeStatus; /* Used to internalize stream status information */
uint32_t fromUnicodeStatus;
int32_t mode;
bool_t useFallback;
int8_t subCharLen; /* length of the codepage specific character sequence */
int8_t invalidCharLength;

View file

@ -10,6 +10,8 @@
* makeconv.c:
* tool creating a binary (compressed) representation of the conversion mapping
* table (IBM NLTC ucmap format).
*
* 05/04/2000 helena Added fallback mapping into the picture...
*/
#include <stdio.h>
@ -92,6 +94,7 @@ static int32_t getCodepageNumberFromName(char* name);
static const char NLTC_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , 'U', '\0' };
static const char FALLBACK_SEPARATOR = '|';
static const char PLAIN_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , '\0' };
static const char CODEPOINT_SEPARATORS[8] = { '\r', '>', '\\', 'x', '\n', ' ', '\t', '\0' };
static const char UNICODE_CODEPOINT_SEPARATORS[6] = { '<', '>', 'U', ' ', '\t', '\0' };
@ -104,9 +107,18 @@ char *
removeComments (char *line)
{
  /*
   * Strips a '#' comment from line in place and returns line.
   *
   * If a '|' fallback indicator follows the '#', only the text from '#' up
   * to (not including) the '|' is blanked with spaces, so the indicator
   * survives.  Otherwise the line is truncated at the '#'.
   *
   * The "'|' follows '#'" test is essential: when the '|' comes first,
   * fallback - pound is negative and would be converted to an enormous
   * unsigned length for the memset.
   */
  char *pound = uprv_strchr (line, '#');
  char *fallback = uprv_strchr (line, '|');

  if (pound == NULL)
    {
      /* no comment on this line: nothing to strip */
      return line;
    }

  if (fallback != NULL && fallback > pound)
    {
      uprv_memset (pound, ' ', fallback - pound);
    }
  else
    {
      *pound = '\0';
    }
  return line;
}
@ -178,14 +190,17 @@ static const UDataInfo dataInfo={
0,
0x63, 0x6e, 0x76, 0x74, /* dataFormat="cnvt" */
3, 0, 0, 0, /* formatVersion */
1, 4, 2, 0 /* dataVersion */
4, 0, 0, 0, /* formatVersion */
1, 5, 0, 1 /* dataVersion */
};
void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status)
void writeConverterData(UConverterSharedData *mySharedData,
const char *cnvName,
const char *cnvDir,
UErrorCode *status)
{
UNewDataMemory *mem;
UNewDataMemory *mem = U_NULL;
uint32_t sz2;
if(U_FAILURE(*status))
@ -556,19 +571,18 @@ void readHeaderFromFile(UConverterStaticData* myConverter,
return;
}
UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err)
{
char storageLine[UCNV_MAX_LINE_TEXT];
char* line = NULL;
UConverterTable* myUConverterTable = NULL;
UChar unicodeValue = 0xFFFF;
int32_t sbcsCodepageValue = 0;
int32_t sbcsCodepageValue = 0, fallback = 0;
bool_t seenFallback = FALSE;
char codepointBytes[5];
unsigned char replacementChar = '\0';
int32_t i = 0;
CompactByteArray* myFromUnicode = NULL;
CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
if (U_FAILURE(*err)) return NULL;
@ -581,24 +595,29 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
return NULL;
}
uprv_memset(myUConverterTable, 0, sizeof(UConverterSBCSTable));
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = FALSE;
/*create a compact array with replacement chars as default chars*/
ucmp8_init(&myUConverterTable->sbcs.fromUnicode, 0);
myFromUnicode = &myUConverterTable->sbcs.fromUnicode;
if (myFromUnicode == NULL)
/*create a bogus compact array */
ucmp8_initBogus(&myUConverterTable->sbcs.fromUnicodeFallback);
myFromUnicodeFallback = &myUConverterTable->sbcs.fromUnicodeFallback;
if (myFromUnicode == NULL)
{
uprv_free(myUConverterTable);
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
myUConverterTable->sbcs.toUnicode = (UChar*)malloc(sizeof(UChar)*256);
/*fills in the toUnicode array with the Unicode Replacement Char*/
for (i=0;i<255;i++) myUConverterTable->sbcs.toUnicode[i] = unicodeValue;
myUConverterTable->sbcs.toUnicode = (UChar*)malloc(sizeof(UChar)*256);
myUConverterTable->sbcs.toUnicodeFallback = (UChar*)malloc(sizeof(UChar)*256);
/*fills in the toUnicode array with the Unicode Replacement Char*/
for (i=0;i<255;i++)
{
myUConverterTable->sbcs.toUnicode[i] = unicodeValue;
myUConverterTable->sbcs.toUnicodeFallback[i] = unicodeValue;
}
while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT))
{
/*removes comments*/
@ -615,12 +634,52 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
unicodeValue = (UChar)T_CString_stringToInteger(codepointBytes, 16);
line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS);
sbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16);
/*Store in the toUnicode array*/
myUConverterTable->sbcs.toUnicode[sbcsCodepageValue] = unicodeValue;
/*Store in the fromUnicode compact array*/
ucmp8_set(myFromUnicode, unicodeValue, (int8_t)sbcsCodepageValue);
}
}
/* hsys: check fallback value here... */
line = uprv_strchr(line, FALLBACK_SEPARATOR);
uprv_memset(codepointBytes, 0, 5);
if (line != NULL)
{
uprv_memcpy(codepointBytes, line+1, 1);
}
fallback = T_CString_stringToInteger(codepointBytes, 10);
if (fallback == 0) {
/*Store in the toUnicode array*/
myUConverterTable->sbcs.toUnicode[sbcsCodepageValue] = unicodeValue;
/*Store in the fromUnicode compact array*/
ucmp8_set(myFromUnicode, unicodeValue, (int8_t)sbcsCodepageValue);
} else if (fallback == 1) {
/* Check if this fallback is in the toUnicode or fromUnicode table */
if (seenFallback == FALSE)
{
myConverter->hasToUnicodeFallback = myConverter->hasFromUnicodeFallback = seenFallback = TRUE;
ucmp8_init(myFromUnicodeFallback, 0);
}
myUConverterTable->sbcs.toUnicodeFallback[sbcsCodepageValue] = unicodeValue;
ucmp8_set(myFromUnicodeFallback, unicodeValue, (int8_t)sbcsCodepageValue);
}
}
}
seenFallback = FALSE;
for (i = 0; i < 256; i++)
{
if ((myUConverterTable->sbcs.toUnicode[i] == 0xFFFF) &&
(myUConverterTable->sbcs.toUnicodeFallback[i] != 0xFFFF))
{
seenFallback = TRUE;
break;
}
}
if (seenFallback == FALSE)
{
free(myUConverterTable->sbcs.toUnicodeFallback);
myUConverterTable->sbcs.toUnicodeFallback = NULL;
myConverter->hasToUnicodeFallback = FALSE;
}
else if (myConverter->hasFromUnicodeFallback == TRUE)
{
ucmp8_compact(myFromUnicodeFallback, 1);
}
ucmp8_compact(myFromUnicode, 1);
/*Initially sets the referenceCounter to 1*/
@ -635,10 +694,11 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat
UChar unicodeValue = 0xFFFF;
int32_t mbcsCodepageValue = '\0';
char codepointBytes[6];
int32_t replacementChar = 0x0000;
int32_t replacementChar = 0x0000, fallback = 0;
bool_t seenFallback = FALSE;
uint16_t i = 0;
CompactShortArray* myFromUnicode = NULL;
CompactShortArray* myToUnicode = NULL;
CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;
CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL;
/*Evaluates the replacement codepoint*/
replacementChar = 0xFFFF;
@ -669,9 +729,13 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat
myFromUnicode = &myUConverterTable->mbcs.fromUnicode;
ucmp16_init(myFromUnicode, (uint16_t)replacementChar);
myFromUnicodeFallback = &myUConverterTable->mbcs.fromUnicodeFallback;
ucmp16_initBogus(myFromUnicodeFallback);
myToUnicode = &myUConverterTable->mbcs.toUnicode;
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
myToUnicodeFallback = &myUConverterTable->mbcs.toUnicodeFallback;
ucmp16_initBogus(myToUnicodeFallback);
while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT))
{
@ -691,12 +755,55 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat
}
mbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16);
ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue);
/*
 * Read the optional fallback indicator: the single character that follows
 * FALLBACK_SEPARATOR on the current line, parsed as a decimal number.
 * When the separator is missing the buffer stays empty and is parsed as is.
 */
line = uprv_strchr(line, FALLBACK_SEPARATOR);
/* Clear the whole scratch buffer (it still holds the code point digits). */
uprv_memset(codepointBytes, 0, sizeof(codepointBytes));
if (line != NULL)
{
    uprv_memcpy(codepointBytes, line+1, 1);
}
fallback = T_CString_stringToInteger(codepointBytes, 10);
if (fallback == 0)
{
    /* Regular mapping: goes into the primary tables in both directions. */
    ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue);
    ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue);
}
else if (fallback == 1)
{
    /* Fallback mapping: the fallback tables are only set up once the first
       such entry is met; until then they stay in their bogus state. */
    if (seenFallback == FALSE)
    {
        myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
        /* Same int16_t casts as used for the primary tables' defaults. */
        ucmp16_init(myFromUnicodeFallback, (int16_t)replacementChar);
        ucmp16_init(myToUnicodeFallback, (int16_t)0xFFFD);
    }
    ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue);
    ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue);
}
/* Any other indicator value is ignored: the line adds no mapping at all. */
}
}
/*
 * Post-load check for the MBCS to-Unicode fallback table.
 * seenFallback is reused to record whether some code page value has 0xFFFD
 * in the regular toUnicode table but a different value in the fallback table.
 */
seenFallback = FALSE;
if (myConverter->hasToUnicodeFallback == TRUE)
{
    /* The scan uses its own 32-bit counter: the function-level 'i' is a
       uint16_t, which can never reach a count above 65535, so the loop could
       only end through the break below. */
    int32_t codepageValue;
    const int32_t codepageValueCount = ucmp16_getkUnicodeCount();

    for (codepageValue = 0; codepageValue < codepageValueCount; codepageValue++)
    {
        /* The tables are initialised with (int16_t)0xFFFD. The return type of
           ucmp16_get is not visible here; if it yields the stored int16_t the
           value reads back negative and never equals 0xFFFD, hence the casts
           (harmless if the result is already unsigned). */
        if (((uint16_t)ucmp16_get(myToUnicode, (UChar)codepageValue) == 0xFFFD) &&
            ((uint16_t)ucmp16_get(myToUnicodeFallback, (UChar)codepageValue) != 0xFFFD))
        {
            seenFallback = TRUE;
            break;
        }
    }
    if (seenFallback == FALSE)
    {
        /* The fallback table adds nothing: release it and clear the flag. */
        ucmp16_close(myToUnicodeFallback);
        myConverter->hasToUnicodeFallback = FALSE;
        /* NOTE(review): hasFromUnicodeFallback stays TRUE on this path but the
           from-Unicode fallback table is not compacted -- confirm intended. */
    }
    else if (myConverter->hasFromUnicodeFallback == TRUE)
    {
        ucmp16_compact(myFromUnicodeFallback);
        ucmp16_compact(myToUnicodeFallback);
    }
}
ucmp16_compact(myFromUnicode);
ucmp16_compact(myToUnicode);
@ -717,10 +824,13 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
UChar unicodeValue = 0xFFFF;
int32_t mbcsCodepageValue = '\0';
char codepointBytes[6];
int32_t replacementChar = 0x0000;
int32_t replacementChar = 0x0000, fallback = 0;
uint8_t i = 0;
bool_t seenFallback = FALSE;
CompactShortArray* myFromUnicode = NULL;
CompactShortArray* myToUnicode = NULL;
CompactShortArray* myFromUnicodeFallback = NULL;
CompactShortArray* myToUnicodeFallback = NULL;
/*Evaluates the replacement codepoint*/
replacementChar = 0xFFFF;
@ -736,10 +846,14 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
myFromUnicode = &myUConverterTable->dbcs.fromUnicode;
ucmp16_init(myFromUnicode, (uint16_t)replacementChar);
myToUnicode = &myUConverterTable->dbcs.toUnicode;
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
myFromUnicodeFallback = &myUConverterTable->dbcs.fromUnicodeFallback;
ucmp16_initBogus(myFromUnicodeFallback);
myToUnicodeFallback = &myUConverterTable->dbcs.toUnicodeFallback;
ucmp16_initBogus(myToUnicodeFallback);
while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT))
{
removeComments(storageLine);
@ -758,10 +872,55 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
mbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16);
ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue);
line = uprv_strchr(line, FALLBACK_SEPARATOR);
uprv_memset(codepointBytes, 0, 6);
if (line != NULL)
{
uprv_memcpy(codepointBytes, line+1, 1);
}
fallback = T_CString_stringToInteger(codepointBytes, 10);
if (fallback == 0)
{
ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue);
}
else if (fallback == 1)
{
/* Check if this fallback is in the toUnicode or fromUnicode table */
if (seenFallback == FALSE)
{
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
}
ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue);
}
}
}
/*
 * Post-load check for the EBCDIC-stateful to-Unicode fallback table.
 * seenFallback is reused to record whether some code page value has 0xFFFD
 * in the regular toUnicode table but a different value in the fallback table.
 */
seenFallback = FALSE;
if (myConverter->hasToUnicodeFallback == TRUE)
{
    /* The scan uses its own 32-bit counter: the function-level 'i' is a
       uint8_t, which wraps at 255 and never reaches the table size, so the
       loop could only end through the break below. */
    int32_t codepageValue;
    const int32_t codepageValueCount = ucmp16_getkUnicodeCount();

    for (codepageValue = 0; codepageValue < codepageValueCount; codepageValue++)
    {
        /* The tables are initialised with (int16_t)0xFFFD. The return type of
           ucmp16_get is not visible here; if it yields the stored int16_t the
           value reads back negative and never equals 0xFFFD, hence the casts
           (harmless if the result is already unsigned). */
        if (((uint16_t)ucmp16_get(myToUnicode, (UChar)codepageValue) == 0xFFFD) &&
            ((uint16_t)ucmp16_get(myToUnicodeFallback, (UChar)codepageValue) != 0xFFFD))
        {
            seenFallback = TRUE;
            break;
        }
    }
}
/* NOTE(review): unlike the MBCS and DBCS loaders, this cleanup is not nested
   inside the hasToUnicodeFallback test, so the close below also runs when no
   fallback entry was ever read -- confirm that closing a bogus array is safe. */
if (seenFallback == FALSE)
{
    ucmp16_close(myToUnicodeFallback);
    myConverter->hasToUnicodeFallback = FALSE;
}
else if (myConverter->hasFromUnicodeFallback == TRUE)
{
    ucmp16_compact(myFromUnicodeFallback);
    ucmp16_compact(myToUnicodeFallback);
}
}
ucmp16_compact(myFromUnicode);
ucmp16_compact(myToUnicode);
@ -778,10 +937,13 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
UChar unicodeValue = 0xFFFD;
int32_t dbcsCodepageValue = '\0';
char codepointBytes[6];
int32_t replacementChar = 0x0000;
int32_t replacementChar = 0x0000, fallback = 0;
uint8_t i = 0;
bool_t seenFallback = FALSE;
CompactShortArray* myFromUnicode = NULL;
CompactShortArray* myToUnicode = NULL;
CompactShortArray* myFromUnicodeFallback = NULL;
CompactShortArray* myToUnicodeFallback = NULL;
/*Evaluates the replacement codepoint*/
replacementChar = 0xFFFF;
@ -797,10 +959,14 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
myFromUnicode = &(myUConverterTable->dbcs.fromUnicode);
ucmp16_init(myFromUnicode, (int16_t)replacementChar);
myToUnicode = &(myUConverterTable->dbcs.toUnicode);
ucmp16_init(myToUnicode, (int16_t)0xFFFD);
myFromUnicodeFallback = &(myUConverterTable->dbcs.fromUnicodeFallback);
ucmp16_initBogus(myFromUnicodeFallback);
myToUnicodeFallback = &(myUConverterTable->dbcs.toUnicodeFallback);
ucmp16_initBogus(myToUnicodeFallback);
while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT))
{
removeComments(storageLine);
@ -819,9 +985,54 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
}
dbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16);
ucmp16_set(myToUnicode, (int16_t)dbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicode, unicodeValue, (int16_t)dbcsCodepageValue);
}
line = uprv_strchr(line, FALLBACK_SEPARATOR);
uprv_memset(codepointBytes, 0, 6);
if (line != NULL)
{
uprv_memcpy(codepointBytes, line+1, 1);
}
fallback = T_CString_stringToInteger(codepointBytes, 10);
if (fallback == 0)
{
ucmp16_set(myToUnicode, (int16_t)dbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicode, unicodeValue, (int16_t)dbcsCodepageValue);
}
else if (fallback == 1)
{
/* Check if this fallback is in the toUnicode or fromUnicode table */
if (seenFallback == FALSE)
{
myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
}
ucmp16_set(myToUnicodeFallback, (int16_t)dbcsCodepageValue, unicodeValue);
ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)dbcsCodepageValue);
}
}
/*
 * Post-load check for the DBCS to-Unicode fallback table.
 * seenFallback is reused to record whether some code page value has 0xFFFD
 * in the regular toUnicode table but a different value in the fallback table.
 */
seenFallback = FALSE;
if (myConverter->hasToUnicodeFallback == TRUE)
{
    /* The scan uses its own 32-bit counter: the function-level 'i' is a
       uint8_t, which wraps at 255 and never reaches the table size, so the
       loop could only end through the break below. */
    int32_t codepageValue;
    const int32_t codepageValueCount = ucmp16_getkUnicodeCount();

    for (codepageValue = 0; codepageValue < codepageValueCount; codepageValue++)
    {
        /* The tables are initialised with (int16_t)0xFFFD. The return type of
           ucmp16_get is not visible here; if it yields the stored int16_t the
           value reads back negative and never equals 0xFFFD, hence the casts
           (harmless if the result is already unsigned). */
        if (((uint16_t)ucmp16_get(myToUnicode, (UChar)codepageValue) == 0xFFFD) &&
            ((uint16_t)ucmp16_get(myToUnicodeFallback, (UChar)codepageValue) != 0xFFFD))
        {
            seenFallback = TRUE;
            break;
        }
    }
    if (seenFallback == FALSE)
    {
        /* The fallback table adds nothing: release it and clear the flag. */
        ucmp16_close(myToUnicodeFallback);
        myConverter->hasToUnicodeFallback = FALSE;
        /* NOTE(review): hasFromUnicodeFallback stays TRUE on this path but the
           from-Unicode fallback table is not compacted -- confirm intended. */
    }
    else if (myConverter->hasFromUnicodeFallback == TRUE)
    {
        ucmp16_compact(myFromUnicodeFallback);
        ucmp16_compact(myToUnicodeFallback);
    }
}
ucmp16_compact(myFromUnicode);
ucmp16_compact(myToUnicode);
@ -835,6 +1046,8 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData)
if (deadSharedData->staticData->conversionType == UCNV_SBCS)
{
ucmp8_close(&(deadSharedData->table->sbcs.fromUnicode));
if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE)
ucmp8_close(&(deadSharedData->table->sbcs.fromUnicodeFallback));
uprv_free(deadSharedData->table);
uprv_free(deadSharedData);
}
@ -842,6 +1055,10 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData)
{
ucmp16_close(&(deadSharedData->table->mbcs.fromUnicode));
ucmp16_close(&(deadSharedData->table->mbcs.toUnicode));
if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE)
ucmp16_close(&(deadSharedData->table->mbcs.fromUnicodeFallback));
if (deadSharedData->staticData->hasToUnicodeFallback == TRUE)
ucmp16_close(&(deadSharedData->table->mbcs.toUnicodeFallback));
uprv_free(deadSharedData->table);
uprv_free((UConverterStaticData*)deadSharedData->staticData);
uprv_free(deadSharedData);
@ -850,6 +1067,10 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData)
{
ucmp16_close(&(deadSharedData->table->dbcs.fromUnicode));
ucmp16_close(&(deadSharedData->table->dbcs.toUnicode));
if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE)
ucmp16_close(&(deadSharedData->table->dbcs.fromUnicodeFallback));
if (deadSharedData->staticData->hasToUnicodeFallback == TRUE)
ucmp16_close(&(deadSharedData->table->dbcs.toUnicodeFallback));
uprv_free(deadSharedData->table);
uprv_free((UConverterStaticData*)deadSharedData->staticData);
uprv_free(deadSharedData);
@ -890,7 +1111,7 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE
T_FileStream_close(convFile);
return NULL;
}
uprv_memset(mySharedData, 0, sizeof(UConverterSharedData));
mySharedData->structSize = sizeof(UConverterSharedData);
@ -965,7 +1186,25 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar
udata_writeBlock(pData, (void*)data->table->sbcs.toUnicode, sizeof(uint16_t)*256);
size += sizeof(uint16_t)*256;
size += udata_write_ucmp8(pData, &data->table->sbcs.fromUnicode);
/* don't care about alignment anymore */
/* Optional SBCS fallback tables, each written only when its flag is set in
   the static data. Before each table the output is padded so that the running
   byte count 'size' is a multiple of 4. */
if (data->staticData->hasFromUnicodeFallback == TRUE)
{
if (size%4)
{
udata_writePadding(pData, 4-(size%4));
size+= 4-(size%4);
}
/* The return value of udata_write_ucmp8 is added to the running count
   (presumably the number of bytes written -- confirm against udata). */
size += udata_write_ucmp8(pData, &data->table->sbcs.fromUnicodeFallback);
}
if (data->staticData->hasToUnicodeFallback == TRUE)
{
if (size%4)
{
udata_writePadding(pData, 4-(size%4));
size+= 4-(size%4);
}
/* Raw block of 256 uint16_t entries, indexed the same way as sbcs.toUnicode. */
udata_writeBlock(pData, (void*)data->table->sbcs.toUnicodeFallback, sizeof(uint16_t)*256);
/* 'size' is deliberately not advanced here: nothing in this case is written
   after this block, so its alignment no longer matters. */
/* don't care about alignment anymore */
}
}
break;
@ -979,6 +1218,25 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar
size+= 4-(size%4);
}
size += udata_write_ucmp16(pData,&data->table->dbcs.fromUnicode);
/* Optional DBCS fallback tables, each written only when its flag is set in
   the static data. Before each table the output is padded so that the running
   byte count 'size' is a multiple of 4. */
if (data->staticData->hasFromUnicodeFallback == TRUE)
{
if(size%4)
{
udata_writePadding(pData, 4-(size%4) );
size+= 4-(size%4);
}
/* The return value of udata_write_ucmp16 is added to the running count
   (presumably the number of bytes written -- confirm against udata). */
size += udata_write_ucmp16(pData,&data->table->dbcs.fromUnicodeFallback);
}
if (data->staticData->hasToUnicodeFallback == TRUE)
{
if(size%4)
{
udata_writePadding(pData, 4-(size%4) );
size+= 4-(size%4);
}
size += udata_write_ucmp16(pData,&data->table->dbcs.toUnicodeFallback);
}
}
break;
@ -993,6 +1251,24 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar
size+= 4-(size%4);
}
size += udata_write_ucmp16(pData,&data->table->mbcs.fromUnicode);
/* Optional MBCS fallback tables, each written only when its flag is set in
   the static data. Before each table the output is padded so that the running
   byte count 'size' is a multiple of 4. */
if (data->staticData->hasFromUnicodeFallback == TRUE)
{
if(size%4)
{
udata_writePadding(pData, 4-(size%4) );
size+= 4-(size%4);
}
/* The return value of udata_write_ucmp16 is added to the running count
   (presumably the number of bytes written -- confirm against udata). */
size += udata_write_ucmp16(pData,&data->table->mbcs.fromUnicodeFallback);
}
if (data->staticData->hasToUnicodeFallback == TRUE)
{
if(size%4)
{
udata_writePadding(pData, 4-(size%4) );
size+= 4-(size%4);
}
size += udata_write_ucmp16(pData,&data->table->mbcs.toUnicodeFallback);
}
}
break;

View file

@ -19,32 +19,32 @@ static const UConverterStaticData _SBCSStaticData={
sizeof(UConverterStaticData),
"SBCS",
0, UCNV_IBM, UCNV_SBCS, 1, 1,
1, { 0, 0, 0, 0 },
{ 0,0,0 } /* reserved bytes */
1, { 0, 0, 0, 0 },FALSE, FALSE,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */
};
static const UConverterStaticData _DBCSStaticData={
sizeof(UConverterStaticData),
"DBCS",
0, UCNV_IBM, UCNV_DBCS, 2, 2,
1, { 0, 0, 0, 0 }, /* subchar */
{ 0,0,0 } /* reserved bytes */
1, { 0, 0, 0, 0 }, FALSE, FALSE, /* subchar */
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */
};
static const UConverterStaticData _MBCSStaticData={
sizeof(UConverterStaticData),
"MBCS",
0, UCNV_IBM, UCNV_MBCS, 1, 1,
1, { 0, 0, 0, 0 },
{ 0,0,0 } /* reserved bytes */
1, { 0, 0, 0, 0 }, FALSE, FALSE,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */
};
static const UConverterStaticData _EBCDICStatefulStaticData={
sizeof(UConverterStaticData),
"EBCDICStateful",
0, UCNV_IBM, UCNV_EBCDIC_STATEFUL, 1, 1,
1, { 0, 0, 0, 0 },
{ 0,0,0 } /* reserved bytes */
1, { 0, 0, 0, 0 }, FALSE, FALSE,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */
};
/* NULLs for algorithmic types, their tables live in ucnv_bld.c */