diff --git a/icu4c/source/common/ucmp16.c b/icu4c/source/common/ucmp16.c index 0110191c926..4077f89d070 100644 --- a/icu4c/source/common/ucmp16.c +++ b/icu4c/source/common/ucmp16.c @@ -130,6 +130,20 @@ CompactShortArray* ucmp16_open(int16_t defaultValue) return this_obj; } +void ucmp16_initBogus(CompactShortArray *this_obj) +{ + if (this_obj == NULL) return; + this_obj->fStructSize = sizeof(CompactShortArray); + this_obj->fCount = UCMP16_kUnicodeCount; + this_obj->fCompact = FALSE; + this_obj->fBogus = TRUE; + this_obj->fArray = NULL; + this_obj->fAlias = FALSE; + this_obj->fIndex = NULL; + this_obj->fHashes = NULL; + this_obj->fIAmOwned = TRUE; + this_obj->fDefaultValue = 0; +} void ucmp16_init(CompactShortArray *this_obj, int16_t defaultValue) { diff --git a/icu4c/source/common/ucmp16.h b/icu4c/source/common/ucmp16.h index 913b7b77064..6278b5e2ffa 100644 --- a/icu4c/source/common/ucmp16.h +++ b/icu4c/source/common/ucmp16.h @@ -70,7 +70,7 @@ * @see CompactIntArray * @see CompactCharArray * @see CompactStringArray - * @version $Revision: 1.9 $ 8/25/98 + * @version $Revision: 1.10 $ 8/25/98 * @author Helena Shih */ @@ -99,6 +99,7 @@ U_CAPI int32_t U_EXPORT2 ucmp16_getkBlockCount(void); */ U_CAPI CompactShortArray* U_EXPORT2 ucmp16_open(int16_t defaultValue); U_CAPI void U_EXPORT2 ucmp16_init(CompactShortArray* array, int16_t defaultValue); +U_CAPI void U_EXPORT2 ucmp16_initBogus(CompactShortArray* array); /** * Construct a CompactShortArray from a pre-computed index and values array. The values diff --git a/icu4c/source/common/ucmp8.c b/icu4c/source/common/ucmp8.c index 3db0c0f1345..d1cee1792a3 100644 --- a/icu4c/source/common/ucmp8.c +++ b/icu4c/source/common/ucmp8.c @@ -29,6 +29,22 @@ static int32_t findOverlappingPosition(CompactByteArray* this_obj, int32_t ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;} int32_t ucmp8_getkBlockCount() { return UCMP8_kBlockCount;} +void ucmp8_initBogus(CompactByteArray* array) +{ + CompactByteArray* this_obj = array; + + if (this_obj == NULL) return; + + this_obj->fStructSize = sizeof(CompactByteArray); + this_obj->fArray = NULL; + this_obj->fIndex = NULL; + this_obj->fCount = UCMP8_kUnicodeCount; + this_obj->fCompact = FALSE; + this_obj->fBogus = TRUE; + this_obj->fAlias = FALSE; + this_obj->fIAmOwned = TRUE; +} + /* debug flags*/ /*=======================================================*/ void ucmp8_init(CompactByteArray* array, int8_t defaultValue) diff --git a/icu4c/source/common/ucmp8.h b/icu4c/source/common/ucmp8.h index 2393739c079..2ec82de8a4e 100644 --- a/icu4c/source/common/ucmp8.h +++ b/icu4c/source/common/ucmp8.h @@ -51,6 +51,7 @@ typedef struct CompactByteArray { U_CAPI CompactByteArray* U_EXPORT2 ucmp8_open(int8_t defaultValue); U_CAPI void U_EXPORT2 ucmp8_init(CompactByteArray* array, int8_t defaultValue); +U_CAPI void U_EXPORT2 ucmp8_initBogus(CompactByteArray* array); U_CAPI CompactByteArray* U_EXPORT2 ucmp8_openAdopt(uint16_t* indexArray, int8_t* newValues, diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index e9793f6684a..af5c5b81b52 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -15,6 +15,7 @@ * * Date Name Description * 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. */ #include "umutex.h" #include "unicode/ures.h" @@ -1113,3 +1114,13 @@ bool_t ucnv_isAmbiguous(const UConverter *cnv) { return (ucnv_getAmbiguousCCSID(cnv) == -1 ? FALSE : TRUE); } + +void ucnv_setFallback(UConverter *cnv, bool_t usesFallback) +{ + cnv->useFallback = usesFallback; +} + +bool_t ucnv_usesFallback(const UConverter *cnv) +{ + return cnv->useFallback; +} diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index a3bb9b08251..b00ad54b20b 100644 --- a/icu4c/source/common/ucnv2022.c +++ b/icu4c/source/common/ucnv2022.c @@ -691,8 +691,8 @@ const UConverterStaticData _ISO2022StaticData={ sizeof(UConverterStaticData), "ISO_2022", 2022, UCNV_IBM, UCNV_ISO_2022, 1, 4, - 1, { 0x1a, 0, 0, 0 }, - { 0,0,0} /* reserved */ + 1, { 0x1a, 0, 0, 0 }, FALSE, FALSE, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* reserved */ }; diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index 024c12ba963..ea0ae570139 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -100,7 +100,7 @@ isCnvAcceptable(void *context, pInfo->dataFormat[1]==0x6e && pInfo->dataFormat[2]==0x76 && pInfo->dataFormat[3]==0x74 && - pInfo->formatVersion[0]==3; + pInfo->formatVersion[0]==4; } #define DATA_TYPE "cnv" diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h index 5c85012c6d3..bc7eb9e13d5 100644 --- a/icu4c/source/common/ucnv_cnv.h +++ b/icu4c/source/common/ucnv_cnv.h @@ -3,11 +3,16 @@ * Copyright (C) 1999, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** - * - * uconv_cnv.h: - * defines all the low level conversion functions - * T_UnicodeConverter_{to,from}Unicode_$ConversionType - */ +* +* uconv_cnv.h: +* defines all the low level conversion functions +* T_UnicodeConverter_{to,from}Unicode_$ConversionType +* +* Modification History: +* +* Date Name Description +* 05/09/00 helena Added implementation to handle fallback mappings. +*/ #ifndef UCNV_CNV_H #define UCNV_CNV_H @@ -23,6 +28,8 @@ typedef struct { UChar *toUnicode; /* [256]; */ CompactByteArray fromUnicode; + UChar *toUnicodeFallback; + CompactByteArray fromUnicodeFallback; } UConverterSBCSTable; @@ -30,6 +37,8 @@ typedef struct { CompactShortArray toUnicode; CompactShortArray fromUnicode; + CompactShortArray toUnicodeFallback; + CompactShortArray fromUnicodeFallback; } UConverterDBCSTable; @@ -38,6 +47,8 @@ typedef struct bool_t *starters; /* [256]; */ CompactShortArray toUnicode; CompactShortArray fromUnicode; + CompactShortArray toUnicodeFallback; + CompactShortArray fromUnicodeFallback; } UConverterMBCSTable; diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index f88c447ad39..a9a87be7a05 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -10,6 +10,10 @@ * * created on: 2000feb03 * created by: Markus W. Scherer +* +* Change history: +* +* 05/09/00 helena Added implementation to handle fallback mappings. */ #include "unicode/utypes.h" @@ -33,13 +37,33 @@ _MBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErr if(((raw-oldraw)&3)!=0) { raw+=4-((raw-oldraw)&3); /* pad to 4 */ } + oldraw = raw; ucmp16_initFromData(&sharedData->table->mbcs.fromUnicode, &raw, pErrorCode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + oldraw = raw; + ucmp16_initFromData(&sharedData->table->mbcs.fromUnicodeFallback, &raw, pErrorCode); + } + if (sharedData->staticData->hasToUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + ucmp16_initFromData(&sharedData->table->mbcs.toUnicodeFallback, &raw, pErrorCode); + } } static void _MBCSUnload(UConverterSharedData *sharedData) { ucmp16_close (&sharedData->table->mbcs.fromUnicode); ucmp16_close (&sharedData->table->mbcs.toUnicode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp16_close (&sharedData->table->mbcs.fromUnicodeFallback); + if (sharedData->staticData->hasToUnicodeFallback == TRUE) + ucmp16_close (&sharedData->table->mbcs.toUnicodeFallback); uprv_free (sharedData->table); } @@ -58,7 +82,7 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; + CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; bool_t *myStarters = NULL; @@ -67,6 +91,7 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this, myToUnicode = &_this->sharedData->table->mbcs.toUnicode; + myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) @@ -102,11 +127,19 @@ static void T_UConverter_toUnicode_MBCS (UConverter * _this, /*writing the UniChar to the output stream */ if (targetUniChar != missingUCharMarker) { - myTarget[myTargetIndex++] = targetUniChar; - + myTarget[myTargetIndex++] = targetUniChar; } - else - { + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar); + if (targetUniChar != missingUCharMarker) + { + myTarget[myTargetIndex++] = targetUniChar; + } + } + if (targetUniChar == missingUCharMarker) + { *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xff) { @@ -178,13 +211,14 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; + CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; UChar oldMySourceChar = 0x0000; bool_t *myStarters = NULL; myToUnicode = &_this->sharedData->table->mbcs.toUnicode; + myToUnicodeFallback = &_this->sharedData->table->mbcs.toUnicodeFallback; myStarters = _this->sharedData->table->mbcs.starters; while (mySourceIndex < sourceLength) @@ -236,9 +270,25 @@ static void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, oldMySourceChar = mySourceChar; } - else - { - int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1); + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + + targetUniChar = (UChar) ucmp16_getu (myToUnicodeFallback, mySourceChar); + /*writes the UniChar to the output stream */ + { + if (targetUniChar > 0x00FF) + offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */ + else + offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */ + + } + myTarget[myTargetIndex++] = targetUniChar; + oldMySourceChar = mySourceChar; + } + if (targetUniChar == missingUCharMarker) + { + int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1); *err = U_INVALID_CHAR_FOUND; if (mySourceChar > 0xff) @@ -312,11 +362,12 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; + CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode; + myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -326,7 +377,6 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this, mySourceChar = (UChar) mySource[mySourceIndex++]; targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - if (targetUniChar != missingCharMarker) { if (targetUniChar <= 0x00FF) @@ -347,8 +397,35 @@ static void T_UConverter_fromUnicode_MBCS (UConverter * _this, *err = U_INDEX_OUTOFBOUNDS_ERROR; } } - } - else + } + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasFromUnicodeFallback == TRUE)) + { + targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar); + + if (targetUniChar != missingCharMarker) + { + if (targetUniChar <= 0x00FF) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + } + if (targetUniChar == missingCharMarker) { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; @@ -401,11 +478,12 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; + CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myFromUnicode = &_this->sharedData->table->mbcs.fromUnicode; + myFromUnicodeFallback = &_this->sharedData->table->mbcs.fromUnicodeFallback; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -440,7 +518,38 @@ static void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, } } } - else + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasFromUnicodeFallback == TRUE)) + { + targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar); + if (targetUniChar != missingCharMarker) + { + if (targetUniChar <= 0x00FF) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + + } + else + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + } + + if (targetUniChar == missingCharMarker) { int32_t currentOffset = mySourceIndex -1; @@ -501,6 +610,13 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter, /*Not lead byte: we update the source ptr and get the codepoint*/ myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode), (UChar)(**source)); + if ((converter->useFallback == TRUE) && + (converter->sharedData->staticData->hasToUnicodeFallback == TRUE) && + (myUChar == 0xFFFD)) + { + myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback), + (UChar)(**source)); + } (*source)++; } else @@ -516,6 +632,13 @@ static UChar32 T_UConverter_getNextUChar_MBCS(UConverter* converter, myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicode), (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); + if ((converter->useFallback == TRUE) && + (converter->sharedData->staticData->hasToUnicodeFallback == TRUE) && + (myUChar == 0xFFFD)) + { + myUChar = ucmp16_getu((&converter->sharedData->table->mbcs.toUnicodeFallback), + (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); + } (*source) += 2; } diff --git a/icu4c/source/common/ucnvsbcs.c b/icu4c/source/common/ucnvsbcs.c index c9629954b52..93f87a283a2 100644 --- a/icu4c/source/common/ucnvsbcs.c +++ b/icu4c/source/common/ucnvsbcs.c @@ -10,6 +10,10 @@ * * created on: 2000feb03 * created by: Markus W. Scherer +* +* Change history: +* +* 05/09/00 helena Added implementation to handle fallback mappings. */ #include "unicode/utypes.h" @@ -24,14 +28,31 @@ static void _SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) { + const uint8_t *oldraw = raw; sharedData->table->sbcs.toUnicode = (uint16_t*)raw; - raw += sizeof(uint16_t)*256; + raw += sizeof(uint16_t)*256; oldraw = raw; ucmp8_initFromData(&sharedData->table->sbcs.fromUnicode, &raw, pErrorCode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + ucmp8_initFromData(&sharedData->table->sbcs.fromUnicodeFallback, &raw, pErrorCode); + } + if (sharedData->staticData->hasToUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + sharedData->table->sbcs.toUnicodeFallback = (uint16_t*)raw; + } } static void _SBCSUnload(UConverterSharedData *sharedData) { ucmp8_close (&sharedData->table->sbcs.fromUnicode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp8_close (&sharedData->table->sbcs.fromUnicodeFallback); uprv_free (sharedData->table); } @@ -50,11 +71,11 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; - UChar *myToUnicode = NULL; + UChar *myToUnicode = NULL, *myToUnicodeFallback = NULL; UChar targetUniChar = 0x0000; myToUnicode = _this->sharedData->table->sbcs.toUnicode; - + myToUnicodeFallback = _this->sharedData->table->sbcs.toUnicodeFallback; while (mySourceIndex < sourceLength) { @@ -71,23 +92,36 @@ void T_UConverter_toUnicode_SBCS (UConverter * _this, } else { - *err = U_INVALID_CHAR_FOUND; - _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; - _this->invalidCharLength = 1; + if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + /* Look up in the fallback table first */ + targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]]; + if (targetUniChar != missingUCharMarker) + { + myTarget[myTargetIndex++] = targetUniChar; + } + } + if (targetUniChar == missingUCharMarker) + { + *err = U_INVALID_CHAR_FOUND; + _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; + _this->invalidCharLength = 1; - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; + ToU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } } } else @@ -118,10 +152,11 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactByteArray *myFromUnicode; + CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; unsigned char targetChar = 0x00; myFromUnicode = &_this->sharedData->table->sbcs.fromUnicode; + myFromUnicodeFallback = &_this->sharedData->table->sbcs.fromUnicodeFallback; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -136,9 +171,19 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this, /*writes the char to the output stream */ myTarget[myTargetIndex++] = targetChar; } - else - { - + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasFromUnicodeFallback == TRUE)) + { + /* Look up in the fallback table first */ + targetChar = ucmp8_getu (myFromUnicodeFallback, mySource[mySourceIndex-1]); + if (targetChar != 0 || !mySource[mySourceIndex - 1]) + { + /*writes the char to the output stream */ + myTarget[myTargetIndex++] = targetChar; + } + } + if (targetChar == 0 && !mySource[mySourceIndex-1]) + { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; _this->invalidUCharLength = 1; @@ -159,7 +204,7 @@ void T_UConverter_fromUnicode_SBCS (UConverter * _this, break; } _this->invalidUCharLength = 0; - } + } } else { @@ -199,7 +244,15 @@ UChar32 T_UConverter_getNextUChar_SBCS(UConverter* converter, { UChar* myUCharPtr = &myUChar; const char* sourceFinal = *source; - + + /* Do the fallback stuff */ + if ((converter->useFallback == TRUE)&& + (converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + myUChar = converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*((*source)-1)]; + if (myUChar != 0xFFFD) return myUChar; + } + *err = U_INVALID_CHAR_FOUND; /*Calls the ErrorFunctor after rewinding the input buffer*/ @@ -261,13 +314,33 @@ _DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErr if(((raw-oldraw)&3)!=0) { raw+=4-((raw-oldraw)&3); /* pad to 4 */ } + oldraw = raw; ucmp16_initFromData(&sharedData->table->dbcs.fromUnicode, &raw, pErrorCode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + ucmp16_initFromData(&sharedData->table->dbcs.fromUnicodeFallback, &raw, pErrorCode); + oldraw = raw; + } + if (sharedData->staticData->hasToUnicodeFallback == TRUE) + { + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); /* pad to 4 */ + } + ucmp16_initFromData(&sharedData->table->dbcs.toUnicodeFallback, &raw, pErrorCode); + } } U_CFUNC void _DBCSUnload(UConverterSharedData *sharedData) { ucmp16_close (&sharedData->table->dbcs.fromUnicode); ucmp16_close (&sharedData->table->dbcs.toUnicode); + if (sharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp16_close (&sharedData->table->dbcs.fromUnicodeFallback); + if (sharedData->staticData->hasToUnicodeFallback == TRUE) + ucmp16_close (&sharedData->table->dbcs.toUnicodeFallback); uprv_free (sharedData->table); } @@ -286,11 +359,12 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - myTarget; int32_t sourceLength = sourceLimit - (char *) mySource; - CompactShortArray *myToUnicode = NULL; + CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myToUnicode = &_this->sharedData->table->dbcs.toUnicode; + myToUnicodeFallback = &_this->sharedData->table->dbcs.toUnicodeFallback; while (mySourceIndex < sourceLength) { @@ -320,8 +394,18 @@ void T_UConverter_toUnicode_DBCS (UConverter * _this, /*writes the UniChar to the output stream */ myTarget[myTargetIndex++] = targetUniChar; } - else + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar); + if (targetUniChar != missingUCharMarker) + { + myTarget[myTargetIndex++] = targetUniChar; + } + } + if (targetUniChar == missingUCharMarker) { + *err = U_INVALID_CHAR_FOUND; _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); _this->invalidCharBuffer[1] = (char) mySourceChar; @@ -386,11 +470,12 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this, int32_t myTargetIndex = 0; int32_t targetLength = targetLimit - (char *) myTarget; int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; + CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; UChar targetUniChar = 0x0000; UChar mySourceChar = 0x0000; myFromUnicode = &_this->sharedData->table->dbcs.fromUnicode; + myFromUnicodeFallback = &_this->sharedData->table->dbcs.fromUnicodeFallback; /*writing the char to the output stream */ while (mySourceIndex < sourceLength) @@ -417,8 +502,29 @@ void T_UConverter_fromUnicode_DBCS (UConverter * _this, *err = U_INDEX_OUTOFBOUNDS_ERROR; } } - else - { + else if ((_this->useFallback == TRUE) && + (_this->sharedData->staticData->hasFromUnicodeFallback == TRUE)) + { + + targetUniChar = (UChar) ucmp16_getu (myFromUnicodeFallback, mySourceChar); + if (targetUniChar != missingCharMarker) + { + /*writes the char to the output stream */ + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + if (targetUniChar == missingCharMarker) + { *err = U_INVALID_CHAR_FOUND; _this->invalidUCharBuffer[0] = (UChar) mySourceChar; _this->invalidUCharLength = 1; @@ -490,8 +596,20 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverter* converter, UChar* myUCharPtr = &myUChar; const char* sourceFinal = *source; - /*Calls the ErrorFunctor after rewinding the input buffer*/ + /* rewinding the input buffer*/ (*source) -= 2; + /* Do the fallback stuff */ + if ((converter->useFallback == TRUE) && + (converter->sharedData->staticData->hasToUnicodeFallback == TRUE)) + { + myUChar = ucmp16_getu((&converter->sharedData->table->dbcs.toUnicodeFallback), + (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)-1)))); + if (myUChar != 0xFFFD) + { + *source += 2; + return myUChar; + } + } *err = U_INVALID_CHAR_FOUND; diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index a7b903c022e..895f51e9026 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -11,6 +11,7 @@ * * Date Name Description * 04/04/99 helena Fixed internal header inclusion. + * 05/11/00 helena Added setFallback and usesFallback APIs. */ /** @@ -65,8 +66,7 @@ U_CDECL_END */ U_CAPI -UConverter* U_EXPORT2 ucnv_open (const char *converterName, - UErrorCode * err); +UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err); /** @@ -83,7 +83,7 @@ UConverter* U_EXPORT2 ucnv_open (const char *converterName, * @stable */ U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name, - UErrorCode * err); + UErrorCode * err); @@ -102,7 +102,7 @@ U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name, U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage, UConverterPlatform platform, - UErrorCode * err); + UErrorCode * err); /** @@ -692,6 +692,23 @@ U_CAPI void U_EXPORT2 ucnv_fixFileSeparator(const UConverter *cnv, UChar* source */ U_CAPI bool_t U_EXPORT2 ucnv_isAmbiguous(const UConverter *cnv); +/** + * Sets the converter to use fallback mapping or not. + * @param cnv The converter to set the fallback mapping usage for. + * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback + * mapping, FALSE otherwise. + * @draft + */ +U_CAPI void U_EXPORT2 ucnv_setFallback(UConverter *cnv, bool_t usesFallback); + +/** + * Determines if the converter uses fallback mappings or not. + * @return TRUE if the converter uses fallback, FALSE otherwise. + * @draft + */ +U_CAPI bool_t U_EXPORT2 ucnv_usesFallback(const UConverter *cnv); #endif /*_UCNV*/ + + diff --git a/icu4c/source/common/unicode/ucnv_bld.h b/icu4c/source/common/unicode/ucnv_bld.h index f8b33b512db..49505f5eba0 100644 --- a/icu4c/source/common/unicode/ucnv_bld.h +++ b/icu4c/source/common/unicode/ucnv_bld.h @@ -153,8 +153,9 @@ typedef struct { int8_t subCharLen; uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; - - uint8_t reserved[3]; /* to round out the structure */ + uint8_t hasToUnicodeFallback; /* bool_t needs to be changed to UBool to be consistent across platform */ + uint8_t hasFromUnicodeFallback; + uint8_t reserved[19]; /* to round out the structure */ } UConverterStaticData; @@ -173,8 +174,8 @@ typedef struct { bool_t staticDataOwned; /* T if we own the staticData */ const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ - /*initial values of some members of the mutable part of object */ - uint32_t toUnicodeStatus; + /*initial values of some members of the mutable part of object */ + uint32_t toUnicodeStatus; } UConverterSharedData; @@ -184,6 +185,7 @@ struct UConverter { uint32_t toUnicodeStatus; /* Used to internalize stream status information */ uint32_t fromUnicodeStatus; int32_t mode; + bool_t useFallback; int8_t subCharLen; /* length of the codepage specific character sequence */ int8_t invalidCharLength; diff --git a/icu4c/source/tools/makeconv/makeconv.c b/icu4c/source/tools/makeconv/makeconv.c index 21cc2673ed3..0e3648cb509 100644 --- a/icu4c/source/tools/makeconv/makeconv.c +++ b/icu4c/source/tools/makeconv/makeconv.c @@ -10,6 +10,8 @@ * makeconv.c: * tool creating a binary (compressed) representation of the conversion mapping * table (IBM NLTC ucmap format). + * + * 05/04/2000 helena Added fallback mapping into the picture... */ #include @@ -92,6 +94,7 @@ static int32_t getCodepageNumberFromName(char* name); static const char NLTC_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , 'U', '\0' }; +static const char FALLBACK_SEPARATOR = '|'; static const char PLAIN_SEPARATORS[9] = { '\r', '\n', '\t', ' ', '<', '>' ,'"' , '\0' }; static const char CODEPOINT_SEPARATORS[8] = { '\r', '>', '\\', 'x', '\n', ' ', '\t', '\0' }; static const char UNICODE_CODEPOINT_SEPARATORS[6] = { '<', '>', 'U', ' ', '\t', '\0' }; @@ -104,9 +107,18 @@ char * removeComments (char *line) { char *pound = uprv_strchr (line, '#'); - + char *fallback = uprv_strchr(line, '|'); if (pound != NULL) - *pound = '\0'; + { + if (fallback != NULL) + { + uprv_memset(pound, ' ', fallback-pound); + } + else + { + *pound = '\0'; + } + } return line; } @@ -178,14 +190,17 @@ static const UDataInfo dataInfo={ 0, 0x63, 0x6e, 0x76, 0x74, /* dataFormat="cnvt" */ - 3, 0, 0, 0, /* formatVersion */ - 1, 4, 2, 0 /* dataVersion */ + 4, 0, 0, 0, /* formatVersion */ + 1, 5, 0, 1 /* dataVersion */ }; -void writeConverterData(UConverterSharedData *mySharedData, const char *cnvName, const char *cnvDir, UErrorCode *status) +void writeConverterData(UConverterSharedData *mySharedData, + const char *cnvName, + const char *cnvDir, + UErrorCode *status) { - UNewDataMemory *mem; + UNewDataMemory *mem = U_NULL; uint32_t sz2; if(U_FAILURE(*status)) @@ -556,19 +571,18 @@ void readHeaderFromFile(UConverterStaticData* myConverter, return; } - - UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticData* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; UConverterTable* myUConverterTable = NULL; UChar unicodeValue = 0xFFFF; - int32_t sbcsCodepageValue = 0; + int32_t sbcsCodepageValue = 0, fallback = 0; + bool_t seenFallback = FALSE; char codepointBytes[5]; unsigned char replacementChar = '\0'; int32_t i = 0; - CompactByteArray* myFromUnicode = NULL; + CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; if (U_FAILURE(*err)) return NULL; @@ -581,24 +595,29 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat return NULL; } - uprv_memset(myUConverterTable, 0, sizeof(UConverterSBCSTable)); - + myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = FALSE; /*create a compact array with replacement chars as default chars*/ ucmp8_init(&myUConverterTable->sbcs.fromUnicode, 0); myFromUnicode = &myUConverterTable->sbcs.fromUnicode; - if (myFromUnicode == NULL) + /*create a bogus compact array */ + ucmp8_initBogus(&myUConverterTable->sbcs.fromUnicodeFallback); + myFromUnicodeFallback = &myUConverterTable->sbcs.fromUnicodeFallback; + if (myFromUnicode == NULL) { uprv_free(myUConverterTable); *err = U_MEMORY_ALLOCATION_ERROR; return NULL; } - - myUConverterTable->sbcs.toUnicode = (UChar*)malloc(sizeof(UChar)*256); - /*fills in the toUnicode array with the Unicode Replacement Char*/ - for (i=0;i<255;i++) myUConverterTable->sbcs.toUnicode[i] = unicodeValue; - + myUConverterTable->sbcs.toUnicode = (UChar*)malloc(sizeof(UChar)*256); + myUConverterTable->sbcs.toUnicodeFallback = (UChar*)malloc(sizeof(UChar)*256); + /*fills in the toUnicode array with the Unicode Replacement Char*/ + for (i=0;i<255;i++) + { + myUConverterTable->sbcs.toUnicode[i] = unicodeValue; + myUConverterTable->sbcs.toUnicodeFallback[i] = unicodeValue; + } while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { /*removes comments*/ @@ -615,12 +634,52 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat unicodeValue = (UChar)T_CString_stringToInteger(codepointBytes, 16); line = getToken(codepointBytes, line, CODEPOINT_SEPARATORS); sbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16); - /*Store in the toUnicode array*/ - myUConverterTable->sbcs.toUnicode[sbcsCodepageValue] = unicodeValue; - /*Store in the fromUnicode compact array*/ - ucmp8_set(myFromUnicode, unicodeValue, (int8_t)sbcsCodepageValue); - } - } + /* hsys: check fallback value here... */ + line = uprv_strchr(line, FALLBACK_SEPARATOR); + uprv_memset(codepointBytes, 0, 5); + if (line != NULL) + { + uprv_memcpy(codepointBytes, line+1, 1); + } + fallback = T_CString_stringToInteger(codepointBytes, 10); + if (fallback == 0) { + /*Store in the toUnicode array*/ + myUConverterTable->sbcs.toUnicode[sbcsCodepageValue] = unicodeValue; + /*Store in the fromUnicode compact array*/ + ucmp8_set(myFromUnicode, unicodeValue, (int8_t)sbcsCodepageValue); + } else if (fallback == 1) { + /* Check if this fallback is in the toUnicode or fromUnicode table */ + if (seenFallback == FALSE) + { + myConverter->hasToUnicodeFallback = myConverter->hasFromUnicodeFallback = seenFallback = TRUE; + ucmp8_init(myFromUnicodeFallback, 0); + } + myUConverterTable->sbcs.toUnicodeFallback[sbcsCodepageValue] = unicodeValue; + ucmp8_set(myFromUnicodeFallback, unicodeValue, (int8_t)sbcsCodepageValue); + } + } + } + seenFallback = FALSE; + for (i = 0; i < 256; i++) + { + if ((myUConverterTable->sbcs.toUnicode[i] == 0xFFFF) && + (myUConverterTable->sbcs.toUnicodeFallback[i] != 0xFFFF)) + + { + seenFallback = TRUE; + break; + } + } + if (seenFallback == FALSE) + { + free(myUConverterTable->sbcs.toUnicodeFallback); + myUConverterTable->sbcs.toUnicodeFallback = NULL; + myConverter->hasToUnicodeFallback = FALSE; + } + else if (myConverter->hasFromUnicodeFallback == TRUE) + { + ucmp8_compact(myFromUnicodeFallback, 1); + } ucmp8_compact(myFromUnicode, 1); /*Initially sets the referenceCounter to 1*/ @@ -635,10 +694,11 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat UChar unicodeValue = 0xFFFF; int32_t mbcsCodepageValue = '\0'; char codepointBytes[6]; - int32_t replacementChar = 0x0000; + int32_t replacementChar = 0x0000, fallback = 0; + bool_t seenFallback = FALSE; uint16_t i = 0; - CompactShortArray* myFromUnicode = NULL; - CompactShortArray* myToUnicode = NULL; + CompactShortArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL; + CompactShortArray *myToUnicode = NULL, *myToUnicodeFallback = NULL; /*Evaluates the replacement codepoint*/ replacementChar = 0xFFFF; @@ -669,9 +729,13 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat myFromUnicode = &myUConverterTable->mbcs.fromUnicode; ucmp16_init(myFromUnicode, (uint16_t)replacementChar); + myFromUnicodeFallback = &myUConverterTable->mbcs.fromUnicodeFallback; + ucmp16_initBogus(myFromUnicodeFallback); myToUnicode = &myUConverterTable->mbcs.toUnicode; ucmp16_init(myToUnicode, (int16_t)0xFFFD); + myToUnicodeFallback = &myUConverterTable->mbcs.toUnicodeFallback; + ucmp16_initBogus(myToUnicodeFallback); while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { @@ -691,12 +755,55 @@ UConverterTable *loadMBCSTableFromFile(FileStream* convFile, UConverterStaticDat } mbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16); - - ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue); - ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue); + line = uprv_strchr(line, FALLBACK_SEPARATOR); + uprv_memset(codepointBytes, 0, 5); + if (line != NULL) + { + uprv_memcpy(codepointBytes, line+1, 1); + } + fallback = T_CString_stringToInteger(codepointBytes, 10); + if (fallback == 0) + { + ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue); + } + else if (fallback == 1) + { + /* Check if this fallback is in the toUnicode or fromUnicode table */ + if (seenFallback == FALSE) + { + myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE; + ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar); + ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD); + } + ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue); + } } } - + seenFallback = FALSE; + if (myConverter->hasToUnicodeFallback == TRUE) + { + for (i = 0; i < ucmp16_getkUnicodeCount(); i++) + { + if ((ucmp16_get(myToUnicode, i) == 0xFFFD) && + (ucmp16_get(myToUnicodeFallback, i) != 0xFFFD)) + { + seenFallback = TRUE; + break; + } + } + if (seenFallback == FALSE) + { + ucmp16_close(myToUnicodeFallback); + myConverter->hasToUnicodeFallback = FALSE; + } + else if (myConverter->hasFromUnicodeFallback == TRUE) + { + ucmp16_compact(myFromUnicodeFallback); + ucmp16_compact(myToUnicodeFallback); + } + } ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); @@ -717,10 +824,13 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert UChar unicodeValue = 0xFFFF; int32_t mbcsCodepageValue = '\0'; char codepointBytes[6]; - int32_t replacementChar = 0x0000; + int32_t replacementChar = 0x0000, fallback = 0; uint8_t i = 0; + bool_t seenFallback = FALSE; CompactShortArray* myFromUnicode = NULL; CompactShortArray* myToUnicode = NULL; + CompactShortArray* myFromUnicodeFallback = NULL; + CompactShortArray* myToUnicodeFallback = NULL; /*Evaluates the replacement codepoint*/ replacementChar = 0xFFFF; @@ -736,10 +846,14 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert myFromUnicode = &myUConverterTable->dbcs.fromUnicode; ucmp16_init(myFromUnicode, (uint16_t)replacementChar); - myToUnicode = &myUConverterTable->dbcs.toUnicode; ucmp16_init(myToUnicode, (int16_t)0xFFFD); + myFromUnicodeFallback = &myUConverterTable->dbcs.fromUnicodeFallback; + ucmp16_initBogus(myFromUnicodeFallback); + myToUnicodeFallback = &myUConverterTable->dbcs.toUnicodeFallback; + ucmp16_initBogus(myToUnicodeFallback); + while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { removeComments(storageLine); @@ -758,10 +872,55 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert mbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16); - ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue); - ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue); + line = uprv_strchr(line, FALLBACK_SEPARATOR); + uprv_memset(codepointBytes, 0, 6); + if (line != NULL) + { + uprv_memcpy(codepointBytes, line+1, 1); + } + fallback = T_CString_stringToInteger(codepointBytes, 10); + if (fallback == 0) + { + ucmp16_set(myToUnicode, (int16_t)mbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicode, unicodeValue, (int16_t)mbcsCodepageValue); + } + else if (fallback == 1) + { + /* Check if this fallback is in the toUnicode or fromUnicode table */ + if (seenFallback == FALSE) + { + myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE; + ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar); + ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD); + } + ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue); + } + } + } + seenFallback = FALSE; + if (myConverter->hasToUnicodeFallback == TRUE) + { + for (i = 0; i < ucmp16_getkUnicodeCount(); i++) + { + if ((ucmp16_get(myToUnicode, i) == 0xFFFD) && + (ucmp16_get(myToUnicodeFallback, i) != 0xFFFD)) + { + seenFallback = TRUE; + break; } - } + } + if (seenFallback == FALSE) + { + ucmp16_close(myToUnicodeFallback); + myConverter->hasToUnicodeFallback = FALSE; + } + else if (myConverter->hasFromUnicodeFallback == TRUE) + { + ucmp16_compact(myFromUnicodeFallback); + ucmp16_compact(myToUnicodeFallback); + } + } ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); @@ -778,10 +937,13 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa UChar unicodeValue = 0xFFFD; int32_t dbcsCodepageValue = '\0'; char codepointBytes[6]; - int32_t replacementChar = 0x0000; + int32_t replacementChar = 0x0000, fallback = 0; uint8_t i = 0; + bool_t seenFallback = FALSE; CompactShortArray* myFromUnicode = NULL; CompactShortArray* myToUnicode = NULL; + CompactShortArray* myFromUnicodeFallback = NULL; + CompactShortArray* myToUnicodeFallback = NULL; /*Evaluates the replacement codepoint*/ replacementChar = 0xFFFF; @@ -797,10 +959,14 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa myFromUnicode = &(myUConverterTable->dbcs.fromUnicode); ucmp16_init(myFromUnicode, (int16_t)replacementChar); - myToUnicode = &(myUConverterTable->dbcs.toUnicode); ucmp16_init(myToUnicode, (int16_t)0xFFFD); + myFromUnicodeFallback = &(myUConverterTable->dbcs.fromUnicodeFallback); + ucmp16_initBogus(myFromUnicodeFallback); + myToUnicodeFallback = &(myUConverterTable->dbcs.toUnicodeFallback); + ucmp16_initBogus(myToUnicodeFallback); + while (T_FileStream_readLine(convFile, storageLine, UCNV_MAX_LINE_TEXT)) { removeComments(storageLine); @@ -819,9 +985,54 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa } dbcsCodepageValue = T_CString_stringToInteger(codepointBytes, 16); - ucmp16_set(myToUnicode, (int16_t)dbcsCodepageValue, unicodeValue); - ucmp16_set(myFromUnicode, unicodeValue, (int16_t)dbcsCodepageValue); - } + line = uprv_strchr(line, FALLBACK_SEPARATOR); + uprv_memset(codepointBytes, 0, 6); + if (line != NULL) + { + uprv_memcpy(codepointBytes, line+1, 1); + } + fallback = T_CString_stringToInteger(codepointBytes, 10); + if (fallback == 0) + { + ucmp16_set(myToUnicode, (int16_t)dbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicode, unicodeValue, (int16_t)dbcsCodepageValue); + } + else if (fallback == 1) + { + /* Check if this fallback is in the toUnicode or fromUnicode table */ + if (seenFallback == FALSE) + { + myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE; + ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar); + ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD); + } + ucmp16_set(myToUnicodeFallback, (int16_t)dbcsCodepageValue, unicodeValue); + ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)dbcsCodepageValue); + } + } + seenFallback = FALSE; + if (myConverter->hasToUnicodeFallback == TRUE) + { + for (i = 0; i < ucmp16_getkUnicodeCount(); i++) + { + if ((ucmp16_get(myToUnicode, i) == 0xFFFD) && + (ucmp16_get(myToUnicodeFallback, i) != 0xFFFD)) + { + seenFallback = TRUE; + break; + } + } + if (seenFallback == FALSE) + { + ucmp16_close(myToUnicodeFallback); + myConverter->hasToUnicodeFallback = FALSE; + } + else if (myConverter->hasFromUnicodeFallback == TRUE) + { + ucmp16_compact(myFromUnicodeFallback); + ucmp16_compact(myToUnicodeFallback); + } + } ucmp16_compact(myFromUnicode); ucmp16_compact(myToUnicode); @@ -835,6 +1046,8 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData) if (deadSharedData->staticData->conversionType == UCNV_SBCS) { ucmp8_close(&(deadSharedData->table->sbcs.fromUnicode)); + if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp8_close(&(deadSharedData->table->sbcs.fromUnicodeFallback)); uprv_free(deadSharedData->table); uprv_free(deadSharedData); } @@ -842,6 +1055,10 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData) { ucmp16_close(&(deadSharedData->table->mbcs.fromUnicode)); ucmp16_close(&(deadSharedData->table->mbcs.toUnicode)); + if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp16_close(&(deadSharedData->table->mbcs.fromUnicodeFallback)); + if (deadSharedData->staticData->hasToUnicodeFallback == TRUE) + ucmp16_close(&(deadSharedData->table->mbcs.toUnicodeFallback)); uprv_free(deadSharedData->table); uprv_free((UConverterStaticData*)deadSharedData->staticData); uprv_free(deadSharedData); @@ -850,6 +1067,10 @@ bool_t makeconv_deleteSharedConverterData(UConverterSharedData* deadSharedData) { ucmp16_close(&(deadSharedData->table->dbcs.fromUnicode)); ucmp16_close(&(deadSharedData->table->dbcs.toUnicode)); + if (deadSharedData->staticData->hasFromUnicodeFallback == TRUE) + ucmp16_close(&(deadSharedData->table->dbcs.fromUnicodeFallback)); + if (deadSharedData->staticData->hasToUnicodeFallback == TRUE) + ucmp16_close(&(deadSharedData->table->dbcs.toUnicodeFallback)); uprv_free(deadSharedData->table); uprv_free((UConverterStaticData*)deadSharedData->staticData); uprv_free(deadSharedData); @@ -890,7 +1111,7 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE T_FileStream_close(convFile); return NULL; } - + uprv_memset(mySharedData, 0, sizeof(UConverterSharedData)); mySharedData->structSize = sizeof(UConverterSharedData); @@ -965,7 +1186,25 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar udata_writeBlock(pData, (void*)data->table->sbcs.toUnicode, sizeof(uint16_t)*256); size += sizeof(uint16_t)*256; size += udata_write_ucmp8(pData, &data->table->sbcs.fromUnicode); - /* don't care about alignment anymore */ + if (data->staticData->hasFromUnicodeFallback == TRUE) + { + if (size%4) + { + udata_writePadding(pData, 4-(size%4)); + size+= 4-(size%4); + } + size += udata_write_ucmp8(pData, &data->table->sbcs.fromUnicodeFallback); + } + if (data->staticData->hasToUnicodeFallback == TRUE) + { + if (size%4) + { + udata_writePadding(pData, 4-(size%4)); + size+= 4-(size%4); + } + udata_writeBlock(pData, (void*)data->table->sbcs.toUnicodeFallback, sizeof(uint16_t)*256); + /* don't care about alignment anymore */ + } } break; @@ -979,6 +1218,25 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar size+= 4-(size%4); } size += udata_write_ucmp16(pData,&data->table->dbcs.fromUnicode); + if (data->staticData->hasFromUnicodeFallback == TRUE) + { + if(size%4) + { + udata_writePadding(pData, 4-(size%4) ); + size+= 4-(size%4); + } + size += udata_write_ucmp16(pData,&data->table->dbcs.fromUnicodeFallback); + } + if (data->staticData->hasToUnicodeFallback == TRUE) + { + + if(size%4) + { + udata_writePadding(pData, 4-(size%4) ); + size+= 4-(size%4); + } + size += udata_write_ucmp16(pData,&data->table->dbcs.toUnicodeFallback); + } } break; @@ -993,6 +1251,24 @@ static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterShar size+= 4-(size%4); } size += udata_write_ucmp16(pData,&data->table->mbcs.fromUnicode); + if (data->staticData->hasFromUnicodeFallback == TRUE) + { + if(size%4) + { + udata_writePadding(pData, 4-(size%4) ); + size+= 4-(size%4); + } + size += udata_write_ucmp16(pData,&data->table->mbcs.fromUnicodeFallback); + } + if (data->staticData->hasToUnicodeFallback == TRUE) + { + if(size%4) + { + udata_writePadding(pData, 4-(size%4) ); + size+= 4-(size%4); + } + size += udata_write_ucmp16(pData,&data->table->mbcs.toUnicodeFallback); + } } break; diff --git a/icu4c/source/tools/makeconv/ucnvstat.c b/icu4c/source/tools/makeconv/ucnvstat.c index 4237a9b577c..a1850eeca8e 100644 --- a/icu4c/source/tools/makeconv/ucnvstat.c +++ b/icu4c/source/tools/makeconv/ucnvstat.c @@ -19,32 +19,32 @@ static const UConverterStaticData _SBCSStaticData={ sizeof(UConverterStaticData), "SBCS", 0, UCNV_IBM, UCNV_SBCS, 1, 1, - 1, { 0, 0, 0, 0 }, - { 0,0,0 } /* reserved bytes */ + 1, { 0, 0, 0, 0 },FALSE, FALSE, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */ }; static const UConverterStaticData _DBCSStaticData={ sizeof(UConverterStaticData), "DBCS", 0, UCNV_IBM, UCNV_DBCS, 2, 2, - 1, { 0, 0, 0, 0 }, /* subchar */ - { 0,0,0 } /* reserved bytes */ + 1, { 0, 0, 0, 0 }, FALSE, FALSE, /* subchar */ + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */ }; static const UConverterStaticData _MBCSStaticData={ sizeof(UConverterStaticData), "MBCS", 0, UCNV_IBM, UCNV_MBCS, 1, 1, - 1, { 0, 0, 0, 0 }, - { 0,0,0 } /* reserved bytes */ + 1, { 0, 0, 0, 0 }, FALSE, FALSE, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */ }; static const UConverterStaticData _EBCDICStatefulStaticData={ sizeof(UConverterStaticData), "EBCDICStateful", 0, UCNV_IBM, UCNV_EBCDIC_STATEFUL, 1, 1, - 1, { 0, 0, 0, 0 }, - { 0,0,0 } /* reserved bytes */ + 1, { 0, 0, 0, 0 }, FALSE, FALSE, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved bytes */ }; /* NULLs for algorithmic types, their tables live in ucnv_bld.c */