From 7f7b2d90f3e0bd2e299ed38ae104d70a35c51850 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 5 Feb 2000 00:19:15 +0000 Subject: [PATCH] ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 721 --- icu4c/source/common/Makefile.in | 3 +- icu4c/source/common/common.dsp | 16 + icu4c/source/common/convert.cpp | 12 +- icu4c/source/common/ucnv.c | 129 +- icu4c/source/common/ucnv_2022.c | 1238 +++++++++ icu4c/source/common/ucnv_bld.c | 479 +--- icu4c/source/common/ucnv_cnv.c | 3325 +----------------------- icu4c/source/common/ucnv_cnv.h | 412 +-- icu4c/source/common/ucnv_mbcs.c | 552 ++++ icu4c/source/common/ucnv_sbcs.c | 486 ++++ icu4c/source/common/ucnv_utf.c | 1184 +++++++++ icu4c/source/common/unicode/ucnv_bld.h | 194 +- icu4c/source/tools/makeconv/makeconv.c | 143 +- 13 files changed, 3999 insertions(+), 4174 deletions(-) create mode 100644 icu4c/source/common/ucnv_2022.c create mode 100644 icu4c/source/common/ucnv_mbcs.c create mode 100644 icu4c/source/common/ucnv_sbcs.c create mode 100644 icu4c/source/common/ucnv_utf.c diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index e160b402c4c..64c7d2e79dc 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -74,7 +74,8 @@ mutex.o normlzr.o putil.o rbcache.o resbund.o schriter.o scsu.o \ uchar.o uchriter.o ucmp8.o ucmp16.o ucmp32.o ucnv.o ucnv_bld.o \ ucnv_cnv.o ucnv_err.o ucnv_io.o uhash.o uloc.o unicode.o unistr.o \ ures.o ustring.o rbread.o rbdata.o ubidi.o ubidiln.o \ -bidi.o uvector.o udata.o unames.o utf_impl.o +bidi.o uvector.o udata.o unames.o utf_impl.o \ +ucnv_2022.o ucnv_utf.o ucnv_sbcs.o ucnv_mbcs.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/common/common.dsp b/icu4c/source/common/common.dsp index e252684d04b..85ad7682938 100644 --- a/icu4c/source/common/common.dsp +++ b/icu4c/source/common/common.dsp @@ -211,6 +211,10 @@ SOURCE=.\ucnv.c # End Source File # Begin Source File +SOURCE=.\ucnv_2022.c +# End Source 
File +# Begin Source File + SOURCE=.\ucnv_bld.c # End Source File # Begin Source File @@ -227,6 +231,18 @@ SOURCE=.\ucnv_io.c # End Source File # Begin Source File +SOURCE=.\ucnv_mbcs.c +# End Source File +# Begin Source File + +SOURCE=.\ucnv_sbcs.c +# End Source File +# Begin Source File + +SOURCE=.\ucnv_utf.c +# End Source File +# Begin Source File + SOURCE=.\udata.c # ADD CPP /Ze # End Source File diff --git a/icu4c/source/common/convert.cpp b/icu4c/source/common/convert.cpp index 7e30dd8a793..206f6008704 100644 --- a/icu4c/source/common/convert.cpp +++ b/icu4c/source/common/convert.cpp @@ -61,8 +61,12 @@ UnicodeConverterCPP& UnicodeConverterCPP::operator=(const UnicodeConverterCPP& *Increments the assigner converter's ref count */ Mutex updateReferenceCounters; - myUnicodeConverter->sharedData->referenceCounter--; - that.myUnicodeConverter->sharedData->referenceCounter++; + if (myUnicodeConverter->sharedData->referenceCounter != 0 && myUnicodeConverter->sharedData->referenceCounter != ~0) { + myUnicodeConverter->sharedData->referenceCounter--; + } + if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) { + that.myUnicodeConverter->sharedData->referenceCounter++; + } } *myUnicodeConverter = *(that.myUnicodeConverter); @@ -98,7 +102,9 @@ UnicodeConverterCPP::UnicodeConverterCPP(const UnicodeConverterCPP& that) myUnicodeConverter = new UConverter; { Mutex updateReferenceCounter; - that.myUnicodeConverter->sharedData->referenceCounter++; + if (that.myUnicodeConverter->sharedData->referenceCounter != ~0) { + that.myUnicodeConverter->sharedData->referenceCounter++; + } } *myUnicodeConverter = *(that.myUnicodeConverter); } diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index 324b5bf418b..f81a96fcbd9 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -38,112 +38,6 @@ static int32_t ucnv_getAmbiguousCCSID (const UConverter* cnv); /* Internal function : end */ -typedef void (*T_ToUnicodeFunction) (UConverter *, - 
UChar **, - const UChar *, - const char **, - const char *, - int32_t* offsets, - bool_t, - UErrorCode *); - -typedef void (*T_FromUnicodeFunction) (UConverter *, - char **, - const char *, - const UChar **, - const UChar *, - int32_t* offsets, - bool_t, - UErrorCode *); - -typedef UChar (*T_GetNextUCharFunction) (UConverter *, - const char **, - const char *, - UErrorCode *); - -static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] = - -{ - T_UConverter_toUnicode_SBCS, - T_UConverter_toUnicode_DBCS, - T_UConverter_toUnicode_MBCS, - T_UConverter_toUnicode_LATIN_1, - T_UConverter_toUnicode_UTF8, - T_UConverter_toUnicode_UTF16_BE, - T_UConverter_toUnicode_UTF16_LE, - T_UConverter_toUnicode_EBCDIC_STATEFUL, - T_UConverter_toUnicode_ISO_2022 -}; - -static T_ToUnicodeFunction TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] = - -{ - NULL, /*UCNV_SBCS*/ - NULL, /*UCNV_DBCS*/ - T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC, - NULL, /*UCNV_LATIN_1*/ - T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC, - NULL, /*UTF16_BE*/ - NULL, /*UTF16_LE*/ - T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC, - T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC -}; - -static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] = - -{ - NULL, /*UCNV_SBCS*/ - NULL, /*UCNV_DBCS*/ - T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC, - NULL, /*UCNV_LATIN_1*/ - T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC, - NULL, /*UTF16_BE*/ - NULL, /*UTF16_LE*/ - T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC, - T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC -}; - -static T_FromUnicodeFunction FROM_UNICODE_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] = -{ - T_UConverter_fromUnicode_SBCS, - T_UConverter_fromUnicode_DBCS, - T_UConverter_fromUnicode_MBCS, - T_UConverter_fromUnicode_LATIN_1, - T_UConverter_fromUnicode_UTF8, - T_UConverter_fromUnicode_UTF16_BE, - T_UConverter_fromUnicode_UTF16_LE, - 
T_UConverter_fromUnicode_EBCDIC_STATEFUL, - T_UConverter_fromUnicode_ISO_2022 -}; - -static T_GetNextUCharFunction GET_NEXT_UChar_FUNCTIONS[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES] = -{ - T_UConverter_getNextUChar_SBCS, - T_UConverter_getNextUChar_DBCS, - T_UConverter_getNextUChar_MBCS, - T_UConverter_getNextUChar_LATIN_1, - T_UConverter_getNextUChar_UTF8, - T_UConverter_getNextUChar_UTF16_BE, - T_UConverter_getNextUChar_UTF16_LE, - T_UConverter_getNextUChar_EBCDIC_STATEFUL, - T_UConverter_getNextUChar_ISO_2022 -}; - -void flushInternalUnicodeBuffer (UConverter * _this, - UChar * myTarget, - int32_t * myTargetIndex, - int32_t targetLength, - int32_t** offsets, - UErrorCode * err); - -void flushInternalCharBuffer (UConverter * _this, - char *myTarget, - int32_t * myTargetIndex, - int32_t targetLength, - int32_t** offsets, - UErrorCode * err); - - static void T_UConverter_fromCodepageToCodepage (UConverter * outConverter, UConverter * inConverter, char **target, @@ -219,6 +113,7 @@ void ucnv_close (UConverter * converter) { if (converter == NULL) return; + /* ### this cleanup would be cleaner in a function in UConverterImpl */ if ((converter->sharedData->conversionType == UCNV_ISO_2022) && (converter->mode == UCNV_SO)) { @@ -226,9 +121,13 @@ void ucnv_close (UConverter * converter) uprv_free (converter->extraInfo); } - umtx_lock (NULL); - converter->sharedData->referenceCounter--; - umtx_unlock (NULL); + if (converter->sharedData->referenceCounter != ~0) { + umtx_lock (NULL); + if (converter->sharedData->referenceCounter != 0) { + converter->sharedData->referenceCounter--; + } + umtx_unlock (NULL); + } uprv_free (converter); return; @@ -597,8 +496,7 @@ void ucnv_fromUnicode (UConverter * _this, } default: { - - FROM_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this, + _this->sharedData->impl->fromUnicodeWithOffsets(_this, target, targetLimit, source, @@ -611,7 +509,7 @@ void ucnv_fromUnicode (UConverter * _this, }; } /*calls the specific conversion routines 
*/ - FROM_UNICODE_FUNCTIONS[(int)myConvType] (_this, + _this->sharedData->impl->fromUnicode(_this, target, targetLimit, source, @@ -687,8 +585,7 @@ void ucnv_toUnicode (UConverter * _this, } default: { - - TO_UNICODE_FUNCTIONS_OFFSETS_LOGIC[(int) myConvType] (_this, + _this->sharedData->impl->toUnicodeWithOffsets(_this, target, targetLimit, source, @@ -701,7 +598,7 @@ void ucnv_toUnicode (UConverter * _this, }; } /*calls the specific conversion routines */ - TO_UNICODE_FUNCTIONS[(int) myConvType] (_this, + _this->sharedData->impl->toUnicode(_this, target, targetLimit, source, @@ -959,7 +856,7 @@ UChar ucnv_getNextUChar (UConverter * converter, } /*calls the specific conversion routines */ /*as dictated in a code review, avoids a switch statement */ - return GET_NEXT_UChar_FUNCTIONS[(int) (converter->sharedData->conversionType)] (converter, + return converter->sharedData->impl->getNextUChar(converter, source, sourceLimit, err); diff --git a/icu4c/source/common/ucnv_2022.c b/icu4c/source/common/ucnv_2022.c new file mode 100644 index 00000000000..6bf1ebc4a2b --- /dev/null +++ b/icu4c/source/common/ucnv_2022.c @@ -0,0 +1,1238 @@ +/* +********************************************************************** +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_2022.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb03 +* created by: Markus W. 
Scherer +*/ + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "ucmp16.h" +#include "ucmp8.h" +#include "unicode/ucnv_bld.h" +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" + +/* ISO-2022 ----------------------------------------------------------------- */ + +void T_UConverter_fromUnicode_UTF8 (UConverter * converter, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t* offsets, + bool_t flush, + UErrorCode * err); + +void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t* offsets, + bool_t flush, + UErrorCode * err); + +#define ESC_2022 0x1B /*ESC*/ +typedef enum +{ + INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ + VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/ + VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ + VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ +} UCNV_TableStates_2022; + +/*Below are the 3 arrays depicting a state transition table*/ +int8_t normalize_esq_chars_2022[256] = { + 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,0 ,0 + ,2 ,0 ,0 ,0 ,0 ,3 ,0 ,6 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 + ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,0 ,0 + ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 
,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0}; +#define MAX_STATES_2022 54 +int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { + 1 ,34 ,36 ,39 ,1093 ,1096 ,1097 ,1098 ,1099 ,1100 + ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 ,1109 ,1154 ,1157 ,1160 + ,1161 ,1254 ,1257 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 + ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,40133 ,40136 ,40138 + ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 + ,35947631 ,35947635 ,35947636 ,35947638}; +/* Converter name for each entry of escSeqStateTable_Key_2022 (same index); NULL where the matching escSeqStateTable_Value_2022 entry is VALID_NON_TERMINAL_2022, i.e. the key is only a prefix of an escape sequence. */ +const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = { + NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" + ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-895" ,"ibm-943" ,"latin1" ,"latin1" ,NULL ,"ibm-955" ,"ibm-367" + ,"ibm-952" ,NULL ,"UTF8" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"ibm-953" ,"ibm-1383" + ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" + ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" + ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"}; + +UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = { + VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022}; + +/*for 2022 looks ahead in the stream + *to determine the longest possible convertible + *data stream*/ +static const char* getEndOfBuffer_2022(const char* source, + const char* sourceLimit, + bool_t flush); +/*runs through a state machine to determine the escape sequence - codepage correspondance + *changes the pointer pointed to be _this->extraInfo*/ +static void changeState_2022(UConverter* _this, + const char** source, + const char* sourceLimit, + bool_t flush, + UErrorCode* err); + +UCNV_TableStates_2022 getKey_2022(char source, + int32_t* key, + int32_t* offset); + +void T_UConverter_fromUnicode_ISO_2022(UConverter* _this, + char** target, + const char* targetLimit, + const UChar** source, + const UChar* sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode* err) +{ + char const* targetStart = *target; + T_UConverter_fromUnicode_UTF8(_this, + target, + targetLimit, + source, + sourceLimit, + NULL, + flush, + err); +} + + +void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, + char** target, + const char* targetLimit, + const UChar** source, + const UChar* sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode* err) +{ + + char const* targetStart = *target; + T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(_this, + target, + targetLimit, + source, + sourceLimit, + offsets, + flush, + err); + { + int32_t len = *target - targetStart; + int32_t i; + /* 
uprv_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */ + + for(i=len-1;i>=0;i--) offsets[i] = offsets[i]; + + } +} +/* Advances the escape-sequence key by one input byte and binary-searches escSeqStateTable_Key_2022 for it. On a match stores the new key in *key and the table index in *offset and returns the matching escSeqStateTable_Value_2022 entry; otherwise zeroes *key and *offset and returns INVALID_2022. */ +UCNV_TableStates_2022 getKey_2022(char c, + int32_t* key, + int32_t* offset) +{ + int32_t togo = *key; + int32_t low = 0; + int32_t hi = MAX_STATES_2022; + int32_t oldmid = -1; /*no previous probe yet; mid is never negative*/ + + if (*key == 0) togo = normalize_esq_chars_2022[(uint8_t)c]; /*index as unsigned: plain char may be negative for bytes >= 0x80*/ + else + { + togo <<= 5; + togo += normalize_esq_chars_2022[(uint8_t)c]; + } + + while (hi != low) /*binary search*/ + { + register int32_t mid = (hi+low) >> 1; /*Finds median*/ + + if (mid == oldmid) break; + if (escSeqStateTable_Key_2022[mid] > togo) hi = mid; + else if (escSeqStateTable_Key_2022[mid] < togo) low = mid; + else /*we found it*/ + { + *key = togo; + *offset = mid; +#ifdef Debug + printf("found at @ %d\n", mid); +#endif /*Debug*/ + return escSeqStateTable_Value_2022[mid]; + } + oldmid = mid; + + } + +#ifdef Debug + printf("Could not find \"%d\" for %X\n", togo, c); +#endif /*Debug*/ + *key = 0; + *offset = 0; + + + return INVALID_2022; +} + +void changeState_2022(UConverter* _this, + const char** source, + const char* sourceLimit, + bool_t flush, + UErrorCode* err) +{ + UConverter* myUConverter; + uint32_t key = _this->toUnicodeStatus; + UCNV_TableStates_2022 value; + UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); + const char* chosenConverterName = NULL; + int32_t offset; + + /*Close the old Converter*/ + if (_this->mode == UCNV_SO) ucnv_close(myData2022->currentConverter); + myData2022->currentConverter = NULL; + _this->mode = UCNV_SI; + + /*In case we were in the process of consuming an escape sequence + we need to reprocess it */ + + do + { +#ifdef Debug + printf("Pre Stage: char = %x, key = %d, value =%d\n", **source, key, value); + fflush(stdout); +#endif /*Debug*/ +/* Needed explicit cast for key on MVS to make compiler happy - JJD */ + value = getKey_2022(**source,(int32_t *) &key, &offset); +#ifdef Debug + printf("Post Stage: char = %x, key = %d, value =%d\n",
**source, key, value); + fflush(stdout); +#endif /*Debug*/ + switch (value) + { + case VALID_NON_TERMINAL_2022 : + { +#ifdef Debug + puts("VALID_NON_TERMINAL_2022"); +#endif /*Debug*/ + };break; + + case VALID_TERMINAL_2022: + { +#ifdef Debug + puts("VALID_TERMINAL_2022"); +#endif /*Debug*/ + chosenConverterName = escSeqStateTable_Result_2022[offset]; + key = 0; + goto DONE; + };break; + + case INVALID_2022: + { +#ifdef Debug + puts("INVALID_2022"); +#endif /*Debug*/ + _this->toUnicodeStatus = 0; + *err = U_ILLEGAL_CHAR_FOUND; + return; + } + + case VALID_MAYBE_TERMINAL_2022: + { + const char* mySource = (*source + 1); + int32_t myKey = key; + UCNV_TableStates_2022 myValue = value; + int32_t myOffset; +#ifdef Debug + puts("VALID_MAYBE_TERMINAL_2022"); +#endif /*Debug*/ + + while ((mySource < sourceLimit) && + ((myValue == VALID_MAYBE_TERMINAL_2022)||(myValue == VALID_NON_TERMINAL_2022))) + { +#ifdef Debug + printf("MAYBE value = %d myKey = %d %X\n", myValue, myKey, *mySource); +#endif /*Debug*/ + myValue = getKey_2022(*(mySource++), &myKey, &myOffset); + } +#ifdef Debug + printf("myValue = %d\n", myValue); +#endif /*Debug*/ + switch (myValue) + { + case INVALID_2022: + { + /*Backs off*/ +#ifdef Debug + puts("VALID_MAYBE_TERMINAL INVALID"); + printf("offset = %d\n", offset); +#endif /*Debug*/ + chosenConverterName = escSeqStateTable_Result_2022[offset]; + value = VALID_TERMINAL_2022; +#ifdef Debug + printf("%d\n", offset); + fflush(stdout); +#endif /*Debug*/ + goto DONE; + };break; + + case VALID_TERMINAL_2022: + { + /*uses longer escape sequence*/ +#ifdef Debug + puts("VALID_MAYBE_TERMINAL TERMINAL"); +#endif /*Debug*/ + *source = mySource-1; /*deals with the overshot in the while above*/ + chosenConverterName = escSeqStateTable_Result_2022[myOffset]; + key = 0; + value = VALID_TERMINAL_2022; + goto DONE; + };break; + + case VALID_NON_TERMINAL_2022: +#ifdef Debug + puts("VALID_MAYBE_TERMINAL NON_TERMINAL"); +#endif /*Debug*/ + case VALID_MAYBE_TERMINAL_2022: + { 
+#ifdef Debug + puts("VALID_MAYBE_TERMINAL MAYBE_TERMINAL"); +#endif /*Debug*/ + if (flush) + { + /*Backs off*/ + chosenConverterName = escSeqStateTable_Result_2022[offset]; + value = VALID_TERMINAL_2022; + key = 0; + goto DONE; + } + else + { + key = myKey; + value = VALID_NON_TERMINAL_2022; + } + };break; + };break; + };break; + } + } while ((*source)++ <= sourceLimit); + + DONE: + _this->toUnicodeStatus = key; + + if ((value == VALID_NON_TERMINAL_2022) || (value == VALID_MAYBE_TERMINAL_2022)) + { +#ifdef Debug + printf("Out: current **source = %X", **source); +#endif + + return; + } + if (value > 0) myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); + { +#ifdef Debug + printf("Error = %d open \"%s\"\n", *err, chosenConverterName); +#endif /*Debug*/ + if (U_SUCCESS(*err)) + { + /*Customize the converter with the attributes set on the 2022 converter*/ + myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour; + myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour; + uprv_memcpy(myUConverter->subChar, + _this->subChar, + myUConverter->subCharLen = _this->subCharLen); + + _this->mode = UCNV_SO; + } + } + + + return; +} + +/*Checks the first 3 characters of the buffer against valid 2022 escape sequences + *if the match we return a pointer to the initial start of the sequence otherwise + *we return sourceLimit + */ +const char* getEndOfBuffer_2022(const char* source, + const char* sourceLimit, + bool_t flush) +{ + const char* mySource = source; + + if (source >= sourceLimit) return sourceLimit; + + do + { + if (*mySource == ESC_2022) + { + int8_t i; + int32_t key = 0; + int32_t offset; + UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; + + for (i=0; + (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); + i++) + { + value = getKey_2022(*(mySource+i), &key, &offset); +#ifdef Debug + printf("Look ahead value = %d\n", value); +#endif /*Debug*/ + } + if (value > 0) return mySource; + if 
((value == VALID_NON_TERMINAL_2022)&&(!flush) ) return sourceLimit; + } + } + while (mySource++ < sourceLimit); + + return sourceLimit; +} + + + +void T_UConverter_toUnicode_ISO_2022(UConverter* _this, + UChar** target, + const UChar* targetLimit, + const char** source, + const char* sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode* err) +{ + int32_t base = 0; + const char* mySourceLimit; + char const* sourceStart; + + /*Arguments Check*/ + if (U_FAILURE(*err)) return; + if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + for (;;) + { + + mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); + + + /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ + if (_this->mode == UCNV_SO) /*Already doing some conversion*/ + { + const UChar* myTargetStart = *target; +#ifdef Debug + printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); +#endif /*Debug*/ + + ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, + target, + targetLimit, + source, + mySourceLimit, + NULL, + flush, + err); + + +#ifdef Debug + puts("---------------------------> CONVERTED"); + printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); + printf("err =%d", *err); +#endif /*Debug*/ + } + /*-Done with buffer with entire buffer + -Error while converting + */ + + if (U_FAILURE(*err) || (*source == sourceLimit)) return; +#ifdef Debug + puts("Got Here!"); + fflush(stdout); +#endif /*Debug*/ + sourceStart = *source; + changeState_2022(_this, + source, + sourceLimit, + flush, + err); + (*source)++; + + } + + return; +} + +void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, + UChar** target, + const UChar* targetLimit, + const char** source, + const char* sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode* err) +{ + int32_t myOffset=0; + int32_t base = 0; + 
const char* mySourceLimit; + char const* sourceStart; + + /*Arguments Check*/ + if (U_FAILURE(*err)) return; + if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + for (;;) + { + + mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); + /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ + + if (_this->mode == UCNV_SO) /*Already doing some conversion*/ + { + const UChar* myTargetStart = *target; +#ifdef Debug + printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); +#endif /*Debug*/ + + ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, + target, + targetLimit, + source, + mySourceLimit, + offsets, + flush, + err); + + { + int32_t lim = *target - myTargetStart; + int32_t i = 0; + for (i=base; i < lim;i++) offsets[i] += myOffset; + base += lim; + } + +#ifdef Debug + puts("---------------------------> CONVERTED"); + printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); + printf("err =%d", *err); +#endif /*Debug*/ + } + + /*-Done with buffer with entire buffer + -Error while converting + */ + + if (U_FAILURE(*err) || (*source == sourceLimit)) return; +#ifdef Debug + puts("Got Here!"); + fflush(stdout); +#endif /*Debug*/ + sourceStart = *source; + changeState_2022(_this, + source, + sourceLimit, + flush, + err); + (*source)++; + myOffset += *source - sourceStart; + + } + + return; +} + +UChar T_UConverter_getNextUChar_ISO_2022(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + const char* mySourceLimit; + /*Arguments Check*/ + if (sourceLimit < *source) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0xFFFD; + } + + for (;;) + { + mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, TRUE); + /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ + if (converter->mode == UCNV_SO) /*Already 
doing some conversion*/ + { + + return ucnv_getNextUChar(((UConverterDataISO2022*)(converter->extraInfo))->currentConverter, + source, + mySourceLimit, + err); + + + } + /*-Done with buffer with entire buffer + -Error while converting + */ + + + changeState_2022(converter, + source, + sourceLimit, + TRUE, + err); + (*source)++; + } + + return 0xFFFD; +} + +static UConverterImpl _ISO2022Impl={ + UCNV_ISO_2022, + + T_UConverter_toUnicode_ISO_2022, + T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, + T_UConverter_fromUnicode_ISO_2022, + T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC, + T_UConverter_getNextUChar_ISO_2022 +}; + +extern UConverterSharedData _ISO2022Data={ + sizeof(UConverterSharedData), ~0, + NULL, NULL, &_ISO2022Impl, "ISO_2022", + 2022, UCNV_IBM, UCNV_ISO_2022, 1, 4, + { 0, 1, 0x1a, 0, 0, 0 } +}; + +/* EBCDICStateful ----------------------------------------------------------- */ + +void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const char *mySource = *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + int32_t myMode = _this->mode; + + + myToUnicode = _this->sharedData->table->dbcs.toUnicode; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) (mySource[mySourceIndex++]); + if (mySourceChar == UCNV_SI) myMode = UCNV_SI; + else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; + else if ((myMode == UCNV_SO) && + (_this->toUnicodeStatus == 0x00)) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar; + } + else + { + /*In case there 
is a state, we update the source char + *by concatenating the previous char with the current + *one + */ + if (_this->toUnicodeStatus != 0x00) + { + mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); + _this->toUnicodeStatus = 0x00; + } + else mySourceChar &= 0x00FF; + + /*gets the corresponding Unicode codepoint */ + targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); + + /*writing the UniChar to the output stream */ + if (targetUniChar != missingUCharMarker) + { + /*writes the UniChar to the output stream */ + myTarget[myTargetIndex++] = targetUniChar; + } + else + { + *err = U_INVALID_CHAR_FOUND; + if (mySourceChar > 0xff) + { + _this->invalidCharLength = 2; + _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); + _this->invalidCharBuffer[1] = (char) mySourceChar; + } + else + { + _this->invalidCharLength = 1; + _this->invalidCharBuffer[0] = (char) mySourceChar; + } + _this->mode = myMode; + ToU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + /*If at the end of conversion we are still carrying state information + *flush is TRUE, we can deduce that the input stream is truncated + */ + if (_this->toUnicodeStatus + && (mySourceIndex == sourceLength) + && (flush == TRUE)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + _this->mode = myMode; + + return; +} + +void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const char *mySource = *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t 
myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + int32_t myMode = _this->mode; + int32_t* originalOffsets = offsets; + + + myToUnicode = _this->sharedData->table->dbcs.toUnicode; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) (mySource[mySourceIndex++]); + if (mySourceChar == UCNV_SI) myMode = UCNV_SI; + else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; + else if ((myMode == UCNV_SO) && + (_this->toUnicodeStatus == 0x00)) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar; + } + else + { + /*In case there is a state, we update the source char + *by concatenating the previous char with the current + *one + */ + if (_this->toUnicodeStatus != 0x00) + { + mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); + _this->toUnicodeStatus = 0x00; + } + else mySourceChar &= 0x00FF; + + /*gets the corresponding Unicode codepoint */ + targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); + + /*writing the UniChar to the output stream */ + if (targetUniChar != missingUCharMarker) + { + /*writes the UniChar to the output stream */ + { + if(myMode == UCNV_SO) + offsets[myTargetIndex] = mySourceIndex-2; /* double byte */ + else + offsets[myTargetIndex] = mySourceIndex-1; /* single byte */ + } + myTarget[myTargetIndex++] = targetUniChar; + } + else + { + int32_t currentOffset = offsets[myTargetIndex-1] + 2;/* Because mySourceIndex was already incremented */ + + *err = U_INVALID_CHAR_FOUND; + if (mySourceChar > 0xFF) + { + _this->invalidCharLength = 2; + _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); + _this->invalidCharBuffer[1] = (char) mySourceChar; + } + else + { + _this->invalidCharLength = 1; + _this->invalidCharBuffer[0] = (char) mySourceChar; + } + _this->mode = 
myMode; + ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + /*If at the end of conversion we are still carrying state information + *flush is TRUE, we can deduce that the input stream is truncated + */ + if (_this->toUnicodeStatus + && (mySourceIndex == sourceLength) + && (flush == TRUE)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + _this->mode = myMode; + + return; +} + +void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) + +{ + const UChar *mySource = *source; + char *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myFromUnicode = NULL; + UChar targetUniChar = 0x0000; + int8_t targetUniCharByteNum = 0; + UChar mySourceChar = 0x0000; + bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; + bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; + myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); + oldIsTargetUCharDBCS = isTargetUCharDBCS; + isTargetUCharDBCS = (targetUniChar>0x00FF); + + if (targetUniChar != missingCharMarker) + { + if (oldIsTargetUCharDBCS != isTargetUCharDBCS) + { + if (isTargetUCharDBCS) 
myTarget[myTargetIndex++] = UCNV_SO; + else myTarget[myTargetIndex++] = UCNV_SI; + + + if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + else if (myTargetIndex+1 >= targetLength) + { + _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); + _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; + _this->charErrorBufferLength = 2; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + if (!isTargetUCharDBCS) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + } + else + { + isTargetUCharDBCS = oldIsTargetUCharDBCS; + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySourceChar; + _this->invalidUCharLength = 1; + + _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; + FromU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + + *target += myTargetIndex; + *source += mySourceIndex; + + _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; + + return; +} + +void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) + +{ + const UChar *mySource = *source; + char *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = 
targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myFromUnicode = NULL; + UChar targetUniChar = 0x0000; + int8_t targetUniCharByteNum = 0; + UChar mySourceChar = 0x0000; + bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; + bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; + int32_t* originalOffsets = offsets; + + myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); + oldIsTargetUCharDBCS = isTargetUCharDBCS; + isTargetUCharDBCS = (targetUniChar>0x00FF); + + if (targetUniChar != missingCharMarker) + { + if (oldIsTargetUCharDBCS != isTargetUCharDBCS) + { + offsets[myTargetIndex] = mySourceIndex-1; + if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO; + else myTarget[myTargetIndex++] = UCNV_SI; + + + if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + else if (myTargetIndex+1 >= targetLength) + { + _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); + _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; + _this->charErrorBufferLength = 2; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + if (!isTargetUCharDBCS) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = 
U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + } + else + { + int32_t currentOffset = offsets[myTargetIndex-1]+1; + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySourceChar; + _this->invalidUCharLength = 1; + + /* Breaks out of the loop since behaviour was set to stop */ + _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; + FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + + *target += myTargetIndex; + *source += mySourceIndex;; + + _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; + + return; +} + +UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + char const *sourceInitial = *source; + /*safe keeps a ptr to the beginning in case we need to step back*/ + + /*Input boundary check*/ + if ((*source)+1 > sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + + /*Checks to see if with have SI/SO shifters + if we do we change the mode appropriately and we consume the byte*/ + if ((**source == UCNV_SI) || (**source == UCNV_SO)) + { + converter->mode = **source; + (*source)++; + + /*Rechecks boundary after consuming the shift sequence*/ + if ((*source)+1 > sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + } + + if (converter->mode == UCNV_SI) + { + /*Not lead byte: we update the source ptr and get the codepoint*/ + myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + (UChar)(**source)); + (*source)++; + } + else + { + /*Lead byte: we Build the codepoint and get the corresponding character + * and update the source ptr*/ + if ((*source + 2) > sourceLimit) + { + *err = U_TRUNCATED_CHAR_FOUND; + return 0xFFFD; + } + + 
myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); + + (*source) += 2; + } + + if (myUChar != 0xFFFD) return myUChar; + else + { + /*rewinds source*/ + const char* sourceFinal = *source; + UChar* myUCharPtr = &myUChar; + + *err = U_INVALID_CHAR_FOUND; + *source = sourceInitial; + + /*It's is very likely that the ErrorFunctor will write to the + *internal buffers */ + converter->fromCharErrorBehaviour(converter, + &myUCharPtr, + myUCharPtr + 1, + &sourceFinal, + sourceLimit, + NULL, + TRUE, + err); + + /*makes the internal caching transparent to the user*/ + if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; + + return myUChar; + } +} + +static UConverterImpl _EBCDICStatefulImpl={ + UCNV_EBCDIC_STATEFUL, + + T_UConverter_toUnicode_EBCDIC_STATEFUL, + T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC, + T_UConverter_fromUnicode_EBCDIC_STATEFUL, + T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC, + T_UConverter_getNextUChar_EBCDIC_STATEFUL +}; + +extern UConverterSharedData _EBCDICStatefulData={ + sizeof(UConverterSharedData), 1, + NULL, NULL, &_EBCDICStatefulImpl, "EBCDICStateful", + 0, UCNV_IBM, UCNV_EBCDIC_STATEFUL, 1, 1, + { 0, 1, 0, 0, 0, 0 } +}; diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index fa801b89cbd..e1b4a391755 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -20,6 +20,7 @@ #include "ucmp8.h" #include "unicode/ucnv_bld.h" #include "unicode/ucnv_err.h" +#include "ucnv_cnv.h" #include "ucnv_imp.h" #include "unicode/udata.h" #include "unicode/ucnv.h" @@ -30,21 +31,29 @@ #include -/*Array used to generate ALGORITHMIC_CONVERTERS_HASHTABLE - *should ALWAYS BE EMPTY STRING TERMINATED. 
- */ -static const char *algorithmicConverterNames[] = { - "LATIN_1", - "UTF8", - "UTF16_BigEndian", - "UTF16_LittleEndian", - "UTF16_PlatformEndian", - "UTF16_OppositeEndian", - "ISO_2022", - "JIS", - "EUC", - "GB", - "" +static const UConverterSharedData * +converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ + &_SBCSData, &_DBCSData, &_MBCSData, &_Latin1Data, + &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_EBCDICStatefulData, + &_ISO2022Data +}; + +static struct { + const char *name; + UConverterType type; +} cnvNameType[] = { + { "LATIN_1", UCNV_LATIN_1 }, + { "UTF8", UCNV_UTF8 }, + { "UTF16_BigEndian", UCNV_UTF16_BigEndian }, + { "UTF16_LittleEndian", UCNV_UTF16_LittleEndian }, +#if U_IS_BIG_ENDIAN + { "UTF16_PlatformEndian", UCNV_UTF16_BigEndian }, + { "UTF16_OppositeEndian", UCNV_UTF16_LittleEndian }, +#else + { "UTF16_PlatformEndian", UCNV_UTF16_LittleEndian }, + { "UTF16_OppositeEndian", UCNV_UTF16_BigEndian}, +#endif + { "ISO_2022", UCNV_ISO_2022 } }; /*Takes an alias name gets an actual converter file name @@ -52,7 +61,6 @@ static const char *algorithmicConverterNames[] = { *allocates the memory and returns a new UConverter object */ static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err); -static UConverter *createConverterFromAlgorithmicType (const char *realName, UErrorCode * err); /*Given a file returns a newly allocated CompactByteArray based on the a serialized one */ static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err); @@ -70,7 +78,7 @@ static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile, static UConverterPlatform getPlatformFromName (char *name); static int32_t getCodepageNumberFromName (char *name); -static UConverterType getAlgorithmicTypeFromName (const char *realName); +static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realName); /*these functions initialize the lightweight mutable part of the @@ -84,10 +92,38 @@ 
static void initializeAlgorithmicConverter (UConverter * myConverter); static int32_t uhash_hashSharedData (void *sharedData); +/*Defines the struct of a UConverterSharedData the immutable, shared part of + *UConverter - + * This is the definition from ICU 1.4, necessary to read converter data + * version 1 because the structure is directly embedded in the data. + * See udata.html for why this is bad (pointers, enums, padding...). + */ +typedef struct + { + uint32_t structSize; /* Size of this structure */ + void *dataMemory; + uint32_t referenceCounter; /*used to count number of clients */ + char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */ + UConverterPlatform platform; /*platform of the converter (only IBM now) */ + int32_t codepage; /*codepage # (now IBM-$codepage) */ + UConverterType conversionType; /*conversion type */ + int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */ + int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */ + struct + { /*initial values of some members of the mutable part of object */ + uint32_t toUnicodeStatus; + int8_t subCharLen; + unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; + } + defaultConverterValues; + UConverterTable *table; /*Pointer to conversion data */ + } +UConverterSharedData_1_4; + /** * Un flatten shared data from a UDATA.. */ -U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData *data, UErrorCode *status); +U_CAPI UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *data, UErrorCode *status); /*initializes some global variables */ @@ -328,7 +364,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err) } /* clone it. 
OK to drop the original sharedData */ - myConverter->sharedData = ucnv_data_unFlattenClone(myConverter->sharedData, err); + myConverter->sharedData = ucnv_data_unFlattenClone((UConverterSharedData_1_4 *)myConverter->sharedData, err); myConverter->sharedData->dataMemory = (void*)data; /* for future use */ @@ -372,39 +408,16 @@ void /*returns a converter type from a string */ -UConverterType +const UConverterSharedData * getAlgorithmicTypeFromName (const char *realName) { - if (uprv_strcmp (realName, "UTF8") == 0) - return UCNV_UTF8; - else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0) - return UCNV_UTF16_BigEndian; - else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0) - return UCNV_UTF16_LittleEndian; - else if (uprv_strcmp (realName, "LATIN_1") == 0) - return UCNV_LATIN_1; - else if (uprv_strcmp (realName, "JIS") == 0) - return UCNV_JIS; - else if (uprv_strcmp (realName, "EUC") == 0) - return UCNV_EUC; - else if (uprv_strcmp (realName, "GB") == 0) - return UCNV_GB; - else if (uprv_strcmp (realName, "ISO_2022") == 0) - return UCNV_ISO_2022; - else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0) -# if U_IS_BIG_ENDIAN - return UCNV_UTF16_BigEndian; -# else - return UCNV_UTF16_LittleEndian; -# endif - else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0) -# if U_IS_BIG_ENDIAN - return UCNV_UTF16_LittleEndian; -# else - return UCNV_UTF16_BigEndian; -# endif - else - return UCNV_UNSUPPORTED_CONVERTER; + int i; + for(i=0; itable->mbcs.fromUnicode); ucmp16_close (deadSharedData->table->mbcs.toUnicode); - uprv_free (deadSharedData->table); + uprv_free (deadSharedData->table); }; break; @@ -519,7 +534,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData) { ucmp16_close (deadSharedData->table->dbcs.fromUnicode); ucmp16_close (deadSharedData->table->dbcs.toUnicode); - uprv_free (deadSharedData->table); + uprv_free (deadSharedData->table); }; break; @@ -537,55 +552,6 @@ bool_t deleteSharedConverterData (UConverterSharedData 
* deadSharedData) return TRUE; } -bool_t isDataBasedConverter (const char *name) -{ - int32_t i = 0; - bool_t result = FALSE; - UErrorCode err = U_ZERO_ERROR; - - /*Lazy evaluates the hashtable */ - if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL) - { - UHashtable* myHT; - - { - myHT = uhash_open ((UHashFunction)uhash_hashIString, &err); - - if (U_FAILURE (err)) return FALSE; - while (algorithmicConverterNames[i][0] != '\0') - { - /*Stores in the hashtable a pointer to the statically init'ed array containing - *the names - */ - - uhash_put (myHT, - (void *) algorithmicConverterNames[i], - &err); - i++; /*Some Compilers (Solaris WSpro and MSVC-Release Mode - *don't differentiate between i++ and ++i - *so we have to increment in a line by itself - */ - } - } - - umtx_lock (NULL); - if (ALGORITHMIC_CONVERTERS_HASHTABLE == NULL) ALGORITHMIC_CONVERTERS_HASHTABLE = myHT; - else uhash_close(myHT); - umtx_unlock (NULL); - - - } - - - if (uhash_get (ALGORITHMIC_CONVERTERS_HASHTABLE, - uhash_hashIString (name)) == NULL) - { - result = TRUE; - } - - - return result; -} /*Logic determines if the converter is Algorithmic AND/OR cached *depending on that: * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) @@ -636,7 +602,8 @@ UConverter * } } - if (isDataBasedConverter (realName)) + mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName (realName); + if (mySharedConverterData == NULL) { mySharedConverterData = getSharedConverterData (realName); @@ -652,8 +619,6 @@ UConverter * else { /*shared it with other library clients */ - - shareConverterData (myUConverter->sharedData); return myUConverter; } @@ -668,6 +633,7 @@ UConverter * return NULL; } + /* ### this is unsafe: the shared data could have been deleted since sharing or getting it - these operations should increase the counter! 
*/ /*update the reference counter: one more client */ umtx_lock (NULL); mySharedConverterData->referenceCounter++; @@ -681,45 +647,24 @@ UConverter * } else { - /*with have an algorithmic converter */ - mySharedConverterData = getSharedConverterData (realName); - - /*Non cached */ - if (mySharedConverterData == NULL) + /* ### we have an algorithmic converter, it does not need to be cached?! */ + if (getSharedConverterData (realName) == NULL) { - myUConverter = createConverterFromAlgorithmicType (realName, err); - if (U_FAILURE (*err) || (myUConverter == NULL)) - { - uprv_free (myUConverter); - return NULL; - } - else - { - /* put the shared object in shared table */ - shareConverterData (myUConverter->sharedData); - return myUConverter; - } - } - else - { - myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); - if (myUConverter == NULL) - { - *err = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /*Increase the reference counter */ - umtx_lock (NULL); - mySharedConverterData->referenceCounter++; - umtx_unlock (NULL); - - /*initializes the converter */ - myUConverter->sharedData = mySharedConverterData; - initializeAlgorithmicConverter (myUConverter); - return myUConverter; + /* put the shared object in shared table */ + shareConverterData (mySharedConverterData); } + myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); + if (myUConverter == NULL) + { + *err = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /*initializes the converter */ + uprv_memset(myUConverter, 0, sizeof(UConverter)); + myUConverter->sharedData = mySharedConverterData; + initializeAlgorithmicConverter (myUConverter); return myUConverter; } @@ -751,21 +696,11 @@ void initializeDataConverter (UConverter * myUConverter) } /* This function initializes algorithmic converters - * based on there type + * based on their type */ void initializeAlgorithmicConverter (UConverter * myConverter) { - char UTF8_subChar[] = {(char) 0xFF, (char) 0xFF, (char) 0xFF}; - char 
UTF16BE_subChar[] = {(char) 0xFF, (char) 0xFD}; - char UTF16LE_subChar[] = {(char) 0xFD, (char) 0xFF}; - char EUC_subChar[] = {(char) 0xAF, (char) 0xFE}; - char GB_subChar[] = {(char) 0xFF, (char) 0xFF}; - char JIS_subChar[] = {(char) 0xFF, (char) 0xFF}; - char LATIN1_subChar = 0x1A; - - - myConverter->mode = UCNV_SI; myConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE; myConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE; @@ -774,216 +709,79 @@ void myConverter->extraInfo = NULL; + myConverter->fromUnicodeStatus = 0; + myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus; + myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen; + uprv_memcpy (myConverter->subChar, myConverter->sharedData->defaultConverterValues.subChar, UCNV_MAX_SUBCHAR_LEN); + /* ### it would be cleaner to have the following in a function in UConverterImpl, with a UErrorCode */ switch (myConverter->sharedData->conversionType) { - case UCNV_UTF8: - { - myConverter->sharedData->minBytesPerChar = 1; - myConverter->sharedData->maxBytesPerChar = 4; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 3; - myConverter->subCharLen = 3; - myConverter->toUnicodeStatus = 0; - myConverter->fromUnicodeStatus = 0; /* srl */ - myConverter->sharedData->platform = UCNV_IBM; - myConverter->sharedData->codepage = 1208; - uprv_strcpy(myConverter->sharedData->name, "UTF8"); - uprv_memcpy (myConverter->subChar, UTF8_subChar, 3); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3); - - break; - } - case UCNV_LATIN_1: - { - myConverter->sharedData->minBytesPerChar = 1; - myConverter->sharedData->maxBytesPerChar = 1; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 
1; - myConverter->subCharLen = 1; - myConverter->toUnicodeStatus = 0; - myConverter->sharedData->platform = UCNV_IBM; - myConverter->sharedData->codepage = 819; - uprv_strcpy(myConverter->sharedData->name, "LATIN_1"); - *(myConverter->subChar) = LATIN1_subChar; - *(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar; - break; - } - - case UCNV_UTF16_BigEndian: - { - myConverter->sharedData->minBytesPerChar = 2; - myConverter->sharedData->maxBytesPerChar = 2; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 2; - myConverter->subCharLen = 2; - myConverter->toUnicodeStatus = 0; - myConverter->fromUnicodeStatus = 0; - uprv_strcpy(myConverter->sharedData->name, "UTF_16BE"); - myConverter->sharedData->platform = UCNV_IBM; - myConverter->sharedData->codepage = 1200; - uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2); - - break; - } - - case UCNV_UTF16_LittleEndian: - { - myConverter->sharedData->minBytesPerChar = 2; - myConverter->sharedData->maxBytesPerChar = 2; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 2; - myConverter->subCharLen = 2; - myConverter->toUnicodeStatus = 0; - myConverter->fromUnicodeStatus = 0; - myConverter->sharedData->platform = UCNV_IBM; - myConverter->sharedData->codepage = 1200; - uprv_strcpy(myConverter->sharedData->name, "UTF_16LE"); - uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2); - break; - } - case UCNV_EUC: - { - myConverter->sharedData->minBytesPerChar = 1; - myConverter->sharedData->maxBytesPerChar = 2; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 2; - 
myConverter->subCharLen = 2; - myConverter->toUnicodeStatus = 0; - uprv_memcpy (myConverter->subChar, EUC_subChar, 2); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2); - break; - } case UCNV_ISO_2022: { myConverter->charErrorBuffer[0] = 0x1b; myConverter->charErrorBuffer[1] = 0x25; myConverter->charErrorBuffer[2] = 0x42; myConverter->charErrorBufferLength = 3; - myConverter->sharedData->minBytesPerChar = 1; - myConverter->sharedData->maxBytesPerChar = 3; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 1; - myConverter->subCharLen = 1; - myConverter->toUnicodeStatus = 0; - myConverter->fromUnicodeStatus = 0; /* srl */ - myConverter->sharedData->codepage = 2022; - uprv_strcpy(myConverter->sharedData->name, "ISO_2022"); - *(myConverter->subChar) = LATIN1_subChar; - *(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar; myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022)); + /* ### check for extraInfo==NULL !! does this need to be allocated at all? 
*/ ((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL; ((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0; break; } - case UCNV_GB: - { - myConverter->sharedData->minBytesPerChar = 2; - myConverter->sharedData->maxBytesPerChar = 2; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 2; - myConverter->subCharLen = 2; - myConverter->toUnicodeStatus = 0; - uprv_memcpy (myConverter->subChar, GB_subChar, 2); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2); - break; - } - case UCNV_JIS: - { - myConverter->sharedData->minBytesPerChar = 2; - myConverter->sharedData->maxBytesPerChar = 2; - myConverter->sharedData->defaultConverterValues.toUnicodeStatus = 0; - myConverter->sharedData->defaultConverterValues.subCharLen = 2; - myConverter->subCharLen = 2; - myConverter->toUnicodeStatus = 0; - uprv_memcpy (myConverter->subChar, JIS_subChar, 2); - uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2); - break; - } default: break; }; - - myConverter->toUnicodeStatus = myConverter->sharedData->defaultConverterValues.toUnicodeStatus; } - -/*This function creates an algorithmic converter - *Note That even algorithmic converters are shared - * (The UConverterSharedData->table == NULL since - * there are no tables) - *for uniformity of design and control flow - */ -UConverter * - createConverterFromAlgorithmicType (const char *actualName, UErrorCode * err) -{ - int32_t i = 0; - UConverter *myConverter = NULL; - UConverterSharedData *mySharedData = NULL; - UConverterType myType = getAlgorithmicTypeFromName (actualName); - - if (U_FAILURE (*err)) - return NULL; - - myConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); - if (myConverter == NULL) - { - *err = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - myConverter->sharedData = NULL; - mySharedData = (UConverterSharedData *) 
uprv_malloc (sizeof (UConverterSharedData)); - if (mySharedData == NULL) - { - *err = U_MEMORY_ALLOCATION_ERROR; - uprv_free (myConverter); - return NULL; - } - mySharedData->structSize = sizeof(UConverterSharedData); - mySharedData->table = NULL; - mySharedData->dataMemory = NULL; - uprv_strcpy (mySharedData->name, actualName); - /*Initializes the referenceCounter to 1 */ - mySharedData->referenceCounter = 1; - mySharedData->platform = UCNV_UNKNOWN; - mySharedData->codepage = 0; - mySharedData->conversionType = myType; - myConverter->sharedData = mySharedData; - - initializeAlgorithmicConverter (myConverter); - return myConverter; -} - - -UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *source, UErrorCode *status) +UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *source, UErrorCode *status) { const uint8_t *raw, *oldraw; UConverterSharedData *data = NULL; - + UConverterType type = source->conversionType; + if(U_FAILURE(*status)) return NULL; - if(source->structSize != sizeof(UConverterSharedData)) + if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || + converterData[type]->referenceCounter != 1 || + source->structSize != sizeof(UConverterSharedData_1_4)) { *status = U_INVALID_TABLE_FORMAT; return NULL; } - data = (UConverterSharedData*) malloc(sizeof(UConverterSharedData)); - raw = (uint8_t*)source; - uprv_memcpy(data,source,sizeof(UConverterSharedData)); - - raw += data->structSize; + data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); + if(data == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } - /* data->table = (UConverterTable*)raw; */ - + /* copy initial values from the static structure for this type */ + uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); + + /* ### it would be much more efficient if the table were a direct member, not a pointer */ + data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable)); + 
if(data->table == NULL) { + uprv_free(data); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* fill in fields from the loaded data */ + data->name = source->name; /* ### this could/should come from the caller - should be the same as the canonical name?!! */ + data->codepage = source->codepage; + data->platform = source->platform; + data->minBytesPerChar = source->minBytesPerChar; + data->maxBytesPerChar = source->maxBytesPerChar; + uprv_memcpy(&data->defaultConverterValues, &source->defaultConverterValues, sizeof(data->defaultConverterValues)); + + raw = (uint8_t*)source + source->structSize; + + /* the checks above made sure that the type is valid for a data-based converter */ switch (data->conversionType) { case UCNV_SBCS: - data->table = malloc(sizeof(UConverterSBCSTable)); data->table->sbcs.toUnicode = (UChar*)raw; raw += sizeof(UChar)*256; @@ -993,22 +791,20 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc case UCNV_EBCDIC_STATEFUL: case UCNV_DBCS: - data->table = uprv_malloc(sizeof(UConverterDBCSTable)); - oldraw = raw; data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status); - while((raw-oldraw)%4) /* pad to 4 */ - raw++; + /* pad to 4 */ + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); + } data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status); break; case UCNV_MBCS: - data->table = uprv_malloc(sizeof(UConverterMBCSTable)); - data->table->mbcs.starters = (bool_t*)raw; raw += sizeof(bool_t)*256; @@ -1016,24 +812,15 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc data->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, status); - while((raw-oldraw)%4) /* pad to 4 */ - raw++; + /* pad to 4 */ + if(((raw-oldraw)&3)!=0) { + raw+=4-((raw-oldraw)&3); + } data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status); break; - - default: - *status = U_INVALID_TABLE_FORMAT; - return NULL; } return data; } - - - - - - - diff --git 
a/icu4c/source/common/ucnv_cnv.c b/icu4c/source/common/ucnv_cnv.c index 2271965e9c1..c35c876a05b 100644 --- a/icu4c/source/common/ucnv_cnv.c +++ b/icu4c/source/common/ucnv_cnv.c @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1998-1999, International Business Machines +* Copyright (C) 2000, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -13,2830 +13,12 @@ */ #include "unicode/utypes.h" -#include "uhash.h" -#include "ucmp16.h" -#include "ucmp8.h" #include "unicode/ucnv_bld.h" #include "unicode/ucnv_err.h" #include "ucnv_cnv.h" #include "unicode/ucnv.h" #include "cmemory.h" -#ifdef Debug -#include -#endif - - - - - -void flushInternalUnicodeBuffer (UConverter * _this, - UChar * myTarget, - int32_t * myTargetIndex, - int32_t targetLength, - int32_t** offsets, - UErrorCode * err); - -void flushInternalCharBuffer (UConverter * _this, - char *myTarget, - int32_t * myTargetIndex, - int32_t targetLength, - int32_t** offsets, - UErrorCode * err); - -#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ - if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\ - else \ - { \ - char *myTargetCopy = myTarget + myTargetIndex; \ - const UChar *mySourceCopy = mySource + mySourceIndex; \ - /*copies current values for the ErrorFunctor to update */ \ - /*Calls the ErrorFunctor */ \ - _this->fromUCharErrorBehaviour (_this, \ - (char **) &myTargetCopy, \ - targetLimit, \ - (const UChar **) &mySourceCopy, \ - sourceLimit, \ - offsets, \ - flush, \ - err); \ - /*Update the local Indexes so that the conversion can restart at the right points */ \ - mySourceIndex = (mySourceCopy - mySource) ; \ - myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \ - } - -#define 
ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ - if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \ - else \ - { \ - UChar *myTargetCopy = myTarget + myTargetIndex; \ - const char *mySourceCopy = mySource + mySourceIndex; \ - /*Calls the ErrorFunctor */ \ - _this->fromCharErrorBehaviour (_this, \ - &myTargetCopy, \ - targetLimit, \ - (const char **) &mySourceCopy, \ - sourceLimit, \ - offsets, \ - flush, \ - err); \ - /*Update the local Indexes so that the conversion can restart at the right points */ \ - mySourceIndex = ((char*)mySourceCopy - (char*)mySource); \ - myTargetIndex = (myTargetCopy - myTarget); \ - } - -#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ - if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\ - else \ - { \ - char *myTargetCopy = myTarget + myTargetIndex; \ - const UChar *mySourceCopy = mySource + mySourceIndex; \ - int32_t My_i = myTargetIndex; \ - /*copies current values for the ErrorFunctor to update */ \ - /*Calls the ErrorFunctor */ \ - _this->fromUCharErrorBehaviour (_this, \ - (char **) &myTargetCopy, \ - targetLimit, \ - (const UChar **) &mySourceCopy, \ - sourceLimit, \ - offsets + myTargetIndex, \ - flush, \ - err); \ - /*Update the local Indexes so that the conversion can restart at the right points */ \ - mySourceIndex = mySourceCopy - mySource ; \ - myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \ - for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \ - } - - - -#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ - if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \ - else \ - { \ - UChar *myTargetCopy 
= myTarget + myTargetIndex; \ - const char *mySourceCopy = mySource + mySourceIndex; \ - int32_t My_i = myTargetIndex; \ - _this->fromCharErrorBehaviour (_this, \ - &myTargetCopy, \ - targetLimit, \ - (const char **) &mySourceCopy, \ - sourceLimit, \ - offsets + myTargetIndex, \ - flush, \ - err); \ - /*Update the local Indexes so that the conversion can restart at the right points */ \ - mySourceIndex = (char *)mySourceCopy - (char*)mySource; \ - myTargetIndex = ((UChar*)myTargetCopy - (UChar*)myTarget); \ - for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \ - } - - - -/* UTF-8 Conversion DATA - * for more information see Unicode Strandard 2.0 , Transformation Formats Appendix A-9 - */ -const uint32_t kReplacementCharacter = 0x0000FFFD; -const uint32_t kMaximumUCS2 = 0x0000FFFF; -const uint32_t kMaximumUTF16 = 0x0010FFFF; -const uint32_t kMaximumUCS4 = 0x7FFFFFFF; -const int8_t halfShift = 10; -const uint32_t halfBase = 0x0010000; -const uint32_t halfMask = 0x3FF; -const uint32_t kSurrogateHighStart = 0xD800; -const uint32_t kSurrogateHighEnd = 0xDBFF; -const uint32_t kSurrogateLowStart = 0xDC00; -const uint32_t kSurrogateLowEnd = 0xDFFF; - -const uint32_t offsetsFromUTF8[7] = {0, - (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, - (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 -}; - -#define ESC_2022 0x1B /*ESC*/ -typedef enum -{ - INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ - VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/ - VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ - VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ -} UCNV_TableStates_2022; - -/*Below are the 3 arrays depicting a state transition table*/ -int8_t normalize_esq_chars_2022[256] = { - 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 
,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,0 ,0 - ,2 ,0 ,0 ,0 ,0 ,3 ,0 ,6 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 - ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,0 ,0 - ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0}; -#define MAX_STATES_2022 54 -int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { - 1 ,34 ,36 ,39 ,1093 ,1096 ,1097 ,1098 ,1099 ,1100 - ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 ,1109 ,1154 ,1157 ,1160 - ,1161 ,1254 ,1257 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 - ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,40133 ,40136 ,40138 - ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 - ,35947631 ,35947635 ,35947636 ,35947638}; - -const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = { - NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" - ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-895" ,"ibm-943" ,"latin1" ,"latin1" ,NULL ,"ibm-955" ,"ibm-367" - ,"ibm-952" ,NULL ,"UTF8" ,NULL ,"ibm-955" ,"bm-367" ,"ibm-952" ,"ibm-949" ,"ibm-953" ,"ibm-1383" - ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" - ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" - ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"}; - -UCNV_TableStates_2022 
escSeqStateTable_Value_2022[MAX_STATES_2022] = { - VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022}; - -/*for 2022 looks ahead in the stream - *to determine the longest possible convertible - *data stream*/ -static const char* getEndOfBuffer_2022(const char* source, - const char* sourceLimit, - bool_t flush); -/*runs through a state machine to determine the escape sequence - codepage correspondance - *changes the pointer pointed to be _this->extraInfo*/ -static void changeState_2022(UConverter* _this, - const char** source, - const char* sourceLimit, - bool_t flush, - UErrorCode* err); - -UCNV_TableStates_2022 getKey_2022(char source, - int32_t* key, - int32_t* offset); - -/* END OF UTF-8 Conversion DATA */ - -const int8_t bytesFromUTF8[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 -}; - -const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; -#define missingCharMarker 0xFFFF -#define missingUCharMarker 0xFFFD - - - -void T_UConverter_toUnicode_SBCS (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - char *mySource = (char *) *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - UChar *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - - myToUnicode = _this->sharedData->table->sbcs.toUnicode; - - while (mySourceIndex < sourceLength) - { - - /*writing the UniChar to the output stream */ - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]]; - - if (targetUniChar != missingUCharMarker) - { - /* writes the UniChar to the output stream */ - myTarget[myTargetIndex++] = targetUniChar; - } - else - { - *err = U_INVALID_CHAR_FOUND; - _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; - _this->invalidCharLength = 1; - - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - 
targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - - -void T_UConverter_toUnicode_DBCS (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const char *mySource = ( char *) *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - - myToUnicode = _this->sharedData->table->dbcs.toUnicode; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) mySource[mySourceIndex++]; - - /*We have no internal state, we should */ - if (_this->toUnicodeStatus == 0x00) - { - _this->toUnicodeStatus = (unsigned char) mySourceChar; - } - else - { - if (_this->toUnicodeStatus != 0x00) - { - mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF)); - _this->toUnicodeStatus = 0x00; - } - - targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); - - /*writing the UniChar to the output stream */ - if (targetUniChar != missingUCharMarker) - { - /*writes the UniChar to the output stream */ - myTarget[myTargetIndex++] = targetUniChar; - } - else - { - *err = U_INVALID_CHAR_FOUND; - _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); - _this->invalidCharBuffer[1] = (char) mySourceChar; - _this->invalidCharLength = 2; - - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - 
flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that the input stream is truncated - */ - if ((flush == TRUE) - && (mySourceIndex == sourceLength) - && (_this->toUnicodeStatus != 0x00)) - { - - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - - - -void T_UConverter_toUnicode_LATIN_1 (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - unsigned char *mySource = (unsigned char *) *source; - UChar *myTarget = *target; - int32_t sourceLength = sourceLimit - (char *) mySource; - int32_t readLen = 0; - int32_t i = 0; - - /*Since there is no risk of encountering illegal Chars - *we need to pad our latin1 chars to create Unicode codepoints - *we need to go as far a min(targetLen, sourceLen) - *in case we don't have enough buffer space - *we set the error flag accordingly - */ - if ((targetLimit - *target) < sourceLength) - { - readLen = targetLimit - *target; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - else - { - readLen = sourceLimit - (char *) mySource; - } - - for (i = 0; i < readLen; i++) myTarget[i] = (UChar) mySource[i]; - - *target += i; - *source += i; - return; -} - -void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = 
sourceLimit - mySource; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - - if (myTargetIndex < targetLength) - { - if (mySource[mySourceIndex] < 0x0100) - { - /*writes the char to the output stream */ - myTarget[myTargetIndex++] = (char) mySource[mySourceIndex++]; - } - else - { - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySource[mySourceIndex++]; - _this->invalidUCharLength = 1; - -/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ - FromU_CALLBACK_MACRO(_this, - (char *)myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - *target += myTargetIndex; - *source += mySourceIndex;; - - return; -} - - -void T_UConverter_fromUnicode_SBCS (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactByteArray *myFromUnicode; - unsigned char targetChar = 0x00; - - myFromUnicode = _this->sharedData->table->sbcs.fromUnicode; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]); - - if (myTargetIndex < targetLength) - { - mySourceIndex++; - if (targetChar != 0 || !mySource[mySourceIndex - 1]) - { - /*writes the char to the output stream */ - myTarget[myTargetIndex++] = targetChar; - } - else - { - - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; - 
_this->invalidUCharLength = 1; - -/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ - FromU_CALLBACK_MACRO(_this, - (char *)myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - if (U_FAILURE (*err)) - { - break; - } - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - *target += myTargetIndex; - *source += mySourceIndex; - - - return; -} - -void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const char *mySource = *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - int32_t myMode = _this->mode; - - - myToUnicode = _this->sharedData->table->dbcs.toUnicode; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) (mySource[mySourceIndex++]); - if (mySourceChar == UCNV_SI) myMode = UCNV_SI; - else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; - else if ((myMode == UCNV_SO) && - (_this->toUnicodeStatus == 0x00)) - { - _this->toUnicodeStatus = (unsigned char) mySourceChar; - } - else - { - /*In case there is a state, we update the source char - *by concatenating the previous char with the current - *one - */ - if (_this->toUnicodeStatus != 0x00) - { - mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); - _this->toUnicodeStatus = 0x00; - } - else mySourceChar &= 0x00FF; - - /*gets the corresponding Unicode codepoint */ - targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); - - /*writing the UniChar to 
the output stream */ - if (targetUniChar != missingUCharMarker) - { - /*writes the UniChar to the output stream */ - myTarget[myTargetIndex++] = targetUniChar; - } - else - { - *err = U_INVALID_CHAR_FOUND; - if (mySourceChar > 0xff) - { - _this->invalidCharLength = 2; - _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); - _this->invalidCharBuffer[1] = (char) mySourceChar; - } - else - { - _this->invalidCharLength = 1; - _this->invalidCharBuffer[0] = (char) mySourceChar; - } - _this->mode = myMode; - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that the input stream is truncated - */ - if (_this->toUnicodeStatus - && (mySourceIndex == sourceLength) - && (flush == TRUE)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - _this->mode = myMode; - - return; -} - - -void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const char *mySource = *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - int32_t myMode = _this->mode; - int32_t* originalOffsets = offsets; - - - myToUnicode = _this->sharedData->table->dbcs.toUnicode; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < 
targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) (mySource[mySourceIndex++]); - if (mySourceChar == UCNV_SI) myMode = UCNV_SI; - else if (mySourceChar == UCNV_SO) myMode = UCNV_SO; - else if ((myMode == UCNV_SO) && - (_this->toUnicodeStatus == 0x00)) - { - _this->toUnicodeStatus = (unsigned char) mySourceChar; - } - else - { - /*In case there is a state, we update the source char - *by concatenating the previous char with the current - *one - */ - if (_this->toUnicodeStatus != 0x00) - { - mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); - _this->toUnicodeStatus = 0x00; - } - else mySourceChar &= 0x00FF; - - /*gets the corresponding Unicode codepoint */ - targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); - - /*writing the UniChar to the output stream */ - if (targetUniChar != missingUCharMarker) - { - /*writes the UniChar to the output stream */ - { - if(myMode == UCNV_SO) - offsets[myTargetIndex] = mySourceIndex-2; /* double byte */ - else - offsets[myTargetIndex] = mySourceIndex-1; /* single byte */ - } - myTarget[myTargetIndex++] = targetUniChar; - } - else - { - int32_t currentOffset = offsets[myTargetIndex-1] + 2;/* Because mySourceIndex was already incremented */ - - *err = U_INVALID_CHAR_FOUND; - if (mySourceChar > 0xFF) - { - _this->invalidCharLength = 2; - _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); - _this->invalidCharBuffer[1] = (char) mySourceChar; - } - else - { - _this->invalidCharLength = 1; - _this->invalidCharBuffer[0] = (char) mySourceChar; - } - _this->mode = myMode; - ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that 
the input stream is truncated - */ - if (_this->toUnicodeStatus - && (mySourceIndex == sourceLength) - && (flush == TRUE)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - _this->mode = myMode; - - return; -} - - -void T_UConverter_toUnicode_MBCS (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const char *mySource = *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - bool_t *myStarters = NULL; - - - - - myToUnicode = _this->sharedData->table->mbcs.toUnicode; - myStarters = _this->sharedData->table->mbcs.starters; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) (mySource[mySourceIndex++]); - - - if (myStarters[(uint8_t) mySourceChar] && - (_this->toUnicodeStatus == 0x00)) - { - _this->toUnicodeStatus = (unsigned char) mySourceChar; - } - else - { - /*In case there is a state, we update the source char - *by concatenating the previous char with the current - *one - */ - - if (_this->toUnicodeStatus != 0x00) - { - mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); - - _this->toUnicodeStatus = 0x00; - } - - /*gets the corresponding Unicode codepoint */ - targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); - - /*writing the UniChar to the output stream */ - if (targetUniChar != missingUCharMarker) - { - myTarget[myTargetIndex++] = targetUniChar; - - } - else - { - *err = U_INVALID_CHAR_FOUND; - if (mySourceChar > 0xff) - { - _this->invalidCharLength = 2; - 
_this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); - _this->invalidCharBuffer[1] = (char) mySourceChar; - } - else - { - _this->invalidCharLength = 1; - _this->invalidCharBuffer[0] = (char) mySourceChar; - } - - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that the input stream is truncated - */ - if (_this->toUnicodeStatus - && (mySourceIndex == sourceLength) - && (flush == TRUE)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - -void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const char *mySource = *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - UChar oldMySourceChar; - bool_t *myStarters = NULL; - int32_t* originalOffsets = offsets; - - - - myToUnicode = _this->sharedData->table->mbcs.toUnicode; - myStarters = _this->sharedData->table->mbcs.starters; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) (mySource[mySourceIndex++]); - - - if (myStarters[(uint8_t) mySourceChar] && - (_this->toUnicodeStatus == 0x00)) - { - _this->toUnicodeStatus = (unsigned char) 
mySourceChar; - } - else - { - /*In case there is a state, we update the source char - *by concatenating the previous char with the current - *one - */ - - if (_this->toUnicodeStatus != 0x00) - { - mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); - - _this->toUnicodeStatus = 0x00; - } - - /*gets the corresponding Unicode codepoint */ - targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); - - - /*writing the UniChar to the output stream */ - if (targetUniChar != missingUCharMarker) - { - /*writes the UniChar to the output stream */ - { - - - if (targetUniChar > 0x00FF) - offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */ - else - offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */ - - - } - myTarget[myTargetIndex++] = targetUniChar; - oldMySourceChar = mySourceChar; - - } - else - { - int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1); - - *err = U_INVALID_CHAR_FOUND; - if (mySourceChar > 0xff) - { - _this->invalidCharLength = 2; - _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); - _this->invalidCharBuffer[1] = (char) mySourceChar; - } - else - { - _this->invalidCharLength = 1; - _this->invalidCharBuffer[0] = (char) mySourceChar; - } - - ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that the input stream is truncated - */ - if (_this->toUnicodeStatus - && (mySourceIndex == sourceLength) - && (flush == TRUE)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - 
- return; -} - - -void T_UConverter_fromUnicode_EBCDIC_STATEFUL (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) - -{ - const UChar *mySource = *source; - char *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; - UChar targetUniChar = 0x0000; - int8_t targetUniCharByteNum = 0; - UChar mySourceChar = 0x0000; - bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; - bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - oldIsTargetUCharDBCS = isTargetUCharDBCS; - isTargetUCharDBCS = (targetUniChar>0x00FF); - - if (targetUniChar != missingCharMarker) - { - if (oldIsTargetUCharDBCS != isTargetUCharDBCS) - { - if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO; - else myTarget[myTargetIndex++] = UCNV_SI; - - - if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - else if (myTargetIndex+1 >= targetLength) - { - _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); - _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; - _this->charErrorBufferLength = 2; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - if (!isTargetUCharDBCS) - { - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); - if (myTargetIndex < targetLength) - { - 
myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - } - else - { - isTargetUCharDBCS = oldIsTargetUCharDBCS; - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySourceChar; - _this->invalidUCharLength = 1; - - _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; - FromU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - - *target += myTargetIndex; - *source += mySourceIndex; - - _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; - - return; -} - -void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) - -{ - const UChar *mySource = *source; - char *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; - UChar targetUniChar = 0x0000; - int8_t targetUniCharByteNum = 0; - UChar mySourceChar = 0x0000; - bool_t isTargetUCharDBCS = (bool_t)_this->fromUnicodeStatus; - bool_t oldIsTargetUCharDBCS = isTargetUCharDBCS; - int32_t* originalOffsets = offsets; - - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - oldIsTargetUCharDBCS = isTargetUCharDBCS; - isTargetUCharDBCS = 
(targetUniChar>0x00FF); - - if (targetUniChar != missingCharMarker) - { - if (oldIsTargetUCharDBCS != isTargetUCharDBCS) - { - offsets[myTargetIndex] = mySourceIndex-1; - if (isTargetUCharDBCS) myTarget[myTargetIndex++] = UCNV_SO; - else myTarget[myTargetIndex++] = UCNV_SI; - - - if ((!isTargetUCharDBCS)&&(myTargetIndex+1 >= targetLength)) - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - else if (myTargetIndex+1 >= targetLength) - { - _this->charErrorBuffer[0] = (char) (targetUniChar >> 8); - _this->charErrorBuffer[1] = (char) targetUniChar & 0x00FF; - _this->charErrorBufferLength = 2; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - if (!isTargetUCharDBCS) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); - if (myTargetIndex < targetLength) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - } - else - { - int32_t currentOffset = offsets[myTargetIndex-1]+1; - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySourceChar; - _this->invalidUCharLength = 1; - - /* Breaks out of the loop since behaviour was set to stop */ - _this->fromUnicodeStatus = (int32_t)isTargetUCharDBCS; - FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - - *target += myTargetIndex; - *source += mySourceIndex;; - - _this->fromUnicodeStatus = 
(int32_t)isTargetUCharDBCS; - - return; -} - -void T_UConverter_fromUnicode_MBCS (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) - -{ - const UChar *mySource = *source; - char *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; - UChar targetUniChar = 0x0000; - int8_t targetUniCharByteNum = 0; - UChar mySourceChar = 0x0000; - - myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - - - if (targetUniChar != missingCharMarker) - { - if (targetUniChar <= 0x00FF) - { - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - else - { - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySourceChar; - _this->invalidUCharLength = 1; - - FromU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - - *target += myTargetIndex; - *source += mySourceIndex;; - - - return; -} - -void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, - char **target, - const char 
*targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) - -{ - const UChar *mySource = *source; - char *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; - UChar targetUniChar = 0x0000; - int8_t targetUniCharByteNum = 0; - UChar mySourceChar = 0x0000; - int32_t* originalOffsets = offsets; - - myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; - - - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - - if (targetUniChar != missingCharMarker) - { - if (targetUniChar <= 0x00FF) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) targetUniChar; - - } - else - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); - if (myTargetIndex < targetLength) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - else - { - int32_t currentOffset = mySourceIndex -1; - int32_t* offsetsAnchor = offsets; - - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySourceChar; - _this->invalidUCharLength = 1; - - FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - - *target += myTargetIndex; - *source += 
mySourceIndex;; - - - return; -} -void T_UConverter_fromUnicode_ISO_2022(UConverter* _this, - char** target, - const char* targetLimit, - const UChar** source, - const UChar* sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode* err) -{ - char const* targetStart = *target; - T_UConverter_fromUnicode_UTF8(_this, - target, - targetLimit, - source, - sourceLimit, - NULL, - flush, - err); -} - - -void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, - char** target, - const char* targetLimit, - const UChar** source, - const UChar* sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode* err) -{ - - char const* targetStart = *target; - T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(_this, - target, - targetLimit, - source, - sourceLimit, - offsets, - flush, - err); - { - int32_t len = *target - targetStart; - int32_t i; - /* uprv_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */ - - for(i=len-1;i>=0;i--) offsets[i] = offsets[i]; - - } -} - -UCNV_TableStates_2022 getKey_2022(char c, - int32_t* key, - int32_t* offset) -{ - int32_t togo = *key; - int32_t low = 0; - int32_t hi = MAX_STATES_2022; - int32_t oldmid; - - if (*key == 0) togo = normalize_esq_chars_2022[c]; - else - { - togo <<= 5; - togo += normalize_esq_chars_2022[c]; - } - - while (hi != low) /*binary search*/ - { - register int32_t mid = (hi+low) >> 1; /*Finds median*/ - - if (mid == oldmid) break; - if (escSeqStateTable_Key_2022[mid] > togo) hi = mid; - else if (escSeqStateTable_Key_2022[mid] < togo) low = mid; - else /*we found it*/ - { - *key = togo; - *offset = mid; -#ifdef Debug - printf("found at @ %d\n", mid); -#endif /*Debug*/ - return escSeqStateTable_Value_2022[mid]; - } - oldmid = mid; - - } - -#ifdef Debug - printf("Could not find \"%d\" for %X\n", togo, c); -#endif /*Debug*/ - *key = 0; - *offset = 0; - - - return INVALID_2022; -} - -void changeState_2022(UConverter* _this, - const char** source, - const char* sourceLimit, - bool_t flush, - UErrorCode* 
err) -{ - UConverter* myUConverter; - uint32_t key = _this->toUnicodeStatus; - UCNV_TableStates_2022 value; - UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); - const char* chosenConverterName = NULL; - int32_t offset; - - /*Close the old Converter*/ - if (_this->mode == UCNV_SO) ucnv_close(myData2022->currentConverter); - myData2022->currentConverter = NULL; - _this->mode = UCNV_SI; - - /*In case we were in the process of consuming an escape sequence - we need to reprocess it */ - - do - { -#ifdef Debug - printf("Pre Stage: char = %x, key = %d, value =%d\n", **source, key, value); - fflush(stdout); -#endif /*Debug*/ -/* Needed explicit cast for key on MVS to make compiler happy - JJD */ - value = getKey_2022(**source,(int32_t *) &key, &offset); -#ifdef Debug - printf("Post Stage: char = %x, key = %d, value =%d\n", **source, key, value); - fflush(stdout); -#endif /*Debug*/ - switch (value) - { - case VALID_NON_TERMINAL_2022 : - { -#ifdef Debug - puts("VALID_NON_TERMINAL_2022"); -#endif /*Debug*/ - };break; - - case VALID_TERMINAL_2022: - { -#ifdef Debug - puts("VALID_TERMINAL_2022"); -#endif /*Debug*/ - chosenConverterName = escSeqStateTable_Result_2022[offset]; - key = 0; - goto DONE; - };break; - - case INVALID_2022: - { -#ifdef Debug - puts("INVALID_2022"); -#endif /*Debug*/ - _this->toUnicodeStatus = 0; - *err = U_ILLEGAL_CHAR_FOUND; - return; - } - - case VALID_MAYBE_TERMINAL_2022: - { - const char* mySource = (*source + 1); - int32_t myKey = key; - UCNV_TableStates_2022 myValue = value; - int32_t myOffset; -#ifdef Debug - puts("VALID_MAYBE_TERMINAL_2022"); -#endif /*Debug*/ - - while ((mySource < sourceLimit) && - ((myValue == VALID_MAYBE_TERMINAL_2022)||(myValue == VALID_NON_TERMINAL_2022))) - { -#ifdef Debug - printf("MAYBE value = %d myKey = %d %X\n", myValue, myKey, *mySource); -#endif /*Debug*/ - myValue = getKey_2022(*(mySource++), &myKey, &myOffset); - } -#ifdef Debug - printf("myValue = %d\n", myValue); -#endif /*Debug*/ 
- switch (myValue) - { - case INVALID_2022: - { - /*Backs off*/ -#ifdef Debug - puts("VALID_MAYBE_TERMINAL INVALID"); - printf("offset = %d\n", offset); -#endif /*Debug*/ - chosenConverterName = escSeqStateTable_Result_2022[offset]; - value = VALID_TERMINAL_2022; -#ifdef Debug - printf("%d\n", offset); - fflush(stdout); -#endif /*Debug*/ - goto DONE; - };break; - - case VALID_TERMINAL_2022: - { - /*uses longer escape sequence*/ -#ifdef Debug - puts("VALID_MAYBE_TERMINAL TERMINAL"); -#endif /*Debug*/ - *source = mySource-1; /*deals with the overshot in the while above*/ - chosenConverterName = escSeqStateTable_Result_2022[myOffset]; - key = 0; - value = VALID_TERMINAL_2022; - goto DONE; - };break; - - case VALID_NON_TERMINAL_2022: -#ifdef Debug - puts("VALID_MAYBE_TERMINAL NON_TERMINAL"); -#endif /*Debug*/ - case VALID_MAYBE_TERMINAL_2022: - { -#ifdef Debug - puts("VALID_MAYBE_TERMINAL MAYBE_TERMINAL"); -#endif /*Debug*/ - if (flush) - { - /*Backs off*/ - chosenConverterName = escSeqStateTable_Result_2022[offset]; - value = VALID_TERMINAL_2022; - key = 0; - goto DONE; - } - else - { - key = myKey; - value = VALID_NON_TERMINAL_2022; - } - };break; - };break; - };break; - } - } while ((*source)++ <= sourceLimit); - - DONE: - _this->toUnicodeStatus = key; - - if ((value == VALID_NON_TERMINAL_2022) || (value == VALID_MAYBE_TERMINAL_2022)) - { -#ifdef Debug - printf("Out: current **source = %X", **source); -#endif - - return; - } - if (value > 0) myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); - { -#ifdef Debug - printf("Error = %d open \"%s\"\n", *err, chosenConverterName); -#endif /*Debug*/ - if (U_SUCCESS(*err)) - { - /*Customize the converter with the attributes set on the 2022 converter*/ - myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour; - myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour; - uprv_memcpy(myUConverter->subChar, - _this->subChar, - myUConverter->subCharLen = 
_this->subCharLen); - - _this->mode = UCNV_SO; - } - } - - - return; -} - -/*Checks the first 3 characters of the buffer against valid 2022 escape sequences - *if the match we return a pointer to the initial start of the sequence otherwise - *we return sourceLimit - */ -const char* getEndOfBuffer_2022(const char* source, - const char* sourceLimit, - bool_t flush) -{ - const char* mySource = source; - - if (source >= sourceLimit) return sourceLimit; - - do - { - if (*mySource == ESC_2022) - { - int8_t i; - int32_t key = 0; - int32_t offset; - UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; - - for (i=0; - (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); - i++) - { - value = getKey_2022(*(mySource+i), &key, &offset); -#ifdef Debug - printf("Look ahead value = %d\n", value); -#endif /*Debug*/ - } - if (value > 0) return mySource; - if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) return sourceLimit; - } - } - while (mySource++ < sourceLimit); - - return sourceLimit; -} - - - -void T_UConverter_toUnicode_ISO_2022(UConverter* _this, - UChar** target, - const UChar* targetLimit, - const char** source, - const char* sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode* err) -{ - int32_t base = 0; - const char* mySourceLimit; - char const* sourceStart; - - /*Arguments Check*/ - if (U_FAILURE(*err)) return; - if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - for (;;) - { - - mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); - - - /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ - if (_this->mode == UCNV_SO) /*Already doing some conversion*/ - { - const UChar* myTargetStart = *target; -#ifdef Debug - printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); -#endif /*Debug*/ - - ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, - target, - targetLimit, - 
source, - mySourceLimit, - NULL, - flush, - err); - - -#ifdef Debug - puts("---------------------------> CONVERTED"); - printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); - printf("err =%d", *err); -#endif /*Debug*/ - } - /*-Done with buffer with entire buffer - -Error while converting - */ - - if (U_FAILURE(*err) || (*source == sourceLimit)) return; -#ifdef Debug - puts("Got Here!"); - fflush(stdout); -#endif /*Debug*/ - sourceStart = *source; - changeState_2022(_this, - source, - sourceLimit, - flush, - err); - (*source)++; - - } - - return; -} - -void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this, - UChar** target, - const UChar* targetLimit, - const char** source, - const char* sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode* err) -{ - int32_t myOffset=0; - int32_t base = 0; - const char* mySourceLimit; - char const* sourceStart; - - /*Arguments Check*/ - if (U_FAILURE(*err)) return; - if ((_this == NULL) || (targetLimit < *target) || (sourceLimit < *source)) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - for (;;) - { - - mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, flush); - /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ - - if (_this->mode == UCNV_SO) /*Already doing some conversion*/ - { - const UChar* myTargetStart = *target; -#ifdef Debug - printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); -#endif /*Debug*/ - - ucnv_toUnicode(((UConverterDataISO2022*)(_this->extraInfo))->currentConverter, - target, - targetLimit, - source, - mySourceLimit, - offsets, - flush, - err); - - { - int32_t lim = *target - myTargetStart; - int32_t i = 0; - for (i=base; i < lim;i++) offsets[i] += myOffset; - base += lim; - } - -#ifdef Debug - puts("---------------------------> CONVERTED"); - printf("source %X\n mySourceLimit %X\n sourceLimit %X\n", *source, mySourceLimit, sourceLimit); - printf("err =%d", *err); 
-#endif /*Debug*/ - } - - /*-Done with buffer with entire buffer - -Error while converting - */ - - if (U_FAILURE(*err) || (*source == sourceLimit)) return; -#ifdef Debug - puts("Got Here!"); - fflush(stdout); -#endif /*Debug*/ - sourceStart = *source; - changeState_2022(_this, - source, - sourceLimit, - flush, - err); - (*source)++; - myOffset += *source - sourceStart; - - } - - return; -} - - - - - - -void T_UConverter_fromUnicode_DBCS (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - CompactShortArray *myFromUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - - myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - - /*Gets the corresponding codepoint */ - targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); - if (targetUniChar != missingCharMarker) - { - /*writes the char to the output stream */ - myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char) targetUniChar; - } - else - { - _this->charErrorBuffer[0] = (char) targetUniChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - else - { - *err = U_INVALID_CHAR_FOUND; - _this->invalidUCharBuffer[0] = (UChar) mySourceChar; - _this->invalidUCharLength = 1; - - -/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ - FromU_CALLBACK_MACRO(_this, - (char *)myTarget, - 
myTargetIndex, - targetLimit, - mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - if (U_FAILURE (*err)) break; - _this->invalidUCharLength = 0; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - *target += myTargetIndex; - *source += mySourceIndex;; - - - return; -} - -void T_UConverter_fromUnicode_UTF8 (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - int8_t targetCharByteNum = 0; - UChar mySourceChar = 0x0000; - uint32_t ch; - int16_t i, bytesToWrite = 0; - uint32_t ch2; - char temp[4]; - - if (_this->fromUnicodeStatus) - { - ch = _this->fromUnicodeStatus; - _this->fromUnicodeStatus = 0; - goto lowsurogate; - } - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - bytesToWrite = 0; - ch = mySource[mySourceIndex++]; - - if (ch < 0x80) /* Single byte */ - { - myTarget[myTargetIndex++] = (char) ch; - } - else if (ch < 0x800) /* Double byte */ - { - myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); - } - else - { - _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - else - /* Check for surogates */ - { - if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) - { - lowsurogate: - if (mySourceIndex < sourceLength && !flush) - { - ch2 = mySource[mySourceIndex]; - if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) - { - ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; - 
++mySourceIndex; - } - } - } - if (ch < 0x10000) - { - bytesToWrite = 3; - temp[0] = (char) ((ch >> 12) | 0xe0); - temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); - temp[2] = (char) (ch & 0x3f | 0x80); - } - else - { - bytesToWrite = 4; - temp[0] = (char) ((ch >> 18) | 0xf0); - temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0); - temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); - temp[3] = (char) (ch & 0x3f | 0x80); - } - for (i = 0; i < bytesToWrite; i++) - { - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = temp[i]; - } - else - { - _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - -void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - int8_t targetCharByteNum = 0; - UChar mySourceChar = 0x0000; - uint32_t ch; - int16_t i, bytesToWrite = 0; - uint32_t ch2; - char temp[4]; - - if (_this->fromUnicodeStatus) - { - ch = _this->fromUnicodeStatus; - _this->fromUnicodeStatus = 0; - goto lowsurogate; - } - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - bytesToWrite = 0; - ch = mySource[mySourceIndex++]; - - if (ch < 0x80) /* Single byte */ - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) ch; - } - else if (ch < 0x800) /* Double byte */ - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); - if (myTargetIndex < 
targetLength) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); - } - else - { - _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - else - /* Check for surogates */ - { - if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) - { - lowsurogate: - if (mySourceIndex < sourceLength && !flush) - { - ch2 = mySource[mySourceIndex]; - if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) - { - ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; - ++mySourceIndex; - } - } - } - if (ch < 0x10000) - { - bytesToWrite = 3; - temp[0] = (char) ((ch >> 12) | 0xe0); - temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); - temp[2] = (char) (ch & 0x3f | 0x80); - } - else - { - bytesToWrite = 4; - temp[0] = (char) ((ch >> 18) | 0xf0); - temp[1] = (char) ((ch >> 12) & 0x3f | 0xe0); - temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); - temp[3] = (char) (ch & 0x3f | 0x80); - } - for (i = 0; i < bytesToWrite; i++) - { - if (myTargetIndex < targetLength) - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = temp[i]; - } - else - { - _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - - -void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - UChar mySourceChar; - - /*writing 
the char to the output stream */ - while (mySourceIndex < sourceLength) - { - - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char) mySourceChar; - } - else - { - _this->charErrorBuffer[0] = (char) mySourceChar; - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - *target += myTargetIndex; - *source += mySourceIndex;; - - return; -} - -void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const UChar *mySource = *source; - unsigned char *myTarget = (unsigned char *) *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - (char *) myTarget; - int32_t sourceLength = sourceLimit - mySource; - UChar mySourceChar; - - - /*writing the char to the output stream */ - while (mySourceIndex < sourceLength) - { - - if (myTargetIndex < targetLength) - { - mySourceChar = (UChar) mySource[mySourceIndex++]; - myTarget[myTargetIndex++] = (char) mySourceChar; - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); - } - else - { - _this->charErrorBuffer[0] = (char) (mySourceChar >> 8); - _this->charErrorBufferLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - *target += myTargetIndex; - *source += mySourceIndex;; - - return; -} - -void T_UConverter_toUnicode_UTF16_BE (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) 
*source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - UChar mySourceChar = 0x0000; - UChar oldmySourceChar = 0x0000; - - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UChar */ - mySourceChar = (unsigned char) mySource[mySourceIndex++]; - oldmySourceChar = mySourceChar; - if (_this->toUnicodeStatus == 0) - { - _this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar; - } - else - { - if (_this->toUnicodeStatus != 0xFFFF) - mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | mySourceChar); - _this->toUnicodeStatus = 0; - - - - myTarget[myTargetIndex++] = mySourceChar; - - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - if (U_SUCCESS(*err) && flush - && (mySourceIndex == sourceLength) - && (_this->toUnicodeStatus != 0x00)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - - return; -} - -void T_UConverter_toUnicode_UTF16_LE (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - CompactShortArray *myToUnicode = NULL; - UChar targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - /*gets the corresponding UniChar */ - mySourceChar = (unsigned char) mySource[mySourceIndex++]; - - if (_this->toUnicodeStatus == 0x00) - { - 
_this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 0xFFFF : mySourceChar; - } - else - { - if (_this->toUnicodeStatus == 0xFFFF) - mySourceChar = (UChar) (mySourceChar << 8); - else - { - mySourceChar <<= 8; - mySourceChar |= (UChar) (_this->toUnicodeStatus); - } - _this->toUnicodeStatus = 0x00; - myTarget[myTargetIndex++] = mySourceChar; - } - } - else - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - - - if (U_SUCCESS(*err) && flush - && (mySourceIndex == sourceLength) - && (_this->toUnicodeStatus != 0x00)) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - - *target += myTargetIndex; - *source += mySourceIndex; - - - return; -} - -void T_UConverter_toUnicode_UTF8 (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - uint32_t ch = 0 , - ch2 =0 , - i =0; /* Index into the current # of bytes consumed in the current sequence */ - uint32_t inBytes = 0; /* Total number of bytes in the current UTF8 sequence */ - - if (_this->toUnicodeStatus) - { - i = _this->invalidCharLength; /* restore # of bytes consumed */ - inBytes = _this->toUnicodeStatus; /* Restore size of current sequence */ - - ch = _this->mode; /*Stores the previously calculated ch from a previous call*/ - _this->toUnicodeStatus = 0; - _this->invalidCharLength = 0; - goto morebytes; - } - - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - ch = 0; - ch = ((uint32_t)mySource[mySourceIndex++]) & 0x000000FF; - if (ch < 0x80) /* Simple case */ - { - myTarget[myTargetIndex++] = (UChar) ch; - } - else - { - /* store the first 
char */ - - inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ - _this->invalidCharBuffer[0] = (char)ch; - i = 1; - - morebytes: - for (; i < inBytes; i++) - { - { - if (mySourceIndex >= sourceLength) - { - if (flush) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - else - { - _this->toUnicodeStatus = inBytes; - _this->invalidCharLength = (int8_t)i; - } - goto donefornow; - } - _this->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF)); - if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */ - break; - } - ch <<= 6; - ch += ch2; - } - - - ch -= offsetsFromUTF8[inBytes]; - - if (i == inBytes && ch <= kMaximumUTF16) - { - if (ch <= kMaximumUCS2) - { - myTarget[myTargetIndex++] = (UChar) ch; - } - else - { - ch -= halfBase; - myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); - ch = (ch & halfMask) + kSurrogateLowStart; - if (myTargetIndex < targetLength) - { - myTarget[myTargetIndex++] = (char)ch; - } - else - { - _this->invalidUCharBuffer[0] = (UChar) ch; - _this->invalidUCharLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - else - { - *err = U_ILLEGAL_CHAR_FOUND; - _this->invalidCharLength = (int8_t)i; - -#ifdef Debug - printf("inbytes %d\n, _this->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, _this->invalidCharLength, mySource[mySourceIndex]); -#endif -/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ - ToU_CALLBACK_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - (const char *)mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - /* End of target buffer */ - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - -donefornow: - *target += myTargetIndex; - *source += mySourceIndex; - _this->mode = ch; /*stores a partially calculated target*/ -} - -void 
T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - bool_t flush, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) *source; - UChar *myTarget = *target; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = targetLimit - myTarget; - int32_t sourceLength = sourceLimit - (char *) mySource; - uint32_t ch = 0, ch2 = 0, i = 0; - uint32_t inBytes = 0; - int32_t* originalOffsets = offsets; - - - - if (_this->toUnicodeStatus) - { - i = _this->invalidCharLength; - inBytes = _this->toUnicodeStatus; - _this->toUnicodeStatus = 0; - ch = _this->mode; - goto morebytes; - } - - while (mySourceIndex < sourceLength) - { - if (myTargetIndex < targetLength) - { - ch = mySource[mySourceIndex++]; - if (ch < 0x80) /* Simple case */ - { - offsets[myTargetIndex] = mySourceIndex-1; - myTarget[myTargetIndex++] = (UChar) ch; - } - else - { - inBytes = bytesFromUTF8[ch]; - _this->invalidCharBuffer[0] = (char)ch; - i = 1; - - morebytes: - for (; i < inBytes; i++) - { - { - if (mySourceIndex >= sourceLength) - { - if (flush) - { - if (U_SUCCESS(*err)) - { - *err = U_TRUNCATED_CHAR_FOUND; - _this->toUnicodeStatus = 0x00; - } - } - else - { - _this->toUnicodeStatus = inBytes; - _this->invalidCharLength = (int8_t)i; - } - goto donefornow; - } - _this->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]); - if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */ - break; - } - ch <<= 6; - ch += ch2; - } - - ch -= offsetsFromUTF8[inBytes]; - if (i == inBytes && ch <= kMaximumUTF16) - { - if (ch <= kMaximumUCS2) { - - offsets[myTargetIndex] = mySourceIndex-3; - myTarget[myTargetIndex++] = (UChar) ch; - - } - else - { - ch -= halfBase; - offsets[myTargetIndex] = mySourceIndex-4; - myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); - ch = (ch & halfMask) + kSurrogateLowStart; - if 
(myTargetIndex < targetLength) - { - offsets[myTargetIndex] = mySourceIndex-4; - myTarget[myTargetIndex++] = (char)ch; - } - else - { - _this->invalidUCharBuffer[0] = (UChar) ch; - _this->invalidUCharLength = 1; - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - } - } - else - { - int32_t currentOffset = offsets[myTargetIndex-1]; - - *err = U_ILLEGAL_CHAR_FOUND; - _this->invalidCharLength = (int8_t)i; - -/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ - ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, - myTarget, - myTargetIndex, - targetLimit, - (const char *)mySource, - mySourceIndex, - sourceLimit, - offsets, - flush, - err); - - - if (U_FAILURE (*err)) break; - _this->invalidCharLength = 0; - } - } - } - else - /* End of target buffer */ - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - break; - } - } - -donefornow: - *target += myTargetIndex; - *source += mySourceIndex; - _this->mode = ch; - -} - /*Empties the internal unicode output buffer */ void flushInternalUnicodeBuffer (UConverter * _this, UChar * myTarget, @@ -2931,508 +113,3 @@ void flushInternalCharBuffer (UConverter * _this, return; } - - - -UChar T_UConverter_getNextUChar_SBCS(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - - - if ((*source)+1 > sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - - - /*Gets the corresponding codepoint*/ - myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)]; - - if (myUChar != 0xFFFD) return myUChar; - else - { - UChar* myUCharPtr = &myUChar; - const char* sourceFinal = *source; - - *err = U_INVALID_CHAR_FOUND; - - /*Calls the ErrorFunctor after rewinding the input buffer*/ - (*source)--; - /*It's is very likely that the ErrorFunctor will write to the - *internal buffers */ - converter->fromCharErrorBehaviour(converter, - &myUCharPtr, - myUCharPtr + 1, - &sourceFinal, - sourceLimit, - NULL, - TRUE, - err); - - /*makes the internal caching 
transparent to the user*/ - if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; - - return myUChar; - } -} - -UChar T_UConverter_getNextUChar_LATIN_1(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - - /* Empties the internal buffers if need be - * In this case since ErrorFunctors are never called - * (LATIN_1 is a subset of Unicode) - */ - - if ((*source)+1 > sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - - return (UChar)*((*source)++); -} - -UChar T_UConverter_getNextUChar_ISO_2022(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - const char* mySourceLimit; - /*Arguments Check*/ - if (sourceLimit < *source) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return 0xFFFD; - } - - for (;;) - { - mySourceLimit = getEndOfBuffer_2022(*source, sourceLimit, TRUE); - /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ - if (converter->mode == UCNV_SO) /*Already doing some conversion*/ - { - - return ucnv_getNextUChar(((UConverterDataISO2022*)(converter->extraInfo))->currentConverter, - source, - mySourceLimit, - err); - - - } - /*-Done with buffer with entire buffer - -Error while converting - */ - - - changeState_2022(converter, - source, - sourceLimit, - TRUE, - err); - (*source)++; - } - - return 0xFFFD; -} - -UChar T_UConverter_getNextUChar_DBCS(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - - /*Checks boundaries and set appropriate error codes*/ - if ((*source)+2 > sourceLimit) - { - if ((*source) >= sourceLimit) - { - /*Either caller has reached the end of the byte stream*/ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - else if (((*source)+1) == sourceLimit) - { - /* a character was cut in half*/ - *err = U_TRUNCATED_CHAR_FOUND; - } - - return 0xFFFD; - } - - /*Gets the corresponding codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, - 
(uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); - - /*update the input pointer*/ - *source += 2; - if (myUChar != 0xFFFD) return myUChar; - else - { - UChar* myUCharPtr = &myUChar; - const char* sourceFinal = *source; - - /*Calls the ErrorFunctor after rewinding the input buffer*/ - (*source) -= 2; - - *err = U_INVALID_CHAR_FOUND; - - /*It's is very likely that the ErrorFunctor will write to the - *internal buffers */ - converter->fromCharErrorBehaviour(converter, - &myUCharPtr, - myUCharPtr + 1, - &sourceFinal, - sourceLimit, - NULL, - TRUE, - err); - /*makes the internal caching transparent to the user*/ - if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; - - return myUChar; - } -} - -UChar T_UConverter_getNextUChar_MBCS(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - char const *sourceInitial = *source; - /*safe keeps a ptr to the beginning in case we need to step back*/ - - /*Input boundary check*/ - if ((*source)+1 > sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - - /*Checks to see if the byte is a lead*/ - if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE) - { - /*Not lead byte: we update the source ptr and get the codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, - (UChar)(**source)); - (*source)++; - } - else - { - /*Lead byte: we Build the codepoint and get the corresponding character - * and update the source ptr*/ - if ((*source + 2) > sourceLimit) - { - *err = U_TRUNCATED_CHAR_FOUND; - return 0xFFFD; - } - - myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, - (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); - - (*source) += 2; - } - - if (myUChar != 0xFFFD) return myUChar; - else - { - /*rewinds source*/ - const char* sourceFinal = *source; - UChar* myUCharPtr = &myUChar; - - *err = U_INVALID_CHAR_FOUND; - *source = sourceInitial; - - 
/*It's is very likely that the ErrorFunctor will write to the - *internal buffers */ - converter->fromCharErrorBehaviour(converter, - &myUCharPtr, - myUCharPtr + 1, - &sourceFinal, - sourceLimit, - NULL, - TRUE, - err); - - /*makes the internal caching transparent to the user*/ - if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; - - return myUChar; - } -} - -UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - char const *sourceInitial = *source; - /*safe keeps a ptr to the beginning in case we need to step back*/ - - /*Input boundary check*/ - if ((*source)+1 > sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - - /*Checks to see if with have SI/SO shifters - if we do we change the mode appropriately and we consume the byte*/ - if ((**source == UCNV_SI) || (**source == UCNV_SO)) - { - converter->mode = **source; - (*source)++; - - /*Rechecks boundary after consuming the shift sequence*/ - if ((*source)+1 > sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - } - - if (converter->mode == UCNV_SI) - { - /*Not lead byte: we update the source ptr and get the codepoint*/ - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, - (UChar)(**source)); - (*source)++; - } - else - { - /*Lead byte: we Build the codepoint and get the corresponding character - * and update the source ptr*/ - if ((*source + 2) > sourceLimit) - { - *err = U_TRUNCATED_CHAR_FOUND; - return 0xFFFD; - } - - myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, - ((UChar)((**source)) << 8) |((uint8_t)*((*source)+1))); - - (*source) += 2; - } - - if (myUChar != 0xFFFD) return myUChar; - else - { - /*rewinds source*/ - const char* sourceFinal = *source; - UChar* myUCharPtr = &myUChar; - - *err = U_INVALID_CHAR_FOUND; - *source = sourceInitial; - - /*It's is very likely that the ErrorFunctor will write to the - 
*internal buffers */ - converter->fromCharErrorBehaviour(converter, - &myUCharPtr, - myUCharPtr + 1, - &sourceFinal, - sourceLimit, - NULL, - TRUE, - err); - - /*makes the internal caching transparent to the user*/ - if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; - - return myUChar; - } -} - -UChar T_UConverter_getNextUChar_UTF16_BE(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - /*Checks boundaries and set appropriate error codes*/ - if ((*source)+2 > sourceLimit) - { - if ((*source) >= sourceLimit) - { - /*Either caller has reached the end of the byte stream*/ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - else if (((*source)+1) == sourceLimit) - { - /* a character was cut in half*/ - *err = U_TRUNCATED_CHAR_FOUND; - } - - return 0xFFFD; - } - - - /*Gets the corresponding codepoint*/ - - myUChar = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1)); - *source += 2; - return myUChar; -} - - -UChar T_UConverter_getNextUChar_UTF16_LE(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - /*Checks boundaries and set appropriate error codes*/ - if ((*source)+2 > sourceLimit) - { - if ((*source) >= sourceLimit) - { - /*Either caller has reached the end of the byte stream*/ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - } - else if (((*source)+1) == sourceLimit) - { - /* a character was cut in half*/ - *err = U_TRUNCATED_CHAR_FOUND; - } - - return 0xFFFD; - } - - - /*Gets the corresponding codepoint*/ - myUChar = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source))); - /*updates the source*/ - *source += 2; - return myUChar; -} - -UChar T_UConverter_getNextUChar_UTF8(UConverter* converter, - const char** source, - const char* sourceLimit, - UErrorCode* err) -{ - UChar myUChar; - /*safe keeps a ptr to the beginning in case we need to step back*/ - char const *sourceInitial = *source; - uint16_t extraBytesToWrite; - uint8_t myByte; - uint32_t 
ch; - int8_t isLegalSequence = 1; - - /*Input boundary check*/ - if ((*source) >= sourceLimit) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xFFFD; - } - - myByte = (uint8_t)*((*source)++); - if(myByte < 0x80) { - return (UChar)myByte; - } - extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; - if (extraBytesToWrite == 0 || extraBytesToWrite > 4) { - goto CALL_ERROR_FUNCTION; - } - - - /*The byte sequence is longer than the buffer area passed*/ - - if ((*source + extraBytesToWrite - 1) > sourceLimit) - { - *err = U_TRUNCATED_CHAR_FOUND; - return 0xFFFD; - } - else - { - ch = myByte << 6; - switch(extraBytesToWrite) - { - /* note: code falls through cases! (sic)*/ - case 6: ch += (myByte = (uint8_t)*((*source)++)); ch <<= 6; - if ((myByte & 0xC0) != 0x80) - { - isLegalSequence = 0; - break; - } - case 5: ch += (myByte = *((*source)++)); ch <<= 6; - if ((myByte & 0xC0) != 0x80) - { - isLegalSequence = 0; - break; - } - case 4: ch += (myByte = *((*source)++)); ch <<= 6; - if ((myByte & 0xC0) != 0x80) - { - isLegalSequence = 0; - break; - } - case 3: ch += (myByte = *((*source)++)); ch <<= 6; - if ((myByte & 0xC0) != 0x80) - { - isLegalSequence = 0; - break; - } - case 2: ch += (myByte = *((*source)++)); - if ((myByte & 0xC0) != 0x80) - { - isLegalSequence = 0; - } - }; - } - ch -= offsetsFromUTF8[extraBytesToWrite]; - - - if (isLegalSequence == 0) goto CALL_ERROR_FUNCTION; - - /*we got a UCS-2 Character*/ - if (ch <= kMaximumUCS2) return (UChar)ch; - /*character out of bounds*/ - else if (ch >= kMaximumUTF16) goto CALL_ERROR_FUNCTION; - /*Surrogates found*/ - else - { - ch -= halfBase; - /*stores the 2nd surrogate inside the converter for the next call*/ - converter->UCharErrorBuffer[0] = (UChar)((ch >> halfShift) + kSurrogateHighStart); - converter->UCharErrorBufferLength = 1; - - /*returns the 1st surrogate*/ - return (UChar)((ch & halfMask) + kSurrogateLowStart); - } - - - CALL_ERROR_FUNCTION: - { - /*rewinds source*/ - const char* sourceFinal = *source; - 
UChar* myUCharPtr = &myUChar; - - *err = U_ILLEGAL_CHAR_FOUND; - *source = sourceInitial; - - /*It's is very likely that the ErrorFunctor will write to the - *internal buffers */ - converter->fromCharErrorBehaviour(converter, - &myUCharPtr, - myUCharPtr + 1, - &sourceFinal, - sourceLimit, - NULL, - TRUE, - err); - - /*makes the internal caching transparent to the user*/ - if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; - - return myUChar; - } -} diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h index 26d389b0494..9be2800ae39 100644 --- a/icu4c/source/common/ucnv_cnv.h +++ b/icu4c/source/common/ucnv_cnv.h @@ -15,286 +15,150 @@ #include "unicode/utypes.h" #include "unicode/ucnv_bld.h" +U_CDECL_BEGIN + +#define missingCharMarker 0xFFFF +#define missingUCharMarker 0xFFFD + +#define FromU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ + if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\ + else \ + { \ + char *myTargetCopy = myTarget + myTargetIndex; \ + const UChar *mySourceCopy = mySource + mySourceIndex; \ + /*copies current values for the ErrorFunctor to update */ \ + /*Calls the ErrorFunctor */ \ + _this->fromUCharErrorBehaviour (_this, \ + (char **) &myTargetCopy, \ + targetLimit, \ + (const UChar **) &mySourceCopy, \ + sourceLimit, \ + offsets, \ + flush, \ + err); \ + /*Update the local Indexes so that the conversion can restart at the right points */ \ + mySourceIndex = (mySourceCopy - mySource) ; \ + myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \ + } + +#define ToU_CALLBACK_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ + if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \ + else \ + { \ + UChar *myTargetCopy = myTarget + myTargetIndex; \ + const char *mySourceCopy = mySource + 
mySourceIndex; \ + /*Calls the ErrorFunctor */ \ + _this->fromCharErrorBehaviour (_this, \ + &myTargetCopy, \ + targetLimit, \ + (const char **) &mySourceCopy, \ + sourceLimit, \ + offsets, \ + flush, \ + err); \ + /*Update the local Indexes so that the conversion can restart at the right points */ \ + mySourceIndex = ((char*)mySourceCopy - (char*)mySource); \ + myTargetIndex = (myTargetCopy - myTarget); \ + } + +#define FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ + if (_this->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\ + else \ + { \ + char *myTargetCopy = myTarget + myTargetIndex; \ + const UChar *mySourceCopy = mySource + mySourceIndex; \ + int32_t My_i = myTargetIndex; \ + /*copies current values for the ErrorFunctor to update */ \ + /*Calls the ErrorFunctor */ \ + _this->fromUCharErrorBehaviour (_this, \ + (char **) &myTargetCopy, \ + targetLimit, \ + (const UChar **) &mySourceCopy, \ + sourceLimit, \ + offsets + myTargetIndex, \ + flush, \ + err); \ + /*Update the local Indexes so that the conversion can restart at the right points */ \ + mySourceIndex = mySourceCopy - mySource ; \ + myTargetIndex = (char*)myTargetCopy - (char*)myTarget ; \ + for (;My_i < myTargetIndex;My_i++) offsets[My_i] += currentOffset ; \ + } + +#define ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, myTarget, myTargetIndex, targetLimit, mySource, mySourceIndex, sourceLimit, offsets, flush, err) \ + if (_this->fromCharErrorBehaviour == (UConverterToUCallback) UCNV_TO_U_CALLBACK_STOP) break; \ + else \ + { \ + UChar *myTargetCopy = myTarget + myTargetIndex; \ + const char *mySourceCopy = mySource + mySourceIndex; \ + int32_t My_i = myTargetIndex; \ + _this->fromCharErrorBehaviour (_this, \ + &myTargetCopy, \ + targetLimit, \ + (const char **) &mySourceCopy, \ + sourceLimit, \ + offsets + myTargetIndex, \ + flush, \ + err); \ + /*Update the local Indexes so 
that the conversion can restart at the right points */ \ + mySourceIndex = (char *)mySourceCopy - (char*)mySource; \ + myTargetIndex = ((UChar*)myTargetCopy - (UChar*)myTarget); \ + for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset ; } \ + } + +typedef void (*T_ToUnicodeFunction) (UConverter *, + UChar **, + const UChar *, + const char **, + const char *, + int32_t* offsets, + bool_t, + UErrorCode *); + +typedef void (*T_FromUnicodeFunction) (UConverter *, + char **, + const char *, + const UChar **, + const UChar *, + int32_t* offsets, + bool_t, + UErrorCode *); + +typedef UChar (*T_GetNextUCharFunction) (UConverter *, + const char **, + const char *, + UErrorCode *); + bool_t CONVERSION_U_SUCCESS (UErrorCode err); -void T_UConverter_toUnicode_SBCS (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); +void flushInternalUnicodeBuffer (UConverter * _this, + UChar * myTarget, + int32_t * myTargetIndex, + int32_t targetLength, + int32_t** offsets, + UErrorCode * err); -void T_UConverter_fromUnicode_SBCS (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); +void flushInternalCharBuffer (UConverter * _this, + char *myTarget, + int32_t * myTargetIndex, + int32_t targetLength, + int32_t** offsets, + UErrorCode * err); -void T_UConverter_toUnicode_MBCS (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); +struct UConverterImpl { + UConverterType type; -void T_UConverter_fromUnicode_MBCS (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); -void 
T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); + T_ToUnicodeFunction toUnicode; + T_ToUnicodeFunction toUnicodeWithOffsets; + T_FromUnicodeFunction fromUnicode; + T_FromUnicodeFunction fromUnicodeWithOffsets; + T_GetNextUCharFunction getNextUChar; +}; -void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); +extern UConverterSharedData + _SBCSData, _DBCSData, _MBCSData, _Latin1Data, + _UTF8Data, _UTF16BEData, _UTF16LEData, _EBCDICStatefulData, + _ISO2022Data; -void T_UConverter_toUnicode_DBCS (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_DBCS (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_UTF16_BE (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_UTF16_BE (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_UTF16_LE (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_EBCDIC_STATEFUL(UConverter * converter, - UChar ** target, - 
const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_EBCDIC_STATEFUL(UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC(UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_ISO_2022(UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_ISO_2022(UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - - -void T_UConverter_toUnicode_UTF16_LE (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void 
T_UConverter_fromUnicode_UTF8 (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_UTF8 (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_fromUnicode_LATIN_1 (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -void T_UConverter_toUnicode_LATIN_1 (UConverter * converter, - UChar ** target, - const UChar * targetLimit, - const char **source, - const char *sourceLimit, - int32_t* offsets, - bool_t flush, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_LATIN_1 (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_SBCS (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_DBCS (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_MBCS (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_UTF8 (UConverter * converter, - const char **source, - const char *sourceLimit, - 
UErrorCode * err); - -UChar T_UConverter_getNextUChar_UTF16_BE (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_UTF16_LE (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - - -UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); - -UChar T_UConverter_getNextUChar_ISO_2022 (UConverter * converter, - const char **source, - const char *sourceLimit, - UErrorCode * err); +U_CDECL_END #endif /* UCNV_CNV */ diff --git a/icu4c/source/common/ucnv_mbcs.c b/icu4c/source/common/ucnv_mbcs.c new file mode 100644 index 00000000000..1b1ba1da4b6 --- /dev/null +++ b/icu4c/source/common/ucnv_mbcs.c @@ -0,0 +1,552 @@ +/* +********************************************************************** +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_mbcs.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb03 +* created by: Markus W. 
Scherer +*/ + +#include "unicode/utypes.h" +#include "ucmp16.h" +#include "ucmp8.h" +#include "unicode/ucnv_bld.h" +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" + +/* MBCS --------------------------------------------------------------------- */ + +void T_UConverter_toUnicode_MBCS (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const char *mySource = *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + bool_t *myStarters = NULL; + + + + + myToUnicode = _this->sharedData->table->mbcs.toUnicode; + myStarters = _this->sharedData->table->mbcs.starters; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) (mySource[mySourceIndex++]); + + + if (myStarters[(uint8_t) mySourceChar] && + (_this->toUnicodeStatus == 0x00)) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar; + } + else + { + /*In case there is a state, we update the source char + *by concatenating the previous char with the current + *one + */ + + if (_this->toUnicodeStatus != 0x00) + { + mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); + + _this->toUnicodeStatus = 0x00; + } + + /*gets the corresponding Unicode codepoint */ + targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); + + /*writing the UniChar to the output stream */ + if (targetUniChar != missingUCharMarker) + { + myTarget[myTargetIndex++] = targetUniChar; + + } + else + { + *err = U_INVALID_CHAR_FOUND; + if (mySourceChar > 0xff) + { + _this->invalidCharLength = 2; + _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); + 
_this->invalidCharBuffer[1] = (char) mySourceChar; + } + else + { + _this->invalidCharLength = 1; + _this->invalidCharBuffer[0] = (char) mySourceChar; + } + + ToU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + /*If at the end of conversion we are still carrying state information + *flush is TRUE, we can deduce that the input stream is truncated + */ + if (_this->toUnicodeStatus + && (mySourceIndex == sourceLength) + && (flush == TRUE)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const char *mySource = *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + UChar oldMySourceChar; + bool_t *myStarters = NULL; + int32_t* originalOffsets = offsets; + + + + myToUnicode = _this->sharedData->table->mbcs.toUnicode; + myStarters = _this->sharedData->table->mbcs.starters; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) (mySource[mySourceIndex++]); + + + if (myStarters[(uint8_t) mySourceChar] && + (_this->toUnicodeStatus == 0x00)) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar; + } + else + { + /*In case there is a state, we 
update the source char + *by concatenating the previous char with the current + *one + */ + + if (_this->toUnicodeStatus != 0x00) + { + mySourceChar |= (UChar) (_this->toUnicodeStatus << 8); + + _this->toUnicodeStatus = 0x00; + } + + /*gets the corresponding Unicode codepoint */ + targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); + + + /*writing the UniChar to the output stream */ + if (targetUniChar != missingUCharMarker) + { + /*writes the UniChar to the output stream */ + { + + + if (targetUniChar > 0x00FF) + offsets[myTargetIndex] = mySourceIndex -2; /* double byte character - make the offset point to the first char */ + else + offsets[myTargetIndex] = mySourceIndex -1 ; /* single byte char. Offset is OK */ + + + } + myTarget[myTargetIndex++] = targetUniChar; + oldMySourceChar = mySourceChar; + + } + else + { + int32_t currentOffset = offsets[myTargetIndex-1] + ((oldMySourceChar>0x00FF)?2:1); + + *err = U_INVALID_CHAR_FOUND; + if (mySourceChar > 0xff) + { + _this->invalidCharLength = 2; + _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); + _this->invalidCharBuffer[1] = (char) mySourceChar; + } + else + { + _this->invalidCharLength = 1; + _this->invalidCharBuffer[0] = (char) mySourceChar; + } + + ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + /*If at the end of conversion we are still carrying state information + *flush is TRUE, we can deduce that the input stream is truncated + */ + if (_this->toUnicodeStatus + && (mySourceIndex == sourceLength) + && (flush == TRUE)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_fromUnicode_MBCS 
(UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) + +{ + const UChar *mySource = *source; + char *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myFromUnicode = NULL; + UChar targetUniChar = 0x0000; + int8_t targetUniCharByteNum = 0; + UChar mySourceChar = 0x0000; + + myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); + + + if (targetUniChar != missingCharMarker) + { + if (targetUniChar <= 0x00FF) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + else + { + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySourceChar; + _this->invalidUCharLength = 1; + + FromU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + + *target += myTargetIndex; + *source += mySourceIndex;; + + + return; +} + +void T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + 
bool_t flush, + UErrorCode * err) + +{ + const UChar *mySource = *source; + char *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myFromUnicode = NULL; + UChar targetUniChar = 0x0000; + int8_t targetUniCharByteNum = 0; + UChar mySourceChar = 0x0000; + int32_t* originalOffsets = offsets; + + myFromUnicode = _this->sharedData->table->mbcs.fromUnicode; + + + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); + + if (targetUniChar != missingCharMarker) + { + if (targetUniChar <= 0x00FF) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + + } + else + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + else + { + int32_t currentOffset = mySourceIndex -1; + int32_t* offsetsAnchor = offsets; + + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySourceChar; + _this->invalidUCharLength = 1; + + FromU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + + *target += myTargetIndex; + *source += mySourceIndex;; + + + return; +} + +UChar T_UConverter_getNextUChar_MBCS(UConverter* converter, + 
const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + char const *sourceInitial = *source; + /*safe keeps a ptr to the beginning in case we need to step back*/ + + /*Input boundary check*/ + if ((*source)+1 > sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + + /*Checks to see if the byte is a lead*/ + if (converter->sharedData->table->mbcs.starters[(uint8_t)**source] == FALSE) + { + /*Not lead byte: we update the source ptr and get the codepoint*/ + myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, + (UChar)(uint8_t)(**source)); /* via uint8_t: plain char may be signed and must not sign-extend into the table index */ + (*source)++; + } + else + { + /*Lead byte: we Build the codepoint and get the corresponding character + * and update the source ptr*/ + if ((*source + 2) > sourceLimit) + { + *err = U_TRUNCATED_CHAR_FOUND; + return 0xFFFD; + } + + myUChar = ucmp16_getu(converter->sharedData->table->mbcs.toUnicode, + (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); + + (*source) += 2; + } + + if (myUChar != 0xFFFD) return myUChar; + else + { + /*rewinds source*/ + const char* sourceFinal = *source; + UChar* myUCharPtr = &myUChar; + + *err = U_INVALID_CHAR_FOUND; + *source = sourceInitial; + + /*It's is very likely that the ErrorFunctor will write to the + *internal buffers */ + converter->fromCharErrorBehaviour(converter, + &myUCharPtr, + myUCharPtr + 1, + &sourceFinal, + sourceLimit, + NULL, + TRUE, + err); + + /*makes the internal caching transparent to the user*/ + if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; + + return myUChar; + } +} + +static UConverterImpl _MBCSImpl={ + UCNV_MBCS, + + T_UConverter_toUnicode_MBCS, + T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC, + T_UConverter_fromUnicode_MBCS, + T_UConverter_fromUnicode_MBCS_OFFSETS_LOGIC, + T_UConverter_getNextUChar_MBCS +}; + +extern UConverterSharedData _MBCSData={ + sizeof(UConverterSharedData), 1, + NULL, NULL, &_MBCSImpl, "MBCS", + 0, UCNV_IBM, UCNV_MBCS, 1, 1, + { 0, 1, 0, 0, 0, 0 } +}; diff
--git a/icu4c/source/common/ucnv_sbcs.c b/icu4c/source/common/ucnv_sbcs.c new file mode 100644 index 00000000000..c09bf2e7ff8 --- /dev/null +++ b/icu4c/source/common/ucnv_sbcs.c @@ -0,0 +1,486 @@ +/* +********************************************************************** +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_sbcs.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb03 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "ucmp16.h" +#include "ucmp8.h" +#include "unicode/ucnv_bld.h" +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" + +/* SBCS --------------------------------------------------------------------- */ + +void T_UConverter_toUnicode_SBCS (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + char *mySource = (char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + UChar *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + + myToUnicode = _this->sharedData->table->sbcs.toUnicode; + + while (mySourceIndex < sourceLength) + { + + /*writing the UniChar to the output stream */ + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]]; + + if (targetUniChar != missingUCharMarker) + { + /* writes the UniChar to the output stream */ + myTarget[myTargetIndex++] = targetUniChar; + } + else + { + *err = U_INVALID_CHAR_FOUND; + _this->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1]; + _this->invalidCharLength = 1; + + ToU_CALLBACK_MACRO(_this, + myTarget, + 
myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_fromUnicode_SBCS (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactByteArray *myFromUnicode; + unsigned char targetChar = 0x00; + + myFromUnicode = _this->sharedData->table->sbcs.fromUnicode; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + targetChar = ucmp8_getu (myFromUnicode, mySource[mySourceIndex]); + + if (myTargetIndex < targetLength) + { + mySourceIndex++; + if (targetChar != 0 || !mySource[mySourceIndex - 1]) + { + /*writes the char to the output stream */ + myTarget[myTargetIndex++] = targetChar; + } + else + { + + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar)mySource[mySourceIndex - 1]; + _this->invalidUCharLength = 1; + +/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ + FromU_CALLBACK_MACRO(_this, + (char *)myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + if (U_FAILURE (*err)) + { + break; + } + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + *target += myTargetIndex; + *source += mySourceIndex; + + + return; +} + +UChar T_UConverter_getNextUChar_SBCS(UConverter* converter, + const char** source, + const char* sourceLimit, 
+ UErrorCode* err) +{ + UChar myUChar; + + + if ((*source)+1 > sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + + + /*Gets the corresponding codepoint*/ + myUChar = converter->sharedData->table->sbcs.toUnicode[(unsigned char)*((*source)++)]; + + if (myUChar != 0xFFFD) return myUChar; + else + { + UChar* myUCharPtr = &myUChar; + const char* sourceFinal = *source; + + *err = U_INVALID_CHAR_FOUND; + + /*Calls the ErrorFunctor after rewinding the input buffer*/ + (*source)--; + /*It's is very likely that the ErrorFunctor will write to the + *internal buffers */ + converter->fromCharErrorBehaviour(converter, + &myUCharPtr, + myUCharPtr + 1, + &sourceFinal, + sourceLimit, + NULL, + TRUE, + err); + + /*makes the internal caching transparent to the user*/ + if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; + + return myUChar; + } +} + +static UConverterImpl _SBCSImpl={ + UCNV_SBCS, + + T_UConverter_toUnicode_SBCS, + NULL, + T_UConverter_fromUnicode_SBCS, + NULL, + T_UConverter_getNextUChar_SBCS +}; + +extern UConverterSharedData _SBCSData={ + sizeof(UConverterSharedData), 1, + NULL, NULL, &_SBCSImpl, "SBCS", + 0, UCNV_IBM, UCNV_SBCS, 1, 1, + { 0, 1, 0, 0, 0, 0 } +}; + +/* DBCS --------------------------------------------------------------------- */ + +void T_UConverter_toUnicode_DBCS (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const char *mySource = ( char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + + myToUnicode = _this->sharedData->table->dbcs.toUnicode; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { 
+ /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) mySource[mySourceIndex++]; + + /*We have no internal state, we should */ + if (_this->toUnicodeStatus == 0x00) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar; + } + else + { + if (_this->toUnicodeStatus != 0x00) + { + mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | (mySourceChar & 0x00FF)); + _this->toUnicodeStatus = 0x00; + } + + targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar); + + /*writing the UniChar to the output stream */ + if (targetUniChar != missingUCharMarker) + { + /*writes the UniChar to the output stream */ + myTarget[myTargetIndex++] = targetUniChar; + } + else + { + *err = U_INVALID_CHAR_FOUND; + _this->invalidCharBuffer[0] = (char) (mySourceChar >> 8); + _this->invalidCharBuffer[1] = (char) mySourceChar; + _this->invalidCharLength = 2; + + ToU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + /*If at the end of conversion we are still carrying state information + *flush is TRUE, we can deduce that the input stream is truncated + */ + if ((flush == TRUE) + && (mySourceIndex == sourceLength) + && (_this->toUnicodeStatus != 0x00)) + { + + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_fromUnicode_DBCS (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) 
myTarget; + int32_t sourceLength = sourceLimit - mySource; + CompactShortArray *myFromUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + + myFromUnicode = _this->sharedData->table->dbcs.fromUnicode; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + + /*Gets the corresponding codepoint */ + targetUniChar = (UChar) ucmp16_getu (myFromUnicode, mySourceChar); + if (targetUniChar != missingCharMarker) + { + /*writes the char to the output stream */ + myTarget[myTargetIndex++] = (char) (targetUniChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) targetUniChar; + } + else + { + _this->charErrorBuffer[0] = (char) targetUniChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + else + { + *err = U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySourceChar; + _this->invalidUCharLength = 1; + + +/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ + FromU_CALLBACK_MACRO(_this, + (char *)myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + *target += myTargetIndex; + *source += mySourceIndex;; + + + return; +} + +UChar T_UConverter_getNextUChar_DBCS(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + + /*Checks boundaries and set appropriate error codes*/ + if ((*source)+2 > sourceLimit) + { + if ((*source) >= sourceLimit) + { + /*Either caller has reached the end of the byte stream*/ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + else if (((*source)+1) == sourceLimit) + { + /* a character was cut in half*/ + *err = U_TRUNCATED_CHAR_FOUND; + } + + 
return 0xFFFD; + } + + /*Gets the corresponding codepoint*/ + myUChar = ucmp16_getu(converter->sharedData->table->dbcs.toUnicode, + (uint16_t)(((UChar)((**source)) << 8) |((uint8_t)*((*source)+1)))); + + /*update the input pointer*/ + *source += 2; + if (myUChar != 0xFFFD) return myUChar; + else + { + UChar* myUCharPtr = &myUChar; + const char* sourceFinal = *source; + + /*Calls the ErrorFunctor after rewinding the input buffer*/ + (*source) -= 2; + + *err = U_INVALID_CHAR_FOUND; + + /*It's is very likely that the ErrorFunctor will write to the + *internal buffers */ + converter->fromCharErrorBehaviour(converter, + &myUCharPtr, + myUCharPtr + 1, + &sourceFinal, + sourceLimit, + NULL, + TRUE, + err); + /*makes the internal caching transparent to the user*/ + if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; + + return myUChar; + } +} + +static UConverterImpl _DBCSImpl={ + UCNV_DBCS, + + T_UConverter_toUnicode_DBCS, + NULL, + T_UConverter_fromUnicode_DBCS, + NULL, + T_UConverter_getNextUChar_DBCS +}; + +extern UConverterSharedData _DBCSData={ + sizeof(UConverterSharedData), 1, + NULL, NULL, &_DBCSImpl, "DBCS", + 0, UCNV_IBM, UCNV_DBCS, 2, 2, + { 0, 1, 0, 0, 0, 0 } +}; diff --git a/icu4c/source/common/ucnv_utf.c b/icu4c/source/common/ucnv_utf.c new file mode 100644 index 00000000000..d16e47b4ec8 --- /dev/null +++ b/icu4c/source/common/ucnv_utf.c @@ -0,0 +1,1184 @@ +/* +********************************************************************** +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_utf.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb03 +* created by: Markus W. 
Scherer +*/ + +#include "unicode/utypes.h" +#include "ucmp16.h" +#include "ucmp8.h" +#include "unicode/ucnv_bld.h" +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" + +/* ISO 8859-1 --------------------------------------------------------------- */ + +void T_UConverter_toUnicode_LATIN_1 (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + unsigned char *mySource = (unsigned char *) *source; + UChar *myTarget = *target; + int32_t sourceLength = sourceLimit - (char *) mySource; + int32_t readLen = 0; + int32_t i = 0; + + /*Since there is no risk of encountering illegal Chars + *we need to pad our latin1 chars to create Unicode codepoints + *we need to go as far a min(targetLen, sourceLen) + *in case we don't have enough buffer space + *we set the error flag accordingly + */ + if ((targetLimit - *target) < sourceLength) + { + readLen = targetLimit - *target; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + else + { + readLen = sourceLimit - (char *) mySource; + } + + for (i = 0; i < readLen; i++) myTarget[i] = (UChar) mySource[i]; + + *target += i; + *source += i; + return; +} + +void T_UConverter_fromUnicode_LATIN_1 (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + + if (myTargetIndex < targetLength) + { + if (mySource[mySourceIndex] < 0x0100) + { + /*writes the char to the output stream */ + myTarget[myTargetIndex++] = (char) mySource[mySourceIndex++]; + } + else + { + *err = 
U_INVALID_CHAR_FOUND; + _this->invalidUCharBuffer[0] = (UChar) mySource[mySourceIndex++]; + _this->invalidUCharLength = 1; + +/* Needed explicit cast for myTarget on MVS to make compiler happy - JJD */ + FromU_CALLBACK_MACRO(_this, + (char *)myTarget, + myTargetIndex, + targetLimit, + mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + if (U_FAILURE (*err)) break; + _this->invalidUCharLength = 0; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + *target += myTargetIndex; + *source += mySourceIndex;; + + return; +} + +UChar T_UConverter_getNextUChar_LATIN_1(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + + /* Empties the internal buffers if need be + * In this case since ErrorFunctors are never called + * (LATIN_1 is a subset of Unicode) + */ + + if ((*source)+1 > sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + + return (UChar)(uint8_t)*((*source)++); /* via uint8_t: a signed plain char would turn 0x80..0xFF into 0xFF80..0xFFFF */ +} + +static UConverterImpl _Latin1Impl={ + UCNV_LATIN_1, + + T_UConverter_toUnicode_LATIN_1, + NULL, + T_UConverter_fromUnicode_LATIN_1, + NULL, + T_UConverter_getNextUChar_LATIN_1 +}; + +extern UConverterSharedData _Latin1Data={ + sizeof(UConverterSharedData), ~0, + NULL, NULL, &_Latin1Impl, "LATIN_1", + 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, + { 0, 1, 0x1a, 0, 0, 0 } +}; + +/* UTF-8 -------------------------------------------------------------------- */ + +/* UTF-8 Conversion DATA + * for more information see Unicode Strandard 2.0 , Transformation Formats Appendix A-9 + */ +const uint32_t kReplacementCharacter = 0x0000FFFD; +const uint32_t kMaximumUCS2 = 0x0000FFFF; +const uint32_t kMaximumUTF16 = 0x0010FFFF; +const uint32_t kMaximumUCS4 = 0x7FFFFFFF; +const int8_t halfShift = 10; +const uint32_t halfBase = 0x0010000; +const uint32_t halfMask = 0x3FF; +const uint32_t kSurrogateHighStart = 0xD800; +const uint32_t kSurrogateHighEnd = 0xDBFF; +const uint32_t kSurrogateLowStart = 0xDC00; +const uint32_t
kSurrogateLowEnd = 0xDFFF; + +const uint32_t offsetsFromUTF8[7] = {0, + (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, + (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 +}; + +/* END OF UTF-8 Conversion DATA */ + +const int8_t bytesFromUTF8[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 +}; + +const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; + +void T_UConverter_toUnicode_UTF8 (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + uint32_t ch = 0 , + ch2 =0 , + i =0; /* Index into the current # of bytes consumed in the current sequence */ + uint32_t inBytes = 0; /* Total number of bytes in the current UTF8 sequence */ + + if (_this->toUnicodeStatus) + { + i = _this->invalidCharLength; /* restore # of bytes consumed */ + inBytes = _this->toUnicodeStatus; /* Restore size of current 
sequence */ + + ch = _this->mode; /*Stores the previously calculated ch from a previous call*/ + _this->toUnicodeStatus = 0; + _this->invalidCharLength = 0; + goto morebytes; + } + + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + ch = 0; + ch = ((uint32_t)mySource[mySourceIndex++]) & 0x000000FF; + if (ch < 0x80) /* Simple case */ + { + myTarget[myTargetIndex++] = (UChar) ch; + } + else + { + /* store the first char */ + + inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ + _this->invalidCharBuffer[0] = (char)ch; + i = 1; + + morebytes: + for (; i < inBytes; i++) + { + { + if (mySourceIndex >= sourceLength) + { + if (flush) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + else + { + _this->toUnicodeStatus = inBytes; + _this->invalidCharLength = (int8_t)i; + } + goto donefornow; + } + _this->invalidCharBuffer[i] = (char) (ch2 = (((uint32_t)mySource[mySourceIndex++]) & 0x000000FF)); + if ((ch2 & 0xC0) != 0x80) /* Invalid trailing byte */ + break; + } + ch <<= 6; + ch += ch2; + } + + + ch -= offsetsFromUTF8[inBytes]; + + if (i == inBytes && ch <= kMaximumUTF16) + { + if (ch <= kMaximumUCS2) + { + myTarget[myTargetIndex++] = (UChar) ch; + } + else + { + ch -= halfBase; + myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); + ch = (ch & halfMask) + kSurrogateLowStart; + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (UChar)ch; /* low surrogate is 16 bits wide; a (char) cast truncated it */ + } + else + { + _this->invalidUCharBuffer[0] = (UChar) ch; + _this->invalidUCharLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + else + { + *err = U_ILLEGAL_CHAR_FOUND; + _this->invalidCharLength = (int8_t)i; + +#ifdef Debug + printf("inbytes %d\n, _this->invalidCharLength = %d,\n mySource[mySourceIndex]=%X\n", inBytes, _this->invalidCharLength, mySource[mySourceIndex]); +#endif +/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ +
ToU_CALLBACK_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + (const char *)mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + /* End of target buffer */ + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + +donefornow: + *target += myTargetIndex; + *source += mySourceIndex; + _this->mode = ch; /*stores a partially calculated target*/ +} + +void T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + uint32_t ch = 0, ch2 = 0, i = 0; + uint32_t inBytes = 0; + int32_t* originalOffsets = offsets; + + + + if (_this->toUnicodeStatus) + { + i = _this->invalidCharLength; + inBytes = _this->toUnicodeStatus; + _this->toUnicodeStatus = 0; + ch = _this->mode; + goto morebytes; + } + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + ch = mySource[mySourceIndex++]; + if (ch < 0x80) /* Simple case */ + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (UChar) ch; + } + else + { + inBytes = bytesFromUTF8[ch]; + _this->invalidCharBuffer[0] = (char)ch; + i = 1; + + morebytes: + for (; i < inBytes; i++) + { + { + if (mySourceIndex >= sourceLength) + { + if (flush) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + else + { + _this->toUnicodeStatus = inBytes; + _this->invalidCharLength = (int8_t)i; + } + goto donefornow; + } + _this->invalidCharBuffer[i] = (char) (ch2 = mySource[mySourceIndex++]); + if ((ch2 & 0xC0) != 0x80) /* Invalid 
trailing byte */ + break; + } + ch <<= 6; + ch += ch2; + } + + ch -= offsetsFromUTF8[inBytes]; + if (i == inBytes && ch <= kMaximumUTF16) + { + if (ch <= kMaximumUCS2) { + + offsets[myTargetIndex] = mySourceIndex - (int32_t)inBytes; /* sequence start: it is inBytes long and ends at mySourceIndex */ + myTarget[myTargetIndex++] = (UChar) ch; + + } + else + { + ch -= halfBase; + offsets[myTargetIndex] = mySourceIndex - (int32_t)inBytes; /* both surrogates map back to the start of the same sequence */ + myTarget[myTargetIndex++] = (UChar) ((ch >> halfShift) + kSurrogateHighStart); + ch = (ch & halfMask) + kSurrogateLowStart; + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex - (int32_t)inBytes; + myTarget[myTargetIndex++] = (UChar)ch; /* low surrogate is 16 bits wide; a (char) cast truncated it */ + } + else + { + _this->invalidUCharBuffer[0] = (UChar) ch; + _this->invalidUCharLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + else + { + int32_t currentOffset = offsets[myTargetIndex-1]; + + *err = U_ILLEGAL_CHAR_FOUND; + _this->invalidCharLength = (int8_t)i; + +/* Needed explicit cast for mySource on MVS to make compiler happy - JJD */ + ToU_CALLBACK_OFFSETS_LOGIC_MACRO(_this, + myTarget, + myTargetIndex, + targetLimit, + (const char *)mySource, + mySourceIndex, + sourceLimit, + offsets, + flush, + err); + + + if (U_FAILURE (*err)) break; + _this->invalidCharLength = 0; + } + } + } + else + /* End of target buffer */ + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + +donefornow: + *target += myTargetIndex; + *source += mySourceIndex; + _this->mode = ch; + +} + +void T_UConverter_fromUnicode_UTF8 (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + int8_t targetCharByteNum = 0; + UChar mySourceChar = 0x0000; + uint32_t ch; + int16_t i, bytesToWrite = 0; + uint32_t ch2; + char temp[4]; + +
if (_this->fromUnicodeStatus) + { + ch = _this->fromUnicodeStatus; + _this->fromUnicodeStatus = 0; + goto lowsurogate; + } + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + bytesToWrite = 0; + ch = mySource[mySourceIndex++]; + + if (ch < 0x80) /* Single byte */ + { + myTarget[myTargetIndex++] = (char) ch; + } + else if (ch < 0x800) /* Double byte */ + { + myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); + } + else + { + _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + else + /* Check for surogates */ + { + if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) + { + lowsurogate: + if (mySourceIndex < sourceLength && !flush) + { + ch2 = mySource[mySourceIndex]; + if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) + { + ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; + ++mySourceIndex; + } + } + } + if (ch < 0x10000) + { + bytesToWrite = 3; + temp[0] = (char) ((ch >> 12) | 0xe0); + temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); + temp[2] = (char) (ch & 0x3f | 0x80); + } + else + { + bytesToWrite = 4; + temp[0] = (char) ((ch >> 18) | 0xf0); + temp[1] = (char) ((ch >> 12) & 0x3f | 0x80); /* trail bytes carry the 10xxxxxx marker (0x80), not 0xe0 */ + temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); + temp[3] = (char) (ch & 0x3f | 0x80); + } + for (i = 0; i < bytesToWrite; i++) + { + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = temp[i]; + } + else + { + _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter * _this, + char **target, + const char *targetLimit, + const UChar
** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + int8_t targetCharByteNum = 0; + UChar mySourceChar = 0x0000; + uint32_t ch; + int16_t i, bytesToWrite = 0; + uint32_t ch2; + char temp[4]; + + if (_this->fromUnicodeStatus) + { + ch = _this->fromUnicodeStatus; + _this->fromUnicodeStatus = 0; + goto lowsurogate; + } + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + bytesToWrite = 0; + ch = mySource[mySourceIndex++]; + + if (ch < 0x80) /* Single byte */ + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) ch; + } + else if (ch < 0x800) /* Double byte */ + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) ((ch >> 6) | 0xc0); + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = (char) ((ch & 0x3f) | 0x80); + } + else + { + _this->charErrorBuffer[0] = (char) ((ch & 0x3f) | 0x80); + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + else + /* Check for surogates */ + { + if ((ch >= kSurrogateHighStart) && (ch <= kSurrogateHighEnd)) + { + lowsurogate: + if (mySourceIndex < sourceLength && !flush) + { + ch2 = mySource[mySourceIndex]; + if ((ch2 >= kSurrogateLowStart) && (ch2 <= kSurrogateLowEnd)) + { + ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; + ++mySourceIndex; + } + } + } + if (ch < 0x10000) + { + bytesToWrite = 3; + temp[0] = (char) ((ch >> 12) | 0xe0); + temp[1] = (char) ((ch >> 6) & 0x3f | 0x80); + temp[2] = (char) (ch & 0x3f | 0x80); + } + else + { + bytesToWrite = 4; + temp[0] = (char) ((ch >> 18) | 0xf0); + temp[1] = (char) ((ch >> 
12) & 0x3f | 0x80); /* trail bytes carry the 10xxxxxx marker (0x80), not 0xe0 */ + temp[2] = (char) ((ch >> 6) & 0x3f | 0x80); + temp[3] = (char) (ch & 0x3f | 0x80); + } + for (i = 0; i < bytesToWrite; i++) + { + if (myTargetIndex < targetLength) + { + offsets[myTargetIndex] = mySourceIndex-1; + myTarget[myTargetIndex++] = temp[i]; + } + else + { + _this->charErrorBuffer[_this->charErrorBufferLength++] = temp[i]; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +UChar T_UConverter_getNextUChar_UTF8(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + /*safe keeps a ptr to the beginning in case we need to step back*/ + char const *sourceInitial = *source; + uint16_t extraBytesToWrite; + uint8_t myByte; + uint32_t ch; + int8_t isLegalSequence = 1; + + /*Input boundary check*/ + if ((*source) >= sourceLimit) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xFFFD; + } + + myByte = (uint8_t)*((*source)++); + if(myByte < 0x80) { + return (UChar)myByte; + } + extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; + if (extraBytesToWrite == 0 || extraBytesToWrite > 4) { + goto CALL_ERROR_FUNCTION; + } + + + /*The byte sequence is longer than the buffer area passed*/ + + if ((*source + extraBytesToWrite - 1) > sourceLimit) + { + *err = U_TRUNCATED_CHAR_FOUND; + return 0xFFFD; + } + else + { + ch = myByte << 6; + switch(extraBytesToWrite) + { + /* note: code falls through cases!
(sic)*/ + case 6: ch += (myByte = (uint8_t)*((*source)++)); ch <<= 6; + if ((myByte & 0xC0) != 0x80) + { + isLegalSequence = 0; + break; + } + case 5: ch += (myByte = *((*source)++)); ch <<= 6; + if ((myByte & 0xC0) != 0x80) + { + isLegalSequence = 0; + break; + } + case 4: ch += (myByte = *((*source)++)); ch <<= 6; + if ((myByte & 0xC0) != 0x80) + { + isLegalSequence = 0; + break; + } + case 3: ch += (myByte = *((*source)++)); ch <<= 6; + if ((myByte & 0xC0) != 0x80) + { + isLegalSequence = 0; + break; + } + case 2: ch += (myByte = *((*source)++)); + if ((myByte & 0xC0) != 0x80) + { + isLegalSequence = 0; + } + }; + } + ch -= offsetsFromUTF8[extraBytesToWrite]; + + + if (isLegalSequence == 0) goto CALL_ERROR_FUNCTION; + + /*we got a UCS-2 Character*/ + if (ch <= kMaximumUCS2) return (UChar)ch; + /*character out of bounds (kMaximumUTF16 itself is still representable, as in toUnicode_UTF8)*/ + else if (ch > kMaximumUTF16) goto CALL_ERROR_FUNCTION; + /*Surrogates found*/ + else + { + ch -= halfBase; + /*stores the 2nd (low) surrogate inside the converter for the next call*/ + converter->UCharErrorBuffer[0] = (UChar)((ch & halfMask) + kSurrogateLowStart); + converter->UCharErrorBufferLength = 1; + + /*returns the 1st (high) surrogate*/ + return (UChar)((ch >> halfShift) + kSurrogateHighStart); + } + + + CALL_ERROR_FUNCTION: + { + /*rewinds source*/ + const char* sourceFinal = *source; + UChar* myUCharPtr = &myUChar; + + *err = U_ILLEGAL_CHAR_FOUND; + *source = sourceInitial; + + /*It's is very likely that the ErrorFunctor will write to the + *internal buffers */ + converter->fromCharErrorBehaviour(converter, + &myUCharPtr, + myUCharPtr + 1, + &sourceFinal, + sourceLimit, + NULL, + TRUE, + err); + + /*makes the internal caching transparent to the user*/ + if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR; + + return myUChar; + } +} + +static UConverterImpl _UTF8Impl={ + UCNV_UTF8, + + T_UConverter_toUnicode_UTF8, + T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC, + T_UConverter_fromUnicode_UTF8, + T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC, +
T_UConverter_getNextUChar_UTF8 +}; + +extern UConverterSharedData _UTF8Data={ + sizeof(UConverterSharedData), ~0, + NULL, NULL, &_UTF8Impl, "UTF8", + 1208, UCNV_IBM, UCNV_UTF8, 1, 4, + { 0, 3, 0xef, 0xbf, 0xbd, 0 } +}; + +/* UTF-16BE ----------------------------------------------------------------- */ + +void T_UConverter_toUnicode_UTF16_BE (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + UChar mySourceChar = 0x0000; + UChar oldmySourceChar = 0x0000; + + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UChar */ + mySourceChar = (unsigned char) mySource[mySourceIndex++]; + oldmySourceChar = mySourceChar; + if (_this->toUnicodeStatus == 0) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 
0xFFFF : mySourceChar; + } + else + { + if (_this->toUnicodeStatus != 0xFFFF) + mySourceChar = (UChar) ((_this->toUnicodeStatus << 8) | mySourceChar); + _this->toUnicodeStatus = 0; + + + + myTarget[myTargetIndex++] = mySourceChar; + + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + if (U_SUCCESS(*err) && flush + && (mySourceIndex == sourceLength) + && (_this->toUnicodeStatus != 0x00)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + return; +} + +void T_UConverter_fromUnicode_UTF16_BE (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + UChar mySourceChar; + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) mySourceChar; + } + else + { + _this->charErrorBuffer[0] = (char) mySourceChar; + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + *target += myTargetIndex; + *source += mySourceIndex;; + + return; +} + +UChar T_UConverter_getNextUChar_UTF16_BE(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + /*Checks boundaries and set appropriate error codes*/ + if ((*source)+2 > sourceLimit) + { + if ((*source) >= sourceLimit) + { + 
/*Either caller has reached the end of the byte stream*/ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + else if (((*source)+1) == sourceLimit) + { + /* a character was cut in half*/ + *err = U_TRUNCATED_CHAR_FOUND; + } + + return 0xFFFD; + } + + + /*Gets the corresponding codepoint*/ + + myUChar = ((uint16_t)((**source)) << 8) |((uint8_t)*((*source)+1)); + *source += 2; + return myUChar; +} + +static UConverterImpl _UTF16BEImpl={ + UCNV_UTF16_BigEndian, + + T_UConverter_toUnicode_UTF16_BE, + NULL, + T_UConverter_fromUnicode_UTF16_BE, + NULL, + T_UConverter_getNextUChar_UTF16_BE +}; + +extern UConverterSharedData _UTF16BEData={ + sizeof(UConverterSharedData), ~0, + NULL, NULL, &_UTF16BEImpl, "UTF16_BigEndian", + 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, + { 0, 2, 0xff, 0xfd, 0, 0 } +}; + +/* UTF-16LE ----------------------------------------------------------------- */ + +void T_UConverter_toUnicode_UTF16_LE (UConverter * _this, + UChar ** target, + const UChar * targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) *source; + UChar *myTarget = *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - myTarget; + int32_t sourceLength = sourceLimit - (char *) mySource; + CompactShortArray *myToUnicode = NULL; + UChar targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + + while (mySourceIndex < sourceLength) + { + if (myTargetIndex < targetLength) + { + /*gets the corresponding UniChar */ + mySourceChar = (unsigned char) mySource[mySourceIndex++]; + + if (_this->toUnicodeStatus == 0x00) + { + _this->toUnicodeStatus = (unsigned char) mySourceChar == 0x00 ? 
0xFFFF : mySourceChar; + } + else + { + if (_this->toUnicodeStatus == 0xFFFF) + mySourceChar = (UChar) (mySourceChar << 8); + else + { + mySourceChar <<= 8; + mySourceChar |= (UChar) (_this->toUnicodeStatus); + } + _this->toUnicodeStatus = 0x00; + myTarget[myTargetIndex++] = mySourceChar; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + + if (U_SUCCESS(*err) && flush + && (mySourceIndex == sourceLength) + && (_this->toUnicodeStatus != 0x00)) + { + if (U_SUCCESS(*err)) + { + *err = U_TRUNCATED_CHAR_FOUND; + _this->toUnicodeStatus = 0x00; + } + } + + *target += myTargetIndex; + *source += mySourceIndex; + + + return; +} + +void T_UConverter_fromUnicode_UTF16_LE (UConverter * _this, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t *offsets, + bool_t flush, + UErrorCode * err) +{ + const UChar *mySource = *source; + unsigned char *myTarget = (unsigned char *) *target; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = targetLimit - (char *) myTarget; + int32_t sourceLength = sourceLimit - mySource; + UChar mySourceChar; + + + /*writing the char to the output stream */ + while (mySourceIndex < sourceLength) + { + + if (myTargetIndex < targetLength) + { + mySourceChar = (UChar) mySource[mySourceIndex++]; + myTarget[myTargetIndex++] = (char) mySourceChar; + if (myTargetIndex < targetLength) + { + myTarget[myTargetIndex++] = (char) (mySourceChar >> 8); + } + else + { + _this->charErrorBuffer[0] = (char) (mySourceChar >> 8); + _this->charErrorBufferLength = 1; + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + } + else + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + } + + *target += myTargetIndex; + *source += mySourceIndex;; + + return; +} + +UChar T_UConverter_getNextUChar_UTF16_LE(UConverter* converter, + const char** source, + const char* sourceLimit, + UErrorCode* err) +{ + UChar myUChar; + /*Checks boundaries and set appropriate error codes*/ + if 
((*source)+2 > sourceLimit) + { + if ((*source) >= sourceLimit) + { + /*Either caller has reached the end of the byte stream*/ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + } + else if (((*source)+1) == sourceLimit) + { + /* a character was cut in half*/ + *err = U_TRUNCATED_CHAR_FOUND; + } + + return 0xFFFD; + } + + + /*Gets the corresponding codepoint*/ + myUChar = ((uint16_t)*((*source)+1) << 8) |((uint8_t)((**source))); + /*updates the source*/ + *source += 2; + return myUChar; +} + +static UConverterImpl _UTF16LEImpl={ + UCNV_UTF16_LittleEndian, + + T_UConverter_toUnicode_UTF16_LE, + NULL, + T_UConverter_fromUnicode_UTF16_LE, + NULL, + T_UConverter_getNextUChar_UTF16_LE +}; + +extern UConverterSharedData _UTF16LEData={ + sizeof(UConverterSharedData), ~0, + NULL, NULL, &_UTF16LEImpl, "UTF16_LittleEndian", + 1200, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, + { 0, 2, 0xfd, 0xff, 0, 0 } +}; diff --git a/icu4c/source/common/unicode/ucnv_bld.h b/icu4c/source/common/unicode/ucnv_bld.h index 36ab329a9c3..eb206659678 100644 --- a/icu4c/source/common/unicode/ucnv_bld.h +++ b/icu4c/source/common/unicode/ucnv_bld.h @@ -46,8 +46,8 @@ typedef struct _CompactByteArray CompactByteArray; /*Pointer to the aforementioned file */ #define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400) -#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */ -#define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */ +#define UCNV_SI 0x0F /*Shift in for EBDCDIC_STATEFUL and iso2022 states */ +#define UCNV_SO 0x0E /*Shift out for EBDCDIC_STATEFUL and iso2022 states */ typedef enum { UCNV_UNSUPPORTED_CONVERTER = -1, @@ -61,12 +61,10 @@ typedef enum { UCNV_EBCDIC_STATEFUL = 7, UCNV_ISO_2022 = 8, /* Number of converter types for which we have conversion routines. 
*/ - UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9, - UCNV_JIS = 9, - UCNV_EUC = 10, - UCNV_GB = 11 + UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9 } UConverterType; +/* ### move the following typedef and array into implementation files! */ typedef struct { int32_t ccsid; @@ -121,95 +119,125 @@ typedef union UConverterTable; -/*Defines the struct of a UConverterSharedData the immutable, shared part of - *UConverter - */ -typedef struct - { - uint32_t structSize; /* Size of this structure */ - void *dataMemory; - uint32_t referenceCounter; /*used to count number of clients */ - char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */ - UConverterPlatform platform; /*platform of the converter (only IBM now) */ - int32_t codepage; /*codepage # (now IBM-$codepage) */ - UConverterType conversionType; /*conversion type */ - int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */ - int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */ - struct - { /*initial values of some members of the mutable part of object */ - uint32_t toUnicodeStatus; - int8_t subCharLen; - unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; - } - defaultConverterValues; - UConverterTable *table; /*Pointer to conversion data */ - } -UConverterSharedData; - - -/*Defines a UConverter, the lightweight mutable part the user sees */ - U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv itself is compiled under C++, the linkage of the funcptrs will work. 
- */ + */ -struct UConverter - { - int32_t toUnicodeStatus; /*Used to internalize stream status information */ - int32_t fromUnicodeStatus; +struct UConverterImpl; +typedef struct UConverterImpl UConverterImpl; + +/* ### + * Markus Scherer on 2000feb04: + * I have changed UConverter and UConverterSharedData; there may be more changes, + * or we may decide to roll back the structure definitions to what they were + * before, with the additional UConverterImpl field and the new semantics for + * referenceCounter. + * + * Reasons for changes: Attempt at performance improvements, especially + * a) decrease amount of internal, implicit padding by reordering the fields + * b) save space by storing the internal name of the converter only with a + * pointer instead of an array + * + * In addition to that, I added the UConverterImpl field to modularize + * the code better and make it more maintainable. It may actually + * become slightly faster by doing this. + * + * I changed the UConverter.to|fromUnicodeStatus to be unsigned because + * the defaultConverterValues.toUnicodeStatus is unsigned, and it seemed to be a safer choice. + * + * Ultimately, I would prefer not to expose these definitions any more at all, + * but this is subject to discussion, proposals and design reviews. + * + * I would personally like to see more information hiding (with helper APIs), + * useful state fields in UConverter that are reserved for the callbacks, + * and directly included structures instead of pointers to allocated + * memory, like for UConverterTable and its variant fields. + */ + +/* + * Defines the UConverterSharedData struct, + * the immutable, shared part of UConverter. 
+ */ +typedef struct { + uint32_t structSize; /* Size of this structure */ + uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */ + + const void *dataMemory; /* from udata_openChoice() */ + UConverterTable *table; /* Pointer to conversion data */ + const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ + const char *name; /* internal name of the converter */ + + int32_t codepage; /* codepage # (now IBM-$codepage) */ + + int8_t platform; /* platform of the converter (only IBM now) */ + int8_t conversionType; /* conversion type */ + + int8_t minBytesPerChar; /* Minimum # bytes per char in this codepage */ + int8_t maxBytesPerChar; /* Maximum # bytes per char in this codepage */ + + /*initial values of some members of the mutable part of object */ + struct { + uint32_t toUnicodeStatus; + int8_t subCharLen; + uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; + } defaultConverterValues; +} UConverterSharedData; + + +/* Defines a UConverter, the lightweight mutable part the user sees */ + +struct UConverter { + uint32_t toUnicodeStatus; /* Used to internalize stream status information */ + uint32_t fromUnicodeStatus; + int32_t mode; + + int8_t subCharLen; /* length of the codepage specific character sequence */ int8_t invalidCharLength; int8_t invalidUCharLength; - int8_t pad; - int32_t mode; - int8_t subCharLen; /*length of the codepage specific character sequence */ - unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */ - UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for - *output stream by the Error function pointers - */ - unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for - * output stream by the Error function pointers - */ - int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars - *in charErrorBuffer - */ - int8_t charErrorBufferLength; /*used to indicate the 
number of valid bytes - *in charErrorBuffer - */ + int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ + int8_t UCharErrorBufferLength; /* number of valid UChars in UCharErrorBuffer */ + + uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* codepage specific character sequence */ + char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; + uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ UChar invalidUCharBuffer[3]; - char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; - /*Error function pointer called when conversion issues - *occur during a T_UConverter_fromUnicode call + UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ + + /* + * Error function pointer called when conversion issues + * occur during a T_UConverter_fromUnicode call */ void (*fromUCharErrorBehaviour) (struct UConverter *, - char **, - const char *, - const UChar **, - const UChar *, - int32_t* offsets, - bool_t, - UErrorCode *); - /*Error function pointer called when conversion issues - *occur during a T_UConverter_toUnicode call + char **, + const char *, + const UChar **, + const UChar *, + int32_t* offsets, + bool_t, + UErrorCode *); + /* + * Error function pointer called when conversion issues + * occur during a T_UConverter_toUnicode call */ void (*fromCharErrorBehaviour) (struct UConverter *, - UChar **, - const UChar *, - const char **, - const char *, - int32_t* offsets, - bool_t, - UErrorCode *); + UChar **, + const UChar *, + const char **, + const char *, + int32_t* offsets, + bool_t, + UErrorCode *); - UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the - *converter object - */ - void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022 - Could be used by clients writing their own call back function to - pass context to them - */ - }; + UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ 
+ + /* + * currently only used to point to a struct containing UConverter used by iso 2022; + * could be used by clients writing their own call back function to pass context to them + */ + void *extraInfo; +}; U_CDECL_END /* end of UConverter */ @@ -219,7 +247,7 @@ typedef struct UConverter UConverter; typedef struct { UConverter *currentConverter; - unsigned char escSeq2022[10]; + uint8_t escSeq2022[10]; int8_t escSeq2022Length; } UConverterDataISO2022; diff --git a/icu4c/source/tools/makeconv/makeconv.c b/icu4c/source/tools/makeconv/makeconv.c index b42e40f2a3f..96f19d2e7f1 100644 --- a/icu4c/source/tools/makeconv/makeconv.c +++ b/icu4c/source/tools/makeconv/makeconv.c @@ -28,28 +28,115 @@ #include "unewdata.h" #include "ucmpwrit.h" +/*Defines the struct of a UConverterSharedData the immutable, shared part of + *UConverter - + * This is the definition from ICU 1.4, necessary to read converter data + * version 1 because the structure is directly embedded in the data. + * See udata.html for why this is bad (pointers, enums, padding...). 
+ */ +typedef struct + { + uint32_t structSize; /* Size of this structure */ + void *dataMemory; + uint32_t referenceCounter; /*used to count number of clients */ + char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */ + UConverterPlatform platform; /*platform of the converter (only IBM now) */ + int32_t codepage; /*codepage # (now IBM-$codepage) */ + UConverterType conversionType; /*conversion type */ + int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */ + int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */ + struct + { /*initial values of some members of the mutable part of object */ + uint32_t toUnicodeStatus; + int8_t subCharLen; + unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; + } + defaultConverterValues; + UConverterTable *table; /*Pointer to conversion data */ + } +UConverterSharedData_1_4; + +struct UConverter_1_4 + { + int32_t toUnicodeStatus; /*Used to internalize stream status information */ + int32_t fromUnicodeStatus; + int8_t invalidCharLength; + int8_t invalidUCharLength; + int8_t pad; + int32_t mode; + int8_t subCharLen; /*length of the codepage specific character sequence */ + unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */ + UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for + *output stream by the Error function pointers + */ + unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for + * output stream by the Error function pointers + */ + int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars + *in charErrorBuffer + */ + int8_t charErrorBufferLength; /*used to indicate the number of valid bytes + *in charErrorBuffer + */ + + UChar invalidUCharBuffer[3]; + char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; + /*Error function pointer called when conversion issues + *occur during a T_UConverter_fromUnicode call + */ + void (*fromUCharErrorBehaviour) (struct 
UConverter_1_4 *, + char **, + const char *, + const UChar **, + const UChar *, + int32_t* offsets, + bool_t, + UErrorCode *); + /*Error function pointer called when conversion issues + *occur during a T_UConverter_toUnicode call + */ + void (*fromCharErrorBehaviour) (struct UConverter_1_4 *, + UChar **, + const UChar *, + const char **, + const char *, + int32_t* offsets, + bool_t, + UErrorCode *); + + UConverterSharedData_1_4 *sharedData; /*Pointer to the shared immutable part of the + *converter object + */ + void *extraInfo; /*currently only used to point to a struct containing UConverter_1_4 used by iso 2022 + Could be used by clients writing their own call back function to + pass context to them + */ + }; + +typedef struct UConverter_1_4 UConverter_1_4; + /*Reads the header of the table file and fills in basic knowledge about the converter *in "converter" */ -static void readHeaderFromFile(UConverter* myConverter, FileStream* convFile, UErrorCode* err); +static void readHeaderFromFile(UConverter_1_4* myConverter, FileStream* convFile, UErrorCode* err); /*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadMBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err); +static void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); /*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err); +static void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); /*Reads the rest of the file, and fills up the shared objects if necessary*/ -static void loadSBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err); +static void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); /*Reads the rest of the file, and fills up the shared objects if 
necessary*/ -static void loadDBCSTableFromFile(FileStream* convFile, UConverter* converter, UErrorCode* err); +static void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* converter, UErrorCode* err); -/* creates a UConverterSharedData from a mapping file, fills in necessary links to it the +/* creates a UConverterSharedData_1_4 from a mapping file, fills in necessary links to it the * appropriate function pointers * if the data tables are already in memory */ -static UConverterSharedData* createConverterFromTableFile(const char* realName, UErrorCode* err); +static UConverterSharedData_1_4* createConverterFromTableFile(const char* realName, UErrorCode* err); /*writes a CompactShortArray to a file*/ @@ -60,11 +147,13 @@ static void writeCompactByteArrayToFile(FileStream* outfile, const CompactByteAr /*writes a binary to a file*/ static void writeUConverterSharedDataToFile(const char* filename, - UConverterSharedData* mySharedData, + UConverterSharedData_1_4* mySharedData, UErrorCode* err); -static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data); +static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData_1_4* data); + +bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData); static UConverterPlatform getPlatformFromName(char* name); static int32_t getCodepageNumberFromName(char* name); @@ -158,7 +247,7 @@ static const UDataInfo dataInfo={ }; -void writeConverterData(UConverterSharedData *mySharedData, const char *cName, UErrorCode *status) +void writeConverterData(UConverterSharedData_1_4 *mySharedData, const char *cName, UErrorCode *status) { UNewDataMemory *mem; const char *cnvName, *cnvName2; @@ -192,7 +281,7 @@ void writeConverterData(UConverterSharedData *mySharedData, const char *cName, U int main(int argc, char** argv) { - UConverterSharedData* mySharedData = NULL; + UConverterSharedData_1_4* mySharedData = NULL; UErrorCode err = U_ZERO_ERROR; char 
outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; char* dot = NULL, *arg; @@ -232,7 +321,7 @@ int main(int argc, char** argv) { /* writeUConverterSharedDataToFile(outFileName, mySharedData, &err); */ writeConverterData(mySharedData, cnvName, &err); - deleteSharedConverterData(mySharedData); + makeconv_deleteSharedConverterData(mySharedData); if(U_FAILURE(err)) { @@ -289,7 +378,7 @@ int32_t getCodepageNumberFromName(char* name) } /*Reads the header of the table file and fills in basic knowledge about the converter in "converter"*/ -void readHeaderFromFile(UConverter* myConverter, +void readHeaderFromFile(UConverter_1_4* myConverter, FileStream* convFile, UErrorCode* err) { @@ -412,7 +501,7 @@ void readHeaderFromFile(UConverter* myConverter, -void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err) +void loadSBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -479,7 +568,7 @@ void loadSBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError return; } -void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err) +void loadMBCSTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -560,7 +649,7 @@ void loadMBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError return; } -void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err) +void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter_1_4* myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -627,7 +716,7 @@ void loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConverter* myConver } -void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UErrorCode* err) +void loadDBCSTableFromFile(FileStream* convFile, UConverter_1_4* 
myConverter, UErrorCode* err) { char storageLine[UCNV_MAX_LINE_TEXT]; char* line = NULL; @@ -688,7 +777,7 @@ void loadDBCSTableFromFile(FileStream* convFile, UConverter* myConverter, UError } /*deletes the "shared" type object*/ -bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData) +bool_t makeconv_deleteSharedConverterData(UConverterSharedData_1_4* deadSharedData) { if (deadSharedData->conversionType == UCNV_SBCS) { @@ -719,13 +808,13 @@ bool_t deleteSharedConverterData(UConverterSharedData* deadSharedData) -/*creates a UConverter, fills in necessary links to it the appropriate function pointers*/ -UConverterSharedData* createConverterFromTableFile(const char* converterName, UErrorCode* err) +/*creates a UConverter_1_4, fills in necessary links to it the appropriate function pointers*/ +UConverterSharedData_1_4* createConverterFromTableFile(const char* converterName, UErrorCode* err) { FileStream* convFile = NULL; int32_t i = 0; - UConverterSharedData* mySharedData = NULL; - UConverter myConverter; + UConverterSharedData_1_4* mySharedData = NULL; + UConverter_1_4 myConverter; if (U_FAILURE(*err)) return NULL; @@ -738,14 +827,14 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE } - mySharedData = (UConverterSharedData*) uprv_malloc(sizeof(UConverterSharedData)); + mySharedData = (UConverterSharedData_1_4*) uprv_malloc(sizeof(UConverterSharedData_1_4)); if (mySharedData == NULL) { *err = U_MEMORY_ALLOCATION_ERROR; T_FileStream_close(convFile); } - mySharedData->structSize = sizeof(UConverterSharedData); + mySharedData->structSize = sizeof(UConverterSharedData_1_4); mySharedData->dataMemory = NULL; /* for init */ myConverter.sharedData = mySharedData; @@ -787,13 +876,13 @@ UConverterSharedData* createConverterFromTableFile(const char* converterName, UE -static void WriteConverterSharedData(UNewDataMemory *pData, const UConverterSharedData* data) +static void WriteConverterSharedData(UNewDataMemory *pData, const 
UConverterSharedData_1_4* data) { uint32_t size = 0; - udata_writeBlock(pData, data, sizeof(UConverterSharedData)); + udata_writeBlock(pData, data, sizeof(UConverterSharedData_1_4)); - size += sizeof(UConverterSharedData); /* Is 4-aligned- it ends with a pointer */ + size += sizeof(UConverterSharedData_1_4); /* Is 4-aligned- it ends with a pointer */ switch (data->conversionType) {