diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index d0c4fd0c76b..3e1a773fa93 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -171,21 +171,10 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", ucnv_getName(cnv, status), cnv, stackBuffer); - /* Pointers on 64-bit platforms need to be aligned - * on a 64-bit boundry in memory. - */ - if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { - int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); - *pBufferSize -= offsetUp; - stackBufferChars += offsetUp; - } - - stackBuffer = (void *)stackBufferChars; - if (cnv->sharedData->impl->safeClone != NULL) { /* call the custom safeClone function for sizing */ bufferSizeNeeded = 0; - cnv->sharedData->impl->safeClone(cnv, stackBuffer, &bufferSizeNeeded, status); + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); } else { @@ -200,6 +189,22 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U } + /* Pointers on 64-bit platforms need to be aligned + * on a 64-bit boundary in memory. + */ + if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { + int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); + if(*pBufferSize > offsetUp) { + *pBufferSize -= offsetUp; + stackBufferChars += offsetUp; + } else { + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ + *pBufferSize = 1; + } + } + + stackBuffer = (void *)stackBufferChars; + /* Now, see if we must allocate any memory */ if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL) { @@ -224,6 +229,8 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U allocatedConverter = NULL; } + uprv_memset(localConverter, 0, bufferSizeNeeded); + /* Copy initial state */ uprv_memcpy(localConverter, cnv, sizeof(UConverter)); localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; @@ -255,8 +262,6 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U localConverter->isCopyLocal = TRUE; } - localConverter->isExtraLocal = localConverter->isCopyLocal; - /* allow callback functions to handle any memory allocation */ toUArgs.converter = fromUArgs.converter = localConverter; cbErr = U_ZERO_ERROR; diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index 2989d3d97d0..4d1925cce8a 100644 --- a/icu4c/source/common/ucnv2022.c +++ b/icu4c/source/common/ucnv2022.c @@ -34,6 +34,7 @@ #include "unicode/uset.h" #include "unicode/ucnv_err.h" #include "unicode/ucnv_cb.h" +#include "ucnv_imp.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "ucnvmbcs.h" @@ -173,7 +174,7 @@ typedef struct{ #endif Cnv2022Type currentType; ISO2022State toU2022State, fromU2022State; - UConverter* myConverterArray[UCNV_2022_MAX_CONVERTERS]; + UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; uint32_t key; uint32_t version; char locale[3]; @@ -432,24 +433,23 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti } myConverterData->version= 0; version = options & UCNV_OPTIONS_VERSION_MASK; - myConverterData->myConverterArray[0] =NULL; if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && (myLocale[2]=='_' || myLocale[2]=='\0')){ int len=0; /* open the required converters and cache them */ if(jpCharsetMasks[version]&CSM(ISO8859_7)) { - myConverterData->myConverterArray[ISO8859_7]= ucnv_open("ISO8859_7", errorCode); + myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode); } - myConverterData->myConverterArray[JISX201] = ucnv_open("jisx-201", errorCode); - myConverterData->myConverterArray[JISX208] = ucnv_open("jisx-208", errorCode); + myConverterData->myConverterArray[JISX201] = ucnv_loadSharedData("jisx-201", NULL, errorCode); + myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("jisx-208", NULL, errorCode); if(jpCharsetMasks[version]&CSM(JISX212)) { - myConverterData->myConverterArray[JISX212] = ucnv_open("jisx-212", errorCode); + myConverterData->myConverterArray[JISX212] = ucnv_loadSharedData("jisx-212", NULL, errorCode); } if(jpCharsetMasks[version]&CSM(GB2312)) { - myConverterData->myConverterArray[GB2312] = ucnv_open("ibm-5478", errorCode); /* gb_2312_80-1 */ + myConverterData->myConverterArray[GB2312] = ucnv_loadSharedData("ibm-5478", NULL, errorCode); /* gb_2312_80-1 */ } if(jpCharsetMasks[version]&CSM(KSC5601)) { - myConverterData->myConverterArray[KSC5601] = ucnv_open("ksc_5601", errorCode); + myConverterData->myConverterArray[KSC5601] = ucnv_loadSharedData("ksc_5601", NULL, errorCode); } /* set the function pointers to appropriate funtions */ @@ -491,11 +491,11 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti (myLocale[2]=='_' || myLocale[2]=='\0')){ /* open the required converters and cache them */ - myConverterData->myConverterArray[GB2312_1] = ucnv_open("ibm-5478",errorCode); + myConverterData->myConverterArray[GB2312_1] = ucnv_loadSharedData("ibm-5478", NULL, errorCode); if(version==1) { - myConverterData->myConverterArray[ISO_IR_165] = ucnv_open("iso-ir-165",errorCode); + myConverterData->myConverterArray[ISO_IR_165] = ucnv_loadSharedData("iso-ir-165", NULL, errorCode); } - myConverterData->myConverterArray[CNS_11643] = ucnv_open("cns-11643-1992",errorCode); + myConverterData->myConverterArray[CNS_11643] = ucnv_loadSharedData("cns-11643-1992", NULL, errorCode); /* set the function pointers to appropriate funtions */ @@ -528,6 +528,10 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti } cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; + + if(U_FAILURE(*errorCode)) { + _ISO2022Close(cnv); + } } else { *errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -537,24 +541,22 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti static void _ISO2022Close(UConverter *converter) { UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo); - UConverter **array = myData->myConverterArray; + UConverterSharedData **array = myData->myConverterArray; int32_t i; if (converter->extraInfo != NULL) { /*close the array of converter pointers and free the memory*/ for (i=0; icurrentConverter) { - myData->currentConverter=NULL; - } - ucnv_close(array[i]); + ucnv_unloadSharedDataIfReady(array[i]); } } - ucnv_close(myData->currentConverter); /* if not closed above */ + ucnv_close(myData->currentConverter); if(!converter->isExtraLocal){ uprv_free (converter->extraInfo); + converter->extraInfo = NULL; } } } @@ -1435,7 +1437,7 @@ getTrail: case JISX201: /* G0 SBCS */ MBCS_SINGLE_FROM_UCHAR32( - converterData->myConverterArray[cs]->sharedData, + converterData->myConverterArray[cs], sourceChar, &targetValue, useFallback); if(targetValue <= 0x7f) { @@ -1445,7 +1447,7 @@ getTrail: case ISO8859_7: /* G0 SBCS forced to 7-bit output */ MBCS_SINGLE_FROM_UCHAR32( - converterData->myConverterArray[cs]->sharedData, + converterData->myConverterArray[cs], sourceChar, &targetValue, useFallback); if(0x80 <= targetValue && targetValue <= 0xff) { @@ -1457,7 +1459,7 @@ getTrail: default: /* G0 DBCS */ MBCS_FROM_UCHAR32_ISO2022( - converterData->myConverterArray[cs]->sharedData, + converterData->myConverterArray[cs], sourceChar, &targetValue, useFallback, &len, MBCS_OUTPUT_2); if(len != 2) { @@ -1737,7 +1739,7 @@ escape: /* convert mySourceChar+0x80 to use a normal 8-bit table */ targetUniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( - myData->myConverterArray[cs]->sharedData, + myData->myConverterArray[cs], mySourceChar + 0x80); } /* return from a single-shift state to the previous one */ @@ -1747,7 +1749,7 @@ escape: if(mySourceChar <= 0x7f) { targetUniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( - myData->myConverterArray[cs]->sharedData, + myData->myConverterArray[cs], mySourceChar); } break; @@ -1765,7 +1767,7 @@ getTrailByte: tempBuf[0] = (char) (mySourceChar); tempBuf[1] = trailByte = *mySource++; mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); - targetUniChar = _MBCSSimpleGetNextUChar(myData->myConverterArray[cs]->sharedData, tempBuf, 2, FALSE); + targetUniChar = _MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; args->converter->toULength = 1; @@ -2452,7 +2454,7 @@ getTrail: } else{ /* convert U+0080..U+10ffff */ - UConverter *cnv; + UConverterSharedData *cnv; int32_t i; int8_t cs, g; @@ -2507,7 +2509,7 @@ getTrail: if(cs > 0) { if(cs > CNS_11643_0) { cnv = converterData->myConverterArray[CNS_11643]; - MBCS_FROM_UCHAR32_ISO2022(cnv->sharedData,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3); + MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3); if(len==3) { cs = (int8_t)(CNS_11643_0 + (targetValue >> 16) - 0x80); len = 2; @@ -2525,7 +2527,7 @@ getTrail: } else { /* GB2312_1 or ISO-IR-165 */ cnv = converterData->myConverterArray[cs]; - MBCS_FROM_UCHAR32_ISO2022(cnv->sharedData,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2); + MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2); g = 1; /* used if len == 2 */ } } @@ -2740,7 +2742,7 @@ escape: /* convert one or two bytes */ if(pToU2022State->g != 0) { if(mySource < mySourceLimit) { - UConverter *cnv; + UConverterSharedData *cnv; StateEnum tempState; int32_t tempBufLen; char trailByte; @@ -2765,7 +2767,7 @@ getTrailByte: /* return from a single-shift state to the previous one */ pToU2022State->g=pToU2022State->prevG; } - targetUniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, tempBuf, tempBufLen, FALSE); + targetUniChar = _MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; args->converter->toULength = 1; @@ -2921,9 +2923,7 @@ struct cloneStruct { UConverter cnv; UConverterDataISO2022 mydata; - UConverter currentCnv; /**< for ISO_2022 converter if the current converter is open */ - - UConverter clonedConverters[1]; /* Actually a variable sized array for all of the sub converters to be cloned. */ + UConverter currentConverter; }; @@ -2935,89 +2935,42 @@ _ISO_2022_SafeClone( UErrorCode *status) { struct cloneStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneStruct); - UConverterDataISO2022* cnvData = (UConverterDataISO2022*)cnv->extraInfo; - int32_t i; - int32_t sizes[UCNV_2022_MAX_CONVERTERS]; - int32_t numConverters = 0; - int32_t currentConverterIndex = -1; - int32_t currentConverterSize = 0; - char *ptr; /* buffer pointer */ - - if (U_FAILURE(*status)) { - return 0; - } - - for(i=0;(imyConverterArray[i];i++) { - int32_t size; - - size = 0; - ucnv_safeClone(cnvData->myConverterArray[i], NULL, &size, status); - bufferSizeNeeded += size; - sizes[i] = size; - numConverters++; - - if(cnvData->currentConverter == cnvData->myConverterArray[i]) { - currentConverterIndex = i; - } - } - - if(currentConverterIndex == -1) { /* -1 means - not found in array. Clone separately */ - currentConverterSize = 0; - if(cnvData->currentConverter) { - ucnv_safeClone(cnvData->currentConverter, NULL, ¤tConverterSize, status); - bufferSizeNeeded += currentConverterSize; - } - } - - for(;iextraInfo; localClone = (struct cloneStruct *)stackBuffer; - uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter)); - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISO2022)); + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - /* clone back sub cnvs */ + uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022)); - ptr = (char*)&localClone->clonedConverters; - for(i=0;imydata.myConverterArray[i] = ucnv_safeClone(cnvData->myConverterArray[i], (UConverter*)ptr, &size, status); - ptr += size; - } - for(;imydata.myConverterArray[i] = NULL; - } + /* share the subconverters */ - if(currentConverterIndex == -1) { /* -1 = not found in list */ - /* KR version 1 also uses the state in currentConverter for preserving state - * so we need to clone it too! - */ - if(cnvData->currentConverter) { - localClone->mydata.currentConverter = ucnv_safeClone(cnvData->currentConverter, ptr, ¤tConverterSize, status); - ptr += currentConverterSize; - } else { - localClone->mydata.currentConverter = NULL; + if(cnvData->currentConverter != NULL) { + size = (int32_t)sizeof(UConverter); + localClone->mydata.currentConverter = + ucnv_safeClone(cnvData->currentConverter, + &localClone->currentConverter, + &size, status); + if(U_FAILURE(*status)) { + return NULL; + } + } + + for(i=0; imyConverterArray[i] != NULL) { + ucnv_incrementRefCount(cnvData->myConverterArray[i]); } - } else { - localClone->mydata.currentConverter = localClone->mydata.myConverterArray[currentConverterIndex]; } localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */ - + localClone->cnv.isExtraLocal = TRUE; return &localClone->cnv; } @@ -3028,7 +2981,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, UErrorCode *pErrorCode) { int32_t i; - USet *cnvSet; UConverterDataISO2022* cnvData; if (U_FAILURE(*pErrorCode)) { @@ -3044,10 +2996,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, #endif cnvData = (UConverterDataISO2022*)cnv->extraInfo; - if (cnv->sharedData == &_ISO2022KRData && cnvData->currentConverter != NULL) { - ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode); - return; - } /* open a set and initialize it with code points that are algorithmically round-tripped */ switch(cnvData->locale[0]){ @@ -3077,13 +3025,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, break; } - /* open a helper set because ucnv_getUnicodeSet() first empties its result set */ - cnvSet = uset_open(1, 0); - if (!cnvSet) { - *pErrorCode =U_MEMORY_ALLOCATION_ERROR; - return; - } - /* * TODO: need to make this version-specific for CN. * CN version 0 does not map CNS planes 3..7 although @@ -3103,12 +3044,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, 0, 0x81, 0x82, pErrorCode); } else { - ucnv_getUnicodeSet(cnvData->myConverterArray[i], cnvSet, which, pErrorCode); - uset_addAll(set, cnvSet /* pErrorCode */); + _MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], set, which, pErrorCode); } } } - uset_close(cnvSet); } static const UConverterImpl _ISO2022Impl={ diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index 5bdafe90225..59b4e5f8c1d 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -554,32 +554,26 @@ ucnv_unload(UConverterSharedData *sharedData) { void ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) { - umtx_lock(&cnvCacheMutex); /* - Double checking doesn't work on some platforms. - Don't check referenceCounter outside of a mutex block. - - TODO We should be able to check for ~0 outside of the mutex, - improving performance for opening and closing of algorithmic converters. + Checking whether it's an algorithic converter is okay + in multithreaded applications because the value never changes. + Don't check referenceCounter for any other value. */ - if (sharedData->referenceCounter != ~0) { + if(sharedData != NULL && sharedData->referenceCounter != ~0) { + umtx_lock(&cnvCacheMutex); ucnv_unload(sharedData); + umtx_unlock(&cnvCacheMutex); } - umtx_unlock(&cnvCacheMutex); } void ucnv_incrementRefCount(UConverterSharedData *sharedData) { - umtx_lock(&cnvCacheMutex); - /* - Double checking doesn't work on some platforms. - Don't check referenceCounter outside of a mutex block. - */ - if (sharedData->referenceCounter != ~0) { + if(sharedData != NULL && sharedData->referenceCounter != ~0) { + umtx_lock(&cnvCacheMutex); sharedData->referenceCounter++; + umtx_unlock(&cnvCacheMutex); } - umtx_unlock(&cnvCacheMutex); } static void @@ -663,60 +657,58 @@ parseConverterOptions(const char *inName, * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) */ -UConverter * -ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) -{ - char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH], locale[ULOC_FULLNAME_CAPACITY]; - const char *realName; +UConverterSharedData * +ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UErrorCode * err) { + UConverterLookupData stackLookup; UConverterSharedData *mySharedConverterData = NULL; UErrorCode internalErrorCode = U_ZERO_ERROR; - uint32_t options = 0; - - UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); if (U_FAILURE (*err)) { - goto exitError; + return NULL; } - UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); + if(lookup == NULL) { + lookup = &stackLookup; + } - locale[0] = 0; + lookup->locale[0] = 0; + lookup->options = 0; /* In case "name" is NULL we want to open the default converter. */ if (converterName == NULL) { - realName = ucnv_io_getDefaultConverterName(); - if (realName == NULL) { + lookup->realName = ucnv_io_getDefaultConverterName(); + if (lookup->realName == NULL) { *err = U_MISSING_RESOURCE_ERROR; - goto exitError; + return NULL; } /* the default converter name is already canonical */ } else { /* separate the converter name from the options */ - parseConverterOptions(converterName, cnvName, locale, &options, err); + parseConverterOptions(converterName, lookup->cnvName, lookup->locale, &lookup->options, err); if (U_FAILURE(*err)) { /* Very bad name used. */ - goto exitError; + return NULL; } /* get the canonical converter name */ - realName = ucnv_io_getConverterName(cnvName, &internalErrorCode); - if (U_FAILURE(internalErrorCode) || realName == NULL) { + lookup->realName = ucnv_io_getConverterName(lookup->cnvName, &internalErrorCode); + if (U_FAILURE(internalErrorCode) || lookup->realName == NULL) { /* * set the input name in case the converter was added * without updating the alias table, or when there is no alias table */ - realName = cnvName; + lookup->realName = lookup->cnvName; } } /* separate the converter name from the options */ - if(realName != cnvName) { - parseConverterOptions(realName, cnvName, locale, &options, err); - realName = cnvName; + if(lookup->realName != lookup->cnvName) { + parseConverterOptions(lookup->realName, lookup->cnvName, lookup->locale, &lookup->options, err); + lookup->realName = lookup->cnvName; } /* get the shared data for an algorithmic converter, if it is one */ - mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(realName); + mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(lookup->realName); if (mySharedConverterData == NULL) { /* it is a data-based converter, get its shared data. */ @@ -728,40 +720,51 @@ ucnv_createConverter(UConverter *myUConverter, const char *converterName, UError args.size=sizeof(UConverterLoadArgs); args.nestedLoads=1; - args.options=options; + args.options=lookup->options; args.pkg=NULL; - args.name=realName; + args.name=lookup->realName; umtx_lock(&cnvCacheMutex); mySharedConverterData = ucnv_load(&args, err); umtx_unlock(&cnvCacheMutex); if (U_FAILURE (*err) || (mySharedConverterData == NULL)) { - goto exitError; + return NULL; } } - myUConverter = ucnv_createConverterFromSharedData(myUConverter, mySharedConverterData, realName, locale, options, err); + return mySharedConverterData; +} - if (U_FAILURE(*err)) - { - /* - Checking whether it's an algorithic converter is okay - in multithreaded applications because the value never changes. - Don't check referenceCounter for any other value. - */ - if (mySharedConverterData->referenceCounter != ~0) { - umtx_lock(&cnvCacheMutex); - --mySharedConverterData->referenceCounter; - umtx_unlock(&cnvCacheMutex); +UConverter * +ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) +{ + UConverterLookupData stackLookup; + UConverterSharedData *mySharedConverterData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); + + if(U_SUCCESS(*err)) { + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); + + mySharedConverterData = ucnv_loadSharedData(converterName, &stackLookup, err); + + if(U_SUCCESS(*err)) { + myUConverter = ucnv_createConverterFromSharedData( + myUConverter, mySharedConverterData, + stackLookup.realName, stackLookup.locale, stackLookup.options, + err); + + if(U_SUCCESS(*err)) { + UTRACE_EXIT_PTR_STATUS(myUConverter, *err); + return myUConverter; + } else { + ucnv_unloadSharedDataIfReady(mySharedConverterData); + } } - goto exitError; } - UTRACE_EXIT_PTR_STATUS(myUConverter, *err); - return myUConverter; - -exitError: + /* exit with error */ UTRACE_EXIT_STATUS(*err); return NULL; } diff --git a/icu4c/source/common/ucnv_bld.h b/icu4c/source/common/ucnv_bld.h index d53c2142884..a2e6a1a1fd7 100644 --- a/icu4c/source/common/ucnv_bld.h +++ b/icu4c/source/common/ucnv_bld.h @@ -216,15 +216,6 @@ struct UConverter { U_CDECL_END /* end of UConverter */ -typedef struct - { - UConverter *OptGrpConverter[0x20]; /* Converter per Opt. grp. */ - uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ - uint8_t localeConverterIndex; /* reasonable locale match for index */ - - } -UConverterDataLMBCS; - #define CONVERTER_FILE_EXTENSION ".cnv" /** diff --git a/icu4c/source/common/ucnv_ext.c b/icu4c/source/common/ucnv_ext.c index b4bcf6266ec..08ac7becfc4 100644 --- a/icu4c/source/common/ucnv_ext.c +++ b/icu4c/source/common/ucnv_ext.c @@ -930,7 +930,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, } static void -ucnv_extGetUnicodeSetString(const UConverter *cnv, +ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, const int32_t *cx, USet *set, UConverterUnicodeSet which, @@ -974,7 +974,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv, /* no mapping, do nothing */ } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { ucnv_extGetUnicodeSetString( - cnv, cx, set, which, minLength, + sharedData, cx, set, which, minLength, U_SENTINEL, s, length+1, (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), pErrorCode); @@ -988,7 +988,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv, } U_CFUNC void -ucnv_extGetUnicodeSet(const UConverter *cnv, +ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, USet *set, UConverterUnicodeSet which, UErrorCode *pErrorCode) { @@ -1003,7 +1003,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv, UChar32 c; int32_t length; - cx=cnv->sharedData->mbcs.extIndexes; + cx=sharedData->mbcs.extIndexes; if(cx==NULL) { return; } @@ -1017,7 +1017,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv, /* enumerate the from-Unicode trie table */ c=0; /* keep track of the current code point while enumerating */ - if(cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) { + if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) { /* DBCS-only, ignore single-byte results */ minLength=2; } else { @@ -1051,7 +1051,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv, length=0; U16_APPEND_UNSAFE(s, length, c); ucnv_extGetUnicodeSetString( - cnv, cx, set, which, minLength, + sharedData, cx, set, which, minLength, c, s, length, (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), pErrorCode); diff --git a/icu4c/source/common/ucnv_ext.h b/icu4c/source/common/ucnv_ext.h index 4e2940551a1..9fd02921ed7 100644 --- a/icu4c/source/common/ucnv_ext.h +++ b/icu4c/source/common/ucnv_ext.h @@ -379,7 +379,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, UErrorCode *pErrorCode); U_CFUNC void -ucnv_extGetUnicodeSet(const UConverter *cnv, +ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, USet *set, UConverterUnicodeSet which, UErrorCode *pErrorCode); diff --git a/icu4c/source/common/ucnv_imp.h b/icu4c/source/common/ucnv_imp.h index d697a398680..ecc037257e4 100644 --- a/icu4c/source/common/ucnv_imp.h +++ b/icu4c/source/common/ucnv_imp.h @@ -21,6 +21,7 @@ #define UCNV_IMP_H #include "unicode/utypes.h" +#include "unicode/uloc.h" #include "ucnv_bld.h" /* figures out if we need to go to file to read in the data tables. @@ -55,6 +56,21 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter, UConverterSharedDat UConverter* ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode *err); +typedef struct { + char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH], locale[ULOC_FULLNAME_CAPACITY]; + const char *realName; + uint32_t options; +} UConverterLookupData; + +/** + * Load a converter but do not create a UConverter object. + * Simply return the UConverterSharedData. + * Performs alias lookup etc. + * @internal + */ +UConverterSharedData * +ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UErrorCode * err); + /** * This may unload the shared data in a thread safe manner. * This will only unload the data if no other converters are sharing it. diff --git a/icu4c/source/common/ucnv_lmb.c b/icu4c/source/common/ucnv_lmb.c index 46633a86349..99dcae8e1ae 100644 --- a/icu4c/source/common/ucnv_lmb.c +++ b/icu4c/source/common/ucnv_lmb.c @@ -27,13 +27,18 @@ #if !UCONFIG_NO_LEGACY_CONVERSION +#include "unicode/ucnv_err.h" +#include "unicode/ucnv.h" +#include "unicode/uset.h" #include "cmemory.h" #include "cstring.h" -#include "unicode/ucnv_err.h" +#include "uassert.h" +#include "ucnv_imp.h" #include "ucnv_bld.h" -#include "unicode/ucnv.h" #include "ucnv_cnv.h" +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) + /* LMBCS @@ -219,7 +224,13 @@ Because of the extensive use of other character sets, the LMBCS converter keeps a mapping between optimization groups and IBM character sets, so that ICU converters can be created and used as needed. */ -static const char * const OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = { +/* As you can see, even though any byte below 0x20 could be an optimization +byte, only those at 0x13 or below can map to an actual converter. To limit +some loops and searches, we define a value for that last group converter:*/ + +#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ + +static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ /* 0x0001 */ "ibm-850", /* 0x0002 */ "ibm-851", @@ -245,12 +256,6 @@ static const char * const OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = { and 0x0019, the 1-2-3 system range control char */ }; -/* As you can see, even though any byte below 0x20 could be an optimization -byte, only those at 0x13 or below can map to an actual converter. To limit -some loops and searches, we define a value for that last group converter:*/ - -#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ - /* That's approximately all the data that's needed for translating LMBCS to Unicode. @@ -506,6 +511,13 @@ FindLMBCSLocale(const char *LocaleID) the definitions of these structures, see unicode\ucnv_bld.h */ +typedef struct + { + UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ + uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ + uint8_t localeConverterIndex; /* reasonable locale match for index */ + } +UConverterDataLMBCS; #define DECLARE_LMBCS_DATA(n) \ @@ -523,8 +535,8 @@ static const UConverterImpl _LMBCSImpl##n={\ NULL,\ NULL,\ NULL,\ - NULL,\ - ucnv_getCompleteUnicodeSet\ + _LMBCSSafeClone,\ + _LMBCSGetUnicodeSet\ };\ static const UConverterStaticData _LMBCSStaticData##n={\ sizeof(UConverterStaticData),\ @@ -559,21 +571,32 @@ _LMBCSOpenWorker(UConverter* _this, ulmbcs_byte_t OptGroup ) { - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); - if(extraInfo != NULL) + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); + if(extraInfo != NULL) { - ulmbcs_byte_t i; - ulmbcs_byte_t imax; - imax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]); + ulmbcs_byte_t i; - for (i=0; i < imax; i++) - { - extraInfo->OptGrpConverter[i] = - (OptGroupByteToCPName[i] != NULL) ? - ucnv_open(OptGroupByteToCPName[i], err) : NULL; - } - extraInfo->OptGroup = OptGroup; - extraInfo->localeConverterIndex = FindLMBCSLocale(locale); + uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); + + for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) + { + if(OptGroupByteToCPName[i] != NULL) { + extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], NULL, err); + } + } + + if(U_SUCCESS(*err)) { + extraInfo->OptGroup = OptGroup; + extraInfo->localeConverterIndex = FindLMBCSLocale(locale); + } else { + /* one of the subconverters could not be loaded, unload the previous ones */ + while(i > 0) { + if(extraInfo->OptGrpConverter[--i] != NULL) { + ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[i]); + extraInfo->OptGrpConverter[i] = NULL; + } + } + } } else { @@ -590,25 +613,62 @@ _LMBCSClose(UConverter * _this) ulmbcs_byte_t Ix; UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; - for (Ix=0; Ix < ULMBCS_GRP_UNICODE; Ix++) + for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) { if (extraInfo->OptGrpConverter[Ix] != NULL) - ucnv_close (extraInfo->OptGrpConverter[Ix]); + ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); } uprv_free (_this->extraInfo); } } -/* -Here's an all-crash stop for debugging, since ICU does not have asserts. -Turn this on by defining LMBCS_DEBUG, or by changing it to -#if 1 -*/ -#if LMBCS_DEBUG -#define MyAssert(b) {if (!(b)) {*(char *)0 = 1;}} -#else -#define MyAssert(b) -#endif +typedef struct LMBCSClone { + UConverter cnv; + UConverterDataLMBCS lmbcs; +} LMBCSClone; + +static UConverter * +_LMBCSSafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) { + LMBCSClone *newLMBCS; + UConverterDataLMBCS *extraInfo; + int32_t i; + + if(*pBufferSize<=0) { + *pBufferSize=(int32_t)sizeof(LMBCSClone); + return NULL; + } + + extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; + newLMBCS=(LMBCSClone *)stackBuffer; + + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); + + /* share the subconverters */ + for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { + if(extraInfo->OptGrpConverter[i] != NULL) { + ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); + } + } + + newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; + newLMBCS->cnv.isExtraLocal = TRUE; + return &newLMBCS->cnv; +} + +U_CFUNC void +_LMBCSGetUnicodeSet(const UConverter *cnv, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */ + uset_addRange(set, 0, 0xf5ff); + uset_addRange(set, 0xf700, 0x10ffff); +} /* Here's the basic helper function that we use when converting from @@ -628,33 +688,21 @@ LMBCSConversionWorker ( ) { ulmbcs_byte_t * pLMBCS = pStartLMBCS; - UConverter * xcnv = extraInfo->OptGrpConverter[group]; + UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; int bytesConverted; uint32_t value; ulmbcs_byte_t firstByte; - MyAssert(xcnv); - MyAssert(groupsharedData, *pUniChar, &value, FALSE); + bytesConverted = _MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE); /* get the first result byte */ - switch(bytesConverted) - { - case 4: - firstByte = (ulmbcs_byte_t)(value >> 24); - break; - case 3: - firstByte = (ulmbcs_byte_t)(value >> 16); - break; - case 2: - firstByte = (ulmbcs_byte_t)(value >> 8); - break; - case 1: - firstByte = (ulmbcs_byte_t)value; - break; - default: + if(bytesConverted > 0) { + firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); + } else { /* most common failure mode is an unassigned character */ groups_tried[group] = TRUE; return 0; @@ -665,7 +713,7 @@ LMBCSConversionWorker ( /* All initial byte values in lower ascii range should have been caught by now, except with the exception group. */ - MyAssert((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); + U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); /* use converted data: first write 0, 1 or two group bytes */ if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) @@ -1002,7 +1050,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, { UConverterDataLMBCS * extraInfo; ulmbcs_byte_t group; - UConverter* cnv; + UConverterSharedData *cnv; if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ { @@ -1038,11 +1086,11 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, if (*args->source == group) { /* single byte */ ++args->source; - uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 1, FALSE); + uniChar = _MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); ++args->source; } else { /* double byte */ - uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 2, FALSE); + uniChar = _MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); args->source += 2; } } @@ -1052,7 +1100,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, if (CurByte >= ULMBCS_C1START) { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte); + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); } else { @@ -1067,7 +1115,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, /* Lookup value must include opt group */ bytes[0] = group; bytes[1] = CurByte; - uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, bytes, 2, FALSE); + uniChar = _MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); } } } @@ -1078,24 +1126,24 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, cnv = extraInfo->OptGrpConverter[group]; if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ { - if (!_MBCSIsLeadByte(cnv->sharedData, CurByte)) + if (!_MBCSIsLeadByte(cnv, CurByte)) { CHECK_SOURCE_LIMIT(0); /* let the MBCS conversion consume CurByte again */ - uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 1, FALSE); + uniChar = _MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); } else { CHECK_SOURCE_LIMIT(1); /* let the MBCS conversion consume CurByte again */ - uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 2, FALSE); + uniChar = _MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); ++args->source; } } else /* single byte conversion */ { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte); + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); } } } diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index 1bb2b8aa849..bc86122574c 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -428,7 +428,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) { /* similar to _MBCSGetNextUChar() but recursive */ static void -_getUnicodeSetForBytes(const UConverter *cnv, +_getUnicodeSetForBytes(const UConverterSharedData *sharedData, const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits, USet *set, UConverterUnicodeSet which, @@ -441,7 +441,7 @@ _getUnicodeSetForBytes(const UConverter *cnv, entry=stateTable[state][b]; if(MBCS_ENTRY_IS_TRANSITION(entry)) { _getUnicodeSetForBytes( - cnv, stateTable, unicodeCodeUnits, + sharedData, stateTable, unicodeCodeUnits, set, which, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry), offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), @@ -506,24 +506,24 @@ _getUnicodeSetForBytes(const UConverter *cnv, * Does not empty the set first. */ U_CFUNC void -_MBCSGetUnicodeSetForBytes(const UConverter *cnv, +_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData, USet *set, UConverterUnicodeSet which, uint8_t state, int32_t lowByte, int32_t highByte, UErrorCode *pErrorCode) { _getUnicodeSetForBytes( - cnv, cnv->sharedData->mbcs.stateTable, cnv->sharedData->mbcs.unicodeCodeUnits, + sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits, set, which, state, 0, lowByte, highByte, pErrorCode); } -static void -_MBCSGetUnicodeSet(const UConverter *cnv, - USet *set, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - UConverterMBCSTable *mbcsTable; +U_CFUNC void +_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + const UConverterMBCSTable *mbcsTable; const uint16_t *table; uint32_t st3; @@ -531,14 +531,8 @@ _MBCSGetUnicodeSet(const UConverter *cnv, UChar32 c; - if(cnv->options&_MBCS_OPTION_GB18030) { - uset_addRange(set, 0, 0xd7ff); - uset_addRange(set, 0xe000, 0x10ffff); - return; - } - /* enumerate the from-Unicode trie table */ - mbcsTable=&cnv->sharedData->mbcs; + mbcsTable=&sharedData->mbcs; table=mbcsTable->fromUnicodeTable; if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { maxStage1=0x440; @@ -658,7 +652,20 @@ _MBCSGetUnicodeSet(const UConverter *cnv, } } - ucnv_extGetUnicodeSet(cnv, set, which, pErrorCode); + ucnv_extGetUnicodeSet(sharedData, set, which, pErrorCode); +} + +static void +_MBCSGetUnicodeSet(const UConverter *cnv, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + if(cnv->options&_MBCS_OPTION_GB18030) { + uset_addRange(set, 0, 0xd7ff); + uset_addRange(set, 0xe000, 0x10ffff); + } else { + _MBCSGetUnicodeSetForUnicode(cnv->sharedData, set, which, pErrorCode); + } } /* conversion extensions for input not in the main table -------------------- */ diff --git a/icu4c/source/common/ucnvmbcs.h b/icu4c/source/common/ucnvmbcs.h index c35335e7174..6efd3c2d11b 100644 --- a/icu4c/source/common/ucnvmbcs.h +++ b/icu4c/source/common/ucnvmbcs.h @@ -369,10 +369,24 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, * Does not empty the set first. */ U_CFUNC void -_MBCSGetUnicodeSetForBytes(const UConverter *cnv, +_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData, USet *set, UConverterUnicodeSet which, uint8_t state, int32_t lowByte, int32_t highByte, UErrorCode *pErrorCode); +/* + * Internal function returning a UnicodeSet for toUnicode() conversion. + * Currently only used for ISO-2022-CN, and only handles roundtrip mappings. + * In the future, if we add support for fallback sets, this function + * needs to be updated. + * Handles extensions. + * Does not empty the set first. + */ +U_CFUNC void +_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + #endif diff --git a/icu4c/source/common/ucnvscsu.c b/icu4c/source/common/ucnvscsu.c index 7b3901af19e..5d358842bcf 100644 --- a/icu4c/source/common/ucnvscsu.c +++ b/icu4c/source/common/ucnvscsu.c @@ -1969,8 +1969,7 @@ _SCSUSafeClone(const UConverter *cnv, } localClone = (struct cloneStruct *)stackBuffer; - uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter)); - localClone->cnv.isCopyLocal = TRUE; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); localClone->cnv.extraInfo = &localClone->mydata; @@ -1980,9 +1979,6 @@ _SCSUSafeClone(const UConverter *cnv, } - - - static const UConverterImpl _SCSUImpl={ UCNV_SCSU, diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index 7aab950df19..362548cd33b 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -457,8 +457,18 @@ ucnv_safeClone(const UConverter *cnv, int32_t *pBufferSize, UErrorCode *status); -/** @stable ICU 2.0 */ -#define U_CNV_SAFECLONE_BUFFERSIZE 4096 +/** + * \def U_CNV_SAFECLONE_BUFFERSIZE + * Definition of a buffer size that is designed to be large enough for + * converters to be cloned with ucnv_safeClone(). + * @stable ICU 2.0 + */ +#ifdef OS400 + /* OS/400 uses 16-byte pointers, making objects larger */ +# define U_CNV_SAFECLONE_BUFFERSIZE 2048 +#else +# define U_CNV_SAFECLONE_BUFFERSIZE 1024 +#endif /** * Deletes the unicode converter and releases resources associated diff --git a/icu4c/source/test/cintltst/ccapitst.c b/icu4c/source/test/cintltst/ccapitst.c index 65c28e1645f..f0b5d3f069b 100644 --- a/icu4c/source/test/cintltst/ccapitst.c +++ b/icu4c/source/test/cintltst/ccapitst.c @@ -21,6 +21,7 @@ #include "unicode/ucnv_err.h" #include "unicode/utypes.h" #include "unicode/ustring.h" +#include "ucnv_bld.h" /* for sizeof(UConverter) */ #include "cintltst.h" #include "ccapitst.h" @@ -1560,17 +1561,56 @@ static void TestConvertSafeCloneCallback() } } +static UBool +containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { + while(length>0) { + if(*p!=b) { + return TRUE; + } + ++p; + --length; + } + return FALSE; +} + +static UBool +usedStackBuffer(const void *p, const void *q) { + return + (UBool) + (p==q || + ((const char *)p-(const char *)q)<16); +} + static void TestConvertSafeClone() { -#define CLONETEST_CONVERTER_COUNT 12 + /* one 'regular' & all the 'private stateful' converters */ + static const char *const names[] = { + "ibm-1047", + "ISO_2022,locale=zh,version=1", + "SCSU", + "HZ", + "lmbcs", + "ISCII,version=0", + "ISO_2022,locale=kr,version=1", + "ISO_2022,locale=jp,version=2", + "BOCU-1", + "UTF-7", + "IMAP-mailbox-name", + "ibm-1047-s390" + }; + + static const int32_t bufferSizes[] = { + U_CNV_SAFECLONE_BUFFERSIZE, + (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ + (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ + }; char charBuffer [21]; /* Leave at an odd number for alignment testing */ - uint8_t buffer [CLONETEST_CONVERTER_COUNT] [U_CNV_SAFECLONE_BUFFERSIZE]; - int32_t bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - UConverter * someConverters [CLONETEST_CONVERTER_COUNT]; - UConverter * someClonedConverters [CLONETEST_CONVERTER_COUNT]; - UConverter * cnv; - UErrorCode err = U_ZERO_ERROR; + uint8_t buffer [3] [U_CNV_SAFECLONE_BUFFERSIZE]; + int32_t bufferSize, maxBufferSize; + const char *maxName; + UConverter * cnv, *cnv2; + UErrorCode err; char *pCharBuffer; const char *pConstCharBuffer; @@ -1584,139 +1624,172 @@ static void TestConvertSafeClone() UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); const UChar * pUniBuffer; const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); - int index; + int32_t index, j; - /* one 'regular' & all the 'private stateful' converters */ - someConverters[0] = ucnv_open("ibm-1047", &err); - someConverters[1] = ucnv_open("ISO_2022,locale=zh,version=1", &err); - someConverters[2] = ucnv_open("SCSU", &err); - someConverters[3] = ucnv_open("HZ", &err); - someConverters[4] = ucnv_open("lmbcs", &err); - someConverters[5] = ucnv_open("ISCII,version=0",&err); - someConverters[6] = ucnv_open("ISO_2022,locale=kr,version=1",&err); - someConverters[7] = ucnv_open("ISO_2022,locale=jp,version=1",&err); - someConverters[8] = ucnv_open("BOCU-1", &err); - someConverters[9] = ucnv_open("UTF-7", &err); - someConverters[10] = ucnv_open("IMAP-mailbox-name", &err); - someConverters[11] = ucnv_open("ibm-1047-s390", &err); + err = U_ZERO_ERROR; + cnv = ucnv_open(names[0], &err); + if(U_SUCCESS(err)) { + /* Check the various error & informational states: */ + + /* Null status - just returns NULL */ + bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; + if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) + { + log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); + } + /* error status - should return 0 & keep error the same */ + err = U_MEMORY_ALLOCATION_ERROR; + if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) + { + log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); + } + err = U_ZERO_ERROR; + + /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ + if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + { + log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); + } + err = U_ZERO_ERROR; + + /* buffer size pointer is 0 - fill in pbufferSize with a size */ + bufferSize = 0; + if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) + { + log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); + } + /* Verify our define is large enough */ + if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) + { + log_err("FAIL: Pre-calculated buffer size is too small\n"); + } + /* Verify we can use this run-time calculated size */ + if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) + { + log_err("FAIL: Converter can't be cloned with run-time size\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + /* size one byte too small - should allocate & let us know */ + --bufferSize; + if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + { + log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + err = U_ZERO_ERROR; + bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; + + /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ + if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) + { + log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); + } + if (cnv2) { + ucnv_close(cnv2); + } + + err = U_ZERO_ERROR; - if(U_FAILURE(err)) { - log_data_err("problems creating converters to clone- check the data.\n"); - return; /* bail - leak */ - } - /* Check the various error & informational states: */ + /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ + if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) + { + log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); + } - /* Null status - just returns NULL */ - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, 0)) - { - log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); - } - /* error status - should return 0 & keep error the same */ - err = U_MEMORY_ALLOCATION_ERROR; - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); - } - err = U_ZERO_ERROR; - - /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ - if (0 != ucnv_safeClone(someConverters[0], buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); - } - err = U_ZERO_ERROR; - - /* buffer size pointer is 0 - fill in pbufferSize with a size */ - bufferSize = 0; - if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) - { - log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); - } - /* Verify our define is large enough */ - if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) - { - log_err("FAIL: Pre-calculated buffer size is too small\n"); - } - /* Verify we can use this run-time calculated size */ - if (0 == (cnv = ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err)) || U_FAILURE(err)) - { - log_err("FAIL: Converter can't be cloned with run-time size\n"); - } - if (cnv) ucnv_close(cnv); - /* size one byte too small - should allocate & let us know */ - --bufferSize; - if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) - { - log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); - } - if (cnv) - ucnv_close(cnv); - err = U_ZERO_ERROR; - bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - - /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ - if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) - { - log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); - } - if (cnv) - ucnv_close(cnv); - err = U_ZERO_ERROR; - - /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ - if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) - { - log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); } - err = U_ZERO_ERROR; + maxBufferSize = 0; + maxName = ""; /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ - for (index = 0; index < CLONETEST_CONVERTER_COUNT; index++) - { - bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; - someClonedConverters[index] = ucnv_safeClone(someConverters[index], buffer[index], &bufferSize, &err); - - /* close the original immediately to make sure that the clone works by itself */ - ucnv_close(someConverters[index]); - - pCharBuffer = charBuffer; - pUniBuffer = uniBuffer; - - ucnv_fromUnicode(someClonedConverters[index], - &pCharBuffer, - charBufferLimit, - &pUniBuffer, - uniBufferLimit, - NULL, - TRUE, - &err); - if(U_FAILURE(err)){ - log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); - } - ucnv_toUnicode(someClonedConverters[index], - &pUCharTarget, - pUCharTargetLimit, - &pCharSource, - pCharSourceLimit, - NULL, - TRUE, - &err - ); - - if(U_FAILURE(err)){ - log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); - } - - pConstCharBuffer = charBuffer; - if (uniBuffer [0] != ucnv_getNextUChar(someClonedConverters[index], &pConstCharBuffer, pCharBuffer, &err)) + for(j = 0; j < LENGTHOF(bufferSizes); ++j) { + for (index = 0; index < LENGTHOF(names); index++) { - log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); + err = U_ZERO_ERROR; + cnv = ucnv_open(names[index], &err); + if(U_FAILURE(err)) { + log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); + continue; + } + + if(j == 0) { + /* preflight to get maxBufferSize */ + bufferSize = 0; + ucnv_safeClone(cnv, NULL, &bufferSize, &err); + if(bufferSize > maxBufferSize) { + maxBufferSize = bufferSize; + maxName = names[index]; + } + } + + memset(buffer, 0xaa, sizeof(buffer)); + + bufferSize = bufferSizes[j]; + cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); + + /* close the original immediately to make sure that the clone works by itself */ + ucnv_close(cnv); + + /* check if the clone function overwrote any bytes that it is not supposed to touch */ + if( + usedStackBuffer(cnv2, buffer[1]) ? + bufferSize > bufferSizes[j] || + containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || + containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) + : + containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa) + ) { + log_err("cloning %s overwrote bytes outside the bufferSize %d (requested %d)\n", + names[index], bufferSize, bufferSizes[j]); + } + + pCharBuffer = charBuffer; + pUniBuffer = uniBuffer; + + ucnv_fromUnicode(cnv2, + &pCharBuffer, + charBufferLimit, + &pUniBuffer, + uniBufferLimit, + NULL, + TRUE, + &err); + if(U_FAILURE(err)){ + log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); + } + ucnv_toUnicode(cnv2, + &pUCharTarget, + pUCharTargetLimit, + &pCharSource, + pCharSourceLimit, + NULL, + TRUE, + &err + ); + + if(U_FAILURE(err)){ + log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); + } + + pConstCharBuffer = charBuffer; + if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) + { + log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); + } + ucnv_close(cnv2); } - ucnv_close(someClonedConverters[index]); } + + log_info("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", + sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); } static void TestCCSID() { diff --git a/icu4c/source/test/cintltst/ncnvtst.c b/icu4c/source/test/cintltst/ncnvtst.c index ff687aeb61d..d16e5181f88 100644 --- a/icu4c/source/test/cintltst/ncnvtst.c +++ b/icu4c/source/test/cintltst/ncnvtst.c @@ -1732,7 +1732,10 @@ TestUnicodeSet() { "BOCU-1", "CESU-8", "gb18030", - "IMAP-mailbox-name", + "IMAP-mailbox-name" + }; + + static const char *const lmbcsNames[]={ "LMBCS-1", "LMBCS-2", "LMBCS-3", @@ -1806,6 +1809,29 @@ TestUnicodeSet() { ucnv_close(cnv); } + /* test LMBCS variants which convert all of Unicode except for U+F6xx */ + for(i=0; i