ICU-3343 fix and improve ucnv_safeClone() implementations

X-SVN-Rev: 14141
This commit is contained in:
Markus Scherer 2003-12-16 05:16:55 +00:00
parent 39d9921424
commit 7b5b479d09
14 changed files with 552 additions and 424 deletions

View file

@ -171,21 +171,10 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
ucnv_getName(cnv, status), cnv, stackBuffer);
/* Pointers on 64-bit platforms need to be aligned
* on a 64-bit boundry in memory.
*/
if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
*pBufferSize -= offsetUp;
stackBufferChars += offsetUp;
}
stackBuffer = (void *)stackBufferChars;
if (cnv->sharedData->impl->safeClone != NULL) {
/* call the custom safeClone function for sizing */
bufferSizeNeeded = 0;
cnv->sharedData->impl->safeClone(cnv, stackBuffer, &bufferSizeNeeded, status);
cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
}
else
{
@ -200,6 +189,22 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
}
/* Pointers on 64-bit platforms need to be aligned
* on a 64-bit boundary in memory.
*/
if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
if(*pBufferSize > offsetUp) {
*pBufferSize -= offsetUp;
stackBufferChars += offsetUp;
} else {
/* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
*pBufferSize = 1;
}
}
stackBuffer = (void *)stackBufferChars;
/* Now, see if we must allocate any memory */
if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
{
@ -224,6 +229,8 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
allocatedConverter = NULL;
}
uprv_memset(localConverter, 0, bufferSizeNeeded);
/* Copy initial state */
uprv_memcpy(localConverter, cnv, sizeof(UConverter));
localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
@ -255,8 +262,6 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
localConverter->isCopyLocal = TRUE;
}
localConverter->isExtraLocal = localConverter->isCopyLocal;
/* allow callback functions to handle any memory allocation */
toUArgs.converter = fromUArgs.converter = localConverter;
cbErr = U_ZERO_ERROR;

View file

@ -34,6 +34,7 @@
#include "unicode/uset.h"
#include "unicode/ucnv_err.h"
#include "unicode/ucnv_cb.h"
#include "ucnv_imp.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "ucnvmbcs.h"
@ -173,7 +174,7 @@ typedef struct{
#endif
Cnv2022Type currentType;
ISO2022State toU2022State, fromU2022State;
UConverter* myConverterArray[UCNV_2022_MAX_CONVERTERS];
UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
uint32_t key;
uint32_t version;
char locale[3];
@ -432,24 +433,23 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
}
myConverterData->version= 0;
version = options & UCNV_OPTIONS_VERSION_MASK;
myConverterData->myConverterArray[0] =NULL;
if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
(myLocale[2]=='_' || myLocale[2]=='\0')){
int len=0;
/* open the required converters and cache them */
if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
myConverterData->myConverterArray[ISO8859_7]= ucnv_open("ISO8859_7", errorCode);
myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
}
myConverterData->myConverterArray[JISX201] = ucnv_open("jisx-201", errorCode);
myConverterData->myConverterArray[JISX208] = ucnv_open("jisx-208", errorCode);
myConverterData->myConverterArray[JISX201] = ucnv_loadSharedData("jisx-201", NULL, errorCode);
myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("jisx-208", NULL, errorCode);
if(jpCharsetMasks[version]&CSM(JISX212)) {
myConverterData->myConverterArray[JISX212] = ucnv_open("jisx-212", errorCode);
myConverterData->myConverterArray[JISX212] = ucnv_loadSharedData("jisx-212", NULL, errorCode);
}
if(jpCharsetMasks[version]&CSM(GB2312)) {
myConverterData->myConverterArray[GB2312] = ucnv_open("ibm-5478", errorCode); /* gb_2312_80-1 */
myConverterData->myConverterArray[GB2312] = ucnv_loadSharedData("ibm-5478", NULL, errorCode); /* gb_2312_80-1 */
}
if(jpCharsetMasks[version]&CSM(KSC5601)) {
myConverterData->myConverterArray[KSC5601] = ucnv_open("ksc_5601", errorCode);
myConverterData->myConverterArray[KSC5601] = ucnv_loadSharedData("ksc_5601", NULL, errorCode);
}
/* set the function pointers to appropriate functions */
@ -491,11 +491,11 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
(myLocale[2]=='_' || myLocale[2]=='\0')){
/* open the required converters and cache them */
myConverterData->myConverterArray[GB2312_1] = ucnv_open("ibm-5478",errorCode);
myConverterData->myConverterArray[GB2312_1] = ucnv_loadSharedData("ibm-5478", NULL, errorCode);
if(version==1) {
myConverterData->myConverterArray[ISO_IR_165] = ucnv_open("iso-ir-165",errorCode);
myConverterData->myConverterArray[ISO_IR_165] = ucnv_loadSharedData("iso-ir-165", NULL, errorCode);
}
myConverterData->myConverterArray[CNS_11643] = ucnv_open("cns-11643-1992",errorCode);
myConverterData->myConverterArray[CNS_11643] = ucnv_loadSharedData("cns-11643-1992", NULL, errorCode);
/* set the function pointers to appropriate functions */
@ -528,6 +528,10 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
}
cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
if(U_FAILURE(*errorCode)) {
_ISO2022Close(cnv);
}
} else {
*errorCode = U_MEMORY_ALLOCATION_ERROR;
}
@ -537,24 +541,22 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
static void
_ISO2022Close(UConverter *converter) {
UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
UConverter **array = myData->myConverterArray;
UConverterSharedData **array = myData->myConverterArray;
int32_t i;
if (converter->extraInfo != NULL) {
/*close the array of converter pointers and free the memory*/
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
if(array[i]!=NULL) {
if(array[i]==myData->currentConverter) {
myData->currentConverter=NULL;
}
ucnv_close(array[i]);
ucnv_unloadSharedDataIfReady(array[i]);
}
}
ucnv_close(myData->currentConverter); /* if not closed above */
ucnv_close(myData->currentConverter);
if(!converter->isExtraLocal){
uprv_free (converter->extraInfo);
converter->extraInfo = NULL;
}
}
}
@ -1435,7 +1437,7 @@ getTrail:
case JISX201:
/* G0 SBCS */
MBCS_SINGLE_FROM_UCHAR32(
converterData->myConverterArray[cs]->sharedData,
converterData->myConverterArray[cs],
sourceChar, &targetValue,
useFallback);
if(targetValue <= 0x7f) {
@ -1445,7 +1447,7 @@ getTrail:
case ISO8859_7:
/* G0 SBCS forced to 7-bit output */
MBCS_SINGLE_FROM_UCHAR32(
converterData->myConverterArray[cs]->sharedData,
converterData->myConverterArray[cs],
sourceChar, &targetValue,
useFallback);
if(0x80 <= targetValue && targetValue <= 0xff) {
@ -1457,7 +1459,7 @@ getTrail:
default:
/* G0 DBCS */
MBCS_FROM_UCHAR32_ISO2022(
converterData->myConverterArray[cs]->sharedData,
converterData->myConverterArray[cs],
sourceChar, &targetValue,
useFallback, &len, MBCS_OUTPUT_2);
if(len != 2) {
@ -1737,7 +1739,7 @@ escape:
/* convert mySourceChar+0x80 to use a normal 8-bit table */
targetUniChar =
_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
myData->myConverterArray[cs]->sharedData,
myData->myConverterArray[cs],
mySourceChar + 0x80);
}
/* return from a single-shift state to the previous one */
@ -1747,7 +1749,7 @@ escape:
if(mySourceChar <= 0x7f) {
targetUniChar =
_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
myData->myConverterArray[cs]->sharedData,
myData->myConverterArray[cs],
mySourceChar);
}
break;
@ -1765,7 +1767,7 @@ getTrailByte:
tempBuf[0] = (char) (mySourceChar);
tempBuf[1] = trailByte = *mySource++;
mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
targetUniChar = _MBCSSimpleGetNextUChar(myData->myConverterArray[cs]->sharedData, tempBuf, 2, FALSE);
targetUniChar = _MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
} else {
args->converter->toUBytes[0] = (uint8_t)mySourceChar;
args->converter->toULength = 1;
@ -2452,7 +2454,7 @@ getTrail:
}
else{
/* convert U+0080..U+10ffff */
UConverter *cnv;
UConverterSharedData *cnv;
int32_t i;
int8_t cs, g;
@ -2507,7 +2509,7 @@ getTrail:
if(cs > 0) {
if(cs > CNS_11643_0) {
cnv = converterData->myConverterArray[CNS_11643];
MBCS_FROM_UCHAR32_ISO2022(cnv->sharedData,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3);
MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3);
if(len==3) {
cs = (int8_t)(CNS_11643_0 + (targetValue >> 16) - 0x80);
len = 2;
@ -2525,7 +2527,7 @@ getTrail:
} else {
/* GB2312_1 or ISO-IR-165 */
cnv = converterData->myConverterArray[cs];
MBCS_FROM_UCHAR32_ISO2022(cnv->sharedData,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2);
MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2);
g = 1; /* used if len == 2 */
}
}
@ -2740,7 +2742,7 @@ escape:
/* convert one or two bytes */
if(pToU2022State->g != 0) {
if(mySource < mySourceLimit) {
UConverter *cnv;
UConverterSharedData *cnv;
StateEnum tempState;
int32_t tempBufLen;
char trailByte;
@ -2765,7 +2767,7 @@ getTrailByte:
/* return from a single-shift state to the previous one */
pToU2022State->g=pToU2022State->prevG;
}
targetUniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, tempBuf, tempBufLen, FALSE);
targetUniChar = _MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
} else {
args->converter->toUBytes[0] = (uint8_t)mySourceChar;
args->converter->toULength = 1;
@ -2921,9 +2923,7 @@ struct cloneStruct
{
UConverter cnv;
UConverterDataISO2022 mydata;
UConverter currentCnv; /**< for ISO_2022 converter if the current converter is open */
UConverter clonedConverters[1]; /* Actually a variable sized array for all of the sub converters to be cloned. */
UConverter currentConverter;
};
@ -2935,89 +2935,42 @@ _ISO_2022_SafeClone(
UErrorCode *status)
{
struct cloneStruct * localClone;
int32_t bufferSizeNeeded = sizeof(struct cloneStruct);
UConverterDataISO2022* cnvData = (UConverterDataISO2022*)cnv->extraInfo;
int32_t i;
int32_t sizes[UCNV_2022_MAX_CONVERTERS];
int32_t numConverters = 0;
int32_t currentConverterIndex = -1;
int32_t currentConverterSize = 0;
char *ptr; /* buffer pointer */
if (U_FAILURE(*status)) {
return 0;
}
for(i=0;(i<UCNV_2022_MAX_CONVERTERS)&&cnvData->myConverterArray[i];i++) {
int32_t size;
size = 0;
ucnv_safeClone(cnvData->myConverterArray[i], NULL, &size, status);
bufferSizeNeeded += size;
sizes[i] = size;
numConverters++;
if(cnvData->currentConverter == cnvData->myConverterArray[i]) {
currentConverterIndex = i;
}
}
if(currentConverterIndex == -1) { /* -1 means - not found in array. Clone separately */
currentConverterSize = 0;
if(cnvData->currentConverter) {
ucnv_safeClone(cnvData->currentConverter, NULL, &currentConverterSize, status);
bufferSizeNeeded += currentConverterSize;
}
}
for(;i<UCNV_2022_MAX_CONVERTERS;i++) { /* zero the other sizes */
sizes[i]=0;
}
UConverterDataISO2022 *cnvData;
int32_t i, size;
if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
*pBufferSize = bufferSizeNeeded;
return 0;
}
if(*pBufferSize < bufferSizeNeeded) {
*status = U_BUFFER_OVERFLOW_ERROR;
return 0;
*pBufferSize = (int32_t)sizeof(struct cloneStruct);
return NULL;
}
cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
localClone = (struct cloneStruct *)stackBuffer;
uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter));
uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISO2022));
/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
/* clone back sub cnvs */
uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
ptr = (char*)&localClone->clonedConverters;
for(i=0;i<numConverters;i++) {
int32_t size;
size = sizes[i];
localClone->mydata.myConverterArray[i] = ucnv_safeClone(cnvData->myConverterArray[i], (UConverter*)ptr, &size, status);
ptr += size;
}
for(;i<UCNV_2022_MAX_CONVERTERS;i++) {
localClone->mydata.myConverterArray[i] = NULL;
}
/* share the subconverters */
if(currentConverterIndex == -1) { /* -1 = not found in list */
/* KR version 1 also uses the state in currentConverter for preserving state
* so we need to clone it too!
*/
if(cnvData->currentConverter) {
localClone->mydata.currentConverter = ucnv_safeClone(cnvData->currentConverter, ptr, &currentConverterSize, status);
ptr += currentConverterSize;
} else {
localClone->mydata.currentConverter = NULL;
if(cnvData->currentConverter != NULL) {
size = (int32_t)sizeof(UConverter);
localClone->mydata.currentConverter =
ucnv_safeClone(cnvData->currentConverter,
&localClone->currentConverter,
&size, status);
if(U_FAILURE(*status)) {
return NULL;
}
}
for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
if(cnvData->myConverterArray[i] != NULL) {
ucnv_incrementRefCount(cnvData->myConverterArray[i]);
}
} else {
localClone->mydata.currentConverter = localClone->mydata.myConverterArray[currentConverterIndex];
}
localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
localClone->cnv.isExtraLocal = TRUE;
return &localClone->cnv;
}
@ -3028,7 +2981,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
UErrorCode *pErrorCode)
{
int32_t i;
USet *cnvSet;
UConverterDataISO2022* cnvData;
if (U_FAILURE(*pErrorCode)) {
@ -3044,10 +2996,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
#endif
cnvData = (UConverterDataISO2022*)cnv->extraInfo;
if (cnv->sharedData == &_ISO2022KRData && cnvData->currentConverter != NULL) {
ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode);
return;
}
/* open a set and initialize it with code points that are algorithmically round-tripped */
switch(cnvData->locale[0]){
@ -3077,13 +3025,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
break;
}
/* open a helper set because ucnv_getUnicodeSet() first empties its result set */
cnvSet = uset_open(1, 0);
if (!cnvSet) {
*pErrorCode =U_MEMORY_ALLOCATION_ERROR;
return;
}
/*
* TODO: need to make this version-specific for CN.
* CN version 0 does not map CNS planes 3..7 although
@ -3103,12 +3044,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
0, 0x81, 0x82,
pErrorCode);
} else {
ucnv_getUnicodeSet(cnvData->myConverterArray[i], cnvSet, which, pErrorCode);
uset_addAll(set, cnvSet /* pErrorCode */);
_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], set, which, pErrorCode);
}
}
}
uset_close(cnvSet);
}
static const UConverterImpl _ISO2022Impl={

View file

@ -554,32 +554,26 @@ ucnv_unload(UConverterSharedData *sharedData) {
void
ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
{
umtx_lock(&cnvCacheMutex);
/*
Double checking doesn't work on some platforms.
Don't check referenceCounter outside of a mutex block.
TODO We should be able to check for ~0 outside of the mutex,
improving performance for opening and closing of algorithmic converters.
Checking whether it's an algorithmic converter is okay
in multithreaded applications because the value never changes.
Don't check referenceCounter for any other value.
*/
if (sharedData->referenceCounter != ~0) {
if(sharedData != NULL && sharedData->referenceCounter != ~0) {
umtx_lock(&cnvCacheMutex);
ucnv_unload(sharedData);
umtx_unlock(&cnvCacheMutex);
}
umtx_unlock(&cnvCacheMutex);
}
void
ucnv_incrementRefCount(UConverterSharedData *sharedData)
{
umtx_lock(&cnvCacheMutex);
/*
Double checking doesn't work on some platforms.
Don't check referenceCounter outside of a mutex block.
*/
if (sharedData->referenceCounter != ~0) {
if(sharedData != NULL && sharedData->referenceCounter != ~0) {
umtx_lock(&cnvCacheMutex);
sharedData->referenceCounter++;
umtx_unlock(&cnvCacheMutex);
}
umtx_unlock(&cnvCacheMutex);
}
static void
@ -663,60 +657,58 @@ parseConverterOptions(const char *inName,
* -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
* -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
*/
UConverter *
ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
{
char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH], locale[ULOC_FULLNAME_CAPACITY];
const char *realName;
UConverterSharedData *
ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UErrorCode * err) {
UConverterLookupData stackLookup;
UConverterSharedData *mySharedConverterData = NULL;
UErrorCode internalErrorCode = U_ZERO_ERROR;
uint32_t options = 0;
UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
if (U_FAILURE (*err)) {
goto exitError;
return NULL;
}
UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
if(lookup == NULL) {
lookup = &stackLookup;
}
locale[0] = 0;
lookup->locale[0] = 0;
lookup->options = 0;
/* In case "name" is NULL we want to open the default converter. */
if (converterName == NULL) {
realName = ucnv_io_getDefaultConverterName();
if (realName == NULL) {
lookup->realName = ucnv_io_getDefaultConverterName();
if (lookup->realName == NULL) {
*err = U_MISSING_RESOURCE_ERROR;
goto exitError;
return NULL;
}
/* the default converter name is already canonical */
} else {
/* separate the converter name from the options */
parseConverterOptions(converterName, cnvName, locale, &options, err);
parseConverterOptions(converterName, lookup->cnvName, lookup->locale, &lookup->options, err);
if (U_FAILURE(*err)) {
/* Very bad name used. */
goto exitError;
return NULL;
}
/* get the canonical converter name */
realName = ucnv_io_getConverterName(cnvName, &internalErrorCode);
if (U_FAILURE(internalErrorCode) || realName == NULL) {
lookup->realName = ucnv_io_getConverterName(lookup->cnvName, &internalErrorCode);
if (U_FAILURE(internalErrorCode) || lookup->realName == NULL) {
/*
* set the input name in case the converter was added
* without updating the alias table, or when there is no alias table
*/
realName = cnvName;
lookup->realName = lookup->cnvName;
}
}
/* separate the converter name from the options */
if(realName != cnvName) {
parseConverterOptions(realName, cnvName, locale, &options, err);
realName = cnvName;
if(lookup->realName != lookup->cnvName) {
parseConverterOptions(lookup->realName, lookup->cnvName, lookup->locale, &lookup->options, err);
lookup->realName = lookup->cnvName;
}
/* get the shared data for an algorithmic converter, if it is one */
mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(realName);
mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(lookup->realName);
if (mySharedConverterData == NULL)
{
/* it is a data-based converter, get its shared data. */
@ -728,40 +720,51 @@ ucnv_createConverter(UConverter *myUConverter, const char *converterName, UError
args.size=sizeof(UConverterLoadArgs);
args.nestedLoads=1;
args.options=options;
args.options=lookup->options;
args.pkg=NULL;
args.name=realName;
args.name=lookup->realName;
umtx_lock(&cnvCacheMutex);
mySharedConverterData = ucnv_load(&args, err);
umtx_unlock(&cnvCacheMutex);
if (U_FAILURE (*err) || (mySharedConverterData == NULL))
{
goto exitError;
return NULL;
}
}
myUConverter = ucnv_createConverterFromSharedData(myUConverter, mySharedConverterData, realName, locale, options, err);
return mySharedConverterData;
}
if (U_FAILURE(*err))
{
/*
Checking whether it's an algorithmic converter is okay
in multithreaded applications because the value never changes.
Don't check referenceCounter for any other value.
*/
if (mySharedConverterData->referenceCounter != ~0) {
umtx_lock(&cnvCacheMutex);
--mySharedConverterData->referenceCounter;
umtx_unlock(&cnvCacheMutex);
UConverter *
ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
{
UConverterLookupData stackLookup;
UConverterSharedData *mySharedConverterData;
UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
if(U_SUCCESS(*err)) {
UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
mySharedConverterData = ucnv_loadSharedData(converterName, &stackLookup, err);
if(U_SUCCESS(*err)) {
myUConverter = ucnv_createConverterFromSharedData(
myUConverter, mySharedConverterData,
stackLookup.realName, stackLookup.locale, stackLookup.options,
err);
if(U_SUCCESS(*err)) {
UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
return myUConverter;
} else {
ucnv_unloadSharedDataIfReady(mySharedConverterData);
}
}
goto exitError;
}
UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
return myUConverter;
exitError:
/* exit with error */
UTRACE_EXIT_STATUS(*err);
return NULL;
}

View file

@ -216,15 +216,6 @@ struct UConverter {
U_CDECL_END /* end of UConverter */
typedef struct
{
UConverter *OptGrpConverter[0x20]; /* Converter per Opt. grp. */
uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
uint8_t localeConverterIndex; /* reasonable locale match for index */
}
UConverterDataLMBCS;
#define CONVERTER_FILE_EXTENSION ".cnv"
/**

View file

@ -930,7 +930,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
}
static void
ucnv_extGetUnicodeSetString(const UConverter *cnv,
ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
const int32_t *cx,
USet *set,
UConverterUnicodeSet which,
@ -974,7 +974,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
ucnv_extGetUnicodeSetString(
cnv, cx, set, which, minLength,
sharedData, cx, set, which, minLength,
U_SENTINEL, s, length+1,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
@ -988,7 +988,7 @@ ucnv_extGetUnicodeSetString(const UConverter *cnv,
}
U_CFUNC void
ucnv_extGetUnicodeSet(const UConverter *cnv,
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
@ -1003,7 +1003,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
UChar32 c;
int32_t length;
cx=cnv->sharedData->mbcs.extIndexes;
cx=sharedData->mbcs.extIndexes;
if(cx==NULL) {
return;
}
@ -1017,7 +1017,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
/* enumerate the from-Unicode trie table */
c=0; /* keep track of the current code point while enumerating */
if(cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* DBCS-only, ignore single-byte results */
minLength=2;
} else {
@ -1051,7 +1051,7 @@ ucnv_extGetUnicodeSet(const UConverter *cnv,
length=0;
U16_APPEND_UNSAFE(s, length, c);
ucnv_extGetUnicodeSetString(
cnv, cx, set, which, minLength,
sharedData, cx, set, which, minLength,
c, s, length,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);

View file

@ -379,7 +379,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv,
UErrorCode *pErrorCode);
U_CFUNC void
ucnv_extGetUnicodeSet(const UConverter *cnv,
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);

View file

@ -21,6 +21,7 @@
#define UCNV_IMP_H
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "ucnv_bld.h"
/* figures out if we need to go to file to read in the data tables.
@ -55,6 +56,21 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter, UConverterSharedDat
UConverter* ucnv_createConverterFromPackage(const char *packageName, const char *converterName,
UErrorCode *err);
/*
 * Results of parsing and resolving a converter name.
 * ucnv_loadSharedData() fills one of these in so that a caller such as
 * ucnv_createConverter() can pass the resolved name, locale and options on
 * to ucnv_createConverterFromSharedData().
 */
typedef struct {
/* cnvName: converter name separated from its options; locale: locale option parsed out of the name */
char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH], locale[ULOC_FULLNAME_CAPACITY];
/* the name that is used for the algorithmic-type lookup and for loading the data */
const char *realName;
/* option bits parsed out of the converter name */
uint32_t options;
} UConverterLookupData;
/**
 * Load a converter but do not create a UConverter object.
 * Simply return the UConverterSharedData.
 * Performs alias lookup etc.
 *
 * Callers in this change release the returned data with
 * ucnv_unloadSharedDataIfReady().
 *
 * @param converterName Converter name, possibly with options attached;
 *                      NULL selects the default converter.
 * @param lookup        Receives the parsed name, locale and options.
 *                      May be NULL, in which case a function-local
 *                      structure is used instead.
 * @param err           ICU error code. If it already indicates a failure
 *                      on entry, nothing is loaded and NULL is returned.
 * @return The shared data for the converter, or NULL on failure.
 * @internal
 */
UConverterSharedData *
ucnv_loadSharedData(const char *converterName, UConverterLookupData *lookup, UErrorCode * err);
/**
* This may unload the shared data in a thread safe manner.
* This will only unload the data if no other converters are sharing it.

View file

@ -27,13 +27,18 @@
#if !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/ucnv_err.h"
#include "unicode/ucnv.h"
#include "unicode/uset.h"
#include "cmemory.h"
#include "cstring.h"
#include "unicode/ucnv_err.h"
#include "uassert.h"
#include "ucnv_imp.h"
#include "ucnv_bld.h"
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/*
LMBCS
@ -219,7 +224,13 @@ Because of the extensive use of other character sets, the LMBCS converter
keeps a mapping between optimization groups and IBM character sets, so that
ICU converters can be created and used as needed. */
static const char * const OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
/* As you can see, even though any byte below 0x20 could be an optimization
byte, only those at 0x13 or below can map to an actual converter. To limit
some loops and searches, we define a value for that last group converter:*/
#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = {
/* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
/* 0x0001 */ "ibm-850",
/* 0x0002 */ "ibm-851",
@ -245,12 +256,6 @@ static const char * const OptGroupByteToCPName[ULMBCS_CTRLOFFSET] = {
and 0x0019, the 1-2-3 system range control char */
};
/* As you can see, even though any byte below 0x20 could be an optimization
byte, only those at 0x13 or below can map to an actual converter. To limit
some loops and searches, we define a value for that last group converter:*/
#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */
/* That's approximately all the data that's needed for translating
LMBCS to Unicode.
@ -506,6 +511,13 @@ FindLMBCSLocale(const char *LocaleID)
the definitions of these structures, see unicode\ucnv_bld.h
*/
typedef struct
{
UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */
uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */
uint8_t localeConverterIndex; /* reasonable locale match for index */
}
UConverterDataLMBCS;
#define DECLARE_LMBCS_DATA(n) \
@ -523,8 +535,8 @@ static const UConverterImpl _LMBCSImpl##n={\
NULL,\
NULL,\
NULL,\
NULL,\
ucnv_getCompleteUnicodeSet\
_LMBCSSafeClone,\
_LMBCSGetUnicodeSet\
};\
static const UConverterStaticData _LMBCSStaticData##n={\
sizeof(UConverterStaticData),\
@ -559,21 +571,32 @@ _LMBCSOpenWorker(UConverter* _this,
ulmbcs_byte_t OptGroup
)
{
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
if(extraInfo != NULL)
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS));
if(extraInfo != NULL)
{
ulmbcs_byte_t i;
ulmbcs_byte_t imax;
imax = sizeof(extraInfo->OptGrpConverter)/sizeof(extraInfo->OptGrpConverter[0]);
ulmbcs_byte_t i;
for (i=0; i < imax; i++)
{
extraInfo->OptGrpConverter[i] =
(OptGroupByteToCPName[i] != NULL) ?
ucnv_open(OptGroupByteToCPName[i], err) : NULL;
}
extraInfo->OptGroup = OptGroup;
extraInfo->localeConverterIndex = FindLMBCSLocale(locale);
uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS));
for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++)
{
if(OptGroupByteToCPName[i] != NULL) {
extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], NULL, err);
}
}
if(U_SUCCESS(*err)) {
extraInfo->OptGroup = OptGroup;
extraInfo->localeConverterIndex = FindLMBCSLocale(locale);
} else {
/* one of the subconverters could not be loaded, unload the previous ones */
while(i > 0) {
if(extraInfo->OptGrpConverter[--i] != NULL) {
ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[i]);
extraInfo->OptGrpConverter[i] = NULL;
}
}
}
}
else
{
@ -590,25 +613,62 @@ _LMBCSClose(UConverter * _this)
ulmbcs_byte_t Ix;
UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo;
for (Ix=0; Ix < ULMBCS_GRP_UNICODE; Ix++)
for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++)
{
if (extraInfo->OptGrpConverter[Ix] != NULL)
ucnv_close (extraInfo->OptGrpConverter[Ix]);
ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]);
}
uprv_free (_this->extraInfo);
}
}
/*
Here's an all-crash stop for debugging, since ICU does not have asserts.
Turn this on by defining LMBCS_DEBUG, or by changing it to
#if 1
*/
#if LMBCS_DEBUG
#define MyAssert(b) {if (!(b)) {*(char *)0 = 1;}}
#else
#define MyAssert(b)
#endif
/*
 * Layout that _LMBCSSafeClone() imposes on the clone buffer:
 * the converter object followed by its LMBCS-specific data, so that the
 * clone's extraInfo can point into the same block of memory.
 */
typedef struct LMBCSClone {
/* the cloned converter itself; its address is what _LMBCSSafeClone() returns */
UConverter cnv;
/* copy of the source converter's extraInfo */
UConverterDataLMBCS lmbcs;
} LMBCSClone;
/*
 * safeClone implementation for LMBCS converters.
 *
 * With *pBufferSize <= 0 this is a sizing request: the required size is
 * stored in *pBufferSize and NULL is returned. Otherwise stackBuffer is
 * treated as an LMBCSClone, the LMBCS-specific data is copied into it, and
 * the clone shares the source's subconverter data (one extra reference per
 * loaded group).
 *
 * @param cnv         converter being cloned; its extraInfo is read as
 *                    UConverterDataLMBCS
 * @param stackBuffer memory for the clone, used as an LMBCSClone
 * @param pBufferSize in: available size; out (sizing request only):
 *                    sizeof(LMBCSClone)
 * @param status      not read or written by this function
 * @return the converter embedded in the clone buffer, or NULL for a
 *         sizing request
 */
static UConverter *
_LMBCSSafeClone(const UConverter *cnv,
                void *stackBuffer,
                int32_t *pBufferSize,
                UErrorCode *status) {
    UConverterDataLMBCS *sourceData;
    LMBCSClone *clone;
    int32_t group;

    /* sizing request: report the space needed and clone nothing */
    if(*pBufferSize <= 0) {
        *pBufferSize = (int32_t)sizeof(LMBCSClone);
        return NULL;
    }

    sourceData = (UConverterDataLMBCS *)cnv->extraInfo;
    clone = (LMBCSClone *)stackBuffer;

    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
    uprv_memcpy(&clone->lmbcs, sourceData, sizeof(UConverterDataLMBCS));

    /* share the subconverters: every loaded group gains one reference */
    group = 0;
    while(group <= ULMBCS_GRP_LAST) {
        if(sourceData->OptGrpConverter[group] != NULL) {
            ucnv_incrementRefCount(sourceData->OptGrpConverter[group]);
        }
        ++group;
    }

    /* the extra data lives inside the clone buffer, right behind the converter */
    clone->cnv.extraInfo = &clone->lmbcs;
    clone->cnv.isExtraLocal = TRUE;
    return &clone->cnv;
}
/*
 * getUnicodeSet implementation for LMBCS.
 *
 * Adds every code point except U+F600..U+F6FF to the set; see the LMBCS
 * explanation above (search for F6xx). The cnv, which and pErrorCode
 * arguments are not used here.
 *
 * @param set receives the two ranges U+0000..U+F5FF and U+F700..U+10FFFF
 */
U_CFUNC void
_LMBCSGetUnicodeSet(const UConverter *cnv,
                    USet *set,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode) {
    /* bounds of the one block that is left out: [start, limit) */
    enum {
        LMBCS_F6XX_START = 0xf600,
        LMBCS_F6XX_LIMIT = 0xf700
    };

    /* all but U+F6xx */
    uset_addRange(set, 0, LMBCS_F6XX_START - 1);
    uset_addRange(set, LMBCS_F6XX_LIMIT, 0x10ffff);
}
/*
Here's the basic helper function that we use when converting from
@ -628,33 +688,21 @@ LMBCSConversionWorker (
)
{
ulmbcs_byte_t * pLMBCS = pStartLMBCS;
UConverter * xcnv = extraInfo->OptGrpConverter[group];
UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group];
int bytesConverted;
uint32_t value;
ulmbcs_byte_t firstByte;
MyAssert(xcnv);
MyAssert(group<ULMBCS_GRP_UNICODE);
U_ASSERT(xcnv);
U_ASSERT(group<ULMBCS_GRP_UNICODE);
bytesConverted = _MBCSFromUChar32(xcnv->sharedData, *pUniChar, &value, FALSE);
bytesConverted = _MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE);
/* get the first result byte */
switch(bytesConverted)
{
case 4:
firstByte = (ulmbcs_byte_t)(value >> 24);
break;
case 3:
firstByte = (ulmbcs_byte_t)(value >> 16);
break;
case 2:
firstByte = (ulmbcs_byte_t)(value >> 8);
break;
case 1:
firstByte = (ulmbcs_byte_t)value;
break;
default:
if(bytesConverted > 0) {
firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8));
} else {
/* most common failure mode is an unassigned character */
groups_tried[group] = TRUE;
return 0;
@ -665,7 +713,7 @@ LMBCSConversionWorker (
/* All initial byte values in lower ascii range should have been caught by now,
except with the exception group.
*/
MyAssert((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT));
/* use converted data: first write 0, 1 or two group bytes */
if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group)
@ -1002,7 +1050,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
{
UConverterDataLMBCS * extraInfo;
ulmbcs_byte_t group;
UConverter* cnv;
UConverterSharedData *cnv;
if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */
{
@ -1038,11 +1086,11 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
if (*args->source == group) {
/* single byte */
++args->source;
uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 1, FALSE);
uniChar = _MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE);
++args->source;
} else {
/* double byte */
uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source, 2, FALSE);
uniChar = _MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE);
args->source += 2;
}
}
@ -1052,7 +1100,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
if (CurByte >= ULMBCS_C1START)
{
uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte);
uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
}
else
{
@ -1067,7 +1115,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
/* Lookup value must include opt group */
bytes[0] = group;
bytes[1] = CurByte;
uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, bytes, 2, FALSE);
uniChar = _MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE);
}
}
}
@ -1078,24 +1126,24 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args,
cnv = extraInfo->OptGrpConverter[group];
if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */
{
if (!_MBCSIsLeadByte(cnv->sharedData, CurByte))
if (!_MBCSIsLeadByte(cnv, CurByte))
{
CHECK_SOURCE_LIMIT(0);
/* let the MBCS conversion consume CurByte again */
uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 1, FALSE);
uniChar = _MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE);
}
else
{
CHECK_SOURCE_LIMIT(1);
/* let the MBCS conversion consume CurByte again */
uniChar = _MBCSSimpleGetNextUChar(cnv->sharedData, args->source - 1, 2, FALSE);
uniChar = _MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE);
++args->source;
}
}
else /* single byte conversion */
{
uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv->sharedData, CurByte);
uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte);
}
}
}

View file

@ -428,7 +428,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
/* similar to _MBCSGetNextUChar() but recursive */
static void
_getUnicodeSetForBytes(const UConverter *cnv,
_getUnicodeSetForBytes(const UConverterSharedData *sharedData,
const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits,
USet *set,
UConverterUnicodeSet which,
@ -441,7 +441,7 @@ _getUnicodeSetForBytes(const UConverter *cnv,
entry=stateTable[state][b];
if(MBCS_ENTRY_IS_TRANSITION(entry)) {
_getUnicodeSetForBytes(
cnv, stateTable, unicodeCodeUnits,
sharedData, stateTable, unicodeCodeUnits,
set, which,
(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry),
offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
@ -506,24 +506,24 @@ _getUnicodeSetForBytes(const UConverter *cnv,
* Does not empty the set first.
*/
U_CFUNC void
_MBCSGetUnicodeSetForBytes(const UConverter *cnv,
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
uint8_t state, int32_t lowByte, int32_t highByte,
UErrorCode *pErrorCode) {
_getUnicodeSetForBytes(
cnv, cnv->sharedData->mbcs.stateTable, cnv->sharedData->mbcs.unicodeCodeUnits,
sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits,
set, which,
state, 0, lowByte, highByte,
pErrorCode);
}
static void
_MBCSGetUnicodeSet(const UConverter *cnv,
USet *set,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
UConverterMBCSTable *mbcsTable;
U_CFUNC void
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
const UConverterMBCSTable *mbcsTable;
const uint16_t *table;
uint32_t st3;
@ -531,14 +531,8 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
UChar32 c;
if(cnv->options&_MBCS_OPTION_GB18030) {
uset_addRange(set, 0, 0xd7ff);
uset_addRange(set, 0xe000, 0x10ffff);
return;
}
/* enumerate the from-Unicode trie table */
mbcsTable=&cnv->sharedData->mbcs;
mbcsTable=&sharedData->mbcs;
table=mbcsTable->fromUnicodeTable;
if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
maxStage1=0x440;
@ -658,7 +652,20 @@ _MBCSGetUnicodeSet(const UConverter *cnv,
}
}
ucnv_extGetUnicodeSet(cnv, set, which, pErrorCode);
ucnv_extGetUnicodeSet(sharedData, set, which, pErrorCode);
}
/*
 * Adds to the set the code points for an MBCS converter.
 * Without the GB 18030 option, the work is delegated to
 * _MBCSGetUnicodeSetForUnicode() with the converter's shared data.
 * With the GB 18030 option, the ranges U+0000..U+D7FF and U+E000..U+10FFFF
 * are added directly, and which and pErrorCode are not used.
 */
static void
_MBCSGetUnicodeSet(const UConverter *cnv,
                   USet *set,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode) {
    if((cnv->options&_MBCS_OPTION_GB18030)==0) {
        /* regular MBCS converter: use the shared conversion data */
        _MBCSGetUnicodeSetForUnicode(cnv->sharedData, set, which, pErrorCode);
        return;
    }

    /* GB 18030 option: everything below and above U+D800..U+DFFF */
    uset_addRange(set, 0, 0xd7ff);
    uset_addRange(set, 0xe000, 0x10ffff);
}
/* conversion extensions for input not in the main table -------------------- */

View file

@ -369,10 +369,24 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
* Does not empty the set first.
*/
U_CFUNC void
_MBCSGetUnicodeSetForBytes(const UConverter *cnv,
_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
uint8_t state, int32_t lowByte, int32_t highByte,
UErrorCode *pErrorCode);
/*
 * Internal function adding to a USet the Unicode code points that the
 * MBCS conversion data in sharedData can convert from Unicode.
 * The implementation enumerates the from-Unicode trie table and then
 * adds the extension mappings via ucnv_extGetUnicodeSet().
 * Called by _MBCSGetUnicodeSet() for MBCS converters that do not have
 * the GB 18030 option set.
 * Only handles roundtrip mappings.
 * In the future, if we add support for fallback sets, this function
 * needs to be updated.
 * Handles extensions.
 * Does not empty the set first.
 */
U_CFUNC void
_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
USet *set,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);
#endif

View file

@ -1969,8 +1969,7 @@ _SCSUSafeClone(const UConverter *cnv,
}
localClone = (struct cloneStruct *)stackBuffer;
uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter));
localClone->cnv.isCopyLocal = TRUE;
/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
localClone->cnv.extraInfo = &localClone->mydata;
@ -1980,9 +1979,6 @@ _SCSUSafeClone(const UConverter *cnv,
}
static const UConverterImpl _SCSUImpl={
UCNV_SCSU,

View file

@ -457,8 +457,18 @@ ucnv_safeClone(const UConverter *cnv,
int32_t *pBufferSize,
UErrorCode *status);
/** @stable ICU 2.0 */
#define U_CNV_SAFECLONE_BUFFERSIZE 4096
/**
 * \def U_CNV_SAFECLONE_BUFFERSIZE
 * Definition of a buffer size that is designed to be large enough for
 * converters to be cloned with ucnv_safeClone().
 * Pass a buffer of this size as stackBuffer and set *pBufferSize to this value.
 * If the buffer that is passed in is smaller than what a particular converter
 * needs, ucnv_safeClone() allocates the clone instead and sets
 * U_SAFECLONE_ALLOCATED_WARNING.
 * @stable ICU 2.0
 */
#ifdef OS400
/* OS/400 uses 16-byte pointers, making objects larger */
# define U_CNV_SAFECLONE_BUFFERSIZE 2048
#else
# define U_CNV_SAFECLONE_BUFFERSIZE 1024
#endif
/**
* Deletes the unicode converter and releases resources associated

View file

@ -21,6 +21,7 @@
#include "unicode/ucnv_err.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "ucnv_bld.h" /* for sizeof(UConverter) */
#include "cintltst.h"
#include "ccapitst.h"
@ -1560,17 +1561,56 @@ static void TestConvertSafeCloneCallback()
}
}
/*
 * Checks the length bytes starting at p against the fill byte b.
 * Returns TRUE at the first byte that differs from b,
 * FALSE if all bytes equal b or if length<=0.
 */
static UBool
containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
    int32_t i;

    for(i=0; i<length; ++i) {
        if(p[i]!=b) {
            return TRUE;
        }
    }
    return FALSE;
}
/*
 * Heuristic for the safeClone tests: was the clone p placed into the
 * caller-supplied buffer q?
 * ucnv_safeClone() may move the start of the buffer up for alignment,
 * so p counts as "in the buffer" if it is at q or fewer than 16 bytes above q.
 *
 * A pointer below q (for example a heap-allocated clone at a lower address,
 * or typically NULL after a failed clone) is not in the buffer.
 * The lower-bound check is needed because the difference p-q is negative
 * in that case and would otherwise pass the <16 test.
 *
 * @param p the clone returned by ucnv_safeClone()
 * @param q the start of the buffer that was passed to ucnv_safeClone()
 * @return TRUE if p lies in [q, q+16), otherwise FALSE
 */
static UBool
usedStackBuffer(const void *p, const void *q) {
    const char *clone=(const char *)p;
    const char *buffer=(const char *)q;

    return (UBool)(clone>=buffer && (clone-buffer)<16);
}
static void TestConvertSafeClone()
{
#define CLONETEST_CONVERTER_COUNT 12
/* one 'regular' & all the 'private stateful' converters */
static const char *const names[] = {
"ibm-1047",
"ISO_2022,locale=zh,version=1",
"SCSU",
"HZ",
"lmbcs",
"ISCII,version=0",
"ISO_2022,locale=kr,version=1",
"ISO_2022,locale=jp,version=2",
"BOCU-1",
"UTF-7",
"IMAP-mailbox-name",
"ibm-1047-s390"
};
static const int32_t bufferSizes[] = {
U_CNV_SAFECLONE_BUFFERSIZE,
(int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
(int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
};
char charBuffer [21]; /* Leave at an odd number for alignment testing */
uint8_t buffer [CLONETEST_CONVERTER_COUNT] [U_CNV_SAFECLONE_BUFFERSIZE];
int32_t bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
UConverter * someConverters [CLONETEST_CONVERTER_COUNT];
UConverter * someClonedConverters [CLONETEST_CONVERTER_COUNT];
UConverter * cnv;
UErrorCode err = U_ZERO_ERROR;
uint8_t buffer [3] [U_CNV_SAFECLONE_BUFFERSIZE];
int32_t bufferSize, maxBufferSize;
const char *maxName;
UConverter * cnv, *cnv2;
UErrorCode err;
char *pCharBuffer;
const char *pConstCharBuffer;
@ -1584,139 +1624,172 @@ static void TestConvertSafeClone()
UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer);
const UChar * pUniBuffer;
const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer);
int index;
int32_t index, j;
/* one 'regular' & all the 'private stateful' converters */
someConverters[0] = ucnv_open("ibm-1047", &err);
someConverters[1] = ucnv_open("ISO_2022,locale=zh,version=1", &err);
someConverters[2] = ucnv_open("SCSU", &err);
someConverters[3] = ucnv_open("HZ", &err);
someConverters[4] = ucnv_open("lmbcs", &err);
someConverters[5] = ucnv_open("ISCII,version=0",&err);
someConverters[6] = ucnv_open("ISO_2022,locale=kr,version=1",&err);
someConverters[7] = ucnv_open("ISO_2022,locale=jp,version=1",&err);
someConverters[8] = ucnv_open("BOCU-1", &err);
someConverters[9] = ucnv_open("UTF-7", &err);
someConverters[10] = ucnv_open("IMAP-mailbox-name", &err);
someConverters[11] = ucnv_open("ibm-1047-s390", &err);
err = U_ZERO_ERROR;
cnv = ucnv_open(names[0], &err);
if(U_SUCCESS(err)) {
/* Check the various error & informational states: */
/* Null status - just returns NULL */
bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0))
{
log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
}
/* error status - should return 0 & keep error the same */
err = U_MEMORY_ALLOCATION_ERROR;
if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
}
err = U_ZERO_ERROR;
/* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
}
err = U_ZERO_ERROR;
/* buffer size pointer is 0 - fill in pbufferSize with a size */
bufferSize = 0;
if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
{
log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
}
/* Verify our define is large enough */
if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
{
log_err("FAIL: Pre-calculated buffer size is too small\n");
}
/* Verify we can use this run-time calculated size */
if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
{
log_err("FAIL: Converter can't be cloned with run-time size\n");
}
if (cnv2) {
ucnv_close(cnv2);
}
/* size one byte too small - should allocate & let us know */
--bufferSize;
if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
{
log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
}
if (cnv2) {
ucnv_close(cnv2);
}
err = U_ZERO_ERROR;
bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
/* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
{
log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
}
if (cnv2) {
ucnv_close(cnv2);
}
err = U_ZERO_ERROR;
if(U_FAILURE(err)) {
log_data_err("problems creating converters to clone- check the data.\n");
return; /* bail - leak */
}
/* Check the various error & informational states: */
/* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
}
/* Null status - just returns NULL */
if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, 0))
{
log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
}
/* error status - should return 0 & keep error the same */
err = U_MEMORY_ALLOCATION_ERROR;
if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
}
err = U_ZERO_ERROR;
/* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
if (0 != ucnv_safeClone(someConverters[0], buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
}
err = U_ZERO_ERROR;
/* buffer size pointer is 0 - fill in pbufferSize with a size */
bufferSize = 0;
if (0 != ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
{
log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
}
/* Verify our define is large enough */
if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
{
log_err("FAIL: Pre-calculated buffer size is too small\n");
}
/* Verify we can use this run-time calculated size */
if (0 == (cnv = ucnv_safeClone(someConverters[0], buffer[0], &bufferSize, &err)) || U_FAILURE(err))
{
log_err("FAIL: Converter can't be cloned with run-time size\n");
}
if (cnv)
ucnv_close(cnv);
/* size one byte too small - should allocate & let us know */
--bufferSize;
if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
{
log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
}
if (cnv)
ucnv_close(cnv);
err = U_ZERO_ERROR;
bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
/* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_WARNING */
if (0 == (cnv = ucnv_safeClone(someConverters[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
{
log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
}
if (cnv)
ucnv_close(cnv);
err = U_ZERO_ERROR;
/* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
}
err = U_ZERO_ERROR;
maxBufferSize = 0;
maxName = "";
/* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
for (index = 0; index < CLONETEST_CONVERTER_COUNT; index++)
{
bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
someClonedConverters[index] = ucnv_safeClone(someConverters[index], buffer[index], &bufferSize, &err);
/* close the original immediately to make sure that the clone works by itself */
ucnv_close(someConverters[index]);
pCharBuffer = charBuffer;
pUniBuffer = uniBuffer;
ucnv_fromUnicode(someClonedConverters[index],
&pCharBuffer,
charBufferLimit,
&pUniBuffer,
uniBufferLimit,
NULL,
TRUE,
&err);
if(U_FAILURE(err)){
log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
}
ucnv_toUnicode(someClonedConverters[index],
&pUCharTarget,
pUCharTargetLimit,
&pCharSource,
pCharSourceLimit,
NULL,
TRUE,
&err
);
if(U_FAILURE(err)){
log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
}
pConstCharBuffer = charBuffer;
if (uniBuffer [0] != ucnv_getNextUChar(someClonedConverters[index], &pConstCharBuffer, pCharBuffer, &err))
for(j = 0; j < LENGTHOF(bufferSizes); ++j) {
for (index = 0; index < LENGTHOF(names); index++)
{
log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
err = U_ZERO_ERROR;
cnv = ucnv_open(names[index], &err);
if(U_FAILURE(err)) {
log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err));
continue;
}
if(j == 0) {
/* preflight to get maxBufferSize */
bufferSize = 0;
ucnv_safeClone(cnv, NULL, &bufferSize, &err);
if(bufferSize > maxBufferSize) {
maxBufferSize = bufferSize;
maxName = names[index];
}
}
memset(buffer, 0xaa, sizeof(buffer));
bufferSize = bufferSizes[j];
cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
/* close the original immediately to make sure that the clone works by itself */
ucnv_close(cnv);
/* check if the clone function overwrote any bytes that it is not supposed to touch */
if(
usedStackBuffer(cnv2, buffer[1]) ?
bufferSize > bufferSizes[j] ||
containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
:
containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)
) {
log_err("cloning %s overwrote bytes outside the bufferSize %d (requested %d)\n",
names[index], bufferSize, bufferSizes[j]);
}
pCharBuffer = charBuffer;
pUniBuffer = uniBuffer;
ucnv_fromUnicode(cnv2,
&pCharBuffer,
charBufferLimit,
&pUniBuffer,
uniBufferLimit,
NULL,
TRUE,
&err);
if(U_FAILURE(err)){
log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
}
ucnv_toUnicode(cnv2,
&pUCharTarget,
pUCharTargetLimit,
&pCharSource,
pCharSourceLimit,
NULL,
TRUE,
&err
);
if(U_FAILURE(err)){
log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
}
pConstCharBuffer = charBuffer;
if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
{
log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
}
ucnv_close(cnv2);
}
ucnv_close(someClonedConverters[index]);
}
log_info("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
}
static void TestCCSID() {

View file

@ -1732,7 +1732,10 @@ TestUnicodeSet() {
"BOCU-1",
"CESU-8",
"gb18030",
"IMAP-mailbox-name",
"IMAP-mailbox-name"
};
static const char *const lmbcsNames[]={
"LMBCS-1",
"LMBCS-2",
"LMBCS-3",
@ -1806,6 +1809,29 @@ TestUnicodeSet() {
ucnv_close(cnv);
}
/* test LMBCS variants which convert all of Unicode except for U+F6xx */
for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
errorCode=U_ZERO_ERROR;
name=lmbcsNames[i];
cnv=ucnv_open(name, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("error: unable to open converter %s - %s\n",
name, u_errorName(errorCode));
continue;
}
uset_clear(set);
ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
name, u_errorName(errorCode));
} else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
}
ucnv_close(cnv);
}
/* test specific sets */
for(i=0; i<LENGTHOF(nameRanges); ++i) {
errorCode=U_ZERO_ERROR;