ICU-2949 define and fix ucnv_getMaxCharSize() behavior

X-SVN-Rev: 13537
This commit is contained in:
Markus Scherer 2003-10-30 22:54:18 +00:00
parent 0d6d3ceda0
commit cefe1b98ab
8 changed files with 61 additions and 12 deletions

View file

@ -645,7 +645,7 @@ ucnv_resetFromUnicode(UConverter *converter)
/**
 * Returns the maximum number of bytes this converter outputs per UChar.
 *
 * The value is read from the converter's own maxBytesPerUChar field, not
 * from the shared static data, so that any per-converter adjustment made
 * when the converter was opened is what callers see.
 *
 * @param converter the converter to query; must not be NULL
 * @return the converter's maxBytesPerUChar value
 */
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize (const UConverter * converter)
{
    return converter->maxBytesPerUChar;
}

View file

@ -435,10 +435,10 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
uprv_strcpy(myConverterData->name,"ISO_2022");
}
cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
} else {
*errorCode = U_MEMORY_ALLOCATION_ERROR;
}
}
@ -2823,7 +2823,7 @@ static const UConverterStaticData _ISO2022StaticData={
UCNV_IBM,
UCNV_ISO_2022,
1,
4,
3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
{ 0x1a, 0, 0, 0 },
1,
FALSE,
@ -2873,7 +2873,7 @@ static const UConverterStaticData _ISO2022JPStaticData={
UCNV_IBM,
UCNV_ISO_2022,
1,
6,
6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
{ 0x1a, 0, 0, 0 },
1,
FALSE,
@ -2923,7 +2923,7 @@ static const UConverterStaticData _ISO2022KRStaticData={
UCNV_IBM,
UCNV_ISO_2022,
1,
3,
3, /* max 3 bytes per UChar: SO+DBCS */
{ 0x1a, 0, 0, 0 },
1,
FALSE,
@ -2974,7 +2974,7 @@ static const UConverterStaticData _ISO2022CNStaticData={
UCNV_IBM,
UCNV_ISO_2022,
2,
8,
8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
{ 0x1a, 0, 0, 0 },
1,
FALSE,

View file

@ -775,6 +775,7 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter,
myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus;
myUConverter->maxBytesPerUChar = myUConverter->sharedData->staticData->maxBytesPerChar;
myUConverter->subChar1 = myUConverter->sharedData->staticData->subChar1;
myUConverter->subCharLen = myUConverter->sharedData->staticData->subCharLen;
uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->staticData->subChar, myUConverter->subCharLen);

View file

@ -64,7 +64,7 @@ typedef struct UConverterStaticData { /* +offset: size */
int8_t conversionType; /* +69: 1 conversion type */
int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes per char in this codepage */
int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */
int8_t subCharLen; /* +76: 1 */
@ -161,6 +161,14 @@ struct UConverter {
*/
UChar32 fromUChar32;
/*
* value for ucnv_getMaxCharSize()
*
* usually simply copied from the static data, but ucnvmbcs.c modifies
* the value depending on the converter type and options
*/
int8_t maxBytesPerUChar;
int8_t subCharLen; /* length of the codepage specific character sequence */
int8_t invalidCharLength;
int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */

View file

@ -124,7 +124,19 @@
* [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
* [16] length of fromUStage3b[]
*
* [17]..[30] reserved
* [17] Bit field containing numbers of bytes:
* 31..24 reserved, 0
* 23..16 maximum input bytes
* 15.. 8 maximum output bytes
* 7.. 0 maximum bytes per UChar
*
* [18] Bit field containing numbers of UChars:
* 31..24 reserved, 0
* 23..16 maximum input UChars
* 15.. 8 maximum output UChars
* 7.. 0 maximum UChars per byte
*
* [19]..[30] reserved, 0
* [31] number of bytes for the entire extension structure
* [>31] reserved; there are indexes[0] indexes
*
@ -303,7 +315,10 @@ enum {
UCNV_EXT_FROM_U_STAGE_3B_INDEX,
UCNV_EXT_FROM_U_STAGE_3B_LENGTH,
UCNV_EXT_RESERVED_INDEX, /* 17, moves with additional indexes */
UCNV_EXT_COUNT_BYTES, /* 17 */
UCNV_EXT_COUNT_UCHARS,
UCNV_EXT_RESERVED_INDEX, /* 19, moves with additional indexes */
UCNV_EXT_SIZE=31,
UCNV_EXT_INDEXES_MIN_LENGTH=32
@ -313,6 +328,9 @@ enum {
/*
 * Treats indexes[index] as a byte offset from the start of indexes[] and
 * yields that address as a pointer to const itemType.
 */
#define UCNV_EXT_ARRAY(indexes, index, itemType) \
    ( (const itemType *) ( (const char *)(indexes) + (indexes)[index] ) )
/*
 * Extracts bits 7..0 of indexes[UCNV_EXT_COUNT_BYTES], i.e. the
 * "maximum bytes per UChar" field of the byte-count bit field.
 */
#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \
    ( 0xff & (indexes)[UCNV_EXT_COUNT_BYTES] )
/* internal API ------------------------------------------------------------- */
U_CFUNC UBool

View file

@ -528,7 +528,7 @@ static const UConverterImpl _LMBCSImpl##n={\
static const UConverterStaticData _LMBCSStaticData##n={\
sizeof(UConverterStaticData),\
"LMBCS-" #n,\
0, UCNV_IBM, UCNV_LMBCS_##n, 1, 2,\
0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\
{ 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
};\
const UConverterSharedData _LMBCSData##n={\

View file

@ -757,7 +757,8 @@ static const UConverterImpl _UTF8Impl={
static const UConverterStaticData _UTF8StaticData={
sizeof(UConverterStaticData),
"UTF-8",
1208, UCNV_IBM, UCNV_UTF8, 1, 4,
1208, UCNV_IBM, UCNV_UTF8,
1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
0,
0,

View file

@ -898,6 +898,10 @@ _MBCSOpen(UConverter *cnv,
const char *locale,
uint32_t options,
UErrorCode *pErrorCode) {
const int32_t *extIndexes;
uint8_t outputType;
int8_t maxBytesPerUChar;
if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
/* do this because double-checked locking is broken */
UBool isCached;
@ -914,7 +918,6 @@ _MBCSOpen(UConverter *cnv,
}
}
if(uprv_strstr(name, "18030")!=NULL) {
if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
/* set a flag for GB 18030 mode, which changes the callback behavior */
@ -922,6 +925,24 @@ _MBCSOpen(UConverter *cnv,
}
}
/* fix maxBytesPerUChar depending on outputType and options etc. */
outputType=cnv->sharedData->table->mbcs.outputType;
if(outputType==MBCS_OUTPUT_2_SISO) {
cnv->maxBytesPerUChar=3; /* SO+DBCS */
}
extIndexes=cnv->sharedData->table->mbcs.extIndexes;
if(extIndexes!=NULL) {
maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
if(outputType==MBCS_OUTPUT_2_SISO) {
++maxBytesPerUChar; /* SO + multiple DBCS */
}
if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
cnv->maxBytesPerUChar=maxBytesPerUChar;
}
}
#if 0
/*
* documentation of UConverter fields used for status