mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-2949 define and fix ucnv_getMaxCharSize() behavior
X-SVN-Rev: 13537
This commit is contained in:
parent
0d6d3ceda0
commit
cefe1b98ab
8 changed files with 61 additions and 12 deletions
|
@ -645,7 +645,7 @@ ucnv_resetFromUnicode(UConverter *converter)
|
|||
U_CAPI int8_t U_EXPORT2
|
||||
ucnv_getMaxCharSize (const UConverter * converter)
|
||||
{
|
||||
return converter->sharedData->staticData->maxBytesPerChar;
|
||||
return converter->maxBytesPerUChar;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -435,10 +435,10 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
|
|||
uprv_strcpy(myConverterData->name,"ISO_2022");
|
||||
}
|
||||
|
||||
cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
|
||||
} else {
|
||||
*errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -2823,7 +2823,7 @@ static const UConverterStaticData _ISO2022StaticData={
|
|||
UCNV_IBM,
|
||||
UCNV_ISO_2022,
|
||||
1,
|
||||
4,
|
||||
3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
|
||||
{ 0x1a, 0, 0, 0 },
|
||||
1,
|
||||
FALSE,
|
||||
|
@ -2873,7 +2873,7 @@ static const UConverterStaticData _ISO2022JPStaticData={
|
|||
UCNV_IBM,
|
||||
UCNV_ISO_2022,
|
||||
1,
|
||||
6,
|
||||
6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
|
||||
{ 0x1a, 0, 0, 0 },
|
||||
1,
|
||||
FALSE,
|
||||
|
@ -2923,7 +2923,7 @@ static const UConverterStaticData _ISO2022KRStaticData={
|
|||
UCNV_IBM,
|
||||
UCNV_ISO_2022,
|
||||
1,
|
||||
3,
|
||||
3, /* max 3 bytes per UChar: SO+DBCS */
|
||||
{ 0x1a, 0, 0, 0 },
|
||||
1,
|
||||
FALSE,
|
||||
|
@ -2974,7 +2974,7 @@ static const UConverterStaticData _ISO2022CNStaticData={
|
|||
UCNV_IBM,
|
||||
UCNV_ISO_2022,
|
||||
2,
|
||||
8,
|
||||
8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
|
||||
{ 0x1a, 0, 0, 0 },
|
||||
1,
|
||||
FALSE,
|
||||
|
|
|
@ -775,6 +775,7 @@ ucnv_createConverterFromSharedData(UConverter *myUConverter,
|
|||
myUConverter->fromCharErrorBehaviour = (UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
||||
myUConverter->fromUCharErrorBehaviour = (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
||||
myUConverter->toUnicodeStatus = myUConverter->sharedData->toUnicodeStatus;
|
||||
myUConverter->maxBytesPerUChar = myUConverter->sharedData->staticData->maxBytesPerChar;
|
||||
myUConverter->subChar1 = myUConverter->sharedData->staticData->subChar1;
|
||||
myUConverter->subCharLen = myUConverter->sharedData->staticData->subCharLen;
|
||||
uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->staticData->subChar, myUConverter->subCharLen);
|
||||
|
|
|
@ -64,7 +64,7 @@ typedef struct UConverterStaticData { /* +offset: size */
|
|||
int8_t conversionType; /* +69: 1 conversion type */
|
||||
|
||||
int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
|
||||
|
||||
uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */
|
||||
int8_t subCharLen; /* +76: 1 */
|
||||
|
@ -161,6 +161,14 @@ struct UConverter {
|
|||
*/
|
||||
UChar32 fromUChar32;
|
||||
|
||||
/*
|
||||
* value for ucnv_getMaxCharSize()
|
||||
*
|
||||
* usually simply copied from the static data, but ucnvmbcs.c modifies
|
||||
* the value depending on the converter type and options
|
||||
*/
|
||||
int8_t maxBytesPerUChar;
|
||||
|
||||
int8_t subCharLen; /* length of the codepage specific character sequence */
|
||||
int8_t invalidCharLength;
|
||||
int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
|
||||
|
|
|
@ -124,7 +124,19 @@
|
|||
* [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
|
||||
* [16] length of fromUStage3b[]
|
||||
*
|
||||
* [17]..[30] reserved
|
||||
* [17] Bit field containing numbers of bytes:
|
||||
* 31..24 reserved, 0
|
||||
* 23..16 maximum input bytes
|
||||
* 15.. 8 maximum output bytes
|
||||
* 7.. 0 maximum bytes per UChar
|
||||
*
|
||||
* [18] Bit field containing numbers of UChars:
|
||||
* 31..24 reserved, 0
|
||||
* 23..16 maximum input UChars
|
||||
* 15.. 8 maximum output UChars
|
||||
* 7.. 0 maximum UChars per byte
|
||||
*
|
||||
* [19]..[30] reserved, 0
|
||||
* [31] number of bytes for the entire extension structure
|
||||
* [>31] reserved; there are indexes[0] indexes
|
||||
*
|
||||
|
@ -303,7 +315,10 @@ enum {
|
|||
UCNV_EXT_FROM_U_STAGE_3B_INDEX,
|
||||
UCNV_EXT_FROM_U_STAGE_3B_LENGTH,
|
||||
|
||||
UCNV_EXT_RESERVED_INDEX, /* 17, moves with additional indexes */
|
||||
UCNV_EXT_COUNT_BYTES, /* 17 */
|
||||
UCNV_EXT_COUNT_UCHARS,
|
||||
|
||||
UCNV_EXT_RESERVED_INDEX, /* 19, moves with additional indexes */
|
||||
|
||||
UCNV_EXT_SIZE=31,
|
||||
UCNV_EXT_INDEXES_MIN_LENGTH=32
|
||||
|
@ -313,6 +328,9 @@ enum {
|
|||
#define UCNV_EXT_ARRAY(indexes, index, itemType) \
|
||||
((const itemType *)((const char *)(indexes)+(indexes)[index]))
|
||||
|
||||
#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \
|
||||
((indexes)[UCNV_EXT_COUNT_BYTES]&0xff)
|
||||
|
||||
/* internal API ------------------------------------------------------------- */
|
||||
|
||||
U_CFUNC UBool
|
||||
|
|
|
@ -528,7 +528,7 @@ static const UConverterImpl _LMBCSImpl##n={\
|
|||
static const UConverterStaticData _LMBCSStaticData##n={\
|
||||
sizeof(UConverterStaticData),\
|
||||
"LMBCS-" #n,\
|
||||
0, UCNV_IBM, UCNV_LMBCS_##n, 1, 2,\
|
||||
0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\
|
||||
{ 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \
|
||||
};\
|
||||
const UConverterSharedData _LMBCSData##n={\
|
||||
|
|
|
@ -757,7 +757,8 @@ static const UConverterImpl _UTF8Impl={
|
|||
static const UConverterStaticData _UTF8StaticData={
|
||||
sizeof(UConverterStaticData),
|
||||
"UTF-8",
|
||||
1208, UCNV_IBM, UCNV_UTF8, 1, 4,
|
||||
1208, UCNV_IBM, UCNV_UTF8,
|
||||
1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
|
||||
{ 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
|
||||
0,
|
||||
0,
|
||||
|
|
|
@ -898,6 +898,10 @@ _MBCSOpen(UConverter *cnv,
|
|||
const char *locale,
|
||||
uint32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
const int32_t *extIndexes;
|
||||
uint8_t outputType;
|
||||
int8_t maxBytesPerUChar;
|
||||
|
||||
if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
|
||||
/* do this because double-checked locking is broken */
|
||||
UBool isCached;
|
||||
|
@ -914,7 +918,6 @@ _MBCSOpen(UConverter *cnv,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
if(uprv_strstr(name, "18030")!=NULL) {
|
||||
if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
|
||||
/* set a flag for GB 18030 mode, which changes the callback behavior */
|
||||
|
@ -922,6 +925,24 @@ _MBCSOpen(UConverter *cnv,
|
|||
}
|
||||
}
|
||||
|
||||
/* fix maxBytesPerUChar depending on outputType and options etc. */
|
||||
outputType=cnv->sharedData->table->mbcs.outputType;
|
||||
if(outputType==MBCS_OUTPUT_2_SISO) {
|
||||
cnv->maxBytesPerUChar=3; /* SO+DBCS */
|
||||
}
|
||||
|
||||
extIndexes=cnv->sharedData->table->mbcs.extIndexes;
|
||||
if(extIndexes!=NULL) {
|
||||
maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
|
||||
if(outputType==MBCS_OUTPUT_2_SISO) {
|
||||
++maxBytesPerUChar; /* SO + multiple DBCS */
|
||||
}
|
||||
|
||||
if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
|
||||
cnv->maxBytesPerUChar=maxBytesPerUChar;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* documentation of UConverter fields used for status
|
||||
|
|
Loading…
Add table
Reference in a new issue