mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-703 Added ISO-2022-JP version selection functionality
X-SVN-Rev: 2833
This commit is contained in:
parent
8ed1e11d82
commit
2ade9b3ccb
6 changed files with 219 additions and 164 deletions
|
@ -65,23 +65,26 @@
|
|||
|
||||
/* for ISO-2022JP implementation*/
|
||||
typedef enum {
|
||||
ASCII = 0,
|
||||
ISO8859_1 ,
|
||||
ISO8859_7 ,
|
||||
JISX201 ,
|
||||
JISX208 ,
|
||||
JISX212 ,
|
||||
GB2312 ,
|
||||
KSC5601
|
||||
ASCII = 0,
|
||||
ISO8859_1 = 1 ,
|
||||
ISO8859_7 = 2 ,
|
||||
JISX201 = 3,
|
||||
JISX208 = 4,
|
||||
JISX212 = 5,
|
||||
GB2312 =6,
|
||||
KSC5601 =7,
|
||||
INVALID_STATE
|
||||
|
||||
} StateEnum;
|
||||
|
||||
|
||||
|
||||
typedef enum {
|
||||
SBCS = 0,
|
||||
SBCS = 0,
|
||||
DBCS,
|
||||
MBCS,
|
||||
LATIN1
|
||||
LATIN1,
|
||||
ASCII1
|
||||
}Cnv2022Type;
|
||||
|
||||
|
||||
|
@ -99,6 +102,7 @@ typedef struct{
|
|||
UBool isShiftAppended;
|
||||
UBool isLocaleSpecified;
|
||||
uint32_t key;
|
||||
uint32_t version;
|
||||
}UConverterDataISO2022;
|
||||
|
||||
/* ISO-2022 ----------------------------------------------------------------- */
|
||||
|
@ -221,9 +225,9 @@ typedef enum
|
|||
|
||||
/*Below are the 3 arrays depicting a state transition table*/
|
||||
int8_t normalize_esq_chars_2022[256] = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 */
|
||||
|
||||
0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
|
||||
/* 0 1 2 3 4 5 6 7 8 9 */
|
||||
|
||||
0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
|
||||
,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
|
||||
,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0
|
||||
,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,0 ,0
|
||||
|
@ -267,16 +271,16 @@ int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
|
|||
|
||||
|
||||
const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 */
|
||||
/* 0 1 2 3 4 5 6 7 8 9 */
|
||||
|
||||
NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" ,"latin1"
|
||||
,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX-201" ,"JISX-201" ,"latin1" ,"latin1"
|
||||
,NULL ,"ibm-955" ,"gb_2312_80-1" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" ,"ISO-8859-1"
|
||||
,"ISO-8859-7" ,NULL ,"ibm-955" ,"bm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" ,"ibm-952" ,"ibm-964"
|
||||
,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"gb_2312_80-1" ,"ibm-949" ,"ISO-IR-165" ,"CNS-11643-1992,1" ,"CNS-11643-1992,2"
|
||||
NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" ,"latin1"
|
||||
,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX-201" ,"JISX-201" ,"latin1" ,"latin1"
|
||||
,NULL ,"JISX-208" ,"gb_2312_80-1" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" ,"ISO-8859-1"
|
||||
,"ISO-8859-7" ,NULL ,"ibm-955" ,"bm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" ,"ibm-952" ,"ibm-964"
|
||||
,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"gb_2312_80-1" ,"ibm-949" ,"ISO-IR-165" ,"CNS-11643-1992,1" ,"CNS-11643-1992,2"
|
||||
,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
|
||||
,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" ,"ibm-920" ,"ibm-915"
|
||||
,"ibm-915" ,"latin1"
|
||||
,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" ,"ibm-920" ,"ibm-915"
|
||||
,"ibm-915" ,"latin1"
|
||||
};
|
||||
|
||||
|
||||
|
@ -316,7 +320,7 @@ UCNV_TableStates_2022 getKey_2022(char source,
|
|||
int32_t* offset);
|
||||
|
||||
/*********** ISO 2022 Converter Protos ***********/
|
||||
static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale, UErrorCode *errorCode);
|
||||
static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t* version, UErrorCode *errorCode);
|
||||
static void _ISO2022Close(UConverter *converter);
|
||||
static void _ISO2022Reset(UConverter *converter);
|
||||
|
||||
|
@ -354,7 +358,7 @@ const UConverterSharedData _ISO2022Data={
|
|||
0
|
||||
};
|
||||
|
||||
static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale, UErrorCode *errorCode){
|
||||
static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t* version, UErrorCode *errorCode){
|
||||
|
||||
char myLocale[6]={' ',' ',' ',' ',' ',' '};
|
||||
|
||||
|
@ -370,7 +374,7 @@ static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,
|
|||
uprv_strcpy(myLocale,locale);
|
||||
myConverterData->isLocaleSpecified = TRUE;
|
||||
}
|
||||
|
||||
myConverterData->version= 0;
|
||||
myConverterData->myConverterArray[0] =NULL;
|
||||
if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
|
||||
(myLocale[2]=='_' || myLocale[2]=='\0')){
|
||||
|
@ -402,6 +406,18 @@ static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,
|
|||
_ISO2022Impl.fromUnicodeWithOffsets = UConverter_fromUnicode_ISO_2022_JP;
|
||||
_ISO2022Impl.getNextUChar = UConverter_getNextUChar_ISO_2022_JP;
|
||||
|
||||
if(version){
|
||||
switch (*version){
|
||||
case '0':
|
||||
myConverterData->version = 0;
|
||||
break;
|
||||
case '1':
|
||||
myConverterData->version = 1;
|
||||
break;
|
||||
default:
|
||||
myConverterData->version = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
|
||||
(myLocale[2]=='_' || myLocale[2]=='\0')){
|
||||
|
@ -451,6 +467,18 @@ static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,
|
|||
_ISO2022Impl.fromUnicode = UConverter_fromUnicode_ISO_2022_CN;
|
||||
_ISO2022Impl.fromUnicodeWithOffsets = UConverter_fromUnicode_ISO_2022_CN;
|
||||
_ISO2022Impl.getNextUChar = UConverter_getNextUChar_ISO_2022_CN;
|
||||
if(version){
|
||||
switch (*version){
|
||||
case '0':
|
||||
myConverterData->version = 0;
|
||||
break;
|
||||
case '1':
|
||||
myConverterData->version = 1;
|
||||
break;
|
||||
default:
|
||||
myConverterData->version = 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else{
|
||||
|
@ -473,13 +501,20 @@ static void _ISO2022Open(UConverter *cnv, const char *name, const char *locale,
|
|||
|
||||
static void
|
||||
_ISO2022Close(UConverter *converter) {
|
||||
UConverter **array = ((UConverterDataISO2022 *) (converter->extraInfo))->myConverterArray;
|
||||
UConverter **array = ((UConverterDataISO2022 *) (converter->extraInfo))->myConverterArray;
|
||||
|
||||
if (converter->extraInfo != NULL) {
|
||||
ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
|
||||
/*ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);*/
|
||||
/*close the array of converter pointers and free the memory*/
|
||||
while(*array!=NULL){
|
||||
if(*array==((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter){
|
||||
((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter=NULL;
|
||||
}
|
||||
ucnv_close(*array++);
|
||||
|
||||
}
|
||||
if(((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter){
|
||||
ucnv_close(((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
|
||||
}
|
||||
uprv_free (converter->extraInfo);
|
||||
}
|
||||
|
@ -1061,7 +1096,7 @@ static const char* getEndOfBuffer_2022(const char** source,
|
|||
*/
|
||||
|
||||
static Cnv2022Type myConverterType[8]={
|
||||
SBCS,
|
||||
ASCII1,
|
||||
LATIN1,
|
||||
SBCS,
|
||||
SBCS,
|
||||
|
@ -1072,8 +1107,12 @@ static Cnv2022Type myConverterType[8]={
|
|||
|
||||
};
|
||||
|
||||
|
||||
static const char* escSeqChars[8] ={
|
||||
/*
 * State-advance table for the ISO-2022-JP fromUnicode path, indexed as
 * nextStateArray[version][currentState].
 *
 * UConverter_fromUnicode_ISO_2022_JP reads it when the DBCS lookup for the
 * current state yields missingCharMarker, to choose the next StateEnum to try.
 * The first index is the version stored by _ISO2022Open (0, 1 or 2); the
 * second index is the current StateEnum value (ASCII=0 ... KSC5601=7).
 *
 * Each row forms a cycle that ends back at ASCII:
 *   version 0: ASCII -> JISX201 -> JISX208 -> ASCII
 *   version 1: ASCII -> JISX201 -> JISX208 -> JISX212 -> ASCII
 *   version 2: ASCII -> ISO8859_1 -> ISO8859_7 -> JISX201 -> JISX208
 *              -> JISX212 -> GB2312 -> KSC5601 -> ASCII
 *
 * NOTE(review): INVALID_STATE marks states that are not part of a row's
 * cycle. Its value (8) is outside the 8-entry myConverterType array, and the
 * caller indexes with the returned state without checking it -- confirm that
 * a converter can never be in such a state for versions 0 and 1.
 */
static StateEnum nextStateArray[3][8]= {
|
||||
{JISX201,INVALID_STATE,INVALID_STATE,JISX208,ASCII,INVALID_STATE,INVALID_STATE,INVALID_STATE}, /* version 0 */
|
||||
{JISX201,INVALID_STATE,INVALID_STATE,JISX208,JISX212,ASCII,INVALID_STATE,INVALID_STATE}, /* version 1 */
|
||||
{ISO8859_1,ISO8859_7,JISX201,JISX208,JISX212,GB2312,KSC5601,ASCII} /* version 2 */
|
||||
};
|
||||
static char* escSeqChars[8] ={
|
||||
"\x1B\x28\x42", /* <ESC>(B ASCII */
|
||||
"\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
|
||||
"\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
|
||||
|
@ -1196,14 +1235,13 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
}
|
||||
/*Do the conversion*/
|
||||
if(mySourceChar == 0x0020){
|
||||
targetUniChar = mySourceChar;
|
||||
|
||||
|
||||
if(currentState > 2){
|
||||
concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]));
|
||||
|
||||
isTargetUCharDBCS=FALSE;
|
||||
}
|
||||
concatString(args, &myTargetIndex, &myTargetLength,&targetUniChar,err,&mySourceIndex);
|
||||
concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
|
||||
myConverterData->isEscapeAppended=isEscapeAppended =FALSE;
|
||||
TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
|
||||
continue;
|
||||
|
@ -1211,7 +1249,7 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
/* if the source character is LF, CR, TAB or VT then append the ASCII escape sequence*/
|
||||
else if(mySourceChar== 0x000A || mySourceChar== 0x000D || mySourceChar==0x0009 || mySourceChar==0x000B){
|
||||
|
||||
if(isTargetUCharDBCS && mySource[mySourceIndex-2]!=0x000D){
|
||||
if((isTargetUCharDBCS || currentState==JISX201) && mySource[mySourceIndex-2]!=0x000D){
|
||||
concatEscape(args, &myTargetIndex, &myTargetLength, escSeqChars[0],err,strlen(escSeqChars[0]));
|
||||
isTargetUCharDBCS=FALSE;
|
||||
isShiftAppended =FALSE;
|
||||
|
@ -1220,8 +1258,7 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
|
||||
}
|
||||
|
||||
targetUniChar = mySourceChar;
|
||||
concatString(args, &myTargetIndex, &myTargetLength,&targetUniChar,err,&mySourceIndex);
|
||||
concatString(args, &myTargetIndex, &myTargetLength,&mySourceChar,err,&mySourceIndex);
|
||||
|
||||
if(currentState==ISO8859_1 || currentState ==ISO8859_7)
|
||||
isEscapeAppended =FALSE;
|
||||
|
@ -1232,65 +1269,85 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
}
|
||||
else{
|
||||
|
||||
switch (myType){
|
||||
do{
|
||||
switch (myType){
|
||||
|
||||
case SBCS:
|
||||
if(mySourceChar< 0xff){
|
||||
myFromUnicodeSBCS = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicode;
|
||||
myFromUnicodeSBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicodeFallback;
|
||||
case SBCS:
|
||||
|
||||
myFromUnicodeSBCS = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicode;
|
||||
myFromUnicodeSBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->sbcs.fromUnicodeFallback;
|
||||
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCS, mySourceChar);
|
||||
|
||||
if ((targetUniChar==0)&&(args->converter->useFallback == TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCSFallback, mySourceChar);
|
||||
}
|
||||
/* ucmp8_getU returns 0 for missing char so explicitly set it missingCharMarker*/
|
||||
targetUniChar=(UChar)((targetUniChar==0) ? (UChar) missingCharMarker : targetUniChar);
|
||||
|
||||
break;
|
||||
|
||||
case DBCS:
|
||||
if(mySourceChar < 0xffff){
|
||||
myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
|
||||
myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
|
||||
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCS, mySourceChar);
|
||||
|
||||
if ((targetUniChar==0)&&(myConverterData->fromUnicodeConverter->useFallback == TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCSFallback, mySourceChar);
|
||||
}
|
||||
/* ucmp8_getU returns 0 for missing char so explicitly set it missingCharMarker*/
|
||||
targetUniChar=(UChar)((targetUniChar==0) ? (UChar) missingCharMarker : targetUniChar);
|
||||
if ((targetUniChar==missingCharMarker)&&(args->converter->useFallback== TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case MBCS:
|
||||
length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
|
||||
mySourceChar,&targetValue,args->converter->useFallback);
|
||||
|
||||
targetUniChar = (UChar32) targetValue;
|
||||
|
||||
if(length==0x0000){
|
||||
targetUniChar = missingCharMarker;
|
||||
*err =U_ZERO_ERROR;
|
||||
}
|
||||
/* only DBCS or SBCS characters are expected*/
|
||||
else if(length > 2){
|
||||
reason =UCNV_ILLEGAL;
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
goto CALLBACK;
|
||||
}
|
||||
break;
|
||||
|
||||
case LATIN1:
|
||||
if(mySourceChar < 0x0100){
|
||||
targetUniChar = mySourceChar;
|
||||
} else targetUniChar = missingCharMarker;
|
||||
break;
|
||||
|
||||
default:
|
||||
/*not expected */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case DBCS:
|
||||
if(mySourceChar < 0xffff){
|
||||
myFromUnicodeDBCS = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicode;
|
||||
myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
|
||||
|
||||
if ((targetUniChar==missingCharMarker)&&(myConverterData->fromUnicodeConverter->useFallback == TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
|
||||
}
|
||||
if(targetUniChar==missingCharMarker){
|
||||
isEscapeAppended = FALSE;
|
||||
/* save the state */
|
||||
myConverterData->currentState=nextStateArray[myConverterData->version][currentState];
|
||||
myConverterData->isEscapeAppended = isEscapeAppended;
|
||||
myConverterData->isShiftAppended =isShiftAppended;
|
||||
args->converter->fromUnicodeStatus = isTargetUCharDBCS;
|
||||
myConverterData->sourceIndex = mySourceIndex;
|
||||
myConverterData->targetIndex = myTargetIndex;
|
||||
currentState = myConverterData->currentState;
|
||||
myConverterData->fromUnicodeConverter = (myConverterData->fromUnicodeConverter == NULL) ?
|
||||
myConverterData->myConverterArray[0] :
|
||||
myConverterData->myConverterArray[(int)myConverterData->currentState];
|
||||
isTargetUCharDBCS = (UBool) args->converter->fromUnicodeStatus;
|
||||
myType= (Cnv2022Type) myConverterType[currentState];
|
||||
}
|
||||
break;
|
||||
|
||||
case MBCS:
|
||||
length= _MBCSFromUChar32(myConverterData->fromUnicodeConverter->sharedData,
|
||||
mySourceChar,&targetValue,args->converter->useFallback);
|
||||
|
||||
targetUniChar = (UChar32) targetValue;
|
||||
|
||||
if(length==0x0000){
|
||||
targetUniChar = missingCharMarker;
|
||||
*err =U_ZERO_ERROR;
|
||||
}
|
||||
/* only DBCS or SBCS characters are expected*/
|
||||
else if(length > 2){
|
||||
reason =UCNV_ILLEGAL;
|
||||
*err =U_INVALID_CHAR_FOUND;
|
||||
goto CALLBACK;
|
||||
}
|
||||
break;
|
||||
|
||||
case LATIN1:
|
||||
if(mySourceChar < 0x0100){
|
||||
targetUniChar = mySourceChar;
|
||||
} else targetUniChar = missingCharMarker;
|
||||
break;
|
||||
|
||||
default:
|
||||
/*not expected */
|
||||
break;
|
||||
}
|
||||
|
||||
}while(targetUniChar==missingCharMarker && initIterState != currentState);
|
||||
|
||||
}
|
||||
|
||||
if(targetUniChar!= missingCharMarker){
|
||||
|
@ -1311,39 +1368,14 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
isEscapeAppended =TRUE;
|
||||
myConverterData->isEscapeAppended=TRUE;
|
||||
|
||||
|
||||
|
||||
/* Append SSN for shifting to G2 */
|
||||
if(currentState==ISO8859_1 || currentState==ISO8859_7){
|
||||
|
||||
concatEscape(args, &myTargetIndex, &myTargetLength,
|
||||
UCNV_SS2,err,strlen(UCNV_SS2));
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
/*else{
|
||||
|
||||
if(oldIsTargetUCharDBCS != isTargetUCharDBCS ){
|
||||
/*Shifting from a double byte to single byte mode
|
||||
if(!isTargetUCharDBCS){
|
||||
|
||||
concatChar(args, &myTargetIndex,
|
||||
&myTargetLength, UCNV_SI,err);
|
||||
isShiftAppended =FALSE;
|
||||
|
||||
TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
|
||||
}
|
||||
else{ /* Shifting from a single byte to double byte mode
|
||||
concatChar(args, &myTargetIndex,
|
||||
&myTargetLength, UCNV_SO,err);
|
||||
isShiftAppended =TRUE;
|
||||
myConverterData->isShiftAppended =isShiftAppended;
|
||||
|
||||
TEST_ERROR_CONDITION(args,myTargetIndex, mySourceIndex, isTargetUCharDBCS,myConverterData, err);
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
concatString(args, &myTargetIndex, &myTargetLength,
|
||||
&targetUniChar,err, &mySourceIndex);
|
||||
|
||||
|
@ -1352,28 +1384,9 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
|
|||
}/* end of end if(targetUniChar==missingCharMarker)*/
|
||||
else{
|
||||
|
||||
iterCount = (iterCount<8)? iterCount+1 : 0;
|
||||
|
||||
myConverterData->currentState=currentState=(StateEnum)(currentState<7)? currentState+1:0;
|
||||
|
||||
if((currentState!= initIterState) ){
|
||||
|
||||
/* explicitly decrement source since it has already been incremented */
|
||||
mySourceIndex--;
|
||||
targetUniChar =missingCharMarker;
|
||||
isEscapeAppended = FALSE;
|
||||
/* save the state */
|
||||
myConverterData->isEscapeAppended = isEscapeAppended;
|
||||
myConverterData->isShiftAppended =isShiftAppended;
|
||||
args->converter->fromUnicodeStatus = isTargetUCharDBCS;
|
||||
myConverterData->sourceIndex = mySourceIndex;
|
||||
myConverterData->targetIndex = myTargetIndex;
|
||||
continue;
|
||||
}
|
||||
else{
|
||||
/* if we cannot find the character after checking all codepages
|
||||
* then this is an error
|
||||
*/
|
||||
* then this is an error
|
||||
*/
|
||||
reason = UCNV_UNASSIGNED;
|
||||
*err = U_INVALID_CHAR_FOUND;
|
||||
|
||||
|
@ -1407,7 +1420,6 @@ CALLBACK:
|
|||
break;
|
||||
}
|
||||
args->converter->invalidUCharLength = 0;
|
||||
}
|
||||
}
|
||||
targetUniChar =missingCharMarker;
|
||||
} /* end if(myTargetIndex<myTargetLength) */
|
||||
|
@ -1418,7 +1430,6 @@ CALLBACK:
|
|||
|
||||
}/* end while(mySourceIndex<mySourceLength) */
|
||||
|
||||
|
||||
/*save the state and return */
|
||||
args->target += myTargetIndex;
|
||||
args->source += mySourceIndex;
|
||||
|
@ -1428,6 +1439,7 @@ CALLBACK:
|
|||
|
||||
}
|
||||
|
||||
|
||||
static void concatString(UConverterFromUnicodeArgs* args, int32_t *targetIndex, int32_t *targetLength,
|
||||
const UChar32* strToAppend,UErrorCode* err, int32_t *sourceIndex){
|
||||
|
||||
|
@ -1545,6 +1557,19 @@ UConverter_getNextUChar_ISO_2022_JP(UConverterToUnicodeArgs *pArgs,
|
|||
return 0xffff;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Recognized escape sequences are
|
||||
* <ESC>(B ASCII
|
||||
* <ESC>.A ISO-8859-1
|
||||
* <ESC>.F ISO-8859-7
|
||||
* <ESC>(J JISX-201
|
||||
* <ESC>(I JISX-201
|
||||
* <ESC>$B JISX-208
|
||||
* <ESC>$@ JISX-208
|
||||
* <ESC>$(D JISX-212
|
||||
* <ESC>$A GB2312
|
||||
* <ESC>$(C KSC5601
|
||||
*/
|
||||
U_CFUNC void UConverter_toUnicode_ISO_2022_JP(UConverterToUnicodeArgs *args,
|
||||
UErrorCode* err){
|
||||
|
||||
|
@ -2509,7 +2534,7 @@ UConverter_getNextUChar_ISO_2022_KR(UConverterToUnicodeArgs *pArgs,
|
|||
|
||||
/********************** ISO2022-CN Data **************************/
|
||||
static const char* escSeqCharsCN[10] ={
|
||||
"\x1B\x28\x42", /* ASCII */
|
||||
"\x0F", /* ASCII */
|
||||
"\x1B\x24\x29\x41", /* GB 2312-80 */
|
||||
"\x1B\x24\x29\x45", /* ISO-IR-165 */
|
||||
"\x1B\x24\x29\x47", /* CNS 11643-1992 Plane 1 */
|
||||
|
@ -2522,7 +2547,7 @@ static const char* escSeqCharsCN[10] ={
|
|||
};
|
||||
|
||||
static const char* shiftSeqCharsCN[10] ={
|
||||
"",
|
||||
"",
|
||||
(const char*) "\x0E",
|
||||
(const char*) "\x0E",
|
||||
(const char*) "\x0E",
|
||||
|
@ -2535,14 +2560,14 @@ static const char* shiftSeqCharsCN[10] ={
|
|||
};
|
||||
|
||||
typedef enum {
|
||||
ASCII_1=0,
|
||||
ASCII_1=0,
|
||||
GB2312_1=1,
|
||||
ISO_IR_165=2,
|
||||
CNS_11643=3
|
||||
} StateEnumCN;
|
||||
|
||||
static Cnv2022Type myConverterTypeCN[4]={
|
||||
SBCS,
|
||||
SBCS,
|
||||
DBCS,
|
||||
DBCS,
|
||||
MBCS
|
||||
|
@ -2652,7 +2677,7 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args,
|
|||
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCS, mySourceChar);
|
||||
|
||||
if ((targetUniChar==0)&&(myConverterData->fromUnicodeConverter->useFallback == TRUE) &&
|
||||
if ((targetUniChar==0)&&(args->converter->useFallback == TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar32) ucmp8_getu (myFromUnicodeSBCSFallback, mySourceChar);
|
||||
}
|
||||
|
@ -2668,11 +2693,14 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args,
|
|||
myFromUnicodeDBCSFallback = &myConverterData->fromUnicodeConverter->sharedData->table->dbcs.fromUnicodeFallback;
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCS, mySourceChar);
|
||||
|
||||
if ((targetUniChar==missingCharMarker)&&(myConverterData->fromUnicodeConverter->useFallback == TRUE) &&
|
||||
if ((targetUniChar==missingCharMarker)&&(args->converter->useFallback == TRUE) &&
|
||||
(myConverterData->fromUnicodeConverter->sharedData->staticData->hasFromUnicodeFallback == TRUE)){
|
||||
targetUniChar = (UChar) ucmp16_getu (myFromUnicodeDBCSFallback, mySourceChar);
|
||||
}
|
||||
}
|
||||
if(( myConverterData->version) == 0 && currentState==ISO_IR_165){
|
||||
targetUniChar=missingCharMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case MBCS:
|
||||
|
@ -2698,6 +2726,9 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_CN(UConverterFromUnicodeArgs* args,
|
|||
*err =U_INVALID_CHAR_FOUND;
|
||||
goto CALLBACK;
|
||||
}
|
||||
if(myConverterData->version == 0 && plane >2){
|
||||
targetUniChar = missingCharMarker;
|
||||
}
|
||||
break;
|
||||
|
||||
case LATIN1:
|
||||
|
@ -2997,8 +3028,21 @@ DONE:
|
|||
}
|
||||
else{
|
||||
_this->mode = UCNV_SI;
|
||||
ucnv_close(myData2022->currentConverter);
|
||||
myData2022->currentConverter = myUConverter = ucnv_open(cnvName, err);
|
||||
/* ucnv_close(myData2022->currentConverter);
|
||||
myData2022->currentConverter = myUConverter = ucnv_open(cnvName, err);*/
|
||||
if( cnvName[0] == 'l' ){
|
||||
myData2022->currentConverter = myUConverter = myData2022->myConverterArray[0];
|
||||
}
|
||||
else if( cnvName[0] == 'g' ){
|
||||
myData2022->currentConverter = myUConverter = myData2022->myConverterArray[1];
|
||||
}
|
||||
else if(cnvName[0] == 'I' ){
|
||||
myData2022->currentConverter = myUConverter = myData2022->myConverterArray[2];
|
||||
}
|
||||
else if( cnvName[0] == 'C'){
|
||||
myData2022->currentConverter = myUConverter = myData2022->myConverterArray[3];
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (U_SUCCESS(*err)){
|
||||
|
|
|
@ -327,13 +327,22 @@ parseConverterOptions(const char *inName,
|
|||
} else {
|
||||
/* ignore any other options until we define some */
|
||||
for(;;) {
|
||||
c=*inName;
|
||||
if(c!=0) {
|
||||
++inName;
|
||||
if(c==UCNV_OPTION_SEP_CHAR) {
|
||||
break;
|
||||
if(uprv_strncmp(inName, "version=", 8)==0) {
|
||||
/*copy the version option value*/
|
||||
inName+=8;
|
||||
c=*inName;
|
||||
if(c!=0){
|
||||
++inName;
|
||||
if(c!=UCNV_OPTION_SEP_CHAR){
|
||||
*pFlags = c;
|
||||
}
|
||||
else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -357,7 +366,7 @@ UConverter *
|
|||
UConverter *myUConverter = NULL;
|
||||
UConverterSharedData *mySharedConverterData = NULL;
|
||||
UErrorCode internalErrorCode = U_ZERO_ERROR;
|
||||
|
||||
uint32_t version=0;
|
||||
if (U_FAILURE (*err))
|
||||
return NULL;
|
||||
|
||||
|
@ -373,7 +382,7 @@ UConverter *
|
|||
/* the default converter name is already canonical */
|
||||
} else {
|
||||
/* separate the converter name from the options */
|
||||
parseConverterOptions(converterName, cnvName, locale, NULL);
|
||||
parseConverterOptions(converterName, cnvName, locale,&version);
|
||||
|
||||
/* get the canonical converter name */
|
||||
realName = ucnv_io_getConverterName(cnvName, &internalErrorCode);
|
||||
|
@ -388,7 +397,7 @@ UConverter *
|
|||
|
||||
/* separate the converter name from the options */
|
||||
if(realName != cnvName) {
|
||||
parseConverterOptions(realName, cnvName, locale, NULL);
|
||||
parseConverterOptions(realName, cnvName, locale,&version);
|
||||
realName = cnvName;
|
||||
}
|
||||
|
||||
|
@ -446,7 +455,7 @@ UConverter *
|
|||
uprv_memcpy (myUConverter->subChar, myUConverter->sharedData->staticData->subChar, myUConverter->subCharLen);
|
||||
|
||||
if(myUConverter != NULL && myUConverter->sharedData->impl->open != NULL) {
|
||||
myUConverter->sharedData->impl->open(myUConverter, realName, locale, err);
|
||||
myUConverter->sharedData->impl->open(myUConverter, realName, locale,&version, err);
|
||||
if(U_FAILURE(*err)) {
|
||||
ucnv_close(myUConverter);
|
||||
return NULL;
|
||||
|
|
|
@ -137,7 +137,7 @@ U_CDECL_BEGIN
|
|||
typedef void (*UConverterLoad) (UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
|
||||
typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
|
||||
|
||||
typedef void (*UConverterOpen) (UConverter *cnv, const char *name, const char *locale, UErrorCode *pErrorCode);
|
||||
typedef void (*UConverterOpen) (UConverter *cnv, const char *name, const char *locale,uint32_t* version, UErrorCode *pErrorCode);
|
||||
typedef void (*UConverterClose) (UConverter *cnv);
|
||||
|
||||
typedef void (*UConverterReset) (UConverter *cnv);
|
||||
|
|
|
@ -532,8 +532,8 @@ optimization group. So, we put the common stuff into a worker function,
|
|||
and set up another macro to stamp out the 12 open functions:*/
|
||||
#define DEFINE_LMBCS_OPEN(n) \
|
||||
static void \
|
||||
_LMBCSOpen##n(UConverter* _this,const char* name,const char* locale,UErrorCode* err) \
|
||||
{ _LMBCSOpenWorker(_this, name,locale, err, n);}
|
||||
_LMBCSOpen##n(UConverter* _this,const char* name,const char* locale,uint32_t* version,UErrorCode* err) \
|
||||
{ _LMBCSOpenWorker(_this, name,locale,version, err, n);}
|
||||
|
||||
|
||||
|
||||
|
@ -558,7 +558,8 @@ _LMBCSGetNextUChar(UConverterToUnicodeArgs *args,
|
|||
static void
|
||||
_LMBCSOpenWorker(UConverter* _this,
|
||||
const char* name,
|
||||
const char* locale,
|
||||
const char* locale,
|
||||
uint32_t* version,
|
||||
UErrorCode* err,
|
||||
ulmbcs_byte_t OptGroup
|
||||
)
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
}
|
||||
|
||||
/*********** HZ Converter Protos ***********/
|
||||
static void _HZOpen(UConverter *cnv, const char *name, const char *locale, UErrorCode *errorCode);
|
||||
static void _HZOpen(UConverter *cnv, const char *name, const char *locale, uint32_t *version,UErrorCode *errorCode);
|
||||
static void _HZClose(UConverter *converter);
|
||||
static void _HZReset(UConverter *converter);
|
||||
|
||||
|
@ -108,7 +108,7 @@ typedef struct{
|
|||
|
||||
|
||||
|
||||
static void _HZOpen(UConverter *cnv, const char *name,const char* locale, UErrorCode *errorCode){
|
||||
static void _HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t *version, UErrorCode *errorCode){
|
||||
cnv->toUnicodeStatus = 0;
|
||||
cnv->fromUnicodeStatus= 0;
|
||||
cnv->mode=0;
|
||||
|
|
|
@ -224,6 +224,7 @@ U_CFUNC void
|
|||
_MBCSOpen(UConverter *cnv,
|
||||
const char *name,
|
||||
const char *locale,
|
||||
uint32_t *version,
|
||||
UErrorCode *pErrorCode) {
|
||||
_MBCSReset(cnv);
|
||||
if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
|
||||
|
|
Loading…
Add table
Reference in a new issue