mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-11296 based on patch from Jungshik, approved option name UCONFIG_ONLY_HTML_CONVERSION, turn off UTF-32, simplify changes, fix warnings
X-SVN-Rev: 37045
This commit is contained in:
parent
71035aa827
commit
8e6898ae3d
17 changed files with 155 additions and 51 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2014, International Business Machines
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv2022.cpp
|
||||
|
@ -75,8 +75,10 @@
|
|||
*/
|
||||
#endif
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
static const char SHIFT_IN_STR[] = "\x0F";
|
||||
// static const char SHIFT_OUT_STR[] = "\x0E";
|
||||
#endif
|
||||
|
||||
#define CR 0x0D
|
||||
#define LF 0x0A
|
||||
|
@ -152,7 +154,11 @@ typedef enum {
|
|||
} StateEnum;
|
||||
|
||||
/* is the StateEnum charset value for a DBCS charset? */
|
||||
/*
 * IS_JP_DBCS(cs) is true when the StateEnum value cs names a double-byte
 * charset. The HTML-only build tests for JISX208 alone; the full build
 * accepts every StateEnum value from JISX208 through KSC5601 inclusive.
 */
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
#define IS_JP_DBCS(cs) (JISX208==(cs))
|
||||
#else
|
||||
/* Range test: relies on the enumerator order of StateEnum (defined above this hunk). */
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
|
||||
#endif
|
||||
|
||||
#define CSM(cs) ((uint16_t)1<<(cs))
|
||||
|
||||
|
@ -165,13 +171,19 @@ typedef enum {
|
|||
* all versions, not just JIS7 and JIS8.
|
||||
* - ICU does not distinguish between different versions of JIS X 0208.
|
||||
*/
|
||||
/*
 * MAX_JA_VERSION is the last valid index into jpCharsetMasks[]:
 * 0 in an HTML-only build (one entry), 4 otherwise (five entries).
 */
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
enum { MAX_JA_VERSION=0 };
|
||||
#else
|
||||
enum { MAX_JA_VERSION=4 };
|
||||
#endif
|
||||
/*
 * One bit mask per entry, built by OR-ing CSM(charset) bits.
 * Presumably indexed by the converter's version number - confirm against the users of this table.
 * Entries after the first are compiled only when UCONFIG_ONLY_HTML_CONVERSION is off,
 * which keeps the initializer count equal to MAX_JA_VERSION+1 in both builds.
 */
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
|
||||
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
|
||||
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
|
||||
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
|
||||
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
|
@ -358,15 +370,16 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
|
|||
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
||||
};
|
||||
|
||||
|
||||
/* Type def for refactoring changeState_2022 code*/
|
||||
/*
 * Every enumerator carries an explicit value, so compiling one out
 * (ISO_2022 without U_ENABLE_GENERIC_ISO_2022; ISO_2022_KR and ISO_2022_CN
 * with UCONFIG_ONLY_HTML_CONVERSION) does not renumber the others.
 */
typedef enum{
|
||||
#ifdef U_ENABLE_GENERIC_ISO_2022
|
||||
ISO_2022=0,
|
||||
#endif
|
||||
ISO_2022_JP=1,
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
ISO_2022_KR=2,
|
||||
ISO_2022_CN=3
|
||||
#endif
|
||||
} Variant2022;
|
||||
|
||||
/*********** ISO 2022 Converter Protos ***********/
|
||||
|
@ -397,8 +410,11 @@ namespace {
|
|||
|
||||
/*const UConverterSharedData _ISO2022Data;*/
|
||||
extern const UConverterSharedData _ISO2022JPData;
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
extern const UConverterSharedData _ISO2022KRData;
|
||||
extern const UConverterSharedData _ISO2022CNData;
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -511,6 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
|||
myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
|
||||
myConverterData->name[len+1]='\0';
|
||||
}
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
|
||||
(myLocale[2]=='_' || myLocale[2]=='\0'))
|
||||
{
|
||||
|
@ -580,6 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
|||
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
|
||||
}
|
||||
}
|
||||
#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
||||
else{
|
||||
#ifdef U_ENABLE_GENERIC_ISO_2022
|
||||
myConverterData->isFirstBuffer = TRUE;
|
||||
|
@ -714,6 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
|
|||
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
/*************** to unicode *******************/
|
||||
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 */
|
||||
|
@ -726,6 +745,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
|
|||
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
||||
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
static UCNV_TableStates_2022
|
||||
|
@ -898,6 +918,7 @@ DONE:
|
|||
}
|
||||
}
|
||||
break;
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
case ISO_2022_CN:
|
||||
{
|
||||
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
|
||||
|
@ -959,6 +980,7 @@ DONE:
|
|||
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
|
||||
}
|
||||
break;
|
||||
#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
default:
|
||||
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
|
||||
|
@ -1001,6 +1023,7 @@ DONE:
|
|||
}
|
||||
}
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
/*Checks the characters of the buffer against valid 2022 escape sequences
|
||||
*if they match we return a pointer to the initial start of the sequence otherwise
|
||||
*we return sourceLimit
|
||||
|
@ -1055,7 +1078,7 @@ getEndOfBuffer_2022(const char** source,
|
|||
return mySource;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
|
||||
* any future change in _MBCSFromUChar32() function should be reflected here.
|
||||
|
@ -2269,6 +2292,7 @@ endloop:
|
|||
}
|
||||
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
/***************************************************************
|
||||
* Rules for ISO-2022-KR encoding
|
||||
* i) The KSC5601 designator sequence should appear only once in a file,
|
||||
|
@ -3412,6 +3436,7 @@ endloop:
|
|||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
}
|
||||
#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
|
||||
|
||||
static void
|
||||
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
|
||||
|
@ -3638,6 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
|||
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
|
||||
}
|
||||
break;
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
case 'c':
|
||||
case 'z':
|
||||
/* include ASCII for CN */
|
||||
|
@ -3649,6 +3675,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
|||
cnvData->currentConverter, sa, which, pErrorCode);
|
||||
/* the loop over myConverterArray[] will simply not find another converter */
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -3669,9 +3696,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
|||
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
|
||||
UConverterSetFilter filter;
|
||||
if(cnvData->myConverterArray[i]!=NULL) {
|
||||
if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
|
||||
cnvData->version==0 && i==CNS_11643
|
||||
) {
|
||||
if(cnvData->locale[0]=='j' && i==JISX208) {
|
||||
/*
|
||||
* Only add code points that map to Shift-JIS codes
|
||||
* corresponding to JIS X 0208.
|
||||
*/
|
||||
filter=UCNV_SET_FILTER_SJIS;
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
} else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
|
||||
cnvData->version==0 && i==CNS_11643) {
|
||||
/*
|
||||
* Version-specific for CN:
|
||||
* CN version 0 does not map CNS planes 3..7 although
|
||||
|
@ -3680,18 +3713,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
|||
* The two versions create different Unicode sets.
|
||||
*/
|
||||
filter=UCNV_SET_FILTER_2022_CN;
|
||||
} else if(cnvData->locale[0]=='j' && i==JISX208) {
|
||||
/*
|
||||
* Only add code points that map to Shift-JIS codes
|
||||
* corresponding to JIS X 0208.
|
||||
*/
|
||||
filter=UCNV_SET_FILTER_SJIS;
|
||||
} else if(i==KSC5601) {
|
||||
/*
|
||||
* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
|
||||
* are broader than GR94.
|
||||
*/
|
||||
filter=UCNV_SET_FILTER_GR94DBCS;
|
||||
#endif
|
||||
} else {
|
||||
filter=UCNV_SET_FILTER_NONE;
|
||||
}
|
||||
|
@ -3829,6 +3857,7 @@ const UConverterSharedData _ISO2022JPData={
|
|||
|
||||
} // namespace
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
/************* KR ***************/
|
||||
static const UConverterImpl _ISO2022KRImpl={
|
||||
UCNV_ISO_2022,
|
||||
|
@ -3945,5 +3974,6 @@ const UConverterSharedData _ISO2022CNData={
|
|||
};
|
||||
|
||||
} // namespace
|
||||
#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 1996-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*
|
||||
* uconv_bld.cpp:
|
||||
* ucnv_bld.cpp:
|
||||
*
|
||||
* Defines functions that are used in the creation/initialization/deletion
|
||||
* of converters and related structures.
|
||||
|
@ -64,33 +64,51 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
|||
#endif
|
||||
|
||||
&_Latin1Data,
|
||||
&_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
|
||||
&_UTF8Data, &_UTF16BEData, &_UTF16LEData,
|
||||
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL, NULL,
|
||||
#else
|
||||
&_UTF32BEData, &_UTF32LEData,
|
||||
#endif
|
||||
NULL,
|
||||
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION
|
||||
NULL,
|
||||
#else
|
||||
&_ISO2022Data,
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL,
|
||||
#else
|
||||
&_ISO2022Data,
|
||||
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
|
||||
&_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
|
||||
&_HZData,
|
||||
#endif
|
||||
|
||||
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL,
|
||||
#else
|
||||
&_SCSUData,
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION
|
||||
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL,
|
||||
#else
|
||||
&_ISCIIData,
|
||||
#endif
|
||||
|
||||
&_ASCIIData,
|
||||
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
|
||||
#else
|
||||
&_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
|
||||
#endif
|
||||
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
|
||||
NULL,
|
||||
#else
|
||||
&_CompoundTextData
|
||||
|
@ -105,18 +123,24 @@ static struct {
|
|||
const char *name;
|
||||
const UConverterType type;
|
||||
} const cnvNameType[] = {
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "bocu1", UCNV_BOCU1 },
|
||||
{ "cesu8", UCNV_CESU8 },
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#endif
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "hz",UCNV_HZ },
|
||||
#endif
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "imapmailboxname", UCNV_IMAP_MAILBOX },
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#endif
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "iscii", UCNV_ISCII },
|
||||
#endif
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
{ "iso2022", UCNV_ISO_2022 },
|
||||
#endif
|
||||
{ "iso88591", UCNV_LATIN_1 },
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "lmbcs1", UCNV_LMBCS_1 },
|
||||
{ "lmbcs11",UCNV_LMBCS_11 },
|
||||
{ "lmbcs16",UCNV_LMBCS_16 },
|
||||
|
@ -130,7 +154,9 @@ static struct {
|
|||
{ "lmbcs6", UCNV_LMBCS_6 },
|
||||
{ "lmbcs8", UCNV_LMBCS_8 },
|
||||
#endif
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "scsu", UCNV_SCSU },
|
||||
#endif
|
||||
{ "usascii", UCNV_US_ASCII },
|
||||
{ "utf16", UCNV_UTF16 },
|
||||
{ "utf16be", UCNV_UTF16_BigEndian },
|
||||
|
@ -142,6 +168,7 @@ static struct {
|
|||
{ "utf16oppositeendian", UCNV_UTF16_BigEndian},
|
||||
{ "utf16platformendian", UCNV_UTF16_LittleEndian },
|
||||
#endif
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "utf32", UCNV_UTF32 },
|
||||
{ "utf32be", UCNV_UTF32_BigEndian },
|
||||
{ "utf32le", UCNV_UTF32_LittleEndian },
|
||||
|
@ -152,9 +179,14 @@ static struct {
|
|||
{ "utf32oppositeendian", UCNV_UTF32_BigEndian },
|
||||
{ "utf32platformendian", UCNV_UTF32_LittleEndian },
|
||||
#endif
|
||||
#endif
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "utf7", UCNV_UTF7 },
|
||||
#endif
|
||||
{ "utf8", UCNV_UTF8 },
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
{ "x11compoundtext", UCNV_COMPOUND_TEXT}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2010-2014, International Business Machines
|
||||
* Copyright (C) 2010-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_ct.c
|
||||
|
@ -14,7 +14,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/uset.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2014, International Business Machines
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_lmb.cpp
|
||||
|
@ -25,7 +25,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u32.c
|
||||
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/utf.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u7.c
|
||||
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "ucnv_bld.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnv_u8.c
|
||||
|
@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
|
|||
static const uint32_t
|
||||
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
|
||||
|
||||
/**
 * Reports whether the converter's shared data is the CESU-8 data object.
 *
 * @param cnv converter to inspect; dereferenced only in the non-HTML-only build
 * @return TRUE if cnv->sharedData is &_CESU8Data; always FALSE when
 *         UCONFIG_ONLY_HTML_CONVERSION is set
 */
static UBool hasCESU8Data(const UConverter *cnv)
|
||||
{
|
||||
#if UCONFIG_ONLY_HTML_CONVERSION
|
||||
/* This branch never references _CESU8Data. */
return FALSE;
|
||||
#else
|
||||
return (UBool)(cnv->sharedData == &_CESU8Data);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
||||
UErrorCode * err)
|
||||
{
|
||||
|
@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
|||
const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
|
||||
const UChar *targetLimit = args->targetLimit;
|
||||
unsigned char *toUBytes = cnv->toUBytes;
|
||||
UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
|
||||
UBool isCESU8 = hasCESU8Data(cnv);
|
||||
uint32_t ch, ch2 = 0;
|
||||
int32_t i, inBytes;
|
||||
|
||||
|
||||
/* Restore size of current sequence */
|
||||
if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
||||
{
|
||||
|
@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
|
|||
const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
|
||||
const UChar *targetLimit = args->targetLimit;
|
||||
unsigned char *toUBytes = cnv->toUBytes;
|
||||
UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
|
||||
UBool isCESU8 = hasCESU8Data(cnv);
|
||||
uint32_t ch, ch2 = 0;
|
||||
int32_t i, inBytes;
|
||||
|
||||
|
@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
|
|||
UChar32 ch;
|
||||
uint8_t tempBuf[4];
|
||||
int32_t indexToWrite;
|
||||
UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
|
||||
UBool isNotCESU8 = !hasCESU8Data(cnv);
|
||||
|
||||
if (cnv->fromUChar32 && myTarget < targetLimit)
|
||||
{
|
||||
|
@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
|
|||
int32_t offsetNum, nextSourceIndex;
|
||||
int32_t indexToWrite;
|
||||
uint8_t tempBuf[4];
|
||||
UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
|
||||
UBool isNotCESU8 = !hasCESU8Data(cnv);
|
||||
|
||||
if (cnv->fromUChar32 && myTarget < targetLimit)
|
||||
{
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2014, International Business Machines
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvhz.c
|
||||
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
@ -635,4 +635,4 @@ const UConverterSharedData _HZData={
|
|||
0
|
||||
};
|
||||
|
||||
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
||||
#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2000-2012, International Business Machines
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: ucnvisci.c
|
||||
|
@ -17,7 +17,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2000-2011, International Business Machines
|
||||
* Copyright (C) 2000-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
|
@ -21,7 +21,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucnv_cb.h"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2014, International Business Machines
|
||||
* Copyright (C) 2002-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uconfig.h
|
||||
|
@ -200,7 +200,7 @@
|
|||
* It does not turn off legacy conversion because that is necessary
|
||||
* for ICU to work on EBCDIC platforms (for the default converter).
|
||||
* If you want "only collation" and do not build for EBCDIC,
|
||||
* then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
|
||||
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
|
||||
*
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
@ -269,6 +269,21 @@
|
|||
# define UCONFIG_NO_LEGACY_CONVERSION 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_ONLY_HTML_CONVERSION
|
||||
* This switch turns off all of the converters NOT listed in
|
||||
* the HTML encoding standard:
|
||||
* http://www.w3.org/TR/encoding/#names-and-labels
|
||||
*
|
||||
* This is not possible on EBCDIC platforms
|
||||
* because they need ibm-37 or ibm-1047 default converters.
|
||||
*
|
||||
* @draft ICU 55
|
||||
*/
|
||||
#ifndef UCONFIG_ONLY_HTML_CONVERSION
|
||||
# define UCONFIG_ONLY_HTML_CONVERSION 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_LEGACY_CONVERSION
|
||||
* This switch turns off all converters except for
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2013, International Business Machines
|
||||
* Copyright (C) 2005-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
|
|||
new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
|
||||
|
||||
new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
|
||||
new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
|
||||
|
||||
|
@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
|
|||
new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
|
||||
new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
|
||||
new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
|
||||
#endif
|
||||
};
|
||||
int32_t rCount = ARRAY_SIZE(tempArray);
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2012, International Business Machines
|
||||
* Copyright (C) 2005-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
|
|||
{0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
static const uint8_t escapeSequences_2022KR[][5] = {
|
||||
{0x1b, 0x24, 0x29, 0x43, 0x00}
|
||||
};
|
||||
|
@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
|
|||
{0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
|
||||
{0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
|
||||
};
|
||||
#endif
|
||||
|
||||
CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
|
||||
|
||||
|
@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
|
|||
return (confidence > 0);
|
||||
}
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
|
||||
|
||||
const char *CharsetRecog_2022KR::getName() const {
|
||||
|
@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
|
|||
results->set(textIn, this, confidence);
|
||||
return (confidence > 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
CharsetRecog_2022::~CharsetRecog_2022() {
|
||||
// nothing to do
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2012, International Business Machines
|
||||
* Copyright (C) 2005-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -65,6 +65,7 @@ public:
|
|||
UBool match(InputText *textIn, CharsetMatch *results) const;
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
class CharsetRecog_2022KR :public CharsetRecog_2022 {
|
||||
public:
|
||||
virtual ~CharsetRecog_2022KR();
|
||||
|
@ -84,6 +85,7 @@ public:
|
|||
|
||||
UBool match(InputText *textIn, CharsetMatch *results) const;
|
||||
};
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2013, International Business Machines
|
||||
* Copyright (C) 2005-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
|
|||
return (int32_t) (rawPercent * 300.0);
|
||||
}
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
static const uint8_t unshapeMap_IBM420[] = {
|
||||
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
|
||||
/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
|
||||
|
@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
CharsetRecog_sbcs::CharsetRecog_sbcs()
|
||||
{
|
||||
|
@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
|
|||
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
static const int32_t ngrams_IBM424_he_rtl[] = {
|
||||
0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
|
||||
0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
|
||||
|
@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
|
|||
/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
|
||||
/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
|
||||
};
|
||||
#endif
|
||||
|
||||
//ISO-8859-1,2,5,6,7,8,9 Ngrams
|
||||
|
||||
|
@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
|
|||
return (confidence > 0);
|
||||
}
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
|
||||
{
|
||||
// nothing to do
|
||||
|
@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
|
|||
results->set(textIn, this, confidence);
|
||||
return (confidence > 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2013, International Business Machines
|
||||
* Copyright (C) 2005-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
@ -50,6 +50,7 @@ public:
|
|||
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
class NGramParser_IBM420 : public NGramParser
|
||||
{
|
||||
private:
|
||||
|
@ -61,6 +62,7 @@ private:
|
|||
public:
|
||||
NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
class CharsetRecog_sbcs : public CharsetRecognizer
|
||||
|
@ -229,6 +231,7 @@ public:
|
|||
virtual UBool match(InputText *det, CharsetMatch *results) const;
|
||||
};
|
||||
|
||||
#if !UCONFIG_ONLY_HTML_CONVERSION
|
||||
class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
|
||||
{
|
||||
public:
|
||||
|
@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
|
|||
|
||||
virtual UBool match(InputText *det, CharsetMatch *results) const;
|
||||
};
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue