ICU-11296 based on patch from Jungshik, approved option name UCONFIG_ONLY_HTML_CONVERSION, turn off UTF-32, simplify changes, fix warnings

X-SVN-Rev: 37045
2025-04-07 22:44:49 +00:00 · 2015-02-20 19:31:33 +00:00 · 2015-02-20 19:31:33 +00:00 · 8e6898ae3d
commit 8e6898ae3d
parent 71035aa827
17 changed files with 155 additions and 51 deletions
--- a/icu4c/source/common/ucnv2022.cpp
+++ b/icu4c/source/common/ucnv2022.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2000-2014, International Business Machines
+*   Copyright (C) 2000-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv2022.cpp
@ -75,8 +75,10 @@
 */
 #endif

+#if !UCONFIG_ONLY_HTML_CONVERSION
 static const char SHIFT_IN_STR[]  = "\x0F";
 // static const char SHIFT_OUT_STR[] = "\x0E";
+#endif

 #define CR      0x0D
 #define LF      0x0A
@ -152,7 +154,11 @@ typedef enum  {
 } StateEnum;

 /* is the StateEnum charset value for a DBCS charset? */
+#if UCONFIG_ONLY_HTML_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs))
+#else
 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif

 #define CSM(cs) ((uint16_t)1<<(cs))

@ -165,13 +171,19 @@ typedef enum  {
 *   all versions, not just JIS7 and JIS8.
 * - ICU does not distinguish between different versions of JIS X 0208.
 */
+#if UCONFIG_ONLY_HTML_CONVERSION
+enum { MAX_JA_VERSION=0 };
+#else
 enum { MAX_JA_VERSION=4 };
+#endif
 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
+#if !UCONFIG_ONLY_HTML_CONVERSION
    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
+#endif
 };

 typedef enum {
@ -358,15 +370,16 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 };

-
 /* Type def for refactoring changeState_2022 code*/
 typedef enum{
 #ifdef U_ENABLE_GENERIC_ISO_2022
    ISO_2022=0,
 #endif
    ISO_2022_JP=1,
+#if !UCONFIG_ONLY_HTML_CONVERSION
    ISO_2022_KR=2,
    ISO_2022_CN=3
+#endif
 } Variant2022;

 /*********** ISO 2022 Converter Protos ***********/
@ -397,8 +410,11 @@ namespace {

 /*const UConverterSharedData _ISO2022Data;*/
 extern const UConverterSharedData _ISO2022JPData;
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
 extern const UConverterSharedData _ISO2022KRData;
 extern const UConverterSharedData _ISO2022CNData;
+#endif

 }  // namespace

@ -511,6 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
            myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
            myConverterData->name[len+1]='\0';
        }
+#if !UCONFIG_ONLY_HTML_CONVERSION
        else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
            (myLocale[2]=='_' || myLocale[2]=='\0'))
        {
@ -580,6 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
                (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
            }
        }
+#endif  // !UCONFIG_ONLY_HTML_CONVERSION
        else{
 #ifdef U_ENABLE_GENERIC_ISO_2022
            myConverterData->isFirstBuffer = TRUE;
@ -714,6 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 /*************** to unicode *******************/
 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
 /*      0                1               2               3               4               5               6               7               8               9    */
@ -726,6 +745,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 };
+#endif


 static UCNV_TableStates_2022
@ -898,6 +918,7 @@ DONE:
                }
            }
            break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
        case ISO_2022_CN:
            {
                StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
@ -959,6 +980,7 @@ DONE:
                *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
            }
            break;
+#endif  // !UCONFIG_ONLY_HTML_CONVERSION

        default:
            *err = U_ILLEGAL_ESCAPE_SEQUENCE;
@ -1001,6 +1023,7 @@ DONE:
    }
 }

+#if !UCONFIG_ONLY_HTML_CONVERSION
 /*Checks the characters of the buffer against valid 2022 escape sequences
 *if the match we return a pointer to the initial start of the sequence otherwise
 *we return sourceLimit
@ -1055,7 +1078,7 @@ getEndOfBuffer_2022(const char** source,
    return mySource;
 #endif
 }
-
+#endif

 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
 * any future change in _MBCSFromUChar32() function should be reflected here.
@ -2269,6 +2292,7 @@ endloop:
 }


+#if !UCONFIG_ONLY_HTML_CONVERSION
 /***************************************************************
 *   Rules for ISO-2022-KR encoding
 *   i) The KSC5601 designator sequence should appear only once in a file,
@ -3412,6 +3436,7 @@ endloop:
    args->target = myTarget;
    args->source = mySource;
 }
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */

 static void
 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
@ -3638,6 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
            sa->addRange(sa->set, HWKANA_START, HWKANA_END);
        }
        break;
+#if !UCONFIG_ONLY_HTML_CONVERSION
    case 'c':
    case 'z':
        /* include ASCII for CN */
@ -3649,6 +3675,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
                cnvData->currentConverter, sa, which, pErrorCode);
        /* the loop over myConverterArray[] will simply not find another converter */
        break;
+#endif
    default:
        break;
    }
@ -3669,9 +3696,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
        UConverterSetFilter filter;
        if(cnvData->myConverterArray[i]!=NULL) {
-            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
-                cnvData->version==0 && i==CNS_11643
-            ) {
+            if(cnvData->locale[0]=='j' && i==JISX208) {
+                /*
+                 * Only add code points that map to Shift-JIS codes
+                 * corresponding to JIS X 0208.
+                 */
+                filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_ONLY_HTML_CONVERSION
+            } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+                       cnvData->version==0 && i==CNS_11643) {
                /*
                 * Version-specific for CN:
                 * CN version 0 does not map CNS planes 3..7 although
@ -3680,18 +3713,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
                 * The two versions create different Unicode sets.
                 */
                filter=UCNV_SET_FILTER_2022_CN;
-            } else if(cnvData->locale[0]=='j' && i==JISX208) {
-                /*
-                 * Only add code points that map to Shift-JIS codes
-                 * corresponding to JIS X 0208.
-                 */
-                filter=UCNV_SET_FILTER_SJIS;
            } else if(i==KSC5601) {
                /*
                 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
                 * are broader than GR94.
                 */
                filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
            } else {
                filter=UCNV_SET_FILTER_NONE;
            }
@ -3829,6 +3857,7 @@ const UConverterSharedData _ISO2022JPData={

 }  // namespace

+#if !UCONFIG_ONLY_HTML_CONVERSION
 /************* KR ***************/
 static const UConverterImpl _ISO2022KRImpl={
    UCNV_ISO_2022,
@ -3945,5 +3974,6 @@ const UConverterSharedData _ISO2022CNData={
 };

 }  // namespace
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */

 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
--- a/icu4c/source/common/ucnv_bld.cpp
+++ b/icu4c/source/common/ucnv_bld.cpp
@ -1,11 +1,11 @@
 /*
 ********************************************************************
 * COPYRIGHT:
- * Copyright (c) 1996-2014, International Business Machines Corporation and
+ * Copyright (c) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 *
- *  uconv_bld.cpp:
+ *  ucnv_bld.cpp:
 *
 *  Defines functions that are used in the creation/initialization/deletion
 *  of converters and related structures.
@ -64,33 +64,51 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
 #endif

    &_Latin1Data,
-    &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
+    &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+    NULL, NULL,
+#else
+    &_UTF32BEData, &_UTF32LEData,
+#endif
    NULL,

 #if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
+#else
+    &_ISO2022Data,
+#endif
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
 #else
-    &_ISO2022Data,
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
 #endif

+#if UCONFIG_ONLY_HTML_CONVERSION
+    NULL,
+#else
    &_SCSUData,
+#endif

-#if UCONFIG_NO_LEGACY_CONVERSION
+
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
 #else
    &_ISCIIData,
 #endif

    &_ASCIIData,
+#if UCONFIG_ONLY_HTML_CONVERSION
+    NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
+#else
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
+#endif

-#if UCONFIG_NO_LEGACY_CONVERSION
+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
 #else
    &_CompoundTextData
@ -105,18 +123,24 @@ static struct {
  const char *name;
  const UConverterType type;
 } const cnvNameType[] = {
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "bocu1", UCNV_BOCU1 },
  { "cesu8", UCNV_CESU8 },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "hz",UCNV_HZ },
 #endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "imapmailboxname", UCNV_IMAP_MAILBOX },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "iscii", UCNV_ISCII },
+#endif
+#if !UCONFIG_NO_LEGACY_CONVERSION
  { "iso2022", UCNV_ISO_2022 },
 #endif
  { "iso88591", UCNV_LATIN_1 },
-#if !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "lmbcs1", UCNV_LMBCS_1 },
  { "lmbcs11",UCNV_LMBCS_11 },
  { "lmbcs16",UCNV_LMBCS_16 },
@ -130,7 +154,9 @@ static struct {
  { "lmbcs6", UCNV_LMBCS_6 },
  { "lmbcs8", UCNV_LMBCS_8 },
 #endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "scsu", UCNV_SCSU },
+#endif
  { "usascii", UCNV_US_ASCII },
  { "utf16", UCNV_UTF16 },
  { "utf16be", UCNV_UTF16_BigEndian },
@ -142,6 +168,7 @@ static struct {
  { "utf16oppositeendian", UCNV_UTF16_BigEndian},
  { "utf16platformendian", UCNV_UTF16_LittleEndian },
 #endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf32", UCNV_UTF32 },
  { "utf32be", UCNV_UTF32_BigEndian },
  { "utf32le", UCNV_UTF32_LittleEndian },
@ -152,9 +179,14 @@ static struct {
  { "utf32oppositeendian", UCNV_UTF32_BigEndian },
  { "utf32platformendian", UCNV_UTF32_LittleEndian },
 #endif
+#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf7", UCNV_UTF7 },
+#endif
  { "utf8", UCNV_UTF8 },
+#if !UCONFIG_ONLY_HTML_CONVERSION
  { "x11compoundtext", UCNV_COMPOUND_TEXT}
+#endif
 };


--- a/icu4c/source/common/ucnv_ct.c
+++ b/icu4c/source/common/ucnv_ct.c
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2010-2014, International Business Machines
+*   Copyright (C) 2010-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_ct.c
@ -14,7 +14,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "unicode/uset.h"
--- a/icu4c/source/common/ucnv_lmb.c
+++ b/icu4c/source/common/ucnv_lmb.c
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2000-2014, International Business Machines
+*   Copyright (C) 2000-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_lmb.cpp
@ -25,7 +25,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv_err.h"
 #include "unicode/ucnv.h"
--- a/icu4c/source/common/ucnv_u32.c
+++ b/icu4c/source/common/ucnv_u32.c
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2011, International Business Machines
+*   Copyright (C) 2002-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u32.c
@ -16,7 +16,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "unicode/utf.h"
--- a/icu4c/source/common/ucnv_u7.c
+++ b/icu4c/source/common/ucnv_u7.c
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2011, International Business Machines
+*   Copyright (C) 2002-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u7.c
@ -16,7 +16,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "ucnv_bld.h"
--- a/icu4c/source/common/ucnv_u8.c
+++ b/icu4c/source/common/ucnv_u8.c
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2012, International Business Machines
+*   Copyright (C) 2002-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u8.c
@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
 static const uint32_t
 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };

+static UBool hasCESU8Data(const UConverter *cnv)
+{
+#if UCONFIG_ONLY_HTML_CONVERSION
+    return FALSE;
+#else
+    return (UBool)(cnv->sharedData == &_CESU8Data);
+#endif
+}
+
 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
                                  UErrorCode * err)
 {
@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
    const UChar *targetLimit = args->targetLimit;
    unsigned char *toUBytes = cnv->toUBytes;
-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+    UBool isCESU8 = hasCESU8Data(cnv);
    uint32_t ch, ch2 = 0;
    int32_t i, inBytes;
-  
+
    /* Restore size of current sequence */
    if (cnv->toUnicodeStatus && myTarget < targetLimit)
    {
@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
    const UChar *targetLimit = args->targetLimit;
    unsigned char *toUBytes = cnv->toUBytes;
-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
+    UBool isCESU8 = hasCESU8Data(cnv);
    uint32_t ch, ch2 = 0;
    int32_t i, inBytes;

@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
    UChar32 ch;
    uint8_t tempBuf[4];
    int32_t indexToWrite;
-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+    UBool isNotCESU8 = !hasCESU8Data(cnv);

    if (cnv->fromUChar32 && myTarget < targetLimit)
    {
@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
    int32_t offsetNum, nextSourceIndex;
    int32_t indexToWrite;
    uint8_t tempBuf[4];
-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
+    UBool isNotCESU8 = !hasCESU8Data(cnv);

    if (cnv->fromUChar32 && myTarget < targetLimit)
    {
--- a/icu4c/source/common/ucnvbocu.cpp
+++ b/icu4c/source/common/ucnvbocu.cpp
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2002-2011, International Business Machines
+*   Copyright (C) 2002-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -19,7 +19,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "unicode/ucnv_cb.h"
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2000-2014, International Business Machines
+*   Copyright (C) 2000-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnvhz.c
@ -16,7 +16,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "cmemory.h"
 #include "unicode/ucnv.h"
@ -635,4 +635,4 @@ const UConverterSharedData _HZData={
        0
 };

-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
+#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
--- a/icu4c/source/common/ucnvisci.c
+++ b/icu4c/source/common/ucnvisci.c
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2000-2012, International Business Machines
+*   Copyright (C) 2000-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnvisci.c
@ -17,7 +17,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "unicode/ucnv_cb.h"
--- a/icu4c/source/common/ucnvscsu.c
+++ b/icu4c/source/common/ucnvscsu.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2011, International Business Machines
+*   Copyright (C) 2000-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -21,7 +21,7 @@

 #include "unicode/utypes.h"

-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION

 #include "unicode/ucnv.h"
 #include "unicode/ucnv_cb.h"
--- a/icu4c/source/common/unicode/uconfig.h
+++ b/icu4c/source/common/unicode/uconfig.h
@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2014, International Business Machines
+*   Copyright (C) 2002-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  uconfig.h
@ -200,7 +200,7 @@
 * It does not turn off legacy conversion because that is necessary
 * for ICU to work on EBCDIC platforms (for the default converter).
 * If you want "only collation" and do not build for EBCDIC,
- * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
 *
 * @stable ICU 2.4
 */
@ -269,6 +269,21 @@
 #   define UCONFIG_NO_LEGACY_CONVERSION 1
 #endif

+/**
+ * \def UCONFIG_ONLY_HTML_CONVERSION
+ * This switch turns off all of the converters NOT listed in
+ * the HTML encoding standard:
+ * http://www.w3.org/TR/encoding/#names-and-labels
+ *
+ * This is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @draft ICU 55
+ */
+#ifndef UCONFIG_ONLY_HTML_CONVERSION
+#   define UCONFIG_ONLY_HTML_CONVERSION 0
+#endif
+
 /**
 * \def UCONFIG_NO_LEGACY_CONVERSION
 * This switch turns off all converters except for
--- a/icu4c/source/i18n/csdetect.cpp
+++ b/icu4c/source/i18n/csdetect.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2013, International Business Machines
+ *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),

        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+#if !UCONFIG_ONLY_HTML_CONVERSION
        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),

@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
+#endif
    };
    int32_t rCount = ARRAY_SIZE(tempArray);

--- a/icu4c/source/i18n/csr2022.cpp
+++ b/icu4c/source/i18n/csr2022.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
    {0x1b, 0x2e, 0x46, 0x00, 0x00}    // ISO 8859-7
 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 static const uint8_t escapeSequences_2022KR[][5] = {
    {0x1b, 0x24, 0x29, 0x43, 0x00}   
 };
@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
    {0x1b, 0x4e, 0x00, 0x00, 0x00},   // SS2
    {0x1b, 0x4f, 0x00, 0x00, 0x00},   // SS3
 };
+#endif

 CharsetRecog_2022JP::~CharsetRecog_2022JP() {}

@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
    return (confidence > 0);
 }

+#if !UCONFIG_ONLY_HTML_CONVERSION
 CharsetRecog_2022KR::~CharsetRecog_2022KR() {}

 const char *CharsetRecog_2022KR::getName() const {
@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
    results->set(textIn, this, confidence);
    return (confidence > 0);
 }
+#endif

 CharsetRecog_2022::~CharsetRecog_2022() {
    // nothing to do
--- a/icu4c/source/i18n/csr2022.h
+++ b/icu4c/source/i18n/csr2022.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -65,6 +65,7 @@ public:
    UBool match(InputText *textIn, CharsetMatch *results) const;
 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 class CharsetRecog_2022KR :public CharsetRecog_2022 {
 public:
    virtual ~CharsetRecog_2022KR();
@ -84,6 +85,7 @@ public:

    UBool match(InputText *textIn, CharsetMatch *results) const;
 };
+#endif

 U_NAMESPACE_END

--- a/icu4c/source/i18n/csrsbcs.cpp
+++ b/icu4c/source/i18n/csrsbcs.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2013, International Business Machines
+ *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
    return (int32_t) (rawPercent * 300.0);
 }

+#if !UCONFIG_ONLY_HTML_CONVERSION
 static const uint8_t unshapeMap_IBM420[] = {
 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 
@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
        }
    }
 }
+#endif

 CharsetRecog_sbcs::CharsetRecog_sbcs()
 {
@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 
 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 static const int32_t ngrams_IBM424_he_rtl[] = {
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 
@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 
 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 
 };
+#endif

 //ISO-8859-1,2,5,6,7,8,9 Ngrams

@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
    return (confidence > 0);
 }

+#if !UCONFIG_ONLY_HTML_CONVERSION
 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
 {
    // nothing to do
@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
    results->set(textIn, this, confidence);
    return (confidence > 0);
 }
+#endif

 U_NAMESPACE_END
 #endif
--- a/icu4c/source/i18n/csrsbcs.h
+++ b/icu4c/source/i18n/csrsbcs.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2013, International Business Machines
+ *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -50,6 +50,7 @@ public:

 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 class NGramParser_IBM420 : public NGramParser
 {
 private:
@ -61,6 +62,7 @@ private:
 public:
    NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
 };
+#endif


 class CharsetRecog_sbcs : public CharsetRecognizer
@ -229,6 +231,7 @@ public:
    virtual UBool match(InputText *det, CharsetMatch *results) const;
 };

+#if !UCONFIG_ONLY_HTML_CONVERSION
 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
 {
 public:
@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
    
    virtual UBool match(InputText *det, CharsetMatch *results) const;
 };
+#endif

 U_NAMESPACE_END