ICU-1439 implement ucnv_getUnicodeSet() for roundtrippable code points

X-SVN-Rev: 11464
2025-04-14 17:24:01 +00:00 · 2003-04-05 01:33:02 +00:00 · 2003-04-05 01:33:02 +00:00 · a6213ee1c0
commit a6213ee1c0
parent 45065374f1
15 changed files with 418 additions and 29 deletions
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1998-2001, International Business Machines
+*   Copyright (C) 1998-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -24,6 +24,7 @@
 #include "unicode/ures.h"
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"
+#include "unicode/uset.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "umutex.h"
@ -669,6 +670,34 @@ ucnv_getPlatform (const UConverter * converter,
    return (UConverterPlatform)converter->sharedData->staticData->platform;
 }

+/**
+ * Fills set with the code points that cnv supports for the set type "which".
+ * Sets U_ILLEGAL_ARGUMENT_ERROR or U_UNSUPPORTED_ERROR in *pErrorCode on failure.
+ * ### TODO @draft ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+                   USet *set,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode) {
+    /* nothing to do without an error code, or after an earlier failure */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(cnv==NULL || set==NULL || which<UCNV_ROUNDTRIP_SET || which>=UCNV_SET_COUNT) {
+        /* missing converter or set, or "which" is not a valid set type */
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    } else if(cnv->sharedData->impl->getUnicodeSet==NULL) {
+        /* this converter type does not implement the function; set is not modified */
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+    } else {
+        /* empty the set, then let the converter add the code points it supports */
+        uset_clear(set);
+        cnv->sharedData->impl->getUnicodeSet(cnv, set, which, pErrorCode);
+    }
+}
+
 U_CAPI void U_EXPORT2
    ucnv_getToUCallBack (const UConverter * converter,
                         UConverterToUCallback *action,
--- a/icu4c/source/common/ucnv_cnv.c
+++ b/icu4c/source/common/ucnv_cnv.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2001, International Business Machines
+*   Copyright (C) 2000-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -17,8 +17,9 @@

 #include "unicode/utypes.h"
 #include "unicode/ucnv_err.h"
-#include "ucnv_cnv.h"
 #include "unicode/ucnv.h"
+#include "unicode/uset.h"
+#include "ucnv_cnv.h"
 #include "cmemory.h"

 /*Empties the internal unicode output buffer */
@ -239,3 +240,20 @@ ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0xffff;
 }
+
+/* Adds the whole code point range U+0000..U+10FFFF to set; the other arguments are unused. */
+U_CFUNC void
+ucnv_getCompleteUnicodeSet(const UConverter *cnv, USet *set,
+                           UConverterUnicodeSet which, UErrorCode *pErrorCode) {
+    const UChar32 first=0, last=0x10ffff;
+    uset_addRange(set, first, last);
+}
+
+/* Adds U+0000..U+D7FF and U+E000..U+10FFFF to set, i.e. all but the surrogates. */
+U_CFUNC void
+ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, USet *set,
+                               UConverterUnicodeSet which,
+                               UErrorCode *pErrorCode) {
+    uset_addRange(set, 0, 0xd7ff);        /* below the surrogates */
+    uset_addRange(set, 0xe000, 0x10ffff); /* above the surrogates */
+}
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2002, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
@ -19,6 +19,7 @@
 #define UCNV_CNV_H

 #include "unicode/utypes.h"
+#include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"
 #include "ucnv_bld.h"
 #include "ucnvmbcs.h"
@ -97,6 +98,12 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter   *cnv,
                                             int32_t            *pBufferSize, 
                                             UErrorCode         *status);

+/** Per-converter implementation of ucnv_getUnicodeSet(): adds code points to set. ### TODO @draft ICU 2.6 */
+typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv,
+                                         USet *set,
+                                         UConverterUnicodeSet which,
+                                         UErrorCode *pErrorCode);
+
 UBool CONVERSION_U_SUCCESS (UErrorCode err);

 void ucnv_flushInternalUnicodeBuffer (UConverter * _this,
@ -149,6 +156,7 @@ struct UConverterImpl {
    UConverterGetName getName;
    UConverterWriteSub writeSub;
    UConverterSafeClone safeClone;
+    UConverterGetUnicodeSet getUnicodeSet; /* ### TODO ICU 2.6 */
 };

 extern const UConverterSharedData
@ -231,4 +239,16 @@ ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
                             UBool collectPairs,
                             UErrorCode *pErrorCode);

+U_CFUNC void
+ucnv_getCompleteUnicodeSet(const UConverter *cnv,
+                   USet *set,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode); /* adds U+0000..U+10FFFF to set */
+
+U_CFUNC void
+ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
+                               USet *set,
+                               UConverterUnicodeSet which,
+                               UErrorCode *pErrorCode); /* adds U+0000..U+D7FF and U+E000..U+10FFFF to set */
+
 #endif /* UCNV_CNV */
--- a/icu4c/source/common/ucnv_lmb.c
+++ b/icu4c/source/common/ucnv_lmb.c
@ -519,7 +519,11 @@ static const UConverterImpl _LMBCSImpl##n={\
    _LMBCSFromUnicode,\
    _LMBCSFromUnicode,\
    _LMBCSGetNextUChar,\
-    NULL\
+    NULL,\
+    NULL,\
+    NULL,\
+    NULL,\
+    ucnv_getCompleteUnicodeSet\
 };\
 static const UConverterStaticData _LMBCSStaticData##n={\
  sizeof(UConverterStaticData),\
--- a/icu4c/source/common/ucnv_u16.c
+++ b/icu4c/source/common/ucnv_u16.c
@ -439,7 +439,10 @@ static const UConverterImpl _UTF16BEImpl={
    T_UConverter_getNextUChar_UTF16_BE,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 /* The 1200 CCSID refers to any version of Unicode with any endianess of UTF-16 */
@ -532,7 +535,10 @@ static const UConverterImpl _UTF16LEImpl={
    T_UConverter_getNextUChar_UTF16_LE,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };


@ -761,7 +767,8 @@ static const UConverterImpl _UTF16Impl = {
    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
    NULL,
    NULL,
-    NULL
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _UTF16StaticData = {
--- a/icu4c/source/common/ucnv_u32.c
+++ b/icu4c/source/common/ucnv_u32.c
@ -551,7 +551,10 @@ static const UConverterImpl _UTF32BEImpl = {
    T_UConverter_getNextUChar_UTF32_BE,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
@ -1035,7 +1038,10 @@ static const UConverterImpl _UTF32LEImpl = {
    T_UConverter_getNextUChar_UTF32_LE,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
@ -1292,7 +1298,8 @@ static const UConverterImpl _UTF32Impl = {
    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
    NULL,
    NULL,
-    NULL
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _UTF32StaticData = {
--- a/icu4c/source/common/ucnv_u7.c
+++ b/icu4c/source/common/ucnv_u7.c
@ -792,7 +792,9 @@ static const UConverterImpl _UTF7Impl={

    NULL,
    _UTF7GetName,
-    NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
+    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _UTF7StaticData={
@ -1527,7 +1529,9 @@ static const UConverterImpl _IMAPImpl={

    NULL,
    NULL,
-    NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
+    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _IMAPStaticData={
--- a/icu4c/source/common/ucnv_u8.c
+++ b/icu4c/source/common/ucnv_u8.c
@ -975,7 +975,8 @@ static const UConverterImpl _UTF8Impl={
    NULL,
    NULL,
    NULL,
-    NULL
+    NULL,
+    ucnv_getNonSurrogateUnicodeSet
 };

 /* The 1208 CCSID refers to any version of Unicode of UTF-8 */
--- a/icu4c/source/common/ucnvbocu.c
+++ b/icu4c/source/common/ucnvbocu.c
@ -1597,7 +1597,8 @@ static const UConverterImpl _Bocu1Impl={
    NULL,
    NULL,
    NULL,
-    NULL
+    NULL,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _Bocu1StaticData={
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@ -20,10 +20,11 @@

 #include "cmemory.h"
 #include "unicode/ucnv_err.h"
-#include "ucnv_bld.h"
 #include "unicode/ucnv.h"
-#include "ucnv_cnv.h"
 #include "unicode/ucnv_cb.h"
+#include "unicode/uset.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"

 #define UCNV_TILDE 0x7E          /* ~ */
 #define UCNV_OPEN_BRACE 0x7B     /* { */
@ -635,7 +636,20 @@ _HZ_SafeClone(const UConverter *cnv,
    return &localClone->cnv;
 }

+/* Adds the tilde plus everything that the GB sub-converter supports to set. */
+static void
+_HZ_GetUnicodeSet(const UConverter *cnv,
+                  USet *set,
+                  UConverterUnicodeSet which,
+                  UErrorCode *pErrorCode) {
+    const UConverter *gb=((UConverterDataHZ*)cnv->extraInfo)->gbConverter;

+    /* the tilde '~' is hardcoded in the converter */
+    uset_add(set, UCNV_TILDE);
+
+    /* the sub-converter contributes all of the code points that it handles */
+    gb->sharedData->impl->getUnicodeSet(gb, set, which, pErrorCode);
+}

 static const UConverterImpl _HZImpl={

@ -657,7 +671,8 @@ static const UConverterImpl _HZImpl={
    NULL,
    NULL,
    _HZ_WriteSub,
-    _HZ_SafeClone
+    _HZ_SafeClone,
+    _HZ_GetUnicodeSet
 };

 static const UConverterStaticData _HZStaticData={
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@ -15,6 +15,7 @@
 #include "unicode/utypes.h"
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"
+#include "unicode/uset.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"

@ -425,6 +426,14 @@ getTrail:
    pArgs->offsets=offsets;
 }

+/* Adds U+0000..U+00FF to set; cnv, which and pErrorCode are not used. */
+static void
+_Latin1GetUnicodeSet(const UConverter *cnv, USet *set,
+                     UConverterUnicodeSet which, UErrorCode *pErrorCode) {
+    const UChar32 last=0xff;
+    uset_addRange(set, 0, last);
+}
+
 static const UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

@ -442,7 +451,10 @@ static const UConverterImpl _Latin1Impl={
    _Latin1GetNextUChar,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    _Latin1GetUnicodeSet
 };

 static const UConverterStaticData _Latin1StaticData={
@ -713,6 +725,14 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
    return 0xffff;
 }

+/* Adds U+0000..U+007F to set; cnv, which and pErrorCode are not used. */
+static void
+_ASCIIGetUnicodeSet(const UConverter *cnv, USet *set,
+                    UConverterUnicodeSet which, UErrorCode *pErrorCode) {
+    const UChar32 last=0x7f;
+    uset_addRange(set, 0, last);
+}
+
 static const UConverterImpl _ASCIIImpl={
    UCNV_US_ASCII,

@ -730,7 +750,10 @@ static const UConverterImpl _ASCIIImpl={
    _ASCIIGetNextUChar,

    NULL,
-    NULL
+    NULL,
+    NULL,
+    NULL,
+    _ASCIIGetUnicodeSet
 };

 static const UConverterStaticData _ASCIIStaticData={
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2001, International Business Machines
+*   Copyright (C) 2000-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@ -43,6 +43,7 @@
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_cb.h"
 #include "unicode/udata.h"
+#include "unicode/uset.h"
 #include "ucnv_bld.h"
 #include "ucnvmbcs.h"
 #include "ucnv_cnv.h"
@ -398,7 +399,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {

        for(st1=0; st1<maxStage1; ++st1) {
            st2=table[st1];
-            if(st2!=0) {
+            if(st2>maxStage1) {
                stage2=table+st2;
                for(st2=0; st2<64; ++st2) {
                    st3=stage2[st2];
@ -419,7 +420,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {

        for(st1=0; st1<maxStage1; ++st1) {
            st2=table[st1];
-            if(st2!=0) {
+            if(st2>(maxStage1>>1)) {
                stage2=(const uint32_t *)table+st2;
                for(st2=0; st2<64; ++st2) {
                    st3=stage2[st2]&0xffff;
@ -452,6 +453,105 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) {
    }
 }

+static void /* adds to set every code point whose from-Unicode entry has its roundtrip flag set */
+_MBCSGetUnicodeSet(const UConverter *cnv,
+                   USet *set,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode) { /* which and pErrorCode are not read in this function */
+    UConverterMBCSTable *mbcsTable;
+    const uint16_t *table;
+
+    uint32_t st3;
+    uint16_t st1, maxStage1, st2;
+
+    UChar32 c;
+
+    if(cnv->options&_MBCS_OPTION_GB18030) { /* GB 18030 option: all code points except surrogates, no table walk */
+        uset_addRange(set, 0, 0xd7ff);
+        uset_addRange(set, 0xe000, 0x10ffff);
+        return;
+    }
+
+    /* enumerate the from-Unicode trie table: stage 1 -> stage 2 -> stage 3 */
+    mbcsTable=&cnv->sharedData->table->mbcs;
+    table=mbcsTable->fromUnicodeTable;
+    if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+        maxStage1=0x440; /* 0x440 stage 1 entries * 1024 code points each = U+0000..U+10FFFF */
+    } else {
+        maxStage1=0x40; /* 0x40 stage 1 entries * 1024 code points each = U+0000..U+FFFF */
+    }
+
+    c=0; /* keep track of the current code point while enumerating */
+
+    if(mbcsTable->outputType==MBCS_OUTPUT_1) { /* stage 2 and stage 3 entries are 16-bit in this branch */
+        const uint16_t *stage2, *stage3, *results;
+
+        results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
+
+        for(st1=0; st1<maxStage1; ++st1) {
+            st2=table[st1];
+            if(st2>maxStage1) { /* NOTE(review): an index <=maxStage1 is treated as an empty block - presumably it points at the shared all-unassigned block; confirm against the table layout */
+                stage2=table+st2;
+                for(st2=0; st2<64; ++st2) { /* 64 stage 2 entries, each covering 16 code points */
+                    if((st3=stage2[st2])!=0) {
+                        /* read the stage 3 block: 16 results starting at index st3 */
+                        stage3=results+st3;
+
+                        /*
+                         * Add code points for which the roundtrip flag is set.
+                         * Once we get a set for fallback mappings, we have to use
+                         * a threshold variable with a value of 0x800.
+                         * See _MBCSSingleFromBMPWithOffsets() and
+                         * MBCS_SINGLE_RESULT_FROM_U() for details.
+                         */
+                        do {
+                            if(*stage3++>=0xf00) {
+                                uset_add(set, c);
+                            }
+                        } while((++c&0xf)!=0); /* stop when c reaches the next multiple of 16 */
+                    } else {
+                        c+=16; /* empty stage 3 block */
+                    }
+                }
+            } else {
+                c+=1024; /* empty stage 2 block */
+            }
+        }
+    } else { /* all other output types: stage 2 entries are 32-bit */
+        const uint32_t *stage2;
+
+        for(st1=0; st1<maxStage1; ++st1) {
+            st2=table[st1];
+            if(st2>(maxStage1>>1)) { /* same emptiness test as above; st2 indexes uint32_t units here, hence maxStage1/2 */
+                stage2=(const uint32_t *)table+st2;
+                for(st2=0; st2<64; ++st2) { /* 64 stage 2 entries, each covering 16 code points */
+                    if((st3=stage2[st2])!=0) {
+                        /* get the roundtrip flags for the stage 3 block: upper 16 bits, one bit per code point */
+                        st3>>=16;
+
+                        /*
+                         * Add code points for which the roundtrip flag is set.
+                         * Once we get a set for fallback mappings, we have to check
+                         * non-roundtrip stage 3 results for whether they are 0.
+                         * See _MBCSFromUnicodeWithOffsets() for details.
+                         */
+                        do {
+                            if(st3&1) {
+                                uset_add(set, c);
+                            }
+                            st3>>=1; /* move the next code point's flag into bit 0 */
+                        } while((++c&0xf)!=0); /* stop when c reaches the next multiple of 16 */
+                    } else {
+                        c+=16; /* empty stage 3 block */
+                    }
+                }
+            } else {
+                c+=1024; /* empty stage 2 block */
+            }
+        }
+    }
+}
+
 /* EBCDIC swap LF<->NL ------------------------------------------------------ */

 /*
@ -3561,7 +3661,9 @@ static const UConverterImpl _MBCSImpl={

    _MBCSGetStarters,
    _MBCSGetName,
-    _MBCSWriteSub
+    _MBCSWriteSub,
+    NULL,
+    _MBCSGetUnicodeSet
 };


--- a/icu4c/source/common/ucnvscsu.c
+++ b/icu4c/source/common/ucnvscsu.c
@ -2198,7 +2198,8 @@ static const UConverterImpl _SCSUImpl={
    NULL,
    _SCSUGetName,
    _SCSUWriteSub,
-    _SCSUSafeClone
+    _SCSUSafeClone,
+    ucnv_getCompleteUnicodeSet
 };

 static const UConverterStaticData _SCSUStaticData={
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2002, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *  ucnv.h:
@ -51,6 +51,14 @@ typedef struct UConverter UConverter;
 #include "unicode/ucnv_err.h"
 #include "unicode/uenum.h"

+#ifndef __USET_H__
+
+/* opaque forward declaration, skipped when __USET_H__ is defined (presumably by unicode/uset.h); see unicode/uset.h */
+struct USet;
+typedef struct USet USet;
+
+#endif
+
 U_CDECL_BEGIN

 /** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
@ -679,6 +687,19 @@ ucnv_getStarters(const UConverter* converter,
                 UBool starters[256],
                 UErrorCode* err);

+/** Selects which set ucnv_getUnicodeSet() returns. ### TODO @draft ICU 2.6 */
+typedef enum UConverterUnicodeSet {
+    UCNV_ROUNDTRIP_SET, /* the code points that the converter can roundtrip */
+    UCNV_SET_COUNT /* number of selectors; ucnv_getUnicodeSet() rejects it as an argument */
+} UConverterUnicodeSet;
+
+/** Clears set, then adds the code points that cnv supports for "which". ### TODO @draft ICU 2.6 */
+U_CAPI void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+                   USet *set,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode);
+
 /**
 * Gets the current calback function used by the converter when an illegal
 *  or invalid codepage sequence is found. 
--- a/icu4c/source/test/cintltst/ncnvtst.c
+++ b/icu4c/source/test/cintltst/ncnvtst.c
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT: 
- * Copyright (c) 1997-2001, International Business Machines Corporation and
+ * Copyright (c) 1997-2003, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
 /********************************************************************************
@ -16,9 +16,10 @@
 #include "cmemory.h"
 #include "unicode/uloc.h"
 #include "unicode/ucnv.h"
-#include "cintltst.h"
 #include "unicode/utypes.h"
 #include "unicode/ustring.h"
+#include "unicode/uset.h"
+#include "cintltst.h"

 #define MAX_LENGTH 999

@ -31,7 +32,7 @@ static int32_t  gOutBufferSize = 0;
 static char     gNuConvTestName[1024];

 #define nct_min(x,y)  ((x<y) ? x : y)
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* element count, cast to int32_t for comparison with signed loop indexes */

 static void printSeq(const unsigned char* a, int len);
 static void printSeqErr(const unsigned char* a, int len);
@ -67,6 +68,7 @@ static void TestAvailableConverters(void);
 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
 static void TestResetBehaviour(void);
 static void TestTruncated(void);
+static void TestUnicodeSet(void);

 static void TestWithBufferSize(int32_t osize, int32_t isize);

@ -120,6 +122,7 @@ void addExtraTests(TestNode** root)
     addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
     addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
     addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
+     addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
 }

 /*test surrogate behaviour*/
@ -1810,3 +1813,136 @@ TestTruncated() {
        doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
    }
 }
+
+typedef struct NameRange { /* expectations for one converter in TestUnicodeSet() */
+    const char *name; /* converter name passed to ucnv_open() */
+    UChar32 start, end, start2, end2, notStart, notEnd; /* two required ranges (start2<0: no second one), one excluded range (notStart<0: none) */
+} NameRange;
+
+/*
+ * Tests ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode):
+ * - every available converter must return a non-empty set without an error
+ * - the converters in completeSetNames must cover all non-surrogate code points
+ * - the converters in nameRanges must contain start..end (and start2..end2
+ *   unless start2<0) and none of notStart..notEnd (unless notStart<0)
+ * Failures are reported through log_err()/log_data_err(); nothing is returned.
+ */
+static void
+TestUnicodeSet() {
+    UErrorCode errorCode;
+    UConverter *cnv;
+    USet *set;
+    const char *name;
+    int32_t i, count;
+
+    static const char *const completeSetNames[]={
+        "UTF-7",
+        "UTF-8",
+        "UTF-16",
+        "UTF-16BE",
+        "UTF-16LE",
+        "UTF-32",
+        "UTF-32BE",
+        "UTF-32LE",
+        "SCSU",
+        "BOCU-1",
+        "CESU-8",
+        "gb18030",
+        "IMAP-mailbox-name",
+        "LMBCS-1",
+        "LMBCS-2",
+        "LMBCS-3",
+        "LMBCS-4",
+        "LMBCS-5",
+        "LMBCS-6",
+        "LMBCS-8",
+        "LMBCS-11",
+        "LMBCS-16",
+        "LMBCS-17",
+        "LMBCS-18",
+        "LMBCS-19"
+    };
+
+    static const NameRange nameRanges[]={
+        { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
+        { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
+        { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
+        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
+        { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
+        { "HZ", 0x410, 0x44f, 0x4e00, 0x4eff, 0xac00, 0xd7ff },
+        { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
+    };
+
+    /* open an empty set (start>end); it is reused for every converter and closed at the end */
+    set=uset_open(1, 0);
+
+    /* test that every available converter returns a non-empty set */
+    count=ucnv_countAvailable();
+    for(i=0; i<count; ++i) {
+        errorCode=U_ZERO_ERROR;
+        name=ucnv_getAvailableName(i);
+        cnv=ucnv_open(name, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: unable to open converter %s - %s\n",
+                    name, u_errorName(errorCode));
+            continue;
+        }
+
+        uset_clear(set);
+        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
+                    name, u_errorName(errorCode));
+        } else if(uset_size(set)==0) {
+            log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
+        }
+
+        ucnv_close(cnv);
+    }
+
+    /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
+    for(i=0; i<LENGTHOF(completeSetNames); ++i) {
+        errorCode=U_ZERO_ERROR;
+        name=completeSetNames[i];
+        cnv=ucnv_open(name, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: unable to open converter %s - %s\n",
+                    name, u_errorName(errorCode));
+            continue;
+        }
+
+        uset_clear(set);
+        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
+                    name, u_errorName(errorCode));
+        } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
+            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
+        }
+
+        ucnv_close(cnv);
+    }
+
+    /* test specific sets */
+    for(i=0; i<LENGTHOF(nameRanges); ++i) {
+        errorCode=U_ZERO_ERROR;
+        name=nameRanges[i].name;
+        cnv=ucnv_open(name, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_data_err("error: unable to open converter %s - %s\n",
+                         name, u_errorName(errorCode));
+            continue;
+        }
+
+        uset_clear(set);
+        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
+                    name, u_errorName(errorCode));
+        } else if(
+            !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
+            (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
+        ) {
+            log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
+        } else if(nameRanges[i].notStart>=0) {
+            /*
+             * simulate containsAny() with the C API:
+             * the set contains none of notStart..notEnd
+             * if and only if its complement contains the whole range
+             */
+            uset_complement(set);
+            if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
+                log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
+            }
+        }
+
+        ucnv_close(cnv);
+    }
+
+    /* release the set opened at the top; without this the USet is leaked */
+    uset_close(set);
+}