From 48eda8bd06a5bc2243743a7985758b0b3ac28b81 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Wed, 9 Apr 2003 00:00:51 +0000 Subject: [PATCH] ICU-1439 implement ucnv_getUnicodeSet() for roundtrippable code points. X-SVN-Rev: 11475 --- icu4c/source/common/ucnv2022.c | 64 ++++++++++++++++++++++++++---- icu4c/source/common/unicode/uset.h | 9 +++++ icu4c/source/common/uset.cpp | 5 +++ 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index d04932eed60..25fec6a4d19 100644 --- a/icu4c/source/common/ucnv2022.c +++ b/icu4c/source/common/ucnv2022.c @@ -31,6 +31,7 @@ #if !UCONFIG_NO_LEGACY_CONVERSION #include "unicode/ucnv.h" +#include "unicode/uset.h" #include "unicode/ucnv_err.h" #include "unicode/ucnv_cb.h" #include "ucnv_bld.h" @@ -116,6 +117,12 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, /* Protos */ /***************** ISO-2022 ********************************/ +static void +_ISO_2022_GetUnicodeSet(const UConverter *cnv, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + static void T_UConverter_toUnicode_ISO_2022(UConverterToUnicodeArgs * args, UErrorCode * err); @@ -369,7 +376,8 @@ static const UConverterImpl _ISO2022Impl={ NULL, _ISO2022getName, _ISO_2022_WriteSub, - _ISO_2022_SafeClone + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet }; static const UConverterStaticData _ISO2022StaticData={ sizeof(UConverterStaticData), @@ -418,7 +426,8 @@ static const UConverterImpl _ISO2022JPImpl={ NULL, _ISO2022getName, _ISO_2022_WriteSub, - _ISO_2022_SafeClone + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet }; static const UConverterStaticData _ISO2022JPStaticData={ sizeof(UConverterStaticData), @@ -467,7 +476,8 @@ static const UConverterImpl _ISO2022KRImpl={ NULL, _ISO2022getName, _ISO_2022_WriteSub, - _ISO_2022_SafeClone + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet }; static const UConverterStaticData _ISO2022KRStaticData={ sizeof(UConverterStaticData), @@ 
-517,7 +527,8 @@ static const UConverterImpl _ISO2022CNImpl={ NULL, _ISO2022getName, _ISO_2022_WriteSub, - _ISO_2022_SafeClone + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet }; static const UConverterStaticData _ISO2022CNStaticData={ sizeof(UConverterStaticData), @@ -565,7 +576,7 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti myConverterData->isFirstBuffer = TRUE; cnv->fromUnicodeStatus =FALSE; if(locale){ - uprv_strcpy(myLocale,locale); + uprv_strncpy(myLocale, locale, sizeof(myLocale)); myConverterData->isLocaleSpecified = TRUE; } myConverterData->version= 0; @@ -642,8 +653,8 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti uprv_strcpy(myConverterData->locale,"cn"); if ((options & UCNV_OPTIONS_VERSION_MASK)==1){ - myConverterData->version = 1; - uprv_strcpy(myConverterData->name,"ISO_2022,locale=cn,version=1"); + myConverterData->version = 1; + uprv_strcpy(myConverterData->name,"ISO_2022,locale=cn,version=1"); }else{ uprv_strcpy(myConverterData->name,"ISO_2022,locale=cn,version=0"); myConverterData->version = 0; @@ -3406,4 +3417,43 @@ _ISO_2022_SafeClone( return &localClone->cnv; } +static void +_ISO_2022_GetUnicodeSet(const UConverter *cnv, + USet *set, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) +{ + int32_t i; + USet *cnvSet; + UConverterDataISO2022* cnvData; + + if (U_FAILURE(*pErrorCode)) { + return; + } + if (cnv->sharedData == &_ISO2022Data) { + /* We use UTF-8 in this case */ + uset_addRange(set, 0, 0xd7FF); + uset_addRange(set, 0xE000, 0x10FFFF); + return; + } + + cnvData = (UConverterDataISO2022*)cnv->extraInfo; + if (cnv->sharedData == &_ISO2022KRData && cnvData->currentConverter != NULL) { + ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode); + return; + } + + cnvSet = uset_open(0, 0); + if (!cnvSet) { + *pErrorCode =U_MEMORY_ALLOCATION_ERROR; + return; + } + + for (i=0; (i<UCNV_2022_MAX_CONVERTERS) && cnvData->myConverterArray[i]; i++) { +
ucnv_getUnicodeSet(cnvData->myConverterArray[i], cnvSet, which, pErrorCode); + uset_addAll(set, cnvSet /* pErrorCode */); + } + uset_close(cnvSet); +} + #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/icu4c/source/common/unicode/uset.h b/icu4c/source/common/unicode/uset.h index 2cbbabe37ca..ded37ccd7d3 100644 --- a/icu4c/source/common/unicode/uset.h +++ b/icu4c/source/common/unicode/uset.h @@ -183,6 +183,15 @@ uset_toPattern(const USet* set, U_CAPI void U_EXPORT2 uset_add(USet* set, UChar32 c); +/** + * Adds every element of additionalSet to set (set union); elements that set already contains are unaffected. + * @param set the set to which the elements are added + * @param additionalSet the source set whose elements are added to set + * @draft ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet); + /** * Adds the given range of characters to the given USet. After this call, * uset_contains(set, start, end) will return TRUE. diff --git a/icu4c/source/common/uset.cpp b/icu4c/source/common/uset.cpp index 8efc5313ac6..4b405f966be 100644 --- a/icu4c/source/common/uset.cpp +++ b/icu4c/source/common/uset.cpp @@ -88,6 +88,11 @@ uset_toPattern(const USet* set, return pat.extract(result, resultCapacity, *ec); } +U_CAPI void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet) { + ((UnicodeSet*) set)->addAll(*((const UnicodeSet*)additionalSet)); +} + U_CAPI void U_EXPORT2 uset_add(USet* set, UChar32 c) { ((UnicodeSet*) set)->add(c);