From 8a3a93deed6f777a30f8bbf860b9c61b92a8309f Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 7 Sep 2004 17:59:53 +0000 Subject: [PATCH] ICU-4078 use USetAdder interface to remove dependencies of low-level code on the USet/UnicodeSet implementation X-SVN-Rev: 16265 --- icu4c/source/common/Makefile.in | 4 +- icu4c/source/common/common.dsp | 12 ++++ icu4c/source/common/common.vcproj | 9 +++ icu4c/source/common/ucase.c | 8 +-- icu4c/source/common/ucase.h | 3 +- icu4c/source/common/uchar.c | 95 ++++++++++++++-------------- icu4c/source/common/ucnv.c | 27 -------- icu4c/source/common/ucnv2022.c | 21 +++--- icu4c/source/common/ucnv_cnv.c | 10 +-- icu4c/source/common/ucnv_cnv.h | 8 ++- icu4c/source/common/ucnv_ext.c | 16 ++--- icu4c/source/common/ucnv_ext.h | 2 +- icu4c/source/common/ucnv_lmb.c | 6 +- icu4c/source/common/ucnv_set.c | 62 ++++++++++++++++++ icu4c/source/common/ucnvhz.c | 6 +- icu4c/source/common/ucnvisci.c | 14 ++-- icu4c/source/common/ucnvlat1.c | 8 +-- icu4c/source/common/ucnvmbcs.c | 28 ++++---- icu4c/source/common/ucnvmbcs.h | 4 +- icu4c/source/common/unames.c | 20 +++--- icu4c/source/common/uniset_props.cpp | 28 +++++++- icu4c/source/common/unorm.cpp | 17 ++--- icu4c/source/common/unormimp.h | 2 +- icu4c/source/common/uprops.c | 74 ++-------------------- icu4c/source/common/uprops.h | 15 +++-- icu4c/source/common/uset_imp.h | 51 +++++++++++++++ 26 files changed, 312 insertions(+), 238 deletions(-) create mode 100644 icu4c/source/common/ucnv_set.c create mode 100644 icu4c/source/common/uset_imp.h diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index a4c14828700..1f1119f0cd6 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -63,11 +63,11 @@ udata.o ucmndata.o udatamem.o udataswp.o umapfile.o ucol_swp.o \ uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \ ucat.o locmap.o uloc.o locid.o \ uhash.o uhash_us.o \ -ucnv.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o 
ucnv_io.o ucnvlat1.o \ +ucnv.o ucnv_set.o ucnv_bld.o ucnv_cb.o ucnv_cnv.o ucnv_err.o ucnv_ext.o ucnv_io.o ucnvlat1.o \ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \ ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o \ unistr.o unistr_case.o unistr_cnv.o unistr_props.o \ -utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o \ +utf_impl.o ustring.o ustr_cnv.o ustrcase.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o \ normlzr.o unorm.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \ uchar.o uprops.o ucase.o propname.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \ ucln_cmn.o uscript.o usc_impl.o uvector.o ustack.o uvectr32.o ucmp8.o \ diff --git a/icu4c/source/common/common.dsp b/icu4c/source/common/common.dsp index cb6ba888cc7..f14a2fced6f 100644 --- a/icu4c/source/common/common.dsp +++ b/icu4c/source/common/common.dsp @@ -1516,6 +1516,10 @@ SOURCE=.\ucnv_lmb.c # End Source File # Begin Source File +SOURCE=.\ucnv_set.c +# End Source File +# Begin Source File + SOURCE=.\ucnv_u16.c # End Source File # Begin Source File @@ -2911,6 +2915,10 @@ InputPath=.\unicode\uset.h # End Source File # Begin Source File +SOURCE=.\uset_imp.h +# End Source File +# Begin Source File + SOURCE=.\uset_props.cpp # End Source File # Begin Source File @@ -3458,6 +3466,10 @@ SOURCE=.\ustr_imp.h # End Source File # Begin Source File +SOURCE=.\ustr_wcs.c +# End Source File +# Begin Source File + SOURCE=.\ustrcase.c # End Source File # Begin Source File diff --git a/icu4c/source/common/common.vcproj b/icu4c/source/common/common.vcproj index 7a8601b9a41..cd119ac1b56 100644 --- a/icu4c/source/common/common.vcproj +++ b/icu4c/source/common/common.vcproj @@ -755,6 +755,9 @@ + + + + + + diff --git a/icu4c/source/common/ucase.c b/icu4c/source/common/ucase.c index 5cb1bd6e888..86b0363345a 100644 --- a/icu4c/source/common/ucase.c +++ b/icu4c/source/common/ucase.c @@ -340,15 +340,15 @@ ucase_swap(const UDataSwapper *ds, static UBool U_CALLCONV 
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) { /* add the start code point to the USet */ - uset_add((USet *)context, start); + USetAdder *sa=(USetAdder *)context; + sa->add(sa->set, start); return TRUE; } -/* TODO define/use USetAdder */ U_CAPI void U_EXPORT2 -ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode) { +ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode) { /* add the start code point of each same-value range of the trie */ - utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, set); + utrie_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa); /* add code points with hardcoded properties, plus the ones following them */ diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h index 16ad41510ef..c8f7646cd39 100644 --- a/icu4c/source/common/ucase.h +++ b/icu4c/source/common/ucase.h @@ -21,6 +21,7 @@ #include "unicode/utypes.h" #include "unicode/uset.h" +#include "uset_imp.h" #include "udataswp.h" U_CDECL_BEGIN @@ -53,7 +54,7 @@ ucase_swap(const UDataSwapper *ds, UErrorCode *pErrorCode); U_CAPI void U_EXPORT2 -ucase_addPropertyStarts(const UCaseProps *csp, USet *set, UErrorCode *pErrorCode); +ucase_addPropertyStarts(const UCaseProps *csp, USetAdder *sa, UErrorCode *pErrorCode); /** * Bit mask for getting just the options from a string compare options word diff --git a/icu4c/source/common/uchar.c b/icu4c/source/common/uchar.c index 472aa407bd3..6cf85e03afa 100644 --- a/icu4c/source/common/uchar.c +++ b/icu4c/source/common/uchar.c @@ -982,14 +982,15 @@ uprv_getMaxValues(int32_t column) { static UBool U_CALLCONV _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) { /* add the start code point to the USet */ - uset_add((USet *)context, start); + USetAdder *sa=(USetAdder *)context; + sa->add(sa->set, start); return TRUE; } -#define USET_ADD_CP_AND_NEXT(set, cp) uset_add(set, cp); uset_add(set, 
cp+1) +#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) U_CAPI void U_EXPORT2 -uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) { +uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) { UChar32 c; int32_t value, value2; @@ -999,62 +1000,62 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) { } /* add the start code point of each same-value range of each trie */ - utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, set); - utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, set); + utrie_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); + utrie_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); /* add code points with hardcoded properties, plus the ones following them */ /* add for IS_THAT_CONTROL_SPACE() */ - uset_add(set, TAB); /* range TAB..CR */ - uset_add(set, CR+1); - uset_add(set, 0x1c); - uset_add(set, 0x1f+1); - USET_ADD_CP_AND_NEXT(set, NL); + sa->add(sa->set, TAB); /* range TAB..CR */ + sa->add(sa->set, CR+1); + sa->add(sa->set, 0x1c); + sa->add(sa->set, 0x1f+1); + USET_ADD_CP_AND_NEXT(sa, NL); /* add for u_isIDIgnorable() what was not added above */ - uset_add(set, DEL); /* range DEL..NBSP-1, NBSP added below */ - uset_add(set, HAIRSP); - uset_add(set, RLM+1); - uset_add(set, INHSWAP); - uset_add(set, NOMDIG+1); - USET_ADD_CP_AND_NEXT(set, ZWNBSP); + sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ + sa->add(sa->set, HAIRSP); + sa->add(sa->set, RLM+1); + sa->add(sa->set, INHSWAP); + sa->add(sa->set, NOMDIG+1); + USET_ADD_CP_AND_NEXT(sa, ZWNBSP); /* add no-break spaces for u_isWhitespace() what was not added above */ - USET_ADD_CP_AND_NEXT(set, NBSP); - USET_ADD_CP_AND_NEXT(set, FIGURESP); - USET_ADD_CP_AND_NEXT(set, NNBSP); + USET_ADD_CP_AND_NEXT(sa, NBSP); + USET_ADD_CP_AND_NEXT(sa, FIGURESP); + USET_ADD_CP_AND_NEXT(sa, NNBSP); /* add for u_charDigitValue() */ - USET_ADD_CP_AND_NEXT(set, 0x3007); - USET_ADD_CP_AND_NEXT(set, 0x4e00); - 
USET_ADD_CP_AND_NEXT(set, 0x4e8c); - USET_ADD_CP_AND_NEXT(set, 0x4e09); - USET_ADD_CP_AND_NEXT(set, 0x56db); - USET_ADD_CP_AND_NEXT(set, 0x4e94); - USET_ADD_CP_AND_NEXT(set, 0x516d); - USET_ADD_CP_AND_NEXT(set, 0x4e03); - USET_ADD_CP_AND_NEXT(set, 0x516b); - USET_ADD_CP_AND_NEXT(set, 0x4e5d); + USET_ADD_CP_AND_NEXT(sa, 0x3007); + USET_ADD_CP_AND_NEXT(sa, 0x4e00); + USET_ADD_CP_AND_NEXT(sa, 0x4e8c); + USET_ADD_CP_AND_NEXT(sa, 0x4e09); + USET_ADD_CP_AND_NEXT(sa, 0x56db); + USET_ADD_CP_AND_NEXT(sa, 0x4e94); + USET_ADD_CP_AND_NEXT(sa, 0x516d); + USET_ADD_CP_AND_NEXT(sa, 0x4e03); + USET_ADD_CP_AND_NEXT(sa, 0x516b); + USET_ADD_CP_AND_NEXT(sa, 0x4e5d); /* add for u_digit() */ - uset_add(set, U_a); - uset_add(set, U_z+1); - uset_add(set, U_A); - uset_add(set, U_Z+1); + sa->add(sa->set, U_a); + sa->add(sa->set, U_z+1); + sa->add(sa->set, U_A); + sa->add(sa->set, U_Z+1); /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ - uset_add(set, WJ); /* range WJ..NOMDIG */ - uset_add(set, 0xfff0); - uset_add(set, 0xfffb+1); - uset_add(set, 0xe0000); - uset_add(set, 0xe0fff+1); + sa->add(sa->set, WJ); /* range WJ..NOMDIG */ + sa->add(sa->set, 0xfff0); + sa->add(sa->set, 0xfffb+1); + sa->add(sa->set, 0xe0000); + sa->add(sa->set, 0xe0fff+1); /* add for UCHAR_GRAPHEME_BASE and others */ - USET_ADD_CP_AND_NEXT(set, CGJ); + USET_ADD_CP_AND_NEXT(sa, CGJ); /* add for UCHAR_JOINING_TYPE */ - uset_add(set, ZWNJ); /* range ZWNJ..ZWJ */ - uset_add(set, ZWJ+1); + sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */ + sa->add(sa->set, ZWJ+1); /* * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. @@ -1064,33 +1065,33 @@ uchar_addPropertyStarts(USet *set, UErrorCode *pErrorCode) { * at the end of the per-Jamo-block assignments in Unicode 4 or earlier. * (These have not changed since Unicode 2.) 
*/ - uset_add(set, 0x1100); + sa->add(sa->set, 0x1100); value=U_HST_LEADING_JAMO; for(c=0x115a; c<=0x115f; ++c) { value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE); if(value!=value2) { value=value2; - uset_add(set, c); + sa->add(sa->set, c); } } - uset_add(set, 0x1160); + sa->add(sa->set, 0x1160); value=U_HST_VOWEL_JAMO; for(c=0x11a3; c<=0x11a7; ++c) { value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE); if(value!=value2) { value=value2; - uset_add(set, c); + sa->add(sa->set, c); } } - uset_add(set, 0x11a8); + sa->add(sa->set, 0x11a8); value=U_HST_TRAILING_JAMO; for(c=0x11fa; c<=0x11ff; ++c) { value2=u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE); if(value!=value2) { value=value2; - uset_add(set, c); + sa->add(sa->set, c); } } } diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index b82cc19ed3f..51a442c8aef 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -626,33 +626,6 @@ ucnv_getPlatform (const UConverter * converter, return (UConverterPlatform)converter->sharedData->staticData->platform; } -U_CAPI void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *setFillIn, - UConverterUnicodeSet whichSet, - UErrorCode *pErrorCode) { - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* does this converter support this function? */ - if(cnv->sharedData->impl->getUnicodeSet==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - - /* empty the set */ - uset_clear(setFillIn); - - /* call the converter to add the code points it supports */ - cnv->sharedData->impl->getUnicodeSet(cnv, setFillIn, whichSet, pErrorCode); -} - U_CAPI void U_EXPORT2 ucnv_getToUCallBack (const UConverter * converter, UConverterToUCallback *action, diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index 41f7a1b310d..dc85f874666 100644 --- a/icu4c/source/common/ucnv2022.c +++ b/icu4c/source/common/ucnv2022.c @@ -2985,7 +2985,7 @@ _ISO_2022_SafeClone( static void 
_ISO_2022_GetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { @@ -2998,8 +2998,8 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, #ifdef U_ENABLE_GENERIC_ISO_2022 if (cnv->sharedData == &_ISO2022Data) { /* We use UTF-8 in this case */ - uset_addRange(set, 0, 0xd7FF); - uset_addRange(set, 0xE000, 0x10FFFF); + sa->addRange(sa->set, 0, 0xd7FF); + sa->addRange(sa->set, 0xE000, 0x10FFFF); return; } #endif @@ -3011,24 +3011,25 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, case 'j': if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { /* include Latin-1 for some variants of JP */ - uset_addRange(set, 0, 0xff); + sa->addRange(sa->set, 0, 0xff); } else { /* include ASCII for JP */ - uset_addRange(set, 0, 0x7f); + sa->addRange(sa->set, 0, 0x7f); } if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) { /* include half-width Katakana for JP */ - uset_addRange(set, 0xff61, 0xff9f); + sa->addRange(sa->set, 0xff61, 0xff9f); } break; case 'c': case 'z': /* include ASCII for CN */ - uset_addRange(set, 0, 0x7f); + sa->addRange(sa->set, 0, 0x7f); break; case 'k': /* there is only one converter for KR, and it is not in the myConverterArray[] */ - ucnv_getUnicodeSet(cnvData->currentConverter, set, which, pErrorCode); + cnvData->currentConverter->sharedData->impl->getUnicodeSet( + cnvData->currentConverter, sa, which, pErrorCode); return; default: break; @@ -3049,11 +3050,11 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ _MBCSGetUnicodeSetForBytes( cnvData->myConverterArray[i], - set, UCNV_ROUNDTRIP_SET, + sa, UCNV_ROUNDTRIP_SET, 0, 0x81, 0x82, pErrorCode); } else { - _MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], set, which, pErrorCode); + _MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode); } } } diff --git a/icu4c/source/common/ucnv_cnv.c b/icu4c/source/common/ucnv_cnv.c 
index c10c3da7ee1..48c2201063b 100644 --- a/icu4c/source/common/ucnv_cnv.c +++ b/icu4c/source/common/ucnv_cnv.c @@ -28,19 +28,19 @@ U_CFUNC void ucnv_getCompleteUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0x10ffff); + sa->addRange(sa->set, 0, 0x10ffff); } U_CFUNC void ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0xd7ff); - uset_addRange(set, 0xe000, 0x10ffff); + sa->addRange(sa->set, 0, 0xd7ff); + sa->addRange(sa->set, 0xe000, 0x10ffff); } U_CFUNC void diff --git a/icu4c/source/common/ucnv_cnv.h b/icu4c/source/common/ucnv_cnv.h index 9aef352ba22..e0692a50086 100644 --- a/icu4c/source/common/ucnv_cnv.h +++ b/icu4c/source/common/ucnv_cnv.h @@ -24,6 +24,8 @@ #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" +#include "unicode/uset.h" +#include "uset_imp.h" U_CDECL_BEGIN @@ -169,7 +171,7 @@ typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. 
*/ typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode); @@ -244,13 +246,13 @@ U_CDECL_END U_CFUNC void ucnv_getCompleteUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode); U_CFUNC void ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode); diff --git a/icu4c/source/common/ucnv_ext.c b/icu4c/source/common/ucnv_ext.c index 33ef398a694..7092f4f695c 100644 --- a/icu4c/source/common/ucnv_ext.c +++ b/icu4c/source/common/ucnv_ext.c @@ -932,7 +932,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, static void ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, const int32_t *cx, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, int32_t minLength, UChar32 c, @@ -958,10 +958,10 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, ) { if(c>=0) { /* add the initial code point */ - uset_add(set, c); + sa->add(sa->set, c); } else { /* add the string so far */ - uset_addString(set, s, length); + sa->addString(sa->set, s, length); } } @@ -974,7 +974,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, /* no mapping, do nothing */ } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { ucnv_extGetUnicodeSetString( - sharedData, cx, set, which, minLength, + sharedData, cx, sa, which, minLength, U_SENTINEL, s, length+1, (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), pErrorCode); @@ -982,14 +982,14 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength ) { - uset_addString(set, s, length+1); + sa->addString(sa->set, s, length+1); } } } U_CFUNC void ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { const 
int32_t *cx; @@ -1051,7 +1051,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, length=0; U16_APPEND_UNSAFE(s, length, c); ucnv_extGetUnicodeSetString( - sharedData, cx, set, which, minLength, + sharedData, cx, sa, which, minLength, c, s, length, (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), pErrorCode); @@ -1059,7 +1059,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) && UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength ) { - uset_add(set, c); + sa->add(sa->set, c); } } while((++c&0xf)!=0); } else { diff --git a/icu4c/source/common/ucnv_ext.h b/icu4c/source/common/ucnv_ext.h index 01f15c25adf..6fb43d961c1 100644 --- a/icu4c/source/common/ucnv_ext.h +++ b/icu4c/source/common/ucnv_ext.h @@ -384,7 +384,7 @@ ucnv_extContinueMatchFromU(UConverter *cnv, U_CFUNC void ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode); diff --git a/icu4c/source/common/ucnv_lmb.c b/icu4c/source/common/ucnv_lmb.c index 69efe09526f..dc04f383c41 100644 --- a/icu4c/source/common/ucnv_lmb.c +++ b/icu4c/source/common/ucnv_lmb.c @@ -664,12 +664,12 @@ _LMBCSSafeClone(const UConverter *cnv, static void _LMBCSGetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { /* all but U+F6xx, see LMBCS explanation above (search for F6xx) */ - uset_addRange(set, 0, 0xf5ff); - uset_addRange(set, 0xf700, 0x10ffff); + sa->addRange(sa->set, 0, 0xf5ff); + sa->addRange(sa->set, 0xf700, 0x10ffff); } /* diff --git a/icu4c/source/common/ucnv_set.c b/icu4c/source/common/ucnv_set.c new file mode 100644 index 00000000000..cbfd9632d8c --- /dev/null +++ b/icu4c/source/common/ucnv_set.c @@ -0,0 +1,62 @@ +/* +******************************************************************************* +* +* Copyright (C) 2003-2004, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +******************************************************************************* +* file name: ucnv_set.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* Conversion API functions using USet (ucnv_getUnicodeSet()) +* moved here from ucnv.c for removing the dependency of other ucnv_ +* implementation functions on the USet implementation. +*/ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/ucnv.h" +#include "ucnv_bld.h" +#include "uset_imp.h" + +U_CAPI void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode) { + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* does this converter support this function? */ + if(cnv->sharedData->impl->getUnicodeSet==NULL) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return; + } + + { + USetAdder sa={ + NULL, + uset_add, + uset_addRange, + uset_addString + }; + sa.set=setFillIn; + + /* empty the set */ + uset_clear(setFillIn); + + /* call the converter to add the code points it supports */ + cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode); + } +} diff --git a/icu4c/source/common/ucnvhz.c b/icu4c/source/common/ucnvhz.c index c030f1825cc..28a1ea7fd63 100644 --- a/icu4c/source/common/ucnvhz.c +++ b/icu4c/source/common/ucnvhz.c @@ -510,17 +510,17 @@ _HZ_SafeClone(const UConverter *cnv, static void _HZ_GetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { /* the tilde '~' is hardcoded in the converter */ - uset_add(set, 0x7e); + sa->add(sa->set, 0x7e); /* add all of the code points that the sub-converter handles */ ((UConverterDataHZ*)cnv->extraInfo)-> gbConverter->sharedData->impl-> getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, - set, which, pErrorCode); + sa, which, pErrorCode); } static const UConverterImpl _HZImpl={ diff 
--git a/icu4c/source/common/ucnvisci.c b/icu4c/source/common/ucnvisci.c index 7eb3994ea01..dd3a0a92b3f 100644 --- a/icu4c/source/common/ucnvisci.c +++ b/icu4c/source/common/ucnvisci.c @@ -1332,7 +1332,7 @@ _ISCII_SafeClone(const UConverter *cnv, static void _ISCIIGetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { @@ -1341,19 +1341,19 @@ _ISCIIGetUnicodeSet(const UConverter *cnv, /* Since all ISCII versions allow switching to other ISCII scripts, we add all roundtrippable characters to this set. */ - uset_addRange(set, 0, ASCII_END); + sa->addRange(sa->set, 0, ASCII_END); for (script = DEVANAGARI; script <= MALAYALAM; script++) { mask = (uint8_t)(lookupInitialData[script][1]); for (idx = 0; idx < DELTA; idx++) { if (validityTable[idx] & mask) { - uset_add(set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); + sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); } } } - uset_add(set, DANDA); - uset_add(set, DOUBLE_DANDA); - uset_add(set, ZWNJ); - uset_add(set, ZWJ); + sa->add(sa->set, DANDA); + sa->add(sa->set, DOUBLE_DANDA); + sa->add(sa->set, ZWNJ); + sa->add(sa->set, ZWJ); } static const UConverterImpl _ISCIIImpl={ diff --git a/icu4c/source/common/ucnvlat1.c b/icu4c/source/common/ucnvlat1.c index 7ddf376aa41..21aa78962f0 100644 --- a/icu4c/source/common/ucnvlat1.c +++ b/icu4c/source/common/ucnvlat1.c @@ -332,10 +332,10 @@ noMoreInput: static void _Latin1GetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0xff); + sa->addRange(sa->set, 0, 0xff); } static const UConverterImpl _Latin1Impl={ @@ -534,10 +534,10 @@ _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, static void _ASCIIGetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0x7f); + sa->addRange(sa->set, 0, 0x7f); } static const UConverterImpl 
_ASCIIImpl={ diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c index 6f163fd98c1..7a8ce24d71b 100644 --- a/icu4c/source/common/ucnvmbcs.c +++ b/icu4c/source/common/ucnvmbcs.c @@ -430,7 +430,7 @@ _MBCSSizeofFromUBytes(UConverterMBCSTable *mbcsTable) { static void _getUnicodeSetForBytes(const UConverterSharedData *sharedData, const int32_t (*stateTable)[256], const uint16_t *unicodeCodeUnits, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, uint8_t state, uint32_t offset, int32_t lowByte, int32_t highByte, @@ -442,7 +442,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData, if(MBCS_ENTRY_IS_TRANSITION(entry)) { _getUnicodeSetForBytes( sharedData, stateTable, unicodeCodeUnits, - set, which, + sa, which, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry), offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), 0, 0xff, @@ -490,7 +490,7 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData, } if(c>=0) { - uset_add(set, c); + sa->add(sa->set, c); } offset=rowOffset; } @@ -507,20 +507,20 @@ _getUnicodeSetForBytes(const UConverterSharedData *sharedData, */ U_CFUNC void _MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, uint8_t state, int32_t lowByte, int32_t highByte, UErrorCode *pErrorCode) { _getUnicodeSetForBytes( sharedData, sharedData->mbcs.stateTable, sharedData->mbcs.unicodeCodeUnits, - set, which, + sa, which, state, 0, lowByte, highByte, pErrorCode); } U_CFUNC void _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { const UConverterMBCSTable *mbcsTable; @@ -565,7 +565,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, */ do { if(*stage3++>=0xf00) { - uset_add(set, c); + sa->add(sa->set, c); } } while((++c&0xf)!=0); } else { @@ -605,7 +605,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, */ do { 
if((st3&1)!=0 && *stage3>=0x100) { - uset_add(set, c); + sa->add(sa->set, c); } st3>>=1; ++stage3; @@ -638,7 +638,7 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, */ do { if(st3&1) { - uset_add(set, c); + sa->add(sa->set, c); } st3>>=1; } while((++c&0xf)!=0); @@ -652,19 +652,19 @@ _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, } } - ucnv_extGetUnicodeSet(sharedData, set, which, pErrorCode); + ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode); } static void _MBCSGetUnicodeSet(const UConverter *cnv, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { if(cnv->options&_MBCS_OPTION_GB18030) { - uset_addRange(set, 0, 0xd7ff); - uset_addRange(set, 0xe000, 0x10ffff); + sa->addRange(sa->set, 0, 0xd7ff); + sa->addRange(sa->set, 0xe000, 0x10ffff); } else { - _MBCSGetUnicodeSetForUnicode(cnv->sharedData, set, which, pErrorCode); + _MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode); } } diff --git a/icu4c/source/common/ucnvmbcs.h b/icu4c/source/common/ucnvmbcs.h index 6eca45406ad..f7100adda3f 100644 --- a/icu4c/source/common/ucnvmbcs.h +++ b/icu4c/source/common/ucnvmbcs.h @@ -373,7 +373,7 @@ _MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, */ U_CFUNC void _MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, uint8_t state, int32_t lowByte, int32_t highByte, UErrorCode *pErrorCode); @@ -388,7 +388,7 @@ _MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData, */ U_CFUNC void _MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, - USet *set, + USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode); diff --git a/icu4c/source/common/unames.c b/icu4c/source/common/unames.c index 01e887f5c9e..0a57587619e 100644 --- a/icu4c/source/common/unames.c +++ b/icu4c/source/common/unames.c @@ -1,7 +1,7 @@ /* 
****************************************************************************** * -* Copyright (C) 1999-2003, International Business Machines +* Copyright (C) 1999-2004, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -23,6 +23,7 @@ #include "unicode/uchar.h" #include "unicode/udata.h" #include "unicode/uset.h" +#include "uset_imp.h" #include "ustr_imp.h" #include "umutex.h" #include "cmemory.h" @@ -1723,7 +1724,7 @@ uprv_getMaxISOCommentLength() { * @param uset USet to receive characters. Existing contents are deleted. */ static void -charSetToUSet(uint32_t cset[8], USet* uset) { +charSetToUSet(uint32_t cset[8], USetAdder *sa) { UChar us[256]; char cs[256]; @@ -1731,7 +1732,6 @@ charSetToUSet(uint32_t cset[8], USet* uset) { UErrorCode errorCode; errorCode=U_ZERO_ERROR; - uset_clear(uset); if(!calcNameSetsLengths(&errorCode)) { return; @@ -1751,18 +1751,18 @@ charSetToUSet(uint32_t cset[8], USet* uset) { /* add each UChar to the USet */ for(i=0; i<length; ++i) { if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ - uset_add(uset, us[i]); + sa->add(sa->set, us[i]); } } } /** * Fills set with characters that are used in Unicode character names. - * @param set USet to receive characters. Existing contents are deleted. + * @param set USet to receive characters. */ U_CAPI void U_EXPORT2 -uprv_getCharNameCharacters(USet* set) { - charSetToUSet(gNameSet, set); +uprv_getCharNameCharacters(USetAdder *sa) { + charSetToUSet(gNameSet, sa); } #if 0 @@ -1772,11 +1772,11 @@ urename.h and uprops.h changed accordingly. */ /** * Fills set with characters that are used in Unicode character names. - * @param set USet to receive characters. Existing contents are deleted. + * @param set USetAdder to receive characters. 
*/ U_CAPI void U_EXPORT2 -uprv_getISOCommentCharacters(USet* set) { - charSetToUSet(gISOCommentSet, set); +uprv_getISOCommentCharacters(USetAdder *sa) { + charSetToUSet(gISOCommentSet, sa); } #endif diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index c99d6c3380f..dd3f755b43e 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -22,6 +22,8 @@ #include "unicode/uchar.h" #include "unicode/uscript.h" #include "unicode/symtable.h" +#include "unicode/uset.h" +#include "uset_imp.h" #include "ruleiter.h" #include "cmemory.h" #include "uhash.h" @@ -1466,14 +1468,38 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, // Inclusions list //---------------------------------------------------------------- +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +static void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +static void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +static void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); +} + const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) { umtx_lock(NULL); UBool f = (INCLUSIONS == NULL); umtx_unlock(NULL); if (f) { UnicodeSet* incl = new UnicodeSet(); + USetAdder sa = { + (USet *)incl, + _set_add, + _set_addRange, + _set_addString + }; + if (incl != NULL) { - uprv_getInclusions((USet*)incl, &status); + uprv_getInclusions(&sa, &status); if (U_SUCCESS(status)) { umtx_lock(NULL); if (INCLUSIONS == NULL) { diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index d40ff1e5a1d..0dfb3490629 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -262,7 +262,8 @@ isAcceptable(void * /* context */, static UBool U_CALLCONV 
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*limit*/, uint32_t /*value*/) { /* add the start code point to the USet */ - uset_add((USet *)context, start); + USetAdder *sa=(USetAdder *)context; + sa->add(sa->set, start); return TRUE; } @@ -1153,7 +1154,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) { } U_CAPI void U_EXPORT2 -unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) { +unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) { UChar c; if(!_haveData(*pErrorCode)) { @@ -1161,18 +1162,18 @@ unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) { } /* add the start code point of each same-value range of each trie */ - utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, set); - utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set); + utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, sa); + utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, sa); if(formatVersion_2_1) { - utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, set); + utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, sa); } /* add Hangul LV syllables and LV+1 because of skippables */ for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) { - uset_add(set, c); - uset_add(set, c+1); + sa->add(sa->set, c); + sa->add(sa->set, c+1); } - uset_add(set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */ + sa->add(sa->set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */ } U_CAPI UNormalizationCheckResult U_EXPORT2 diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h index 1819686e9af..180fb1ffbe4 100644 --- a/icu4c/source/common/unormimp.h +++ b/icu4c/source/common/unormimp.h @@ -395,7 +395,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode); * @internal */ U_CAPI void U_EXPORT2 -unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode); +unorm_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode); /** * Swap unorm.icu. See udataswp.h. 
diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c index 2391ff1821a..46bdedf1dc7 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.c @@ -27,10 +27,6 @@ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -#ifdef DEBUG -#include -#endif - /** * Get the next non-ignorable ASCII character from a property name * and lowercases it. @@ -584,76 +580,14 @@ strrch(const char* source,uint32_t sourceLen,char find){ #endif U_CAPI void U_EXPORT2 -uprv_getInclusions(USet* set, UErrorCode *pErrorCode) { +uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) { if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return; } - uset_clear(set); - #if !UCONFIG_NO_NORMALIZATION - unorm_addPropertyStarts(set, pErrorCode); -#endif - uchar_addPropertyStarts(set, pErrorCode); - ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), set, pErrorCode); - -#ifdef DEBUG - { - UChar* result=NULL; - int32_t resultCapacity=0; - int32_t bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode); - char* resultChars = NULL; - if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR){ - uint32_t len = 0, add=0; - char *buf=NULL, *current = NULL; - *pErrorCode = U_ZERO_ERROR; - resultCapacity = bufLen; - result = (UChar*) uprv_malloc(resultCapacity * U_SIZEOF_UCHAR); - bufLen = uset_toPattern(set,result,resultCapacity,TRUE,pErrorCode); - resultChars = (char*) uprv_malloc(len+1); - u_UCharsToChars(result,resultChars,bufLen); - resultChars[bufLen] = 0; - buf = resultChars; - /*printf(resultChars);*/ - while(len < bufLen){ - add = 70-5/* for ", +\n */; - current = buf +len; - if (add < (bufLen-len)) { - uint32_t index = strrch(current,add,'\\'); - if (index > add) { - index = add; - } else { - int32_t num =index-1; - uint32_t seqLen; - while(num>0){ - if(current[num]=='\\'){ - num--; - }else{ - break; - } - } - if ((index-num)%2==0) { - index--; - } - seqLen = (current[index+1]=='u') ? 
6 : 2; - if ((add-index) < seqLen) { - add = index + seqLen; - } - } - } - fwrite("\"",1,1,stdout); - if(len+addadd(sa->set, c); + */ +struct USetAdder { + USet *set; + USetAdd *add; + USetAddRange *addRange; + USetAddString *addString; +}; +typedef struct USetAdder USetAdder; + +U_CDECL_END + +#endif