diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index eba1ff513f0..cfde141f3d6 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -63,7 +63,8 @@ normlzr.o unorm.o chariter.o schriter.o uchriter.o uiter.o \ uchar.o uprops.o bidi.o ubidi.o ubidiwrt.o ubidiln.o ushape.o unames.o \ ucln_cmn.o uscript.o umemstrm.o ucmp8.o uvector.o digitlst.o \ brkiter.o brkdict.o ubrk.o dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o \ -unicode.o scsu.o convert.o utrie.o uset.o +unicode.o scsu.o convert.o utrie.o uset.o \ +unifilt.o unifunct.o uniset.o upropset.o usetiter.o util.o STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O)) diff --git a/icu4c/source/common/common.dsp b/icu4c/source/common/common.dsp index af0f1f4cd6e..46c51f8244b 100644 --- a/icu4c/source/common/common.dsp +++ b/icu4c/source/common/common.dsp @@ -44,7 +44,7 @@ RSC=rc.exe # PROP Intermediate_Dir "Release" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c +# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c # ADD CPP /nologo /G6 /MD /Za /W3 /GX /Zi /O2 /Ob2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FD /GF /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 @@ -71,8 +71,8 @@ LINK32=link.exe # PROP Intermediate_Dir "Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c -# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FR /FD /GF /GZ /c +# ADD BASE CPP /nologo 
/MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c +# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /FR /FD /GF /GZ /c # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x409 /d "_DEBUG" @@ -98,8 +98,8 @@ LINK32=link.exe # PROP Intermediate_Dir "Release" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" -# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c -# ADD CPP /nologo /MD /Za /W3 /D"NDEBUG" /D"WIN64" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"COMMON_EXPORTS" /D"U_COMMON_IMPLEMENTATION" /D"UDATA_STATIC_LIB" /FD /GF /c /O2 /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600 +# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /c +# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /D "NDEBUG" /D "WIN64" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GF /QIA64_fmaopt /Wp64 /Zm600 /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win64 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win64 # ADD BASE RSC /l 0x409 /d "NDEBUG" @@ -108,9 +108,9 @@ BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IA64 -# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /machine:IA64 /out:"..\..\bin\icuuc21.dll" /implib:"..\..\lib\icuuc.lib" /libpath:"..\..\lib" /incremental:no 
-# SUBTRACT LINK32 /debug +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IX86 /machine:IA64 +# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /machine:IX86 /out:"..\..\bin\icuuc21.dll" /implib:"..\..\lib\icuuc.lib" /libpath:"..\..\lib" /machine:IA64 +# SUBTRACT LINK32 /debug !ELSEIF "$(CFG)" == "common - Win64 Debug" @@ -125,8 +125,8 @@ LINK32=link.exe # PROP Intermediate_Dir "Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" -# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c -# ADD CPP /nologo /MDd /Za /W3 /Gm /D"_DEBUG" /D"WIN64" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"COMMON_EXPORTS" /D"U_COMMON_IMPLEMENTATION" /D"UDATA_STATIC_LIB" /FR /FD /GF /GZ /c /Od /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600 +# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /FD /GZ /c +# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN64" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "COMMON_EXPORTS" /D "U_COMMON_IMPLEMENTATION" /D "UDATA_STATIC_LIB" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /GZ /QIA64_fmaopt /Wp64 /Zm600 /c # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win64 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win64 # ADD BASE RSC /l 0x409 /d "_DEBUG" @@ -135,9 +135,8 @@ BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IA64 /pdbtype:sept -# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo 
/base:"0x4a800000" /dll /debug /machine:IA64 /out:"..\..\bin\icuuc21d.dll" /implib:"..\..\lib\icuucd.lib" /pdbtype:sept /libpath:"..\..\lib" /incremental:no -# SUBTRACT LINK32 +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IX86 /pdbtype:sept /machine:IA64 +# ADD LINK32 icudata.lib kernel32.lib user32.lib advapi32.lib shell32.lib /nologo /base:"0x4a800000" /dll /incremental:no /debug /machine:IX86 /out:"..\..\bin\icuuc21d.dll" /implib:"..\..\lib\icuucd.lib" /pdbtype:sept /libpath:"..\..\lib" /machine:IA64 !ENDIF @@ -380,6 +379,18 @@ SOURCE=.\unicode.cpp # End Source File # Begin Source File +SOURCE=.\unifilt.cpp +# End Source File +# Begin Source File + +SOURCE=.\unifunct.cpp +# End Source File +# Begin Source File + +SOURCE=.\uniset.cpp +# End Source File +# Begin Source File + SOURCE=.\unistr.cpp # End Source File # Begin Source File @@ -392,6 +403,10 @@ SOURCE=.\uprops.c # End Source File # Begin Source File +SOURCE=.\upropset.cpp +# End Source File +# Begin Source File + SOURCE=.\uresbund.c # End Source File # Begin Source File @@ -404,7 +419,11 @@ SOURCE=.\uscript.c # End Source File # Begin Source File -SOURCE=.\uset.c +SOURCE=.\uset.cpp +# End Source File +# Begin Source File + +SOURCE=.\usetiter.cpp # End Source File # Begin Source File @@ -432,6 +451,10 @@ SOURCE=.\utf_impl.c # End Source File # Begin Source File +SOURCE=.\util.cpp +# End Source File +# Begin Source File + SOURCE=.\utrie.c # End Source File # Begin Source File @@ -817,6 +840,53 @@ InputPath=.\unicode\normlzr.h # End Source File # Begin Source File +SOURCE=.\unicode\parsepos.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\parsepos.h + +"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 
Debug" + +# Begin Custom Build +InputPath=.\unicode\parsepos.h + +"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\parsepos.h + +"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\parsepos.h + +"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + SOURCE=.\unicode\putil.h !IF "$(CFG)" == "common - Win32 Release" @@ -1150,6 +1220,10 @@ InputPath=.\unicode\scsu.h # End Source File # Begin Source File +SOURCE=.\symtable.h +# End Source File +# Begin Source File + SOURCE=.\unicode\ubidi.h !IF "$(CFG)" == "common - Win32 Release" @@ -1872,6 +1946,194 @@ InputPath=.\unicode\unicode.h # End Source File # Begin Source File +SOURCE=.\unicode\unifilt.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\unifilt.h + +"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build +InputPath=.\unicode\unifilt.h + +"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\unifilt.h + +"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\unifilt.h + +"..\..\include\unicode\unifilt.h" : 
$(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\unicode\unifunct.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\unifunct.h + +"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build +InputPath=.\unicode\unifunct.h + +"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\unifunct.h + +"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\unifunct.h + +"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\unicode\unimatch.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\unimatch.h + +"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build +InputPath=.\unicode\unimatch.h + +"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\unimatch.h + +"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# 
Begin Custom Build +InputPath=.\unicode\unimatch.h + +"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\unicode\uniset.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\uniset.h + +"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build +InputPath=.\unicode\uniset.h + +"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\uniset.h + +"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\uniset.h + +"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + SOURCE=.\unicode\unistr.h !IF "$(CFG)" == "common - Win32 Release" @@ -1974,6 +2236,10 @@ SOURCE=.\uprops.h # End Source File # Begin Source File +SOURCE=.\upropset.h +# End Source File +# Begin Source File + SOURCE=.\unicode\urename.h !IF "$(CFG)" == "common - Win32 Release" @@ -2170,7 +2436,97 @@ InputPath=.\unicode\uscript.h # End Source File # Begin Source File -SOURCE=.\uset.h +SOURCE=.\unicode\uset.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\uset.h + +"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build 
+InputPath=.\unicode\uset.h + +"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\uset.h + +"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\uset.h + +"..\..\include\unicode\uset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + +# End Source File +# Begin Source File + +SOURCE=.\unicode\usetiter.h + +!IF "$(CFG)" == "common - Win32 Release" + +# Begin Custom Build +InputPath=.\unicode\usetiter.h + +"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win32 Debug" + +# Begin Custom Build +InputPath=.\unicode\usetiter.h + +"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Release" + +# Begin Custom Build +InputPath=.\unicode\usetiter.h + +"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ELSEIF "$(CFG)" == "common - Win64 Debug" + +# Begin Custom Build +InputPath=.\unicode\usetiter.h + +"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" + copy $(InputPath) ..\..\include\unicode + +# End Custom Build + +!ENDIF + # End Source File # Begin Source File @@ -2464,6 +2820,10 @@ InputPath=.\unicode\utf8.h # End Source File # Begin Source File +SOURCE=.\util.h +# End Source File +# Begin Source File + SOURCE=.\utrie.h # End Source File # Begin Source File diff --git a/icu4c/source/i18n/symtable.h b/icu4c/source/common/symtable.h 
similarity index 100% rename from icu4c/source/i18n/symtable.h rename to icu4c/source/common/symtable.h diff --git a/icu4c/source/common/ucln_cmn.c b/icu4c/source/common/ucln_cmn.c index 935a54036f6..3152db863e3 100644 --- a/icu4c/source/common/ucln_cmn.c +++ b/icu4c/source/common/ucln_cmn.c @@ -48,6 +48,7 @@ u_cleanup(void) } } + upropset_cleanup(); unorm_cleanup(); unames_cleanup(); uchar_cleanup(); diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h index 9862dd39bf8..8e8543faa0f 100644 --- a/icu4c/source/common/ucln_cmn.h +++ b/icu4c/source/common/ucln_cmn.h @@ -46,4 +46,6 @@ U_CFUNC UBool udata_cleanup(void); U_CFUNC UBool putil_cleanup(void); +U_CFUNC UBool upropset_cleanup(void); + #endif diff --git a/icu4c/source/i18n/unicode/parsepos.h b/icu4c/source/common/unicode/parsepos.h similarity index 99% rename from icu4c/source/i18n/unicode/parsepos.h rename to icu4c/source/common/unicode/parsepos.h index 96149d86f85..1ed210ed07b 100644 --- a/icu4c/source/i18n/unicode/parsepos.h +++ b/icu4c/source/common/unicode/parsepos.h @@ -36,7 +36,7 @@ U_NAMESPACE_BEGIN * @see java.text.Format */ -class U_I18N_API ParsePosition { +class U_COMMON_API ParsePosition { public: /** * Default constructor, the index starts with 0 as default. diff --git a/icu4c/source/i18n/unicode/unifilt.h b/icu4c/source/common/unicode/unifilt.h similarity index 87% rename from icu4c/source/i18n/unicode/unifilt.h rename to icu4c/source/common/unicode/unifilt.h index f7a61949ee9..d9311d5ba61 100644 --- a/icu4c/source/i18n/unicode/unifilt.h +++ b/icu4c/source/common/unicode/unifilt.h @@ -13,6 +13,15 @@ U_NAMESPACE_BEGIN +/** + * U_ETHER is used to represent character values for positions outside + * a range. For example, transliterator uses this to represent + * characters outside the range contextStart..contextLimit-1. This + * allows explicit matching by rules and UnicodeSets of text outside a + * defined range. 
+ */ +#define U_ETHER ((UChar)0xFFFF) + /** * UnicodeFilter defines a protocol for selecting a * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. @@ -38,7 +47,7 @@ U_NAMESPACE_BEGIN * @see UnicodeFilterLogic * @stable */ -class U_I18N_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { +class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { public: /** diff --git a/icu4c/source/i18n/unicode/unifunct.h b/icu4c/source/common/unicode/unifunct.h similarity index 99% rename from icu4c/source/i18n/unicode/unifunct.h rename to icu4c/source/common/unicode/unifunct.h index f58dd1be1a8..e98aeedc9bd 100644 --- a/icu4c/source/i18n/unicode/unifunct.h +++ b/icu4c/source/common/unicode/unifunct.h @@ -23,7 +23,7 @@ class TransliterationRuleData; * that perform match and/or replace operations on Unicode strings. * @author Alan Liu */ -class U_I18N_API UnicodeFunctor { +class U_COMMON_API UnicodeFunctor { public: diff --git a/icu4c/source/i18n/unicode/unimatch.h b/icu4c/source/common/unicode/unimatch.h similarity index 99% rename from icu4c/source/i18n/unicode/unimatch.h rename to icu4c/source/common/unicode/unimatch.h index 644115c39c0..05b82d28d1f 100644 --- a/icu4c/source/i18n/unicode/unimatch.h +++ b/icu4c/source/common/unicode/unimatch.h @@ -53,7 +53,7 @@ enum UMatchDegree { * UnicodeMatcher defines a protocol for objects that can * match a range of characters in a Replaceable string. 
*/ -class U_I18N_API UnicodeMatcher { +class U_COMMON_API UnicodeMatcher { public: diff --git a/icu4c/source/i18n/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h similarity index 95% rename from icu4c/source/i18n/unicode/uniset.h rename to icu4c/source/common/unicode/uniset.h index bafe73aa154..348a7d6eda9 100644 --- a/icu4c/source/i18n/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -216,7 +216,7 @@ class UVector; * @author Alan Liu * @stable */ -class U_I18N_API UnicodeSet : public UnicodeFilter { +class U_COMMON_API UnicodeSet : public UnicodeFilter { int32_t len; // length of list used; 0 <= len <= capacity int32_t capacity; // capacity of list @@ -821,6 +821,51 @@ public: */ virtual UChar32 getRangeEnd(int32_t index) const; + /** + * Serializes this set into an array of 16-bit integers. The array + * has following format (each line is one 16-bit integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. 
+ * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR + * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if + * n+2*m+(m!=0?2:1) > destCapacity. + * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + */ + int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; + /** * Reallocate this objects internal structures to take up the least * possible space, without changing this object's value. diff --git a/icu4c/source/common/unicode/uset.h b/icu4c/source/common/unicode/uset.h new file mode 100644 index 00000000000..9a4c68284a7 --- /dev/null +++ b/icu4c/source/common/unicode/uset.h @@ -0,0 +1,231 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar07 +* created by: Markus W. Scherer +* +* C version of UnicodeSet. +*/ + +#ifndef __USET_H__ +#define __USET_H__ + +#include "unicode/utypes.h" + +struct USet; +typedef struct USet USet; + +enum { + USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 /**< enough for any single-code point set */ +}; + +/** + * A serialized form of a Unicode set. Limited manipulations are + * possible directly on a serialized set. + */ +struct USerializedSet { + const uint16_t *array; + int32_t bmpLength, length; + uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; +}; +typedef struct USerializedSet USerializedSet; + +/** + * Creates a USet object that contains the range of characters + * start..end, inclusive. + * @param start first character of the range, inclusive + * @param end last character of the range, inclusive + * @return a newly created USet. 
The caller must call uset_close() on + * it when done. + */ +U_CAPI USet * U_EXPORT2 +uset_open(UChar32 start, UChar32 end); + +/** + * Disposes of the storage used by a USet object. This function should + * be called exactly once for objects returned by uset_open(). + * @param set the object to dispose of + */ +U_CAPI void U_EXPORT2 +uset_close(USet *set); + +/** + * Adds the given character to the given USet. After this call, + * uset_contains(set, c) will return TRUE. + * @param set the object to which to add the character + * @param c the character to add + */ +U_CAPI void U_EXPORT2 +uset_add(USet *set, UChar32 c); + +/** + * Removes the given character from the given USet. After this call, + * uset_contains(set, c) will return FALSE. + * @param set the object from which to remove the character + * @param c the character to remove + */ +U_CAPI void U_EXPORT2 +uset_remove(USet *set, UChar32 c); + +/** + * Returns TRUE if the given USet contains no characters and no + * strings. + * @param set the set + * @return true if set is empty + */ +U_CAPI UBool U_EXPORT2 +uset_isEmpty(const USet *set); + +/** + * Returns TRUE if the given USet contains the given character. + * @param set the set + * @return true if set contains c + */ +U_CAPI UBool U_EXPORT2 +uset_contains(const USet *set, UChar32 c); + +/** + * Returns the number of characters and strings contained in the given + * USet. + * @param set the set + * @return a non-negative integer counting the characters and strings + * contained in set + */ +U_CAPI int32_t U_EXPORT2 +uset_size(const USet* set); + +/** + * Returns the number of disjoint ranges of characters contained in + * the given set. Ignores any strings contained in the set. + * @param set the set + * @return a non-negative integer counting the character ranges + * contained in set + */ +U_CAPI int32_t U_EXPORT2 +uset_countRanges(const USet *set); + +/** + * Returns a range of characters contained in the given set. 
+ * @param set the set + * @param rangeIndex a non-negative integer in the range 0.. + * uset_countRanges(set)-1 + * @param pStart pointer to variable to receive first character + * in range, inclusive + * @param pEnd pointer to variable to receive last character in range, + * inclusive + * @return true if rangeIndex is valid, otherwise false + */ +U_CAPI UBool U_EXPORT2 +uset_getRange(const USet *set, int32_t rangeIndex, + UChar32 *pStart, UChar32 *pEnd); + +/** + * Serializes this set into an array of 16-bit integers. The array + * has following format (each line is one 16-bit integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param set the set + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. + * @param pErrorCode pointer to the error code. Will be set to + * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to + * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. 
+ * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + */ +U_CAPI int32_t U_EXPORT2 +uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode); + +/** + * Given a serialized array, fill in the given serialized set object. + * @param fillSet pointer to result + * @param src pointer to start of array + * @param srcLength length of array + * @return true if the given array is valid, otherwise false + */ +U_CAPI UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength); + +/** + * Set the USerializedSet to contain the given character (and nothing + * else). + */ +U_CAPI void U_EXPORT2 +uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c); + +/** + * Returns TRUE if the given USerializedSet contains the given + * character. + * @param set the serialized set + * @return true if set contains c + */ +U_CAPI UBool U_EXPORT2 +uset_serializedContains(const USerializedSet *set, UChar32 c); + +/** + * Returns the number of disjoint ranges of characters contained in + * the given serialized set. Ignores any strings contained in the + * set. + * @param set the serialized set + * @return a non-negative integer counting the character ranges + * contained in set + */ +U_CAPI int32_t U_EXPORT2 +uset_countSerializedRanges(const USerializedSet *set); + +/** + * Returns a range of characters contained in the given serialized + * set. + * @param set the serialized set + * @param rangeIndex a non-negative integer in the range 0.. 
+ * uset_countSerializedRanges(set)-1 + * @param pStart pointer to variable to receive first character + * in range, inclusive + * @param pEnd pointer to variable to receive last character in range, + * inclusive + * @return true if rangeIndex is valid, otherwise false + */ +U_CAPI UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, + UChar32 *pStart, UChar32 *pEnd); + +#endif diff --git a/icu4c/source/i18n/unicode/usetiter.h b/icu4c/source/common/unicode/usetiter.h similarity index 98% rename from icu4c/source/i18n/unicode/usetiter.h rename to icu4c/source/common/unicode/usetiter.h index 0febe5b2ee2..afb04a4ffe8 100644 --- a/icu4c/source/i18n/unicode/usetiter.h +++ b/icu4c/source/common/unicode/usetiter.h @@ -3,8 +3,8 @@ * Copyright (c) 2002, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** -* $Source: /xsrl/Nsvn/icu/icu/source/i18n/unicode/Attic/usetiter.h,v $ -* $Revision: 1.5 $ +* $Source: /xsrl/Nsvn/icu/icu/source/common/unicode/usetiter.h,v $ +* $Revision: 1.1 $ ********************************************************************** */ #ifndef USETITER_H @@ -49,7 +49,7 @@ class UnicodeString; * @author M. 
Davis * @draft */ -class U_I18N_API UnicodeSetIterator { +class U_COMMON_API UnicodeSetIterator { protected: diff --git a/icu4c/source/i18n/unifilt.cpp b/icu4c/source/common/unifilt.cpp similarity index 98% rename from icu4c/source/i18n/unifilt.cpp rename to icu4c/source/common/unifilt.cpp index 4d0d48e7cc9..e8aeffbc350 100644 --- a/icu4c/source/i18n/unifilt.cpp +++ b/icu4c/source/common/unifilt.cpp @@ -8,7 +8,6 @@ #include "unicode/unifilt.h" #include "unicode/rep.h" -#include "rbt_rule.h" U_NAMESPACE_BEGIN diff --git a/icu4c/source/common/unifunct.cpp b/icu4c/source/common/unifunct.cpp new file mode 100644 index 00000000000..7acd0a43658 --- /dev/null +++ b/icu4c/source/common/unifunct.cpp @@ -0,0 +1,28 @@ +/* +********************************************************************** +* Copyright (c) 2002, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* $Source: /xsrl/Nsvn/icu/icu/source/common/unifunct.cpp,v $ +* $Date: 2002/05/29 18:33:33 $ +* $Revision: 1.1 $ +********************************************************************** +*/ + +#include "unicode/unifunct.h" + +U_NAMESPACE_BEGIN + +const char UnicodeFunctor::fgClassID = 0; + +UnicodeMatcher* UnicodeFunctor::toMatcher() const { + return 0; +} + +UnicodeReplacer* UnicodeFunctor::toReplacer() const { + return 0; +} + +U_NAMESPACE_END + +//eof diff --git a/icu4c/source/i18n/uniset.cpp b/icu4c/source/common/uniset.cpp similarity index 96% rename from icu4c/source/i18n/uniset.cpp rename to icu4c/source/common/uniset.cpp index b1af74d97bd..a3d55016a4a 100644 --- a/icu4c/source/i18n/uniset.cpp +++ b/icu4c/source/common/uniset.cpp @@ -14,7 +14,6 @@ #include "unicode/uscript.h" #include "symtable.h" #include "cmemory.h" -#include "rbt_rule.h" #include "uhash.h" #include "upropset.h" #include "util.h" @@ -780,7 +779,7 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text, // Strings, if any, have length != 
0, so we don't worry // about them here. If we ever allow zero-length strings // we much check for them here. - if (contains(TransliterationRule::ETHER)) { + if (contains(U_ETHER)) { return incremental ? U_PARTIAL_MATCH : U_MATCH; } else { return U_MISMATCH; @@ -1382,6 +1381,87 @@ UnicodeSet& UnicodeSet::compact() { return *this; } +int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { + int32_t bmpLength, length, destLength; + + if (U_FAILURE(ec)) { + return 0; + } + + if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { + ec=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* count necessary 16-bit units */ + length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH + // assert(length>=0); + if (length==0) { + /* empty set */ + if (destCapacity>0) { + *dest=0; + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return 1; + } + /* now length>0 */ + + if (this->list[length-1]<=0xffff) { + /* all BMP */ + bmpLength=length; + } else if (this->list[0]>=0x10000) { + /* all supplementary */ + bmpLength=0; + length*=2; + } else { + /* some BMP, some supplementary */ + for (bmpLength=0; bmpLengthlist[bmpLength]<=0xffff; ++bmpLength) {} + length=bmpLength+2*(length-bmpLength); + } + + /* length: number of 16-bit array units */ + if (length>0x7fff) { + /* there are only 15 bits for the length in the first serialized word */ + ec=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* + * total serialized length: + * number of 16-bit array units (length) + + * 1 length unit (always) + + * 1 bmpLength unit (if there are supplementary values) + */ + destLength=length+((length>bmpLength)?2:1); + if (destLength<=destCapacity) { + const UChar32 *p; + int32_t i; + + *dest=(uint16_t)length; + if (length>bmpLength) { + *dest|=0x8000; + *++dest=(uint16_t)bmpLength; + } + ++dest; + + /* write the BMP part of the array */ + p=this->list; + for (i=0; i>16); + *dest++=(uint16_t)*p++; + } + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return destLength; 
+} + //---------------------------------------------------------------- // Implementation: Pattern parsing //---------------------------------------------------------------- @@ -1789,7 +1869,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern, if (anchor == 2) { rebuildPattern = TRUE; newPat.append((UChar)SymbolTable::SYMBOL_REF); - add(TransliterationRule::ETHER); + add(U_ETHER); } mode = 4; break; @@ -1833,13 +1913,13 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern, return; } - // Treat a trailing '$' as indicating ETHER. This code is only + // Treat a trailing '$' as indicating U_ETHER. This code is only // executed if symbols == NULL; otherwise other code parses the // anchor. if (lastChar == (UChar)SymbolTable::SYMBOL_REF && !isLastLiteral) { rebuildPattern = TRUE; newPat.append(lastChar); - add(TransliterationRule::ETHER); + add(U_ETHER); } else if (lastChar != NONE) { diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp index 7ec84a57e05..dbb7696f571 100644 --- a/icu4c/source/common/unorm.cpp +++ b/icu4c/source/common/unorm.cpp @@ -32,7 +32,7 @@ #include "ustr_imp.h" #include "umutex.h" #include "utrie.h" -#include "uset.h" +#include "unicode/uset.h" #include "unormimp.h" /* diff --git a/icu4c/source/common/unormimp.h b/icu4c/source/common/unormimp.h index cb751667ffc..e2d85b93eb2 100644 --- a/icu4c/source/common/unormimp.h +++ b/icu4c/source/common/unormimp.h @@ -20,8 +20,8 @@ #include "unicode/utypes.h" #include "unicode/uiter.h" #include "unicode/unorm.h" +#include "unicode/uset.h" #include "utrie.h" -#include "uset.h" #include "ustr_imp.h" /* diff --git a/icu4c/source/i18n/upropset.cpp b/icu4c/source/common/upropset.cpp similarity index 98% rename from icu4c/source/i18n/upropset.cpp rename to icu4c/source/common/upropset.cpp index 6f31e83772a..9e5d6d47a85 100644 --- a/icu4c/source/i18n/upropset.cpp +++ b/icu4c/source/common/upropset.cpp @@ -13,7 +13,7 @@ #include "unicode/uchar.h" #include "hash.h" 
#include "mutex.h" -#include "ucln_in.h" +#include "ucln.h" #include "charstr.h" @@ -115,10 +115,9 @@ static const UChar INCLUSIONS_PATTERN[] = // "[^\\u3401-\\u4DB5 \\u4E01-\\u9FA5 \\uAC01-\\uD7A3 \\uD801-\\uDB7F \\uDB81-\\uDBFF \\uDC01-\\uDFFF \\uE001-\\uF8FF \\U0001044F-\\U0001CFFF \\U0001D801-\\U0001FFFF \\U00020001-\\U0002A6D6 \\U0002A6D8-\\U0002F7FF \\U0002FA1F-\\U000E0000 \\U000E0081-\\U000EFFFF \\U000F0001-\\U000FFFFD \\U00100001-\\U0010FFFD]" /** - * Cleanup function for transliterator component; delegates to - * Transliterator::cleanupRegistry(). + * Cleanup function for UnicodePropertySet */ -U_CFUNC UBool unicodePropertySet_cleanup(void) { +U_CFUNC UBool upropset_cleanup(void) { if (NAME_MAP != NULL) { delete NAME_MAP; NAME_MAP = NULL; delete CATEGORY_MAP; CATEGORY_MAP = NULL; @@ -524,8 +523,6 @@ void UnicodePropertySet::init() { CATEGORY_MAP = new Hashtable(TRUE); SCRIPT_CACHE = new UnicodeSet[(size_t)USCRIPT_CODE_LIMIT]; - ucln_i18n_registerCleanup(); // Call this when allocating statics - // NOTE: We munge all search keys to have no whitespace // and upper case. As such, all stored keys should have // this format. diff --git a/icu4c/source/i18n/upropset.h b/icu4c/source/common/upropset.h similarity index 100% rename from icu4c/source/i18n/upropset.h rename to icu4c/source/common/upropset.h diff --git a/icu4c/source/common/uset.c b/icu4c/source/common/uset.c deleted file mode 100644 index 6227426548f..00000000000 --- a/icu4c/source/common/uset.c +++ /dev/null @@ -1,499 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. 
Scherer -* -* Poor man's C version of UnicodeSet, with only basic functions. -* The main data structure, the array of range limits, is -* the same as in UnicodeSet, except that the HIGH value is not stored. -* -* There are functions to efficiently serialize a USet into an array of uint16_t -* and functions to use such a serialized form efficiently without -* instantiating a new USet. -* -* If we needed more of UnicodeSet's functionality, then we should -* move UnicodeSet from the i18n to the common library and -* use it directly. -* The only part of this code that would still be useful is the serialization -* and the functions that use the serialized form directly. -*/ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "uset.h" - -#define USET_STATIC_CAPACITY 12 -#define USET_GROW_DELTA 20 - -struct USet { - UChar32 *array; - int32_t length, capacity; - UChar32 staticBuffer[USET_STATIC_CAPACITY]; -}; - -U_CAPI USet * U_EXPORT2 -uset_open(UChar32 start, UChar32 limit) { - USet *set; - - set=(USet *)uprv_malloc(sizeof(USet)); - if(set!=NULL) { - /* initialize to an empty set */ - set->array=set->staticBuffer; - set->length=0; - set->capacity=USET_STATIC_CAPACITY; - - /* set initial range */ - if(start<=0) { - start=0; /* UChar32 may be signed! 
*/ - } - if(limit>0x110000) { - limit=0x110000; - } - if(startarray[0]=start; - if(limit<0x110000) { - set->array[1]=limit; - set->length=2; - } else { - set->length=1; - } - } - } - return set; -} - -U_CAPI void U_EXPORT2 -uset_close(USet *set) { - if(set!=NULL) { - if(set->array!=set->staticBuffer) { - uprv_free(set->array); - } - uprv_free(set); - } -} - -static U_INLINE int32_t -findChar(const UChar32 *array, int32_t length, UChar32 c) { - int32_t i; - - /* check the last range limit first for more efficient appending */ - if(length>0) { - if(c>=array[length-1]) { - return length; - } - - /* do not check the last range limit again in the loop below */ - --length; - } - - for(i=0; i=array[i]; ++i) {} - return i; -} - -static UBool -addRemove(USet *set, UChar32 c, int32_t doRemove) { - int32_t i, length, more; - - if(set==NULL || (uint32_t)c>0x10ffff) { - return FALSE; - } - - length=set->length; - i=findChar(set->array, length, c); - if((i&1)^doRemove) { - /* c is already in the set */ - return TRUE; - } - - /* how many more array items do we need? 
*/ - if(iarray[i]) { - /* c is just before the following range, extend that in-place by one */ - set->array[i]=c; - if(i>0) { - --i; - if(c==set->array[i]) { - /* the previous range collapsed, remove it */ - set->length=length-=2; - if(iarray+i, set->array+i+2, (length-i)*4); - } - } - } - return TRUE; - } else if(i>0 && c==set->array[i-1]) { - /* c is just after the previous range, extend that in-place by one */ - if(++c<=0x10ffff) { - set->array[i-1]=c; - if(iarray[i]) { - /* the following range collapsed, remove it */ - --i; - set->length=length-=2; - if(iarray+i, set->array+i+2, (length-i)*4); - } - } - } else { - /* extend the previous range (had limit 0x10ffff) to the end of Unicode */ - set->length=i-1; - } - return TRUE; - } else if(i==length && c==0x10ffff) { - /* insert one range limit c */ - more=1; - } else { - /* insert two range limits c, c+1 */ - more=2; - } - - /* insert range limits */ - if(length+more>set->capacity) { - /* reallocate */ - int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA; - UChar32 *newArray=(UChar32 *)uprv_malloc(newCapacity*4); - if(newArray==NULL) { - return FALSE; - } - set->capacity=newCapacity; - uprv_memcpy(newArray, set->array, length*4); - - if(set->array!=set->staticBuffer) { - uprv_free(set->array); - } - set->array=newArray; - } - - if(iarray+i+more, set->array+i, (length-i)*4); - } - set->array[i]=c; - if(more==2) { - set->array[i+1]=c+1; - } - set->length+=more; - - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -uset_add(USet *set, UChar32 c) { - return addRemove(set, c, 0); -} - -U_CAPI void U_EXPORT2 -uset_remove(USet *set, UChar32 c) { - addRemove(set, c, 1); -} - -U_CAPI UBool U_EXPORT2 -uset_isEmpty(const USet *set) { - return set==NULL || set->length<=0; -} - -U_CAPI UBool U_EXPORT2 -uset_contains(const USet *set, UChar32 c) { - int32_t i; - - if(set==NULL || (uint32_t)c>0x10ffff) { - return FALSE; - } - - i=findChar(set->array, set->length, c); - return (UBool)(i&1); -} - -U_CAPI int32_t U_EXPORT2 
-uset_containsOne(const USet *set) { - if( set!=NULL && - ((set->length==2 && set->array[0]==(set->array[1]-1)) || - (set->length==1 && set->array[0]==0x10ffff)) - ) { - return (int32_t)set->array[0]; - } else { - return -1; - } -} - -U_CAPI int32_t U_EXPORT2 -uset_countRanges(const USet *set) { - if(set==NULL) { - return 0; - } else { - return (set->length+1)/2; - } -} - -U_CAPI UBool U_EXPORT2 -uset_getRange(const USet *set, int32_t rangeIndex, - UChar32 *pStart, UChar32 *pLimit) { - if(set==NULL || rangeIndex<0) { - return FALSE; - } - - rangeIndex*=2; - if(rangeIndexlength) { - *pStart=set->array[rangeIndex++]; - if(rangeIndexlength) { - *pLimit=set->array[rangeIndex]; - } else { - *pLimit=0x110000; - } - return TRUE; - } else { - return FALSE; - } -} - -/* - * Serialize a USet into 16-bit units. - * Store BMP code points as themselves with one 16-bit unit each. - * - * Important: the code points in the array are in ascending order, - * therefore all BMP code points precede all supplementary code points. - * - * Store each supplementary code point in 2 16-bit units, - * simply with higher-then-lower 16-bit halfs. - * - * Precede the entire list with the length. - * If there are supplementary code points, then set bit 15 in the length - * and add the bmpLength between it and the array. 
- * - * In other words: - * - all BMP: (length=bmpLength) BMP, .., BMP - * - some supplementary: (length|0x8000) (bmpLength0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* count necessary 16-bit units */ - length=set->length; - if(length==0) { - /* empty set */ - if(destCapacity>0) { - *dest=0; - } - return 1; - } - /* now length>0 */ - - if(set->array[length-1]<=0xffff) { - /* all BMP */ - bmpLength=length; - } else if(set->array[0]>=0x10000) { - /* all supplementary */ - bmpLength=0; - length*=2; - } else { - /* some BMP, some supplementary */ - for(bmpLength=0; bmpLengtharray[bmpLength]<=0xffff; ++bmpLength) {} - length=bmpLength+2*(length-bmpLength); - } - - /* length: number of 16-bit array units */ - if(length>0x7fff) { - /* there are only 15 bits for the length in the first serialized word */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* - * total serialized length: - * number of 16-bit array units (length) + - * 1 length unit (always) + - * 1 bmpLength unit (if there are supplementary values) - */ - destLength=length+1+(length>bmpLength); - if(destLength<=destCapacity) { - const UChar32 *p; - int32_t i; - - *dest=(uint16_t)length; - if(length>bmpLength) { - *dest|=0x8000; - *++dest=(uint16_t)bmpLength; - } - ++dest; - - /* write the BMP part of the array */ - p=set->array; - for(i=0; i>16); - *dest++=(uint16_t)*p++; - } - } else { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destLength; -} - -U_CAPI UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcCapacity) { - int32_t length; - - if(fillSet==NULL) { - return FALSE; - } - if(src==NULL || srcCapacity<=0) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - - length=*src++; - if(length&0x8000) { - /* there are supplementary values */ - length&=0x7fff; - if(srcCapacity<(2+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=*src++; - } else { - /* only BMP 
values */ - if(srcCapacity<(1+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=length; - } - fillSet->array=src; - fillSet->length=length; - return TRUE; -} - -U_CAPI void U_EXPORT2 -uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c) { - if(fillSet==NULL || (uint32_t)c>0x10ffff) { - return; - } - - fillSet->array=fillSet->staticArray; - if(c<0xffff) { - fillSet->bmpLength=fillSet->length=2; - fillSet->staticArray[0]=(uint16_t)c; - fillSet->staticArray[1]=(uint16_t)c+1; - } else if(c==0xffff) { - fillSet->bmpLength=1; - fillSet->length=3; - fillSet->staticArray[0]=0xffff; - fillSet->staticArray[1]=1; - fillSet->staticArray[2]=0; - } else if(c<0x10ffff) { - fillSet->bmpLength=0; - fillSet->length=4; - fillSet->staticArray[0]=(uint16_t)(c>>16); - fillSet->staticArray[1]=(uint16_t)c; - ++c; - fillSet->staticArray[2]=(uint16_t)(c>>16); - fillSet->staticArray[3]=(uint16_t)c; - } else /* c==0x10ffff */ { - fillSet->bmpLength=0; - fillSet->length=2; - fillSet->staticArray[0]=0x10; - fillSet->staticArray[1]=0xffff; - } -} - -U_CAPI UBool U_EXPORT2 -uset_serializedContains(const USerializedSet *set, UChar32 c) { - const uint16_t *array; - - if(set==NULL || (uint32_t)c>0x10ffff) { - return FALSE; - } - - array=set->array; - if(c<=0xffff) { - /* find c in the BMP part */ - int32_t i, bmpLength=set->bmpLength; - for(i=0; i=array[i]; ++i) {} - return (UBool)(i&1); - } else { - /* find c in the supplementary part */ - int32_t i, length=set->length; - uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c; - for(i=set->bmpLength; - iarray[i] || (high==array[i] && low>=array[i+1])); - i+=2) {} - - /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ - return (UBool)(((i+set->bmpLength)&2)!=0); - } -} - -U_CAPI int32_t U_EXPORT2 -uset_countSerializedRanges(const USerializedSet *set) { - if(set==NULL) { - return 0; - } - - return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2; -} - -U_CAPI UBool 
U_EXPORT2 -uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, - UChar32 *pStart, UChar32 *pLimit) { - const uint16_t *array; - int32_t bmpLength, length; - - if(set==NULL || rangeIndex<0 || pStart==NULL || pLimit==NULL) { - return FALSE; - } - - array=set->array; - length=set->length; - bmpLength=set->bmpLength; - - rangeIndex*=2; /* address start/limit pairs */ - if(rangeIndexadd(c); +} + +U_CAPI void U_EXPORT2 +uset_remove(USet *set, UChar32 c) { + ((UnicodeSet*) set)->remove(c); +} + +U_CAPI UBool U_EXPORT2 +uset_isEmpty(const USet *set) { + return ((const UnicodeSet*) set)->isEmpty(); +} + +U_CAPI UBool U_EXPORT2 +uset_contains(const USet *set, UChar32 c) { + return ((const UnicodeSet*) set)->contains(c); +} + +U_CAPI int32_t U_EXPORT2 +uset_size(const USet* set) { + return ((const UnicodeSet*) set)->size(); +} + +U_CAPI int32_t U_EXPORT2 +uset_getRangeCount(const USet *set) { + return ((const UnicodeSet*) set)->getRangeCount(); +} + +U_CAPI UBool U_EXPORT2 +uset_getRange(const USet *set, int32_t rangeIndex, + UChar32 *pStart, UChar32 *pEnd) { + if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) { + return FALSE; + } + const UnicodeSet* us = (const UnicodeSet*) set; + *pStart = us->getRangeStart(rangeIndex); + *pEnd = us->getRangeEnd(rangeIndex); + return TRUE; +} + +/* + * Serialize a USet into 16-bit units. + * Store BMP code points as themselves with one 16-bit unit each. + * + * Important: the code points in the array are in ascending order, + * therefore all BMP code points precede all supplementary code points. + * + * Store each supplementary code point in 2 16-bit units, + * simply with higher-then-lower 16-bit halfs. + * + * Precede the entire list with the length. + * If there are supplementary code points, then set bit 15 in the length + * and add the bmpLength between it and the array. 
+ * + * In other words: + * - all BMP: (length=bmpLength) BMP, .., BMP + * - some supplementary: (length|0x8000) (bmpLengthserialize(dest, destCapacity, *ec); +} + +U_CAPI UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength) { + int32_t length; + + if(fillSet==NULL) { + return FALSE; + } + if(src==NULL || srcLength<=0) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + + length=*src++; + if(length&0x8000) { + /* there are supplementary values */ + length&=0x7fff; + if(srcLength<(2+length)) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + fillSet->bmpLength=*src++; + } else { + /* only BMP values */ + if(srcLength<(1+length)) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + fillSet->bmpLength=length; + } + fillSet->array=src; + fillSet->length=length; + return TRUE; +} + +U_CAPI void U_EXPORT2 +uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c) { + if(fillSet==NULL || (uint32_t)c>0x10ffff) { + return; + } + + fillSet->array=fillSet->staticArray; + if(c<0xffff) { + fillSet->bmpLength=fillSet->length=2; + fillSet->staticArray[0]=(uint16_t)c; + fillSet->staticArray[1]=(uint16_t)c+1; + } else if(c==0xffff) { + fillSet->bmpLength=1; + fillSet->length=3; + fillSet->staticArray[0]=0xffff; + fillSet->staticArray[1]=1; + fillSet->staticArray[2]=0; + } else if(c<0x10ffff) { + fillSet->bmpLength=0; + fillSet->length=4; + fillSet->staticArray[0]=(uint16_t)(c>>16); + fillSet->staticArray[1]=(uint16_t)c; + ++c; + fillSet->staticArray[2]=(uint16_t)(c>>16); + fillSet->staticArray[3]=(uint16_t)c; + } else /* c==0x10ffff */ { + fillSet->bmpLength=0; + fillSet->length=2; + fillSet->staticArray[0]=0x10; + fillSet->staticArray[1]=0xffff; + } +} + +U_CAPI UBool U_EXPORT2 +uset_serializedContains(const USerializedSet *set, UChar32 c) { + const uint16_t *array; + + if(set==NULL || (uint32_t)c>0x10ffff) { + return FALSE; + } + + array=set->array; + if(c<=0xffff) { + /* find c in the BMP 
part */ + int32_t i, bmpLength=set->bmpLength; + for(i=0; i=array[i]; ++i) {} + return (UBool)(i&1); + } else { + /* find c in the supplementary part */ + int32_t i, length=set->length; + uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c; + for(i=set->bmpLength; + iarray[i] || (high==array[i] && low>=array[i+1])); + i+=2) {} + + /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ + return (UBool)(((i+set->bmpLength)&2)!=0); + } +} + +U_CAPI int32_t U_EXPORT2 +uset_countSerializedRanges(const USerializedSet *set) { + if(set==NULL) { + return 0; + } + + return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2; +} + +U_CAPI UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, + UChar32 *pStart, UChar32 *pEnd) { + const uint16_t *array; + int32_t bmpLength, length; + + if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) { + return FALSE; + } + + array=set->array; + length=set->length; + bmpLength=set->bmpLength; + + rangeIndex*=2; /* address start/limit pairs */ + if(rangeIndex0) { +// if(c>=array[length-1]) { +// return length; +// } +// +// /* do not check the last range limit again in the loop below */ +// --length; +// } +// +// for(i=0; i=array[i]; ++i) {} +// return i; +// } +// +// static UBool +// addRemove(USet *set, UChar32 c, int32_t doRemove) { +// int32_t i, length, more; +// +// if(set==NULL || (uint32_t)c>0x10ffff) { +// return FALSE; +// } +// +// length=set->length; +// i=findChar(set->array, length, c); +// if((i&1)^doRemove) { +// /* c is already in the set */ +// return TRUE; +// } +// +// /* how many more array items do we need? 
*/ +// if(iarray[i]) { +// /* c is just before the following range, extend that in-place by one */ +// set->array[i]=c; +// if(i>0) { +// --i; +// if(c==set->array[i]) { +// /* the previous range collapsed, remove it */ +// set->length=length-=2; +// if(iarray+i, set->array+i+2, (length-i)*4); +// } +// } +// } +// return TRUE; +// } else if(i>0 && c==set->array[i-1]) { +// /* c is just after the previous range, extend that in-place by one */ +// if(++c<=0x10ffff) { +// set->array[i-1]=c; +// if(iarray[i]) { +// /* the following range collapsed, remove it */ +// --i; +// set->length=length-=2; +// if(iarray+i, set->array+i+2, (length-i)*4); +// } +// } +// } else { +// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */ +// set->length=i-1; +// } +// return TRUE; +// } else if(i==length && c==0x10ffff) { +// /* insert one range limit c */ +// more=1; +// } else { +// /* insert two range limits c, c+1 */ +// more=2; +// } +// +// /* insert range limits */ +// if(length+more>set->capacity) { +// /* reallocate */ +// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA; +// UChar32 *newArray=(UChar32 *)uprv_malloc(newCapacity*4); +// if(newArray==NULL) { +// return FALSE; +// } +// set->capacity=newCapacity; +// uprv_memcpy(newArray, set->array, length*4); +// +// if(set->array!=set->staticBuffer) { +// uprv_free(set->array); +// } +// set->array=newArray; +// } +// +// if(iarray+i+more, set->array+i, (length-i)*4); +// } +// set->array[i]=c; +// if(more==2) { +// set->array[i+1]=c+1; +// } +// set->length+=more; +// +// return TRUE; +// } +// +// U_CAPI UBool U_EXPORT2 +// uset_add(USet *set, UChar32 c) { +// return addRemove(set, c, 0); +// } +// +// U_CAPI void U_EXPORT2 +// uset_remove(USet *set, UChar32 c) { +// addRemove(set, c, 1); +// } diff --git a/icu4c/source/common/uset.h b/icu4c/source/common/uset.h deleted file mode 100644 index 2d58ac49f7b..00000000000 --- a/icu4c/source/common/uset.h +++ /dev/null @@ -1,94 +0,0 @@ 
-/* -******************************************************************************* -* -* Copyright (C) 2002, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. Scherer -* -* Poor man's C version of UnicodeSet, with only basic functions. -* See uset.c for more details. -*/ - -#ifndef __USET_H__ -#define __USET_H__ - -#include "unicode/utypes.h" - -struct USet; -typedef struct USet USet; - -enum { - USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 /**< enough for any single-code point set */ -}; - -struct USerializedSet { - const uint16_t *array; - int32_t bmpLength, length; - uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; -}; -typedef struct USerializedSet USerializedSet; - -U_CAPI USet * U_EXPORT2 -uset_open(UChar32 start, UChar32 limit); - -U_CAPI void U_EXPORT2 -uset_close(USet *set); - -U_CAPI UBool U_EXPORT2 -uset_add(USet *set, UChar32 c); - -U_CAPI void U_EXPORT2 -uset_remove(USet *set, UChar32 c); - -U_CAPI UBool U_EXPORT2 -uset_isEmpty(const USet *set); - -U_CAPI UBool U_EXPORT2 -uset_contains(const USet *set, UChar32 c); - -/** - * Check if the set contains exactly one code point. - * - * @return The code point if the set contains exactly one, otherwise -1. - */ -U_CAPI int32_t U_EXPORT2 -uset_containsOne(const USet *set); - -U_CAPI int32_t U_EXPORT2 -uset_countRanges(const USet *set); - -U_CAPI UBool U_EXPORT2 -uset_getRange(const USet *set, int32_t rangeIndex, - UChar32 *pStart, UChar32 *pLimit); - -U_CAPI int32_t U_EXPORT2 -uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode); - -U_CAPI UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcCapacity); - -/** - * Set the USerializedSet to contain exactly c. 
- */ -U_CAPI void U_EXPORT2 -uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c); - -U_CAPI UBool U_EXPORT2 -uset_serializedContains(const USerializedSet *set, UChar32 c); - -U_CAPI int32_t U_EXPORT2 -uset_countSerializedRanges(const USerializedSet *set); - -U_CAPI UBool U_EXPORT2 -uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, - UChar32 *pStart, UChar32 *pLimit); - -#endif diff --git a/icu4c/source/i18n/usetiter.cpp b/icu4c/source/common/usetiter.cpp similarity index 97% rename from icu4c/source/i18n/usetiter.cpp rename to icu4c/source/common/usetiter.cpp index db3b787bce4..496f58ec5be 100644 --- a/icu4c/source/i18n/usetiter.cpp +++ b/icu4c/source/common/usetiter.cpp @@ -3,9 +3,9 @@ * Copyright (c) 2002, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** -* $Source: /xsrl/Nsvn/icu/icu/source/i18n/Attic/usetiter.cpp,v $ -* $Date: 2002/04/30 22:11:47 $ -* $Revision: 1.2 $ +* $Source: /xsrl/Nsvn/icu/icu/source/common/usetiter.cpp,v $ +* $Date: 2002/05/29 18:33:33 $ +* $Revision: 1.1 $ ********************************************************************** */ #include "unicode/usetiter.h" diff --git a/icu4c/source/i18n/util.cpp b/icu4c/source/common/util.cpp similarity index 100% rename from icu4c/source/i18n/util.cpp rename to icu4c/source/common/util.cpp diff --git a/icu4c/source/i18n/util.h b/icu4c/source/common/util.h similarity index 99% rename from icu4c/source/i18n/util.h rename to icu4c/source/common/util.h index 581fc949722..ed2d9879b40 100644 --- a/icu4c/source/i18n/util.h +++ b/icu4c/source/common/util.h @@ -22,7 +22,7 @@ U_NAMESPACE_BEGIN class UnicodeMatcher; -class ICU_Utility { +class U_COMMON_API ICU_Utility { public: /** diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index ea1987c0ced..068e6f9dd22 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -49,12 +49,12 @@ 
ucal.o calendar.o gregocal.o timezone.o simpletz.o \ sortkey.o bocsu.o coleitr.o coll.o ucoleitr.o \ ucol.o ucol_bld.o ucol_cnt.o ucol_elm.o ucol_tok.o ucol_wgt.o tblcoll.o \ strmatch.o usearch.o search.o stsearch.o \ -uniset.o unifltlg.o translit.o utrans.o \ +unifltlg.o translit.o utrans.o \ cpdtrans.o hextouni.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \ nultrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o \ -name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o \ -nfrs.o nfrule.o nfsubs.o rbnf.o upropset.o util.o esctrn.o unesctrn.o \ -funcrepl.o strrepl.o tridpars.o unifunct.o caniter.o usetiter.o \ +name2uni.o uni2name.o unitohex.o nortrans.o quant.o transreg.o \ +nfrs.o nfrule.o nfsubs.o rbnf.o esctrn.o unesctrn.o \ +funcrepl.o strrepl.o tridpars.o caniter.o \ currency.o diff --git a/icu4c/source/i18n/caniter.cpp b/icu4c/source/i18n/caniter.cpp index 6c8f4d86e12..77b89cb7c99 100644 --- a/icu4c/source/i18n/caniter.cpp +++ b/icu4c/source/i18n/caniter.cpp @@ -6,7 +6,7 @@ */ #include "hash.h" -#include "uset.h" +#include "unicode/uset.h" #include "unormimp.h" #include "caniter.h" #include "cmemory.h" @@ -395,7 +395,7 @@ Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segL USerializedSet starts; // cycle through all the characters - UChar32 cp, limit = 0; + UChar32 cp, end = 0; int32_t i = 0, j; for (i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) { // see if any character is at the start of some decomposition @@ -404,7 +404,7 @@ Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segL continue; } // if so, see which decompositions match - for(j = 0, cp = limit; cp < limit || uset_getSerializedRange(&starts, j++, &cp, &limit); ++cp) { + for(j = 0, cp = end+1; cp <= end || uset_getSerializedRange(&starts, j++, &cp, &end); ++cp) { //Hashtable *remainder = extract(cp, segment, segLen, i, status); Hashtable *remainder = extract(cp, segment, segLen, i, status); if (remainder == NULL) 
continue; diff --git a/icu4c/source/i18n/i18n.dsp b/icu4c/source/i18n/i18n.dsp index 2e422644530..9ba19d79109 100644 --- a/icu4c/source/i18n/i18n.dsp +++ b/icu4c/source/i18n/i18n.dsp @@ -99,7 +99,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /FD /c -# ADD CPP /nologo /MD /Za /W3 /I "..\..\include" /I "..\..\source\common" /D"WIN64" /D"NDEBUG" /D"_WINDOWS" /D"_MBCS" /D"_USRDLL" /D"I18N_EXPORTS" /D"U_I18N_IMPLEMENTATION" /FR /FD /GF /c /O2 /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600 +# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /I "..\..\include" /I "..\..\source\common" /D "WIN64" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /QIA64_fmaopt /Wp64 /Zm600 /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win64 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win64 # ADD BASE RSC /l 0x409 /d "NDEBUG" @@ -108,8 +108,8 @@ BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IA64 -# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /machine:IA64 /out:"..\..\bin\icuin21.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib" /incremental:no +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:IX86 /machine:IA64 +# ADD LINK32 icuuc.lib /nologo /base:"0x4a900000" /dll /machine:IX86 /out:"..\..\bin\icuin21.dll" /implib:"..\..\lib\icuin.lib" /libpath:"..\..\lib" /machine:IA64 # SUBTRACT LINK32 /debug !ELSEIF "$(CFG)" == "i18n - Win64 Debug" @@ -126,7 
+126,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /FD /GZ /c -# ADD CPP /nologo /MDd /Za /W3 /Gm /I "..\..\include" /I "..\..\source\common" /D"_WINDOWS" /D"_USRDLL" /D"I18N_EXPORTS" /D"U_I18N_IMPLEMENTATION" /D"WIN64" /D"_DEBUG" /D"_MBCS" /D"UDATA_MAP" /FR /FD /GF /GZ /c /Od /GX /Op /QIA64_fmaopt /D"_IA64_" /Zi /D"WIN64" /D"WIN32" /D"_AFX_NO_DAO_SUPPORT" /Wp64 /Zm600 +# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /I "..\..\include" /I "..\..\source\common" /D "_WINDOWS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /D "WIN64" /D "_DEBUG" /D "_MBCS" /D "UDATA_MAP" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GF /GZ /QIA64_fmaopt /Wp64 /Zm600 /c # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win64 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win64 # ADD BASE RSC /l 0x409 /d "_DEBUG" @@ -135,9 +135,8 @@ BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe -# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IA64 /pdbtype:sept -# ADD LINK32 icuucd.lib /nologo /base:"0x4a900000" /dll /debug /machine:IA64 /out:"..\..\bin\icuin21d.dll" /implib:"..\..\lib\icuind.lib" /pdbtype:sept /libpath:"..\..\lib" /incremental:no -# SUBTRACT LINK32 +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:IX86 /pdbtype:sept /machine:IA64 +# ADD LINK32 icuucd.lib /nologo /base:"0x4a900000" /dll /incremental:no /debug /machine:IX86 /out:"..\..\bin\icuin21d.dll" /implib:"..\..\lib\icuind.lib" /pdbtype:sept /libpath:"..\..\lib" /machine:IA64 !ENDIF @@ -400,22 +399,10 @@ SOURCE=.\uni2name.cpp # End Source File # Begin Source 
File -SOURCE=.\unifilt.cpp -# End Source File -# Begin Source File - SOURCE=.\unifltlg.cpp # End Source File # Begin Source File -SOURCE=.\unifunct.cpp -# End Source File -# Begin Source File - -SOURCE=.\uniset.cpp -# End Source File -# Begin Source File - SOURCE=.\unitohex.cpp # End Source File # Begin Source File @@ -424,22 +411,10 @@ SOURCE=.\unum.cpp # End Source File # Begin Source File -SOURCE=.\upropset.cpp -# End Source File -# Begin Source File - SOURCE=.\usearch.cpp # End Source File # Begin Source File -SOURCE=.\usetiter.cpp -# End Source File -# Begin Source File - -SOURCE=.\util.cpp -# End Source File -# Begin Source File - SOURCE=.\utrans.cpp # End Source File # End Group @@ -1385,53 +1360,6 @@ InputPath=.\unicode\parseerr.h # End Source File # Begin Source File -SOURCE=.\unicode\parsepos.h - -!IF "$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\parsepos.h - -"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\parsepos.h - -"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\parsepos.h - -"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\parsepos.h - -"..\..\include\unicode\parsepos.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - SOURCE=.\quant.h # End Source File # Begin Source File @@ -2225,53 +2153,6 @@ SOURCE=.\uni2name.h # End Source File # Begin Source File -SOURCE=.\unicode\unifilt.h - -!IF 
"$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\unifilt.h - -"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\unifilt.h - -"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\unifilt.h - -"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\unifilt.h - -"..\..\include\unicode\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - SOURCE=.\unicode\unifltlg.h !IF "$(CFG)" == "i18n - Win32 Release" @@ -2319,100 +2200,6 @@ InputPath=.\unicode\unifltlg.h # End Source File # Begin Source File -SOURCE=.\unicode\unifunct.h - -!IF "$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\unifunct.h - -"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\unifunct.h - -"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\unifunct.h - -"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\unifunct.h - 
-"..\..\include\unicode\unifunct.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - -SOURCE=.\unicode\unimatch.h - -!IF "$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\unimatch.h - -"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\unimatch.h - -"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\unimatch.h - -"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\unimatch.h - -"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - SOURCE=.\unicode\unirepl.h !IF "$(CFG)" == "i18n - Win32 Release" @@ -2460,53 +2247,6 @@ InputPath=.\unicode\unirepl.h # End Source File # Begin Source File -SOURCE=.\unicode\uniset.h - -!IF "$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\uniset.h - -"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\uniset.h - -"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\uniset.h - 
-"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\uniset.h - -"..\..\include\unicode\uniset.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - SOURCE=.\unicode\unitohex.h !IF "$(CFG)" == "i18n - Win32 Release" @@ -2652,61 +2392,10 @@ InputPath=.\unicode\usearch.h # End Source File # Begin Source File -SOURCE=.\unicode\usetiter.h - -!IF "$(CFG)" == "i18n - Win32 Release" - -# Begin Custom Build -InputPath=.\unicode\usetiter.h - -"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win32 Debug" - -# Begin Custom Build -InputPath=.\unicode\usetiter.h - -"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Release" - -# Begin Custom Build -InputPath=.\unicode\usetiter.h - -"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ELSEIF "$(CFG)" == "i18n - Win64 Debug" - -# Begin Custom Build -InputPath=.\unicode\usetiter.h - -"..\..\include\unicode\usetiter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" - copy $(InputPath) ..\..\include\unicode - -# End Custom Build - -!ENDIF - -# End Source File -# Begin Source File - SOURCE=.\usrchimp.h # End Source File # Begin Source File -SOURCE=.\util.h -# End Source File -# Begin Source File - SOURCE=.\unicode\utrans.h !IF "$(CFG)" == "i18n - Win32 Release" diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 1049a876d05..6ee55234ae0 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -21,8 
+21,6 @@ static const UChar FORWARD_OP[] = {32,62,32,0}; // " > " U_NAMESPACE_BEGIN -const UChar TransliterationRule::ETHER = 0xFFFF; - /** * Construct a new rule with the given input, output text, and other * attributes. A cursor position may be specified for the output text. diff --git a/icu4c/source/i18n/rbt_rule.h b/icu4c/source/i18n/rbt_rule.h index 290e66b83aa..7b430c127d6 100644 --- a/icu4c/source/i18n/rbt_rule.h +++ b/icu4c/source/i18n/rbt_rule.h @@ -49,16 +49,6 @@ class UnicodeFunctor; */ class TransliterationRule { -public: - - /** - * The character at index i, where i < contextStart || i >= contextLimit, - * is ETHER. This allows explicit matching by rules and UnicodeSets - * of text outside the context. In traditional terms, this allows anchoring - * at the start and/or end. - */ - static const UChar ETHER; - private: // TODO Eliminate the pattern and keyLength data members. They diff --git a/icu4c/source/i18n/ucln_in.c b/icu4c/source/i18n/ucln_in.c index 2ff2e26441e..fd9cb5303ad 100644 --- a/icu4c/source/i18n/ucln_in.c +++ b/icu4c/source/i18n/ucln_in.c @@ -20,7 +20,6 @@ static UBool i18n_cleanup(void) { transliterator_cleanup(); - unicodePropertySet_cleanup(); dateFormatSymbols_cleanup(); timeZone_cleanup(); ucol_cleanup(); diff --git a/icu4c/source/i18n/unifunct.cpp b/icu4c/source/i18n/unifunct.cpp deleted file mode 100644 index 8539bf23231..00000000000 --- a/icu4c/source/i18n/unifunct.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "unicode/unifunct.h" - -const char UnicodeFunctor::fgClassID = 0; - -UnicodeMatcher* UnicodeFunctor::toMatcher() const { - return 0; -} - -UnicodeReplacer* UnicodeFunctor::toReplacer() const { - return 0; -} - -//eof diff --git a/icu4c/source/tools/gennorm/gennorm.h b/icu4c/source/tools/gennorm/gennorm.h index 42d8538c6aa..1823d9c327c 100644 --- a/icu4c/source/tools/gennorm/gennorm.h +++ b/icu4c/source/tools/gennorm/gennorm.h @@ -18,7 +18,7 @@ #define __GENPROPS_H__ #include "unicode/utypes.h" -#include "uset.h" +#include 
"unicode/uset.h" /* file definitions */ #define DATA_NAME "unorm" diff --git a/icu4c/source/tools/gennorm/store.c b/icu4c/source/tools/gennorm/store.c index 8364de3ac89..b69f631023c 100644 --- a/icu4c/source/tools/gennorm/store.c +++ b/icu4c/source/tools/gennorm/store.c @@ -25,7 +25,7 @@ #include "filestrm.h" #include "unicode/udata.h" #include "utrie.h" -#include "uset.h" +#include "unicode/uset.h" #include "unewdata.h" #include "unormimp.h" #include "gennorm.h" @@ -806,7 +806,7 @@ setHangulJamoSpecials() { norm->combiningFlags=1; /* for each Jamo L create a set with its associated Hangul block */ - norm->canonStart=uset_open(hangul, hangul+21*28); + norm->canonStart=uset_open(hangul, hangul+21*28-1); hangul+=21*28; } @@ -1000,13 +1000,14 @@ postParseFn(void *context, uint32_t code, Norm *norm) { c=norm->nfd[0]; otherNorm=createNorm(c); if(otherNorm->canonStart==NULL) { - otherNorm->canonStart=uset_open(code, code+1); + otherNorm->canonStart=uset_open(code, code); if(otherNorm->canonStart==NULL) { fprintf(stderr, "gennorm error: out of memory in uset_open()\n"); exit(U_MEMORY_ALLOCATION_ERROR); } } else { - if(!uset_add(otherNorm->canonStart, code)) { + uset_add(otherNorm->canonStart, code); + if(!uset_contains(otherNorm->canonStart, code)) { fprintf(stderr, "gennorm error: uset_add(setOf(U+%4lx), U+%4x)\n", c, code); exit(U_INTERNAL_PROGRAM_ERROR); } @@ -1201,15 +1202,29 @@ makeFCD() { } } +/** + * If the given set contains exactly one character, then return it. + * Otherwise return -1. + */ +static int32_t +usetContainsOne(const USet* set) { + if (uset_size(set) == 1) { + UChar32 start, end; + uset_getRange(set, 0, &start, &end); + return start; + } + return -1; +} + static void makeCanonSetFn(void *context, uint32_t code, Norm *norm) { - if(!uset_isEmpty(norm->canonStart)) { + if(norm->canonStart!=NULL && !uset_isEmpty(norm->canonStart)) { uint16_t *table; int32_t c, tableLength; UErrorCode errorCode=U_ZERO_ERROR; /* does the set contain exactly one code point? 
*/ - c=uset_containsOne(norm->canonStart); + c=usetContainsOne(norm->canonStart); /* add an entry to the BMP or supplementary search table */ if(code<=0xffff) {