mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 09:21:03 +00:00
ICU-7074 copy r26745 from branch
X-SVN-Rev: 26766
This commit is contained in:
commit
fed2240af3
3271 changed files with 2120060 additions and 0 deletions
69
.gitattributes
vendored
Normal file
69
.gitattributes
vendored
Normal file
|
@ -0,0 +1,69 @@
|
|||
* text=auto !eol
|
||||
|
||||
*.c text !eol
|
||||
*.cc text !eol
|
||||
*.classpath text !eol
|
||||
*.cpp text !eol
|
||||
*.css text !eol
|
||||
*.dsp text !eol
|
||||
*.dsw text !eol
|
||||
*.filters text !eol
|
||||
*.h text !eol
|
||||
*.htm text !eol
|
||||
*.html text !eol
|
||||
*.in text !eol
|
||||
*.java text !eol
|
||||
*.launch text !eol
|
||||
*.mak text !eol
|
||||
*.md text !eol
|
||||
*.MF text !eol
|
||||
*.mk text !eol
|
||||
*.pl text !eol
|
||||
*.pm text !eol
|
||||
*.project text !eol
|
||||
*.properties text !eol
|
||||
*.py text !eol
|
||||
*.rc text !eol
|
||||
*.sh text eol=lf
|
||||
*.sln text !eol
|
||||
*.stub text !eol
|
||||
*.txt text !eol
|
||||
*.ucm text !eol
|
||||
*.vcproj text !eol
|
||||
*.vcxproj text !eol
|
||||
*.xml text !eol
|
||||
*.xsl text !eol
|
||||
*.xslt text !eol
|
||||
Makefile text !eol
|
||||
configure text !eol
|
||||
LICENSE text !eol
|
||||
README text !eol
|
||||
|
||||
*.bin -text
|
||||
*.brk -text
|
||||
*.cnv -text
|
||||
*.icu -text
|
||||
*.res -text
|
||||
*.nrm -text
|
||||
*.spp -text
|
||||
*.tri2 -text
|
||||
|
||||
/icu4c.css -text
|
||||
source/data/locales/pool.res -text
|
||||
source/samples/ucnv/data02.bin -text
|
||||
source/test/perf/README -text
|
||||
source/test/testdata/TestFont1.otf -text
|
||||
source/test/testdata/icu26_testtypes.res -text
|
||||
source/test/testdata/icu26e_testtypes.res -text
|
||||
source/test/testdata/importtest.bin -text
|
||||
source/test/testdata/iscii.bin -text
|
||||
source/test/testdata/uni-text.bin -text
|
||||
|
||||
# The following file types are stored in Git-LFS.
|
||||
*.jar filter=lfs diff=lfs merge=lfs -text
|
||||
*.dat filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.gif filter=lfs diff=lfs merge=lfs -text
|
||||
|
766
.gitignore
vendored
Normal file
766
.gitignore
vendored
Normal file
|
@ -0,0 +1,766 @@
|
|||
/bin
|
||||
/bin64
|
||||
/include
|
||||
/lib
|
||||
/lib64
|
||||
source/Doxyfile
|
||||
source/Makefile
|
||||
source/README
|
||||
source/allinone/*.ncb
|
||||
source/allinone/*.opt
|
||||
source/allinone/*.suo
|
||||
source/bin
|
||||
source/common/*.ao
|
||||
source/common/*.d
|
||||
source/common/*.o
|
||||
source/common/*.plg
|
||||
source/common/Debug
|
||||
source/common/Makefile
|
||||
source/common/Release
|
||||
source/common/common.res
|
||||
source/common/common.vcproj.*.*.user
|
||||
source/common/debug
|
||||
source/common/icucfg.h
|
||||
source/common/libicu*.*
|
||||
source/common/release
|
||||
source/common/svchook.mk
|
||||
source/common/unicode/platform.h
|
||||
source/common/x64
|
||||
source/common/x86
|
||||
source/config.cache
|
||||
source/config.log
|
||||
source/config.status
|
||||
source/config/Makefile.inc
|
||||
source/config/icu-config
|
||||
source/config/icu-config.1
|
||||
source/config/icucross.mk
|
||||
source/config/pkgdata.inc
|
||||
source/config/pkgdataMakefile
|
||||
source/data/*.plg
|
||||
source/data/Debug
|
||||
source/data/Makefile
|
||||
source/data/Release
|
||||
source/data/icupkg.inc
|
||||
source/data/in
|
||||
source/data/makedata.vcproj.*.*.user
|
||||
source/data/out
|
||||
source/data/pkgdataMakefile
|
||||
source/doc
|
||||
source/extra/Makefile
|
||||
source/extra/scrptrun/Makefile
|
||||
source/extra/scrptrun/scrptrun.d
|
||||
source/extra/scrptrun/srtest
|
||||
source/extra/scrptrun/srtest.d
|
||||
source/extra/uconv/*.d
|
||||
source/extra/uconv/*.o
|
||||
source/extra/uconv/Debug
|
||||
source/extra/uconv/Makefile
|
||||
source/extra/uconv/Release
|
||||
source/extra/uconv/debug
|
||||
source/extra/uconv/pkgdata.inc
|
||||
source/extra/uconv/pkgdataMakefile
|
||||
source/extra/uconv/pkgdatain.txt
|
||||
source/extra/uconv/release
|
||||
source/extra/uconv/resources/*.res
|
||||
source/extra/uconv/uconv
|
||||
source/extra/uconv/uconv.1
|
||||
source/extra/uconv/uconv.plg
|
||||
source/extra/uconv/uconv.vcproj.*.*.user
|
||||
source/extra/uconv/uconvmsg
|
||||
source/extra/uconv/x64
|
||||
source/extra/uconv/x86
|
||||
source/i18n/*.ao
|
||||
source/i18n/*.d
|
||||
source/i18n/*.o
|
||||
source/i18n/*.plg
|
||||
source/i18n/Debug
|
||||
source/i18n/Makefile
|
||||
source/i18n/Makefile.local
|
||||
source/i18n/Release
|
||||
source/i18n/debug
|
||||
source/i18n/i18n.res
|
||||
source/i18n/i18n.vcproj.*.*.user
|
||||
source/i18n/release
|
||||
source/i18n/x64
|
||||
source/i18n/x86
|
||||
source/icudefs.mk
|
||||
source/io/*.ao
|
||||
source/io/*.d
|
||||
source/io/*.o
|
||||
source/io/Debug
|
||||
source/io/Makefile
|
||||
source/io/Release
|
||||
source/io/debug
|
||||
source/io/io.res
|
||||
source/io/io.vcproj.*.*.user
|
||||
source/io/release
|
||||
source/io/x64
|
||||
source/io/x86
|
||||
source/layout/*.ao
|
||||
source/layout/*.d
|
||||
source/layout/*.o
|
||||
source/layout/*.pdb
|
||||
source/layout/Debug
|
||||
source/layout/Makefile
|
||||
source/layout/Release
|
||||
source/layout/debug
|
||||
source/layout/layout.res
|
||||
source/layout/layout.vcproj.*.*.user
|
||||
source/layout/release
|
||||
source/layout/x64
|
||||
source/layout/x86
|
||||
source/layoutex/*.ao
|
||||
source/layoutex/*.d
|
||||
source/layoutex/*.o
|
||||
source/layoutex/*.pdb
|
||||
source/layoutex/Debug
|
||||
source/layoutex/Makefile
|
||||
source/layoutex/Release
|
||||
source/layoutex/debug
|
||||
source/layoutex/layoutex.res
|
||||
source/layoutex/layoutex.vcproj.*.*.user
|
||||
source/layoutex/release
|
||||
source/layoutex/x64
|
||||
source/layoutex/x86
|
||||
source/lib
|
||||
source/samples/Makefile
|
||||
source/samples/all/all.ncb
|
||||
source/samples/all/all.suo
|
||||
source/samples/break/Debug
|
||||
source/samples/break/break.vcproj.*.*.user
|
||||
source/samples/break/release
|
||||
source/samples/cal/*.d
|
||||
source/samples/cal/*.pdb
|
||||
source/samples/cal/Debug
|
||||
source/samples/cal/Makefile
|
||||
source/samples/cal/Release
|
||||
source/samples/cal/cal.vcproj.*.*.user
|
||||
source/samples/cal/icucal
|
||||
source/samples/cal/icucal.exe
|
||||
source/samples/cal/release
|
||||
source/samples/cal/x64
|
||||
source/samples/cal/x86
|
||||
source/samples/case/Debug
|
||||
source/samples/case/case.vcproj.*.*.user
|
||||
source/samples/case/release
|
||||
source/samples/citer/Debug
|
||||
source/samples/citer/citer.vcproj.*.*.user
|
||||
source/samples/citer/release
|
||||
source/samples/coll/Debug
|
||||
source/samples/coll/coll.vcproj.*.*.user
|
||||
source/samples/coll/release
|
||||
source/samples/csdet/Debug
|
||||
source/samples/csdet/Makefile
|
||||
source/samples/csdet/csdet.vcproj.*.*.user
|
||||
source/samples/csdet/release
|
||||
source/samples/date/*.d
|
||||
source/samples/date/*.pdb
|
||||
source/samples/date/Debug
|
||||
source/samples/date/Makefile
|
||||
source/samples/date/Release
|
||||
source/samples/date/date.vcproj.*.*.user
|
||||
source/samples/date/icudate
|
||||
source/samples/date/icudate.exe
|
||||
source/samples/date/release
|
||||
source/samples/date/x64
|
||||
source/samples/date/x86
|
||||
source/samples/datefmt/Debug
|
||||
source/samples/datefmt/datefmt.vcproj.*.*.user
|
||||
source/samples/datefmt/release
|
||||
source/samples/layout/*.d
|
||||
source/samples/layout/*.pdb
|
||||
source/samples/layout/Debug
|
||||
source/samples/layout/Makefile
|
||||
source/samples/layout/Release
|
||||
source/samples/layout/layout.vcproj.*.*.user
|
||||
source/samples/layout/release
|
||||
source/samples/layout/tmp
|
||||
source/samples/legacy/Debug
|
||||
source/samples/legacy/legacy.vcproj.*.*.user
|
||||
source/samples/legacy/release
|
||||
source/samples/msgfmt/Debug
|
||||
source/samples/msgfmt/msgfmt.vcproj.*.*.user
|
||||
source/samples/msgfmt/release
|
||||
source/samples/numfmt/Debug
|
||||
source/samples/numfmt/numfmt.vcproj.*.*.user
|
||||
source/samples/numfmt/release
|
||||
source/samples/props/Debug
|
||||
source/samples/props/props.vcproj.*.*.user
|
||||
source/samples/props/release
|
||||
source/samples/strsrch/Debug
|
||||
source/samples/strsrch/release
|
||||
source/samples/strsrch/strsrch.vcproj.*.*.user
|
||||
source/samples/translit/Debug
|
||||
source/samples/translit/release
|
||||
source/samples/translit/translit.vcproj.*.*.user
|
||||
source/samples/uciter8/Debug
|
||||
source/samples/uciter8/release
|
||||
source/samples/uciter8/uciter8.vcproj.*.*.user
|
||||
source/samples/ucnv/Debug
|
||||
source/samples/ucnv/convsamp
|
||||
source/samples/ucnv/convsamp.out
|
||||
source/samples/ucnv/release
|
||||
source/samples/ucnv/ucnv.vcproj.*.*.user
|
||||
source/samples/udata/Debug
|
||||
source/samples/udata/reader.vcproj.*.*.user
|
||||
source/samples/udata/reader_Win32_Debug
|
||||
source/samples/udata/reader_win32_release
|
||||
source/samples/udata/release
|
||||
source/samples/udata/writer.vcproj.*.*.user
|
||||
source/samples/ufortune/Debug
|
||||
source/samples/ufortune/Release
|
||||
source/samples/ufortune/ufortune.vcproj.*.*.user
|
||||
source/samples/ugrep/debug
|
||||
source/samples/ugrep/release
|
||||
source/samples/ugrep/ugrep.vcproj.*.*.user
|
||||
source/samples/uresb/Debug
|
||||
source/samples/uresb/release
|
||||
source/samples/uresb/resources.vcproj.*.*.user
|
||||
source/samples/uresb/uresb.vcproj.*.*.user
|
||||
source/samples/ustring/Debug
|
||||
source/samples/ustring/release
|
||||
source/samples/ustring/ustring.vcproj.*.*.user
|
||||
source/stubdata/*.ao
|
||||
source/stubdata/*.d
|
||||
source/stubdata/*.o
|
||||
source/stubdata/*.plg
|
||||
source/stubdata/Debug
|
||||
source/stubdata/Makefile
|
||||
source/stubdata/Release
|
||||
source/stubdata/cygicudt*.*
|
||||
source/stubdata/debug
|
||||
source/stubdata/libicu*.*
|
||||
source/stubdata/libsicu*.*
|
||||
source/stubdata/release
|
||||
source/stubdata/stubdata.vcproj.*.*.user
|
||||
source/stubdata/stubdatabuilt.txt
|
||||
source/stubdata/x64
|
||||
source/stubdata/x86
|
||||
source/test/Makefile
|
||||
source/test/cintltst/*.d
|
||||
source/test/cintltst/*.o
|
||||
source/test/cintltst/*.plg
|
||||
source/test/cintltst/Debug
|
||||
source/test/cintltst/Makefile
|
||||
source/test/cintltst/Release
|
||||
source/test/cintltst/cintltst
|
||||
source/test/cintltst/cintltst.exe
|
||||
source/test/cintltst/cintltst.vcproj.*.*.user
|
||||
source/test/cintltst/debug
|
||||
source/test/cintltst/release
|
||||
source/test/cintltst/x64
|
||||
source/test/cintltst/x86
|
||||
source/test/compat/Makefile
|
||||
source/test/hdrtst/Makefile
|
||||
source/test/intltest/*.d
|
||||
source/test/intltest/*.o
|
||||
source/test/intltest/*.plg
|
||||
source/test/intltest/Debug
|
||||
source/test/intltest/Makefile
|
||||
source/test/intltest/Makefile.local
|
||||
source/test/intltest/Release
|
||||
source/test/intltest/debug
|
||||
source/test/intltest/intltest
|
||||
source/test/intltest/intltest.exe
|
||||
source/test/intltest/intltest.vcproj.*.*.user
|
||||
source/test/intltest/release
|
||||
source/test/intltest/x64
|
||||
source/test/intltest/x86
|
||||
source/test/iotest/*.d
|
||||
source/test/iotest/*.o
|
||||
source/test/iotest/Debug
|
||||
source/test/iotest/Makefile
|
||||
source/test/iotest/Release
|
||||
source/test/iotest/debug
|
||||
source/test/iotest/iotest
|
||||
source/test/iotest/iotest.exe
|
||||
source/test/iotest/iotest.vcproj.*.*.user
|
||||
source/test/iotest/release
|
||||
source/test/iotest/x64
|
||||
source/test/iotest/x86
|
||||
source/test/letest/*.d
|
||||
source/test/letest/*.o
|
||||
source/test/letest/Debug
|
||||
source/test/letest/Makefile
|
||||
source/test/letest/Release
|
||||
source/test/letest/debug
|
||||
source/test/letest/letest
|
||||
source/test/letest/letest.exe
|
||||
source/test/letest/letest.vcproj.*.*.user
|
||||
source/test/letest/release
|
||||
source/test/letest/x64
|
||||
source/test/letest/x86
|
||||
source/test/perf/Makefile
|
||||
source/test/perf/charperf/*.d
|
||||
source/test/perf/charperf/*.o
|
||||
source/test/perf/charperf/Debug
|
||||
source/test/perf/charperf/Makefile
|
||||
source/test/perf/charperf/Release
|
||||
source/test/perf/charperf/charperf
|
||||
source/test/perf/charperf/charperf.vcproj.*.*.user
|
||||
source/test/perf/charperf/debug
|
||||
source/test/perf/charperf/release
|
||||
source/test/perf/collationperf/Makefile
|
||||
source/test/perf/collperf/*.d
|
||||
source/test/perf/collperf/*.o
|
||||
source/test/perf/collperf/Debug
|
||||
source/test/perf/collperf/Makefile
|
||||
source/test/perf/collperf/Release
|
||||
source/test/perf/collperf/collperf
|
||||
source/test/perf/collperf/collperf.vcproj.*.*.user
|
||||
source/test/perf/collperf/debug
|
||||
source/test/perf/collperf/release
|
||||
source/test/perf/convperf/Makefile
|
||||
source/test/perf/convperf/convperf.vcproj.*.*.user
|
||||
source/test/perf/convperf/debug
|
||||
source/test/perf/convperf/release
|
||||
source/test/perf/normperf/*.d
|
||||
source/test/perf/normperf/*.o
|
||||
source/test/perf/normperf/Debug
|
||||
source/test/perf/normperf/Makefile
|
||||
source/test/perf/normperf/Release
|
||||
source/test/perf/normperf/debug
|
||||
source/test/perf/normperf/normperf
|
||||
source/test/perf/normperf/normperf.vcproj.*.*.user
|
||||
source/test/perf/normperf/release
|
||||
source/test/perf/perf.ncb
|
||||
source/test/perf/perf.suo
|
||||
source/test/perf/strsrchperf/Makefile
|
||||
source/test/perf/ubrkperf/*.d
|
||||
source/test/perf/ubrkperf/*.o
|
||||
source/test/perf/ubrkperf/Debug
|
||||
source/test/perf/ubrkperf/Makefile
|
||||
source/test/perf/ubrkperf/Release
|
||||
source/test/perf/ubrkperf/debug
|
||||
source/test/perf/ubrkperf/release
|
||||
source/test/perf/ubrkperf/ubrkperf
|
||||
source/test/perf/ubrkperf/ubrkperf.vcproj.*.*.user
|
||||
source/test/perf/unisetperf/*.d
|
||||
source/test/perf/unisetperf/*.o
|
||||
source/test/perf/unisetperf/Debug
|
||||
source/test/perf/unisetperf/Makefile
|
||||
source/test/perf/unisetperf/Release
|
||||
source/test/perf/unisetperf/debug
|
||||
source/test/perf/unisetperf/release
|
||||
source/test/perf/unisetperf/unisetperf
|
||||
source/test/perf/unisetperf/unisetperf.vcproj.*.*.user
|
||||
source/test/perf/usetperf/*.d
|
||||
source/test/perf/usetperf/*.o
|
||||
source/test/perf/usetperf/Debug
|
||||
source/test/perf/usetperf/Makefile
|
||||
source/test/perf/usetperf/Release
|
||||
source/test/perf/usetperf/debug
|
||||
source/test/perf/usetperf/release
|
||||
source/test/perf/usetperf/usetperf
|
||||
source/test/perf/usetperf/usetperf.vcproj.*.*.user
|
||||
source/test/perf/ustrperf/*.d
|
||||
source/test/perf/ustrperf/*.o
|
||||
source/test/perf/ustrperf/Debug
|
||||
source/test/perf/ustrperf/Makefile
|
||||
source/test/perf/ustrperf/Release
|
||||
source/test/perf/ustrperf/charperf
|
||||
source/test/perf/ustrperf/debug
|
||||
source/test/perf/ustrperf/release
|
||||
source/test/perf/ustrperf/stringperf.vcproj.*.*.user
|
||||
source/test/perf/utfperf/*.d
|
||||
source/test/perf/utfperf/*.o
|
||||
source/test/perf/utfperf/Debug
|
||||
source/test/perf/utfperf/Makefile
|
||||
source/test/perf/utfperf/Release
|
||||
source/test/perf/utfperf/debug
|
||||
source/test/perf/utfperf/release
|
||||
source/test/perf/utfperf/utfperf
|
||||
source/test/perf/utfperf/utfperf.vcproj.*.*.user
|
||||
source/test/perf/utrie2perf/Makefile
|
||||
source/test/testdata/Makefile
|
||||
source/test/testdata/out
|
||||
source/test/testdata/pkgdata.inc
|
||||
source/test/testdata/pkgdataMakefile
|
||||
source/test/testmap/*.d
|
||||
source/test/testmap/Debug
|
||||
source/test/testmap/Makefile
|
||||
source/test/testmap/Release
|
||||
source/test/testmap/testmap
|
||||
source/test/testmap/testmap.plg
|
||||
source/test/thaitest/Makefile
|
||||
source/test/threadtest/Makefile
|
||||
source/tools/Makefile
|
||||
source/tools/ctestfw/*.ao
|
||||
source/tools/ctestfw/*.d
|
||||
source/tools/ctestfw/*.o
|
||||
source/tools/ctestfw/*.pdb
|
||||
source/tools/ctestfw/*icutest*.dll
|
||||
source/tools/ctestfw/*icutest*.exp
|
||||
source/tools/ctestfw/*icutest*.lib
|
||||
source/tools/ctestfw/*icutest*.lnk
|
||||
source/tools/ctestfw/Debug
|
||||
source/tools/ctestfw/Makefile
|
||||
source/tools/ctestfw/Release
|
||||
source/tools/ctestfw/ctestfw.vcproj.*.*.user
|
||||
source/tools/ctestfw/debug
|
||||
source/tools/ctestfw/libicutest*
|
||||
source/tools/ctestfw/libsicutest*
|
||||
source/tools/ctestfw/release
|
||||
source/tools/ctestfw/x64
|
||||
source/tools/ctestfw/x86
|
||||
source/tools/dumpce/*.css
|
||||
source/tools/dumpce/*.d
|
||||
source/tools/dumpce/*.html
|
||||
source/tools/dumpce/*.o
|
||||
source/tools/dumpce/Makefile
|
||||
source/tools/dumpce/dumpce
|
||||
source/tools/genbidi/*.d
|
||||
source/tools/genbidi/*.o
|
||||
source/tools/genbidi/*.pdb
|
||||
source/tools/genbidi/*.plg
|
||||
source/tools/genbidi/Debug
|
||||
source/tools/genbidi/Makefile
|
||||
source/tools/genbidi/Release
|
||||
source/tools/genbidi/debug
|
||||
source/tools/genbidi/genbidi
|
||||
source/tools/genbidi/genbidi.[0-9]
|
||||
source/tools/genbidi/genbidi.vcproj.*.*.user
|
||||
source/tools/genbidi/release
|
||||
source/tools/genbidi/x64
|
||||
source/tools/genbidi/x86
|
||||
source/tools/genbrk/*.d
|
||||
source/tools/genbrk/*.o
|
||||
source/tools/genbrk/*.pdb
|
||||
source/tools/genbrk/*.plg
|
||||
source/tools/genbrk/Debug
|
||||
source/tools/genbrk/Makefile
|
||||
source/tools/genbrk/Release
|
||||
source/tools/genbrk/debug
|
||||
source/tools/genbrk/genbrk
|
||||
source/tools/genbrk/genbrk.1
|
||||
source/tools/genbrk/genbrk.vcproj.*.*.user
|
||||
source/tools/genbrk/release
|
||||
source/tools/genbrk/x64
|
||||
source/tools/genbrk/x86
|
||||
source/tools/gencase/*.d
|
||||
source/tools/gencase/*.ncb
|
||||
source/tools/gencase/*.o
|
||||
source/tools/gencase/*.opt
|
||||
source/tools/gencase/*.pdb
|
||||
source/tools/gencase/*.plg
|
||||
source/tools/gencase/Debug
|
||||
source/tools/gencase/Makefile
|
||||
source/tools/gencase/Release
|
||||
source/tools/gencase/debug
|
||||
source/tools/gencase/gencase
|
||||
source/tools/gencase/gencase.[0-9]
|
||||
source/tools/gencase/gencase.vcproj.*.*.user
|
||||
source/tools/gencase/release
|
||||
source/tools/gencase/x64
|
||||
source/tools/gencase/x86
|
||||
source/tools/genccode/*.d
|
||||
source/tools/genccode/*.o
|
||||
source/tools/genccode/*.pdb
|
||||
source/tools/genccode/*.plg
|
||||
source/tools/genccode/Debug
|
||||
source/tools/genccode/Makefile
|
||||
source/tools/genccode/Release
|
||||
source/tools/genccode/debug
|
||||
source/tools/genccode/genccode
|
||||
source/tools/genccode/genccode.8
|
||||
source/tools/genccode/genccode.vcproj.*.*.user
|
||||
source/tools/genccode/release
|
||||
source/tools/genccode/x64
|
||||
source/tools/genccode/x86
|
||||
source/tools/gencfu/*.d
|
||||
source/tools/gencfu/*.o
|
||||
source/tools/gencfu/*.pdb
|
||||
source/tools/gencfu/Debug
|
||||
source/tools/gencfu/Makefile
|
||||
source/tools/gencfu/Release
|
||||
source/tools/gencfu/debug
|
||||
source/tools/gencfu/gencfu
|
||||
source/tools/gencfu/gencfu.exe
|
||||
source/tools/gencfu/gencfu.vcproj.*.*.user
|
||||
source/tools/gencfu/release
|
||||
source/tools/gencfu/x64
|
||||
source/tools/gencfu/x86
|
||||
source/tools/gencmn/*.d
|
||||
source/tools/gencmn/*.o
|
||||
source/tools/gencmn/*.pdb
|
||||
source/tools/gencmn/*.plg
|
||||
source/tools/gencmn/Debug
|
||||
source/tools/gencmn/Makefile
|
||||
source/tools/gencmn/Release
|
||||
source/tools/gencmn/debug
|
||||
source/tools/gencmn/gencmn
|
||||
source/tools/gencmn/gencmn.8
|
||||
source/tools/gencmn/gencmn.[0-9]
|
||||
source/tools/gencmn/gencmn.vcproj.*.*.user
|
||||
source/tools/gencmn/release
|
||||
source/tools/gencmn/x64
|
||||
source/tools/gencmn/x86
|
||||
source/tools/gencnval/*.d
|
||||
source/tools/gencnval/*.ncb
|
||||
source/tools/gencnval/*.o
|
||||
source/tools/gencnval/*.opt
|
||||
source/tools/gencnval/*.pdb
|
||||
source/tools/gencnval/*.plg
|
||||
source/tools/gencnval/Debug
|
||||
source/tools/gencnval/Makefile
|
||||
source/tools/gencnval/Release
|
||||
source/tools/gencnval/debug
|
||||
source/tools/gencnval/gencnval
|
||||
source/tools/gencnval/gencnval.1
|
||||
source/tools/gencnval/gencnval.[0-9]
|
||||
source/tools/gencnval/gencnval.vcproj.*.*.user
|
||||
source/tools/gencnval/release
|
||||
source/tools/gencnval/x64
|
||||
source/tools/gencnval/x86
|
||||
source/tools/genctd/*.d
|
||||
source/tools/genctd/*.o
|
||||
source/tools/genctd/*.pdb
|
||||
source/tools/genctd/*.plg
|
||||
source/tools/genctd/Debug
|
||||
source/tools/genctd/Makefile
|
||||
source/tools/genctd/Release
|
||||
source/tools/genctd/debug
|
||||
source/tools/genctd/genctd
|
||||
source/tools/genctd/genctd.1
|
||||
source/tools/genctd/genctd.vcproj.*.*.user
|
||||
source/tools/genctd/release
|
||||
source/tools/genctd/x64
|
||||
source/tools/genctd/x86
|
||||
source/tools/gendraft/udeprctd.h
|
||||
source/tools/gendraft/udraft.h
|
||||
source/tools/gendraft/uintrnal.h
|
||||
source/tools/gendraft/usystem.h
|
||||
source/tools/gennames/*.d
|
||||
source/tools/gennames/*.ncb
|
||||
source/tools/gennames/*.o
|
||||
source/tools/gennames/*.opt
|
||||
source/tools/gennames/*.pdb
|
||||
source/tools/gennames/*.plg
|
||||
source/tools/gennames/Debug
|
||||
source/tools/gennames/Makefile
|
||||
source/tools/gennames/Release
|
||||
source/tools/gennames/debug
|
||||
source/tools/gennames/gennames
|
||||
source/tools/gennames/gennames.[0-9]
|
||||
source/tools/gennames/gennames.vcproj.*.*.user
|
||||
source/tools/gennames/release
|
||||
source/tools/gennames/x64
|
||||
source/tools/gennames/x86
|
||||
source/tools/gennorm/*.d
|
||||
source/tools/gennorm/*.o
|
||||
source/tools/gennorm/*.pdb
|
||||
source/tools/gennorm/*.plg
|
||||
source/tools/gennorm/Debug
|
||||
source/tools/gennorm/Makefile
|
||||
source/tools/gennorm/Release
|
||||
source/tools/gennorm/debug
|
||||
source/tools/gennorm/gennorm
|
||||
source/tools/gennorm/gennorm.[0-9]
|
||||
source/tools/gennorm/gennorm.vcproj.*.*.user
|
||||
source/tools/gennorm/release
|
||||
source/tools/gennorm/x64
|
||||
source/tools/gennorm/x86
|
||||
source/tools/genpname/*.d
|
||||
source/tools/genpname/*.o
|
||||
source/tools/genpname/*.pdb
|
||||
source/tools/genpname/*.plg
|
||||
source/tools/genpname/Debug
|
||||
source/tools/genpname/Makefile
|
||||
source/tools/genpname/Release
|
||||
source/tools/genpname/debug
|
||||
source/tools/genpname/genpname
|
||||
source/tools/genpname/genpname.vcproj.*.*.user
|
||||
source/tools/genpname/release
|
||||
source/tools/genpname/x64
|
||||
source/tools/genpname/x86
|
||||
source/tools/genprops/*.d
|
||||
source/tools/genprops/*.ncb
|
||||
source/tools/genprops/*.o
|
||||
source/tools/genprops/*.opt
|
||||
source/tools/genprops/*.pdb
|
||||
source/tools/genprops/*.plg
|
||||
source/tools/genprops/Debug
|
||||
source/tools/genprops/Makefile
|
||||
source/tools/genprops/Release
|
||||
source/tools/genprops/debug
|
||||
source/tools/genprops/genprops
|
||||
source/tools/genprops/genprops.[0-9]
|
||||
source/tools/genprops/genprops.vcproj.*.*.user
|
||||
source/tools/genprops/release
|
||||
source/tools/genprops/x64
|
||||
source/tools/genprops/x86
|
||||
source/tools/genrb/*.1
|
||||
source/tools/genrb/*.d
|
||||
source/tools/genrb/*.o
|
||||
source/tools/genrb/*.pdb
|
||||
source/tools/genrb/*.plg
|
||||
source/tools/genrb/Debug
|
||||
source/tools/genrb/Makefile
|
||||
source/tools/genrb/Makefile.local
|
||||
source/tools/genrb/Release
|
||||
source/tools/genrb/debug
|
||||
source/tools/genrb/derb
|
||||
source/tools/genrb/derb.[0-9]
|
||||
source/tools/genrb/derb.vcproj.*.*.user
|
||||
source/tools/genrb/derb_*
|
||||
source/tools/genrb/genrb
|
||||
source/tools/genrb/genrb.[0-9]
|
||||
source/tools/genrb/genrb.vcproj.*.*.user
|
||||
source/tools/genrb/release
|
||||
source/tools/genrb/temp
|
||||
source/tools/genrb/x64
|
||||
source/tools/genrb/x86
|
||||
source/tools/genren/ICUunrenamed
|
||||
source/tools/genren/Makefile.local
|
||||
source/tools/genren/urename.*
|
||||
source/tools/gensprep/*.8
|
||||
source/tools/gensprep/*.d
|
||||
source/tools/gensprep/*.o
|
||||
source/tools/gensprep/*.pdb
|
||||
source/tools/gensprep/Debug
|
||||
source/tools/gensprep/Makefile
|
||||
source/tools/gensprep/Release
|
||||
source/tools/gensprep/debug
|
||||
source/tools/gensprep/gensprep
|
||||
source/tools/gensprep/gensprep.[0-9]
|
||||
source/tools/gensprep/gensprep.plg
|
||||
source/tools/gensprep/gensprep.vcproj.*.*.user
|
||||
source/tools/gensprep/release
|
||||
source/tools/gensprep/x64
|
||||
source/tools/gensprep/x86
|
||||
source/tools/gentest/*.d
|
||||
source/tools/gentest/*.o
|
||||
source/tools/gentest/*.pdb
|
||||
source/tools/gentest/Debug
|
||||
source/tools/gentest/Makefile
|
||||
source/tools/gentest/Release
|
||||
source/tools/gentest/debug
|
||||
source/tools/gentest/gentest
|
||||
source/tools/gentest/gentest.exe
|
||||
source/tools/gentest/gentest.vcproj.*.*.user
|
||||
source/tools/gentest/release
|
||||
source/tools/gentest/x64
|
||||
source/tools/gentest/x86
|
||||
source/tools/genuca/*.d
|
||||
source/tools/genuca/*.o
|
||||
source/tools/genuca/*.pdb
|
||||
source/tools/genuca/Debug
|
||||
source/tools/genuca/Makefile
|
||||
source/tools/genuca/Release
|
||||
source/tools/genuca/debug
|
||||
source/tools/genuca/genuca
|
||||
source/tools/genuca/genuca.8
|
||||
source/tools/genuca/genuca.vcproj.*.*.user
|
||||
source/tools/genuca/release
|
||||
source/tools/genuca/x64
|
||||
source/tools/genuca/x86
|
||||
source/tools/icupkg/*.8
|
||||
source/tools/icupkg/*.d
|
||||
source/tools/icupkg/*.ncb
|
||||
source/tools/icupkg/*.o
|
||||
source/tools/icupkg/*.opt
|
||||
source/tools/icupkg/*.pdb
|
||||
source/tools/icupkg/*.plg
|
||||
source/tools/icupkg/Debug
|
||||
source/tools/icupkg/Makefile
|
||||
source/tools/icupkg/Release
|
||||
source/tools/icupkg/debug
|
||||
source/tools/icupkg/icupkg
|
||||
source/tools/icupkg/icupkg.[0-9]
|
||||
source/tools/icupkg/icupkg.vcproj.*.*.user
|
||||
source/tools/icupkg/release
|
||||
source/tools/icupkg/x64
|
||||
source/tools/icupkg/x86
|
||||
source/tools/icuswap/*.d
|
||||
source/tools/icuswap/*.ncb
|
||||
source/tools/icuswap/*.o
|
||||
source/tools/icuswap/*.opt
|
||||
source/tools/icuswap/*.pdb
|
||||
source/tools/icuswap/*.plg
|
||||
source/tools/icuswap/Debug
|
||||
source/tools/icuswap/Makefile
|
||||
source/tools/icuswap/Release
|
||||
source/tools/icuswap/debug
|
||||
source/tools/icuswap/icuswap
|
||||
source/tools/icuswap/icuswap.[0-9]
|
||||
source/tools/icuswap/icuswap.vcproj.*.*.user
|
||||
source/tools/icuswap/release
|
||||
source/tools/makeconv/*.1
|
||||
source/tools/makeconv/*.d
|
||||
source/tools/makeconv/*.o
|
||||
source/tools/makeconv/*.pdb
|
||||
source/tools/makeconv/*.plg
|
||||
source/tools/makeconv/Debug
|
||||
source/tools/makeconv/Makefile
|
||||
source/tools/makeconv/Release
|
||||
source/tools/makeconv/debug
|
||||
source/tools/makeconv/makeconv
|
||||
source/tools/makeconv/makeconv.[0-9]
|
||||
source/tools/makeconv/makeconv.vcproj.*.*.user
|
||||
source/tools/makeconv/release
|
||||
source/tools/makeconv/x64
|
||||
source/tools/makeconv/x86
|
||||
source/tools/pkgdata/*.1
|
||||
source/tools/pkgdata/*.d
|
||||
source/tools/pkgdata/*.ncb
|
||||
source/tools/pkgdata/*.o
|
||||
source/tools/pkgdata/*.opt
|
||||
source/tools/pkgdata/*.pdb
|
||||
source/tools/pkgdata/Debug
|
||||
source/tools/pkgdata/Makefile
|
||||
source/tools/pkgdata/Release
|
||||
source/tools/pkgdata/debug
|
||||
source/tools/pkgdata/icupkg.inc
|
||||
source/tools/pkgdata/pkgdata
|
||||
source/tools/pkgdata/pkgdata.[0-9]
|
||||
source/tools/pkgdata/pkgdata.vcproj.*.*.user
|
||||
source/tools/pkgdata/release
|
||||
source/tools/pkgdata/x64
|
||||
source/tools/pkgdata/x86
|
||||
source/tools/toolutil/*.ao
|
||||
source/tools/toolutil/*.d
|
||||
source/tools/toolutil/*.ncb
|
||||
source/tools/toolutil/*.o
|
||||
source/tools/toolutil/*.opt
|
||||
source/tools/toolutil/*.pdb
|
||||
source/tools/toolutil/*.plg
|
||||
source/tools/toolutil/Debug
|
||||
source/tools/toolutil/Makefile
|
||||
source/tools/toolutil/Release
|
||||
source/tools/toolutil/debug
|
||||
source/tools/toolutil/libicu*
|
||||
source/tools/toolutil/release
|
||||
source/tools/toolutil/toolutil.vcproj.*.*.user
|
||||
source/tools/toolutil/x64
|
||||
source/tools/toolutil/x86
|
||||
source/tools/tzcode/Makefile
|
||||
source/tools/tzcode/ZoneMetaData.java
|
||||
source/tools/tzcode/africa
|
||||
source/tools/tzcode/antarctica
|
||||
source/tools/tzcode/asia
|
||||
source/tools/tzcode/australasia
|
||||
source/tools/tzcode/backward
|
||||
source/tools/tzcode/etcetera
|
||||
source/tools/tzcode/europe
|
||||
source/tools/tzcode/factory
|
||||
source/tools/tzcode/icu_zone.txt
|
||||
source/tools/tzcode/icuzdump
|
||||
source/tools/tzcode/icuzdump.dSYM
|
||||
source/tools/tzcode/icuzdumpout
|
||||
source/tools/tzcode/iso3166.tab
|
||||
source/tools/tzcode/leapseconds
|
||||
source/tools/tzcode/northamerica
|
||||
source/tools/tzcode/pacificnew
|
||||
source/tools/tzcode/solar??
|
||||
source/tools/tzcode/southamerica
|
||||
source/tools/tzcode/systemv
|
||||
source/tools/tzcode/tz2icu
|
||||
source/tools/tzcode/tz?????????.tar.gz
|
||||
source/tools/tzcode/tzorig
|
||||
source/tools/tzcode/yearistype
|
||||
source/tools/tzcode/yearistype.sh
|
||||
source/tools/tzcode/zdumpout
|
||||
source/tools/tzcode/zic
|
||||
source/tools/tzcode/zone.tab
|
||||
source/tools/tzcode/zoneinfo
|
||||
source/tools/tzcode/zoneinfo.txt
|
117
APIChangeReport.html
Normal file
117
APIChangeReport.html
Normal file
|
@ -0,0 +1,117 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?><!--
|
||||
Copyright (C) 2009, International Business Machines Corporation, All Rights Reserved.
|
||||
--><html>
|
||||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<title>ICU4C API Comparison: 4.2.1 with 4.3.1</title>
|
||||
<link rel="stylesheet" href="icu4c.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<a name="_top"></a>
|
||||
<h1>ICU4C API Comparison: 4.2.1 with 4.3.1</h1>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="#removed">Removed from 4.2.1</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#deprecated">Deprecated or Obsoleted in 4.3.1</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#changed">Changed in 4.3.1</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#promoted">Promoted to stable in 4.3.1</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#added">Added in 4.3.1</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#other">Other existing drafts in 4.3.1</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr>
|
||||
<a name="removed">
|
||||
<h2>Removed from 4.2.1</h2>
|
||||
</a>
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<a name="deprecated">
|
||||
<h2>Deprecated or Obsoleted in 4.3.1</h2>
|
||||
</a>
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<a name="changed">
|
||||
<h2>Changed in 4.3.1 (old, new)</h2>
|
||||
</a>
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<a name="promoted">
|
||||
<h2>Promoted to stable in 4.3.1</h2>
|
||||
</a>
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<a name="added">
|
||||
<h2>Added in 4.3.1</h2>
|
||||
</a>
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<a name="other">
|
||||
<h2>Other existing drafts in 4.3.1</h2>
|
||||
</a>
|
||||
<div class="other">
|
||||
<table class="genTable" BORDER="1">
|
||||
<THEAD>
|
||||
<tr>
|
||||
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
|
||||
</tr>
|
||||
</THEAD>
|
||||
</table>
|
||||
</div>
|
||||
<P></P>
|
||||
<a href="#_top">(jump back to top)</a>
|
||||
<hr>
|
||||
<p>
|
||||
<i><font size="-1">Contents generated by StableAPI tool on Wed Jul 22 11:58:09 GMT-08:00 2009<br>Copyright (C) 2009, International Business Machines Corporation, All Rights Reserved.</font></i>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
102
as_is/os390/unpax-icu.sh
Executable file
102
as_is/os390/unpax-icu.sh
Executable file
|
@ -0,0 +1,102 @@
|
|||
#!/bin/sh
# Copyright (C) 2001-2007, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Steven R. Loomis
# George Rhoten
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Usage:  unpax-icu.sh icu-archive.tar
# Exits with status 1 when the archive argument is missing or unreadable.
#
# Set the following variable to the list of binary file suffixes (extensions)

#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML'

# List of archive members to re-extract without conversion.  Start empty so
# that a value inherited from the caller's environment cannot leak in.
binary_files=""

# Print a one-line description of how to invoke this script.
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}

# first make sure we have at least one arg and it's a file we can read
if [ $# -eq 0 ]; then
    usage
    exit 1
fi
tar_file=$1
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi

echo ""
echo "Extracting from $tar_file ..."
echo ""
# extract files while converting them to EBCDIC
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag

echo ""
echo "Determining binary files ..."
echo ""

# When building in ASCII mode, text files are converted as ASCII.
# Default to 0 so that an unset variable is not an invalid integer operand.
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
else
    for file in `find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'`; do
        # Dump the first three bytes as hex.  Runs of spaces in the od output
        # are squeezed to a single space so that cut sees one field per byte.
        bom8=`head -c 3 "$file"|\
              od -t x1|\
              head -n 1|\
              sed 's/  */ /g'|\
              cut -f2-4 -d ' '|\
              tr 'A-Z' 'a-z'`;
        #Find a converted UTF-8 BOM
        if [ "$bom8" = "57 8b ab" ]
        then
            binary_files="$binary_files $file";
        fi
    done
fi

for i in $(pax -f "$tar_file" 2>/dev/null)
do
    case $i in
    */) ;;  # then this entry is a directory
    *.*)    # then this entry has a dot in the filename
        # Take everything after the LAST dot, so that names with several
        # dots such as NormalizationTest-3.2.0.txt yield "txt".
        suf=${i##*.}
        for j in $binary_suffixes
        do
            if [ "$suf" = "$j" ]
            then
                binary_files="$binary_files $i"
                break
            fi
        done
        ;;
    *) ;;   # then this entry does not have a dot in it
    esac
done

# now see if a re-extract of binary files is necessary
if [ -z "$binary_files" ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # -f: do not prompt or fail on entries that are read-only or missing
    rm -f $binary_files
    pax -rvf "$tar_file" $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."
|
32
as_is/os400/convertConfigure.sed
Normal file
32
as_is/os400/convertConfigure.sed
Normal file
|
@ -0,0 +1,32 @@
|
|||
# Copyright (C) 2006-2009, International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# Use "test -x" instead of "test -f" most of the time.
|
||||
# due to how executables are created in a different file system.
|
||||
s/as_executable_p="test -f"/as_executable_p="test -x"/g
|
||||
s/test -f "$ac_file"/test -x "$ac_file"/g
|
||||
s/test -f $ac_dir\/install-sh/test -x $ac_dir\/install-sh/g
|
||||
s/test -f $ac_dir\/install.sh/test -x $ac_dir\/install.sh/g
|
||||
s/test -f $ac_dir\/shtool/test -x $ac_dir\/shtool/g
|
||||
# Use the more efficient del instead of rm command.
|
||||
s/rm[ ]*-r[ ]*-f/del -f/g
|
||||
s/rm[ ]*-f[ ]*-r/del -f/g
|
||||
s/rm[ ]*-rf/del -f/g
|
||||
s/rm[ ]*-fr/del -f/g
|
||||
s/rm[ ]*-f/del -f/g
|
||||
##don't clean up some awks for debugging
|
||||
#s/[ ]*del -f [^ ]*.awk/#&/
|
||||
# Bourne shell isn't always available on i5/OS
|
||||
s/\/bin\/sh/\/usr\/bin\/qsh/g
|
||||
# no diff in qsh the equivalent is cmp
|
||||
s/ diff / cmp -s /g
|
||||
## srl
|
||||
# trouble w/ redirects.
|
||||
s% >&$3%%g
|
||||
s%^ac_cr=%# AWK reads ASCII, not EBCDIC\
|
||||
touch -C 819 $tmp/defines.awk $tmp/subs.awk $tmp/subs1.awk conf$$subs.awk\
|
||||
\
|
||||
&%
|
||||
##OBSOLETE
|
||||
#(REPLACED BY CPP in runConfigureICU) Use -c qpponly instead of -E to enable the preprocessor on the compiler
|
||||
#s/\$CC -E/\$CC -c -qpponly/g
|
195
as_is/os400/unpax-icu.sh
Executable file
195
as_is/os400/unpax-icu.sh
Executable file
|
@ -0,0 +1,195 @@
|
|||
#!/usr/bin/qsh
# Copyright (C) 2000-2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Usage:  unpax-icu.sh icu-archive.tar
# Exits with status 1 when the archive argument is missing or unreadable,
# or when the qsh-compatible configure script cannot be generated.

if [ -z "$QSH_VERSION" ];
then
    QSH=0
    echo "QSH not detected (QSH_VERSION not set) - just testing."
else
    QSH=1
    #echo "QSH version $QSH_VERSION"
fi
export QSH

# Set the following variable to the list of binary file suffixes (extensions)


#****************************************************************************
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML'
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'

# Accumulators.  Start empty so that values inherited from the caller's
# environment cannot leak into the file lists.
binary_files=""
ebcdic_data=""

#****************************************************************************
# Function: usage
# Description: Prints out text that describes how to call this script
# Input: None
# Output: None
#****************************************************************************
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}

#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
#****************************************************************************

# check for no arguments
if [ $# -eq 0 ]; then
    usage
    exit 1
fi

# tar file is argument 1
tar_file=$1

# check that the file is valid
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi

#****************************************************************************
# Determine which directories in the data_files list
# are included in the provided archive
#****************************************************************************
echo "Finding data_files ..."
for data_dir in $data_files
do
    if (pax -f "$tar_file" "$data_dir" >/dev/null 2>&1)
    then
        ebcdic_data="$ebcdic_data $data_dir";
    fi
done

#****************************************************************************
# Extract files. We do this in two passes. One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""

# extract everything as iso-8859-1 except these directories
pax -C 819 -rcvf "$tar_file" $ebcdic_data

# extract files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -rvf "$tar_file" $ebcdic_data

#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
#
# The list is flushed (files removed and re-extracted) every 200 entries
# so that the rm/pax command lines stay bounded.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Process BOMs
for file in `find ./icu \( -name \*.txt -print \)`; do
    # Dump the start of the first line as hex.  Runs of spaces in the od
    # output are squeezed to a single space so that cut sees one field per
    # byte.
    bom8=`head -n 1 "$file"|\
          od -t x1|\
          head -n 1|\
          sed 's/  */ /g'|\
          cut -f2-4 -d ' '|\
          tr 'A-Z' 'a-z'`;
    #Find a converted UTF-8 BOM
    if [ "$bom8" = "057 08b 0ab" -o "$bom8" = "57 8b ab" ]
    then
        # strip the leading "./" that find prepends
        file="`echo $file | cut -d / -f2-`"

        if [ `echo $binary_files | wc -w` -lt 200 ]
        then
            bin_count=`expr $bin_count + 1`
            binary_files="$binary_files $file";
        else
            echo "Restoring binary files by BOM ($bin_count)..."
            rm -f $binary_files;
            pax -C 819 -rvf "$tar_file" $binary_files;
            echo "Determining binary files by BOM ($bin_count)..."
            binary_files="$file";
            bin_count=`expr $bin_count + 1`
        fi
    fi
done

# Process special paths
for i in $(pax -f "$tar_file" 2>/dev/null)
do
    case $i in
    */)
        # then this entry is a directory
        ;;
    *.*)
        # then this entry has a dot in the filename
        # Take everything after the LAST dot so that names containing
        # several dots are still matched on their real extension.
        suf=${i##*.}
        for j in $binary_suffixes
        do
            if [ "$suf" = "$j" ]
            then

                if [ `echo $binary_files | wc -w` -lt 200 ]
                then
                    binary_files="$binary_files $i";
                    bin_count=`expr $bin_count + 1`
                else
                    echo "Restoring binary files by special paths ($bin_count) ..."
                    rm -f $binary_files;
                    pax -C 819 -rvf "$tar_file" $binary_files;
                    echo "Determining binary files by special paths ($bin_count) ..."
                    binary_files="$i";
                    bin_count=`expr $bin_count + 1`
                fi
                break
            fi
        done
        ;;
    *)
        # then this entry does not have a dot in it
        ;;
    esac
done

# now see if a re-extract of binary files is necessary
if [ `echo $binary_files | wc -w` -gt 0 ]
then
    echo "Restoring binary files ($bin_count) ..."
    rm -f $binary_files
    pax -C 819 -rvf "$tar_file" $binary_files
fi

#****************************************************************************
# Generate and run the configure script
#****************************************************************************

echo ""
echo "Generating qsh compatible configure ..."
echo ""

# Only replace the original configure when the conversion succeeded;
# otherwise a failed sed would leave an empty or truncated configure behind.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp
then
    del -f icu/source/configure
    mv icu/source/configureTemp icu/source/configure
    chmod 755 icu/source/configure
else
    echo "Could not generate a qsh compatible configure; icu/source/configure was left unchanged."
    exit 1
fi

echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."
|
||||
|
447
icu4c.css
Normal file
447
icu4c.css
Normal file
|
@ -0,0 +1,447 @@
|
|||
/*
|
||||
* Default CSS style sheet for the ICU4C Open Source readme
|
||||
* Copyright (C) 2005-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*/
|
||||
|
||||
/* Global styles */
|
||||
|
||||
body,p,li,ol,ul,th,td {
|
||||
font-size: 10pt;
|
||||
font-family: "Arial", "Helvetica", sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 1em;
|
||||
}
|
||||
|
||||
body.draft {
|
||||
background-image: url(images/draftbg.png);
|
||||
}
|
||||
|
||||
.mainbody {
|
||||
padding: 1em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Customize the headers to have less space around them than usual
|
||||
*/
|
||||
|
||||
/* Top-level heading: centered, boxed, serif. */
h1 {
    margin-bottom: .5em;
    margin-top: .5em;
    padding-bottom: .5em;
    padding-top: .5em;
    font-family: Georgia, "Times New Roman", Times, serif;
    border-width: 2px;
    border-style: solid;
    text-align: center;
    width: 100%;
    /* Single size/weight pair; the earlier 20pt/700 pair was overridden by
       these declarations and had no effect. */
    font-size: 200%;
    font-weight: bold;
}
|
||||
|
||||
h2 {
|
||||
border-top: 2px solid #22d;
|
||||
border-left: 2px solid #22d;
|
||||
margin-bottom: 0.5em;
|
||||
padding-left: 4px;
|
||||
margin-top: 12pt;
|
||||
font-weight: 700;
|
||||
font-size: 2em;
|
||||
font-family: Georgia, "Times New Roman", Times, serif;
|
||||
background-color: #eee;
|
||||
page-break-before: always;
|
||||
}
|
||||
|
||||
h2 a {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
h2 a:hover {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* Third-level heading: thin top rule, indented, serif. */
h3 {
    border-top: 1px solid gray;
    color: #1e1c46;
    margin-bottom: 0pt;
    padding-left: 0;
    margin-left: 1em;
    /* The rule previously also declared margin-top: 12pt, which this later
       value silently overrode; only the effective value is kept. */
    margin-top: 0.2em;
    padding-bottom: 0.4em;
    font-size: 1.5em;
    font-family: Georgia, "Times New Roman", Times, serif;
}
|
||||
|
||||
h3 a {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
h3 a:hover {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
h4 {
|
||||
margin-left: 1.5em;
|
||||
margin-bottom: 0pt;
|
||||
margin-top: 12pt;
|
||||
font-size: 1.0em;
|
||||
font-weight: bolder;
|
||||
font-family: Georgia, "Times New Roman", Times, serif;
|
||||
}
|
||||
|
||||
h4 a {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
h4 a:hover {
|
||||
color: blue;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
h5, h6 {
|
||||
margin-left: 1.8em;
|
||||
margin-bottom: 0pt;
|
||||
margin-top: 12pt;
|
||||
padding-left: 0.75em;
|
||||
font-size: x-small;
|
||||
font-family: Georgia, "Times New Roman", Times, serif;
|
||||
}
|
||||
|
||||
p,pre,table,ul,ol,dl {
|
||||
margin-left: 2em;
|
||||
}
|
||||
|
||||
/*
|
||||
* Navigation sidebar on the left hand of most pages
|
||||
*/
|
||||
|
||||
td.sidebar1 {
|
||||
background-color: #99CCFF;
|
||||
font-weight: 700;
|
||||
margin-top: 0px;
|
||||
margin-bottom: 0px;
|
||||
padding-top: 1em;
|
||||
padding-left: 0.2em;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
td.sidebar2 {
|
||||
background-color: #99CCFF;
|
||||
margin-top: 0px;
|
||||
margin-bottom: 0px;
|
||||
margin-left: 0px;
|
||||
padding-top: 1px;
|
||||
padding-bottom: 1px;
|
||||
padding-left: 1px;
|
||||
padding-right: 0.5em;
|
||||
white-space: nowrap;
|
||||
text-decoration: none;
|
||||
display: block;
|
||||
}
|
||||
|
||||
td.sidebar2:hover {
|
||||
background-color: #EEEEFF;
|
||||
padding-top: 1px;
|
||||
padding-bottom: 1px;
|
||||
padding-left: 1px;
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
|
||||
a.sidebar2 {
|
||||
text-decoration: none;
|
||||
display: block;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
a.sidebar2:link {
|
||||
color: #000099;
|
||||
display: block;
|
||||
}
|
||||
|
||||
a.sidebar2:hover {
|
||||
background-color: #EEEEFF;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.underlinehover:hover {
|
||||
background-color: #EEEEFF;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* This is the faded header at the top */
|
||||
|
||||
td.fadedtop {
|
||||
background-color: #006699;
|
||||
background-image: url(http://www.icu-project.org/images/gr100.gif);
|
||||
}
|
||||
|
||||
/* Related site on the left */
|
||||
|
||||
p.relatedsite {
|
||||
color: White;
|
||||
font-weight: 700;
|
||||
font-size: 10pt;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 0;
|
||||
padding-left: 0.2em;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* Related site on the left */
|
||||
|
||||
p.sidebar3 {
|
||||
margin-top: 0.75em;
|
||||
margin-bottom: 0;
|
||||
padding-left: 0.8em;
|
||||
}
|
||||
|
||||
a.sidebar3 {
|
||||
font-size: 0.9em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a.sidebar3:link {
|
||||
text-decoration: none;
|
||||
color: White;
|
||||
}
|
||||
|
||||
a.sidebar3:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* FAQ */
|
||||
|
||||
li.faq_contents {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
p.faq_q {
|
||||
font-weight: 700;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
p.faq_a {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
/* News items */
|
||||
|
||||
table.newsItem {
|
||||
padding-left: 1em;
|
||||
padding-right: 1em;
|
||||
border-width: medium;
|
||||
}
|
||||
|
||||
th.newsItem {
|
||||
background-color: #666666;
|
||||
color: White;
|
||||
}
|
||||
|
||||
td.newsItem {
|
||||
background-color: #CCCCCC;
|
||||
}
|
||||
|
||||
td.release-line,th.release-line {
|
||||
padding-left: 0.5em;
|
||||
padding-right: 0.5em;
|
||||
white-space: nowrap;
|
||||
border: 1px;
|
||||
}
|
||||
|
||||
.note {
|
||||
font-style: italic;
|
||||
font-size: small;
|
||||
margin-left: 1em;
|
||||
}
|
||||
|
||||
samp {
|
||||
margin-left: 1em;
|
||||
margin-right: 2em;
|
||||
border-style: groove;
|
||||
padding: 1em;
|
||||
display: block;
|
||||
background-color: #EEEEEE
|
||||
}
|
||||
|
||||
table.rtable caption {
|
||||
margin-left: 2px;
|
||||
margin-right: 2px;
|
||||
padding: 3px;
|
||||
font-weight: bold;
|
||||
background-color: #dee2ff;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
table.rtable tr th {
|
||||
background-color: #dee2ff;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
table.rtable tr td {
|
||||
background-color: #c0c0fd;
|
||||
padding: 3px;
|
||||
}
|
||||
|
||||
table.rtable tr.broken td {
|
||||
background-color: #fbb;
|
||||
border: 1px dashed gray;
|
||||
padding: 3px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
table.rtable tr.rarely td {
|
||||
background-color: #efe9c2;
|
||||
padding: 3px;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* APIChangeReport specific things */
|
||||
|
||||
.row0 {
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
.row1 {
|
||||
background-color: #dfd;
|
||||
}
|
||||
|
||||
.verchange {
|
||||
color: red;
|
||||
font-weight: bold;
|
||||
font-size: large;
|
||||
}
|
||||
|
||||
.stabchange {
|
||||
color: red;
|
||||
font-size: large;
|
||||
}
|
||||
|
||||
/* Extra-large red warning text on a white background. */
.bigwarn {
    color: red;
    background-color: white;
    font-size: x-large;
    /* No whitespace is allowed between the number and its unit; "0.5 em"
       is invalid and causes the whole declaration to be dropped. */
    margin: 0.5em;
}
|
||||
|
||||
table.genTable {
|
||||
border-collapse: collapse;
|
||||
border: 1px solid black;
|
||||
}
|
||||
|
||||
/* 'everything inc version */
|
||||
|
||||
/* Cell defaults for generated tables.  The class name must be spelled
   "genTable" exactly as in the markup and the sibling rules: class
   selectors are matched case-sensitively outside quirks mode. */
table.genTable td {
    border: 1px solid gray;
    padding: 0.25em;
    font-size: small;
}
|
||||
|
||||
/* not version */
|
||||
|
||||
table.genTable td.file,
|
||||
table.genTable td.proto {
|
||||
border: none;
|
||||
font-size: medium;
|
||||
}
|
||||
|
||||
table.genTable td.file {
|
||||
font-family: monospace;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
div.other .row0 {
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
div.other .row1 {
|
||||
background-color: #ddf;
|
||||
}
|
||||
|
||||
table.docTable {
|
||||
border-collapse: collapse;
|
||||
border: 1px solid black;
|
||||
}
|
||||
|
||||
/* 'everything inc version */
|
||||
|
||||
table.docTable td,
|
||||
table.docTable th {
|
||||
border: 1px solid gray;
|
||||
padding: 0.25em;
|
||||
font-size: small;
|
||||
}
|
||||
|
||||
/* not version */
|
||||
|
||||
table.docTable td.file,
|
||||
table.docTable td.proto {
|
||||
border: none;
|
||||
font-size: medium;
|
||||
}
|
||||
|
||||
table.docTable td.file {
|
||||
font-family: monospace;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
abbr {
|
||||
border-bottom: 1px dashed #0B0;
|
||||
}
|
||||
|
||||
h2.TOC {
|
||||
page-break-before: auto;
|
||||
}
|
||||
|
||||
body.readme {
|
||||
|
||||
}
|
||||
|
||||
caption {
|
||||
font-weight: bold;
|
||||
text-align: left
|
||||
}
|
||||
|
||||
div.indent {
|
||||
margin-left: 2em
|
||||
}
|
||||
|
||||
ul.TOC {
|
||||
list-style-type: none;
|
||||
padding-left: 1em;
|
||||
font-size: larger;
|
||||
}
|
||||
|
||||
ul.TOC li a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
ul.TOC li ul li a {
|
||||
font-weight: normal;
|
||||
list-style-type: none;
|
||||
font-size: small;
|
||||
}
|
||||
|
||||
ul.TOC li ul {
|
||||
margin-left: 0;
|
||||
padding-left: 2em;
|
||||
font-weight: normal;
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
pre.samp,samp {
|
||||
margin-left: 1em;
|
||||
border-style: groove;
|
||||
padding: 1em;
|
||||
display: block;
|
||||
background-color: #EEEEEE
|
||||
}
|
51
license.html
Normal file
51
license.html
Normal file
|
@ -0,0 +1,51 @@
|
|||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
|
||||
<title>ICU License - ICU 1.8.1 and later</title>
|
||||
</head>
|
||||
|
||||
<body BGCOLOR="#ffffff">
|
||||
<h2>ICU License - ICU 1.8.1 and later</h2>
|
||||
|
||||
<p>COPYRIGHT AND PERMISSION NOTICE</p>
|
||||
|
||||
<p>
|
||||
Copyright (c) 1995-2009 International Business Machines Corporation and others
|
||||
</p>
|
||||
<p>
|
||||
All rights reserved.
|
||||
</p>
|
||||
<p>
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
||||
copies of the Software, and to permit persons
|
||||
to whom the Software is furnished to do so, provided that the above
|
||||
copyright notice(s) and this permission notice appear in all copies
|
||||
of the Software and that both the above copyright notice(s) and this
|
||||
permission notice appear in supporting documentation.
|
||||
</p>
|
||||
<p>
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
|
||||
THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
|
||||
OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
|
||||
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
|
||||
USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
</p>
|
||||
<p>
|
||||
Except as contained in this notice, the name of a copyright holder shall not be
|
||||
used in advertising or otherwise to promote the sale, use or other dealings in
|
||||
this Software without prior written authorization of the copyright holder.
|
||||
</p>
|
||||
|
||||
<hr>
|
||||
<p><small>
|
||||
All trademarks and registered trademarks mentioned herein are the property of their respective owners.
|
||||
</small></p>
|
||||
</body>
|
||||
</html>
|
159
packaging/PACKAGES
Normal file
159
packaging/PACKAGES
Normal file
|
@ -0,0 +1,159 @@
|
|||
Copyright (C) 2000-2003, International Business Machines
|
||||
Corporation and others. All Rights Reserved.
|
||||
ICU is packaged into a number of small, interdependent packages. This
|
||||
file describes what these packages are, what their name should be
|
||||
like, and what their contents are. It is useful as a reference and a
|
||||
guide when packaging ICU on a new system.
|
||||
|
||||
+ List of ICU packages.
|
||||
|
||||
ICU is distributed as the following packages:
|
||||
|
||||
- ICU libraries. This package contains the runtime libraries needed by
|
||||
applications that use ICU. All the other packages require this package
|
||||
to be installed.
|
||||
- ICU. This package contains the converters data, the timezones data,
|
||||
and all the ICU tools.
|
||||
- ICU locales. This package adds locales and break data.
|
||||
- ICU development. This package contains the files necessary to build
|
||||
applications that use ICU, i.e. header files, links to shared
|
||||
libraries used by the linker, static libraries, etc... It also
|
||||
contains sample applications and documentation.
|
||||
- ICU docs. This package contains further documentation for ICU,
|
||||
including a complete API reference.
|
||||
- ICU data. This package contains the source for the compiled data
|
||||
contained by the ICU package.
|
||||
- ICU international data. This package contains the source for the
|
||||
compiled data contained by the ICU locales package.
|
||||
|
||||
In this file, we will refer to Autoconf variables as in $(bindir). In
|
||||
addition to these, we will use the following variables to denote
|
||||
ICU-specific directories or information:
|
||||
|
||||
VERSION ICU's dotted version number, e.g. 1.6.0.1 as of this
|
||||
writing.
|
||||
|
||||
ICUDATADIR The directory where portable ICU data are. This is
|
||||
defined as $(datadir)/icu/$(VERSION).
|
||||
ICULIBDIR The directory where platform-specific ICU data
|
||||
are. This is defined as $(libdir)/icu/$(VERSION).
|
||||
ICUSYSCONFDIR The directory where ICU configuration files are. This
|
||||
is defined as $(sysconfdir)/icu.
|
||||
|
||||
When referring to libraries, .so will be used to denote the extension
|
||||
of a shared library, and .a to denote the extension of a static
|
||||
library. These extensions will actually be different on some platforms.
|
||||
|
||||
+ Configuration and compilation of ICU
|
||||
|
||||
ICU should be configured with the following options:
|
||||
|
||||
--with-data-packaging=files
|
||||
--disable-rpath
|
||||
--enable-shared
|
||||
--enable-static
|
||||
--without-samples
|
||||
|
||||
in addition to platform-specific settings (like a specific mandir or
|
||||
sysconfdir). Note that the use of --disable-rpath assumes that the
|
||||
packaging is made for a standard location, or that the package
|
||||
installation/deinstallation will correctly manage the configuration
|
||||
of the system's dynamic loader. This is the right way of doing things.
|
||||
|
||||
The configure script invocation should also be done with
|
||||
|
||||
CFLAGS="-O2"
|
||||
|
||||
set, as in:
|
||||
|
||||
$ CFLAGS="-O2" ./configure ...
|
||||
|
||||
The files packaging mode is chosen because it offers the maximum
|
||||
flexibility. Packages can be split easily, and system administrators
|
||||
can add converters, aliases, and other resources with little
|
||||
effort. Ideally, the ICU build will be modified to allow for distributing a
|
||||
libicudata.so with all the converters and locales, but indexes and aliases
|
||||
as separate files. But for now, this is the easiest way to get started.
|
||||
|
||||
+ The ICU libraries package
|
||||
|
||||
The ICU libraries package is typically named `libicuXX' where XX is
|
||||
the major number of ICU's libraries. This number is ICU's version
|
||||
number multiplied by 10 and rounded down to the nearest integer (it is
|
||||
also the value of the LIB_VERSION_MAJOR configure substitution
|
||||
variable). For example, for ICU 1.6.0.1, it is 16, so the package name
|
||||
is `libicu16'. The major version is part of the package name to allow
|
||||
for the simultaneous installation of different ICU releases.
|
||||
|
||||
This package contains:
|
||||
|
||||
- All the shared libraries, and their major number symbolic link, but
|
||||
not the .so symbolic link that is only used at link time (this one is
|
||||
part of the development package). These are $(libdir)/libicu*.so.* and
|
||||
$(libdir)/libustdio.so.* at the time of this writing.
|
||||
|
||||
+ The ICU package
|
||||
|
||||
The ICU package is simply named `icu'. It provides data used by the ICU
|
||||
libraries package and commands to create and manipulate that data.
|
||||
|
||||
This package contains:
|
||||
|
||||
- The Unicode data files (uprops.dat and unames.dat as of this writing).
|
||||
- The time zones data files (tz.dat).
|
||||
- All the binary data files for converters (.cnv files).
|
||||
- All the ICU commands.
|
||||
- The manual pages for ICU commands and file formats.
|
||||
|
||||
+ The ICU locales package
|
||||
|
||||
The ICU locales package is named `icu-locales'. It provides data used by
|
||||
internationalization support in ICU.
|
||||
|
||||
This package contains:
|
||||
|
||||
- All the data for locales in ICU (.dat files).
|
||||
- All the break data for specific locales (.brk files).
|
||||
|
||||
+ The ICU development package
|
||||
|
||||
The ICU development package is named `libicu-dev'. It provides all
|
||||
the files necessary to write applications that use ICU, along with
|
||||
examples and some documentation.
|
||||
|
||||
This package contains:
|
||||
|
||||
- The /usr/include/unicode directory which contains all the ICU
|
||||
headers.
|
||||
- The .so symbolic links used by the linker to link against the
|
||||
latest version of the libraries.
|
||||
- A sample Makefile fragment that can be included by applications
|
||||
using ICU, to facilitate their building, along with a platform-specific
|
||||
configuration file included by this fragment.
|
||||
- The sample applications from the ICU source tree, in an appropriate
|
||||
location for the system that the package is installed on (for example,
|
||||
on Debian, in /usr/share/doc/libicu-dev/examples).
|
||||
|
||||
This package depends on the ICU libraries package with the exact same
|
||||
version, since it provides .so symbolic links to the latest libraries.
|
||||
|
||||
+ The ICU docs package
|
||||
|
||||
The ICU docs package is named `libicu-doc'. It contains the files
|
||||
generated by doxygen when the `make doc' command is executed, in a
|
||||
location appropriate for the system that the package is installed on.
|
||||
|
||||
+ The ICU data package
|
||||
|
||||
The ICU data package is named `icu-data'. It contains source files for
|
||||
the data found in the ICU package. These files are installed in
|
||||
$(ICUDATADIR).
|
||||
|
||||
+ The ICU international data package
|
||||
|
||||
The ICU international data package is named `icu-i18ndata'. It contains source files for
|
||||
the data found in the ICU locales package. These files are installed in
|
||||
$(ICUDATADIR).
|
||||
|
||||
----
|
||||
Yves Arrouye <yves@realnames.com>
|
13
packaging/README
Normal file
13
packaging/README
Normal file
|
@ -0,0 +1,13 @@
|
|||
Copyright (C) 2000-2003, International Business Machines
|
||||
Corporation and others. All Rights Reserved.
|
||||
|
||||
This directory contains information, input files and scripts for
|
||||
packaging ICU using specific packaging tools. We assume that the
|
||||
packager is familiar with the tools and procedures needed to build a
|
||||
package for a given packaging method (for example, how to use
|
||||
dpkg-buildpackage(1) on Debian GNU/Linux, or rpm(8) on distributions that
|
||||
use RPM packages).
|
||||
|
||||
Please read the file PACKAGES if you are interested in packaging ICU
|
||||
yourself. It describes what the different packages should be, and what
|
||||
their contents are.
|
228
packaging/rpm/icu.spec
Normal file
228
packaging/rpm/icu.spec
Normal file
|
@ -0,0 +1,228 @@
|
|||
# Copyright (C) 2000-2005, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# RPM specification file for ICU.
|
||||
#
|
||||
# Neal Probert <nprobert@walid.com> is the current maintainer.
|
||||
# Yves Arrouye <yves@realnames.com> is the original author.
|
||||
|
||||
# This file can be freely redistributed under the same license as ICU.
|
||||
|
||||
Name: icu
|
||||
Version: 3.4
|
||||
Release: 1
|
||||
Requires: libicu34 >= %{version}
|
||||
Summary: International Components for Unicode
|
||||
Packager: Ian Holsman (CNET Networks) <ianh@cnet.com>
|
||||
Copyright: X License
|
||||
Group: System Environment/Libraries
|
||||
Source: icu-%{version}.tgz
|
||||
BuildRoot: /var/tmp/%{name}-%{version}
|
||||
%description
|
||||
ICU is a set of C and C++ libraries that provides robust and full-featured
|
||||
Unicode and locale support. The library provides calendar support, conversions
|
||||
for many character sets, language sensitive collation, date
|
||||
and time formatting, support for many locales, message catalogs
|
||||
and resources, message formatting, normalization, number and currency
|
||||
formatting, time zones support, transliteration, word, line and
|
||||
sentence breaking, etc.
|
||||
|
||||
This package contains the Unicode character database and derived
|
||||
properties, along with converters and time zones data.
|
||||
|
||||
This package contains the runtime libraries for ICU. It does
|
||||
not contain any of the data files needed at runtime and present in the
|
||||
`icu' and `icu-locales` packages.
|
||||
|
||||
%package -n libicu34
|
||||
Summary: International Components for Unicode (libraries)
|
||||
Group: Development/Libraries
|
||||
%description -n libicu34
|
||||
ICU is a set of C and C++ libraries that provides robust and full-featured
|
||||
Unicode support. This package contains the runtime libraries for ICU. It does
|
||||
not contain any of the data files needed at runtime and present in the
|
||||
`icu' and `icu-locales` packages.
|
||||
|
||||
%package -n libicu-devel
|
||||
Summary: International Components for Unicode (development files)
|
||||
Group: Development/Libraries
|
||||
Requires: libicu34 = %{version}
|
||||
%description -n libicu-devel
|
||||
ICU is a set of C and C++ libraries that provides robust and full-featured
|
||||
Unicode support. This package contains the development files for ICU.
|
||||
|
||||
%package locales
|
||||
Summary: Locale data for ICU
|
||||
Group: System Environment/Libraries
|
||||
Requires: libicu34 >= %{version}
|
||||
%description locales
|
||||
The locale data are used by ICU to provide localization (l10n),
|
||||
internationalization (i18n) and timezone support to ICU applications.
|
||||
This package also contains break data for various languages,
|
||||
and transliteration data.
|
||||
|
||||
%post
|
||||
# Adjust the current ICU link in /usr/lib/icu
|
||||
|
||||
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
|
||||
cd /usr/lib/icu
|
||||
rm -f /usr/lib/icu/current
|
||||
if test x"$icucurrent" != x
|
||||
then
|
||||
ln -s "$icucurrent" current
|
||||
fi
|
||||
|
||||
#ICU_DATA=/usr/share/icu/%{version}
|
||||
#export ICU_DATA
|
||||
|
||||
%preun
|
||||
# Adjust the current ICU link in /usr/lib/icu
|
||||
|
||||
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
|
||||
cd /usr/lib/icu
|
||||
rm -f /usr/lib/icu/current
|
||||
if test x"$icucurrent" != x
|
||||
then
|
||||
ln -s "$icucurrent" current
|
||||
fi
|
||||
|
||||
%post -n libicu34
|
||||
ldconfig
|
||||
|
||||
# Adjust the current ICU link in /usr/lib/icu
|
||||
|
||||
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
|
||||
cd /usr/lib/icu
|
||||
rm -f /usr/lib/icu/current
|
||||
if test x"$icucurrent" != x
|
||||
then
|
||||
ln -s "$icucurrent" current
|
||||
fi
|
||||
|
||||
%preun -n libicu34
|
||||
# Adjust the current ICU link in /usr/lib/icu
|
||||
|
||||
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
|
||||
cd /usr/lib/icu
|
||||
rm -f /usr/lib/icu/current
|
||||
if test x"$icucurrent" != x
|
||||
then
|
||||
ln -s "$icucurrent" current
|
||||
fi
|
||||
|
||||
%prep
|
||||
%setup -q -n icu
|
||||
|
||||
%build
|
||||
cd source
|
||||
chmod a+x ./configure
|
||||
CFLAGS="-O3" CXXFLAGS="-O" ./configure --prefix=/usr --sysconfdir=/etc --with-data-packaging=files --enable-shared --enable-static --disable-samples
|
||||
echo 'CPPFLAGS += -DICU_DATA_DIR=\"/usr/share/icu/%{version}\"' >> icudefs.mk
|
||||
make RPM_OPT_FLAGS="$RPM_OPT_FLAGS"
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
cd source
|
||||
make install DESTDIR=$RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-,root,root)
|
||||
%doc readme.html
|
||||
%doc license.html
|
||||
/usr/share/icu/%{version}/license.html
|
||||
/usr/share/icu/%{version}/icudt34l/*.cnv
|
||||
/usr/share/icu/%{version}/icudt34l/*.icu
|
||||
/usr/share/icu/%{version}/icudt34l/*.spp
|
||||
|
||||
/usr/bin/derb
|
||||
/usr/bin/genbrk
|
||||
/usr/bin/gencnval
|
||||
/usr/bin/genrb
|
||||
/usr/bin/icu-config
|
||||
/usr/bin/makeconv
|
||||
/usr/bin/pkgdata
|
||||
/usr/bin/uconv
|
||||
|
||||
/usr/sbin/decmn
|
||||
/usr/sbin/genccode
|
||||
/usr/sbin/gencmn
|
||||
/usr/sbin/gensprep
|
||||
/usr/sbin/genuca
|
||||
/usr/sbin/icuswap
|
||||
/usr/share/icu/%{version}/mkinstalldirs
|
||||
|
||||
/usr/man/man1/derb.1.*
|
||||
/usr/man/man1/gencnval.1.*
|
||||
/usr/man/man1/genrb.1.*
|
||||
/usr/man/man1/icu-config.1.*
|
||||
/usr/man/man1/makeconv.1.*
|
||||
/usr/man/man1/pkgdata.1.*
|
||||
/usr/man/man1/uconv.1.*
|
||||
/usr/man/man8/decmn.8.*
|
||||
/usr/man/man8/genccode.8.*
|
||||
/usr/man/man8/gencmn.8.*
|
||||
/usr/man/man8/gensprep.8.*
|
||||
/usr/man/man8/genuca.8.*
|
||||
|
||||
%files -n icu-locales
|
||||
/usr/share/icu/%{version}/icudt34l/*.brk
|
||||
/usr/share/icu/%{version}/icudt34l/*.res
|
||||
/usr/share/icu/%{version}/icudt34l/coll/*.res
|
||||
/usr/share/icu/%{version}/icudt34l/rbnf/*.res
|
||||
/usr/share/icu/%{version}/icudt34l/translit/*.res
|
||||
|
||||
%files -n libicu34
|
||||
%doc license.html
|
||||
/usr/lib/libicui18n.so.34
|
||||
/usr/lib/libicui18n.so.34.0
|
||||
/usr/lib/libicutu.so.34
|
||||
/usr/lib/libicutu.so.34.0
|
||||
/usr/lib/libicuuc.so.34
|
||||
/usr/lib/libicuuc.so.34.0
|
||||
/usr/lib/libicudata.so.34
|
||||
/usr/lib/libicudata.so.34.0
|
||||
/usr/lib/libicuio.so.34
|
||||
/usr/lib/libicuio.so.34.0
|
||||
/usr/lib/libiculx.so.34
|
||||
/usr/lib/libiculx.so.34.0
|
||||
/usr/lib/libicule.so.34
|
||||
/usr/lib/libicule.so.34.0
|
||||
|
||||
%files -n libicu-devel
|
||||
%doc readme.html
|
||||
%doc license.html
|
||||
/usr/lib/libicui18n.so
|
||||
/usr/lib/libsicui18n.a
|
||||
/usr/lib/libicuuc.so
|
||||
/usr/lib/libsicuuc.a
|
||||
/usr/lib/libicutu.so
|
||||
/usr/lib/libsicutu.a
|
||||
/usr/lib/libicuio.so
|
||||
/usr/lib/libsicuio.a
|
||||
/usr/lib/libicudata.so
|
||||
/usr/lib/libsicudata.a
|
||||
/usr/lib/libicule.so
|
||||
/usr/lib/libsicule.a
|
||||
/usr/lib/libiculx.so
|
||||
/usr/lib/libsiculx.a
|
||||
/usr/include/unicode/*.h
|
||||
/usr/include/layout/*.h
|
||||
/usr/lib/icu/%{version}/Makefile.inc
|
||||
/usr/lib/icu/Makefile.inc
|
||||
/usr/share/icu/%{version}/config
|
||||
/usr/share/doc/icu-%{version}/*
|
||||
|
||||
%changelog
|
||||
* Mon Jun 07 2004 Alexei Dets <adets@idsk.com>
|
||||
- update to 3.0
|
||||
* Tue Aug 16 2003 Steven Loomis <srl@jtcsv.com>
|
||||
- update to 2.6.1 - include license
|
||||
* Thu Jun 05 2003 Steven Loomis <srl@jtcsv.com>
|
||||
- Update to 2.6
|
||||
* Fri Dec 27 2002 Steven Loomis <srl@jtcsv.com>
|
||||
- Update to 2.4 spec
|
||||
* Fri Sep 27 2002 Steven Loomis <srl@jtcsv.com>
|
||||
- minor updates to 2.2 spec. Rpath is off by default, don't pass it as an option.
|
||||
* Mon Sep 16 2002 Ian Holsman <ian@holsman.net>
|
||||
- update to icu 2.2
|
||||
|
1929
readme.html
Normal file
1929
readme.html
Normal file
File diff suppressed because it is too large
Load diff
230
source/Doxyfile.in
Normal file
230
source/Doxyfile.in
Normal file
|
@ -0,0 +1,230 @@
|
|||
# Doxyfile 1.3.7
|
||||
# ********************************************************************
|
||||
# * COPYRIGHT:
|
||||
# * Copyright (c) 2004-2009, International Business Machines Corporation
|
||||
# * and others. All Rights Reserved.
|
||||
# ********************************************************************
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = "ICU @VERSION@"
|
||||
PROJECT_NUMBER =
|
||||
OUTPUT_DIRECTORY = doc
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
USE_WINDOWS_ENCODING = YES
|
||||
BRIEF_MEMBER_DESC = YES
|
||||
REPEAT_BRIEF = YES
|
||||
ABBREVIATE_BRIEF =
|
||||
ALWAYS_DETAILED_SEC = NO
|
||||
INLINE_INHERITED_MEMB = NO
|
||||
FULL_PATH_NAMES = NO
|
||||
STRIP_FROM_PATH =
|
||||
STRIP_FROM_INC_PATH =
|
||||
SHORT_NAMES = NO
|
||||
JAVADOC_AUTOBRIEF = YES
|
||||
MULTILINE_CPP_IS_BRIEF = NO
|
||||
DETAILS_AT_TOP = NO
|
||||
INHERIT_DOCS = YES
|
||||
DISTRIBUTE_GROUP_DOC = YES
|
||||
TAB_SIZE = 8
|
||||
ALIASES = "memo=\par Note:\n" \
|
||||
"draft=\xrefitem draft \"Draft\" \"Draft List\" This API may be changed in the future versions and was introduced in" \
|
||||
"stable=\xrefitem stable \"Stable\" \"Stable List\"" \
|
||||
"deprecated=\xrefitem deprecated \"Deprecated\" \"Deprecated List\"" \
|
||||
"obsolete=\xrefitem obsolete \"Obsolete\" \"Obsolete List\"" \
|
||||
"system=\xrefitem system \"System\" \"System List\" \n Do not use unless you know what you are doing." \
|
||||
"internal=\xrefitem internal \"Internal\" \"Internal List\" Do not use. This API is for internal use only."
|
||||
|
||||
OPTIMIZE_OUTPUT_FOR_C = YES
|
||||
OPTIMIZE_OUTPUT_JAVA = NO
|
||||
SUBGROUPING = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Build related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
EXTRACT_ALL = NO
|
||||
EXTRACT_PRIVATE = NO
|
||||
EXTRACT_STATIC = NO
|
||||
EXTRACT_LOCAL_CLASSES = YES
|
||||
EXTRACT_LOCAL_METHODS = NO
|
||||
HIDE_UNDOC_MEMBERS = NO
|
||||
HIDE_UNDOC_CLASSES = NO
|
||||
HIDE_FRIEND_COMPOUNDS = NO
|
||||
HIDE_IN_BODY_DOCS = NO
|
||||
INTERNAL_DOCS = YES
|
||||
CASE_SENSE_NAMES = YES
|
||||
HIDE_SCOPE_NAMES = NO
|
||||
SHOW_INCLUDE_FILES = YES
|
||||
INLINE_INFO = YES
|
||||
SORT_MEMBER_DOCS = YES
|
||||
SORT_BRIEF_DOCS = NO
|
||||
SORT_BY_SCOPE_NAME = NO
|
||||
GENERATE_TODOLIST = YES
|
||||
GENERATE_TESTLIST = YES
|
||||
GENERATE_BUGLIST = YES
|
||||
GENERATE_DEPRECATEDLIST= YES
|
||||
ENABLED_SECTIONS =
|
||||
MAX_INITIALIZER_LINES = 30
|
||||
SHOW_USED_FILES = YES
|
||||
|
||||
# docset
|
||||
GENERATE_DOCSET = NO
|
||||
DOCSET_FEEDNAME = "ICU @VERSION@"
|
||||
DOCSET_BUNDLE_ID = org.icu-project.icu4c
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to warning and progress messages
|
||||
#---------------------------------------------------------------------------
|
||||
QUIET = NO
|
||||
WARNINGS = YES
|
||||
WARN_IF_UNDOCUMENTED = YES
|
||||
WARN_IF_DOC_ERROR = YES
|
||||
WARN_FORMAT = "$file:$line: $text"
|
||||
WARN_LOGFILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the input files
|
||||
#---------------------------------------------------------------------------
|
||||
INPUT = ./common/unicode/platform.h @srcdir@/common/unicode @srcdir@/i18n/unicode @srcdir@/io/unicode @srcdir@/layout/LEFontInstance.h @srcdir@/layout/LEGlyphStorage.h @srcdir@/layout/LELanguages.h @srcdir@/layout/LEScripts.h @srcdir@/layout/LESwaps.h @srcdir@/layout/LETypes.h @srcdir@/layout/LayoutEngine.h @srcdir@/layoutex/layout
|
||||
FILE_PATTERNS = *.h
|
||||
RECURSIVE = NO
|
||||
EXCLUDE = @srcdir@/common/unicode/urename.h @srcdir@/common/unicode/udraft.h @srcdir@/common/unicode/udeprctd.h @srcdir@/common/unicode/uobslete.h @srcdir@/common/unicode/ppalmos.h
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS = config*.h
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS =
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
IMAGE_PATH =
|
||||
INPUT_FILTER =
|
||||
FILTER_SOURCE_FILES = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to source browsing
|
||||
#---------------------------------------------------------------------------
|
||||
SOURCE_BROWSER = YES
|
||||
INLINE_SOURCES = NO
|
||||
STRIP_CODE_COMMENTS = YES
|
||||
REFERENCED_BY_RELATION = YES
|
||||
REFERENCES_RELATION = YES
|
||||
VERBATIM_HEADERS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the alphabetical class index
|
||||
#---------------------------------------------------------------------------
|
||||
ALPHABETICAL_INDEX = YES
|
||||
COLS_IN_ALPHA_INDEX = 5
|
||||
IGNORE_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the HTML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_HTML = YES
|
||||
HTML_OUTPUT = html
|
||||
HTML_FILE_EXTENSION = .html
|
||||
HTML_HEADER =
|
||||
HTML_FOOTER =
|
||||
HTML_STYLESHEET =
|
||||
HTML_ALIGN_MEMBERS = YES
|
||||
GENERATE_HTMLHELP = NO
|
||||
CHM_FILE =
|
||||
HHC_LOCATION =
|
||||
GENERATE_CHI = NO
|
||||
BINARY_TOC = NO
|
||||
TOC_EXPAND = NO
|
||||
DISABLE_INDEX = NO
|
||||
ENUM_VALUES_PER_LINE = 4
|
||||
GENERATE_TREEVIEW = NO
|
||||
TREEVIEW_WIDTH = 250
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the LaTeX output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_LATEX = NO
|
||||
LATEX_OUTPUT = latex
|
||||
LATEX_CMD_NAME = latex
|
||||
MAKEINDEX_CMD_NAME = makeindex
|
||||
COMPACT_LATEX = NO
|
||||
PAPER_TYPE = a4wide
|
||||
EXTRA_PACKAGES =
|
||||
LATEX_HEADER =
|
||||
PDF_HYPERLINKS = NO
|
||||
USE_PDFLATEX = NO
|
||||
LATEX_BATCHMODE = NO
|
||||
LATEX_HIDE_INDICES = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the RTF output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_RTF = NO
|
||||
RTF_OUTPUT = rtf
|
||||
COMPACT_RTF = NO
|
||||
RTF_HYPERLINKS = NO
|
||||
RTF_STYLESHEET_FILE =
|
||||
RTF_EXTENSIONS_FILE =
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the man page output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_MAN = NO
|
||||
MAN_OUTPUT = man
|
||||
MAN_EXTENSION = .3
|
||||
MAN_LINKS = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the XML output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_XML = NO
|
||||
XML_OUTPUT = xml
|
||||
XML_SCHEMA =
|
||||
XML_DTD =
|
||||
XML_PROGRAMLISTING = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options for the AutoGen Definitions output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_AUTOGEN_DEF = NO
|
||||
#---------------------------------------------------------------------------
|
||||
# configuration options related to the Perl module output
|
||||
#---------------------------------------------------------------------------
|
||||
GENERATE_PERLMOD = NO
|
||||
PERLMOD_LATEX = YES
|
||||
PERLMOD_PRETTY = YES
|
||||
PERLMOD_MAKEVAR_PREFIX =
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the preprocessor
|
||||
#---------------------------------------------------------------------------
|
||||
ENABLE_PREPROCESSING = YES
|
||||
MACRO_EXPANSION = YES
|
||||
EXPAND_ONLY_PREDEF = YES
|
||||
SEARCH_INCLUDES = YES
|
||||
INCLUDE_PATH =
|
||||
INCLUDE_FILE_PATTERNS =
|
||||
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW= U_NAMESPACE_BEGIN= U_NAMESPACE_END=
|
||||
EXPAND_AS_DEFINED =
|
||||
SKIP_FUNCTION_MACROS = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration::additions related to external references
|
||||
#---------------------------------------------------------------------------
|
||||
TAGFILES =
|
||||
GENERATE_TAGFILE = "@builddir@/doc/html/icudocs.tag"
|
||||
ALLEXTERNALS = NO
|
||||
EXTERNAL_GROUPS = YES
|
||||
PERL_PATH = /usr/bin/perl
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration options related to the dot tool
|
||||
#---------------------------------------------------------------------------
|
||||
CLASS_DIAGRAMS = YES
|
||||
HIDE_UNDOC_RELATIONS = YES
|
||||
HAVE_DOT = NO
|
||||
CLASS_GRAPH = YES
|
||||
COLLABORATION_GRAPH = YES
|
||||
UML_LOOK = NO
|
||||
TEMPLATE_RELATIONS = NO
|
||||
INCLUDE_GRAPH = YES
|
||||
INCLUDED_BY_GRAPH = YES
|
||||
CALL_GRAPH = NO
|
||||
GRAPHICAL_HIERARCHY = YES
|
||||
DOT_IMAGE_FORMAT = png
|
||||
DOT_PATH =
|
||||
DOTFILE_DIRS =
|
||||
MAX_DOT_GRAPH_WIDTH = 1024
|
||||
MAX_DOT_GRAPH_HEIGHT = 1024
|
||||
MAX_DOT_GRAPH_DEPTH = 0
|
||||
GENERATE_LEGEND = YES
|
||||
DOT_CLEANUP = YES
|
||||
#---------------------------------------------------------------------------
|
||||
# Configuration::additions related to the search engine
|
||||
#---------------------------------------------------------------------------
|
||||
SEARCHENGINE = YES
|
247
source/Makefile.in
Normal file
247
source/Makefile.in
Normal file
|
@ -0,0 +1,247 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1998-2009, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
## Top-level Makefile.in for ICU
|
||||
## Stephen F. Booth
|
||||
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = .
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
docdir = $(datadir)/doc
|
||||
docsubdir = $(PACKAGE)$(ICULIBDASHSUFFIX)/html
|
||||
docsubsrchdir = $(docsubdir)/search
|
||||
docfilesdir = doc/html
|
||||
docfiles = $(docfilesdir)/*.gif $(docfilesdir)/*.png $(docfilesdir)/*.html $(docfilesdir)/*.css $(docfilesdir)/*.tag $(docfilesdir)/installdox
|
||||
docsrchdir = $(docfilesdir)/search
|
||||
docsrchfiles = $(docsrchdir)/*
|
||||
|
||||
##
|
||||
|
||||
## Build directory information
|
||||
subdir = .
|
||||
|
||||
#AUTOCONF = @AUTOCONF@
|
||||
|
||||
## Optional directory setup
|
||||
@LAYOUT_TRUE@LAYOUT = layout layoutex
|
||||
@ICUIO_TRUE@ICUIO = io
|
||||
@EXTRAS_TRUE@EXTRA = extra
|
||||
@TESTS_TRUE@TEST = test
|
||||
@SAMPLES_TRUE@SAMPLE = samples
|
||||
|
||||
DOXYGEN = @DOXYGEN@
|
||||
DOCZIP = icu-docs.zip
|
||||
|
||||
## Files to remove for 'make clean'
|
||||
CLEANFILES = *~
|
||||
|
||||
## Files built (autoconfed) and installed
|
||||
INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config @platform_make_fragment@ $(EXTRA_DATA:%=$(DESTDIR)$(pkglibdir)/%)
|
||||
|
||||
## Files built (autoconfed) but not installed
|
||||
LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk
|
||||
|
||||
DOCDIRS = common i18n
|
||||
SUBDIRS = stubdata common i18n $(LAYOUT) tools data $(ICUIO) $(EXTRA) $(SAMPLE) $(TEST)
|
||||
|
||||
SECTION = 1
|
||||
|
||||
MANX_FILES = config/icu-config.$(SECTION)
|
||||
|
||||
ALL_MAN_FILES = $(MANX_FILES)
|
||||
|
||||
## Extra files to install [nothing at present]
|
||||
EXTRA_DATA =
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local all-recursive install install-local install-udata install-udata-files install-udata-dlls \
|
||||
install-recursive clean clean-local clean-recursive distclean \
|
||||
distclean-local distclean-recursive doc dist dist-local dist-recursive \
|
||||
check check-local check-recursive clean-recursive-with-twist install-icu \
|
||||
doc install-doc tests
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local all-recursive
|
||||
install: install-recursive install-local
|
||||
clean: clean-recursive-with-twist clean-local
|
||||
distclean : distclean-recursive distclean-local
|
||||
dist: dist-recursive dist-local
|
||||
check: all check-recursive
|
||||
check-recursive: all
|
||||
|
||||
ifeq ($(DOXYGEN),)
|
||||
doc:
|
||||
@echo you need Doxygen to generate documentation. Doxygen can be found on the Web
|
||||
@echo at http://www.doxygen.org/
|
||||
else
|
||||
doc: doc/html/index.html
|
||||
|
||||
doc/html/index.html: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/common/unicode/*.h $(srcdir)/i18n/unicode/*.h $(srcdir)/layout/unicode/*.h $(srcdir)/io/unicode/*.h)
|
||||
$(DOXYGEN)
|
||||
|
||||
Doxyfile: $(srcdir)/Doxyfile.in
|
||||
CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(DOCZIP): doc
|
||||
-$(RMV) $(DOCZIP)
|
||||
( cd doc/html ; zip -r ../../$(DOCZIP) * )
|
||||
endif
|
||||
|
||||
LOCAL_SUBDIRS = $(SUBDIRS)
|
||||
CLEAN_FIRST_SUBDIRS = tools
|
||||
|
||||
$(LIBDIR) $(BINDIR):
|
||||
-$(MKINSTALLDIRS) $@
|
||||
|
||||
## Recursive targets
|
||||
all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive: $(LIBDIR) $(BINDIR)
|
||||
@dot_seen=no; \
|
||||
target=`echo $@ | sed s/-recursive//`; \
|
||||
list='$(LOCAL_SUBDIRS)'; for subdir in $$list; do \
|
||||
echo "$(MAKE)[$(MAKELEVEL)]: Making \`$$target' in \`$$subdir'"; \
|
||||
if test "$$subdir" = "."; then \
|
||||
dot_seen=yes; \
|
||||
local_target="$$target-local"; \
|
||||
else \
|
||||
local_target="$$target"; \
|
||||
fi; \
|
||||
(cd $$subdir && $(MAKE) RECURSIVE=YES $$local_target) || exit; \
|
||||
done; \
|
||||
if test "$$dot_seen" = "no"; then \
|
||||
$(MAKE) "$$target-local" || exit; \
|
||||
fi
|
||||
|
||||
clean-recursive-with-twist:
|
||||
$(MAKE) clean-recursive LOCAL_SUBDIRS='$(CLEAN_FIRST_SUBDIRS) $(filter-out $(CLEAN_FIRST_SUBDIRS),$(LOCAL_SUBDIRS))'
|
||||
|
||||
all-local: $(srcdir)/configure $(LOCAL_BUILT_FILES) $(INSTALLED_BUILT_FILES)
|
||||
|
||||
install-local: install-icu install-manx
|
||||
|
||||
install-icu: $(INSTALLED_BUILT_FILES)
|
||||
@$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/config
|
||||
@$(MKINSTALLDIRS) $(DESTDIR)$(pkglibdir)
|
||||
@$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
|
||||
@$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
|
||||
$(INSTALL_DATA) @platform_make_fragment@ $(DESTDIR)$(pkgdatadir)/config/@platform_make_fragment_name@
|
||||
$(INSTALL_SCRIPT) $(top_srcdir)/mkinstalldirs $(DESTDIR)$(pkgdatadir)/mkinstalldirs
|
||||
$(INSTALL_SCRIPT) $(top_srcdir)/install-sh $(DESTDIR)$(pkgdatadir)/install-sh
|
||||
$(INSTALL_DATA) $(top_srcdir)/../license.html $(DESTDIR)$(pkgdatadir)/license.html
|
||||
$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
|
||||
$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
|
||||
$(INSTALL_DATA) $(top_builddir)/config/pkgdata.inc $(DESTDIR)$(pkglibdir)/pkgdata.inc
|
||||
cd $(DESTDIR)$(pkglibdir)/..; \
|
||||
$(RM) current && ln -s $(VERSION) current; \
|
||||
$(RM) Makefile.inc && ln -s current/Makefile.inc Makefile.inc; \
|
||||
$(RM) pkgdata.inc && ln -s current/pkgdata.inc pkgdata.inc
|
||||
|
||||
ifeq ($(DOXYGEN),)
|
||||
install-doc:
|
||||
else
|
||||
install-doc: doc
|
||||
$(RM) -r $(DESTDIR)$(docdir)/$(docsubdir)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(docdir)/$(docsubsrchdir)
|
||||
$(INSTALL_DATA) $(docfiles) $(DESTDIR)$(docdir)/$(docsubdir)
|
||||
$(INSTALL_DATA) $(docsrchfiles) $(DESTDIR)$(docdir)/$(docsubsrchdir)
|
||||
endif
|
||||
|
||||
$(DESTDIR)$(pkglibdir)/%: $(top_srcdir)/../data/%
|
||||
$(INSTALL_DATA) $< $@
|
||||
|
||||
# Build the tests, but don't run them.
|
||||
tests: all
|
||||
$(MAKE) -C $(top_builddir)/test
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) Doxyfile doc $(DOCZIP)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config
|
||||
$(RMV) config.cache config.log config.status $(top_builddir)/config/icucross.mk
|
||||
$(RMV) Makefile config/Makefile icudefs.mk $(LIBDIR) $(BINDIR)
|
||||
|
||||
check-local: $(top_builddir)/config/icu-config $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc
|
||||
@echo verifying that icu-config --selfcheck can operate
|
||||
@test "passed" = "$(shell $(top_builddir)/config/icu-config --selfcheck 2>&1)" || (echo "FAIL: icu-config could not run properly." ; exit 1)
|
||||
@echo verifying that $(MAKE) -f Makefile.inc selfcheck can operate
|
||||
@test "passed" = "$(shell $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
|
||||
@echo "PASS: config selfcheck OK"
|
||||
|
||||
#$(srcdir)/configure : $(srcdir)/configure.in $(top_srcdir)/aclocal.m4
|
||||
# cd $(srcdir) && $(AUTOCONF)
|
||||
|
||||
icudefs.mk: $(srcdir)/icudefs.mk.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
config/icucross.mk: $(top_builddir)/icudefs.mk $(top_builddir)/Makefile
|
||||
@echo rebuilding $@
|
||||
@(echo "CROSS_ICU_VERSION=$(VERSION)" ;\
|
||||
echo "TOOLEXEEXT=$(EXEEXT)" \
|
||||
) > $@
|
||||
@(echo 'TOOLBINDIR=$$(cross_buildroot)/bin' ;\
|
||||
echo 'TOOLLIBDIR=$$(cross_buildroot)/lib' ;\
|
||||
echo "INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(TOOLLIBDIR):$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$$$'"$(LDLIBRARYPATH_ENVVAR)" ;\
|
||||
echo "PKGDATA_INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$(TOOLLIBDIR):$$$$'"$(LDLIBRARYPATH_ENVVAR) "'$$'"(PKGDATA_INVOKE_OPTS)" ;\
|
||||
echo ) >> $@
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in icudefs.mk $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(top_builddir)/config/Makefile.inc: $(srcdir)/config/Makefile.inc.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(top_builddir)/config/pkgdata.inc: icudefs.mk $(top_builddir)/config/pkgdataMakefile
|
||||
cd $(top_builddir)/config; \
|
||||
$(MAKE) -f pkgdataMakefile
|
||||
|
||||
$(top_builddir)/config/pkgdataMakefile:
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(top_builddir)/config/icu-config: $(top_builddir)/Makefile $(top_srcdir)/config/icu-config-top $(top_srcdir)/config/icu-config-bottom $(top_builddir)/config/Makefile.inc @platform_make_fragment@ $(top_srcdir)/config/make2sh.sed
|
||||
-$(RMV) $@
|
||||
$(INSTALL_SCRIPT) $(top_srcdir)/config/icu-config-top $@
|
||||
chmod u+w $@
|
||||
@echo "# Following from @platform_make_fragment@" >> $@
|
||||
sed -f $(top_srcdir)/config/make2sh.sed < $(top_builddir)/config/Makefile.inc | grep -v '#M#' | uniq >> $@
|
||||
sed -f $(top_srcdir)/config/make2sh.sed < @platform_make_fragment@ | grep -v '#M#' | uniq >> $@
|
||||
cat $(top_srcdir)/config/icu-config-bottom >> $@
|
||||
echo "# Rebuilt on "`date` >> $@
|
||||
chmod u-w $@
|
||||
|
||||
config.status: $(srcdir)/configure $(srcdir)/common/unicode/uversion.h
|
||||
@echo
|
||||
@echo
|
||||
@echo "*** config.status has become stale ***"
|
||||
@echo " 'configure' and/or 'uversion.h' have changed, please"
|
||||
@echo " do 'runConfigureICU' (or 'configure') again, as per"
|
||||
@echo " the readme.html."
|
||||
@echo
|
||||
@echo
|
||||
exit 1
|
||||
|
||||
|
||||
install-manx: $(MANX_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
|
||||
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
483
source/aclocal.m4
vendored
Normal file
483
source/aclocal.m4
vendored
Normal file
|
@ -0,0 +1,483 @@
|
|||
# aclocal.m4 for ICU
|
||||
# Copyright (c) 1999-2009, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
# Stephen F. Booth
|
||||
|
||||
# @TOP@
|
||||
|
||||
# ICU_CHECK_MH_FRAG
|
||||
AC_DEFUN(ICU_CHECK_MH_FRAG, [
|
||||
AC_CACHE_CHECK(
|
||||
[which Makefile fragment to use for ${host}],
|
||||
[icu_cv_host_frag],
|
||||
[
|
||||
case "${host}" in
|
||||
*-*-solaris*)
|
||||
if test "$GCC" = yes; then
|
||||
icu_cv_host_frag=mh-solaris-gcc
|
||||
else
|
||||
icu_cv_host_frag=mh-solaris
|
||||
fi ;;
|
||||
alpha*-*-linux-gnu)
|
||||
if test "$GCC" = yes; then
|
||||
icu_cv_host_frag=mh-alpha-linux-gcc
|
||||
else
|
||||
icu_cv_host_frag=mh-alpha-linux-cc
|
||||
fi ;;
|
||||
powerpc*-*-linux*)
|
||||
if test "$GCC" = yes; then
|
||||
icu_cv_host_frag=mh-linux
|
||||
else
|
||||
icu_cv_host_frag=mh-linux-va
|
||||
fi ;;
|
||||
*-*-linux*|*-pc-gnu) icu_cv_host_frag=mh-linux ;;
|
||||
*-*-cygwin|*-*-mingw32)
|
||||
if test "$GCC" = yes; then
|
||||
AC_TRY_COMPILE([
|
||||
#ifndef __MINGW32__
|
||||
#error This is not MinGW
|
||||
#endif], [], icu_cv_host_frag=mh-mingw, icu_cv_host_frag=mh-cygwin)
|
||||
else
|
||||
icu_cv_host_frag=mh-cygwin-msvc
|
||||
fi ;;
|
||||
*-*-*bsd*|*-*-dragonfly*) icu_cv_host_frag=mh-bsd-gcc ;;
|
||||
*-*-aix*)
|
||||
if test "$GCC" = yes; then
|
||||
icu_cv_host_frag=mh-aix-gcc
|
||||
else
|
||||
icu_cv_host_frag=mh-aix-va
|
||||
fi ;;
|
||||
*-*-hpux*)
|
||||
if test "$GCC" = yes; then
|
||||
icu_cv_host_frag=mh-hpux-gcc
|
||||
else
|
||||
case "$CXX" in
|
||||
*aCC) icu_cv_host_frag=mh-hpux-acc ;;
|
||||
esac
|
||||
fi ;;
|
||||
*-*ibm-openedition*|*-*-os390*) icu_cv_host_frag=mh-os390 ;;
|
||||
*-*-os400*) icu_cv_host_frag=mh-os400 ;;
|
||||
*-apple-rhapsody*) icu_cv_host_frag=mh-darwin ;;
|
||||
*-apple-darwin*) icu_cv_host_frag=mh-darwin ;;
|
||||
*-*-beos|*-*-haiku) icu_cv_host_frag=mh-beos ;;
|
||||
*-*-irix*) icu_cv_host_frag=mh-irix ;;
|
||||
*-dec-osf*) icu_cv_host_frag=mh-alpha-osf ;;
|
||||
*-*-nto*) icu_cv_host_frag=mh-qnx ;;
|
||||
*-ncr-*) icu_cv_host_frag=mh-mpras ;;
|
||||
*) icu_cv_host_frag=mh-unknown ;;
|
||||
esac
|
||||
]
|
||||
)
|
||||
])
|
||||
|
||||
# ICU_CONDITIONAL - similar example taken from Automake 1.4
|
||||
AC_DEFUN([ICU_CONDITIONAL],
|
||||
[AC_SUBST($1_TRUE)
|
||||
if $2; then
|
||||
$1_TRUE=
|
||||
else
|
||||
$1_TRUE='#'
|
||||
fi])
|
||||
|
||||
# ICU_PROG_LINK - Make sure that the linker is usable
|
||||
AC_DEFUN([ICU_PROG_LINK],
|
||||
[
|
||||
case "${host}" in
|
||||
*-*-cygwin*|*-*-mingw*)
|
||||
if test "$GCC" != yes && test -n "`link --version 2>&1 | grep 'GNU coreutils'`"; then
|
||||
AC_MSG_ERROR([link.exe is not a valid linker. Your PATH is incorrect.
|
||||
Please follow the directions in ICU's readme.])
|
||||
fi;;
|
||||
*);;
|
||||
esac])
|
||||
|
||||
# AC_SEARCH_LIBS_FIRST(FUNCTION, SEARCH-LIBS [, ACTION-IF-FOUND
|
||||
# [, ACTION-IF-NOT-FOUND [, OTHER-LIBRARIES]]])
|
||||
# Search for a library defining FUNC, then see if it's not already available.
|
||||
|
||||
AC_DEFUN([AC_SEARCH_LIBS_FIRST],
|
||||
[AC_PREREQ([2.13])
|
||||
AC_CACHE_CHECK([for library containing $1], [ac_cv_search_$1],
|
||||
[ac_func_search_save_LIBS="$LIBS"
|
||||
ac_cv_search_$1="no"
|
||||
for i in $2; do
|
||||
LIBS="-l$i $5 $ac_func_search_save_LIBS"
|
||||
AC_TRY_LINK_FUNC([$1],
|
||||
[ac_cv_search_$1="-l$i"
|
||||
break])
|
||||
done
|
||||
if test "$ac_cv_search_$1" = "no"; then
|
||||
AC_TRY_LINK_FUNC([$1], [ac_cv_search_$1="none required"])
|
||||
fi
|
||||
LIBS="$ac_func_search_save_LIBS"])
|
||||
if test "$ac_cv_search_$1" != "no"; then
|
||||
test "$ac_cv_search_$1" = "none required" || LIBS="$ac_cv_search_$1 $LIBS"
|
||||
$3
|
||||
else :
|
||||
$4
|
||||
fi])
|
||||
|
||||
|
||||
|
||||
# Check if we can build and use 64-bit libraries
|
||||
AC_DEFUN([AC_CHECK_64BIT_LIBS],
|
||||
[
|
||||
BITS_REQ=nochange
|
||||
ENABLE_64BIT_LIBS=unknown
|
||||
## revisit this for cross-compile.
|
||||
|
||||
AC_ARG_ENABLE(64bit-libs,
|
||||
[ --enable-64bit-libs (deprecated, use --with-library-bits) build 64-bit libraries [default= platform default]],
|
||||
[echo "note, use --with-library-bits instead of --*-64bit-libs"
|
||||
case "${enableval}" in
|
||||
no|false|32) with_library_bits=32; ;;
|
||||
yes|true|64) with_library_bits=64else32 ;;
|
||||
nochange) with_library_bits=nochange; ;;
|
||||
*) AC_MSG_ERROR(bad value ${enableval} for '--*-64bit-libs') ;;
|
||||
esac] )
|
||||
|
||||
|
||||
AC_ARG_WITH(library-bits,
|
||||
[ --with-library-bits=bits specify how many bits to use for the library (32, 64, 64else32, nochange) [default=nochange]],
|
||||
[case "${withval}" in
|
||||
""|nochange) BITS_REQ=$withval ;;
|
||||
32|64|64else32) BITS_REQ=$withval ;;
|
||||
*) AC_MSG_ERROR(bad value ${withval} for --with-library-bits) ;;
|
||||
esac])
|
||||
|
||||
# don't use these for cross compiling
|
||||
if test "$cross_compiling" = "yes" -a "${BITS_REQ}" != "nochange"; then
|
||||
AC_MSG_ERROR([Don't specify bitness when cross compiling. See readme.html for help with cross compilation., and set compiler options manually.])
|
||||
fi
|
||||
DEFAULT_64BIT=no
|
||||
AC_MSG_CHECKING([whether runnable 64 bit binaries are built by default])
|
||||
AC_RUN_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
DEFAULT_64BIT=yes, DEFAULT_64BIT=no, DEFAULT_64BIT=unknown)
|
||||
BITS_GOT=unknown
|
||||
|
||||
# 'OK' here means we can skip any further checking: the default bitness already satisfies the request.
|
||||
BITS_OK=yes
|
||||
|
||||
# do we need to check for buildable/runnable 32 or 64 bit?
|
||||
BITS_CHECK_32=no
|
||||
BITS_CHECK_64=no
|
||||
|
||||
# later, can we run the 32/64 bit binaries so made?
|
||||
BITS_RUN_32=no
|
||||
BITS_RUN_64=no
|
||||
|
||||
if test "$DEFAULT_64BIT" = "yes"; then
|
||||
# we get 64 bits by default.
|
||||
BITS_GOT=64
|
||||
case "$BITS_REQ" in
|
||||
32)
|
||||
# need to look for 32 bit support.
|
||||
BITS_CHECK_32=yes
|
||||
# not OK yet: the default build is 64 bit but 32 bit was requested.
|
||||
BITS_OK=no;;
|
||||
# everyone else is happy.
|
||||
nochange) ;;
|
||||
*) ;;
|
||||
esac
|
||||
elif test "$DEFAULT_64BIT" = "no"; then
|
||||
# not 64 bit by default.
|
||||
BITS_GOT=32
|
||||
case "$BITS_REQ" in
|
||||
64|64else32)
|
||||
BITS_CHECK_64=yes
|
||||
#BITS_CHECK_32=yes
|
||||
BITS_OK=no;;
|
||||
nochange) ;;
|
||||
*) ;;
|
||||
esac
|
||||
elif test "$DEFAULT_64BIT" = "unknown"; then
|
||||
# cross compiling.
|
||||
BITS_GOT=unknown
|
||||
case "$BITS_REQ" in
|
||||
64|64else32) BITS_OK=no
|
||||
BITS_CHECK_32=yes
|
||||
BITS_CHECK_64=yes ;;
|
||||
32) BITS_OK=no;;
|
||||
nochange) ;;
|
||||
*) ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
AC_MSG_RESULT($DEFAULT_64BIT);
|
||||
|
||||
if test "$BITS_OK" != "yes"; then
|
||||
# not OK yet. Back up the current flags so they can be restored after probing.
|
||||
CFLAGS_OLD="${CFLAGS}"
|
||||
CXXFLAGS_OLD="${CXXFLAGS}"
|
||||
LDFLAGS_OLD="${LDFLAGS}"
|
||||
ARFLAGS_OLD="${ARFLAGS}"
|
||||
|
||||
CFLAGS_32="${CFLAGS}"
|
||||
CXXFLAGS_32="${CXXFLAGS}"
|
||||
LDFLAGS_32="${LDFLAGS}"
|
||||
ARFLAGS_32="${ARFLAGS}"
|
||||
|
||||
CFLAGS_64="${CFLAGS}"
|
||||
CXXFLAGS_64="${CXXFLAGS}"
|
||||
LDFLAGS_64="${LDFLAGS}"
|
||||
ARFLAGS_64="${ARFLAGS}"
|
||||
|
||||
CAN_BUILD_64=unknown
|
||||
CAN_BUILD_32=unknown
|
||||
# These results can't be cached because this check sets compiler flags.
|
||||
if test "$BITS_CHECK_64" = "yes"; then
|
||||
AC_MSG_CHECKING([how to build 64-bit executables])
|
||||
CAN_BUILD_64=no
|
||||
####
|
||||
# Find out if we think we can *build* for 64 bit. Doesn't check whether we can run it.
|
||||
# Note, we don't have to actually check if the options work- we'll try them before using them.
|
||||
# So, only try actually testing the options, if you are trying to decide between multiple options.
|
||||
# On exit from the following clauses:
|
||||
# if CAN_BUILD_64=yes:
|
||||
# *FLAGS are assumed to contain the right settings for 64bit
|
||||
# else if CAN_BUILD_64=no: (default)
|
||||
# *FLAGS are assumed to be trashed, and will be reset from *FLAGS_OLD
|
||||
|
||||
if test "$GCC" = yes; then
|
||||
CFLAGS="${CFLAGS} -m64"
|
||||
CXXFLAGS="${CXXFLAGS} -m64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
else
|
||||
case "${host}" in
|
||||
sparc*-*-solaris*)
|
||||
# 1. try -m64
|
||||
CFLAGS="${CFLAGS} -m64"
|
||||
CXXFLAGS="${CXXFLAGS} -m64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
if test "$CAN_BUILD_64" != yes; then
|
||||
# Nope. back out changes.
|
||||
CFLAGS="${CFLAGS_OLD}"
|
||||
CXXFLAGS="${CXXFLAGS_OLD}"
|
||||
# 2. try xarch=v9 [deprecated]
|
||||
## TODO: cross compile: the following won't work.
|
||||
SPARCV9=`isainfo -n 2>&1 | grep sparcv9`
|
||||
SOL64=`$CXX -xarch=v9 2>&1 && $CC -xarch=v9 2>&1 | grep -v usage:`
|
||||
# "Warning: -xarch=v9 is deprecated, use -m64 to create 64-bit programs"
|
||||
if test -z "$SOL64" && test -n "$SPARCV9"; then
|
||||
CFLAGS="${CFLAGS} -xtarget=ultra -xarch=v9"
|
||||
CXXFLAGS="${CXXFLAGS} -xtarget=ultra -xarch=v9"
|
||||
LDFLAGS="${LDFLAGS} -xtarget=ultra -xarch=v9"
|
||||
CAN_BUILD_64=yes
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
i386-*-solaris*)
|
||||
# 1. try -m64
|
||||
CFLAGS="${CFLAGS} -m64"
|
||||
CXXFLAGS="${CXXFLAGS} -m64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
if test "$CAN_BUILD_64" != yes; then
|
||||
# Nope. back out changes.
|
||||
CFLAGS="${CFLAGS_OLD}"
|
||||
CXXFLAGS="${CXXFLAGS_OLD}"
|
||||
# 2. try the older compiler option
|
||||
## TODO: cross compile problem
|
||||
# Detect an amd64-capable host, mirroring the SPARCV9 probe in the sparc branch;
# AMD64 is tested below and was otherwise never assigned.
AMD64=`isainfo -n 2>&1 | grep amd64`
SOL64=`$CXX -xtarget=generic64 2>&1 && $CC -xtarget=generic64 2>&1 | grep -v usage:`
|
||||
if test -z "$SOL64" && test -n "$AMD64"; then
|
||||
CFLAGS="${CFLAGS} -xtarget=generic64"
|
||||
CXXFLAGS="${CXXFLAGS} -xtarget=generic64"
|
||||
CAN_BUILD_64=yes
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
ia64-*-linux*)
|
||||
# check for ecc/ecpc compiler support
|
||||
## TODO: cross compiler problem
|
||||
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Intel`"; then
|
||||
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Itanium`"; then
|
||||
CAN_BUILD_64=yes
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
*-*-cygwin)
|
||||
# vcvarsamd64.bat should have been used to enable 64-bit builds.
|
||||
# We only do this check to display the correct answer.
|
||||
## TODO: cross compiler problem
|
||||
if test -n "`$CXX -help 2>&1 | grep 'for x64'`"; then
|
||||
CAN_BUILD_64=yes
|
||||
fi
|
||||
;;
|
||||
*-*-aix*|powerpc64-*-linux*)
|
||||
CFLAGS="${CFLAGS} -q64"
|
||||
CXXFLAGS="${CXXFLAGS} -q64"
|
||||
LDFLAGS="${LDFLAGS} -q64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
if test "$CAN_BUILD_64" = yes; then
|
||||
# worked- set other options.
|
||||
case "${host}" in
|
||||
*-*-aix*)
|
||||
# tell AIX what executable mode to use.
|
||||
ARFLAGS="${ARFLAGS} -X64"
|
||||
esac
|
||||
fi
|
||||
;;
|
||||
*-*-hpux*)
|
||||
# First we try the newer +DD64, if that doesn't work,
|
||||
# try other options.
|
||||
|
||||
CFLAGS="${CFLAGS} +DD64"
|
||||
CXXFLAGS="${CXXFLAGS} +DD64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
if test "$CAN_BUILD_64" != yes; then
|
||||
# reset
|
||||
CFLAGS="${CFLAGS_OLD}"
|
||||
CXXFLAGS="${CXXFLAGS_OLD}"
|
||||
# append
|
||||
CFLAGS="${CFLAGS} +DA2.0W"
|
||||
CXXFLAGS="${CXXFLAGS} +DA2.0W"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
fi
|
||||
;;
|
||||
*-*ibm-openedition*|*-*-os390*)
|
||||
CFLAGS="${CFLAGS} -Wc,lp64"
|
||||
CXXFLAGS="${CXXFLAGS} -Wc,lp64"
|
||||
LDFLAGS="${LDFLAGS} -Wl,lp64"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
CAN_BUILD_64=yes, CAN_BUILD_64=no)
|
||||
;;
|
||||
*)
|
||||
# unknown platform.
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
AC_MSG_RESULT($CAN_BUILD_64)
|
||||
if test "$CAN_BUILD_64" = yes; then
|
||||
AC_MSG_CHECKING([whether runnable 64-bit binaries are being built ])
|
||||
AC_TRY_RUN(int main(void) {return (sizeof(void*)*8==64)?0:1;},
|
||||
BITS_RUN_64=yes, BITS_RUN_64=no, BITS_RUN_64=unknown)
|
||||
AC_MSG_RESULT($BITS_RUN_64);
|
||||
|
||||
CFLAGS_64="${CFLAGS}"
|
||||
CXXFLAGS_64="${CXXFLAGS}"
|
||||
LDFLAGS_64="${LDFLAGS}"
|
||||
ARFLAGS_64="${ARFLAGS}"
|
||||
fi
|
||||
# put it back.
|
||||
CFLAGS="${CFLAGS_OLD}"
|
||||
CXXFLAGS="${CXXFLAGS_OLD}"
|
||||
LDFLAGS="${LDFLAGS_OLD}"
|
||||
ARFLAGS="${ARFLAGS_OLD}"
|
||||
fi
|
||||
if test "$BITS_CHECK_32" = "yes"; then
|
||||
# see comment under 'if BITS_CHECK_64', above.
|
||||
AC_MSG_CHECKING([how to build 32-bit executables])
|
||||
if test "$GCC" = yes; then
|
||||
CFLAGS="${CFLAGS} -m32"
|
||||
CXXFLAGS="${CXXFLAGS} -m32"
|
||||
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==32)?0:1;},
|
||||
CAN_BUILD_32=yes, CAN_BUILD_32=no)
|
||||
fi
|
||||
AC_MSG_RESULT($CAN_BUILD_32)
|
||||
if test "$CAN_BUILD_32" = yes; then
|
||||
AC_MSG_CHECKING([whether runnable 32-bit binaries are being built ])
|
||||
AC_TRY_RUN(int main(void) {return (sizeof(void*)*8==32)?0:1;},
|
||||
BITS_RUN_32=yes, BITS_RUN_32=no, BITS_RUN_32=unknown)
|
||||
AC_MSG_RESULT($BITS_RUN_32);
|
||||
CFLAGS_32="${CFLAGS}"
|
||||
CXXFLAGS_32="${CXXFLAGS}"
|
||||
LDFLAGS_32="${LDFLAGS}"
|
||||
ARFLAGS_32="${ARFLAGS}"
|
||||
fi
|
||||
# put it back.
|
||||
CFLAGS="${CFLAGS_OLD}"
|
||||
CXXFLAGS="${CXXFLAGS_OLD}"
|
||||
LDFLAGS="${LDFLAGS_OLD}"
|
||||
ARFLAGS="${ARFLAGS_OLD}"
|
||||
fi
|
||||
|
||||
##
|
||||
# OK. Now, we've tested for 32 and 64 bitness. Let's see what we'll do.
|
||||
#
|
||||
|
||||
# First, implement 64else32
|
||||
if test "$BITS_REQ" = "64else32"; then
|
||||
if test "$BITS_RUN_64" = "yes"; then
|
||||
BITS_REQ=64
|
||||
else
|
||||
# no changes.
|
||||
BITS_OK=yes
|
||||
fi
|
||||
fi
|
||||
|
||||
# implement.
|
||||
if test "$BITS_REQ" = "32" -a "$BITS_RUN_32" = "yes"; then
|
||||
CFLAGS="${CFLAGS_32}"
|
||||
CXXFLAGS="${CXXFLAGS_32}"
|
||||
LDFLAGS="${LDFLAGS_32}"
|
||||
ARFLAGS="${ARFLAGS_32}"
|
||||
BITS_OK=yes
|
||||
elif test "$BITS_REQ" = "64" -a "$BITS_RUN_64" = "yes"; then
|
||||
CFLAGS="${CFLAGS_64}"
|
||||
CXXFLAGS="${CXXFLAGS_64}"
|
||||
LDFLAGS="${LDFLAGS_64}"
|
||||
ARFLAGS="${ARFLAGS_64}"
|
||||
BITS_OK=yes
|
||||
elif test "$BITS_OK" != "yes"; then
|
||||
AC_MSG_ERROR([Requested $BITS_REQ bit binaries but could not compile and execute them. See readme.html for help with cross compilation., and set compiler options manually.])
|
||||
fi
|
||||
fi
|
||||
])
|
||||
|
||||
# Strict compilation options.
|
||||
AC_DEFUN([AC_CHECK_STRICT_COMPILE],
|
||||
[
|
||||
AC_MSG_CHECKING([whether strict compiling is on])
|
||||
AC_ARG_ENABLE(strict,[ --enable-strict compile with strict compiler options [default=yes]], [
|
||||
if test "$enableval" = no
|
||||
then
|
||||
ac_use_strict_options=no
|
||||
else
|
||||
ac_use_strict_options=yes
|
||||
fi
|
||||
], [ac_use_strict_options=yes])
|
||||
AC_MSG_RESULT($ac_use_strict_options)
|
||||
|
||||
if test "$ac_use_strict_options" = yes
|
||||
then
|
||||
if test "$GCC" = yes
|
||||
then
|
||||
CFLAGS="$CFLAGS -Wall -ansi -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings -Wno-long-long"
|
||||
case "${host}" in
|
||||
*-*-solaris*)
|
||||
CFLAGS="$CFLAGS -D__STDC__=0";;
|
||||
esac
|
||||
else
|
||||
case "${host}" in
|
||||
*-*-cygwin)
|
||||
if test "`$CC /help 2>&1 | head -c9`" = "Microsoft"
|
||||
then
|
||||
CFLAGS="$CFLAGS /W4"
|
||||
fi
|
||||
esac
|
||||
fi
|
||||
if test "$GXX" = yes
|
||||
then
|
||||
CXXFLAGS="$CXXFLAGS -W -Wall -ansi -pedantic -Wpointer-arith -Wwrite-strings -Wno-long-long"
|
||||
case "${host}" in
|
||||
*-*-solaris*)
|
||||
CXXFLAGS="$CXXFLAGS -D__STDC__=0";;
|
||||
esac
|
||||
else
|
||||
case "${host}" in
|
||||
*-*-cygwin)
|
||||
if test "`$CXX /help 2>&1 | head -c9`" = "Microsoft"
|
||||
then
|
||||
CXXFLAGS="$CXXFLAGS /W4"
|
||||
fi
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
])
|
||||
|
||||
|
562
source/allinone/allinone.sln
Normal file
562
source/allinone/allinone.sln
Normal file
|
@ -0,0 +1,562 @@
|
|||
Microsoft Visual Studio Solution File, Format Version 10.00
|
||||
# Visual Studio 2008
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "common", "..\common\common.vcproj", "{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{203EC78A-0531-43F0-A636-285439BDE025} = {203EC78A-0531-43F0-A636-285439BDE025}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw\ctestfw.vcproj", "{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbrk", "..\tools\genbrk\genbrk.vcproj", "{C2BE5000-7501-4E87-9724-B8D82494FAE6}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genccode", "..\tools\genccode\genccode.vcproj", "{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencmn", "..\tools\gencmn\gencmn.vcproj", "{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencnval", "..\tools\gencnval\gencnval.vcproj", "{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennames", "..\tools\gennames\gennames.vcproj", "{F5281B04-A9E0-4680-BBA8-1D7F7D115458}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennorm", "..\tools\gennorm\gennorm.vcproj", "{F5213103-6CBE-46E6-B4CC-2570B6837D86}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genpname", "..\tools\genpname\genpname.vcproj", "{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genprops", "..\tools\genprops\genprops.vcproj", "{6F744648-D15F-478A-90C6-58E353B5DDB3}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genrb", "..\tools\genrb\genrb.vcproj", "{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gentest", "..\tools\gentest\gentest.vcproj", "{77C78066-746F-4EA6-B3FE-B8C8A4A97891}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genuca", "..\tools\genuca\genuca.vcproj", "{86829694-A375-4C58-B4EA-96EF514E3225}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "i18n", "..\i18n\i18n.vcproj", "{0178B127-6269-407D-B112-93877BB62776}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "intltest", "..\test\intltest\intltest.vcproj", "{73632960-B3A6-464D-83A3-4B43365F19B8}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layout", "..\layout\layout.vcproj", "{C920062A-0647-4553-A3B2-37C58065664B}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layoutex", "..\layoutex\layoutex.vcproj", "{37FC2C7F-1904-4811-8955-2F478830EAD1}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makeconv", "..\tools\makeconv\makeconv.vcproj", "{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makedata", "..\data\makedata.vcproj", "{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6} = {C2BE5000-7501-4E87-9724-B8D82494FAE6}
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86} = {F5213103-6CBE-46E6-B4CC-2570B6837D86}
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458} = {F5281B04-A9E0-4680-BBA8-1D7F7D115458}
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A} = {97521D06-EC47-45D4-8BD0-9E16B3F93B2A}
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB} = {DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF} = {8B41752B-5A52-41E4-B7E0-07921C0CC6BF}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C} = {F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3} = {6F744648-D15F-478A-90C6-58E353B5DDB3}
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61} = {DB312A49-12A9-4E07-9E96-451DC2D8FF61}
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62} = {DB312A49-12A9-4E07-9E96-451DC2D8FF62}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC} = {62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8} = {73632960-B3A6-464D-83A3-4B43365F19B8}
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891} = {77C78066-746F-4EA6-B3FE-B8C8A4A97891}
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1} = {37FC2C7F-1904-4811-8955-2F478830EAD1}
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F} = {E4993E82-D68A-46CA-BAE0-9D35E172E46F}
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2} = {67351485-4D18-4245-BE39-A7EF0675ACD2}
|
||||
{203EC78A-0531-43F0-A636-285439BDE025} = {203EC78A-0531-43F0-A636-285439BDE025}
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C} = {DBA4088D-F6F9-4F8F-8820-082A4765C16C}
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F} = {A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225} = {86829694-A375-4C58-B4EA-96EF514E3225}
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321} = {3D1246AE-1B32-479B-BECA-AEFA97BE2321}
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057} = {691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA} = {631C23CE-6C1D-4875-88F0-85E0A42B36EA}
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3} = {D3065ADB-8820-4CC7-9B6C-9510833961A3}
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547} = {FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569} = {9D4211F7-2C77-439C-82F0-30A4E43BA569}
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC} = {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pkgdata", "..\tools\pkgdata\pkgdata.vcproj", "{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC} = {62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F} = {A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547} = {FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stubdata", "..\stubdata\stubdata.vcproj", "{203EC78A-0531-43F0-A636-285439BDE025}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toolutil", "..\tools\toolutil\toolutil.vcproj", "{6B231032-3CB5-4EED-9210-810D666A23A0}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "uconv", "..\extra\uconv\uconv.vcproj", "{DBA4088D-F6F9-4F8F-8820-082A4765C16C}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A} = {97521D06-EC47-45D4-8BD0-9E16B3F93B2A}
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC} = {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "io", "..\io\io.vcproj", "{C2B04507-2521-4801-BF0D-5FD79D6D518C}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gensprep", "..\tools\gensprep\gensprep.vcproj", "{631C23CE-6C1D-4875-88F0-85E0A42B36EA}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\iotest.vcproj", "{E4993E82-D68A-46CA-BAE0-9D35E172E46F}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbidi", "..\tools\genbidi\genbidi.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF62}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencase", "..\tools\gencase\gencase.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF61}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genctd", "..\tools\genctd\genctd.vcproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1} = {37FC2C7F-1904-4811-8955-2F478830EAD1}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\gencfu.vcproj", "{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.Build.0 = Debug|x64
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.Build.0 = Release|Win32
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.ActiveCfg = Release|x64
|
||||
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.Build.0 = Release|x64
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.Build.0 = Debug|x64
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.Build.0 = Release|Win32
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.ActiveCfg = Release|x64
|
||||
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.Build.0 = Release|x64
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.Build.0 = Debug|x64
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.Build.0 = Release|Win32
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.ActiveCfg = Release|x64
|
||||
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.Build.0 = Release|x64
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.Build.0 = Debug|x64
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.Build.0 = Release|Win32
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.ActiveCfg = Release|x64
|
||||
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.Build.0 = Release|x64
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.Build.0 = Debug|x64
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.Build.0 = Release|Win32
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.ActiveCfg = Release|x64
|
||||
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.Build.0 = Release|x64
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.Build.0 = Debug|x64
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.Build.0 = Release|Win32
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.ActiveCfg = Release|x64
|
||||
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.Build.0 = Release|x64
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.Build.0 = Debug|x64
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.Build.0 = Release|Win32
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.ActiveCfg = Release|x64
|
||||
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.Build.0 = Release|x64
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.Build.0 = Debug|x64
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.Build.0 = Release|Win32
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.ActiveCfg = Release|x64
|
||||
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.Build.0 = Release|x64
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.Build.0 = Debug|x64
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.Build.0 = Release|Win32
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.ActiveCfg = Release|x64
|
||||
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.Build.0 = Release|x64
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.Build.0 = Debug|x64
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.Build.0 = Release|Win32
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.ActiveCfg = Release|x64
|
||||
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.Build.0 = Release|x64
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|x64.Build.0 = Debug|x64
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|Win32.Build.0 = Release|Win32
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|x64.ActiveCfg = Release|x64
|
||||
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|x64.Build.0 = Release|x64
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|x64.Build.0 = Debug|x64
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|Win32.Build.0 = Release|Win32
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|x64.ActiveCfg = Release|x64
|
||||
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|x64.Build.0 = Release|x64
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|x64.Build.0 = Debug|x64
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|Win32.Build.0 = Release|Win32
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|x64.ActiveCfg = Release|x64
|
||||
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|x64.Build.0 = Release|x64
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|x64.Build.0 = Debug|x64
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|Win32.Build.0 = Release|Win32
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|x64.ActiveCfg = Release|x64
|
||||
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|x64.Build.0 = Release|x64
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.Build.0 = Debug|x64
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.Build.0 = Release|Win32
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.ActiveCfg = Release|x64
|
||||
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.Build.0 = Release|x64
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.Build.0 = Debug|x64
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.Build.0 = Release|Win32
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.ActiveCfg = Release|x64
|
||||
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|x64.Build.0 = Debug|x64
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|Win32.Build.0 = Release|Win32
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|x64.ActiveCfg = Release|x64
|
||||
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|x64.Build.0 = Release|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
|
||||
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.Build.0 = Debug|x64
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.Build.0 = Release|Win32
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.ActiveCfg = Release|x64
|
||||
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.Build.0 = Release|x64
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.Build.0 = Debug|x64
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.Build.0 = Release|Win32
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.ActiveCfg = Release|x64
|
||||
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.Build.0 = Release|x64
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.Build.0 = Debug|x64
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.Build.0 = Release|Win32
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.ActiveCfg = Release|x64
|
||||
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.Build.0 = Release|x64
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.Build.0 = Debug|x64
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.Build.0 = Release|Win32
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.ActiveCfg = Release|x64
|
||||
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.Build.0 = Release|x64
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.Build.0 = Debug|x64
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.Build.0 = Release|Win32
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.ActiveCfg = Release|x64
|
||||
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.Build.0 = Release|x64
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.Build.0 = Debug|x64
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.Build.0 = Release|Win32
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.ActiveCfg = Release|x64
|
||||
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.Build.0 = Release|x64
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.Build.0 = Debug|x64
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.Build.0 = Release|Win32
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.ActiveCfg = Release|x64
|
||||
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.Build.0 = Release|x64
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.Build.0 = Debug|x64
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.Build.0 = Release|Win32
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.ActiveCfg = Release|x64
|
||||
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.Build.0 = Release|x64
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.Build.0 = Debug|x64
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.Build.0 = Release|Win32
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.ActiveCfg = Release|x64
|
||||
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.Build.0 = Release|x64
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.Build.0 = Debug|x64
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.Build.0 = Release|Win32
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.ActiveCfg = Release|x64
|
||||
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.Build.0 = Release|x64
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.Build.0 = Debug|x64
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.Build.0 = Release|Win32
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.ActiveCfg = Release|x64
|
||||
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.Build.0 = Release|x64
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.Build.0 = Debug|x64
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.Build.0 = Release|Win32
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.ActiveCfg = Release|x64
|
||||
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.Build.0 = Release|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|x64.Build.0 = Debug|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|Win32.Build.0 = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|x64.ActiveCfg = Release|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|x64.Build.0 = Release|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|x64.Build.0 = Debug|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|Win32.Build.0 = Release|Win32
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|x64.ActiveCfg = Release|x64
|
||||
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|x64.Build.0 = Release|x64
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.Build.0 = Debug|x64
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.Build.0 = Release|Win32
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.ActiveCfg = Release|x64
|
||||
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.Build.0 = Release|x64
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.Build.0 = Debug|x64
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.Build.0 = Release|Win32
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.ActiveCfg = Release|x64
|
||||
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.Build.0 = Release|x64
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.Build.0 = Debug|x64
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.Build.0 = Release|Win32
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.ActiveCfg = Release|x64
|
||||
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.Build.0 = Release|x64
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.Build.0 = Debug|x64
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64
|
||||
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
205
source/common/Makefile.in
Normal file
205
source/common/Makefile.in
Normal file
|
@ -0,0 +1,205 @@
|
|||
#******************************************************************************
|
||||
#
|
||||
# Copyright (C) 1999-2009, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
#******************************************************************************
|
||||
## Makefile.in for ICU - icuuc.so
|
||||
## Stephen F. Booth
|
||||
|
||||
## Source directory information
# @srcdir@ and @top_srcdir@ are substitution placeholders; presumably they are
# filled in when config.status generates Makefile from this Makefile.in
# (see the "Makefile:" rule further down) -- TODO confirm.
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
# Root of the build tree, relative to this directory.
top_builddir = ..
|
||||
|
||||
## All the flags and other definitions are included here.
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
## Build directory information
# Name of this directory relative to $(top_builddir); the regeneration rules
# below pass $(subdir)/<file> to config.status via CONFIG_FILES.
|
||||
subdir = common
|
||||
|
||||
# for service hook
# LOCALSVC_CPP is the optional source file that enables the hook;
# SVC_HOOK_INC is the makefile fragment generated by the $(SVC_HOOK_INC) rule
# below and pulled in with "-include".
|
||||
LOCALSVC_CPP=localsvc.cpp
|
||||
SVC_HOOK_INC=$(top_builddir)/common/svchook.mk
|
||||
|
||||
## Extra files to remove for 'make clean'
# Editor backups (*~), dependency files, import libraries and the generated
# service-hook fragment.
|
||||
CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) $(SVC_HOOK_INC)
|
||||
|
||||
## Target information
|
||||
|
||||
TARGET_STUBNAME=$(COMMON_STUBNAME)
|
||||
|
||||
# Static archive target; only defined when ENABLE_STATIC is non-empty.
ifneq ($(ENABLE_STATIC),)
|
||||
TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
|
||||
endif
|
||||
|
||||
# Shared-library targets; only defined when ENABLE_SHARED is non-empty.
# MIDDLE_SO_TARGET, FINAL_SO_TARGET and SHARED_OBJECT are not set in this
# file (presumably they come from the included icudefs.mk -- TODO confirm).
ifneq ($(ENABLE_SHARED),)
|
||||
SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
|
||||
ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
|
||||
|
||||
# When enabled, common.res becomes an extra prerequisite of the shared object
# (see the $(SHARED_OBJECT) rule) and is removed by clean-local.
ifeq ($(ENABLE_SO_VERSION_DATA),1)
|
||||
SO_VERSION_DATA = common.res
|
||||
endif
|
||||
|
||||
# Extra batch target, linked against $(BATCH_LIBS) instead of $(LIBS).
ifeq ($(OS390BATCH),1)
|
||||
BATCH_TARGET = $(BATCH_COMMON_TARGET)
|
||||
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
|
||||
endif # OS390BATCH
|
||||
|
||||
endif # ENABLE_SHARED
|
||||
|
||||
# Everything all-local builds; any part is empty when the corresponding
# conditional above did not define it.
ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET)
|
||||
|
||||
# Flags for the shared-library objects are taken from the SHAREDLIB* settings;
# the LIB* flags are appended to the general compiler flags.
DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
|
||||
DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
|
||||
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
|
||||
CFLAGS += $(LIBCFLAGS)
|
||||
CXXFLAGS += $(LIBCXXFLAGS)
|
||||
|
||||
# Out-of-source build: also search the build tree's common directory
# (presumably for generated headers such as unicode/platform.h -- TODO confirm).
ifneq ($(top_builddir),$(top_srcdir))
|
||||
CPPFLAGS += -I$(top_builddir)/common
|
||||
endif
|
||||
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/i18n $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
|
||||
DEFS += -DU_COMMON_IMPLEMENTATION
|
||||
LDFLAGS += $(LDFLAGSICUUC)
|
||||
|
||||
# $(LIBICUDT) is either stub data or the real DLL common data.
|
||||
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
|
||||
|
||||
# Object files that make up the library. STATIC_OBJECTS and DEPS below are
# derived from this list by suffix substitution, and the generated
# $(SVC_HOOK_INC) fragment may append one more object to it.
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o uinit.o uobject.o cmemory.o \
|
||||
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
|
||||
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o \
|
||||
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
|
||||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o locutil.o \
|
||||
bytestream.o stringpiece.o \
|
||||
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
||||
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o usc_impl.o unames.o \
|
||||
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
|
||||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
|
||||
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
|
||||
uidna.o usprep.o punycode.o \
|
||||
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
|
||||
ulist.o uloc_tag.o icudataver.o
|
||||
|
||||
## Header files to install
# Two patterns: headers under $(srcdir)/unicode and headers under unicode/
# relative to the directory make runs in.
|
||||
HEADERS = $(srcdir)/unicode/*.h unicode/*.h
|
||||
|
||||
# Same list as OBJECTS with the .o suffix replaced by .$(STATIC_O).
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
||||
# One .d dependency file per object; included at the end of this file.
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
# Optional local overrides; the leading '-' makes a missing file non-fatal.
-include Makefile.local
|
||||
|
||||
# Generated service-hook fragment (see the $(SVC_HOOK_INC) rule below).
-include $(SVC_HOOK_INC)
|
||||
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local install-library install-headers dist \
|
||||
dist-local check check-local
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
# Each standard target delegates to its "-local" counterpart in this file;
# "check" additionally depends on "all".
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
# Build every configured library target plus the generated unicode/platform.h.
all-local: $(ALL_TARGETS) unicode/platform.h
|
||||
|
||||
# Installing means installing the public headers and the library itself.
install-local: install-headers install-library
|
||||
|
||||
# Install the built libraries into $(DESTDIR)$(libdir).
#  - ENABLE_STATIC set: install the static archive $(TARGET).
#  - ENABLE_SHARED set: install $(FINAL_SO_TARGET); when $(SO_TARGET) differs
#    from it, replace any existing $(SO_TARGET) name with a symlink to the
#    final library, and do the same for $(MIDDLE_SO_TARGET) when that differs
#    too (note the MIDDLE check is nested inside the SO_TARGET check).
#  - IMPORT_LIB_EXT non-empty (inside the shared case): install
#    $(FINAL_IMPORT_LIB) and symlink $(IMPORT_LIB) / $(MIDDLE_IMPORT_LIB) to it
#    when their names differ.
install-library: all-local
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
|
||||
endif
|
||||
ifneq ($(ENABLE_SHARED),)
|
||||
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
|
||||
ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
|
||||
ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
|
||||
endif
|
||||
endif
|
||||
ifneq ($(IMPORT_LIB_EXT),)
|
||||
$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
|
||||
ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
|
||||
endif
|
||||
ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
|
||||
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Generate the service-hook makefile fragment.
# If $(top_srcdir)/common/$(LOCALSVC_CPP) exists, the fragment gets a CPPFLAGS
# line defining U_LOCAL_SERVICE_HOOK=1 and an OBJECTS line adding the matching
# .o file. The leading '-' and the trailing "; true" keep a missing source file
# from failing the build. The final echo appends a marker comment in either
# case, so the fragment exists after this rule has run.
$(SVC_HOOK_INC):
|
||||
@echo generating $@
|
||||
@-test -f $(top_srcdir)/common/$(LOCALSVC_CPP) && ( echo "have $(LOCALSVC_CPP) - U_LOCAL_SERVICE_HOOK=1" ; \
|
||||
echo 'CPPFLAGS +=-DU_LOCAL_SERVICE_HOOK=1' > $@ ; \
|
||||
echo 'OBJECTS += $(LOCALSVC_CPP:%.cpp=%.o)' >> $@ \
|
||||
) ; true
|
||||
@echo "# Autogenerated by Makefile" >> $@
|
||||
|
||||
install-headers:
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode
|
||||
@for file in $(HEADERS); do \
|
||||
echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \
|
||||
$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \
|
||||
done
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile icucfg.h unicode/platform.h $(SVC_HOOK_INC)
|
||||
|
||||
check-local:
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(SVC_HOOK_INC)
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
unicode/platform.h: $(srcdir)/unicode/platform.h.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
ifneq ($(ENABLE_STATIC),)
|
||||
$(TARGET): $(STATIC_OBJECTS)
|
||||
$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
|
||||
$(RANLIB) $@
|
||||
endif
|
||||
|
||||
ifneq ($(ENABLE_SHARED),)
|
||||
$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA)
|
||||
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
|
||||
|
||||
ifeq ($(OS390BATCH),1)
|
||||
$(BATCH_TARGET):$(OBJECTS)
|
||||
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
|
||||
endif # OS390BATCH
|
||||
endif # ENABLE_SHARED
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
||||
|
723
source/common/bmpset.cpp
Normal file
723
source/common/bmpset.cpp
Normal file
|
@ -0,0 +1,723 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: bmpset.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2007jan29
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "cmemory.h"
|
||||
#include "bmpset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
 * Build the lookup tables for the inversion list of the parent set.
 * The list is referenced, not copied.
 */
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    // Clear all bit/byte tables before they are filled in below.
    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));

    /*
     * Record the list indexes that bound the binary searches for
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first 3-byte-UTF-8 code point; lower code points are
     * looked up in the bit tables.
     * Each search starts at the index found for the previous boundary.
     * The last pair of indexes is for finding supplementary code points.
     */
    const int32_t lastIndex=listLength-1;
    int32_t boundaryIndex=findCodePoint(0x800, 0, lastIndex);
    list4kStarts[0]=boundaryIndex;
    for(int32_t block=1; block<=0x10; ++block) {
        boundaryIndex=findCodePoint(block<<12, boundaryIndex, lastIndex);
        list4kStarts[block]=boundaryIndex;
    }
    list4kStarts[0x11]=lastIndex;

    initBits();
    overrideIllegal();
}
|
||||
|
||||
/*
 * Construct from another BMPSet's precomputed tables, attached to a
 * different inversion list array. All four tables are copied verbatim;
 * nothing is recomputed from newParentList.
 */
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        list(newParentList), listLength(newParentListLength) {
    const BMPSet &src=otherBMPSet;
    uprv_memcpy(list4kStarts, src.list4kStarts, sizeof(list4kStarts));
    uprv_memcpy(bmpBlockBits, src.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(table7FF, src.table7FF, sizeof(table7FF));
    uprv_memcpy(asciiBytes, src.asciiBytes, sizeof(asciiBytes));
}
|
||||
|
||||
// Empty destructor: this class releases nothing, including the list array.
BMPSet::~BMPSet() {}
|
||||
|
||||
/*
 * Set bits in a bit rectangle in "vertical" bit organization:
 * the value v is represented by bit (v>>6) of table[v&0x3f].
 * Sets the bits for all values in start..limit-1.
 *
 * Preconditions: 0<=start<limit<=0x800
 *
 * All shifts are done on unsigned operands, and no shift by 32 is evaluated
 * when limit==0x800 (limitLead==32), avoiding undefined behavior.
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    int32_t lead=start>>6;      // bit index, 0..31
    int32_t trail=start&0x3f;   // table word index, 0..63

    // Set one bit indicating an all-one block.
    uint32_t bits=(uint32_t)1<<lead;
    if((start+1)==limit) {  // Single-character shortcut.
        table[trail]|=bits;
        return;
    }

    int32_t limitLead=limit>>6;
    int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
    } else {
        // Partial vertical bit column,
        // followed by a bit rectangle,
        // followed by another partial vertical bit column.
        if(trail>0) {
            do {
                table[trail++]|=bits;
            } while(trail<64);
            ++lead;
        }
        if(lead<limitLead) {
            // Here lead<=31, so the shift is in range.
            bits=~(((uint32_t)1<<lead)-1);
            if(limitLead<0x20) {
                bits&=((uint32_t)1<<limitLead)-1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
            }
        }
        // limit<=0x800: limitTrail>0 implies limitLead<=31.
        // If limit==0x800 then limitLead==32 and limitTrail==0,
        // and there is no final partial column to set.
        if(limitTrail>0) {
            bits=(uint32_t)1<<limitLead;
            for(trail=0; trail<limitTrail; ++trail) {
                table[trail]|=bits;
            }
        }
    }
}
|
||||
|
||||
void BMPSet::initBits() {
|
||||
UChar32 start, limit;
|
||||
int32_t listIndex=0;
|
||||
|
||||
// Set asciiBytes[].
|
||||
do {
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
if(start>=0x80) {
|
||||
break;
|
||||
}
|
||||
do {
|
||||
asciiBytes[start++]=1;
|
||||
} while(start<limit && start<0x80);
|
||||
} while(limit<=0x80);
|
||||
|
||||
// Set table7FF[].
|
||||
while(start<0x800) {
|
||||
set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
|
||||
if(limit>0x800) {
|
||||
start=0x800;
|
||||
break;
|
||||
}
|
||||
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
// Set bmpBlockBits[].
|
||||
int32_t minStart=0x800;
|
||||
while(start<0x10000) {
|
||||
if(limit>0x10000) {
|
||||
limit=0x10000;
|
||||
}
|
||||
|
||||
if(start<minStart) {
|
||||
start=minStart;
|
||||
}
|
||||
if(start<limit) { // Else: Another range entirely in a known mixed-value block.
|
||||
if(start&0x3f) {
|
||||
// Mixed-value block of 64 code points.
|
||||
start>>=6;
|
||||
bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
|
||||
start=(start+1)<<6; // Round up to the next block boundary.
|
||||
minStart=start; // Ignore further ranges in this block.
|
||||
}
|
||||
if(start<limit) {
|
||||
if(start<(limit&~0x3f)) {
|
||||
// Multiple all-ones blocks of 64 code points each.
|
||||
set32x64Bits(bmpBlockBits, start>>6, limit>>6);
|
||||
}
|
||||
|
||||
if(limit&0x3f) {
|
||||
// Mixed-value block of 64 code points.
|
||||
limit>>=6;
|
||||
bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
|
||||
limit=(limit+1)<<6; // Round up to the next block boundary.
|
||||
minStart=limit; // Ignore further ranges in this block.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(limit==0x10000) {
|
||||
break;
|
||||
}
|
||||
|
||||
start=list[listIndex++];
|
||||
if(listIndex<listLength) {
|
||||
limit=list[listIndex++];
|
||||
} else {
|
||||
limit=0x110000;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Override some bits and bytes to the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
* No need to set 0 values where they were reset to 0 in the constructor
|
||||
* and not modified by initBits().
|
||||
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
|
||||
* Need to set 0 values for surrogates D800..DFFF.
|
||||
*/
|
||||
void BMPSet::overrideIllegal() {
|
||||
uint32_t bits, mask;
|
||||
int32_t i;
|
||||
|
||||
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
|
||||
// contains(FFFD)==TRUE
|
||||
for(i=0x80; i<0xc0; ++i) {
|
||||
asciiBytes[i]=1;
|
||||
}
|
||||
|
||||
bits=3; // Lead bytes 0xC0 and 0xC1.
|
||||
for(i=0; i<64; ++i) {
|
||||
table7FF[i]|=bits;
|
||||
}
|
||||
|
||||
bits=1; // Lead byte 0xE0.
|
||||
for(i=0; i<32; ++i) { // First half of 4k block.
|
||||
bmpBlockBits[i]|=bits;
|
||||
}
|
||||
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
bits=1<<0xd;
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
|
||||
}
|
||||
} else {
|
||||
// contains(FFFD)==FALSE
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]&=mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    /* Examples:
                                       findCodePoint(c)
       set              list[]         c=0 1 3 4 7 8
       ===              ==============   ===========
       []               [110000]         0 0 0 0 0 0
       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
       [:Any:]          [0, 110000]      1 1 1 1 1 1
     */

    // Return the smallest i such that c < list[i]. Assume
    // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
    if (c < list[lo]) {
        return lo;
    }
    // High runner test. c is often after the last range, so an
    // initial check for this condition pays off.
    if (lo >= hi || c >= list[hi-1]) {
        return hi;
    }
    // Bisect while maintaining list[lo] <= c < list[hi];
    // done when lo and hi are adjacent.
    int32_t mid;
    while ((mid = (lo + hi) >> 1) != lo) {
        if (c < list[mid]) {
            hi = mid;
        } else {
            lo = mid;
        }
    }
    return hi;
}
|
||||
|
||||
/*
 * Test whether c is in the set, using the fastest table that covers c.
 */
UBool
BMPSet::contains(UChar32 c) const {
    const uint32_t cp=(uint32_t)c;  // Negative values become large and fall through to FALSE.

    if(cp<=0x7f) {
        return (UBool)asciiBytes[c];
    }
    if(cp<=0x7ff) {
        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
    }
    if(cp<0xd800 || (c>=0xe000 && c<=0xffff)) {
        int lead=c>>12;
        uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        if(twoBits>1) {
            // Mixed block: look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
        }
        // All 64 code points with the same bits 15..6
        // are either in the set or not.
        return (UBool)twoBits;
    }
    if(cp<=0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    }
    // Out-of-range code points get FALSE, consistent with long-standing
    // behavior of UnicodeSet::contains(c).
    return FALSE;
}
|
||||
|
||||
/*
|
||||
* Check for sufficient length for trail unit for each surrogate pair.
|
||||
* Handle single surrogates as surrogate code points as usual in ICU.
|
||||
*/
|
||||
const UChar *
|
||||
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
|
||||
UChar c, c2;
|
||||
|
||||
if(spanCondition) {
|
||||
// span
|
||||
do {
|
||||
c=*s;
|
||||
if(c<=0x7f) {
|
||||
if(!asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits==0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
|
||||
// surrogate code point
|
||||
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
++s;
|
||||
}
|
||||
} while(++s<limit);
|
||||
} else {
|
||||
// span not
|
||||
do {
|
||||
c=*s;
|
||||
if(c<=0x7f) {
|
||||
if(asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
|
||||
// surrogate code point
|
||||
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
++s;
|
||||
}
|
||||
} while(++s<limit);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Symmetrical with span(). */
|
||||
const UChar *
|
||||
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
|
||||
UChar c, c2;
|
||||
|
||||
if(spanCondition) {
|
||||
// span
|
||||
for(;;) {
|
||||
c=*(--limit);
|
||||
if(c<=0x7f) {
|
||||
if(!asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits==0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
|
||||
// surrogate code point
|
||||
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
--limit;
|
||||
}
|
||||
if(s==limit) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// span not
|
||||
for(;;) {
|
||||
c=*(--limit);
|
||||
if(c<=0x7f) {
|
||||
if(asciiBytes[c]) {
|
||||
break;
|
||||
}
|
||||
} else if(c<=0x7ff) {
|
||||
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
|
||||
break;
|
||||
}
|
||||
} else if(c<0xd800 || c>=0xe000) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=0) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
|
||||
// surrogate code point
|
||||
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// surrogate pair
|
||||
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
|
||||
break;
|
||||
}
|
||||
--limit;
|
||||
}
|
||||
if(s==limit) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
return limit+1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Precheck for sufficient trail bytes at end of string only once per span.
|
||||
* Check validity.
|
||||
*/
|
||||
const uint8_t *
|
||||
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
|
||||
const uint8_t *limit=s+length;
|
||||
uint8_t b=*s;
|
||||
if((int8_t)b>=0) {
|
||||
// Initial all-ASCII span.
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b] || ++s==limit) {
|
||||
return s;
|
||||
}
|
||||
b=*s;
|
||||
} while((int8_t)b>=0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b] || ++s==limit) {
|
||||
return s;
|
||||
}
|
||||
b=*s;
|
||||
} while((int8_t)b>=0);
|
||||
}
|
||||
length=(int32_t)(limit-s);
|
||||
}
|
||||
|
||||
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
|
||||
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
|
||||
}
|
||||
|
||||
const uint8_t *limit0=limit;
|
||||
|
||||
/*
|
||||
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
|
||||
* or runs into a lead byte.
|
||||
* In the span loop compare s with limit only once
|
||||
* per multi-byte character.
|
||||
*
|
||||
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
|
||||
* including it if that is part of the span, otherwise set limit0 to before
|
||||
* the truncated sequence.
|
||||
*/
|
||||
b=*(limit-1);
|
||||
if((int8_t)b<0) {
|
||||
// b>=0x80: lead or trail byte
|
||||
if(b<0xc0) {
|
||||
// single trail byte, check for preceding 3- or 4-byte lead byte
|
||||
if(length>=2 && (b=*(limit-2))>=0xe0) {
|
||||
limit-=2;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
|
||||
// 4-byte lead byte with only two trail bytes
|
||||
limit-=3;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// lead byte with no trail bytes
|
||||
--limit;
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
limit0=limit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t t1, t2, t3;
|
||||
|
||||
while(s<limit) {
|
||||
b=*s;
|
||||
if(b<0xc0) {
|
||||
// ASCII; or trail bytes with the result of contains(FFFD).
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b]) {
|
||||
return s;
|
||||
} else if(++s==limit) {
|
||||
return limit0;
|
||||
}
|
||||
b=*s;
|
||||
} while(b<0xc0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b]) {
|
||||
return s;
|
||||
} else if(++s==limit) {
|
||||
return limit0;
|
||||
}
|
||||
b=*s;
|
||||
} while(b<0xc0);
|
||||
}
|
||||
}
|
||||
++s; // Advance past the lead byte.
|
||||
if(b>=0xe0) {
|
||||
if(b<0xf0) {
|
||||
if( /* handle U+0000..U+FFFF inline */
|
||||
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
|
||||
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
|
||||
) {
|
||||
b&=0xf;
|
||||
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with this lead byte and middle trail byte
|
||||
// are either in the set or not.
|
||||
if(twoBits!=(uint32_t)spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
UChar32 c=(b<<12)|(t1<<6)|t2;
|
||||
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
}
|
||||
s+=2;
|
||||
continue;
|
||||
}
|
||||
} else if( /* handle U+10000..U+10FFFF inline */
|
||||
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
|
||||
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
|
||||
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
|
||||
) {
|
||||
// Give an illegal sequence the same value as the result of contains(FFFD).
|
||||
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
|
||||
if( ( (0x10000<=c && c<=0x10ffff) ?
|
||||
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
|
||||
asciiBytes[0x80]
|
||||
) != spanCondition
|
||||
) {
|
||||
return s-1;
|
||||
}
|
||||
s+=3;
|
||||
continue;
|
||||
}
|
||||
} else /* 0xc0<=b<0xe0 */ {
|
||||
if( /* handle U+0000..U+07FF inline */
|
||||
(t1=(uint8_t)(*s-0x80)) <= 0x3f
|
||||
) {
|
||||
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
++s;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Give an illegal sequence the same value as the result of contains(FFFD).
|
||||
// Handle each byte of an illegal sequence separately to simplify the code;
|
||||
// no need to optimize error handling.
|
||||
if(asciiBytes[0x80]!=spanCondition) {
|
||||
return s-1;
|
||||
}
|
||||
}
|
||||
|
||||
return limit0;
|
||||
}
|
||||
|
||||
/*
|
||||
* While going backwards through UTF-8 optimize only for ASCII.
|
||||
* Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
|
||||
* possible to tell from the last byte in a multi-byte sequence how many
|
||||
* preceding bytes there should be. Therefore, going backwards through UTF-8
|
||||
* is much harder than going forward.
|
||||
*/
|
||||
int32_t
|
||||
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
|
||||
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
|
||||
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
|
||||
}
|
||||
|
||||
uint8_t b;
|
||||
|
||||
do {
|
||||
b=s[--length];
|
||||
if((int8_t)b>=0) {
|
||||
// ASCII sub-span
|
||||
if(spanCondition) {
|
||||
do {
|
||||
if(!asciiBytes[b]) {
|
||||
return length+1;
|
||||
} else if(length==0) {
|
||||
return 0;
|
||||
}
|
||||
b=s[--length];
|
||||
} while((int8_t)b>=0);
|
||||
} else {
|
||||
do {
|
||||
if(asciiBytes[b]) {
|
||||
return length+1;
|
||||
} else if(length==0) {
|
||||
return 0;
|
||||
}
|
||||
b=s[--length];
|
||||
} while((int8_t)b>=0);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t prev=length;
|
||||
UChar32 c;
|
||||
if(b<0xc0) {
|
||||
// trail byte: collect a multi-byte character
|
||||
c=utf8_prevCharSafeBody(s, 0, &length, b, -1);
|
||||
if(c<0) {
|
||||
c=0xfffd;
|
||||
}
|
||||
} else {
|
||||
// lead byte in last-trail position
|
||||
c=0xfffd;
|
||||
}
|
||||
// c is a valid code point, not ASCII, not a surrogate
|
||||
if(c<=0x7ff) {
|
||||
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
} else if(c<=0xffff) {
|
||||
int lead=c>>12;
|
||||
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
|
||||
if(twoBits<=1) {
|
||||
// All 64 code points with the same bits 15..6
|
||||
// are either in the set or not.
|
||||
if(twoBits!=(uint32_t)spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
} else {
|
||||
// Look up the code point in its 4k block of code points.
|
||||
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
|
||||
return prev+1;
|
||||
}
|
||||
}
|
||||
} while(length>0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
161
source/common/bmpset.h
Normal file
161
source/common/bmpset.h
Normal file
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: bmpset.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2007jan29
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __BMPSET_H__
|
||||
#define __BMPSET_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Helper class for frozen UnicodeSets, implements contains() and span()
|
||||
* optimized for BMP code points. Structured to be UTF-8-friendly.
|
||||
*
|
||||
* ASCII: Look up bytes.
|
||||
* 2-byte characters: Bits organized vertically.
|
||||
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
|
||||
* with mixed for illegal ranges.
|
||||
* Supplementary characters: Call contains() on the parent set.
|
||||
*/
|
||||
class BMPSet : public UMemory {
|
||||
public:
|
||||
BMPSet(const int32_t *parentList, int32_t parentListLength);
|
||||
BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
|
||||
virtual ~BMPSet();
|
||||
|
||||
virtual UBool contains(UChar32 c) const;
|
||||
|
||||
/*
|
||||
* Span the initial substring for which each character c has spanCondition==contains(c).
|
||||
* It must be s<limit and spanCondition==0 or 1.
|
||||
* @return The string pointer which limits the span.
|
||||
*/
|
||||
const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the trailing substring for which each character c has spanCondition==contains(c).
|
||||
* It must be s<limit and spanCondition==0 or 1.
|
||||
* @return The string pointer which starts the span.
|
||||
*/
|
||||
const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the initial substring for which each character c has spanCondition==contains(c).
|
||||
* It must be length>0 and spanCondition==0 or 1.
|
||||
* @return The string pointer which limits the span.
|
||||
*/
|
||||
const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
|
||||
|
||||
/*
|
||||
* Span the trailing substring for which each character c has spanCondition==contains(c).
|
||||
* It must be length>0 and spanCondition==0 or 1.
|
||||
* @return The start of the span.
|
||||
*/
|
||||
int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
|
||||
|
||||
private:
|
||||
void initBits();
|
||||
void overrideIllegal();
|
||||
|
||||
/**
|
||||
* Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
|
||||
* binary search is restricted for finding code points in a certain range.
|
||||
*
|
||||
* For restricting the search for finding in the range start..end,
|
||||
* pass in
|
||||
* lo=findCodePoint(start) and
|
||||
* hi=findCodePoint(end)
|
||||
* with 0<=lo<=hi<len.
|
||||
* findCodePoint(c) defaults to lo=0 and hi=len-1.
|
||||
*
|
||||
* @param c a character in a subrange of MIN_VALUE..MAX_VALUE
|
||||
* @param lo The lowest index to be returned.
|
||||
* @param hi The highest index to be returned.
|
||||
* @return the smallest integer i in the range lo..hi,
|
||||
* inclusive, such that c < list[i]
|
||||
*/
|
||||
int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
|
||||
|
||||
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
|
||||
|
||||
/*
|
||||
* One byte per ASCII character, or trail byte in lead position.
|
||||
* 0 or 1 for ASCII characters.
|
||||
* The value for trail bytes is the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
*/
|
||||
UBool asciiBytes[0xc0];
|
||||
|
||||
/*
|
||||
* One bit per code point from U+0000..U+07FF.
|
||||
* The bits are organized vertically; consecutive code points
|
||||
* correspond to the same bit positions in consecutive table words.
|
||||
* With code point parts
|
||||
* lead=c{10..6}
|
||||
* trail=c{5..0}
|
||||
* it is set.contains(c)==(table7FF[trail] bit lead)
|
||||
*
|
||||
* Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
|
||||
* for faster validity checking at runtime.
|
||||
*/
|
||||
uint32_t table7FF[64];
|
||||
|
||||
/*
|
||||
* One bit per 64 BMP code points.
|
||||
* The bits are organized vertically; consecutive 64-code point blocks
|
||||
* correspond to the same bit position in consecutive table words.
|
||||
* With code point parts
|
||||
* lead=c{15..12}
|
||||
* t1=c{11..6}
|
||||
* test bits (lead+16) and lead in bmpBlockBits[t1].
|
||||
* If the upper bit is 0, then the lower bit indicates if contains(c)
|
||||
* for all code points in the 64-block.
|
||||
* If the upper bit is 1, then the block is mixed and set.contains(c)
|
||||
* must be called.
|
||||
*
|
||||
* Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
|
||||
* the result of contains(FFFD) for faster validity checking at runtime.
|
||||
*/
|
||||
uint32_t bmpBlockBits[64];
|
||||
|
||||
/*
|
||||
* Inversion list indexes for restricted binary searches in
|
||||
* findCodePoint(), from
|
||||
* findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
|
||||
* U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
|
||||
* always looked up in the bit tables.
|
||||
* The last pair of indexes is for finding supplementary code points.
|
||||
*/
|
||||
int32_t list4kStarts[18];
|
||||
|
||||
/*
|
||||
* The inversion list of the parent set, for the slower contains() implementation
|
||||
* for mixed BMP blocks and for supplementary code points.
|
||||
* The list is terminated with list[listLength-1]=0x110000.
|
||||
*/
|
||||
const int32_t *list;
|
||||
int32_t listLength;
|
||||
};
|
||||
|
||||
/*
 * Membership test via the inversion list: c is in the set if and only if
 * the index returned by findCodePoint(c, lo, hi) is odd.
 */
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
    const int32_t index=findCodePoint(c, lo, hi);
    return (UBool)(index & 1);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
290
source/common/brkeng.cpp
Normal file
290
source/common/brkeng.cpp
Normal file
|
@ -0,0 +1,290 @@
|
|||
/**
|
||||
************************************************************************************
|
||||
* Copyright (C) 2006-2007, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
************************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "dictbe.h"
|
||||
#include "triedict.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "uvector.h"
|
||||
#include "umutex.h"
|
||||
#include "uresimp.h"
|
||||
#include "ubrkimpl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Default constructor; no work is done here.
LanguageBreakEngine::LanguageBreakEngine() {}
|
||||
|
||||
// Destructor; empty body.
LanguageBreakEngine::~LanguageBreakEngine() {}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Default constructor; no work is done here.
LanguageBreakFactory::LanguageBreakFactory() {}
|
||||
|
||||
// Destructor; empty body.
LanguageBreakFactory::~LanguageBreakFactory() {}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Start with no set allocated for any break type; the status argument is unused.
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
    const int32_t typeCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    int32_t type = 0;
    while (type < typeCount) {
        fHandled[type++] = 0;
    }
}
|
||||
|
||||
// Delete the per-break-type sets; deleting a null pointer is a no-op.
UnhandledEngine::~UnhandledEngine() {
    const int32_t typeCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    for (int32_t type = 0; type < typeCount; ++type) {
        delete fHandled[type];
    }
}
|
||||
|
||||
// TRUE if breakType is a valid index, a set exists for it, and that set contains c.
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
    if (breakType < 0 || breakType >= (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
        return FALSE;
    }
    if (fHandled[breakType] == 0) {
        return FALSE;
    }
    return fHandled[breakType]->contains(c);
}
|
||||
|
||||
/*
 * Advance the text position over the run of characters contained in the
 * set for breakType, without recording any breaks.
 * Moves backward (not past startPos) if reverse is set,
 * otherwise forward (not past endPos).
 *
 * Does nothing if breakType is out of range or if no set has been created
 * for it yet: the set pointer is null until handleCharacter() allocates it,
 * so it is checked here just as handles() checks it.
 *
 * @return always 0, the number of breaks found.
 */
int32_t
UnhandledEngine::findBreaks( UText *text,
                             int32_t startPos,
                             int32_t endPos,
                             UBool reverse,
                             int32_t breakType,
                             UStack &/*foundBreaks*/ ) const {
    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
            && fHandled[breakType] != 0) {
        UChar32 c = utext_current32(text);
        if (reverse) {
            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
                c = utext_previous32(text);
            }
        }
        else {
            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
                c = utext_current32(text);
            }
        }
    }
    return 0;
}
|
||||
|
||||
void
|
||||
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
|
||||
if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
|
||||
if (fHandled[breakType] == 0) {
|
||||
fHandled[breakType] = new UnicodeSet();
|
||||
if (fHandled[breakType] == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!fHandled[breakType]->contains(c)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Apply the entire script of the character.
|
||||
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
||||
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
 * Construct a factory with no engine stack; the stack is created later by
 * getEngineFor().
 */
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/)
    : fEngines(0) {
}
|
||||
|
||||
/**
 * Destroy the engine stack, if one was ever created.
 */
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
    // delete on a null pointer does nothing, so no guard is needed.
    delete fEngines;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
U_CDECL_BEGIN
|
||||
/**
 * Element deleter passed to the UStack of engines: destroys obj as a
 * LanguageBreakEngine.
 */
static void U_CALLCONV _deleteEngine(void *obj) {
    const U_NAMESPACE_QUALIFIER LanguageBreakEngine *engine =
        (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj;
    delete engine;
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Scan a stack of engines from the top down and return the first one that
 * handles c for breakType, or NULL if none does. The caller is responsible
 * for holding the lock that protects the stack.
 */
static const LanguageBreakEngine *
findEngineInStack(UStack *engines, UChar32 c, int32_t breakType) {
    for (int32_t i = engines->size() - 1; i >= 0; --i) {
        const LanguageBreakEngine *candidate =
            (const LanguageBreakEngine *)(engines->elementAt(i));
        if (candidate != NULL && candidate->handles(c, breakType)) {
            return candidate;
        }
    }
    return NULL;
}

/**
 * Return an engine that handles c for breakType, creating the engine stack
 * and loading a new engine on demand.
 *
 * @param c         A character that begins a run for which an engine is sought.
 * @param breakType The kind of text break for which an engine is sought.
 * @return An engine held in this factory's stack, or NULL if none could be
 *         found or created.
 */
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
    UBool needsInit;
    const LanguageBreakEngine *lbe = NULL;
    UErrorCode  status = U_ZERO_ERROR;

    // TODO: The global mutex should not be used.
    // The global mutex should only be used for short periods.
    // An ICULanguageBreakFactory-specific mutex should be used.
    umtx_lock(NULL);
    needsInit = (UBool)(fEngines == NULL);
    if (!needsInit) {
        lbe = findEngineInStack(fEngines, c, breakType);
    }
    umtx_unlock(NULL);

    if (lbe != NULL) {
        return lbe;
    }

    if (needsInit) {
        // Build the stack outside the lock, then publish it under the lock
        // unless another thread got there first.
        UStack  *engines = new UStack(_deleteEngine, NULL, status);
        if (U_SUCCESS(status) && engines == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        else if (U_FAILURE(status)) {
            delete engines;
            engines = NULL;
        }
        else {
            umtx_lock(NULL);
            if (fEngines == NULL) {
                fEngines = engines;
                engines = NULL;
            }
            umtx_unlock(NULL);
            delete engines;     // non-null only if we lost the race
        }
    }

    if (fEngines == NULL) {
        return NULL;
    }

    // We didn't find an engine the first time through, or there was no
    // stack. Create an engine.
    const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);

    // Now get the lock, and see if someone else has created it in the
    // meantime
    umtx_lock(NULL);
    lbe = findEngineInStack(fEngines, c, breakType);
    if (lbe == NULL && newlbe != NULL) {
        fEngines->push((void *)newlbe, status);
        lbe = newlbe;
        newlbe = NULL;
    }
    umtx_unlock(NULL);

    // Non-null only if another thread's engine was found, making ours redundant.
    delete newlbe;

    return lbe;
}
|
||||
|
||||
/**
 * Create a dictionary-based engine for the script of c.
 *
 * Only USCRIPT_THAI produces an engine here; for any other script the loaded
 * dictionary is deleted and NULL is returned.
 *
 * @param c         A character whose script selects the engine.
 * @param breakType Passed through to loadDictionaryFor().
 * @return A newly allocated engine, or NULL on any failure.
 */
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType);
    if (dict == NULL) {
        return NULL;
    }

    const LanguageBreakEngine *engine = NULL;
    switch(code) {
    case USCRIPT_THAI:
        engine = new ThaiBreakEngine(dict, status);
        break;
    default:
        break;
    }

    if (engine == NULL) {
        // No engine was created for the dictionary, so dispose of it here.
        delete dict;
        return NULL;
    }
    if (U_FAILURE(status)) {
        delete engine;
        return NULL;
    }
    return engine;
}
|
||||
|
||||
/**
 * Load the dictionary registered for a script in the break-iterator data.
 *
 * Looks up "dictionaries/<script short name>" in the brkitr resource tree to
 * obtain a file name, splits it at the first '.' into name and extension,
 * opens that data item and wraps it in a CompactTrieDictionary.
 *
 * @param script The script whose dictionary is wanted.
 * @return A newly allocated dictionary, or NULL on any failure.
 */
const CompactTrieDictionary *
ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
    UErrorCode status = U_ZERO_ERROR;
    // Open root from brkitr tree.
    char dictnbuff[256];
    char ext[4]={'\0'};

    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
    int32_t dictnlength = 0;
    const UChar *dictfname = ures_getString(b, &dictnlength, &status);
    if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
        dictnlength = 0;
        status = U_BUFFER_OVERFLOW_ERROR;
    }
    if (U_SUCCESS(status) && dictfname) {
        UChar* extStart=u_strchr(dictfname, 0x002e);
        int len = 0;
        if(extStart!=NULL){
            len = extStart-dictfname;
            u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
            u_UCharsToChars(dictfname, dictnbuff, len);
        }
        dictnbuff[len]=0; // nul terminate
    }
    ures_close(b);

    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }

    const CompactTrieDictionary *dict = new CompactTrieDictionary(file, status);
    if (dict == NULL) {
        // Allocation failed, so no object exists to own the opened data;
        // close it here rather than leaking it.
        udata_close(file);
        return NULL;
    }
    if (U_FAILURE(status)) {
        // NOTE(review): presumably the dictionary releases `file` in its
        // destructor; confirm against CompactTrieDictionary.
        delete dict;
        dict = NULL;
    }
    return dict;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
292
source/common/brkeng.h
Normal file
292
source/common/brkeng.h
Normal file
|
@ -0,0 +1,292 @@
|
|||
/**
|
||||
************************************************************************************
|
||||
* Copyright (C) 2006-2007, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef BRKENG_H
|
||||
#define BRKENG_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
#include "unicode/uscript.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeSet;
|
||||
class UStack;
|
||||
class CompactTrieDictionary;
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>LanguageBreakEngines implement language-specific knowledge for
|
||||
* finding text boundaries within a run of characters belonging to a
|
||||
* specific set. The boundaries will be of a specific kind, e.g. word,
|
||||
* line, etc.</p>
|
||||
*
|
||||
* <p>LanguageBreakEngines should normally be implemented so as to
|
||||
* be shared between threads without locking.</p>
|
||||
*/
|
||||
class LanguageBreakEngine : public UMemory {
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
LanguageBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~LanguageBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks A UStack for the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakFactory
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
|
||||
* that can determine breaks for characters in a specific set, if
|
||||
* such an object can be found.</p>
|
||||
*
|
||||
* <p>If a LanguageBreakFactory is to be shared between threads,
|
||||
* appropriate synchronization must be used; there is none internal
|
||||
* to the factory.</p>
|
||||
*
|
||||
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
|
||||
* normally be shared between threads without synchronization, unless
|
||||
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
|
||||
*
|
||||
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
|
||||
* it returns when it itself is deleted, unless the specific subclass of
|
||||
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
|
||||
* not be deleted until the LanguageBreakEngines it has returned are no
|
||||
* longer needed.</p>
|
||||
*/
|
||||
class LanguageBreakFactory : public UMemory {
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
LanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~LanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Find and return a LanguageBreakEngine that can find the desired
|
||||
* kind of break for the set of characters to which the supplied
|
||||
* character belongs. It is up to the set of available engines to
|
||||
* determine what the sets of characters are.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* UnhandledEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
|
||||
* handles characters that no other LanguageBreakEngine is available to
|
||||
* handle. It is told the character and the type of break; at its
|
||||
* discretion it may handle more than the specified character (e.g.,
|
||||
* the entire script to which that character belongs).</p>
|
||||
*
|
||||
* <p>UnhandledEngines may not be shared between threads without
|
||||
* external synchronization.</p>
|
||||
*/
|
||||
|
||||
class UnhandledEngine : public LanguageBreakEngine {
|
||||
private:
|
||||
|
||||
/**
|
||||
* The sets of characters handled, for each break type
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet *fHandled[4];
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
UnhandledEngine(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~UnhandledEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks A UStack for the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
/**
|
||||
* <p>Tell the engine to handle a particular character and break type.</p>
|
||||
*
|
||||
* @param c A character which the engine should handle
|
||||
* @param breakType The type of text break for which the engine should handle c
|
||||
*/
|
||||
virtual void handleCharacter(UChar32 c, int32_t breakType);
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* ICULanguageBreakFactory
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
|
||||
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
|
||||
* data in the ICU data file.</p>
|
||||
*/
|
||||
class ICULanguageBreakFactory : public LanguageBreakFactory {
|
||||
private:
|
||||
|
||||
/**
|
||||
* The stack of break engines created by this factory
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UStack *fEngines;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Standard constructor.</p>
|
||||
*
|
||||
*/
|
||||
ICULanguageBreakFactory(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~ICULanguageBreakFactory();
|
||||
|
||||
/**
|
||||
* <p>Find and return a LanguageBreakEngine that can find the desired
|
||||
* kind of break for the set of characters to which the supplied
|
||||
* character belongs. It is up to the set of available engines to
|
||||
* determine what the sets of characters are.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* <p>Create a LanguageBreakEngine for the set of characters to which
|
||||
* the supplied character belongs, for the specified break type.</p>
|
||||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
|
||||
|
||||
/**
|
||||
* <p>Create a CompactTrieDictionary for the specified script and break type.</p>
|
||||
*
|
||||
* @param script An ISO 15924 script code that identifies the dictionary to be
|
||||
* created.
|
||||
* @param breakType The kind of text break for which a dictionary is
|
||||
* sought.
|
||||
* @return A CompactTrieDictionary with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int32_t breakType);
|
||||
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* BRKENG_H */
|
||||
#endif
|
462
source/common/brkiter.cpp
Normal file
462
source/common/brkiter.cpp
Normal file
|
@ -0,0 +1,462 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2009, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* File TXTBDRY.CPP
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/18/97 aliu Converted from OpenClass. Added DONE.
|
||||
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
// *****************************************************************************
|
||||
// This file was generated from the java source file BreakIterator.java
|
||||
// *****************************************************************************
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
#include "servloc.h"
|
||||
#include "locbased.h"
|
||||
#include "uresimp.h"
|
||||
#include "uassert.h"
|
||||
#include "ubrkimpl.h"
|
||||
|
||||
// *****************************************************************************
|
||||
// class BreakIterator
|
||||
// This class implements methods for finding the location of boundaries in text.
|
||||
// Instances of BreakIterator maintain a current position and scan over text
|
||||
// returning the index of characters where boundaries occur.
|
||||
// *****************************************************************************
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Build a RuleBasedBreakIterator from the break-iterator resource data.
 *
 * Looks up "boundaries/<type>" for the locale to get a rules file name,
 * splits it at the first '.' into name and extension, opens that data item
 * and constructs the iterator from it.
 *
 * @param loc    Locale whose break rules are wanted.
 * @param type   Key under "boundaries" naming the rules file.
 * @param kind   Break type recorded on the result via setBreakType().
 * @param status In/out error code; nothing is done if it is already a failure.
 * @return A new iterator, or NULL on failure.
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY]={'\0'};
    // Initialized so the length checks below read a defined value even if
    // ures_getString() returns early without storing a length.
    int32_t size = 0;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName  = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
        brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            uprv_strncpy(actualLocale,
                ures_getLocale(brkName, &status),
                sizeof(actualLocale)/sizeof(actualLocale[0]));
            // A strncpy-style copy does not terminate when the source fills
            // the buffer; force termination.
            actualLocale[sizeof(actualLocale)/sizeof(actualLocale[0]) - 1] = 0;

            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = extStart-brkfname;
                u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {        // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // No iterator exists to own the opened data, so close it here.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
|
||||
|
||||
// Creates a break iterator for word breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_WORD, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for line breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_LINE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for character breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_CHARACTER, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for sentence breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_SENTENCE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Creates a break iterator for title casing breaks.
|
||||
BreakIterator* U_EXPORT2
|
||||
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
|
||||
{
|
||||
return createInstance(key, UBRK_TITLE, status);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
// Gets all the available locales that has localized text boundary data.
|
||||
const Locale* U_EXPORT2
|
||||
BreakIterator::getAvailableLocales(int32_t& count)
|
||||
{
|
||||
return Locale::getAvailableLocales(count);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Gets the objectLocale display name in the default locale language.
|
||||
UnicodeString& U_EXPORT2
|
||||
BreakIterator::getDisplayName(const Locale& objectLocale,
|
||||
UnicodeString& name)
|
||||
{
|
||||
return objectLocale.getDisplayName(name);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Gets the objectLocale display name in the displayLocale language.
|
||||
UnicodeString& U_EXPORT2
|
||||
BreakIterator::getDisplayName(const Locale& objectLocale,
|
||||
const Locale& displayLocale,
|
||||
UnicodeString& name)
|
||||
{
|
||||
return objectLocale.getDisplayName(displayLocale, name);
|
||||
}
|
||||
|
||||
// ------------------------------------------
|
||||
//
|
||||
// Default constructor and destructor
|
||||
//
|
||||
//-------------------------------------------
|
||||
|
||||
/**
 * Default constructor: clears the buffer-clone flag and sets both locale ID
 * strings to empty.
 */
BreakIterator::BreakIterator()
{
    fBufferClone = FALSE;
    actualLocale[0] = 0;
    validLocale[0] = 0;
}
|
||||
|
||||
/**
 * Destructor; this base class has nothing to release here.
 */
BreakIterator::~BreakIterator()
{
    // intentionally empty
}
|
||||
|
||||
// ------------------------------------------
|
||||
//
|
||||
// Registration
|
||||
//
|
||||
//-------------------------------------------
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
|
||||
protected:
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
};
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
class ICUBreakIteratorService : public ICULocaleService {
|
||||
public:
|
||||
ICUBreakIteratorService()
|
||||
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
registerFactory(new ICUBreakIteratorFactory(), status);
|
||||
}
|
||||
|
||||
virtual UObject* cloneInstance(UObject* instance) const {
|
||||
return ((BreakIterator*)instance)->clone();
|
||||
}
|
||||
|
||||
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
|
||||
LocaleKey& lkey = (LocaleKey&)key;
|
||||
int32_t kind = lkey.kind();
|
||||
Locale loc;
|
||||
lkey.currentLocale(loc);
|
||||
return BreakIterator::makeInstance(loc, kind, status);
|
||||
}
|
||||
|
||||
virtual UBool isDefault() const {
|
||||
return countFactories() == 1;
|
||||
}
|
||||
};
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// defined in ucln_cmn.h
|
||||
|
||||
static U_NAMESPACE_QUALIFIER ICULocaleService* gService = NULL;
|
||||
|
||||
/**
|
||||
* Release all static memory held by breakiterator.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
/**
 * Cleanup callback: destroys the global break-iterator service and resets
 * the pointer.
 *
 * @return Always TRUE.
 */
static UBool U_CALLCONV breakiterator_cleanup(void) {
#if !UCONFIG_NO_SERVICE
    // delete on a null pointer is a no-op, so no guard is needed.
    delete gService;
    gService = NULL;
#endif
    return TRUE;
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Return the global break-iterator service, creating it on first use.
 *
 * The service is built outside the lock and then published under the global
 * mutex; if another thread published one first, the local copy is discarded.
 *
 * @return The global service; may be NULL if allocation failed.
 */
static ICULocaleService*
getService(void)
{
    UBool needsInit;
    UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit);
    if (!needsInit) {
        return gService;
    }

    ICULocaleService *candidate = new ICUBreakIteratorService();
    umtx_lock(NULL);
    if (gService == NULL) {
        gService = candidate;
        candidate = NULL;
        ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
    }
    umtx_unlock(NULL);
    delete candidate;       // non-null only if another thread won the race

    return gService;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Report whether the global break-iterator service exists, without
 * creating it.
 */
static inline UBool
hasService(void)
{
    UBool exists;
    UMTX_CHECK(NULL, gService != NULL, exists);
    return exists;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Register a break iterator with the global service for a locale and kind.
 *
 * @param toAdopt The iterator to register.
 * @param locale  The locale to register it for.
 * @param kind    The break type to register it for.
 * @param status  Set to U_MEMORY_ALLOCATION_ERROR if the service is
 *                unavailable; otherwise set by the service.
 * @return The key from the service, or NULL if the service is unavailable.
 */
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    ICULocaleService *service = getService();
    if (service != NULL) {
        return service->registerInstance(toAdopt, locale, kind, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Remove a registration from the global service.
 *
 * @param key    The key returned when the instance was registered.
 * @param status In/out error code; nothing is done if it is already a
 *               failure. Set to U_MEMORY_ALLOCATION_ERROR if no service
 *               exists.
 * @return The service's result, or FALSE if nothing was attempted.
 */
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return FALSE;
    }
    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    return gService->unregister(key, status);
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Get the locales known to the global break-iterator service.
 *
 * @return The enumeration from the service, or NULL if the service is
 *         unavailable.
 */
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales(void)
{
    ICULocaleService *service = getService();
    return (service == NULL) ? NULL : service->getAvailableLocales();
}
|
||||
#endif /* UCONFIG_NO_SERVICE */
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Create a break iterator of the given kind for a locale.
 *
 * When the service exists the request goes through it; otherwise the
 * iterator is built directly with makeInstance().
 *
 * @param loc    The locale wanted.
 * @param kind   The break type wanted.
 * @param status In/out error code; nothing is done if it is already a failure.
 * @return The iterator, or NULL on failure.
 */
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

#if !UCONFIG_NO_SERVICE
    if (hasService()) {
        Locale actualLoc("");
        BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
        // TODO: The way the service code works in ICU 2.8 is that if
        // there is a real registered break iterator, the actualLoc
        // will be populated, but if the handleDefault path is taken
        // (because nothing is registered that can handle the
        // requested locale) then the actualLoc comes back empty.  In
        // that case, the returned object already has its actual/valid
        // locale data populated (by makeInstance, which is what
        // handleDefault calls), so we don't touch it.  YES, A COMMENT
        // THIS LONG is a sign of bad code -- so the action item is to
        // revisit this in ICU 3.0 and clean it up/fix it/remove it.
        const UBool haveActualLocale = (UBool)(*actualLoc.getName() != 0);
        if (U_SUCCESS(status) && (result != NULL) && haveActualLocale) {
            U_LOCALE_BASED(locBased, *result);
            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
        }
        return result;
    }
#endif

    return makeInstance(loc, kind, status);
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
 * Build a break iterator directly from resource data, bypassing the service.
 *
 * Each break kind maps to a rules key ("grapheme", "word", "line",
 * "sentence", "title") that is passed to buildInstance().
 *
 * @param loc    The locale wanted.
 * @param kind   One of the UBRK_* break types.
 * @param status In/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for an
 *               unknown kind.
 * @return The iterator, or NULL on failure.
 */
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const char *rulesKey = NULL;
    switch (kind) {
    case UBRK_CHARACTER:
        rulesKey = "grapheme";
        break;
    case UBRK_WORD:
        rulesKey = "word";
        break;
    case UBRK_LINE:
        rulesKey = "line";
        break;
    case UBRK_SENTENCE:
        rulesKey = "sentence";
        break;
    case UBRK_TITLE:
        rulesKey = "title";
        break;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    BreakIterator *result = BreakIterator::buildInstance(loc, rulesKey, kind, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    return result;
}
|
||||
|
||||
/**
 * Return this iterator's locale of the requested type, looked up through
 * the LocaleBased helper.
 */
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    U_LOCALE_BASED(localeView, *this);
    return localeView.getLocale(type, status);
}
|
||||
|
||||
const char *
|
||||
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
|
||||
U_LOCALE_BASED(locBased, *this);
|
||||
return locBased.getLocaleID(type, status);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
//eof
|
65
source/common/bytestream.cpp
Normal file
65
source/common/bytestream.cpp
Normal file
|
@ -0,0 +1,65 @@
|
|||
// Copyright (C) 2009, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Author: sanjay@google.com (Sanjay Ghemawat)
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestream.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Default implementation: always hands back the caller's scratch buffer.
// Returns NULL with *result_capacity == 0 when min_capacity is less than 1
// or the scratch buffer is smaller than min_capacity.
char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t /*desired_capacity_hint*/,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) {
  const bool usable = (min_capacity >= 1 && scratch_capacity >= min_capacity);
  if (!usable) {
    *result_capacity = 0;
    return NULL;
  }
  *result_capacity = scratch_capacity;
  return scratch;
}
|
||||
|
||||
// Base-class implementation: does nothing.
void ByteSink::Flush() {
}
|
||||
|
||||
// Wraps the caller-provided array outbuf of the given capacity.
// A negative capacity is treated as 0. The sink starts empty and not overflowed.
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
    : outbuf_(outbuf),
      capacity_(capacity < 0 ? 0 : capacity),
      size_(0),
      overflowed_(false)
{
}
|
||||
|
||||
void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
|
||||
if (n <= 0) {
|
||||
return;
|
||||
}
|
||||
int32_t available = capacity_ - size_;
|
||||
if (n > available) {
|
||||
n = available;
|
||||
overflowed_ = true;
|
||||
}
|
||||
if (n > 0 && bytes != (outbuf_ + size_)) {
|
||||
uprv_memcpy(outbuf_ + size_, bytes, n);
|
||||
}
|
||||
size_ += n;
|
||||
}
|
||||
|
||||
// Returns a buffer the caller may write into before calling Append().
// If the unused tail of the output array holds at least min_capacity bytes,
// that tail is returned; otherwise the caller's scratch buffer is returned.
// Returns NULL (result capacity 0) when min_capacity is less than 1 or the
// scratch buffer is smaller than min_capacity.
char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t /*desired_capacity_hint*/,
                                            char* scratch,
                                            int32_t scratch_capacity,
                                            int32_t* result_capacity) {
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
        *result_capacity = 0;
        return NULL;
    }
    const int32_t room = capacity_ - size_;
    if (room < min_capacity) {
        // Not enough space left in the array: fall back to the scratch buffer.
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = room;
    return outbuf_ + size_;
}
|
||||
|
||||
U_NAMESPACE_END
|
611
source/common/caniter.cpp
Normal file
611
source/common/caniter.cpp
Normal file
|
@ -0,0 +1,611 @@
|
|||
/*
|
||||
*****************************************************************************
|
||||
* Copyright (C) 1996-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "hash.h"
|
||||
#include "unormimp.h"
|
||||
#include "unicode/caniter.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/**
|
||||
* This class allows one to iterate through all the strings that are canonically equivalent to a given
|
||||
* string. For example, here are some sample results:
|
||||
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
1: \u0041\u030A\u0064\u0307\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
2: \u0041\u030A\u0064\u0327\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
3: \u0041\u030A\u1E0B\u0327
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
4: \u0041\u030A\u1E11\u0307
|
||||
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
5: \u00C5\u0064\u0307\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
6: \u00C5\u0064\u0327\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
7: \u00C5\u1E0B\u0327
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
8: \u00C5\u1E11\u0307
|
||||
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
9: \u212B\u0064\u0307\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
|
||||
10: \u212B\u0064\u0327\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
|
||||
11: \u212B\u1E0B\u0327
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
|
||||
12: \u212B\u1E11\u0307
|
||||
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
|
||||
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
|
||||
* since it has not been optimized for that situation.
|
||||
*@author M. Davis
|
||||
*@draft
|
||||
*/
|
||||
|
||||
// public
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// TODO: add boilerplate methods.
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
|
||||
|
||||
/**
|
||||
*@param source string to get results for
|
||||
*/
|
||||
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
|
||||
pieces(NULL),
|
||||
pieces_length(0),
|
||||
pieces_lengths(NULL),
|
||||
current(NULL),
|
||||
current_length(0)
|
||||
{
|
||||
if(U_SUCCESS(status)) {
|
||||
setSource(sourceStr, status);
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the piece tables and the position array via cleanPieces().
CanonicalIterator::~CanonicalIterator()
{
    cleanPieces();
}
|
||||
|
||||
void CanonicalIterator::cleanPieces() {
|
||||
int32_t i = 0;
|
||||
if(pieces != NULL) {
|
||||
for(i = 0; i < pieces_length; i++) {
|
||||
if(pieces[i] != NULL) {
|
||||
delete[] pieces[i];
|
||||
}
|
||||
}
|
||||
uprv_free(pieces);
|
||||
pieces = NULL;
|
||||
pieces_length = 0;
|
||||
}
|
||||
if(pieces_lengths != NULL) {
|
||||
uprv_free(pieces_lengths);
|
||||
pieces_lengths = NULL;
|
||||
}
|
||||
if(current != NULL) {
|
||||
uprv_free(current);
|
||||
current = NULL;
|
||||
current_length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*@return gets the source: NOTE: it is the NFD form of source
|
||||
*/
|
||||
UnicodeString CanonicalIterator::getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the iterator so that one can start again from the beginning.
|
||||
*/
|
||||
void CanonicalIterator::reset() {
|
||||
done = FALSE;
|
||||
for (int i = 0; i < current_length; ++i) {
|
||||
current[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*@return the next string that is canonically equivalent. The value null is returned when
|
||||
* the iteration is done.
|
||||
*/
|
||||
UnicodeString CanonicalIterator::next() {
|
||||
int32_t i = 0;
|
||||
|
||||
if (done) {
|
||||
buffer.setToBogus();
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// delete old contents
|
||||
buffer.remove();
|
||||
|
||||
// construct return value
|
||||
|
||||
for (i = 0; i < pieces_length; ++i) {
|
||||
buffer.append(pieces[i][current[i]]);
|
||||
}
|
||||
//String result = buffer.toString(); // not needed
|
||||
|
||||
// find next value for next time
|
||||
|
||||
for (i = current_length - 1; ; --i) {
|
||||
if (i < 0) {
|
||||
done = TRUE;
|
||||
break;
|
||||
}
|
||||
current[i]++;
|
||||
if (current[i] < pieces_lengths[i]) break; // got sequence
|
||||
current[i] = 0;
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
*@param set the source string to iterate against. This allows the same iterator to be used
|
||||
* while changing the source string, saving object creation.
|
||||
*/
|
||||
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
|
||||
int32_t list_length = 0;
|
||||
UChar32 cp = 0;
|
||||
int32_t start = 0;
|
||||
int32_t i = 0;
|
||||
UnicodeString *list = NULL;
|
||||
|
||||
Normalizer::normalize(newSource, UNORM_NFD, 0, source, status);
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
done = FALSE;
|
||||
|
||||
cleanPieces();
|
||||
|
||||
// catch degenerate case
|
||||
if (newSource.length() == 0) {
|
||||
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
|
||||
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
|
||||
pieces_length = 1;
|
||||
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
|
||||
current_length = 1;
|
||||
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
current[0] = 0;
|
||||
pieces[0] = new UnicodeString[1];
|
||||
pieces_lengths[0] = 1;
|
||||
if (pieces[0] == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
list = new UnicodeString[source.length()];
|
||||
if (list == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
|
||||
// i should initialy be the number of code units at the
|
||||
// start of the string
|
||||
i = UTF16_CHAR_LENGTH(source.char32At(0));
|
||||
//int32_t i = 1;
|
||||
// find the segments
|
||||
// This code iterates through the source string and
|
||||
// extracts segments that end up on a codepoint that
|
||||
// doesn't start any decompositions. (Analysis is done
|
||||
// on the NFD form - see above).
|
||||
for (; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
if (unorm_isCanonSafeStart(cp)) {
|
||||
source.extract(start, i-start, list[list_length++]); // add up to i
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
source.extract(start, i-start, list[list_length++]); // add last one
|
||||
|
||||
|
||||
// allocate the arrays, and find the strings that are CE to each segment
|
||||
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
|
||||
pieces_length = list_length;
|
||||
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
|
||||
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
|
||||
current_length = list_length;
|
||||
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto CleanPartialInitialization;
|
||||
}
|
||||
|
||||
for (i = 0; i < current_length; i++) {
|
||||
current[i] = 0;
|
||||
}
|
||||
// for each segment, get all the combinations that can produce
|
||||
// it after NFD normalization
|
||||
for (i = 0; i < pieces_length; ++i) {
|
||||
//if (PROGRESS) printf("SEGMENT\n");
|
||||
pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
|
||||
}
|
||||
|
||||
delete[] list;
|
||||
return;
|
||||
// Common section to cleanup all local variables and reset object variables.
|
||||
CleanPartialInitialization:
|
||||
if (list != NULL) {
|
||||
delete[] list;
|
||||
}
|
||||
cleanPieces();
|
||||
}
|
||||
|
||||
/**
|
||||
* Dumb recursive implementation of permutation.
|
||||
* TODO: optimize
|
||||
* @param source the string to find permutations for
|
||||
* @return the results in a set.
|
||||
*/
|
||||
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
|
||||
int32_t i = 0;
|
||||
|
||||
// optimization:
|
||||
// if zero or one character, just return a set with it
|
||||
// we check for length < 2 to keep from counting code points all the time
|
||||
if (source.length() <= 2 && source.countChar32() <= 1) {
|
||||
UnicodeString *toPut = new UnicodeString(source);
|
||||
/* test for NULL */
|
||||
if (toPut == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
result->put(source, toPut, status);
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise iterate through the string, and recursively permute all the other characters
|
||||
UChar32 cp;
|
||||
Hashtable subpermute(status);
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
subpermute.setValueDeleter(uhash_deleteUnicodeString);
|
||||
|
||||
for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
|
||||
cp = source.char32At(i);
|
||||
const UHashElement *ne = NULL;
|
||||
int32_t el = -1;
|
||||
UnicodeString subPermuteString = source;
|
||||
|
||||
// optimization:
|
||||
// if the character is canonical combining class zero,
|
||||
// don't permute it
|
||||
if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
|
||||
//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
|
||||
continue;
|
||||
}
|
||||
|
||||
subpermute.removeAll();
|
||||
|
||||
// see what the permutations of the characters before and after this one are
|
||||
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
|
||||
permute(subPermuteString.replace(i, UTF16_CHAR_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
// The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents
|
||||
// of source at this point.
|
||||
|
||||
// prefix this character to all of them
|
||||
ne = subpermute.nextElement(el);
|
||||
while (ne != NULL) {
|
||||
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
|
||||
UnicodeString *chStr = new UnicodeString(cp);
|
||||
//test for NULL
|
||||
if (chStr == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
|
||||
//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
|
||||
result->put(*chStr, chStr, status);
|
||||
ne = subpermute.nextElement(el);
|
||||
}
|
||||
}
|
||||
//return result;
|
||||
}
|
||||
|
||||
// privates
|
||||
|
||||
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
|
||||
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
|
||||
Hashtable result(status);
|
||||
Hashtable permutations(status);
|
||||
Hashtable basic(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
result.setValueDeleter(uhash_deleteUnicodeString);
|
||||
permutations.setValueDeleter(uhash_deleteUnicodeString);
|
||||
basic.setValueDeleter(uhash_deleteUnicodeString);
|
||||
|
||||
UChar USeg[256];
|
||||
int32_t segLen = segment.extract(USeg, 256, status);
|
||||
getEquivalents2(&basic, USeg, segLen, status);
|
||||
|
||||
// now get all the permutations
|
||||
// add only the ones that are canonically equivalent
|
||||
// TODO: optimize by not permuting any class zero.
|
||||
|
||||
const UHashElement *ne = NULL;
|
||||
int32_t el = -1;
|
||||
//Iterator it = basic.iterator();
|
||||
ne = basic.nextElement(el);
|
||||
//while (it.hasNext())
|
||||
while (ne != NULL) {
|
||||
//String item = (String) it.next();
|
||||
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
|
||||
|
||||
permutations.removeAll();
|
||||
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
|
||||
const UHashElement *ne2 = NULL;
|
||||
int32_t el2 = -1;
|
||||
//Iterator it2 = permutations.iterator();
|
||||
ne2 = permutations.nextElement(el2);
|
||||
//while (it2.hasNext())
|
||||
while (ne2 != NULL) {
|
||||
//String possible = (String) it2.next();
|
||||
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
|
||||
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
|
||||
UnicodeString attempt;
|
||||
Normalizer::normalize(possible, UNORM_NFD, 0, attempt, status);
|
||||
|
||||
// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
|
||||
if (attempt==segment) {
|
||||
//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
|
||||
// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
|
||||
result.put(possible, new UnicodeString(possible), status); //add(possible);
|
||||
} else {
|
||||
//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
|
||||
}
|
||||
|
||||
ne2 = permutations.nextElement(el2);
|
||||
}
|
||||
ne = basic.nextElement(el);
|
||||
}
|
||||
|
||||
/* Test for buffer overflows */
|
||||
if(U_FAILURE(status)) {
|
||||
return 0;
|
||||
}
|
||||
// convert into a String[] to clean up storage
|
||||
//String[] finalResult = new String[result.size()];
|
||||
UnicodeString *finalResult = NULL;
|
||||
int32_t resultCount;
|
||||
if((resultCount = result.count())) {
|
||||
finalResult = new UnicodeString[resultCount];
|
||||
if (finalResult == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
//result.toArray(finalResult);
|
||||
result_len = 0;
|
||||
el = -1;
|
||||
ne = result.nextElement(el);
|
||||
while(ne != NULL) {
|
||||
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
|
||||
ne = result.nextElement(el);
|
||||
}
|
||||
|
||||
|
||||
return finalResult;
|
||||
}
|
||||
|
||||
/**
 * Adds to fillinResult the segment itself plus every string obtained by
 * replacing a decomposition found inside the segment with a code point that
 * composes it (recursively, via extract()).
 *
 * @param fillinResult hashtable that receives the strings (values are newly
 *                     allocated UnicodeStrings)
 * @param segment      the segment text
 * @param segLen       length of segment in UChars
 * @param status       in/out error code
 * @return fillinResult, or NULL on failure
 */
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString toPut(segment, segLen);

    fillinResult->put(toPut, new UnicodeString(toPut), status);

    USerializedSet starts;

    // cycle through all the characters
    // cp is the current code point of the segment and also drives the step of
    // the outer loop; cp2 walks the start set. They must be distinct
    // variables, otherwise the outer loop would advance by the UTF-16 length
    // of the last start-set value instead of the segment's code point.
    UChar32 cp, cp2, end = 0;
    int32_t i = 0, j;
    for (i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        UTF_GET_CHAR(segment, 0, i, segLen, cp);
        if (!unorm_getCanonStartSet(cp, &starts)) {
            continue;
        }
        // if so, see which decompositions match
        // (cp2 = end+1 forces the first range to be fetched; afterwards a new
        // range is fetched each time cp2 runs past the current range end)
        for(j = 0, cp2 = end+1; cp2 <= end || uset_getSerializedRange(&starts, j++, &cp2, &end); ++cp2) {
            Hashtable remainder(status);
            remainder.setValueDeleter(uhash_deleteUnicodeString);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = -1;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
|
||||
|
||||
/**
|
||||
* See if the decomposition of cp2 is at segment starting at segmentPos
|
||||
* (with canonical rearrangment!)
|
||||
* If so, take the remainder, and return the equivalents
|
||||
*/
|
||||
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
|
||||
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
|
||||
//if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
|
||||
//if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const int32_t bufSize = 256;
|
||||
int32_t bufLen = 0;
|
||||
UChar temp[bufSize];
|
||||
|
||||
int32_t inputLen = 0, decompLen;
|
||||
UChar stackBuffer[4];
|
||||
const UChar *decomp;
|
||||
|
||||
U16_APPEND_UNSAFE(temp, inputLen, comp);
|
||||
decomp = unorm_getCanonicalDecomposition(comp, stackBuffer, &decompLen);
|
||||
if(decomp == NULL) {
|
||||
/* copy temp */
|
||||
stackBuffer[0] = temp[0];
|
||||
if(inputLen > 1) {
|
||||
stackBuffer[1] = temp[1];
|
||||
}
|
||||
decomp = stackBuffer;
|
||||
decompLen = inputLen;
|
||||
}
|
||||
|
||||
UChar *buff = temp+inputLen;
|
||||
|
||||
// See if it matches the start of segment (at segmentPos)
|
||||
UBool ok = FALSE;
|
||||
UChar32 cp;
|
||||
int32_t decompPos = 0;
|
||||
UChar32 decompCp;
|
||||
UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);
|
||||
|
||||
int32_t i;
|
||||
UBool overflow = FALSE;
|
||||
|
||||
i = segmentPos;
|
||||
while(i < segLen) {
|
||||
UTF_NEXT_CHAR(segment, i, segLen, cp);
|
||||
|
||||
if (cp == decompCp) { // if equal, eat another cp from decomp
|
||||
|
||||
//if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));
|
||||
|
||||
if (decompPos == decompLen) { // done, have all decomp characters!
|
||||
//u_strcat(buff+bufLen, segment+i);
|
||||
uprv_memcpy(buff+bufLen, segment+i, (segLen-i)*sizeof(UChar));
|
||||
bufLen+=segLen-i;
|
||||
|
||||
ok = TRUE;
|
||||
break;
|
||||
}
|
||||
UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);
|
||||
} else {
|
||||
//if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));
|
||||
|
||||
// brute force approach
|
||||
|
||||
U16_APPEND(buff, bufLen, bufSize, cp, overflow);
|
||||
|
||||
if(overflow) {
|
||||
/*
|
||||
* ### TODO handle buffer overflow
|
||||
* The buffer is large, but an overflow may still happen with
|
||||
* unusual input (many combining marks?).
|
||||
* Reallocate buffer and continue.
|
||||
* markus 20020929
|
||||
*/
|
||||
|
||||
overflow = FALSE;
|
||||
}
|
||||
|
||||
/* TODO: optimize
|
||||
// since we know that the classes are monotonically increasing, after zero
|
||||
// e.g. 0 5 7 9 0 3
|
||||
// we can do an optimization
|
||||
// there are only a few cases that work: zero, less, same, greater
|
||||
// if both classes are the same, we fail
|
||||
// if the decomp class < the segment class, we fail
|
||||
|
||||
segClass = getClass(cp);
|
||||
if (decompClass <= segClass) return null;
|
||||
*/
|
||||
}
|
||||
}
|
||||
if (!ok)
|
||||
return NULL; // we failed, characters left over
|
||||
|
||||
//if (PROGRESS) printf("Matches\n");
|
||||
|
||||
if (bufLen == 0) {
|
||||
fillinResult->put(UnicodeString(), new UnicodeString(), status);
|
||||
return fillinResult; // succeed, but no remainder
|
||||
}
|
||||
|
||||
// brute force approach
|
||||
// check to make sure result is canonically equivalent
|
||||
int32_t tempLen = inputLen + bufLen;
|
||||
|
||||
UChar trial[bufSize];
|
||||
unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status);
|
||||
|
||||
if(U_FAILURE(status)
|
||||
|| uprv_memcmp(segment+segmentPos, trial, (segLen - segmentPos)*sizeof(UChar)) != 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return getEquivalents2(fillinResult, buff, bufLen, status);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
96
source/common/chariter.cpp
Normal file
96
source/common/chariter.cpp
Normal file
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Destructor: nothing to release.
ForwardCharacterIterator::~ForwardCharacterIterator()
{
}

// Default constructor: only constructs the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator()
    : UObject()
{
}

// Copy constructor: only copy-constructs the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other)
    : UObject(other)
{
}
|
||||
|
||||
|
||||
// Default constructor: zero-length text, with position and range all at 0.
CharacterIterator::CharacterIterator()
    : textLength(0),
      pos(0),
      begin(0),
      end(0)
{
}
|
||||
|
||||
// Iterates over the whole text [0, length), starting at position 0.
// A negative length is treated as 0.
CharacterIterator::CharacterIterator(int32_t length)
    : textLength(length),
      pos(0),
      begin(0),
      end(length)
{
    if(textLength < 0) {
        textLength = 0;
        end = 0;
    }
}
|
||||
|
||||
// Iterates over the whole text [0, length), starting at the given position.
// A negative length is treated as 0; the position is pinned into [0, end].
CharacterIterator::CharacterIterator(int32_t length, int32_t position)
    : textLength(length),
      pos(position),
      begin(0),
      end(length)
{
    if(textLength < 0) {
        textLength = 0;
        end = 0;
    }
    // end is >= 0 here, so the two tests below are mutually exclusive
    if(pos > end) {
        pos = end;
    } else if(pos < 0) {
        pos = 0;
    }
}
|
||||
|
||||
// Iterates over the range [textBegin, textEnd) of a text of the given length,
// starting at the given position. Every argument is pinned into its valid
// range, in order: 0 <= textLength, 0 <= begin <= textLength,
// begin <= end <= textLength, begin <= pos <= end.
CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
    : textLength(length),
      pos(position),
      begin(textBegin),
      end(textEnd)
{
    if(textLength < 0) {
        textLength = 0;
    }

    if(begin < 0) {
        begin = 0;
    } else if(begin > textLength) {
        begin = textLength;
    }

    if(end < begin) {
        end = begin;
    } else if(end > textLength) {
        end = textLength;
    }

    if(pos < begin) {
        pos = begin;
    } else if(pos > end) {
        pos = end;
    }
}
|
||||
|
||||
// Copy constructor: copies the base part and all four range/position fields.
CharacterIterator::CharacterIterator(const CharacterIterator &that)
    : ForwardCharacterIterator(that),
      textLength(that.textLength),
      pos(that.pos),
      begin(that.begin),
      end(that.end)
{
}
|
||||
|
||||
// Assignment: assigns the base part and copies all four range/position
// fields. Returns *this.
CharacterIterator &
CharacterIterator::operator=(const CharacterIterator &that)
{
    ForwardCharacterIterator::operator=(that);
    begin      = that.begin;
    end        = that.end;
    pos        = that.pos;
    textLength = that.textLength;
    return *this;
}
|
||||
|
||||
// implementing first[32]PostInc() directly in a subclass should be faster
|
||||
// but these implementations make subclassing a little easier
|
||||
UChar
|
||||
CharacterIterator::firstPostInc(void) {
|
||||
setToStart();
|
||||
return nextPostInc();
|
||||
}
|
||||
|
||||
// Moves to the start of the range, then returns whatever next32PostInc() returns.
UChar32
CharacterIterator::first32PostInc(void)
{
    setToStart();
    return next32PostInc();
}
|
||||
|
||||
U_NAMESPACE_END
|
88
source/common/charstr.h
Normal file
88
source/common/charstr.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/19/2001 aliu Creation.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARSTRING_H
|
||||
#define CHARSTRING_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// class CharString
|
||||
//
|
||||
// This is a tiny wrapper class that is used internally to make a
|
||||
// UnicodeString look like a const char*. It can be allocated on the
|
||||
// stack. It only creates a heap buffer if it needs to.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class U_COMMON_API CharString : public UMemory {
public:

#if !UCONFIG_NO_CONVERSION
    // Constructor (currently disabled, see the commented-out definition below)
    // @param str       The unicode string to be converted to char *
    // @param codepage  The char * code page.  "" for invariant conversion.
    //                  NULL for default code page.
//    inline CharString(const UnicodeString& str, const char *codepage);
#endif

    // Converts str using invariant-character conversion.
    inline CharString(const UnicodeString& str);

    // Frees the heap buffer, if one was allocated.
    inline ~CharString();

    // Gives access to the converted characters.
    inline operator const char*() const { return ptr; }

private:
    char buf[128];      // in-object storage used for short strings
    char* ptr;          // points at buf, or at a heap buffer for long strings

    CharString(const CharString &other); // forbid copying of this class
    CharString &operator=(const CharString &other); // forbid copying of this class
};
|
||||
|
||||
#if !UCONFIG_NO_CONVERSION
|
||||
|
||||
// PLEASE DON'T USE THIS FUNCTION.
|
||||
// We don't want the static dependency on conversion or the performance hit that comes from a codepage conversion.
|
||||
/*
|
||||
inline CharString::CharString(const UnicodeString& str, const char *codepage) {
|
||||
int32_t len;
|
||||
ptr = buf;
|
||||
len = str.extract(0, 0x7FFFFFFF, buf ,sizeof(buf)-1, codepage);
|
||||
if (len >= (int32_t)(sizeof(buf)-1)) {
|
||||
ptr = (char *)uprv_malloc(len+1);
|
||||
str.extract(0, 0x7FFFFFFF, ptr, len+1, codepage);
|
||||
}
|
||||
}*/
|
||||
|
||||
#endif
|
||||
|
||||
// Converts str to char using invariant-character conversion.
// The in-object buffer is tried first; if the string does not fit together
// with its terminator, a heap buffer of len+1 bytes is allocated instead.
// If that allocation fails, the object holds an empty string rather than
// handing a NULL target to extract().
inline CharString::CharString(const UnicodeString& str) {
    int32_t len;
    ptr = buf;
    len = str.extract(0, 0x7FFFFFFF, buf, (int32_t)(sizeof(buf)-1), US_INV);
    if (len >= (int32_t)(sizeof(buf)-1)) {
        char *heap = (char *)uprv_malloc(len+1);
        if (heap != NULL) {
            ptr = heap;
            str.extract(0, 0x7FFFFFFF, ptr, len+1, US_INV);
        } else {
            // out of memory: keep ptr == buf so the destructor frees nothing
            buf[0] = 0;
        }
    }
}
|
||||
|
||||
// Frees the character buffer only when it is not the in-object array.
inline CharString::~CharString()
{
    if (ptr == buf) {
        return;
    }
    uprv_free(ptr);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
//eof
|
124
source/common/cmemory.c
Normal file
124
source/common/cmemory.c
Normal file
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File cmemory.c ICU Heap allocation.
|
||||
* All ICU heap allocation, both for C and C++ new of ICU
|
||||
* class types, comes through these functions.
|
||||
*
|
||||
* If you have a need to replace ICU allocation, this is the
|
||||
* place to do it.
|
||||
*
|
||||
* Note that uprv_malloc(0) returns a non-NULL pointer, and
|
||||
* that a subsequent free of that pointer value is a NOP.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
#include "unicode/uclean.h"
|
||||
#include "cmemory.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/* uprv_malloc(0) returns a pointer to this read-only data. */
|
||||
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
|
||||
|
||||
/* Function Pointers for user-supplied heap functions */
|
||||
static const void *pContext;
|
||||
static UMemAllocFn *pAlloc;
|
||||
static UMemReallocFn *pRealloc;
|
||||
static UMemFreeFn *pFree;
|
||||
|
||||
/* Flag indicating whether any heap allocations have happened.
|
||||
* Used to prevent changing out the heap functions after allocations have been made */
|
||||
static UBool gHeapInUse;
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_malloc(size_t s) {
|
||||
if (s > 0) {
|
||||
gHeapInUse = TRUE;
|
||||
if (pAlloc) {
|
||||
return (*pAlloc)(pContext, s);
|
||||
} else {
|
||||
return malloc(s);
|
||||
}
|
||||
} else {
|
||||
return (void *)zeroMem;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_realloc(void * buffer, size_t size) {
|
||||
if (buffer == zeroMem) {
|
||||
return uprv_malloc(size);
|
||||
} else if (size == 0) {
|
||||
if (pFree) {
|
||||
(*pFree)(pContext, buffer);
|
||||
} else {
|
||||
free(buffer);
|
||||
}
|
||||
return (void *)zeroMem;
|
||||
} else {
|
||||
gHeapInUse = TRUE;
|
||||
if (pRealloc) {
|
||||
return (*pRealloc)(pContext, buffer, size);
|
||||
} else {
|
||||
return realloc(buffer, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_free(void *buffer) {
|
||||
if (buffer != zeroMem) {
|
||||
if (pFree) {
|
||||
(*pFree)(pContext, buffer);
|
||||
} else {
|
||||
free(buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if (a==NULL || r==NULL || f==NULL) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if (gHeapInUse) {
|
||||
*status = U_INVALID_STATE_ERROR;
|
||||
return;
|
||||
}
|
||||
pContext = context;
|
||||
pAlloc = a;
|
||||
pRealloc = r;
|
||||
pFree = f;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Forget any user-supplied heap functions and clear the heap-in-use flag.
 * Always returns TRUE.
 */
U_CFUNC UBool cmemory_cleanup(void) {
    gHeapInUse = FALSE;
    pFree      = NULL;
    pRealloc   = NULL;
    pAlloc     = NULL;
    pContext   = NULL;
    return TRUE;
}
|
||||
|
||||
|
||||
/*
|
||||
* gHeapInUse
|
||||
* Return True if ICU has allocated any memory.
|
||||
* Used by u_SetMutexFunctions() and similar to verify that ICU has not
|
||||
* been used, that it is in a pristine initial state.
|
||||
*/
|
||||
U_CFUNC UBool cmemory_inUse() {
|
||||
return gHeapInUse;
|
||||
}
|
||||
|
94
source/common/cmemory.h
Normal file
94
source/common/cmemory.h
Normal file
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CMEMORY.H
|
||||
*
|
||||
* Contains stdlib.h/string.h memory functions
|
||||
*
|
||||
* @author Bertrand A. Damiba
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/20/98 Bertrand Created.
|
||||
* 05/03/99 stephen Changed from functions to macros.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CMEMORY_H
|
||||
#define CMEMORY_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
|
||||
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_malloc(size_t s);
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_realloc(void *mem, size_t size);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_free(void *mem);
|
||||
|
||||
/**
|
||||
* This should align the memory properly on any machine.
|
||||
* This is very useful for the safeClone functions.
|
||||
*/
|
||||
typedef union {
|
||||
long t1;
|
||||
double t2;
|
||||
void *t3;
|
||||
} UAlignedMemory;
|
||||
|
||||
/**
|
||||
* Get the least significant bits of a pointer (a memory address).
|
||||
* For example, with a mask of 3, the macro gets the 2 least significant bits,
|
||||
* which will be 0 if the pointer is 32-bit (4-byte) aligned.
|
||||
*
|
||||
* ptrdiff_t is the most appropriate integer type to cast to.
|
||||
* size_t should work too, since on most (or all?) platforms it has the same
|
||||
* width as ptrdiff_t.
|
||||
*/
|
||||
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
|
||||
|
||||
/**
|
||||
* Get the amount of bytes that a pointer is off by from
|
||||
* the previous UAlignedMemory-aligned pointer.
|
||||
*/
|
||||
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
|
||||
|
||||
/**
|
||||
* Get the amount of bytes to add to a pointer
|
||||
* in order to get the next UAlignedMemory-aligned address. Note that for a pointer that is already aligned this yields sizeof(UAlignedMemory), not 0.
|
||||
*/
|
||||
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
|
||||
|
||||
/**
|
||||
* Indicate whether the ICU allocation functions have been used.
|
||||
* This is used to determine whether ICU is in an initial, unused state.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
cmemory_inUse(void);
|
||||
|
||||
/**
|
||||
* Heap clean up function, called from u_cleanup()
|
||||
* Clears any user heap functions from u_setMemoryFunctions()
|
||||
* Does NOT deallocate any remaining allocated memory.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
cmemory_cleanup(void);
|
||||
|
||||
#endif
|
108
source/common/common.rc
Normal file
108
source/common/common.rc
Normal file
|
@ -0,0 +1,108 @@
|
|||
// Do not edit with Microsoft Developer Studio Resource Editor.
|
||||
// It will permanently substitute version numbers that are intended to be
|
||||
// picked up by the pre-processor during each build.
|
||||
// Copyright (c) 2001-2009 International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
#include "msvcres.h"
|
||||
|
||||
#define APSTUDIO_READONLY_SYMBOLS
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Generated from the TEXTINCLUDE 2 resource.
|
||||
//
|
||||
#include <winresrc.h>
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#undef APSTUDIO_READONLY_SYMBOLS
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
|
||||
#pragma code_page(1252)
|
||||
|
||||
#ifdef APSTUDIO_INVOKED
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// TEXTINCLUDE
|
||||
//
|
||||
|
||||
1 TEXTINCLUDE
|
||||
BEGIN
|
||||
"msvcres.h\0"
|
||||
END
|
||||
|
||||
2 TEXTINCLUDE
|
||||
BEGIN
|
||||
"#include <winresrc.h>\0"
|
||||
END
|
||||
|
||||
3 TEXTINCLUDE
|
||||
BEGIN
|
||||
"\r\n"
|
||||
"\0"
|
||||
END
|
||||
|
||||
#endif // APSTUDIO_INVOKED
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Version
|
||||
//
|
||||
#define STR(s) #s
|
||||
#define CommaVersionString(a, b, c, d) STR(a) ", " STR(b) ", " STR(c) ", " STR(d) "\0"
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
|
||||
PRODUCTVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
#else
|
||||
FILEFLAGS 0x0L
|
||||
#endif
|
||||
FILEOS VOS__WINDOWS32
|
||||
FILETYPE VFT_DLL
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "00000000"
|
||||
BEGIN
|
||||
VALUE "Comments", ICU_WEBSITE "\0"
|
||||
VALUE "CompanyName", "IBM Corporation and others\0"
|
||||
VALUE "FileDescription", "IBM ICU Common DLL\0"
|
||||
VALUE "FileVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
|
||||
VALUE "LegalCopyright", U_COPYRIGHT_STRING "\0"
|
||||
#ifdef _DEBUG
|
||||
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT "d.dll\0"
|
||||
#else
|
||||
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT ".dll\0"
|
||||
#endif
|
||||
VALUE "PrivateBuild", "\0"
|
||||
VALUE "ProductName", "International Components for Unicode\0"
|
||||
VALUE "ProductVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
|
||||
VALUE "SpecialBuild", "\0"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x000, 0000
|
||||
END
|
||||
END
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
#ifndef APSTUDIO_INVOKED
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Generated from the TEXTINCLUDE 3 resource.
|
||||
//
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#endif // not APSTUDIO_INVOKED
|
||||
|
4444
source/common/common.vcproj
Normal file
4444
source/common/common.vcproj
Normal file
File diff suppressed because it is too large
Load diff
84
source/common/cpputils.h
Normal file
84
source/common/cpputils.h
Normal file
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cpputils.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*/
|
||||
|
||||
#ifndef CPPUTILS_H
|
||||
#define CPPUTILS_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Array copy utility functions */
|
||||
/*==========================================================================*/
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
|
||||
double* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
|
||||
int8_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
|
||||
int16_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
|
||||
int32_t* dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
static
|
||||
inline void
|
||||
uprv_arrayCopy(const UChar *src, int32_t srcStart,
|
||||
UChar *dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
/**
|
||||
* Copy an array of UnicodeString OBJECTS (not pointers).
|
||||
* @internal
|
||||
*/
|
||||
static inline void
|
||||
uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
|
||||
{ while(count-- > 0) *dst++ = *src++; }
|
||||
|
||||
/**
|
||||
* Copy an array of UnicodeString OBJECTS (not pointers).
|
||||
* @internal
|
||||
*/
|
||||
static inline void
|
||||
uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
|
||||
U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
|
||||
{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
|
||||
|
||||
#endif /* CPPUTILS_H */
|
328
source/common/cstring.c
Normal file
328
source/common/cstring.c
Normal file
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CSTRING.C
|
||||
*
|
||||
* @author Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/18/98 hshih Created
|
||||
* 09/08/98 stephen Added include for ctype, for Mac Port
|
||||
* 11/15/99 helena Integrated S/390 IEEE changes.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
|
||||
/*
|
||||
* We hardcode case conversion for invariant characters to match our expectation
|
||||
* and the compiler execution charset.
|
||||
* This prevents problems on systems
|
||||
* - with non-default casing behavior, like Turkish system locales where
|
||||
* tolower('I') maps to dotless i and toupper('i') maps to dotted I
|
||||
* - where there are no lowercase Latin characters at all, or using different
|
||||
* codes (some old EBCDIC codepages)
|
||||
*
|
||||
* This works because the compiler usually runs on a platform where the execution
|
||||
* charset includes all of the invariant characters at their expected
|
||||
* code positions, so that the char * string literals in ICU code match
|
||||
* the char literals here.
|
||||
*
|
||||
* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
|
||||
* and the set of uppercase Latin letters is discontiguous as well.
|
||||
*/
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_toupper(char c) {
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
|
||||
c=(char)(c+('A'-'a'));
|
||||
}
|
||||
#else
|
||||
if('a'<=c && c<='z') {
|
||||
c=(char)(c+('A'-'a'));
|
||||
}
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Commented out because cstring.h defines uprv_tolower() to be
|
||||
* the same as either uprv_asciitolower() or uprv_ebcdictolower()
|
||||
* to reduce the amount of code to cover with tests.
|
||||
*
|
||||
* Note that this uprv_tolower() definition is likely to work for most
|
||||
* charset families, not just ASCII and EBCDIC, because its #else branch
|
||||
* is written generically.
|
||||
*/
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_tolower(char c) {
|
||||
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
|
||||
c=(char)(c+('a'-'A'));
|
||||
}
|
||||
#else
|
||||
if('A'<=c && c<='Z') {
|
||||
c=(char)(c+('a'-'A'));
|
||||
}
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_asciitolower(char c) {
|
||||
if(0x41<=c && c<=0x5a) {
|
||||
c=(char)(c+0x20);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_ebcdictolower(char c) {
|
||||
if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
|
||||
(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
|
||||
(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
|
||||
) {
|
||||
c=(char)(c-0x40);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toLowerCase(char* str)
|
||||
{
|
||||
char* origPtr = str;
|
||||
|
||||
if (str) {
|
||||
do
|
||||
*str = (char)uprv_tolower(*str);
|
||||
while (*(str++));
|
||||
}
|
||||
|
||||
return origPtr;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toUpperCase(char* str)
|
||||
{
|
||||
char* origPtr = str;
|
||||
|
||||
if (str) {
|
||||
do
|
||||
*str = (char)uprv_toupper(*str);
|
||||
while (*(str++));
|
||||
}
|
||||
|
||||
return origPtr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Takes a int32_t and fills in a char* string with that number "radix"-based.
|
||||
* Does not handle negative values (makes an empty string for them).
|
||||
* Writes at most 12 chars ("-2147483647" plus NUL).
|
||||
* Returns the length of the string (not including the NUL).
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
|
||||
{
|
||||
char tbuf[30];
|
||||
int32_t tbx = sizeof(tbuf);
|
||||
uint8_t digit;
|
||||
int32_t length = 0;
|
||||
uint32_t uval;
|
||||
|
||||
U_ASSERT(radix>=2 && radix<=16);
|
||||
uval = (uint32_t) v;
|
||||
if(v<0 && radix == 10) {
|
||||
/* Only in base 10 do we conside numbers to be signed. */
|
||||
uval = (uint32_t)(-v);
|
||||
buffer[length++] = '-';
|
||||
}
|
||||
|
||||
tbx = sizeof(tbuf)-1;
|
||||
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
|
||||
do {
|
||||
digit = (uint8_t)(uval % radix);
|
||||
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
|
||||
uval = uval / radix;
|
||||
} while (uval != 0);
|
||||
|
||||
/* copy converted number into user buffer */
|
||||
uprv_strcpy(buffer+length, tbuf+tbx);
|
||||
length += sizeof(tbuf) - tbx -1;
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Takes a int64_t and fills in a char* string with that number "radix"-based.
|
||||
* Writes at most 21: chars ("-9223372036854775807" plus NUL).
|
||||
* Returns the length of the string, not including the terminating NULL.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
|
||||
{
|
||||
char tbuf[30];
|
||||
int32_t tbx = sizeof(tbuf);
|
||||
uint8_t digit;
|
||||
int32_t length = 0;
|
||||
uint64_t uval;
|
||||
|
||||
U_ASSERT(radix>=2 && radix<=16);
|
||||
uval = (uint64_t) v;
|
||||
if(v<0 && radix == 10) {
|
||||
/* Only in base 10 do we conside numbers to be signed. */
|
||||
uval = (uint64_t)(-v);
|
||||
buffer[length++] = '-';
|
||||
}
|
||||
|
||||
tbx = sizeof(tbuf)-1;
|
||||
tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
|
||||
do {
|
||||
digit = (uint8_t)(uval % radix);
|
||||
tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
|
||||
uval = uval / radix;
|
||||
} while (uval != 0);
|
||||
|
||||
/* copy converted number into user buffer */
|
||||
uprv_strcpy(buffer+length, tbuf+tbx);
|
||||
length += sizeof(tbuf) - tbx -1;
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_stringToInteger(const char *integerString, int32_t radix)
|
||||
{
|
||||
char *end;
|
||||
return uprv_strtoul(integerString, &end, radix);
|
||||
|
||||
}
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
T_CString_stricmp(const char *str1, const char *str2) {
|
||||
if(str1==NULL) {
|
||||
if(str2==NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(str2==NULL) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-NULL strings lexically with lowercase */
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
|
||||
for(;;) {
|
||||
c1=(unsigned char)*str1;
|
||||
c2=(unsigned char)*str2;
|
||||
if(c1==0) {
|
||||
if(c2==0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(c2==0) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
++str1;
|
||||
++str2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) {
|
||||
if(str1==NULL) {
|
||||
if(str2==NULL) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(str2==NULL) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-NULL strings lexically with lowercase */
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
|
||||
for(; n--;) {
|
||||
c1=(unsigned char)*str1;
|
||||
c2=(unsigned char)*str2;
|
||||
if(c1==0) {
|
||||
if(c2==0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if(c2==0) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
++str1;
|
||||
++str2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strdup(const char *src) {
|
||||
size_t len = uprv_strlen(src) + 1;
|
||||
char *dup = (char *) uprv_malloc(len);
|
||||
|
||||
if (dup) {
|
||||
uprv_memcpy(dup, src, len);
|
||||
}
|
||||
|
||||
return dup;
|
||||
}
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strndup(const char *src, int32_t n) {
|
||||
char *dup;
|
||||
|
||||
if(n < 0) {
|
||||
dup = uprv_strdup(src);
|
||||
} else {
|
||||
dup = (char*)uprv_malloc(n+1);
|
||||
if (dup) {
|
||||
uprv_memcpy(dup, src, n);
|
||||
dup[n] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return dup;
|
||||
}
|
120
source/common/cstring.h
Normal file
120
source/common/cstring.h
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File CSTRING.H
|
||||
*
|
||||
* Contains CString interface
|
||||
*
|
||||
* @author Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 6/17/98 hshih Created.
|
||||
* 05/03/99 stephen Changed from functions to macros.
|
||||
* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CSTRING_H
|
||||
#define CSTRING_H 1
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
|
||||
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
|
||||
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
|
||||
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
|
||||
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
|
||||
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
|
||||
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
|
||||
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
|
||||
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
|
||||
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_toupper(char c);
|
||||
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_asciitolower(char c);
|
||||
|
||||
U_CAPI char U_EXPORT2
|
||||
uprv_ebcdictolower(char c);
|
||||
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_tolower uprv_asciitolower
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_tolower uprv_ebcdictolower
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
|
||||
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
|
||||
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
|
||||
#ifdef U_WINDOWS
|
||||
# if defined(__BORLANDC__)
|
||||
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE stricmp(str1, str2)
|
||||
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE strnicmp(str1, str2, n)
|
||||
# else
|
||||
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2)
|
||||
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE _strnicmp(str1, str2, n)
|
||||
# endif
|
||||
#elif defined(POSIX)
|
||||
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2)
|
||||
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE strncasecmp(str1, str2, n)
|
||||
#else
|
||||
# define uprv_stricmp(str1, str2) T_CString_stricmp(str1, str2)
|
||||
# define uprv_strnicmp(str1, str2, n) T_CString_strnicmp(str1, str2, n)
|
||||
#endif
|
||||
|
||||
/* Conversion from a digit to the character with radix base from 2-19 */
|
||||
/* May need to use U_UPPER_ORDINAL*/
|
||||
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strdup(const char *src);
|
||||
|
||||
/**
|
||||
* uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
|
||||
* Terminate with a null at offset n. If n is negative, works like uprv_strdup.
|
||||
* @param src
|
||||
* @param n length of the input string, not including null.
|
||||
* @return new string (owned by caller, use uprv_free to free).
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI char* U_EXPORT2
|
||||
uprv_strndup(const char *src, int32_t n);
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toLowerCase(char* str);
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toUpperCase(char* str);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
T_CString_stringToInteger(const char *integerString, int32_t radix);
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
T_CString_stricmp(const char *str1, const char *str2);
|
||||
|
||||
U_CAPI int U_EXPORT2
|
||||
T_CString_strnicmp(const char *str1, const char *str2, uint32_t n);
|
||||
|
||||
#endif /* ! CSTRING_H */
|
53
source/common/cwchar.c
Normal file
53
source/common/cwchar.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cwchar.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2001may25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !U_HAVE_WCSCPY
|
||||
|
||||
#include "cwchar.h"
|
||||
|
||||
/*
 * Fallback wcscat(): append src (including its terminator) to the end of
 * the wide string in dst and return dst.
 * Declared with U_EXPORT2 so the definition matches the prototype in cwchar.h.
 */
U_CAPI wchar_t * U_EXPORT2
uprv_wcscat(wchar_t *dst, const wchar_t *src) {
    wchar_t *start=dst;

    /* find the terminator of the existing contents */
    while(*dst!=0) {
        ++dst;
    }
    /* copy src, terminator included */
    while((*dst=*src)!=0) {
        ++dst;
        ++src;
    }
    return start;
}
|
||||
|
||||
/*
 * Fallback wcscpy(): copy the wide string src (including its terminator)
 * into dst and return dst.
 * Declared with U_EXPORT2 so the definition matches the prototype in cwchar.h.
 */
U_CAPI wchar_t * U_EXPORT2
uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
    wchar_t *start=dst;

    while((*dst=*src)!=0) {
        ++dst;
        ++src;
    }
    return start;
}
|
||||
|
||||
U_CAPI size_t uprv_wcslen(const wchar_t *src) {
|
||||
const wchar_t *start=src;
|
||||
while(*src!=0) {
|
||||
++src;
|
||||
}
|
||||
return src-start;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
56
source/common/cwchar.h
Normal file
56
source/common/cwchar.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: cwchar.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2001may25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file contains ICU-internal definitions of wchar_t operations.
|
||||
* These definitions were moved here from cstring.h so that fewer
|
||||
* ICU implementation files include wchar.h.
|
||||
*/
|
||||
|
||||
#ifndef __CWCHAR_H__
|
||||
#define __CWCHAR_H__
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
|
||||
#if U_HAVE_WCHAR_H
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Wide-character functions */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* The following are not available on all systems, defined in wchar.h or string.h. */
|
||||
#if U_HAVE_WCSCPY
|
||||
# define uprv_wcscpy wcscpy
|
||||
# define uprv_wcscat wcscat
|
||||
# define uprv_wcslen wcslen
|
||||
#else
|
||||
U_CAPI wchar_t* U_EXPORT2
|
||||
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
|
||||
U_CAPI wchar_t* U_EXPORT2
|
||||
uprv_wcscat(wchar_t *dst, const wchar_t *src);
|
||||
U_CAPI size_t U_EXPORT2
|
||||
uprv_wcslen(const wchar_t *src);
|
||||
#endif
|
||||
|
||||
/* The following are part of the ANSI C standard, defined in stdlib.h . */
|
||||
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
|
||||
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
|
||||
|
||||
|
||||
#endif
|
427
source/common/dictbe.cpp
Normal file
427
source/common/dictbe.cpp
Normal file
|
@ -0,0 +1,427 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006-2008, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "dictbe.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "uvector.h"
|
||||
#include "triedict.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/*DictionaryBreakEngine::DictionaryBreakEngine() {
|
||||
fTypes = 0;
|
||||
}*/
|
||||
|
||||
// Construct an engine for the given break types. breakTypes is a bit set:
// handles() and findBreaks() test bit (1 << breakType) against it.
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
    this->fTypes = breakTypes;
}

// Nothing to release explicitly.
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
|
||||
|
||||
// Report whether this engine is responsible for character c under the given
// break type: breakType must be in 0..31, its bit must be set in fTypes,
// and c must be a member of fSet.
UBool
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
    if (breakType < 0 || breakType >= 32) {
        return FALSE;
    }
    if ((((uint32_t)1 << breakType) & fTypes) == 0) {
        return FALSE;
    }
    return fSet.contains(c) ? TRUE : FALSE;
}
|
||||
|
||||
int32_t
|
||||
DictionaryBreakEngine::findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const {
|
||||
int32_t result = 0;
|
||||
|
||||
// Find the span of characters included in the set.
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
int32_t current;
|
||||
int32_t rangeStart;
|
||||
int32_t rangeEnd;
|
||||
UChar32 c = utext_current32(text);
|
||||
if (reverse) {
|
||||
UBool isDict = fSet.contains(c);
|
||||
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
|
||||
c = utext_previous32(text);
|
||||
isDict = fSet.contains(c);
|
||||
}
|
||||
rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1);
|
||||
rangeEnd = start + 1;
|
||||
}
|
||||
else {
|
||||
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop for postincrement
|
||||
c = utext_current32(text);
|
||||
}
|
||||
rangeStart = start;
|
||||
rangeEnd = current;
|
||||
}
|
||||
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
|
||||
utext_setNativeIndex(text, current);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
|
||||
fSet = set;
|
||||
// Compact for caching
|
||||
fSet.compact();
|
||||
}
|
||||
|
||||
/*void
|
||||
DictionaryBreakEngine::setBreakTypes( uint32_t breakTypes ) {
|
||||
fTypes = breakTypes;
|
||||
}*/
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
|
||||
// Helper class for improving readability of the Thai word break
|
||||
// algorithm. The implementation is completely inline.
|
||||
|
||||
// List size, limited by the maximum number of words in the dictionary
|
||||
// that form a nested sequence.
|
||||
#define POSSIBLE_WORD_LIST_MAX 20
|
||||
|
||||
class PossibleWord {
|
||||
private:
|
||||
// list of word candidate lengths, in increasing length order
|
||||
int32_t lengths[POSSIBLE_WORD_LIST_MAX];
|
||||
int count; // Count of candidates
|
||||
int32_t prefix; // The longest match with a dictionary word
|
||||
int32_t offset; // Offset in the text of these candidates
|
||||
int mark; // The preferred candidate's offset
|
||||
int current; // The candidate we're currently looking at
|
||||
|
||||
public:
|
||||
PossibleWord();
|
||||
~PossibleWord();
|
||||
|
||||
// Fill the list of candidates if needed, select the longest, and return the number found
|
||||
int candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd );
|
||||
|
||||
// Select the currently marked candidate, point after it in the text, and invalidate self
|
||||
int32_t acceptMarked( UText *text );
|
||||
|
||||
// Back up from the current candidate to the next shorter one; return TRUE if that exists
|
||||
// and point the text after it
|
||||
UBool backUp( UText *text );
|
||||
|
||||
// Return the longest prefix this candidate location shares with a dictionary word
|
||||
int32_t longestPrefix();
|
||||
|
||||
// Mark the current candidate as the one we like
|
||||
void markCurrent();
|
||||
};
|
||||
|
||||
inline
|
||||
PossibleWord::PossibleWord() {
|
||||
offset = -1;
|
||||
}
|
||||
|
||||
inline
|
||||
PossibleWord::~PossibleWord() {
|
||||
}
|
||||
|
||||
inline int
|
||||
PossibleWord::candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd ) {
|
||||
// TODO: If getIndex is too slow, use offset < 0 and add discardAll()
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
if (start != offset) {
|
||||
offset = start;
|
||||
prefix = dict->matches(text, rangeEnd-start, lengths, count, sizeof(lengths)/sizeof(lengths[0]));
|
||||
// Dictionary leaves text after longest prefix, not longest word. Back up.
|
||||
if (count <= 0) {
|
||||
utext_setNativeIndex(text, start);
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
utext_setNativeIndex(text, start+lengths[count-1]);
|
||||
}
|
||||
current = count-1;
|
||||
mark = current;
|
||||
return count;
|
||||
}
|
||||
|
||||
// Move the text index to just after the marked candidate and return the
// length of that candidate.
inline int32_t
PossibleWord::acceptMarked( UText *text ) {
    const int32_t chosen = lengths[mark];
    utext_setNativeIndex(text, offset + chosen);
    return chosen;
}
|
||||
|
||||
// Select the next shorter candidate, if any. On success the text index is
// moved to just after that candidate and TRUE is returned; otherwise the
// state is left alone and FALSE is returned.
inline UBool
PossibleWord::backUp( UText *text ) {
    if (current <= 0) {
        return FALSE;
    }
    --current;
    utext_setNativeIndex(text, offset + lengths[current]);
    return TRUE;
}
|
||||
|
||||
// Accessor for the prefix value stored by the last dictionary lookup.
inline int32_t PossibleWord::longestPrefix() {
    return this->prefix;
}
|
||||
|
||||
inline void
|
||||
PossibleWord::markCurrent() {
|
||||
mark = current;
|
||||
}
|
||||
|
||||
// How many words in a row are "good enough"?
|
||||
#define THAI_LOOKAHEAD 3
|
||||
|
||||
// Will not combine a non-word with a preceding dictionary word longer than this
|
||||
#define THAI_ROOT_COMBINE_THRESHOLD 3
|
||||
|
||||
// Will not combine a non-word that shares at least this much prefix with a
|
||||
// dictionary word, with a preceding word
|
||||
#define THAI_PREFIX_COMBINE_THRESHOLD 3
|
||||
|
||||
// Elision character
|
||||
#define THAI_PAIYANNOI 0x0E2F
|
||||
|
||||
// Repeat character
|
||||
#define THAI_MAIYAMOK 0x0E46
|
||||
|
||||
// Minimum word size
|
||||
#define THAI_MIN_WORD 2
|
||||
|
||||
// Minimum number of characters for two words
|
||||
#define THAI_MIN_WORD_SPAN (THAI_MIN_WORD * 2)
|
||||
|
||||
// Build a Thai break engine for word and line breaks around the adopted
// dictionary, and set up the character sets used by the heuristics in
// divideUpDictionaryRange(). Each helper set is compacted for caching.
ThaiBreakEngine::ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
    : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
      fDictionary(adoptDictionary)
{
    // Characters handled by this engine; only registered if the pattern applied.
    fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
        setCharacters(fThaiWordSet);
    }

    // Marks (plus U+0020) that a break must never directly precede.
    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status)
            .add(0x0020)
            .compact();

    // Characters that may end a word.
    fEndWordSet = fThaiWordSet;
    fEndWordSet.remove(0x0E31)              // MAI HAN-AKAT
               .remove(0x0E40, 0x0E44)      // SARA E through SARA AI MAIMALAI
               .compact();

    // Characters that may begin a word.
    fBeginWordSet.add(0x0E01, 0x0E2E)       // KO KAI through HO NOKHUK
                 .add(0x0E40, 0x0E44)       // SARA E through SARA AI MAIMALAI
                 .compact();

    // Suffix characters handled after a word.
    fSuffixSet.add(THAI_PAIYANNOI)
              .add(THAI_MAIYAMOK)
              .compact();
}
|
||||
|
||||
// Release the dictionary that was adopted by the constructor.
ThaiBreakEngine::~ThaiBreakEngine()
{
    delete fDictionary;
}
|
||||
|
||||
// Divide the dictionary range [rangeStart, rangeEnd) of text into words.
// Each pass of the main loop looks up dictionary candidates at the current
// text index (the words[] array is indexed modulo THAI_LOOKAHEAD), chooses
// one using up to two further words of lookahead, and falls back to scanning
// for a plausible boundary when no dictionary word is found. The end index
// of every word found is pushed onto foundBreaks.
// Returns 0 without touching foundBreaks when the range is shorter than
// THAI_MIN_WORD_SPAN; otherwise returns the number of words found, not
// counting a final break at or beyond rangeEnd, which is popped again.
int32_t
|
||||
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const {
|
||||
if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) {
|
||||
return 0; // Not enough characters for two words
|
||||
}
|
||||
|
||||
uint32_t wordsFound = 0;
|
||||
int32_t wordLength;
|
||||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
PossibleWord words[THAI_LOOKAHEAD];
|
||||
UChar32 uc;
|
||||
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
|
||||
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
|
||||
wordLength = 0;
|
||||
|
||||
// Look for candidate words at the current position
|
||||
int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
|
||||
// If we found exactly one, use that
|
||||
if (candidates == 1) {
|
||||
wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text);
|
||||
wordsFound += 1;
|
||||
}
|
||||
|
||||
// If there was more than one, see which one can take us forward the most words
|
||||
else if (candidates > 1) {
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int wordsMatched = 1;
|
||||
if (words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
goto foundBest;
|
||||
}
|
||||
|
||||
// See if any of the possible second words is followed by a third word
|
||||
do {
|
||||
// If we find a third word, stop right away
|
||||
if (words[(wordsFound+2)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
goto foundBest;
|
||||
}
|
||||
}
|
||||
while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(text));
|
||||
}
|
||||
}
|
||||
while (words[wordsFound%THAI_LOOKAHEAD].backUp(text));
|
||||
foundBest:
|
||||
wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text);
|
||||
wordsFound += 1;
|
||||
}
|
||||
|
||||
// We come here after having either found a word or not. We look ahead to the
|
||||
// next word. If it's not a dictionary word, we will combine it with the word we
|
||||
// just found (if there is one), but only if the preceding word does not exceed
|
||||
// the threshold.
|
||||
// The text iterator should now be positioned at the end of the word we found.
|
||||
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
|
||||
// if it is a dictionary word, do nothing. If it isn't, then if there is
|
||||
// no preceding word, or the non-word shares less than the minimum threshold
|
||||
// of characters with a dictionary word, then scan to resynchronize
|
||||
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
|
||||
&& (wordLength == 0
|
||||
|| words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
|
||||
// Look for a plausible word boundary
|
||||
//TODO: This section will need a rework for UText.
|
||||
int32_t remaining = rangeEnd - (current+wordLength);
|
||||
UChar32 pc = utext_current32(text);
|
||||
int32_t chars = 0;
|
||||
for (;;) {
|
||||
utext_next32(text);
|
||||
uc = utext_current32(text);
|
||||
// TODO: Here we're counting on the fact that the SA languages are all
|
||||
// in the BMP. This should get fixed with the UText rework.
|
||||
chars += 1;
|
||||
if (--remaining <= 0) {
|
||||
break;
|
||||
}
|
||||
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
|
||||
// Maybe. See if it's in the dictionary.
|
||||
// NOTE: In the original Apple code, checked that the next
|
||||
// two characters after uc were not 0x0E4C THANTHAKHAT before
|
||||
// checking the dictionary. That is just a performance filter,
|
||||
// but it's not clear it's faster than checking the trie.
|
||||
int candidates = words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
utext_setNativeIndex(text, current+wordLength+chars);
|
||||
if (candidates > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pc = uc;
|
||||
}
|
||||
|
||||
// Bump the word count if there wasn't already one
|
||||
if (wordLength <= 0) {
|
||||
wordsFound += 1;
|
||||
}
|
||||
|
||||
// Update the length with the passed-over characters
|
||||
wordLength += chars;
|
||||
}
|
||||
else {
|
||||
// Back up to where we were for next iteration
|
||||
utext_setNativeIndex(text, current+wordLength);
|
||||
}
|
||||
}
|
||||
|
||||
// Never stop before a combining mark.
|
||||
int32_t currPos;
|
||||
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
|
||||
utext_next32(text);
|
||||
wordLength += (int32_t)utext_getNativeIndex(text) - currPos;
|
||||
}
|
||||
|
||||
// Look ahead for possible suffixes if a dictionary word does not follow.
|
||||
// We do this in code rather than using a rule so that the heuristic
|
||||
// resynch continues to function. For example, one of the suffix characters
|
||||
// could be a typo in the middle of a word.
|
||||
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
|
||||
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
|
||||
&& fSuffixSet.contains(uc = utext_current32(text))) {
|
||||
if (uc == THAI_PAIYANNOI) {
|
||||
if (!fSuffixSet.contains(utext_previous32(text))) {
|
||||
// Skip over previous end and PAIYANNOI
|
||||
utext_next32(text);
|
||||
utext_next32(text);
|
||||
wordLength += 1; // Add PAIYANNOI to word
|
||||
uc = utext_current32(text); // Fetch next character
|
||||
}
|
||||
else {
|
||||
// Restore prior position
|
||||
utext_next32(text);
|
||||
}
|
||||
}
|
||||
if (uc == THAI_MAIYAMOK) {
|
||||
if (utext_previous32(text) != THAI_MAIYAMOK) {
|
||||
// Skip over previous end and MAIYAMOK
|
||||
utext_next32(text);
|
||||
utext_next32(text);
|
||||
wordLength += 1; // Add MAIYAMOK to word
|
||||
}
|
||||
else {
|
||||
// Restore prior position
|
||||
utext_next32(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
utext_setNativeIndex(text, current+wordLength);
|
||||
}
|
||||
}
|
||||
|
||||
// Did we find a word on this iteration? If so, push it on the break stack
|
||||
if (wordLength > 0) {
|
||||
foundBreaks.push((current+wordLength), status);
|
||||
}
|
||||
}
|
||||
|
||||
// Don't return a break for the end of the dictionary range if there is one there.
|
||||
if (foundBreaks.peeki() >= rangeEnd) {
|
||||
(void) foundBreaks.popi();
|
||||
wordsFound -= 1;
|
||||
}
|
||||
|
||||
return wordsFound;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
193
source/common/dictbe.h
Normal file
193
source/common/dictbe.h
Normal file
|
@ -0,0 +1,193 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef DICTBE_H
|
||||
#define DICTBE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class TrieWordDictionary;
|
||||
|
||||
/*******************************************************************
|
||||
* DictionaryBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
|
||||
* dictionary to determine language-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a DictionaryBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class DictionaryBreakEngine : public LanguageBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fSet;
|
||||
|
||||
/**
|
||||
* The set of break types handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
uint32_t fTypes;
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
DictionaryBreakEngine();
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Constructor setting the break types handled.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
*/
|
||||
DictionaryBreakEngine( uint32_t breakTypes );
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~DictionaryBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character for
|
||||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles( UChar32 c, int32_t breakType ) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param reverse Whether the caller is looking for breaks in a reverse
|
||||
* direction.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* <p>Set the character set handled by this engine.</p>
|
||||
*
|
||||
* @param set A UnicodeSet of the set of characters handled by the engine
|
||||
*/
|
||||
virtual void setCharacters( const UnicodeSet &set );
|
||||
|
||||
/**
|
||||
* <p>Set the break types handled by this engine.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
*/
|
||||
// virtual void setBreakTypes( uint32_t breakTypes );
|
||||
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const = 0;
|
||||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* ThaiBreakEngine
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
|
||||
* TrieWordDictionary and heuristics to determine Thai-specific breaks.</p>
|
||||
*
|
||||
* <p>After it is constructed a ThaiBreakEngine may be shared between
|
||||
* threads without synchronization.</p>
|
||||
*/
|
||||
class ThaiBreakEngine : public DictionaryBreakEngine {
|
||||
private:
|
||||
/**
|
||||
* The set of characters handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet fThaiWordSet;
|
||||
UnicodeSet fEndWordSet;
|
||||
UnicodeSet fBeginWordSet;
|
||||
UnicodeSet fSuffixSet;
|
||||
UnicodeSet fMarkSet;
|
||||
const TrieWordDictionary *fDictionary;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
* @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
|
||||
* engine is deleted.
|
||||
*/
|
||||
ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
*/
|
||||
virtual ~ThaiBreakEngine();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* DICTBE_H */
|
||||
#endif
|
61
source/common/dtintrv.cpp
Normal file
61
source/common/dtintrv.cpp
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (C) 2008, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
* File DTINTRV.CPP
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "unicode/dtintrv.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
|
||||
|
||||
//DateInterval::DateInterval(){}
|
||||
|
||||
|
||||
// Construct an interval from its two end points.
DateInterval::DateInterval(UDate from, UDate to) : fromDate(from), toDate(to) {
}
|
||||
|
||||
|
||||
// Destructor; there is nothing to release.
DateInterval::~DateInterval() {
}
|
||||
|
||||
|
||||
// Copy constructor: takes both end points from the other interval.
DateInterval::DateInterval(const DateInterval& other)
    : UObject(other),
      fromDate(other.fromDate),
      toDate(other.toDate)
{
}
|
||||
|
||||
|
||||
// Assignment: copy both end points unless assigning to self.
DateInterval& DateInterval::operator=(const DateInterval& other) {
    if ( this == &other ) {
        return *this;
    }
    fromDate = other.fromDate;
    toDate = other.toDate;
    return *this;
}
|
||||
|
||||
|
||||
// Return a heap-allocated copy of this interval, created with new.
DateInterval* DateInterval::clone() const {
    DateInterval* copy = new DateInterval(*this);
    return copy;
}
|
||||
|
||||
|
||||
// Two intervals are equal when both end points compare equal.
UBool DateInterval::operator==(const DateInterval& other) const {
    if ( fromDate != other.fromDate ) {
        return FALSE;
    }
    return toDate == other.toDate;
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
34
source/common/errorcode.cpp
Normal file
34
source/common/errorcode.cpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: errorcode.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2009mar10
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/errorcode.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Clear the stored error code to U_ZERO_ERROR and return the value it
// held before the call.
UErrorCode ErrorCode::reset() {
    const UErrorCode previous = errorCode;
    errorCode = U_ZERO_ERROR;
    return previous;
}
|
||||
|
||||
void ErrorCode::assertSuccess() const {
|
||||
if(isFailure()) {
|
||||
handleFailure();
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
207
source/common/hash.h
Normal file
207
source/common/hash.h
Normal file
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1997-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* Date Name Description
|
||||
* 03/28/00 aliu Creation.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "uhash.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
|
||||
* hashtable implemented in C. Hashtable is designed to be idiomatic and
|
||||
* easy-to-use in C++.
|
||||
*
|
||||
* Hashtable is an INTERNAL CLASS.
|
||||
*/
|
||||
class U_COMMON_API Hashtable : public UMemory {
    UHashtable* hash;     // set to &hashObj by init() on success; 0 otherwise
    UHashtable hashObj;   // storage for the wrapped C hashtable

    inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

public:
    /**
     * Construct a hashtable keyed by UnicodeString.
     * @param ignoreKeyCase If true, keys are case insensitive.
     * @param status Error code
     */
    Hashtable(UBool ignoreKeyCase, UErrorCode& status);

    /**
     * Construct a hashtable with caller-supplied comparators.
     * @param keyComp Comparator for comparing the keys
     * @param valueComp Comparator for comparing the values
     * @param status Error code
     */
    Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

    /**
     * Construct a hashtable
     * @param status Error code
     */
    Hashtable(UErrorCode& status);

    /**
     * Construct a hashtable, _disregarding any error_. Use this constructor
     * with caution.
     */
    Hashtable();

    /**
     * Non-virtual destructor; make this virtual if Hashtable is subclassed
     * in the future.
     */
    ~Hashtable();

    // The members below forward to the uhash_* function of the matching name.

    UObjectDeleter *setValueDeleter(UObjectDeleter *fn);

    int32_t count() const;

    // put/puti store a newly allocated copy of the key.
    void* put(const UnicodeString& key, void* value, UErrorCode& status);

    int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);

    void* get(const UnicodeString& key) const;

    int32_t geti(const UnicodeString& key) const;

    void* remove(const UnicodeString& key);

    int32_t removei(const UnicodeString& key);

    void removeAll(void);

    const UHashElement* find(const UnicodeString& key) const;

    const UHashElement* nextElement(int32_t& pos) const;

    // Replace the key comparator; returns what uhash_setKeyComparator returns.
    UKeyComparator* setKeyCompartor(UKeyComparator*keyComp);

    // Replace the value comparator; returns what uhash_setValueComparator returns.
    UValueComparator* setValueCompartor(UValueComparator* valueComp);

    UBool equals(const Hashtable& that) const;
private:
    Hashtable(const Hashtable &other); // forbid copying of this class
    Hashtable &operator=(const Hashtable &other); // forbid copying of this class
};
|
||||
|
||||
/*********************************************************************
|
||||
* Implementation
|
||||
********************************************************************/
|
||||
|
||||
// Shared constructor helper. Does nothing if status already holds a
// failure. Otherwise initializes hashObj; on success points hash at it and
// installs uhash_deleteUnicodeString as the key deleter.
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
    if (U_FAILURE(status)) {
        return;   // hash keeps the value the constructor gave it
    }
    hash = &hashObj;
    uhash_setKeyDeleter(hash, uhash_deleteUnicodeString);
}
|
||||
|
||||
// Construct with caller-supplied key and value comparators; keys are
// hashed with uhash_hashUnicodeString.
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status)
    : hash(0)
{
    init(uhash_hashUnicodeString, keyComp, valueComp, status);
}
|
||||
// Construct with either the caseless or the case-sensitive UnicodeString
// hash and compare functions, depending on ignoreKeyCase. No value
// comparator is installed.
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status) : hash(0) {
    if (ignoreKeyCase) {
        init(uhash_hashCaselessUnicodeString, uhash_compareCaselessUnicodeString, NULL, status);
    } else {
        init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
    }
}
|
||||
|
||||
// Construct a case-sensitive UnicodeString-keyed table with no value
// comparator.
inline Hashtable::Hashtable(UErrorCode& status) : hash(0) {
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
|
||||
|
||||
// Construct a case-sensitive UnicodeString-keyed table. Any error from
// initialization goes into a local status and is not reported.
inline Hashtable::Hashtable() : hash(0) {
    UErrorCode ignored = U_ZERO_ERROR;
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, ignored);
}
|
||||
|
||||
// Close the wrapped table, if init() ever set it up.
inline Hashtable::~Hashtable() {
    if (hash == NULL) {
        return;
    }
    uhash_close(hash);
}
|
||||
|
||||
// Forward to uhash_setValueDeleter and hand back its result.
inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
    UObjectDeleter *result = uhash_setValueDeleter(hash, fn);
    return result;
}
|
||||
|
||||
// Forward to uhash_count on the wrapped table.
inline int32_t Hashtable::count() const {
    const int32_t n = uhash_count(hash);
    return n;
}
|
||||
|
||||
inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
|
||||
return uhash_put(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
// Integer-valued variant of put(): stores value under a newly allocated
// copy of key via uhash_puti and returns what uhash_puti returns.
inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
    UnicodeString *keyCopy = new UnicodeString(key);
    return uhash_puti(hash, keyCopy, value, &status);
}
|
||||
|
||||
inline void* Hashtable::get(const UnicodeString& key) const {
|
||||
return uhash_get(hash, &key);
|
||||
}
|
||||
|
||||
// Forward to uhash_geti, passing the address of key.
inline int32_t Hashtable::geti(const UnicodeString& key) const {
    const int32_t value = uhash_geti(hash, &key);
    return value;
}
|
||||
|
||||
inline void* Hashtable::remove(const UnicodeString& key) {
|
||||
return uhash_remove(hash, &key);
|
||||
}
|
||||
|
||||
// Forward to uhash_removei and return its result.
inline int32_t Hashtable::removei(const UnicodeString& key) {
    const int32_t removed = uhash_removei(hash, &key);
    return removed;
}
|
||||
|
||||
// Forward to uhash_find and return the element pointer it yields.
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
    const UHashElement *element = uhash_find(hash, &key);
    return element;
}
|
||||
|
||||
// Forward to uhash_nextElement; pos is passed by address, so the callee
// may update it.
inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
    const UHashElement *element = uhash_nextElement(hash, &pos);
    return element;
}
|
||||
|
||||
// Forward to uhash_removeAll on the wrapped table.
inline void Hashtable::removeAll(void)
{
    uhash_removeAll(hash);
}
|
||||
|
||||
// Forward to uhash_setKeyComparator and return its result.
inline UKeyComparator* Hashtable::setKeyCompartor(UKeyComparator*keyComp) {
    UKeyComparator *result = uhash_setKeyComparator(hash, keyComp);
    return result;
}
|
||||
|
||||
// Forward to uhash_setValueComparator and return its result.
inline UValueComparator* Hashtable::setValueCompartor(UValueComparator* valueComp) {
    UValueComparator *result = uhash_setValueComparator(hash, valueComp);
    return result;
}
|
||||
|
||||
// Compare the two wrapped tables with uhash_equals.
inline UBool Hashtable::equals(const Hashtable& that) const {
    const UBool same = uhash_equals(hash, that.hash);
    return same;
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
61
source/common/icucfg.h.in
Normal file
61
source/common/icucfg.h.in
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* common/icucfg.h.in. Generated automatically from configure.in by autoheader. */
|
||||
|
||||
/* Define if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define if your processor stores words with the most significant
|
||||
byte first (like Motorola and SPARC, unlike Intel and VAX). */
|
||||
#undef WORDS_BIGENDIAN
|
||||
|
||||
/* Copyright (c) 1999-2000, International Business Machines Corporation and
|
||||
others. All Rights Reserved. */
|
||||
/* Define to signed char if not in <sys/types.h> */
|
||||
#undef int8_t
|
||||
|
||||
/* Define to unsigned char if not in <sys/types.h> */
|
||||
#undef uint8_t
|
||||
|
||||
/* Define to signed short if not in <sys/types.h> */
|
||||
#undef int16_t
|
||||
|
||||
/* Define to unsigned short if not in <sys/types.h> */
|
||||
#undef uint16_t
|
||||
|
||||
/* Define to signed long if not in <sys/types.h> */
|
||||
#undef int32_t
|
||||
|
||||
/* Define to unsigned long if not in <sys/types.h> */
|
||||
#undef uint32_t
|
||||
|
||||
/* Define to signed char if not in <sys/types.h> */
|
||||
#undef bool_t
|
||||
|
||||
/* Define if your system has <wchar.h> */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to the size of wchar_t */
|
||||
#undef SIZEOF_WCHAR_T
|
||||
|
||||
/* Define if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define if you have the cma library (-lcma). */
|
||||
#undef HAVE_LIBCMA
|
||||
|
||||
/* Define if you have the dl library (-ldl). */
|
||||
#undef HAVE_LIBDL
|
||||
|
||||
/* Define if you have the dld library (-ldld). */
|
||||
#undef HAVE_LIBDLD
|
||||
|
||||
/* Define if you have the m library (-lm). */
|
||||
#undef HAVE_LIBM
|
||||
|
||||
/* Define if you have the pthread library (-lpthread). */
|
||||
#undef HAVE_LIBPTHREAD
|
||||
|
||||
/* Define if you have the pthreads library (-lpthreads). */
|
||||
#undef HAVE_LIBPTHREADS
|
||||
|
||||
/* Define if you have the wcs library (-lwcs). */
|
||||
#undef HAVE_LIBWCS
|
83
source/common/icudataver.c
Normal file
83
source/common/icudataver.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/icudataver.h"
|
||||
#include "unicode/uversion.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "uresimp.h" /* for ures_getVersionByKey */
|
||||
#include "cmemory.h"
|
||||
|
||||
/*
|
||||
* Determines if icustd is in the data.
|
||||
*/
|
||||
static UBool hasICUSTDBundle();
|
||||
|
||||
static UBool hasICUSTDBundle() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBool result = TRUE;
|
||||
|
||||
UResourceBundle *icustdbundle = ures_openDirect(NULL, U_ICU_STD_BUNDLE, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
result = TRUE;
|
||||
} else {
|
||||
result = FALSE;
|
||||
}
|
||||
|
||||
ures_close(icustdbundle);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Fill dataVersionFillin with the version stored under U_ICU_DATA_KEY in
 * the U_ICU_VERSION_BUNDLE resource bundle. Does nothing when *status
 * already holds a failure or when dataVersionFillin is NULL. Errors from
 * opening or reading the bundle are reported through status.
 */
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
    UResourceBundle *bundle;

    if (U_FAILURE(*status) || dataVersionFillin == NULL) {
        return;
    }

    bundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
    if (U_SUCCESS(*status)) {
        ures_getVersionByKey(bundle, U_ICU_DATA_KEY, dataVersionFillin, status);
    }
    /* Closed whether or not the open succeeded. */
    ures_close(bundle);
}
|
||||
|
||||
/*
 * Compare the version of the loaded data with the U_ICU_DATA_VERSION
 * compiled into the library.
 *
 * dataVersionFillin  if not NULL, receives the data version that was read
 * isModifiedFillin   if not NULL, is set to FALSE when hasICUSTDBundle()
 *                    reports TRUE, and to TRUE otherwise
 * status             error code; nothing is done if it already holds a
 *                    failure
 *
 * Returns FALSE when the data version compares greater than or equal to
 * the compiled-in version. Returns TRUE otherwise, including when status
 * holds a failure on entry or the data version could not be read.
 *
 * Both fill-in parameters are NULL-checked: previously isModifiedFillin was
 * dereferenced unconditionally, unlike dataVersionFillin.
 */
U_CAPI UBool U_EXPORT2 u_isDataOlder(UVersionInfo dataVersionFillin, UBool *isModifiedFillin, UErrorCode *status) {
    UBool result = TRUE;
    UVersionInfo dataVersion;
    UVersionInfo wiredVersion;

    if (U_FAILURE(*status)) {
        return result;
    }

    u_getDataVersion(dataVersion, status);
    if (U_SUCCESS(*status)) {
        u_versionFromString(wiredVersion, U_ICU_DATA_VERSION);

        if (uprv_memcmp(dataVersion, wiredVersion, sizeof(UVersionInfo)) >= 0) {
            result = FALSE;
        }

        if (dataVersionFillin != NULL) {
            uprv_memcpy(dataVersionFillin, dataVersion, sizeof(UVersionInfo));
        }

        if (isModifiedFillin != NULL) {
            /* "Modified" means the icustd bundle could not be opened. */
            *isModifiedFillin = (UBool)!hasICUSTDBundle();
        }
    }

    return result;
}
|
25
source/common/localsvc.h
Normal file
25
source/common/localsvc.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2006 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef LOCALSVC_H
|
||||
#define LOCALSVC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if U_LOCAL_SERVICE_HOOK
|
||||
/**
|
||||
* Prototype for user-supplied service hook. This function is expected to return
|
||||
* a type of factory object specific to the requested service.
|
||||
*
|
||||
* @param what service-specific string identifying the specific user hook
|
||||
* @param status error status
|
||||
* @return a service-specific hook, or NULL on failure.
|
||||
*/
|
||||
U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
|
||||
#endif
|
||||
|
||||
#endif
|
46
source/common/locbased.cpp
Normal file
46
source/common/locbased.cpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: January 16 2004
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "locbased.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
 * Returns the requested locale as a Locale object. When getLocaleID()
 * yields a null pointer, the Locale is built from the empty string instead.
 */
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    const char* localeID = getLocaleID(type, status);
    if (localeID == 0) {
        localeID = "";
    }
    return Locale(localeID);
}
|
||||
|
||||
/*
 * Returns the aliased locale ID string selected by `type`.
 *
 *   ULOC_VALID_LOCALE   -> the `valid` pointer
 *   ULOC_ACTUAL_LOCALE  -> the `actual` pointer
 *   anything else       -> NULL, with status set to U_ILLEGAL_ARGUMENT_ERROR
 *
 * Returns NULL without touching status when status already holds a failure.
 */
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (type == ULOC_VALID_LOCALE) {
        return valid;
    }
    if (type == ULOC_ACTUAL_LOCALE) {
        return actual;
    }

    status = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
|
||||
|
||||
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
|
||||
if (validID != 0) {
|
||||
uprv_strcpy(valid, validID);
|
||||
}
|
||||
if (actualID != 0) {
|
||||
uprv_strcpy(actual, actualID);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
97
source/common/locbased.h
Normal file
97
source/common/locbased.h
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: January 16 2004
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef LOCBASED_H
|
||||
#define LOCBASED_H
|
||||
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
/**
|
||||
* Macro to declare a LocaleBased wrapper object for the given
|
||||
* object, which must have two members named `validLocale' and
|
||||
* `actualLocale'.
|
||||
*/
|
||||
#define U_LOCALE_BASED(varname, objname) \
|
||||
LocaleBased varname((objname).validLocale, (objname).actualLocale);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* A utility class that unifies the implementation of getLocale() by
|
||||
* various ICU services. This class is likely to be removed in the
|
||||
* ICU 3.0 time frame in favor of an integrated approach with the
|
||||
* services framework.
|
||||
* @since ICU 2.8
|
||||
*/
|
||||
class U_COMMON_API LocaleBased : public UMemory {

 public:

    /**
     * Wrap two writable locale ID buffers. The pointers are stored as-is
     * (aliased, not copied) and must stay valid for the lifetime of this
     * object.
     * @param validAlias  buffer holding the valid locale ID
     * @param actualAlias buffer holding the actual locale ID
     */
    inline LocaleBased(char* validAlias, char* actualAlias)
        : valid(validAlias), actual(actualAlias) {
    }

    /**
     * Wrap two read-only locale ID strings. The pointers are stored as-is
     * (aliased, not copied) and must stay valid for the lifetime of this
     * object. Constness is cast away internally so that both constructors
     * can share the same members; setLocaleIDs() writes through these
     * pointers, so it should not be used on an object built this way.
     * @param validAlias  string holding the valid locale ID
     * @param actualAlias string holding the actual locale ID
     */
    inline LocaleBased(const char* validAlias, const char* actualAlias)
        : valid(const_cast<char*>(validAlias)),
          actual(const_cast<char*>(actualAlias)) {
    }

    /**
     * Return locale meta-data for the wrapped service object as a Locale.
     * @param type   either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale
     */
    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;

    /**
     * Return the locale ID string for the wrapped service object.
     * @param type   either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale ID
     */
    const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;

    /**
     * Overwrite the locale meta-data of the wrapped service object.
     * A zero (null) parameter is ignored.
     * @param valid  the ID of the valid locale
     * @param actual the ID of the actual locale
     */
    void setLocaleIDs(const char* valid, const char* actual);

 private:

    /* Aliased pointer to the valid locale ID; not owned by this object. */
    char* valid;

    /* Aliased pointer to the actual locale ID; not owned by this object. */
    char* actual;
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
1340
source/common/locid.cpp
Normal file
1340
source/common/locid.cpp
Normal file
File diff suppressed because it is too large
Load diff
893
source/common/locmap.c
Normal file
893
source/common/locmap.c
Normal file
|
@ -0,0 +1,893 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1996-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* Provides functionality for mapping between
|
||||
* LCID and Posix IDs or ICU locale to codepage
|
||||
*
|
||||
* Note: All classes and code in this file are
|
||||
* intended for internal use only.
|
||||
*
|
||||
* Methods of interest:
|
||||
* unsigned long convertToLCID(const char*);
|
||||
* const char* convertToPosix(unsigned long);
|
||||
*
|
||||
* Kathleen Wilson, 4/30/96
|
||||
*
|
||||
* Date Name Description
|
||||
* 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
|
||||
* setId() method and safety check against
|
||||
* MAX_ID_LENGTH.
|
||||
* 04/23/99 stephen Added C wrapper for convertToPosix.
|
||||
* 09/18/00 george Removed the memory leaks.
|
||||
* 08/23/01 george Convert to C
|
||||
*/
|
||||
|
||||
#include "locmap.h"
|
||||
#include "cstring.h"
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* The mapping from Win32 locale ID numbers to POSIX locale strings should
|
||||
* be the faster one.
|
||||
*
|
||||
* Many LCID values come from winnt.h
|
||||
* Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
|
||||
*/
|
||||
|
||||
/*
|
||||
////////////////////////////////////////////////
|
||||
//
|
||||
// Internal Classes for LCID <--> POSIX Mapping
|
||||
//
|
||||
/////////////////////////////////////////////////
|
||||
*/
|
||||
|
||||
typedef struct ILcidPosixElement
|
||||
{
|
||||
const uint32_t hostID;
|
||||
const char * const posixID;
|
||||
} ILcidPosixElement;
|
||||
|
||||
typedef struct ILcidPosixMap
|
||||
{
|
||||
const uint32_t numRegions;
|
||||
const struct ILcidPosixElement* const regionMaps;
|
||||
} ILcidPosixMap;
|
||||
|
||||
|
||||
/*
|
||||
/////////////////////////////////////////////////
|
||||
//
|
||||
// Easy macros to make the LCID <--> POSIX Mapping
|
||||
//
|
||||
/////////////////////////////////////////////////
|
||||
*/
|
||||
|
||||
/*
|
||||
The standard one language/one country mapping for LCID.
|
||||
The first element must be the language, and the following
|
||||
elements are the language with the country.
|
||||
*/
|
||||
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
|
||||
static const ILcidPosixElement languageID[] = { \
|
||||
{LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
|
||||
{hostID, #posixID}, \
|
||||
};
|
||||
|
||||
/*
|
||||
Create the map for the posixID. This macro supposes that the language string
|
||||
name is the same as the global variable name, and that the first element
|
||||
in the ILcidPosixElement is just the language.
|
||||
*/
|
||||
#define ILCID_POSIX_MAP(_posixID) \
|
||||
{sizeof(_posixID)/sizeof(ILcidPosixElement), _posixID}
|
||||
|
||||
/*
|
||||
////////////////////////////////////////////
|
||||
//
|
||||
// Create the table of LCID to POSIX Mapping
|
||||
// None of it should be dynamically created.
|
||||
//
|
||||
// Keep static locale variables inside the function so that
|
||||
// it can be created properly during static init.
|
||||
//
|
||||
////////////////////////////////////////////
|
||||
*/
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
|
||||
|
||||
static const ILcidPosixElement ar[] = {
|
||||
{0x01, "ar"},
|
||||
{0x3801, "ar_AE"},
|
||||
{0x3c01, "ar_BH"},
|
||||
{0x1401, "ar_DZ"},
|
||||
{0x0c01, "ar_EG"},
|
||||
{0x0801, "ar_IQ"},
|
||||
{0x2c01, "ar_JO"},
|
||||
{0x3401, "ar_KW"},
|
||||
{0x3001, "ar_LB"},
|
||||
{0x1001, "ar_LY"},
|
||||
{0x1801, "ar_MA"},
|
||||
{0x2001, "ar_OM"},
|
||||
{0x4001, "ar_QA"},
|
||||
{0x0401, "ar_SA"},
|
||||
{0x2801, "ar_SY"},
|
||||
{0x1c01, "ar_TN"},
|
||||
{0x2401, "ar_YE"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
|
||||
|
||||
static const ILcidPosixElement az[] = {
|
||||
{0x2c, "az"},
|
||||
{0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
|
||||
{0x082c, "az_Cyrl"}, /* Cyrillic based */
|
||||
{0x042c, "az_Latn_AZ"}, /* Latin based */
|
||||
{0x042c, "az_Latn"}, /* Latin based */
|
||||
{0x042c, "az_AZ"} /* Latin based */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
|
||||
|
||||
static const ILcidPosixElement ber[] = {
|
||||
{0x5f, "ber"},
|
||||
{0x045f, "ber_Arab_DZ"},
|
||||
{0x045f, "ber_Arab"},
|
||||
{0x085f, "ber_Latn_DZ"},
|
||||
{0x085f, "ber_Latn"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
|
||||
|
||||
static const ILcidPosixElement bn[] = {
|
||||
{0x45, "bn"},
|
||||
{0x0845, "bn_BD"},
|
||||
{0x0445, "bn_IN"}
|
||||
};
|
||||
|
||||
static const ILcidPosixElement bo[] = {
|
||||
{0x51, "bo"},
|
||||
{0x0851, "bo_BT"},
|
||||
{0x0451, "bo_CN"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
|
||||
|
||||
/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
|
||||
static const ILcidPosixElement cs_CZ[] = {
|
||||
{0x05, "cs"},
|
||||
{0x0405, "cs_CZ"},
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
|
||||
|
||||
static const ILcidPosixElement de[] = {
|
||||
{0x07, "de"},
|
||||
{0x0c07, "de_AT"},
|
||||
{0x0807, "de_CH"},
|
||||
{0x0407, "de_DE"},
|
||||
{0x1407, "de_LI"},
|
||||
{0x1007, "de_LU"},
|
||||
{0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
|
||||
{0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
|
||||
|
||||
static const ILcidPosixElement en[] = {
|
||||
{0x09, "en"},
|
||||
{0x0c09, "en_AU"},
|
||||
{0x2809, "en_BZ"},
|
||||
{0x1009, "en_CA"},
|
||||
{0x0809, "en_GB"},
|
||||
{0x1809, "en_IE"},
|
||||
{0x4009, "en_IN"},
|
||||
{0x2009, "en_JM"},
|
||||
{0x4409, "en_MY"},
|
||||
{0x1409, "en_NZ"},
|
||||
{0x3409, "en_PH"},
|
||||
{0x4809, "en_SG"},
|
||||
{0x2C09, "en_TT"},
|
||||
{0x0409, "en_US"},
|
||||
{0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
|
||||
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
|
||||
{0x1c09, "en_ZA"},
|
||||
{0x3009, "en_ZW"},
|
||||
{0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
|
||||
{0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
|
||||
{0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
|
||||
{0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
|
||||
{0x0409, "en_UM"} /* Alias for en_US. Leave last. */
|
||||
};
|
||||
|
||||
static const ILcidPosixElement en_US_POSIX[] = {
|
||||
{0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
|
||||
};
|
||||
|
||||
static const ILcidPosixElement es[] = {
|
||||
{0x0a, "es"},
|
||||
{0x2c0a, "es_AR"},
|
||||
{0x400a, "es_BO"},
|
||||
{0x340a, "es_CL"},
|
||||
{0x240a, "es_CO"},
|
||||
{0x140a, "es_CR"},
|
||||
{0x1c0a, "es_DO"},
|
||||
{0x300a, "es_EC"},
|
||||
{0x0c0a, "es_ES"}, /*Modern sort.*/
|
||||
{0x100a, "es_GT"},
|
||||
{0x480a, "es_HN"},
|
||||
{0x080a, "es_MX"},
|
||||
{0x4c0a, "es_NI"},
|
||||
{0x180a, "es_PA"},
|
||||
{0x280a, "es_PE"},
|
||||
{0x500a, "es_PR"},
|
||||
{0x3c0a, "es_PY"},
|
||||
{0x440a, "es_SV"},
|
||||
{0x540a, "es_US"},
|
||||
{0x380a, "es_UY"},
|
||||
{0x200a, "es_VE"},
|
||||
{0x040a, "es_ES@collation=traditional"},
|
||||
{0x040a, "es@collation=traditional"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
|
||||
|
||||
/* ISO-639 doesn't distinguish between Persian and Dari.*/
|
||||
static const ILcidPosixElement fa[] = {
|
||||
{0x29, "fa"},
|
||||
{0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
|
||||
{0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
|
||||
};
|
||||
|
||||
/* duplicate for roundtripping */
|
||||
static const ILcidPosixElement fa_AF[] = {
|
||||
{0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
|
||||
{0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
|
||||
|
||||
static const ILcidPosixElement fr[] = {
|
||||
{0x0c, "fr"},
|
||||
{0x080c, "fr_BE"},
|
||||
{0x0c0c, "fr_CA"},
|
||||
{0x240c, "fr_CD"},
|
||||
{0x100c, "fr_CH"},
|
||||
{0x300c, "fr_CI"},
|
||||
{0x2c0c, "fr_CM"},
|
||||
{0x040c, "fr_FR"},
|
||||
{0x3c0c, "fr_HT"},
|
||||
{0x140c, "fr_LU"},
|
||||
{0x380c, "fr_MA"},
|
||||
{0x180c, "fr_MC"},
|
||||
{0x340c, "fr_ML"},
|
||||
{0x200c, "fr_RE"},
|
||||
{0x280c, "fr_SN"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
|
||||
|
||||
/* This LCID is really two different locales.*/
|
||||
static const ILcidPosixElement ga[] = {
|
||||
{0x3c, "ga"},
|
||||
{0x3c, "gd"},
|
||||
{0x083c, "ga_IE"}, /* Gaelic (Ireland) */
|
||||
{0x043c, "gd_GB"} /* Gaelic (Scotland) */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha, ha_NG) /* ha_Latn_NG? */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
|
||||
|
||||
/* This LCID is really four different locales.*/
|
||||
static const ILcidPosixElement hr[] = {
|
||||
{0x1a, "hr"},
|
||||
{0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
|
||||
{0x141a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
|
||||
{0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
|
||||
{0x141a, "bs"}, /* Bosnian */
|
||||
{0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
|
||||
{0x201a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
|
||||
{0x101a, "hr_BA"}, /* Croatian in Bosnia */
|
||||
{0x041a, "hr_HR"}, /* Croatian*/
|
||||
{0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
|
||||
{0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
|
||||
{0x081a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
|
||||
{0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
|
||||
{0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
|
||||
{0x0c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
|
||||
{0x0c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
|
||||
|
||||
static const ILcidPosixElement it[] = {
|
||||
{0x10, "it"},
|
||||
{0x0810, "it_CH"},
|
||||
{0x0410, "it_IT"}
|
||||
};
|
||||
|
||||
static const ILcidPosixElement iu[] = {
|
||||
{0x5d, "iu"},
|
||||
{0x045d, "iu_Cans_CA"},
|
||||
{0x045d, "iu_Cans"},
|
||||
{0x085d, "iu_Latn_CA"},
|
||||
{0x085d, "iu_Latn"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
|
||||
|
||||
static const ILcidPosixElement ko[] = {
|
||||
{0x12, "ko"},
|
||||
{0x0812, "ko_KP"},
|
||||
{0x0412, "ko_KR"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
|
||||
|
||||
static const ILcidPosixElement ks[] = { /* We could add PK and CN too */
|
||||
{0x60, "ks"},
|
||||
{0x0860, "ks_IN"}, /* Documentation doesn't mention script */
|
||||
{0x0460, "ks_Arab_IN"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
|
||||
|
||||
static const ILcidPosixElement mn[] = {
|
||||
{0x50, "mn"},
|
||||
{0x0850, "mn_CN"},
|
||||
{0x0450, "mn_MN"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
|
||||
|
||||
static const ILcidPosixElement ms[] = {
|
||||
{0x3e, "ms"},
|
||||
{0x083e, "ms_BN"}, /* Brunei Darussalam*/
|
||||
{0x043e, "ms_MY"} /* Malaysia*/
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
|
||||
|
||||
static const ILcidPosixElement ne[] = {
|
||||
{0x61, "ne"},
|
||||
{0x0861, "ne_IN"}, /* India*/
|
||||
{0x0461, "ne_NP"} /* Nepal*/
|
||||
};
|
||||
|
||||
static const ILcidPosixElement nl[] = {
|
||||
{0x13, "nl"},
|
||||
{0x0813, "nl_BE"},
|
||||
{0x0413, "nl_NL"}
|
||||
};
|
||||
|
||||
/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
|
||||
static const ILcidPosixElement no[] = {
|
||||
{0x14, "nb"}, /* really nb */
|
||||
{0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
|
||||
{0x0414, "no"}, /* really nb_NO */
|
||||
{0x0414, "no_NO"}, /* really nb_NO */
|
||||
{0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
|
||||
{0x0814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
|
||||
{0x0814, "no_NO_NY"}/* really nn_NO */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0472, om, om_ET) /* TODO: Verify the country */
|
||||
|
||||
/* Declared as or_IN to get around compiler errors*/
|
||||
static const ILcidPosixElement or_IN[] = {
|
||||
{0x48, "or"},
|
||||
{0x0448, "or_IN"},
|
||||
};
|
||||
|
||||
static const ILcidPosixElement pa[] = {
|
||||
{0x46, "pa"},
|
||||
{0x0446, "pa_IN"},
|
||||
{0x0846, "pa_PK"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
|
||||
|
||||
static const ILcidPosixElement pt[] = {
|
||||
{0x16, "pt"},
|
||||
{0x0416, "pt_BR"},
|
||||
{0x0816, "pt_PT"}
|
||||
};
|
||||
|
||||
static const ILcidPosixElement qu[] = {
|
||||
{0x6b, "qu"},
|
||||
{0x046b, "qu_BO"},
|
||||
{0x086b, "qu_EC"},
|
||||
{0x0C6b, "qu_PE"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut, qut_GT) /* qut is an ISO-639-3 code */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
|
||||
|
||||
static const ILcidPosixElement root[] = {
|
||||
{0x00, "root"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
|
||||
|
||||
static const ILcidPosixElement sd[] = {
|
||||
{0x59, "sd"},
|
||||
{0x0459, "sd_IN"},
|
||||
{0x0859, "sd_PK"}
|
||||
};
|
||||
|
||||
static const ILcidPosixElement se[] = {
|
||||
{0x3b, "se"},
|
||||
{0x0c3b, "se_FI"},
|
||||
{0x043b, "se_NO"},
|
||||
{0x083b, "se_SE"},
|
||||
{0x183b, "sma_NO"},
|
||||
{0x1c3b, "sma_SE"},
|
||||
{0x103b, "smj_NO"},
|
||||
{0x143b, "smj_SE"},
|
||||
{0x243b, "smn_FI"},
|
||||
{0x203b, "sms_FI"},
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0477, so, so_ET) /* TODO: Verify the country */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
|
||||
|
||||
static const ILcidPosixElement sv[] = {
|
||||
{0x1d, "sv"},
|
||||
{0x081d, "sv_FI"},
|
||||
{0x041d, "sv_SE"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0428, tg, tg_TJ) /* Cyrillic based by default */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
|
||||
|
||||
static const ILcidPosixElement ti[] = {
|
||||
{0x73, "ti"},
|
||||
{0x0873, "ti_ER"},
|
||||
{0x0473, "ti_ET"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn, tn_BW)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug, ug_CN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
|
||||
|
||||
static const ILcidPosixElement ur[] = {
|
||||
{0x20, "ur"},
|
||||
{0x0820, "ur_IN"},
|
||||
{0x0420, "ur_PK"}
|
||||
};
|
||||
|
||||
static const ILcidPosixElement uz[] = {
|
||||
{0x43, "uz"},
|
||||
{0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
|
||||
{0x0843, "uz_Cyrl"}, /* Cyrillic based */
|
||||
{0x0843, "uz_UZ"}, /* Cyrillic based */
|
||||
{0x0443, "uz_Latn_UZ"}, /* Latin based */
|
||||
{0x0443, "uz_Latn"} /* Latin based */
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve, ve_ZA) /* TODO: Verify the country */
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
|
||||
|
||||
static const ILcidPosixElement wen[] = {
|
||||
{0x2E, "wen"},
|
||||
{0x042E, "wen_DE"},
|
||||
{0x042E, "hsb_DE"},
|
||||
{0x082E, "dsb_DE"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
|
||||
|
||||
static const ILcidPosixElement zh[] = {
|
||||
{0x04, "zh"},
|
||||
{0x0804, "zh_Hans_CN"},
|
||||
{0x0804, "zh_Hans"},
|
||||
{0x0804, "zh_CN"},
|
||||
{0x0c04, "zh_Hant_HK"},
|
||||
{0x0c04, "zh_HK"},
|
||||
{0x1404, "zh_Hant_MO"},
|
||||
{0x1404, "zh_MO"},
|
||||
{0x1004, "zh_Hans_SG"},
|
||||
{0x1004, "zh_SG"},
|
||||
{0x0404, "zh_Hant_TW"},
|
||||
{0x0404, "zh_Hant"},
|
||||
{0x0404, "zh_TW"},
|
||||
{0x30404,"zh_Hant_TW"}, /* Bopomofo order */
|
||||
{0x30404,"zh_TW"}, /* Bopomofo order */
|
||||
{0x20404,"zh_Hant_TW@collation=stroke"},
|
||||
{0x20404,"zh_TW@collation=stroke"},
|
||||
{0x20804,"zh_Hans_CN@collation=stroke"},
|
||||
{0x20804,"zh_CN@collation=stroke"}
|
||||
};
|
||||
|
||||
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
|
||||
|
||||
/* This must be static and grouped by LCID. */
|
||||
|
||||
/* non-existent ISO-639-2 codes */
|
||||
/*
|
||||
0x466 Edo
|
||||
0x467 Fulfulde - Nigeria
|
||||
0x486 K'iche - Guatemala
|
||||
0x430 Sutu
|
||||
*/
|
||||
static const ILcidPosixMap gPosixIDmap[] = {
|
||||
ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
|
||||
ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
|
||||
ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
|
||||
ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
|
||||
ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
|
||||
ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
|
||||
ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
|
||||
ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
|
||||
ILCID_POSIX_MAP(ber), /* ber Berber/Tamazight 0x5f */
|
||||
ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
|
||||
ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
|
||||
ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
|
||||
ILCID_POSIX_MAP(br), /* br Breton 0x7e */
|
||||
ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
|
||||
ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
|
||||
ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
|
||||
ILCID_POSIX_MAP(cs_CZ), /* cs Czech 0x05 */
|
||||
ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
|
||||
ILCID_POSIX_MAP(da), /* da Danish 0x06 */
|
||||
ILCID_POSIX_MAP(de), /* de German 0x07 */
|
||||
ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
|
||||
ILCID_POSIX_MAP(el), /* el Greek 0x08 */
|
||||
ILCID_POSIX_MAP(en), /* en English 0x09 */
|
||||
ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
|
||||
ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
|
||||
ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
|
||||
ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
|
||||
ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
|
||||
ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
|
||||
ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
|
||||
ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
|
||||
ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
|
||||
ILCID_POSIX_MAP(fr), /* fr French 0x0c */
|
||||
ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
|
||||
ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
|
||||
ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
|
||||
ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
|
||||
ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
|
||||
ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
|
||||
ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
|
||||
ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
|
||||
ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
|
||||
ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
|
||||
ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
|
||||
ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
|
||||
ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
|
||||
ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
|
||||
ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
|
||||
ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
|
||||
ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
|
||||
ILCID_POSIX_MAP(it), /* it Italian 0x10 */
|
||||
ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
|
||||
ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
|
||||
ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
|
||||
ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
|
||||
ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
|
||||
ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
|
||||
ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
|
||||
ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
|
||||
ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
|
||||
ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
|
||||
ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
|
||||
ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
|
||||
ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
|
||||
ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
|
||||
ILCID_POSIX_MAP(la), /* la Latin 0x76 */
|
||||
ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
|
||||
ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
|
||||
ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
|
||||
ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
|
||||
ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
|
||||
ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
|
||||
ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
|
||||
ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
|
||||
ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
|
||||
ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
|
||||
ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
|
||||
ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
|
||||
ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
|
||||
/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
|
||||
ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
|
||||
ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
|
||||
/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
|
||||
ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
|
||||
ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
|
||||
ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
|
||||
ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
|
||||
ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
|
||||
ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
|
||||
ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
|
||||
ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
|
||||
ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
|
||||
ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
|
||||
ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
|
||||
ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
|
||||
ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
|
||||
ILCID_POSIX_MAP(root), /* root 0x00 */
|
||||
ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
|
||||
ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
|
||||
ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
|
||||
ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
|
||||
ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
|
||||
ILCID_POSIX_MAP(se), /* se Sami 0x3b */
|
||||
/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
|
||||
ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
|
||||
ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
|
||||
ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
|
||||
ILCID_POSIX_MAP(so), /* so Somali 0x77 */
|
||||
ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
|
||||
/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
|
||||
ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
|
||||
ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
|
||||
ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
|
||||
ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
|
||||
ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
|
||||
ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
|
||||
ILCID_POSIX_MAP(th), /* th Thai 0x1e */
|
||||
ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
|
||||
ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
|
||||
ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
|
||||
ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
|
||||
ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
|
||||
ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
|
||||
ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
|
||||
ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
|
||||
ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
|
||||
ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
|
||||
ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
|
||||
ILCID_POSIX_MAP(wen), /* wen Sorbian 0x2e */
|
||||
ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
|
||||
ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
|
||||
ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
|
||||
ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
|
||||
ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
|
||||
};
|
||||
|
||||
/* Number of ILcidPosixMap entries in gPosixIDmap (total array size divided by the size of one entry). */
static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
|
||||
|
||||
/**
 * Counts how many leading characters two NUL-terminated IDs have in common.
 * File-local helper; in this file it is only called by getHostID.
 *
 * @param id1 the first ID
 * @param id2 the second ID
 * @return the length of the longest common prefix of id1 and id2
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
        /* keep scanning while both strings agree and id1 has not ended */
    }
    return matched;
}
|
||||
|
||||
/**
|
||||
* Searches for a Windows LCID
|
||||
*
|
||||
* @param posixid the Posix style locale id.
|
||||
* @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
|
||||
* no equivalent Windows LCID.
|
||||
* @return the LCID
|
||||
*/
|
||||
static uint32_t
|
||||
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
|
||||
{
|
||||
int32_t bestIdx = 0;
|
||||
int32_t bestIdxDiff = 0;
|
||||
int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
|
||||
uint32_t idx;
|
||||
|
||||
for (idx = 0; idx < this_0->numRegions; idx++ ) {
|
||||
int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
|
||||
if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
|
||||
if (posixIDlen == sameChars) {
|
||||
/* Exact match */
|
||||
return this_0->regionMaps[idx].hostID;
|
||||
}
|
||||
bestIdxDiff = sameChars;
|
||||
bestIdx = idx;
|
||||
}
|
||||
}
|
||||
/* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
|
||||
/* We also have to make sure that sid and si and similar string subsets don't match. */
|
||||
if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
|
||||
&& this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
|
||||
{
|
||||
*status = U_USING_FALLBACK_WARNING;
|
||||
return this_0->regionMaps[bestIdx].hostID;
|
||||
}
|
||||
|
||||
/*no match found */
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return this_0->regionMaps->hostID;
|
||||
}
|
||||
|
||||
static const char*
|
||||
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i <= this_0->numRegions; i++)
|
||||
{
|
||||
if (this_0->regionMaps[i].hostID == hostID)
|
||||
{
|
||||
return this_0->regionMaps[i].posixID;
|
||||
}
|
||||
}
|
||||
|
||||
/* If you get here, then no matching region was found,
|
||||
so return the language id with the wild card region. */
|
||||
return this_0->regionMaps[0].posixID;
|
||||
}
|
||||
|
||||
/*
|
||||
//////////////////////////////////////
|
||||
//
|
||||
// LCID --> POSIX
|
||||
//
|
||||
/////////////////////////////////////
|
||||
*/
|
||||
|
||||
U_CAPI const char *
|
||||
uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
|
||||
{
|
||||
uint16_t langID = LANGUAGE_LCID(hostid);
|
||||
uint32_t localeIndex;
|
||||
|
||||
for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++)
|
||||
{
|
||||
if (langID == gPosixIDmap[localeIndex].regionMaps->hostID)
|
||||
{
|
||||
return getPosixID(&gPosixIDmap[localeIndex], hostid);
|
||||
}
|
||||
}
|
||||
|
||||
/* no match found */
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
//////////////////////////////////////
|
||||
//
|
||||
// POSIX --> LCID
|
||||
// This should only be called from uloc_getLCID.
|
||||
// The locale ID must be in canonical form.
|
||||
// langID is separate so that this file doesn't depend on the uloc_* API.
|
||||
//
|
||||
/////////////////////////////////////
|
||||
*/
|
||||
|
||||
U_CAPI uint32_t
|
||||
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
|
||||
{
|
||||
|
||||
uint32_t low = 0;
|
||||
uint32_t high = gLocaleCount;
|
||||
uint32_t mid = high;
|
||||
uint32_t oldmid = 0;
|
||||
int32_t compVal;
|
||||
|
||||
uint32_t value = 0;
|
||||
uint32_t fallbackValue = (uint32_t)-1;
|
||||
UErrorCode myStatus;
|
||||
uint32_t idx;
|
||||
|
||||
/* Check for incomplete id. */
|
||||
if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*Binary search for the map entry for normal cases */
|
||||
|
||||
while (high > low) /*binary search*/{
|
||||
|
||||
mid = (high+low) >> 1; /*Finds median*/
|
||||
|
||||
if (mid == oldmid)
|
||||
break;
|
||||
|
||||
compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
|
||||
if (compVal < 0){
|
||||
high = mid;
|
||||
}
|
||||
else if (compVal > 0){
|
||||
low = mid;
|
||||
}
|
||||
else /*we found it*/{
|
||||
return getHostID(&gPosixIDmap[mid], posixID, status);
|
||||
}
|
||||
oldmid = mid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sometimes we can't do a binary search on posixID because some LCIDs
|
||||
* go to different locales. We hit one of those special cases.
|
||||
*/
|
||||
for (idx = 0; idx < gLocaleCount; idx++ ) {
|
||||
myStatus = U_ZERO_ERROR;
|
||||
value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
|
||||
if (myStatus == U_ZERO_ERROR) {
|
||||
return value;
|
||||
}
|
||||
else if (myStatus == U_USING_FALLBACK_WARNING) {
|
||||
fallbackValue = value;
|
||||
}
|
||||
}
|
||||
|
||||
if (fallbackValue != (uint32_t)-1) {
|
||||
*status = U_USING_FALLBACK_WARNING;
|
||||
return fallbackValue;
|
||||
}
|
||||
|
||||
/* no match found */
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0; /* return international (root) */
|
||||
}
|
||||
|
37
source/common/locmap.h
Normal file
37
source/common/locmap.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* File locmap.h : Locale Mapping Classes
|
||||
*
|
||||
*
|
||||
* Created by: Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 3/11/97 aliu Added setId().
|
||||
* 4/20/99 Madhu Added T_convertToPosix()
|
||||
* 09/18/00 george Removed the memory leaks.
|
||||
* 08/23/01 george Convert to C
|
||||
*============================================================================
|
||||
*/
|
||||
|
||||
#ifndef LOCMAP_H
|
||||
#define LOCMAP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Keeps the low 10 bits (mask 0x03FF) of an LCID, as a uint16_t; presumably
   the Windows primary-language ID. The argument and the whole expansion are
   parenthesized so that expressions such as LANGUAGE_LCID(a | b) mask the
   complete value. */
#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
|
||||
|
||||
U_CAPI const char *uprv_convertToPosix(uint32_t hostid, UErrorCode* status);
|
||||
|
||||
/* Don't call this function directly. Use uloc_getLCID instead. */
|
||||
U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
|
||||
|
||||
#endif /* LOCMAP_H */
|
||||
|
267
source/common/locutil.cpp
Normal file
267
source/common/locutil.cpp
Normal file
|
@ -0,0 +1,267 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "locutil.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
#include "umutex.h"
|
||||
|
||||
// see LocaleUtility::getAvailableLocaleNames
|
||||
static U_NAMESPACE_QUALIFIER Hashtable * LocaleUtility_cache = NULL;
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* Release all static memory held by Locale Utility.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
// Cleanup callback: frees the LocaleUtility cache and clears the pointer.
static UBool U_CALLCONV service_cleanup(void) {
    // deleting a NULL pointer is a no-op, so no guard is required
    delete LocaleUtility_cache;
    LocaleUtility_cache = NULL;
    return TRUE;
}
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Copies *id into result and fixes the case of its ASCII letters: everything
 * before the first '_' is lower-cased, everything from there up to the limit
 * is upper-cased. No other changes are made.
 *
 * @param id     the locale ID, or NULL
 * @param result receives the case-fixed copy; set to bogus when id is NULL
 * @return result
 */
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
    if (id == NULL) {
        result.setToBogus();
        return result;
    }

    // Fix case only (no other changes) up to the first '@' or '.' or
    // end of string, whichever comes first.  In 3.0 I changed this to
    // stop at first '@' or '.'.  It used to run out to the end of
    // string.  My fix makes the tests pass but is probably
    // structurally incorrect.  See below.  [alan 3.0]

    // TODO: Doug, you might want to revise this...
    result = *id;

    // NOTE(review): assuming indexOf() returns a negative value for "not
    // found" (as the "< 0" tests below imply), a '.' only becomes the limit
    // when an '@' follows it; without any '@' the limit is the string end.
    int32_t limit = result.indexOf(AT_SIGN_CHAR);
    const int32_t dot = result.indexOf(PERIOD_CHAR);
    if (dot >= 0 && dot < limit) {
        limit = dot;
    }
    if (limit < 0) {
        limit = result.length();
    }

    int32_t languageEnd = result.indexOf(UNDERSCORE_CHAR);
    if (languageEnd < 0) {
        languageEnd = limit;
    }

    int32_t pos = 0;
    // A-Z -> a-z in the part before the first underscore
    while (pos < languageEnd) {
        const UChar c = result.charAt(pos);
        if (c >= 0x0041 && c <= 0x005a) {
            result.setCharAt(pos, (UChar)(c + 0x20));
        }
        ++pos;
    }
    // a-z -> A-Z from there up to the limit
    while (pos < limit) {
        const UChar c = result.charAt(pos);
        if (c >= 0x0061 && c <= 0x007a) {
            result.setCharAt(pos, (UChar)(c - 0x20));
        }
        ++pos;
    }
    return result;

#if 0
    // This code does a proper full level 2 canonicalization of id.
    // It's nasty to go from UChar to char to char to UChar -- but
    // that's what you have to do to use the uloc_canonicalize
    // function on UnicodeStrings.

    // I ended up doing the alternate fix (see above) not for
    // performance reasons, although performance will certainly be
    // better, but because doing a full level 2 canonicalization
    // causes some tests to fail.  [alan 3.0]

    // TODO: Doug, you might want to revisit this...
    result.setToBogus();
    if (id != 0) {
        int32_t buflen = id->length() + 8; // space for NUL
        char* buf = (char*) uprv_malloc(buflen);
        char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
        if (buf != 0 && canon != 0) {
            U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
            UErrorCode ec = U_ZERO_ERROR;
            uloc_canonicalize(buf, canon, buflen, &ec);
            if (U_SUCCESS(ec)) {
                result = UnicodeString(canon);
            }
        }
        uprv_free(buf);
        uprv_free(canon);
    }
    return result;
#endif
}
|
||||
|
||||
/**
 * Sets result to the Locale created from the locale ID in id.
 *
 * @param id     the locale ID
 * @param result receives the Locale; set to bogus when id is bogus or is
 *               BUFLEN (128) or more UChars long
 * @return result
 */
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
    enum { BUFLEN = 128 }; // larger than ever needed

    if (id.isBogus() || id.length() >= BUFLEN) {
        result.setToBogus();
        return result;
    }

    /*
     * We need to convert from a UnicodeString to char * in order to
     * create a Locale.
     *
     * Problem: Locale ID strings may contain '@' which is a variant
     * character and cannot be handled by invariant-character conversion.
     *
     * Hack: Since ICU code can handle locale IDs with multiple encodings
     * of '@' (at least for EBCDIC; it's not known to be a problem for
     * ASCII-based systems),
     * we use regular invariant-character conversion for everything else
     * and manually convert U+0040 into a compiler-char-constant '@'.
     * While this compilation-time constant may not match the runtime
     * encoding of '@', it should be one of the encodings which ICU
     * recognizes.
     *
     * There should be only at most one '@' in a locale ID.
     */
    char buffer[BUFLEN];
    int32_t start = 0;
    int32_t at;

    while ((at = id.indexOf((UChar)0x40, start)) >= 0) {
        // normal invariant-character conversion for text between @s
        id.extract(start, at - start, buffer + start, BUFLEN - start, US_INV);
        // manually "convert" U+0040 at id[at] into '@' at buffer[at]
        buffer[at] = '@';
        start = at + 1;
    }
    // no @ between start and the rest of the string
    id.extract(start, INT32_MAX, buffer + start, BUFLEN - start, US_INV);

    result = Locale::createFromName(buffer);
    return result;
}
|
||||
|
||||
/**
 * Appends the name of locale to result, converting it with
 * invariant-character conversion (US_INV).
 *
 * @param locale the locale whose name is wanted
 * @param result the string appended to; set to bogus when locale is bogus
 * @return result
 */
UnicodeString&
LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
{
    if (locale.isBogus()) {
        result.setToBogus();
        return result;
    }
    const UnicodeString name(locale.getName(), -1, US_INV);
    result.append(name);
    return result;
}
|
||||
|
||||
/**
 * Returns the set of locale names available for a resource bundle path,
 * creating and caching it on first use.
 *
 * @param bundleID the bundle path handed to ures_openAvailableLocales; an
 *                 empty string is passed on as NULL
 * @return a hashtable owned by the cache whose keys are the locale names,
 *         or NULL on failure
 */
const Hashtable*
LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
{
    // LocaleUtility_cache is a hash-of-hashes.  The top-level keys
    // are path strings ('bundleID') passed to
    // ures_openAvailableLocales.  The top-level values are
    // second-level hashes.  The second-level keys are result strings
    // from ures_openAvailableLocales.  The second-level values are
    // garbage ((void*)1 or other random pointer).

    UErrorCode status = U_ZERO_ERROR;
    Hashtable* cache;
    umtx_lock(NULL);
    cache = LocaleUtility_cache;
    umtx_unlock(NULL);

    if (cache == NULL) {
        cache = new Hashtable(status);
        if (cache == NULL || U_FAILURE(status)) {
            delete cache; // do not leak a table whose construction failed
            return NULL;  // catastrophic failure; e.g. out of memory
        }
        cache->setValueDeleter(uhash_deleteHashtable);
        Hashtable* h; // set this to final LocaleUtility_cache value
        umtx_lock(NULL);
        h = LocaleUtility_cache;
        if (h == NULL) {
            LocaleUtility_cache = h = cache;
            cache = NULL;
            ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
        }
        umtx_unlock(NULL);
        // non-NULL only when another thread installed its cache first
        delete cache;
        cache = h;
    }

    U_ASSERT(cache != NULL);

    Hashtable* htp;
    umtx_lock(NULL);
    htp = (Hashtable*) cache->get(bundleID);
    umtx_unlock(NULL);

    if (htp != NULL) {
        return htp;
    }

    htp = new Hashtable(status);
    if (htp == NULL || U_FAILURE(status)) {
        delete htp; // never hand out (or leak) a table that failed to construct
        return NULL;
    }

    CharString cbundleID(bundleID);
    const char* path = (const char*) cbundleID;
    if (*path == 0) path = NULL; // empty string => NULL
    UEnumeration *uenum = ures_openAvailableLocales(path, &status);
    for (;;) {
        const UChar* id = uenum_unext(uenum, NULL, &status);
        if (id == NULL) {
            break;
        }
        htp->put(UnicodeString(id), (void*)htp, status);
    }
    uenum_close(uenum);
    if (U_FAILURE(status)) {
        delete htp;
        return NULL;
    }

    umtx_lock(NULL);
    Hashtable* existing = (Hashtable*) cache->get(bundleID);
    if (existing != NULL) {
        // Another thread stored a table for this bundleID while ours was
        // being built. Putting ours would replace theirs, and the value
        // deleter would free a table a caller may already hold, so discard
        // ours and return theirs.
        umtx_unlock(NULL);
        delete htp;
        htp = existing;
    } else {
        // NOTE(review): status is not examined after put(); confirm who owns
        // htp if put() fails.
        cache->put(bundleID, (void*)htp, status);
        umtx_unlock(NULL);
    }
    return htp;
}
|
||||
|
||||
/**
 * Tells whether root is a fallback of child: child must start with root and
 * either end there or continue with an underscore.
 *
 * @param root  the candidate fallback ID
 * @param child the more specific ID
 * @return whether child equals root or begins with root followed by '_'
 */
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
    const int32_t rootLength = root.length();
    const UBool startsWithRoot = child.indexOf(root) == 0;

    return startsWithRoot &&
        (child.length() == rootLength ||
         child.charAt(rootLength) == UNDERSCORE_CHAR);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
37
source/common/locutil.h
Normal file
37
source/common/locutil.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2002-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef LOCUTIL_H
|
||||
#define LOCUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "hash.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// temporary utility functions, till I know where to find them
|
||||
// in header so tests can also access them
|
||||
|
||||
class U_COMMON_API LocaleUtility {
|
||||
public:
|
||||
static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
|
||||
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
|
||||
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
|
||||
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
|
||||
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
20
source/common/msvcres.h
Normal file
20
source/common/msvcres.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
//{{NO_DEPENDENCIES}}
|
||||
// Copyright (c) 2003-2005 International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Used by common.rc and other .rc files.
|
||||
//Do not edit with Microsoft Developer Studio because it will modify this
|
||||
//header the wrong way. This is here to prevent Visual Studio .NET from
|
||||
//unnecessarily building the resource files when it's not needed.
|
||||
//
|
||||
|
||||
/*
|
||||
These are defined before unicode/uversion.h in order to prevent
|
||||
STLPort's broken stddef.h from being used when rc.exe parses this file.
|
||||
*/
|
||||
#define _STLP_OUTERMOST_HEADER_ID 0
|
||||
#define _STLP_WINCE 1
|
||||
|
||||
#include "unicode/uversion.h"
|
||||
|
||||
#define ICU_WEBSITE "http://ibm.com/software/globalization/icu/"
|
18
source/common/mutex.cpp
Normal file
18
source/common/mutex.cpp
Normal file
|
@ -0,0 +1,18 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2008, International Business Machines Corporation. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if UCONFIG_NO_SERVICE
|
||||
|
||||
/* If UCONFIG_NO_SERVICE, then there is no invocation of Mutex elsewhere in
|
||||
common, so add one here to force an export */
|
||||
#include "mutex.h"
|
||||
static Mutex *aMutex = 0;
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
77
source/common/mutex.h
Normal file
77
source/common/mutex.h
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*/
|
||||
//----------------------------------------------------------------------------
|
||||
// File: mutex.h
|
||||
//
|
||||
// Lightweight C++ wrapper for umtx_ C mutex functions
|
||||
//
|
||||
// Author: Alan Liu 1/31/97
|
||||
// History:
|
||||
// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
|
||||
// 04/07/1999 srl refocused as a thin wrapper
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
#ifndef MUTEX_H
|
||||
#define MUTEX_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Code that accesses shared static or global data should
|
||||
// instantiate a Mutex object while doing so. You should make your own
|
||||
// private mutex where possible.
|
||||
|
||||
// For example:
|
||||
//
|
||||
// UMTX myMutex;
|
||||
//
|
||||
// void Function(int arg1, int arg2)
|
||||
// {
|
||||
// static Object* foo; // Shared read-write object
|
||||
// Mutex mutex(&myMutex); // or no args for the global lock
|
||||
// foo->Method();
|
||||
// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
|
||||
// }
|
||||
//
|
||||
// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
|
||||
// returning a Mutex. This is a common mistake which silently slips through the
|
||||
// compiler!!
|
||||
//
|
||||
|
||||
class U_COMMON_API Mutex : public UMemory {
|
||||
public:
|
||||
inline Mutex(UMTX *mutex = NULL);
|
||||
inline ~Mutex();
|
||||
|
||||
private:
|
||||
UMTX *fMutex;
|
||||
|
||||
Mutex(const Mutex &other); // forbid copying of this class
|
||||
Mutex &operator=(const Mutex &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
inline Mutex::Mutex(UMTX *mutex)
|
||||
: fMutex(mutex)
|
||||
{
|
||||
umtx_lock(fMutex);
|
||||
}
|
||||
|
||||
inline Mutex::~Mutex()
|
||||
{
|
||||
umtx_unlock(fMutex);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // MUTEX_H
|
||||
//eof
|
611
source/common/normlzr.cpp
Normal file
611
source/common/normlzr.cpp
Normal file
|
@ -0,0 +1,611 @@
|
|||
/*
|
||||
*************************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-2005, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/uiter.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "cmemory.h"
|
||||
#include "unormimp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Constructors and other boilerplate
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
// Creates a Normalizer over a StringCharacterIterator built from str, with
// options 0 and all positions at 0.
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
    : UObject(),
      fUMode(mode), fOptions(0),
      currentIndex(0), nextIndex(0),
      buffer(), bufferPos(0)
{
    init(new StringCharacterIterator(str));
}
|
||||
|
||||
// Creates a Normalizer over a UCharCharacterIterator built from the UChar
// array str of the given length, with options 0 and all positions at 0.
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
    : UObject(),
      fUMode(mode), fOptions(0),
      currentIndex(0), nextIndex(0),
      buffer(), bufferPos(0)
{
    init(new UCharCharacterIterator(str, length));
}
|
||||
|
||||
// Creates a Normalizer over a clone of iter, with options 0 and all
// positions at 0.
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
    : UObject(),
      fUMode(mode), fOptions(0),
      currentIndex(0), nextIndex(0),
      buffer(), bufferPos(0)
{
    init(iter.clone());
}
|
||||
|
||||
// Copy constructor: copies mode, options, positions and the buffer of copy,
// and iterates over a clone of copy's input iterator.
// (The parameter declaration had been mis-encoded as a copyright sign; it is
// the reference parameter "copy" that the initializer list uses.)
Normalizer::Normalizer(const Normalizer& copy) :
    UObject(copy), fUMode(copy.fUMode), fOptions(copy.fOptions),
    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
    buffer(copy.buffer), bufferPos(copy.bufferPos)
{
    init(((CharacterIterator *)(copy.text->context))->clone());
}
|
||||
|
||||
static const UChar _NUL=0;
|
||||
|
||||
void
|
||||
Normalizer::init(CharacterIterator *iter) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator));
|
||||
if(text!=NULL) {
|
||||
if(unorm_haveData(&errorCode)) {
|
||||
uiter_setCharacterIterator(text, iter);
|
||||
} else {
|
||||
delete iter;
|
||||
uiter_setCharacterIterator(text, new UCharCharacterIterator(&_NUL, 0));
|
||||
}
|
||||
} else {
|
||||
delete iter;
|
||||
}
|
||||
}
|
||||
|
||||
// Deletes the CharacterIterator stored in text->context, then frees text.
Normalizer::~Normalizer()
{
    if(text==NULL) {
        return; // nothing was allocated
    }
    delete (CharacterIterator *)text->context;
    uprv_free(text);
}
|
||||
|
||||
/**
 * Returns a new Normalizer that is a copy of this one (made with the copy
 * constructor).
 */
Normalizer*
Normalizer::clone() const
{
    // "this" is never null inside a member function called with defined
    // behavior, so the former "this!=0" test had a dead else branch.
    return new Normalizer(*this);
}
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
*/
|
||||
int32_t Normalizer::hashCode() const
|
||||
{
|
||||
return ((CharacterIterator *)(text->context))->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
|
||||
}
|
||||
|
||||
// Two Normalizers are equal when they are the same object, or when mode,
// options, input iterator, buffer, bufferPos and nextIndex all compare equal.
UBool Normalizer::operator==(const Normalizer& that) const
{
    return
        this==&that ||
        (
            fUMode==that.fUMode &&
            fOptions==that.fOptions &&
            *((CharacterIterator *)(text->context))==*((CharacterIterator *)(that.text->context)) &&
            buffer==that.buffer &&
            bufferPos==that.bufferPos &&
            nextIndex==that.nextIndex
        );
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Static utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
void U_EXPORT2
|
||||
Normalizer::normalize(const UnicodeString& source,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
if(U_SUCCESS(status)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
} else {
|
||||
UnicodeString localDest;
|
||||
UnicodeString *dest;
|
||||
|
||||
if(&source!=&result) {
|
||||
dest=&result;
|
||||
} else {
|
||||
// the source and result strings are the same object, use a temporary one
|
||||
dest=&localDest;
|
||||
}
|
||||
|
||||
UChar *buffer=dest->getBuffer(source.length());
|
||||
int32_t length=unorm_internalNormalize(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
mode, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=dest->getBuffer(length);
|
||||
length=unorm_internalNormalize(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
mode, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
}
|
||||
|
||||
if(dest==&localDest) {
|
||||
result=*dest;
|
||||
}
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void U_EXPORT2
|
||||
Normalizer::compose(const UnicodeString& source,
|
||||
UBool compat, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
if(U_SUCCESS(status)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
} else {
|
||||
UnicodeString localDest;
|
||||
UnicodeString *dest;
|
||||
|
||||
if(&source!=&result) {
|
||||
dest=&result;
|
||||
} else {
|
||||
// the source and result strings are the same object, use a temporary one
|
||||
dest=&localDest;
|
||||
}
|
||||
|
||||
UChar *buffer=dest->getBuffer(source.length());
|
||||
int32_t length=unorm_compose(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=dest->getBuffer(length);
|
||||
length=unorm_compose(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
}
|
||||
|
||||
if(dest==&localDest) {
|
||||
result=*dest;
|
||||
}
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void U_EXPORT2
|
||||
Normalizer::decompose(const UnicodeString& source,
|
||||
UBool compat, int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus() || U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
if(U_SUCCESS(status)) {
|
||||
status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
} else {
|
||||
UnicodeString localDest;
|
||||
UnicodeString *dest;
|
||||
|
||||
if(&source!=&result) {
|
||||
dest=&result;
|
||||
} else {
|
||||
// the source and result strings are the same object, use a temporary one
|
||||
dest=&localDest;
|
||||
}
|
||||
|
||||
UChar *buffer=dest->getBuffer(source.length());
|
||||
int32_t length=unorm_decompose(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
if(status==U_BUFFER_OVERFLOW_ERROR) {
|
||||
status=U_ZERO_ERROR;
|
||||
buffer=dest->getBuffer(length);
|
||||
length=unorm_decompose(buffer, dest->getCapacity(),
|
||||
source.getBuffer(), source.length(),
|
||||
compat, options,
|
||||
&status);
|
||||
dest->releaseBuffer(U_SUCCESS(status) ? length : 0);
|
||||
}
|
||||
|
||||
if(dest==&localDest) {
|
||||
result=*dest;
|
||||
}
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Concatenates left and right into result with unorm_concatenate().
// result is set to bogus when either input is bogus or errorCode already
// indicates a failure on entry (a successful errorCode is then changed to
// U_ILLEGAL_ARGUMENT_ERROR), and also when the concatenation fails.
// Returns result.
UnicodeString & U_EXPORT2
Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
    } else {
        UnicodeString localDest;
        UnicodeString *dest;

        if(&left!=&result && &right!=&result) {
            dest=&result;
        } else {
            // one of the source strings and the result string are the same
            // object, use a temporary one
            dest=&localDest;
        }

        UChar *buffer=dest->getBuffer(left.length()+right.length());
        int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
                                         right.getBuffer(), right.length(),
                                         buffer, dest->getCapacity(),
                                         mode, options,
                                         &errorCode);
        dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
            // retry once with a buffer of the length the first call returned;
            // the outer 'length' is reused here instead of being shadowed by
            // a second declaration
            errorCode=U_ZERO_ERROR;
            buffer=dest->getBuffer(length);
            length=unorm_concatenate(left.getBuffer(), left.length(),
                                     right.getBuffer(), right.length(),
                                     buffer, dest->getCapacity(),
                                     mode, options,
                                     &errorCode);
            dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
        }

        if(dest==&localDest) {
            result=*dest;
        }
        if(U_FAILURE(errorCode)) {
            result.setToBogus();
        }
    }
    return result;
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Iteration API
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return the current character in the normalized text.
|
||||
*/
|
||||
UChar32 Normalizer::current() {
|
||||
if(bufferPos<buffer.length() || nextNormalize()) {
|
||||
return buffer.char32At(bufferPos);
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the next character in the normalized text and advance
|
||||
* the iteration position by one. If the end
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar32 Normalizer::next() {
|
||||
if(bufferPos<buffer.length() || nextNormalize()) {
|
||||
UChar32 c=buffer.char32At(bufferPos);
|
||||
bufferPos+=UTF_CHAR_LENGTH(c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the previous character in the normalized text and decrement
|
||||
* the iteration position by one. If the beginning
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar32 Normalizer::previous() {
|
||||
if(bufferPos>0 || previousNormalize()) {
|
||||
UChar32 c=buffer.char32At(bufferPos-1);
|
||||
bufferPos-=UTF_CHAR_LENGTH(c);
|
||||
return c;
|
||||
} else {
|
||||
return DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Move the input iterator to its start, make both input indexes
 * point there, and discard any buffered normalized text.
 */
void Normalizer::reset() {
    nextIndex=text->move(text, 0, UITER_START);
    currentIndex=nextIndex;
    clearBuffer();
}
|
||||
|
||||
void
|
||||
Normalizer::setIndexOnly(int32_t index) {
|
||||
currentIndex=nextIndex=text->move(text, index, UITER_ZERO); // validates index
|
||||
clearBuffer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the first character in the normalized text-> This resets
|
||||
* the <tt>Normalizer's</tt> position to the beginning of the text->
|
||||
*/
|
||||
UChar32 Normalizer::first() {
|
||||
reset();
|
||||
return next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the last character in the normalized text-> This resets
|
||||
* the <tt>Normalizer's</tt> position to be just before the
|
||||
* the input text corresponding to that normalized character.
|
||||
*/
|
||||
UChar32 Normalizer::last() {
|
||||
currentIndex=nextIndex=text->move(text, 0, UITER_LIMIT);
|
||||
clearBuffer();
|
||||
return previous();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the current iteration position in the input text that is
|
||||
* being normalized. This method is useful in applications such as
|
||||
* searching, where you need to be able to determine the position in
|
||||
* the input text that corresponds to a given normalized output character.
|
||||
* <p>
|
||||
* <b>Note:</b> This method sets the position in the <em>input</em>, while
|
||||
* {@link #next} and {@link #previous} iterate through characters in the
|
||||
* <em>output</em>. This means that there is not necessarily a one-to-one
|
||||
* correspondence between characters returned by <tt>next</tt> and
|
||||
* <tt>previous</tt> and the indices passed to and returned from
|
||||
* <tt>setIndex</tt> and {@link #getIndex}.
|
||||
*
|
||||
*/
|
||||
int32_t Normalizer::getIndex() const {
|
||||
if(bufferPos<buffer.length()) {
|
||||
return currentIndex;
|
||||
} else {
|
||||
return nextIndex;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the index of the start of the input text-> This is the begin index
|
||||
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
int32_t Normalizer::startIndex() const {
|
||||
return text->getIndex(text, UITER_START);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the index of the end of the input text-> This is the end index
|
||||
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
int32_t Normalizer::endIndex() const {
|
||||
return text->getIndex(text, UITER_LIMIT);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Property access methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
Normalizer::setMode(UNormalizationMode newMode)
|
||||
{
|
||||
fUMode = newMode;
|
||||
}
|
||||
|
||||
/**
 * Return the normalization mode currently stored in this object.
 */
UNormalizationMode
Normalizer::getUMode() const {
    return fUMode;
}
|
||||
|
||||
void
|
||||
Normalizer::setOption(int32_t option,
|
||||
UBool value)
|
||||
{
|
||||
if (value) {
|
||||
fOptions |= option;
|
||||
} else {
|
||||
fOptions &= (~option);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Report whether any of the given option bit(s) is currently set.
 * @param option bit mask of the option(s) to test
 * @return TRUE if at least one of the bits is set in the options word
 */
UBool
Normalizer::getOption(int32_t option) const {
    return 0 != (fOptions & option);
}
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning of the input text->
|
||||
*/
|
||||
void
|
||||
Normalizer::setText(const UnicodeString& newText,
|
||||
UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
CharacterIterator *newIter = new StringCharacterIterator(newText);
|
||||
if (newIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning of the string.
|
||||
*/
|
||||
void
|
||||
Normalizer::setText(const CharacterIterator& newText,
|
||||
UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
CharacterIterator *newIter = newText.clone();
|
||||
if (newIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
void
|
||||
Normalizer::setText(const UChar* newText,
|
||||
int32_t length,
|
||||
UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
|
||||
if (newIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
delete (CharacterIterator *)(text->context);
|
||||
text->context = newIter;
|
||||
reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the text under iteration into the UnicodeString referred to by "result".
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
*/
|
||||
void
|
||||
Normalizer::getText(UnicodeString& result)
|
||||
{
|
||||
((CharacterIterator *)(text->context))->getText(result);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
 * Empty the buffer of normalized text and rewind the position within it.
 */
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}
|
||||
|
||||
/**
 * Normalize the next chunk of input, starting at nextIndex, into the buffer.
 * On return currentIndex is the input index where the chunk starts and
 * nextIndex is the input index the iterator reached.
 * @return TRUE if normalization succeeded and the buffer is not empty;
 *         FALSE at the end of the input or on failure
 */
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->move(text, nextIndex, UITER_ZERO);
    if(!text->hasNext(text)) {
        return FALSE;
    }

    /* first attempt, writing into the buffer's current capacity */
    UErrorCode status=U_ZERO_ERROR;
    UChar *dest=buffer.getBuffer(-1);
    int32_t normLength=unorm_next(text, dest, buffer.getCapacity(),
                                  fUMode, fOptions,
                                  TRUE, 0,
                                  &status);
    buffer.releaseBuffer(U_SUCCESS(status) ? normLength : 0);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        /* rewind the input and retry with room for the reported length */
        status=U_ZERO_ERROR;
        text->move(text, nextIndex, UITER_ZERO);
        dest=buffer.getBuffer(normLength);
        normLength=unorm_next(text, dest, buffer.getCapacity(),
                              fUMode, fOptions,
                              TRUE, 0,
                              &status);
        buffer.releaseBuffer(U_SUCCESS(status) ? normLength : 0);
    }

    nextIndex=text->getIndex(text, UITER_CURRENT);
    return U_SUCCESS(status) && !buffer.isEmpty();
}
|
||||
|
||||
/**
 * Normalize the chunk of input that ends at currentIndex into the buffer
 * and position bufferPos at the end of that buffer.
 * On return nextIndex is the input index where the chunk ends and
 * currentIndex is the input index the iterator reached going backward.
 * @return TRUE if normalization succeeded and the buffer is not empty;
 *         FALSE at the start of the input or on failure
 */
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->move(text, currentIndex, UITER_ZERO);
    if(!text->hasPrevious(text)) {
        return FALSE;
    }

    /* first attempt, writing into the buffer's current capacity */
    UErrorCode status=U_ZERO_ERROR;
    UChar *dest=buffer.getBuffer(-1);
    int32_t normLength=unorm_previous(text, dest, buffer.getCapacity(),
                                      fUMode, fOptions,
                                      TRUE, 0,
                                      &status);
    buffer.releaseBuffer(U_SUCCESS(status) ? normLength : 0);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        /* rewind the input and retry with room for the reported length */
        status=U_ZERO_ERROR;
        text->move(text, currentIndex, UITER_ZERO);
        dest=buffer.getBuffer(normLength);
        normLength=unorm_previous(text, dest, buffer.getCapacity(),
                                  fUMode, fOptions,
                                  TRUE, 0,
                                  &status);
        buffer.releaseBuffer(U_SUCCESS(status) ? normLength : 0);
    }

    /* backward iteration consumes the buffer from its end */
    bufferPos=buffer.length();
    currentIndex=text->getIndex(text, UITER_CURRENT);
    return U_SUCCESS(status) && !buffer.isEmpty();
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
21
source/common/parsepos.cpp
Normal file
21
source/common/parsepos.cpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2003-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/parsepos.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Expands ICU's RTTI boilerplate for ParsePosition.
// NOTE(review): presumably defines getStaticClassID()/getDynamicClassID() - confirm against the macro definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition)
|
||||
|
||||
// Out-of-line destructor; there is nothing to release.
ParsePosition::~ParsePosition()
{
}
|
||||
|
||||
// Return a heap-allocated copy of this object, made with the copy constructor.
ParsePosition *
ParsePosition::clone() const {
    ParsePosition *copy = new ParsePosition(*this);
    return copy;
}
|
||||
|
||||
U_NAMESPACE_END
|
758
source/common/propname.cpp
Normal file
758
source/common/propname.cpp
Normal file
|
@ -0,0 +1,758 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: October 30 2002
|
||||
* Since: ICU 2.4
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "propname.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uarrsort.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable ASCII character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static inline int32_t
|
||||
getASCIIPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and ASCII White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x2d || c==0x5f ||
|
||||
c==0x20 || (0x09<=c && c<=0x0d);
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable EBCDIC character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static inline int32_t
|
||||
getEBCDICPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and EBCDIC White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x60 || c==0x6d ||
|
||||
c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
/*
 * Loose comparison of two ASCII property names.
 * Returns 0 if they match, otherwise the difference between the first
 * pair of non-ignorable, lowercased characters that differ.
 */
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        const int32_t next1=getASCIIPropertyNameChar(name1);
        const int32_t next2=getASCIIPropertyNameChar(name2);
        const int32_t char1=next1&0xff;
        const int32_t char2=next2&0xff;

        /* If we reach the ends of both strings then they match */
        if((char1|char2)==0) {
            return 0;
        }

        /* Compare the lowercased characters */
        if(char1!=char2) {
            return char1-char2;
        }

        /* same character: skip it together with the ignored delimiters before it */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
|
||||
|
||||
/*
 * Loose comparison of two EBCDIC property names.
 * Returns 0 if they match, otherwise the difference between the first
 * pair of non-ignorable, lowercased characters that differ.
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        const int32_t next1=getEBCDICPropertyNameChar(name1);
        const int32_t next2=getEBCDICPropertyNameChar(name2);
        const int32_t char1=next1&0xff;
        const int32_t char2=next2&0xff;

        /* If we reach the ends of both strings then they match */
        if((char1|char2)==0) {
            return 0;
        }

        /* Compare the lowercased characters */
        if(char1!=char2) {
            return char1-char2;
        }

        /* same character: skip it together with the ignored delimiters before it */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// PropertyAliases implementation
|
||||
|
||||
const char*
|
||||
PropertyAliases::chooseNameInGroup(Offset offset,
|
||||
UPropertyNameChoice choice) const {
|
||||
int32_t c = choice;
|
||||
if (!offset || c < 0) {
|
||||
return NULL;
|
||||
}
|
||||
const Offset* p = (const Offset*) getPointer(offset);
|
||||
while (c-- > 0) {
|
||||
if (*p++ < 0) return NULL;
|
||||
}
|
||||
Offset a = *p;
|
||||
if (a < 0) a = -a;
|
||||
return (const char*) getPointerNull(a);
|
||||
}
|
||||
|
||||
// Look up the ValueMap for a property; NULL if the enum-to-value map
// yields a zero offset for it.
const ValueMap*
PropertyAliases::getValueMap(EnumValue prop) const {
    NonContiguousEnumToOffset* propToMap =
        (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
    const Offset mapOffset = propToMap->getOffset(prop);
    if (!mapOffset) {
        return NULL;
    }
    return (const ValueMap*) getPointerNull(mapOffset);
}
|
||||
|
||||
inline const char*
|
||||
PropertyAliases::getPropertyName(EnumValue prop,
|
||||
UPropertyNameChoice choice) const {
|
||||
NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
|
||||
return chooseNameInGroup(e2n->getOffset(prop), choice);
|
||||
}
|
||||
|
||||
// Map a property alias to its enum value through the name-to-enum table.
inline EnumValue
PropertyAliases::getPropertyEnum(const char* alias) const {
    return ((NameToEnum*) getPointer(nameToEnum_offset))->getEnum(alias, *this);
}
|
||||
|
||||
inline const char*
|
||||
PropertyAliases::getPropertyValueName(EnumValue prop,
|
||||
EnumValue value,
|
||||
UPropertyNameChoice choice) const {
|
||||
const ValueMap* vm = getValueMap(prop);
|
||||
if (!vm) return NULL;
|
||||
Offset a;
|
||||
if (vm->enumToName_offset) {
|
||||
a = ((EnumToOffset*) getPointer(vm->enumToName_offset))->
|
||||
getOffset(value);
|
||||
} else {
|
||||
a = ((NonContiguousEnumToOffset*) getPointer(vm->ncEnumToName_offset))->
|
||||
getOffset(value);
|
||||
}
|
||||
return chooseNameInGroup(a, choice);
|
||||
}
|
||||
|
||||
// Map a property value alias to its enum value; UCHAR_INVALID_CODE if the
// property has no value map.
inline EnumValue
PropertyAliases::getPropertyValueEnum(EnumValue prop,
                                      const char* alias) const {
    const ValueMap* vm = getValueMap(prop);
    if (vm == NULL) {
        return UCHAR_INVALID_CODE;
    }
    return ((NameToEnum*) getPointer(vm->nameToEnum_offset))->getEnum(alias, *this);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
U_NAMESPACE_USE
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// UDataMemory structures
|
||||
|
||||
// Root of the loaded property-names data; set by _load(), reset to NULL by pname_cleanup().
static const PropertyAliases* PNAME = NULL;
|
||||
// Handle of the opened data item that PNAME points into; closed by pname_cleanup().
static UDataMemory* UDATA = NULL;
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// UDataMemory loading/unloading
|
||||
|
||||
/**
|
||||
* udata callback to verify the property names data.
|
||||
*/
|
||||
U_CDECL_BEGIN
|
||||
// Accept a data item only if its header matches this platform
// (endianness, charset family) and carries the pnames signature and
// the supported major format version.
static UBool U_CALLCONV
isPNameAcceptable(void* /*context*/,
                  const char* /*type*/, const char* /*name*/,
                  const UDataInfo* info) {
    // header size and platform properties
    if (info->size < sizeof(UDataInfo)) {
        return FALSE;
    }
    if (info->isBigEndian != U_IS_BIG_ENDIAN) {
        return FALSE;
    }
    if (info->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }

    // data format signature
    if (info->dataFormat[0] != PNAME_SIG_0 ||
        info->dataFormat[1] != PNAME_SIG_1 ||
        info->dataFormat[2] != PNAME_SIG_2 ||
        info->dataFormat[3] != PNAME_SIG_3) {
        return FALSE;
    }

    // major format version
    if (info->formatVersion[0] != PNAME_FORMAT_VERSION) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
// Library cleanup callback: forget the data pointer and close the data item.
static UBool U_CALLCONV pname_cleanup(void) {
    PNAME = NULL;
    if (UDATA != NULL) {
        udata_close(UDATA);
        UDATA = NULL;
    }
    return TRUE;
}
|
||||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* Load the property names data. Caller should check that data is
|
||||
* not loaded BEFORE calling this function. Returns TRUE if the load
|
||||
* succeeds.
|
||||
*/
|
||||
static UBool _load() {
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UDataMemory* data =
|
||||
udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
|
||||
isPNameAcceptable, 0, &ec);
|
||||
if (U_SUCCESS(ec)) {
|
||||
umtx_lock(NULL);
|
||||
if (UDATA == NULL) {
|
||||
UDATA = data;
|
||||
PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
|
||||
ucln_common_registerCleanup(UCLN_COMMON_PNAME, pname_cleanup);
|
||||
data = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
}
|
||||
if (data) {
|
||||
udata_close(data);
|
||||
}
|
||||
return PNAME!=NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inline function that expands to code that does a lazy load of the
|
||||
* property names data. If the data is already loaded, avoids an
|
||||
* unnecessary function call. If the data is not loaded, call _load()
|
||||
* to load it, and return TRUE if the load succeeds.
|
||||
*/
|
||||
static inline UBool load() {
|
||||
UBool f;
|
||||
UMTX_CHECK(NULL, (PNAME!=NULL), f);
|
||||
return f || _load();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Public API implementation
|
||||
|
||||
// The C API is just a thin wrapper. Each function obtains a pointer
|
||||
// to the singleton PropertyAliases, and calls the appropriate method
|
||||
// on it. If it cannot obtain a pointer, because valid data is not
|
||||
// available, then it returns NULL or UCHAR_INVALID_CODE.
|
||||
|
||||
// Return the requested name of a property; NULL if the data cannot be
// loaded or there is no such name.
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice) {
    if (!load()) {
        return NULL;
    }
    return PNAME->getPropertyName(property, nameChoice);
}
|
||||
|
||||
// Return the property enum for an alias; UCHAR_INVALID_CODE if the data
// cannot be loaded.
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
    if (!load()) {
        return UCHAR_INVALID_CODE;
    }
    return (UProperty) PNAME->getPropertyEnum(alias);
}
|
||||
|
||||
// Return the requested name of a property value; NULL if the data cannot
// be loaded or there is no such name.
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice) {
    if (!load()) {
        return NULL;
    }
    return PNAME->getPropertyValueName(property, value, nameChoice);
}
|
||||
|
||||
// Return the enum of a property value alias; UCHAR_INVALID_CODE if the
// data cannot be loaded.
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias) {
    if (!load()) {
        return (int32_t)UCHAR_INVALID_CODE;
    }
    return PNAME->getPropertyValueEnum(property, alias);
}
|
||||
|
||||
/* data swapping ------------------------------------------------------------ */
|
||||
|
||||
/*
|
||||
* Sub-structure-swappers use the temp array (which is as large as the
|
||||
* actual data) for intermediate storage,
|
||||
* as well as to indicate if a particular structure has been swapped already.
|
||||
* The temp array is initially reset to all 0.
|
||||
* pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays.
|
||||
*/
|
||||
|
||||
/*
 * Swap one EnumToOffset map located at byte offset pos.
 *
 * ds         swapper describing input and output byte order
 * inBytes    start of the input data (after the data header)
 * length     number of input bytes, or <0 to only compute the size
 * outBytes   start of the output data
 * temp       zero-initialized scratch array; receives enumStart/enumLimit,
 *            which also marks this map as already swapped
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map extends beyond length
 *
 * Returns the size of the map in bytes, or 0 on error.
 */
int32_t
EnumToOffset::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const EnumToOffset *inMap;
    EnumToOffset *outMap, *tempMap;
    int32_t size;

    tempMap=(EnumToOffset *)(temp+pos);
    if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const EnumToOffset *)(inBytes+pos);
    outMap=(EnumToOffset *)(outBytes+pos);

    tempMap->enumStart=udata_readInt32(ds, inMap->enumStart);
    tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * Reject a map that does not fit into the input.
         * (An additional length<sizeof(PropertyAliases) test used to guard
         * this error; the caller has already excluded that case, so the
         * bounds check could never fire.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n"
                                 "    for pnames.icu EnumToOffset{%d..%d} at %d\n",
                             length, tempMap->enumStart, tempMap->enumLimit, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap enumStart and enumLimit */
        ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode);

        /* swap _offsetArray[] */
        ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset),
                        outMap->getOffsetArray(), pErrorCode);
    }

    return size;
}
|
||||
|
||||
/*
 * Swap one NonContiguousEnumToOffset map located at byte offset pos.
 *
 * ds         swapper describing input and output byte order
 * inBytes    start of the input data (after the data header)
 * length     number of input bytes, or <0 to only compute the size
 * outBytes   start of the output data
 * temp       zero-initialized scratch array; receives the count,
 *            which also marks this map as already swapped
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map extends beyond length
 *
 * Returns the size of the map in bytes, or 0 on error.
 */
int32_t
NonContiguousEnumToOffset::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const NonContiguousEnumToOffset *inMap;
    NonContiguousEnumToOffset *outMap, *tempMap;
    int32_t size;

    tempMap=(NonContiguousEnumToOffset *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NonContiguousEnumToOffset *)(inBytes+pos);
    outMap=(NonContiguousEnumToOffset *)(outBytes+pos);

    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * Reject a map that does not fit into the input.
         * (An additional length<sizeof(PropertyAliases) test used to guard
         * this error; the caller has already excluded that case, so the
         * bounds check could never fire.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n"
                                 "    for pnames.icu NonContiguousEnumToOffset[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count and _enumArray[] */
        const int32_t enumBytes=(1+tempMap->count)*sizeof(EnumValue);
        ds->swapArray32(ds, inMap, enumBytes,
                        outMap, pErrorCode);

        /* swap _offsetArray[], which follows the enum values */
        const int32_t offsetArrayPos=pos+enumBytes;
        ds->swapArray16(ds, inBytes+offsetArrayPos, tempMap->count*sizeof(Offset),
                        outBytes+offsetArrayPos, pErrorCode);
    }

    return size;
}
|
||||
|
||||
struct NameAndIndex {
|
||||
Offset name, index;
|
||||
};
|
||||
|
||||
U_CDECL_BEGIN
|
||||
/* Signature shared by uprv_compareASCIIPropertyNames() and uprv_compareEBCDICPropertyNames(). */
typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
|
||||
|
||||
/*
 * Context passed to upname_compareRows().
 * chars:       base pointer to which the NameAndIndex name offsets are added
 * propCompare: comparison function applied to the two resulting strings
 */
struct CompareContext {
|
||||
const char *chars;
|
||||
PropNameCompareFn *propCompare;
|
||||
};
|
||||
|
||||
/*
 * Sort callback: compare two NameAndIndex rows by the strings that their
 * name offsets select, using the comparison function from the context.
 */
static int32_t U_CALLCONV
upname_compareRows(const void *context, const void *left, const void *right) {
    const CompareContext *cmp=(const CompareContext *)context;
    const char *leftName=cmp->chars+((const NameAndIndex *)left)->name;
    const char *rightName=cmp->chars+((const NameAndIndex *)right)->name;
    return cmp->propCompare(leftName, rightName);
}
|
||||
U_CDECL_END
|
||||
|
||||
/*
 * Swap one NameToEnum map located at byte offset pos.
 * If input and output charsets differ, the name and enum arrays are
 * re-sorted by name for the output charset as part of the swap.
 *
 * ds         swapper describing input and output byte order and charset
 * inBytes    start of the input data (after the data header)
 * length     number of input bytes, or <0 to only compute the size
 * outBytes   start of the output data; must already contain the swapped strings
 * temp       zero-initialized scratch array; receives the count (which also
 *            marks this map as already swapped) and serves as sort storage
 * pos        byte offset of the map in inBytes/outBytes/temp
 * pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the map extends beyond length;
 *            also carries failures from the sort
 *
 * Returns the size of the map in bytes, or 0 on error.
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const NameToEnum *inMap;
    NameToEnum *outMap, *tempMap;

    const EnumValue *inEnumArray;
    EnumValue *outEnumArray;

    const Offset *inNameArray;
    Offset *outNameArray;

    NameAndIndex *sortArray;
    CompareContext cmp;

    int32_t i, size, oldIndex;

    tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NameToEnum *)(inBytes+pos);
    outMap=(NameToEnum *)(outBytes+pos);

    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * Reject a map that does not fit into the input.
         * (An additional length<sizeof(PropertyAliases) test used to guard
         * this error; the caller has already excluded that case, so the
         * bounds check could never fire.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                                 "    for pnames.icu NameToEnum[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count */
        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

        inEnumArray=inMap->getEnumArray();
        outEnumArray=outMap->getEnumArray();

        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap the enum/name arrays */
            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
            return size;
        }

        /*
         * The name and enum arrays are sorted by names and must be resorted
         * if inCharset!=outCharset.
         * We use the corresponding part of the temp array to sort an array
         * of pairs of name offsets and sorting indexes.
         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
         *
         * The outBytes must already contain the swapped strings.
         */
        sortArray=(NameAndIndex *)tempMap->getEnumArray();
        for(i=0; i<tempMap->count; ++i) {
            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
            sortArray[i].index=(Offset)i;
        }

        /*
         * use a stable sort to avoid shuffling of equal strings,
         * which makes testing harder
         */
        cmp.chars=(const char *)outBytes;
        if (ds->outCharset==U_ASCII_FAMILY) {
            cmp.propCompare=uprv_compareASCIIPropertyNames;
        }
        else {
            cmp.propCompare=uprv_compareEBCDICPropertyNames;
        }
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, &cmp,
                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                             tempMap->count);
            return 0;
        }

        /* copy/swap/permutate _enumArray[] and _nameArray[] */
        if(inEnumArray!=outEnumArray) {
            for(i=0; i<tempMap->count; ++i) {
                oldIndex=sortArray[i].index;
                ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
                ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
            }
        } else {
            /*
             * in-place swapping: need to permutate into a temporary array
             * and then copy back to not destroy the data
             */
            EnumValue *tempEnumArray;
            Offset *oldIndexes;

            /* write name offsets directly from sortArray */
            for(i=0; i<tempMap->count; ++i) {
                ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
            }

            /*
             * compress the oldIndexes into a separate array to make space for tempEnumArray
             * the tempMap _nameArray becomes oldIndexes[], getting the index
             *   values from the 2D sortArray[],
             * while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
             * this saves us allocating more memory
             *
             * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
             * and because the nameArray[] can be used for oldIndexes[]
             */
            tempEnumArray=(EnumValue *)sortArray;
            oldIndexes=(Offset *)(sortArray+tempMap->count);

            /* copy sortArray[].index values into oldIndexes[] */
            for(i=0; i<tempMap->count; ++i) {
                oldIndexes[i]=sortArray[i].index;
            }

            /* permutate inEnumArray[] into tempEnumArray[] */
            for(i=0; i<tempMap->count; ++i) {
                ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
            }

            /* copy tempEnumArray[] to outEnumArray[] */
            uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
        }
    }

    return size;
}
|
||||
|
||||
/*
 * Swap the whole pnames.icu body (everything after the data header).
 *
 * ds         swapper describing input and output byte order and charset
 * inBytes    start of the input body
 * length     number of input bytes, or <0 to only compute the size
 * outBytes   start of the output body; may be the same as inBytes
 * pErrorCode receives U_INDEX_OUTOFBOUNDS_ERROR if length is smaller than
 *            the total size, U_MEMORY_ALLOCATION_ERROR if the scratch
 *            array cannot be allocated, or errors from the sub-swappers
 *
 * Returns the total size read from the input header, or 0 for the two
 * errors detected directly in this function.
 */
int32_t
PropertyAliases::swap(const UDataSwapper *ds,
                      const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                      UErrorCode *pErrorCode) {
    /* read the input PropertyAliases - all 16-bit values */
    PropertyAliases aliases;
    const uint16_t *inFields=(const uint16_t *)inBytes;
    uint16_t *aliasFields=(uint16_t *)&aliases;
    for(int32_t field=0; field<(int32_t)sizeof(PropertyAliases)/2; ++field) {
        aliasFields[field]=ds->readUInt16(inFields[field]);
    }

    if(length<0) {
        /* preflighting: only the size is wanted */
        return aliases.total_size;
    }

    if(length<aliases.total_size) {
        udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n",
                         length);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* copy the data for inaccessible bytes */
    if(inBytes!=outBytes) {
        uprv_memcpy(outBytes, inBytes, aliases.total_size);
    }

    /* swap the PropertyAliases class fields */
    const PropertyAliases *inAliases=(const PropertyAliases *)inBytes;
    PropertyAliases *outAliases=(PropertyAliases *)outBytes;
    ds->swapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode);

    /* swap the name groups */
    ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset,
                        aliases.stringPool_offset-aliases.nameGroupPool_offset,
                        outBytes+aliases.nameGroupPool_offset, pErrorCode);

    /* swap the strings */
    udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset,
                                 aliases.total_size-aliases.stringPool_offset,
                                 outBytes+aliases.stringPool_offset, pErrorCode);

    /*
     * alloc uint8_t temp[total_size] and reset it
     * swap each top-level struct, put at least the count fields into temp
     *   use subclass-specific swap() functions
     * enumerate value maps, for each
     *   if temp does not have count!=0 yet
     *     read count, put it into temp
     *     swap the array(s)
     *     resort strings in name->enum maps
     * swap value maps
     */
    uint8_t *temp=(uint8_t *)uprv_malloc(aliases.total_size);
    if(temp==NULL) {
        udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n",
                         aliases.total_size);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    uprv_memset(temp, 0, aliases.total_size);

    /* swap properties->name groups map */
    NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                    temp, aliases.enumToName_offset, pErrorCode);

    /* swap name->properties map */
    NameToEnum::swap(ds, inBytes, length, outBytes,
                     temp, aliases.nameToEnum_offset, pErrorCode);

    /* swap properties->value maps map */
    NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                    temp, aliases.enumToValue_offset, pErrorCode);

    /* enumerate all ValueMaps and swap them */
    const ValueMap *inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset);
    ValueMap *outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset);

    for(int32_t mapIndex=0; mapIndex<aliases.valueMap_count; ++mapIndex) {
        /* the ValueMap array itself is still in input byte order here */
        ValueMap valueMap;
        valueMap.enumToName_offset=udata_readInt16(ds, inValueMaps[mapIndex].enumToName_offset);
        valueMap.ncEnumToName_offset=udata_readInt16(ds, inValueMaps[mapIndex].ncEnumToName_offset);
        valueMap.nameToEnum_offset=udata_readInt16(ds, inValueMaps[mapIndex].nameToEnum_offset);

        if(valueMap.enumToName_offset!=0) {
            EnumToOffset::swap(ds, inBytes, length, outBytes,
                               temp, valueMap.enumToName_offset,
                               pErrorCode);
        } else if(valueMap.ncEnumToName_offset!=0) {
            NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
                                            temp, valueMap.ncEnumToName_offset,
                                            pErrorCode);
        }
        if(valueMap.nameToEnum_offset!=0) {
            NameToEnum::swap(ds, inBytes, length, outBytes,
                             temp, valueMap.nameToEnum_offset,
                             pErrorCode);
        }
    }

    /* swap the ValueMaps array itself */
    ds->swapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap),
                    outValueMaps, pErrorCode);

    /* name groups and strings were swapped above */

    /* release temp */
    uprv_free(temp);

    return aliases.total_size;
}
|
||||
|
||||
/*
 * Swap a complete pnames.icu data item: the data header first, then the body.
 *
 * length<0 requests preflighting (size computation only).
 * Returns the header size plus the body size, or 0 if the header could not
 * be swapped, the data format is not recognized (U_UNSUPPORTED_ERROR), or
 * fewer than sizeof(PropertyAliases) bytes follow the header
 * (U_INDEX_OUTOFBOUNDS_ERROR).
 */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    const UBool isPnames=
        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1;
    if(!isPnames) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
    uint8_t *outBytes=(uint8_t *)outData+headerSize;

    if(length>=0) {
        /* from here on, length counts only the bytes after the header */
        length-=headerSize;
        if(length<(int32_t)sizeof(PropertyAliases)) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
}
|
||||
|
||||
//eof
|
515
source/common/propname.h
Normal file
515
source/common/propname.h
Normal file
|
@ -0,0 +1,515 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: October 30 2002
|
||||
* Since: ICU 2.4
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef PROPNAME_H
|
||||
#define PROPNAME_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "udataswp.h"
|
||||
#include "uprops.h"
|
||||
|
||||
/*
|
||||
* This header defines the in-memory layout of the property names data
|
||||
* structure representing the UCD data files PropertyAliases.txt and
|
||||
* PropertyValueAliases.txt. It is used by:
|
||||
* propname.cpp - reads data
|
||||
* genpname - creates data
|
||||
*/
|
||||
|
||||
/* low-level char * property name comparison -------------------------------- */
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \var uprv_comparePropertyNames
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
/* UDataMemory structure and signatures ------------------------------------- */
|
||||
|
||||
#define PNAME_DATA_NAME "pnames"
|
||||
#define PNAME_DATA_TYPE "icu"
|
||||
|
||||
/* Fields in UDataInfo: */
|
||||
|
||||
/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
|
||||
#define PNAME_SIG_0 ((uint8_t)0x70) /* p */
|
||||
#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
|
||||
#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
|
||||
#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
|
||||
|
||||
#define PNAME_FORMAT_VERSION ((int8_t)1) /* formatVersion[0] */
|
||||
|
||||
/**
|
||||
* Swap pnames.icu. See udataswp.h.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
upname_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
|
||||
class Builder;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* An offset from the start of the pnames data to a contained entity.
|
||||
* This must be a signed value, since negative offsets are used as an
|
||||
* end-of-list marker. Offsets to actual objects are non-zero. A
|
||||
* zero offset indicates an absent entry; this corresponds to aliases
|
||||
* marked "n/a" in the original Unicode data files.
|
||||
*/
|
||||
typedef int16_t Offset; /* must be signed */
|
||||
|
||||
#define MAX_OFFSET 0x7FFF
|
||||
|
||||
/**
|
||||
* A generic value for a property or property value. Typically an
|
||||
* enum from uchar.h, but sometimes a non-enum value. It must be
|
||||
* large enough to accommodate the largest enum value, which as of this
|
||||
* writing is the largest general category mask. Need not be signed
|
||||
* but may be. Typically it doesn't matter, since the caller will
|
||||
* cast it to the proper type before use. Takes the special value
|
||||
* UCHAR_INVALID_CODE for invalid input.
|
||||
*/
|
||||
typedef int32_t EnumValue;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* ValueMap */
|
||||
|
||||
/**
|
||||
* For any top-level property that has named values (binary and
|
||||
* enumerated properties), there is a ValueMap object. This object
|
||||
* maps from enum values to two other maps. One goes from value enums
|
||||
* to value names. The other goes from value names to value enums.
|
||||
*
|
||||
* The value enum values may be contiguous or disjoint. If they are
|
||||
* contiguous then the enumToName_offset is nonzero, and the
|
||||
* ncEnumToName_offset is zero. Vice versa if the value enums are
|
||||
* disjoint.
|
||||
*
|
||||
* There are n of these objects, where n is the number of binary
|
||||
* properties + the number of enumerated properties.
|
||||
*/
|
||||
struct ValueMap {
|
||||
|
||||
/* -- begin pnames data -- */
|
||||
/* Enum=>name EnumToOffset / NonContiguousEnumToOffset objects. */
|
||||
/* Exactly one of these will be nonzero. */
|
||||
Offset enumToName_offset;
|
||||
Offset ncEnumToName_offset;
|
||||
|
||||
Offset nameToEnum_offset; /* Name=>enum data */
|
||||
/* -- end pnames data -- */
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* PropertyAliases class */
|
||||
|
||||
/**
|
||||
* A class encapsulating access to the memory-mapped data representing
|
||||
* property aliases and property value aliases (pnames). The class
|
||||
* MUST have no v-table and declares certain methods inline -- small
|
||||
* methods and methods that are called from only one point.
|
||||
*
|
||||
* The data members in this class correspond to the in-memory layout
|
||||
* of the header of the pnames data.
|
||||
*/
|
||||
class PropertyAliases {
|
||||
|
||||
/* -- begin pnames data -- */
|
||||
/* Enum=>name EnumToOffset object for binary and enumerated */
|
||||
/* properties */
|
||||
Offset enumToName_offset;
|
||||
|
||||
/* Name=>enum data for binary & enumerated properties */
|
||||
Offset nameToEnum_offset;
|
||||
|
||||
/* Enum=>offset EnumToOffset object mapping enumerated properties */
|
||||
/* to ValueMap objects */
|
||||
Offset enumToValue_offset;
|
||||
|
||||
/* The following are needed by external readers of this data. */
|
||||
/* We don't use them ourselves. */
|
||||
int16_t total_size; /* size in bytes excluding the udata header */
|
||||
Offset valueMap_offset; /* offset to start of array */
|
||||
int16_t valueMap_count; /* number of entries */
|
||||
Offset nameGroupPool_offset; /* offset to start of array */
|
||||
int16_t nameGroupPool_count; /* number of entries (not groups) */
|
||||
Offset stringPool_offset; /* offset to start of pool */
|
||||
int16_t stringPool_count; /* number of strings (not size in bytes) */
|
||||
|
||||
/* -- end pnames data -- */
|
||||
|
||||
friend class ::Builder;
|
||||
|
||||
const ValueMap* getValueMap(EnumValue prop) const;
|
||||
|
||||
const char* chooseNameInGroup(Offset offset,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
public:
|
||||
|
||||
inline const int8_t* getPointer(Offset o) const {
|
||||
return ((const int8_t*) this) + o;
|
||||
}
|
||||
|
||||
inline const int8_t* getPointerNull(Offset o) const {
|
||||
return o ? getPointer(o) : NULL;
|
||||
}
|
||||
|
||||
inline const char* getPropertyName(EnumValue prop,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
inline EnumValue getPropertyEnum(const char* alias) const;
|
||||
|
||||
inline const char* getPropertyValueName(EnumValue prop, EnumValue value,
|
||||
UPropertyNameChoice choice) const;
|
||||
|
||||
inline EnumValue getPropertyValueEnum(EnumValue prop,
|
||||
const char* alias) const;
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* EnumToOffset */
|
||||
|
||||
/**
|
||||
* A generic map from enum values to Offsets. The enum values must be
|
||||
* contiguous, from enumStart to enumLimit. The Offset values may
|
||||
* point to anything.
|
||||
*/
|
||||
class EnumToOffset {
|
||||
|
||||
/* -- begin pnames data -- */
|
||||
EnumValue enumStart;
|
||||
EnumValue enumLimit;
|
||||
Offset _offsetArray; /* [array of enumLimit-enumStart] */
|
||||
/* -- end pnames data -- */
|
||||
|
||||
friend class ::Builder;
|
||||
|
||||
Offset* getOffsetArray() {
|
||||
return &_offsetArray;
|
||||
}
|
||||
|
||||
const Offset* getOffsetArray() const {
|
||||
return &_offsetArray;
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(EnumToOffset) + sizeof(Offset) * (n - 1);
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(enumLimit - enumStart);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
if (enumProbe < enumStart ||
|
||||
enumProbe >= enumLimit) {
|
||||
return 0; /* not found */
|
||||
}
|
||||
const Offset* p = getOffsetArray();
|
||||
return p[enumProbe - enumStart];
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* NonContiguousEnumToOffset */
|
||||
|
||||
/**
|
||||
* A generic map from enum values to Offsets. The enum values may be
|
||||
* disjoint. If they are contiguous, an EnumToOffset should be used
|
||||
* instead. The Offset values may point to anything.
|
||||
*/
|
||||
class NonContiguousEnumToOffset {
|
||||
|
||||
/* -- begin pnames data -- */
|
||||
int32_t count;
|
||||
EnumValue _enumArray; /* [array of count] */
|
||||
/* Offset _offsetArray; // [array of count] after enumValue[count-1] */
|
||||
/* -- end pnames data -- */
|
||||
|
||||
friend class ::Builder;
|
||||
|
||||
EnumValue* getEnumArray() {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
const EnumValue* getEnumArray() const {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
Offset* getOffsetArray() {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
const Offset* getOffsetArray() const {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(int32_t) + (sizeof(EnumValue) + sizeof(Offset)) * n;
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(count);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
Offset getOffset(EnumValue enumProbe) const {
|
||||
const EnumValue* e = getEnumArray();
|
||||
const Offset* p = getOffsetArray();
|
||||
/* linear search; binary later if warranted */
|
||||
/* (binary is not faster for short lists) */
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
if (e[i] < enumProbe) continue;
|
||||
if (e[i] > enumProbe) break;
|
||||
return p[i];
|
||||
}
|
||||
return 0; /* not found */
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* NameToEnum */
|
||||
|
||||
/**
|
||||
* A map from names to enum values.
|
||||
*/
|
||||
class NameToEnum {
|
||||
|
||||
/* -- begin pnames data -- */
|
||||
int32_t count; /* number of entries */
|
||||
EnumValue _enumArray; /* [array of count] EnumValues */
|
||||
/* Offset _nameArray; // [array of count] offsets to names */
|
||||
/* -- end pnames data -- */
|
||||
|
||||
friend class ::Builder;
|
||||
|
||||
EnumValue* getEnumArray() {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
const EnumValue* getEnumArray() const {
|
||||
return &_enumArray;
|
||||
}
|
||||
|
||||
Offset* getNameArray() {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
const Offset* getNameArray() const {
|
||||
return (Offset*) (getEnumArray() + count);
|
||||
}
|
||||
|
||||
static int32_t getSize(int32_t n) {
|
||||
return sizeof(int32_t) + (sizeof(Offset) + sizeof(EnumValue)) * n;
|
||||
}
|
||||
|
||||
int32_t getSize() {
|
||||
return getSize(count);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
EnumValue getEnum(const char* alias, const PropertyAliases& data) const {
|
||||
|
||||
const Offset* n = getNameArray();
|
||||
const EnumValue* e = getEnumArray();
|
||||
|
||||
/* linear search; binary later if warranted */
|
||||
/* (binary is not faster for short lists) */
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
const char* name = (const char*) data.getPointer(n[i]);
|
||||
int32_t c = uprv_comparePropertyNames(alias, name);
|
||||
if (c > 0) continue;
|
||||
if (c < 0) break;
|
||||
return e[i];
|
||||
}
|
||||
|
||||
return UCHAR_INVALID_CODE;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
swap(const UDataSwapper *ds,
|
||||
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
|
||||
uint8_t *temp, int32_t pos,
|
||||
UErrorCode *pErrorCode);
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
*
|
||||
* In-memory layout. THIS IS NOT A STANDALONE DOCUMENT. It goes
|
||||
* together with above C++ declarations and gives an overview.
|
||||
*
|
||||
* See above for definitions of Offset and EnumValue. Also, refer to
|
||||
* above class declarations for the "bottom line" on data layout.
|
||||
*
|
||||
* Sizes:
|
||||
* '*_offset' is an Offset (see above)
|
||||
* 'count' members are typically int32_t (see above declarations)
|
||||
* 'enumArray' is an array of EnumValue (see above)
|
||||
* 'offsetArray' is an array of Offset (see above)
|
||||
* 'nameArray' is an array of Offset (see above)
|
||||
* 'enum*' is an EnumValue (see above)
|
||||
* '*Array [x n]' means that *Array has n elements
|
||||
*
|
||||
* References:
|
||||
* Instead of pointers, this flat data structure contains offsets.
|
||||
* All offsets are relative to the start of 'header'. A notation
|
||||
* is used to indicate what structure each offset points to:
|
||||
* 'foo (>x)' the offset(s) in foo point to structure x
|
||||
*
|
||||
* Structures:
|
||||
* Each structure is assigned a number, except for the header,
|
||||
* which is called 'header'. The numbers are not contiguous
|
||||
* for historical reasons. Some structures have sub-parts
|
||||
* that are denoted with a letter, e.g., "5a".
|
||||
*
|
||||
* BEGIN LAYOUT
|
||||
* ============
|
||||
* header:
|
||||
* enumToName_offset (>0)
|
||||
* nameToEnum_offset (>2)
|
||||
* enumToValue_offset (>3)
|
||||
* (alignment padding built into header)
|
||||
*
|
||||
* The header also contains the following, used by "external readers"
|
||||
* like ICU4J and icuswap.
|
||||
*
|
||||
* // The following are needed by external readers of this data.
|
||||
* // We don't use them ourselves.
|
||||
* int16_t total_size; // size in bytes excluding the udata header
|
||||
* Offset valueMap_offset; // offset to start of array
|
||||
* int16_t valueMap_count; // number of entries
|
||||
* Offset nameGroupPool_offset; // offset to start of array
|
||||
* int16_t nameGroupPool_count; // number of entries (not groups)
|
||||
* Offset stringPool_offset; // offset to start of pool
|
||||
* int16_t stringPool_count; // number of strings (not size in bytes)
|
||||
*
|
||||
* 0: # NonContiguousEnumToOffset obj for props => name groups
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* (1: omitted -- no longer used)
|
||||
*
|
||||
* 2: # NameToEnum obj for binary & enumerated props
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* nameArray [x count] (>99)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 3: # NonContiguousEnumToOffset obj for enumerated props => ValueMaps
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>4)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 4: # ValueMap array [x one for each enumerated prop i]
|
||||
* enumToName_offset (>5a +2*i) one of these two is NULL, one is not
|
||||
* ncEnumToName_offset (>5b +2*i)
|
||||
* nameToEnum_offset (>6 +2*i)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* for each enumerated prop (either 5a or 5b):
|
||||
*
|
||||
* 5a: # EnumToOffset for enumerated prop's values => name groups
|
||||
* enumStart
|
||||
* enumLimit
|
||||
* offsetArray [x enumLimit - enumStart] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 5b: # NonContiguousEnumToOffset for enumerated prop's values => name groups
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* offsetArray [x count] (>98)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 6: # NameToEnum for enumerated prop's values
|
||||
* count
|
||||
* enumArray [x count]
|
||||
* nameArray [x count] (>99)
|
||||
*
|
||||
* => pad to next 4-byte boundary
|
||||
*
|
||||
* 98: # name group pool {NGP}
|
||||
* [array of Offset values] (>99)
|
||||
*
|
||||
* 99: # string pool {SP}
|
||||
* [pool of nul-terminated char* strings]
|
||||
*/
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* C++ */
|
||||
|
||||
#endif
|
557
source/common/propsvec.c
Normal file
557
source/common/propsvec.c
Normal file
|
@ -0,0 +1,557 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: propsvec.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002feb22
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store bits (Unicode character properties) in bit set vectors.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "utrie.h"
|
||||
#include "utrie2.h"
|
||||
#include "uarrsort.h"
|
||||
#include "propsvec.h"
|
||||
|
||||
struct UPropsVectors {
|
||||
uint32_t *v;
|
||||
int32_t columns; /* number of columns, plus two for start & limit values */
|
||||
int32_t maxRows;
|
||||
int32_t rows;
|
||||
int32_t prevRow; /* search optimization: remember last row seen */
|
||||
UBool isCompacted;
|
||||
};
|
||||
|
||||
#define UPVEC_INITIAL_ROWS (1<<12)
|
||||
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
|
||||
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
|
||||
|
||||
/**
 * Allocate and initialize a properties-vectors builder.
 *
 * The new table contains one row for the range [0, 0x110000) followed by
 * one single-code-point row for each special value from
 * UPVEC_FIRST_SPECIAL_CP through UPVEC_MAX_CP.  All value columns start
 * out as 0.
 *
 * @param columns    number of value columns per row, at least 1.  Values so
 *                   large that the initial allocation size would overflow
 *                   int32_t are rejected.
 * @param pErrorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                   for a bad column count and U_MEMORY_ALLOCATION_ERROR if
 *                   allocation fails
 * @return the new builder (release it with upvec_close()), or NULL on error
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    UPropsVectors *pv;
    uint32_t *v, *row;
    uint32_t cp;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    /*
     * The upper bound keeps both columns+2 and the allocation size
     * UPVEC_INITIAL_ROWS*(columns+2)*4 below within int32_t range;
     * without it these signed computations could overflow.
     */
    if(columns<1 || columns>(INT32_MAX/(UPVEC_INITIAL_ROWS*4))-2) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    columns+=2; /* count range start and limit columns */

    pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
    if(pv==NULL || v==NULL) {
        /* either allocation may have succeeded; release both */
        uprv_free(pv);
        uprv_free(v);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memset(pv, 0, sizeof(UPropsVectors));
    pv->v=v;
    pv->columns=columns;
    pv->maxRows=UPVEC_INITIAL_ROWS;
    pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);

    /* set the all-Unicode row and the special-value rows */
    row=pv->v;
    uprv_memset(row, 0, pv->rows*columns*4);
    row[0]=0;
    row[1]=0x110000;
    row+=columns;
    for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
        /* each special value gets its own one-code-point range */
        row[0]=cp;
        row[1]=cp+1;
        row+=columns;
    }
    return pv;
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_close(UPropsVectors *pv) {
|
||||
if(pv!=NULL) {
|
||||
uprv_free(pv->v);
|
||||
uprv_free(pv);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t *
|
||||
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
|
||||
uint32_t *row;
|
||||
int32_t columns, i, start, limit, prevRow, rows;
|
||||
|
||||
columns=pv->columns;
|
||||
rows=limit=pv->rows;
|
||||
prevRow=pv->prevRow;
|
||||
|
||||
/* check the vicinity of the last-seen row (start searching with an unrolled loop) */
|
||||
row=pv->v+prevRow*columns;
|
||||
if(rangeStart>=(UChar32)row[0]) {
|
||||
if(rangeStart<(UChar32)row[1]) {
|
||||
/* same row as last seen */
|
||||
return row;
|
||||
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
|
||||
/* next row after the last one */
|
||||
pv->prevRow=prevRow+1;
|
||||
return row;
|
||||
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
|
||||
/* second row after the last one */
|
||||
pv->prevRow=prevRow+2;
|
||||
return row;
|
||||
} else if((rangeStart-(UChar32)row[1])<10) {
|
||||
/* we are close, continue looping */
|
||||
prevRow+=2;
|
||||
do {
|
||||
++prevRow;
|
||||
row+=columns;
|
||||
} while(rangeStart>=(UChar32)row[1]);
|
||||
pv->prevRow=prevRow;
|
||||
return row;
|
||||
}
|
||||
} else if(rangeStart<(UChar32)pv->v[1]) {
|
||||
/* the very first row */
|
||||
pv->prevRow=0;
|
||||
return pv->v;
|
||||
}
|
||||
|
||||
/* do a binary search for the start of the range */
|
||||
start=0;
|
||||
while(start<limit-1) {
|
||||
i=(start+limit)/2;
|
||||
row=pv->v+i*columns;
|
||||
if(rangeStart<(UChar32)row[0]) {
|
||||
limit=i;
|
||||
} else if(rangeStart<(UChar32)row[1]) {
|
||||
pv->prevRow=i;
|
||||
return row;
|
||||
} else {
|
||||
start=i;
|
||||
}
|
||||
}
|
||||
|
||||
/* must be found because all ranges together always cover all of Unicode */
|
||||
pv->prevRow=start;
|
||||
return pv->v+start*columns;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_setValue(UPropsVectors *pv,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t column,
|
||||
uint32_t value, uint32_t mask,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t *firstRow, *lastRow;
|
||||
int32_t columns;
|
||||
UChar32 limit;
|
||||
UBool splitFirstRow, splitLastRow;
|
||||
|
||||
/* argument checking */
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if( pv==NULL ||
|
||||
start<0 || start>end || end>UPVEC_MAX_CP ||
|
||||
column<0 || column>=(pv->columns-2)
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if(pv->isCompacted) {
|
||||
*pErrorCode=U_NO_WRITE_PERMISSION;
|
||||
return;
|
||||
}
|
||||
limit=end+1;
|
||||
|
||||
/* initialize */
|
||||
columns=pv->columns;
|
||||
column+=2; /* skip range start and limit columns */
|
||||
value&=mask;
|
||||
|
||||
/* find the rows whose ranges overlap with the input range */
|
||||
|
||||
/* find the first and last rows, always successful */
|
||||
firstRow=_findRow(pv, start);
|
||||
lastRow=_findRow(pv, end);
|
||||
|
||||
/*
|
||||
* Rows need to be split if they partially overlap with the
|
||||
* input range (only possible for the first and last rows)
|
||||
* and if their value differs from the input value.
|
||||
*/
|
||||
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
|
||||
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
|
||||
|
||||
/* split first/last rows if necessary */
|
||||
if(splitFirstRow || splitLastRow) {
|
||||
int32_t count, rows;
|
||||
|
||||
rows=pv->rows;
|
||||
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
|
||||
uint32_t *newVectors;
|
||||
int32_t newMaxRows;
|
||||
|
||||
if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
|
||||
newMaxRows=UPVEC_MEDIUM_ROWS;
|
||||
} else if(pv->maxRows<UPVEC_MAX_ROWS) {
|
||||
newMaxRows=UPVEC_MAX_ROWS;
|
||||
} else {
|
||||
/* Implementation bug, or UPVEC_MAX_ROWS too low. */
|
||||
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
|
||||
if(newVectors==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
uprv_memcpy(newVectors, pv->v, rows*columns*4);
|
||||
firstRow=newVectors+(firstRow-pv->v);
|
||||
lastRow=newVectors+(lastRow-pv->v);
|
||||
uprv_free(pv->v);
|
||||
pv->v=newVectors;
|
||||
pv->maxRows=newMaxRows;
|
||||
}
|
||||
|
||||
/* count the number of row cells to move after the last row, and move them */
|
||||
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
|
||||
if(count>0) {
|
||||
uprv_memmove(
|
||||
lastRow+(1+splitFirstRow+splitLastRow)*columns,
|
||||
lastRow+columns,
|
||||
count*4);
|
||||
}
|
||||
pv->rows=rows+splitFirstRow+splitLastRow;
|
||||
|
||||
/* split the first row, and move the firstRow pointer to the second part */
|
||||
if(splitFirstRow) {
|
||||
/* copy all affected rows up one and move the lastRow pointer */
|
||||
count = (int32_t)((lastRow-firstRow)+columns);
|
||||
uprv_memmove(firstRow+columns, firstRow, count*4);
|
||||
lastRow+=columns;
|
||||
|
||||
/* split the range and move the firstRow pointer */
|
||||
firstRow[1]=firstRow[columns]=(uint32_t)start;
|
||||
firstRow+=columns;
|
||||
}
|
||||
|
||||
/* split the last row */
|
||||
if(splitLastRow) {
|
||||
/* copy the last row data */
|
||||
uprv_memcpy(lastRow+columns, lastRow, columns*4);
|
||||
|
||||
/* split the range and move the firstRow pointer */
|
||||
lastRow[1]=lastRow[columns]=(uint32_t)limit;
|
||||
}
|
||||
}
|
||||
|
||||
/* set the "row last seen" to the last row for the range */
|
||||
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
|
||||
|
||||
/* set the input value in all remaining rows */
|
||||
firstRow+=column;
|
||||
lastRow+=column;
|
||||
mask=~mask;
|
||||
for(;;) {
|
||||
*firstRow=(*firstRow&mask)|value;
|
||||
if(firstRow==lastRow) {
|
||||
break;
|
||||
}
|
||||
firstRow+=columns;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
|
||||
uint32_t *row;
|
||||
UPropsVectors *ncpv;
|
||||
|
||||
if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
|
||||
return 0;
|
||||
}
|
||||
ncpv=(UPropsVectors *)pv;
|
||||
row=_findRow(ncpv, c);
|
||||
return row[2+column];
|
||||
}
|
||||
|
||||
/**
 * Get a row by index.
 *
 * @param pv          the builder; must not be compacted
 * @param rowIndex    0..(number of rows - 1)
 * @param pRangeStart if not NULL, receives the first code point of the row
 * @param pRangeEnd   if not NULL, receives the last code point (inclusive)
 * @return pointer to the row's value columns (past start and limit),
 *         or NULL if pv is compacted or rowIndex is out of range
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
    uint32_t *cells;

    if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
        return NULL;
    }

    cells=pv->v+rowIndex*pv->columns;
    if(pRangeStart!=NULL) {
        *pRangeStart=(UChar32)cells[0];
    }
    if(pRangeEnd!=NULL) {
        /* the row stores an exclusive limit; report an inclusive end */
        *pRangeEnd=(UChar32)cells[1]-1;
    }
    return cells+2;
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
upvec_compareRows(const void *context, const void *l, const void *r) {
|
||||
const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
|
||||
const UPropsVectors *pv=(const UPropsVectors *)context;
|
||||
int32_t i, count, columns;
|
||||
|
||||
count=columns=pv->columns; /* includes start/limit columns */
|
||||
|
||||
/* start comparing after start/limit but wrap around to them */
|
||||
i=2;
|
||||
do {
|
||||
if(left[i]!=right[i]) {
|
||||
return left[i]<right[i] ? -1 : 1;
|
||||
}
|
||||
if(++i==columns) {
|
||||
i=0;
|
||||
}
|
||||
} while(--count>0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
|
||||
uint32_t *row;
|
||||
int32_t i, columns, valueColumns, rows, count;
|
||||
UChar32 start, limit;
|
||||
|
||||
/* argument checking */
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if(handler==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
if(pv->isCompacted) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set the flag now: Sorting and compacting destroys the builder data structure. */
|
||||
pv->isCompacted=TRUE;
|
||||
|
||||
rows=pv->rows;
|
||||
columns=pv->columns;
|
||||
valueColumns=columns-2; /* not counting start & limit */
|
||||
|
||||
/* sort the properties vectors to find unique vector values */
|
||||
uprv_sortArray(pv->v, rows, columns*4,
|
||||
upvec_compareRows, pv, FALSE, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find and set the special values.
|
||||
* This has to do almost the same work as the compaction below,
|
||||
* to find the indexes where the special-value rows will move.
|
||||
*/
|
||||
row=pv->v;
|
||||
count=-valueColumns;
|
||||
for(i=0; i<rows; ++i) {
|
||||
start=(UChar32)row[0];
|
||||
|
||||
/* count a new values vector if it is different from the current one */
|
||||
if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
|
||||
count+=valueColumns;
|
||||
}
|
||||
|
||||
if(start>=UPVEC_FIRST_SPECIAL_CP) {
|
||||
handler(context, start, start, count, row+2, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
row+=columns;
|
||||
}
|
||||
|
||||
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
|
||||
count+=valueColumns;
|
||||
|
||||
/* Call the handler once more to signal the start of delivering real values. */
|
||||
handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
|
||||
count, row-valueColumns, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move vector contents up to a contiguous array with only unique
|
||||
* vector values, and call the handler function for each vector.
|
||||
*
|
||||
* This destroys the Properties Vector structure and replaces it
|
||||
* with an array of just vector values.
|
||||
*/
|
||||
row=pv->v;
|
||||
count=-valueColumns;
|
||||
for(i=0; i<rows; ++i) {
|
||||
/* fetch these first before memmove() may overwrite them */
|
||||
start=(UChar32)row[0];
|
||||
limit=(UChar32)row[1];
|
||||
|
||||
/* add a new values vector if it is different from the current one */
|
||||
if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
|
||||
count+=valueColumns;
|
||||
uprv_memmove(pv->v+count, row+2, valueColumns*4);
|
||||
}
|
||||
|
||||
if(start<UPVEC_FIRST_SPECIAL_CP) {
|
||||
handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
row+=columns;
|
||||
}
|
||||
|
||||
/* count is at the beginning of the last vector, add one to include that last vector */
|
||||
pv->rows=count/valueColumns+1;
|
||||
}
|
||||
|
||||
U_CAPI const uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
|
||||
if(!pv->isCompacted) {
|
||||
return NULL;
|
||||
}
|
||||
if(pRows!=NULL) {
|
||||
*pRows=pv->rows;
|
||||
}
|
||||
if(pColumns!=NULL) {
|
||||
*pColumns=pv->columns-2;
|
||||
}
|
||||
return pv->v;
|
||||
}
|
||||
|
||||
/*
 * Return a uprv_malloc()ed copy of the vectors array of a compacted pv.
 * pRows and pColumns (both optional) receive the row count and the number
 * of value columns.
 * Returns NULL on failure:
 * - U_ILLEGAL_ARGUMENT_ERROR if pv has not been compacted
 * - U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    uint32_t *copy;
    int32_t valueColumns, byteLength;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(!pv->isCompacted) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    valueColumns=pv->columns-2;
    byteLength=pv->rows*valueColumns*4; /* 4 bytes per uint32_t */

    copy=(uint32_t *)uprv_malloc(byteLength);
    if(copy==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(copy, pv->v, byteLength);

    if(pRows!=NULL) {
        *pRows=pv->rows;
    }
    if(pColumns!=NULL) {
        *pColumns=valueColumns;
    }
    return copy;
}
|
||||
|
||||
/*
 * Compact pv with upvec_compactToUTrie2Handler as the handler, then freeze
 * the trie that the handler built with 16-bit values.
 * Returns the trie, or NULL (after closing the trie) if *pErrorCode
 * indicates a failure.
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context ctx={ NULL };

    upvec_compact(pv, upvec_compactToUTrie2Handler, &ctx, pErrorCode);
    utrie2_freeze(ctx.trie, UTRIE2_16_VALUE_BITS, pErrorCode);

    if(U_FAILURE(*pErrorCode)) {
        utrie2_close(ctx.trie);
        return NULL;
    }
    return ctx.trie;
}
|
||||
|
||||
/*
|
||||
* TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
|
||||
* some 16-bit field and builds and returns a UTrie2.
|
||||
*/
|
||||
|
||||
U_CAPI void U_CALLCONV
|
||||
upvec_compactToUTrieHandler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode) {
|
||||
UPVecToUTrieContext *toUTrie=(UPVecToUTrieContext *)context;
|
||||
if(start<UPVEC_FIRST_SPECIAL_CP) {
|
||||
if(!utrie_setRange32(toUTrie->newTrie, start, end+1, (uint32_t)rowIndex, TRUE)) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
switch(start) {
|
||||
case UPVEC_INITIAL_VALUE_CP:
|
||||
toUTrie->initialValue=rowIndex;
|
||||
break;
|
||||
case UPVEC_START_REAL_VALUES_CP:
|
||||
if(rowIndex>0xffff) {
|
||||
/* too many rows for a 16-bit trie */
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
} else {
|
||||
toUTrie->newTrie=utrie_open(NULL, NULL, toUTrie->capacity,
|
||||
toUTrie->initialValue, toUTrie->initialValue,
|
||||
toUTrie->latin1Linear);
|
||||
if(toUTrie->newTrie==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_CALLCONV
|
||||
upvec_compactToUTrie2Handler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode) {
|
||||
UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
|
||||
if(start<UPVEC_FIRST_SPECIAL_CP) {
|
||||
utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
|
||||
} else {
|
||||
switch(start) {
|
||||
case UPVEC_INITIAL_VALUE_CP:
|
||||
toUTrie2->initialValue=rowIndex;
|
||||
break;
|
||||
case UPVEC_ERROR_VALUE_CP:
|
||||
toUTrie2->errorValue=rowIndex;
|
||||
break;
|
||||
case UPVEC_START_REAL_VALUES_CP:
|
||||
toUTrie2->maxValue=rowIndex;
|
||||
if(rowIndex>0xffff) {
|
||||
/* too many rows for a 16-bit trie */
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
} else {
|
||||
toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
|
||||
toUTrie2->errorValue, pErrorCode);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
191
source/common/propsvec.h
Normal file
191
source/common/propsvec.h
Normal file
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: propsvec.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002feb22
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store bits (Unicode character properties) in bit set vectors.
|
||||
*/
|
||||
|
||||
#ifndef __UPROPSVEC_H__
|
||||
#define __UPROPSVEC_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "utrie.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Unicode Properties Vectors associated with code point ranges.
|
||||
*
|
||||
* Rows of uint32_t integers in a contiguous array store
|
||||
* the range limits and the properties vectors.
|
||||
*
|
||||
* Logically, each row has a certain number of uint32_t values,
|
||||
* which is set via the upvec_open() "columns" parameter.
|
||||
*
|
||||
* Internally, two additional columns are stored.
|
||||
* In each internal row,
|
||||
* row[0] contains the start code point and
|
||||
* row[1] contains the limit code point,
|
||||
* which is the start of the next range.
|
||||
*
|
||||
* Initially, there is only one "normal" row for
|
||||
* range [0..0x110000[ with values 0.
|
||||
* There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
|
||||
*
|
||||
* It would be possible to store only one range boundary per row,
|
||||
 * but self-contained rows make it possible to sort them by contents later.
|
||||
*/
|
||||
struct UPropsVectors;
|
||||
typedef struct UPropsVectors UPropsVectors;
|
||||
|
||||
/*
|
||||
* Special pseudo code points for storing the initialValue and the errorValue,
|
||||
* which are used to initialize a UTrie2 or similar.
|
||||
*/
|
||||
#define UPVEC_FIRST_SPECIAL_CP 0x110000
|
||||
#define UPVEC_INITIAL_VALUE_CP 0x110000
|
||||
#define UPVEC_ERROR_VALUE_CP 0x110001
|
||||
#define UPVEC_MAX_CP 0x110001
|
||||
|
||||
/*
|
||||
* Special pseudo code point used in upvec_compact() signalling the end of
|
||||
* delivering special values and the beginning of delivering real ones.
|
||||
* Stable value, unlike UPVEC_MAX_CP which might grow over time.
|
||||
*/
|
||||
#define UPVEC_START_REAL_VALUES_CP 0x200000
|
||||
|
||||
/*
|
||||
* Open a UPropsVectors object.
|
||||
* @param columns Number of value integers (uint32_t) per row.
|
||||
*/
|
||||
U_CAPI UPropsVectors * U_EXPORT2
|
||||
upvec_open(int32_t columns, UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_close(UPropsVectors *pv);
|
||||
|
||||
/*
|
||||
* In rows for code points [start..end], select the column,
|
||||
* reset the mask bits and set the value bits (ANDed with the mask).
|
||||
*
|
||||
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_setValue(UPropsVectors *pv,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t column,
|
||||
uint32_t value, uint32_t mask,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Logically const but must not be used on the same pv concurrently!
|
||||
* Always returns 0 if called after upvec_compact().
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
|
||||
|
||||
/*
|
||||
* pRangeStart and pRangeEnd can be NULL.
|
||||
* @return NULL if rowIndex out of range and for illegal arguments,
|
||||
* or if called after upvec_compact()
|
||||
*/
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
|
||||
UChar32 *pRangeStart, UChar32 *pRangeEnd);
|
||||
|
||||
/*
|
||||
* Compact the vectors:
|
||||
* - modify the memory
|
||||
* - keep only unique vectors
|
||||
* - store them contiguously from the beginning of the memory
|
||||
* - for each (non-unique) row, call the handler function
|
||||
*
|
||||
* The handler's rowIndex is the index of the row in the compacted
|
||||
* memory block.
|
||||
 * (Therefore, it starts at 0 and increases in increments of the columns value.)
|
||||
*
|
||||
* In a first phase, only special values are delivered (each exactly once),
|
||||
* with start==end both equalling a special pseudo code point.
|
||||
* Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
|
||||
* where rowIndex is the length of the compacted array,
|
||||
* and the row is arbitrary (but not NULL).
|
||||
* Then, in the second phase, the handler is called for each row of real values.
|
||||
*/
|
||||
typedef void U_CALLCONV
|
||||
UPVecCompactHandler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Get the vectors array after calling upvec_compact().
|
||||
* The caller must not modify nor release the returned array.
|
||||
* Returns NULL if called before upvec_compact().
|
||||
*/
|
||||
U_CAPI const uint32_t * U_EXPORT2
|
||||
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
|
||||
|
||||
/*
|
||||
* Get a clone of the vectors array after calling upvec_compact().
|
||||
* The caller owns the returned array and must uprv_free() it.
|
||||
* Returns NULL if called before upvec_compact().
|
||||
*/
|
||||
U_CAPI uint32_t * U_EXPORT2
|
||||
upvec_cloneArray(const UPropsVectors *pv,
|
||||
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
|
||||
* vectors array, and freeze the trie.
|
||||
*/
|
||||
U_CAPI UTrie2 * U_EXPORT2
|
||||
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
|
||||
|
||||
struct UPVecToUTrieContext {
|
||||
UNewTrie *newTrie;
|
||||
int32_t capacity;
|
||||
int32_t initialValue;
|
||||
UBool latin1Linear;
|
||||
};
|
||||
typedef struct UPVecToUTrieContext UPVecToUTrieContext;
|
||||
|
||||
/* context=UPVecToUTrieContext, creates the trie and stores the rowIndex values */
|
||||
U_CAPI void U_CALLCONV
|
||||
upvec_compactToUTrieHandler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
struct UPVecToUTrie2Context {
|
||||
UTrie2 *trie;
|
||||
int32_t initialValue;
|
||||
int32_t errorValue;
|
||||
int32_t maxValue;
|
||||
};
|
||||
typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
|
||||
|
||||
/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
|
||||
U_CAPI void U_CALLCONV
|
||||
upvec_compactToUTrie2Handler(void *context,
|
||||
UChar32 start, UChar32 end,
|
||||
int32_t rowIndex, uint32_t *row, int32_t columns,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
580
source/common/punycode.c
Normal file
580
source/common/punycode.c
Normal file
|
@ -0,0 +1,580 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: punycode.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan31
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
|
||||
/* This ICU code derived from: */
|
||||
/*
|
||||
punycode.c 0.4.0 (2001-Nov-17-Sat)
|
||||
http://www.cs.berkeley.edu/~amc/idn/
|
||||
Adam M. Costello
|
||||
http://www.nicemice.net/amc/
|
||||
|
||||
Disclaimer and license
|
||||
|
||||
Regarding this entire document or any portion of it (including
|
||||
the pseudocode and C code), the author makes no guarantees and
|
||||
is not responsible for any damage resulting from its use. The
|
||||
author grants irrevocable permission to anyone to use, modify,
|
||||
and distribute it in any way that does not diminish the rights
|
||||
of anyone else to use, modify, and distribute it, provided that
|
||||
redistributed derivative works do not contain misleading author or
|
||||
version information. Derivative works need not be licensed under
|
||||
similar terms.
|
||||
*/
|
||||
/*
|
||||
* ICU modifications:
|
||||
* - ICU data types and coding conventions
|
||||
* - ICU string buffer handling with implicit source lengths
|
||||
* and destination preflighting
|
||||
* - UTF-16 handling
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "ustr_imp.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "punycode.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
|
||||
/* Punycode ----------------------------------------------------------------- */
|
||||
|
||||
/* Punycode parameters for Bootstring */
|
||||
#define BASE 36
|
||||
#define TMIN 1
|
||||
#define TMAX 26
|
||||
#define SKEW 38
|
||||
#define DAMP 700
|
||||
#define INITIAL_BIAS 72
|
||||
#define INITIAL_N 0x80
|
||||
|
||||
/* "Basic" Unicode/ASCII code points */
|
||||
#define _HYPHEN 0X2d
|
||||
#define DELIMITER _HYPHEN
|
||||
|
||||
#define _ZERO_ 0X30
|
||||
#define _NINE 0x39
|
||||
|
||||
#define _SMALL_A 0X61
|
||||
#define _SMALL_Z 0X7a
|
||||
|
||||
#define _CAPITAL_A 0X41
|
||||
#define _CAPITAL_Z 0X5a
|
||||
|
||||
#define IS_BASIC(c) ((c)<0x80)
|
||||
#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)
|
||||
|
||||
/**
|
||||
* digitToBasic() returns the basic code point whose value
|
||||
* (when used for representing integers) is d, which must be in the
|
||||
* range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
|
||||
* nonzero, in which case the uppercase form is used.
|
||||
*/
|
||||
static U_INLINE char
|
||||
digitToBasic(int32_t digit, UBool uppercase) {
|
||||
/* 0..25 map to ASCII a..z or A..Z */
|
||||
/* 26..35 map to ASCII 0..9 */
|
||||
if(digit<26) {
|
||||
if(uppercase) {
|
||||
return (char)(_CAPITAL_A+digit);
|
||||
} else {
|
||||
return (char)(_SMALL_A+digit);
|
||||
}
|
||||
} else {
|
||||
return (char)((_ZERO_-26)+digit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * basicToDigit[b] is the numeric value of the basic code point b
 * (for use in representing integers) in the range 0 to BASE-1,
 * or -1 if b does not represent a value.
 * Uppercase and lowercase ASCII letters both map to 0..25,
 * the ASCII digits '0'..'9' map to 26..35.
 */
static const int8_t
basicToDigit[256]={
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    /* 0x60 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    /* 0x70 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
|
||||
|
||||
static U_INLINE char
|
||||
asciiCaseMap(char b, UBool uppercase) {
|
||||
if(uppercase) {
|
||||
if(_SMALL_A<=b && b<=_SMALL_Z) {
|
||||
b-=(_SMALL_A-_CAPITAL_A);
|
||||
}
|
||||
} else {
|
||||
if(_CAPITAL_A<=b && b<=_CAPITAL_Z) {
|
||||
b+=(_SMALL_A-_CAPITAL_A);
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Punycode-specific Bootstring code ---------------------------------------- */
|
||||
|
||||
/*
|
||||
* The following code omits the {parts} of the pseudo-algorithm in the spec
|
||||
* that are not used with the Punycode parameter set.
|
||||
*/
|
||||
|
||||
/* Bias adaptation function. */
|
||||
static int32_t
|
||||
adaptBias(int32_t delta, int32_t length, UBool firstTime) {
|
||||
int32_t count;
|
||||
|
||||
if(firstTime) {
|
||||
delta/=DAMP;
|
||||
} else {
|
||||
delta/=2;
|
||||
}
|
||||
|
||||
delta+=delta/length;
|
||||
for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) {
|
||||
delta/=(BASE-TMIN);
|
||||
}
|
||||
|
||||
return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
|
||||
}
|
||||
|
||||
#define MAX_CP_COUNT 200
|
||||
|
||||
U_CFUNC int32_t
|
||||
u_strToPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UBool *caseFlags,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
||||
int32_t cpBuffer[MAX_CP_COUNT];
|
||||
int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
|
||||
UChar c, c2;
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the basic code points and
|
||||
* convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
|
||||
*/
|
||||
srcCPCount=destLength=0;
|
||||
if(srcLength==-1) {
|
||||
/* NUL-terminated input */
|
||||
for(j=0; /* no condition */; ++j) {
|
||||
if((c=src[j])==0) {
|
||||
break;
|
||||
}
|
||||
if(srcCPCount==MAX_CP_COUNT) {
|
||||
/* too many input code points */
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(IS_BASIC(c)) {
|
||||
cpBuffer[srcCPCount++]=0;
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength]=
|
||||
caseFlags!=NULL ?
|
||||
asciiCaseMap((char)c, caseFlags[j]) :
|
||||
(char)c;
|
||||
}
|
||||
++destLength;
|
||||
} else {
|
||||
n=(caseFlags!=NULL && caseFlags[j])<<31L;
|
||||
if(UTF_IS_SINGLE(c)) {
|
||||
n|=c;
|
||||
} else if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(c2=src[j+1])) {
|
||||
++j;
|
||||
n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
cpBuffer[srcCPCount++]=n;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* length-specified input */
|
||||
for(j=0; j<srcLength; ++j) {
|
||||
if(srcCPCount==MAX_CP_COUNT) {
|
||||
/* too many input code points */
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
c=src[j];
|
||||
if(IS_BASIC(c)) {
|
||||
if(destLength<destCapacity) {
|
||||
cpBuffer[srcCPCount++]=0;
|
||||
dest[destLength]=
|
||||
caseFlags!=NULL ?
|
||||
asciiCaseMap((char)c, caseFlags[j]) :
|
||||
(char)c;
|
||||
}
|
||||
++destLength;
|
||||
} else {
|
||||
n=(caseFlags!=NULL && caseFlags[j])<<31L;
|
||||
if(UTF_IS_SINGLE(c)) {
|
||||
n|=c;
|
||||
} else if(UTF_IS_LEAD(c) && (j+1)<srcLength && UTF_IS_TRAIL(c2=src[j+1])) {
|
||||
++j;
|
||||
n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
cpBuffer[srcCPCount++]=n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Finish the basic string - if it is not empty - with a delimiter. */
|
||||
basicLength=destLength;
|
||||
if(basicLength>0) {
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength]=DELIMITER;
|
||||
}
|
||||
++destLength;
|
||||
}
|
||||
|
||||
/*
|
||||
* handledCPCount is the number of code points that have been handled
|
||||
* basicLength is the number of basic code points
|
||||
* destLength is the number of chars that have been output
|
||||
*/
|
||||
|
||||
/* Initialize the state: */
|
||||
n=INITIAL_N;
|
||||
delta=0;
|
||||
bias=INITIAL_BIAS;
|
||||
|
||||
/* Main encoding loop: */
|
||||
for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
|
||||
/*
|
||||
* All non-basic code points < n have been handled already.
|
||||
* Find the next larger one:
|
||||
*/
|
||||
for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
|
||||
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
|
||||
if(n<=q && q<m) {
|
||||
m=q;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Increase delta enough to advance the decoder's
|
||||
* <n,i> state to <m,0>, but guard against overflow:
|
||||
*/
|
||||
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
|
||||
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
|
||||
return 0;
|
||||
}
|
||||
delta+=(m-n)*(handledCPCount+1);
|
||||
n=m;
|
||||
|
||||
/* Encode a sequence of same code points n */
|
||||
for(j=0; j<srcCPCount; ++j) {
|
||||
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
|
||||
if(q<n) {
|
||||
++delta;
|
||||
} else if(q==n) {
|
||||
/* Represent delta as a generalized variable-length integer: */
|
||||
for(q=delta, k=BASE; /* no condition */; k+=BASE) {
|
||||
|
||||
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(t>TMAX) {
|
||||
t=TMAX;
|
||||
}
|
||||
*/
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(k>=(bias+TMAX)) {
|
||||
t=TMAX;
|
||||
}
|
||||
|
||||
if(q<t) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength++]=digitToBasic(t+(q-t)%(BASE-t), 0);
|
||||
}
|
||||
q=(q-t)/(BASE-t);
|
||||
}
|
||||
|
||||
if(destLength<destCapacity) {
|
||||
dest[destLength++]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
|
||||
}
|
||||
bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
|
||||
delta=0;
|
||||
++handledCPCount;
|
||||
}
|
||||
}
|
||||
|
||||
++delta;
|
||||
++n;
|
||||
}
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
U_CFUNC int32_t
|
||||
u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UBool *caseFlags,
|
||||
UErrorCode *pErrorCode) {
|
||||
int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
|
||||
destCPCount, firstSupplementaryIndex, cpLength;
|
||||
UChar b;
|
||||
|
||||
/* argument checking */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(srcLength==-1) {
|
||||
srcLength=u_strlen(src);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the basic code points:
|
||||
* Let basicLength be the number of input code points
|
||||
* before the last delimiter, or 0 if there is none,
|
||||
* then copy the first basicLength code points to the output.
|
||||
*
|
||||
* The two following loops iterate backward.
|
||||
*/
|
||||
for(j=srcLength; j>0;) {
|
||||
if(src[--j]==DELIMITER) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
destLength=basicLength=destCPCount=j;
|
||||
|
||||
while(j>0) {
|
||||
b=src[--j];
|
||||
if(!IS_BASIC(b)) {
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(j<destCapacity) {
|
||||
dest[j]=(UChar)b;
|
||||
|
||||
if(caseFlags!=NULL) {
|
||||
caseFlags[j]=IS_BASIC_UPPERCASE(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize the state: */
|
||||
n=INITIAL_N;
|
||||
i=0;
|
||||
bias=INITIAL_BIAS;
|
||||
firstSupplementaryIndex=1000000000;
|
||||
|
||||
/*
|
||||
* Main decoding loop:
|
||||
* Start just after the last delimiter if any
|
||||
* basic code points were copied; start at the beginning otherwise.
|
||||
*/
|
||||
for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
|
||||
/*
|
||||
* in is the index of the next character to be consumed, and
|
||||
* destCPCount is the number of code points in the output array.
|
||||
*
|
||||
* Decode a generalized variable-length integer into delta,
|
||||
* which gets added to i. The overflow checking is easier
|
||||
* if we increase i as we go, then subtract off its starting
|
||||
* value at the end to obtain delta.
|
||||
*/
|
||||
for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
|
||||
if(in>=srcLength) {
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
digit=basicToDigit[(uint8_t)src[in++]];
|
||||
if(digit<0) {
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
if(digit>(0x7fffffff-i)/w) {
|
||||
/* integer overflow */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
i+=digit*w;
|
||||
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(t>TMAX) {
|
||||
t=TMAX;
|
||||
}
|
||||
*/
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
} else if(k>=(bias+TMAX)) {
|
||||
t=TMAX;
|
||||
}
|
||||
if(digit<t) {
|
||||
break;
|
||||
}
|
||||
|
||||
if(w>0x7fffffff/(BASE-t)) {
|
||||
/* integer overflow */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
w*=BASE-t;
|
||||
}
|
||||
|
||||
/*
|
||||
* Modification from sample code:
|
||||
* Increments destCPCount here,
|
||||
* where needed instead of in for() loop tail.
|
||||
*/
|
||||
++destCPCount;
|
||||
bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));
|
||||
|
||||
/*
|
||||
* i was supposed to wrap around from (incremented) destCPCount to 0,
|
||||
* incrementing n each time, so we'll fix that now:
|
||||
*/
|
||||
if(i/destCPCount>(0x7fffffff-n)) {
|
||||
/* integer overflow */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
n+=i/destCPCount;
|
||||
i%=destCPCount;
|
||||
/* not needed for Punycode: */
|
||||
/* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
|
||||
|
||||
if(n>0x10ffff || UTF_IS_SURROGATE(n)) {
|
||||
/* Unicode code point overflow */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Insert n at position i of the output: */
|
||||
cpLength=UTF_CHAR_LENGTH(n);
|
||||
if((destLength+cpLength)<destCapacity) {
|
||||
int32_t codeUnitIndex;
|
||||
|
||||
/*
|
||||
* Handle indexes when supplementary code points are present.
|
||||
*
|
||||
* In almost all cases, there will be only BMP code points before i
|
||||
* and even in the entire string.
|
||||
* This is handled with the same efficiency as with UTF-32.
|
||||
*
|
||||
* Only the rare cases with supplementary code points are handled
|
||||
* more slowly - but not too bad since this is an insertion anyway.
|
||||
*/
|
||||
if(i<=firstSupplementaryIndex) {
|
||||
codeUnitIndex=i;
|
||||
if(cpLength>1) {
|
||||
firstSupplementaryIndex=codeUnitIndex;
|
||||
} else {
|
||||
++firstSupplementaryIndex;
|
||||
}
|
||||
} else {
|
||||
codeUnitIndex=firstSupplementaryIndex;
|
||||
UTF_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
|
||||
}
|
||||
|
||||
/* use the UChar index codeUnitIndex instead of the code point index i */
|
||||
if(codeUnitIndex<destLength) {
|
||||
uprv_memmove(dest+codeUnitIndex+cpLength,
|
||||
dest+codeUnitIndex,
|
||||
(destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
|
||||
if(caseFlags!=NULL) {
|
||||
uprv_memmove(caseFlags+codeUnitIndex+cpLength,
|
||||
caseFlags+codeUnitIndex,
|
||||
destLength-codeUnitIndex);
|
||||
}
|
||||
}
|
||||
if(cpLength==1) {
|
||||
/* BMP, insert one code unit */
|
||||
dest[codeUnitIndex]=(UChar)n;
|
||||
} else {
|
||||
/* supplementary character, insert two code units */
|
||||
dest[codeUnitIndex]=UTF16_LEAD(n);
|
||||
dest[codeUnitIndex+1]=UTF16_TRAIL(n);
|
||||
}
|
||||
if(caseFlags!=NULL) {
|
||||
/* Case of last character determines uppercase flag: */
|
||||
caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
|
||||
if(cpLength==2) {
|
||||
caseFlags[codeUnitIndex+1]=FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
destLength+=cpLength;
|
||||
++i;
|
||||
}
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
118
source/common/punycode.h
Normal file
118
source/common/punycode.h
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: punycode.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2002jan31
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/* This ICU code derived from: */
|
||||
/*
|
||||
punycode.c 0.4.0 (2001-Nov-17-Sat)
|
||||
http://www.cs.berkeley.edu/~amc/idn/
|
||||
Adam M. Costello
|
||||
http://www.nicemice.net/amc/
|
||||
*/
|
||||
|
||||
#ifndef __PUNYCODE_H__
|
||||
#define __PUNYCODE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
/**
|
||||
* u_strToPunycode() converts Unicode to Punycode.
|
||||
*
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
* The output will be represented as an array of ASCII code points.
|
||||
*
|
||||
* The output string is NUL-terminated according to normal ICU
|
||||
* string output rules.
|
||||
*
|
||||
* @param src Input Unicode string.
|
||||
* This function handles a limited number of code points
|
||||
* (the limit is >=64).
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
|
||||
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
|
||||
* @param dest Output Punycode array.
|
||||
* @param destCapacity Size of dest.
|
||||
* @param caseFlags Vector of boolean values, one per input UChar,
|
||||
* indicating that the corresponding character is to be
|
||||
* marked for the decoder optionally
|
||||
* uppercasing (TRUE) or lowercasing (FALSE)
|
||||
* the character.
|
||||
* ASCII characters are output directly in the case as marked.
|
||||
* Flags corresponding to trail surrogates are ignored.
|
||||
* If caseFlags==NULL then input characters are not
|
||||
* case-mapped.
|
||||
* @param pErrorCode ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if src contains
|
||||
* unmatched single surrogates.
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
|
||||
* too many code points.
|
||||
* @return Number of ASCII characters in the Punycode output.
|
||||
*
|
||||
* @see u_strFromPunycode
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strToPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UBool *caseFlags,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* u_strFromPunycode() converts Punycode to Unicode.
|
||||
* The Unicode string will be at most as long (in UChars)
|
||||
* as the Punycode string (in chars).
|
||||
*
|
||||
* @param src Input Punycode string.
|
||||
* @param srcLength Length of src, or -1 if NUL-terminated.
|
||||
* @param dest Output Unicode string buffer.
|
||||
* @param destCapacity Size of dest in number of UChars,
|
||||
* and of caseFlags in numbers of UBools.
|
||||
* @param caseFlags Output array for case flags as
|
||||
* defined by the Punycode string.
|
||||
* The caller should uppercase (TRUE) or lowercase (FALSE)
|
||||
* the corresponding character in dest.
|
||||
* For supplementary characters, only the lead surrogate
|
||||
* is marked, and FALSE is stored for the trail surrogate.
|
||||
* This is redundant and not necessary for ASCII characters
|
||||
* because they are already in the case indicated.
|
||||
* Can be NULL if the case flags are not needed.
|
||||
* @param pErrorCode ICU in/out error code parameter.
|
||||
* U_INVALID_CHAR_FOUND if a non-ASCII character
|
||||
* precedes the last delimiter ('-'),
|
||||
* or if an invalid character (not a-zA-Z0-9) is found
|
||||
* after the last delimiter.
|
||||
* U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
|
||||
* @return Number of UChars written to dest.
|
||||
*
|
||||
* @see u_strToPunycode
|
||||
*/
|
||||
U_CFUNC int32_t
|
||||
u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UBool *caseFlags,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
1949
source/common/putil.c
Normal file
1949
source/common/putil.c
Normal file
File diff suppressed because it is too large
Load diff
277
source/common/putilimp.h
Normal file
277
source/common/putilimp.h
Normal file
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
*
|
||||
* FILE NAME : putilimp.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 10/17/04 grhoten Move internal functions from putil.h to this file.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTILIMP_H
|
||||
#define PUTILIMP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* Platform utilities isolates the platform dependencies of the
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Floating point utility to determine if a double is Not a Number (NaN).
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d);
|
||||
/**
|
||||
* Floating point utility to determine if a double has an infinite value.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d);
|
||||
/**
|
||||
* Floating point utility to determine if a double has a positive infinite value.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
|
||||
/**
|
||||
* Floating point utility to determine if a double has a negative infinite value.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
|
||||
/**
|
||||
* Floating point utility that returns a Not a Number (NaN) value.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_getNaN(void);
|
||||
/**
|
||||
* Floating point utility that returns an infinite value.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_getInfinity(void);
|
||||
|
||||
/**
|
||||
* Floating point utility to truncate a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_trunc(double d);
|
||||
/**
|
||||
* Floating point utility to calculate the floor of a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_floor(double d);
|
||||
/**
|
||||
* Floating point utility to calculate the ceiling of a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_ceil(double d);
|
||||
/**
|
||||
* Floating point utility to calculate the absolute value of a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_fabs(double d);
|
||||
/**
|
||||
* Floating point utility to calculate the fractional and integer parts of a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger);
|
||||
/**
|
||||
* Floating point utility to calculate the remainder of a double divided by another double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y);
|
||||
/**
|
||||
* Floating point utility to calculate d to the power of exponent (d^exponent).
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent);
|
||||
/**
|
||||
* Floating point utility to calculate 10 to the power of exponent (10^exponent).
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent);
|
||||
/**
|
||||
* Floating point utility to calculate the maximum value of two doubles.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y);
|
||||
/**
|
||||
* Floating point utility to calculate the minimum value of two doubles.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y);
|
||||
/**
|
||||
* Private utility to calculate the maximum value of two integers.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
|
||||
/**
|
||||
* Private utility to calculate the minimum value of two integers.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
|
||||
|
||||
#if U_IS_BIG_ENDIAN
|
||||
# define uprv_isNegative(number) (*((signed char *)&(number))<0)
|
||||
#else
|
||||
# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Return the largest positive number that can be represented by an integer
|
||||
* type of arbitrary bit length.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void);
|
||||
|
||||
/**
|
||||
* Floating point utility to calculate the logarithm of a double.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_log(double d);
|
||||
|
||||
/**
|
||||
* Does common notion of rounding e.g. uprv_floor(x + 0.5);
|
||||
* @param x the double number
|
||||
* @return the rounded double
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL double U_EXPORT2 uprv_round(double x);
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* Returns the number of digits after the decimal point in a double number x.
|
||||
*
|
||||
* @param x the double number
|
||||
* @return the number of digits after the decimal point in a double number x.
|
||||
* @internal
|
||||
*/
|
||||
/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Time zone utilities
|
||||
*
|
||||
* Wrappers for C runtime library functions relating to timezones.
|
||||
* The t_tzset() function (similar to tzset) uses the current setting
|
||||
* of the environment variable TZ to assign values to three global
|
||||
* variables: daylight, timezone, and tzname. These variables have the
|
||||
* following meanings, and are declared in <time.h>.
|
||||
*
|
||||
* daylight Nonzero if daylight-saving-time zone (DST) is specified
|
||||
* in TZ; otherwise, 0. Default value is 1.
|
||||
* timezone Difference in seconds between coordinated universal
|
||||
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
|
||||
* tzname(0) Three-letter time-zone name derived from TZ environment
|
||||
* variable. E.g., "PST".
|
||||
* tzname(1) Three-letter DST zone name derived from TZ environment
|
||||
* variable. E.g., "PDT". If DST zone is omitted from TZ,
|
||||
* tzname(1) is an empty string.
|
||||
*
|
||||
* Notes: For example, to set the TZ environment variable to correspond
|
||||
* to the current time zone in Germany, you can use one of the
|
||||
* following statements:
|
||||
*
|
||||
* set TZ=GST1GDT
|
||||
* set TZ=GST+1GDT
|
||||
*
|
||||
* If the TZ value is not set, t_tzset() attempts to use the time zone
|
||||
* information specified by the operating system. Under Windows NT
|
||||
* and Windows 95, this information is specified in the Control Panel's
|
||||
* Date/Time application.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL void U_EXPORT2 uprv_tzset(void);
|
||||
|
||||
/**
|
||||
* Difference in seconds between coordinated universal
|
||||
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
|
||||
* @return the difference in seconds between coordinated universal time and local time.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void);
|
||||
|
||||
/**
|
||||
* tzname(0) Three-letter time-zone name derived from TZ environment
|
||||
* variable. E.g., "PST".
|
||||
* tzname(1) Three-letter DST zone name derived from TZ environment
|
||||
* variable. E.g., "PDT". If DST zone is omitted from TZ,
|
||||
* tzname(1) is an empty string.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
|
||||
|
||||
/**
|
||||
* Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
|
||||
* @return the UTC time measured in milliseconds
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
|
||||
|
||||
/**
|
||||
* Determine whether a pathname is absolute or not, as defined by the platform.
|
||||
* @param path Pathname to test
|
||||
* @return TRUE if the path is absolute
|
||||
* @internal (ICU 3.0)
|
||||
*/
|
||||
U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
|
||||
|
||||
/**
|
||||
* Use U_MAX_PTR instead of this function.
|
||||
* @param base pointer to test
|
||||
* @return the largest possible pointer greater than the base
|
||||
* @internal (ICU 3.8)
|
||||
*/
|
||||
U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
|
||||
|
||||
/**
|
||||
* Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
|
||||
* In fact, buffer sizes must not exceed 2GB so that the difference between
|
||||
* the buffer limit and the buffer start can be expressed in an int32_t.
|
||||
*
|
||||
* The definition of U_MAX_PTR must fulfill the following conditions:
|
||||
* - return the largest possible pointer greater than base
|
||||
* - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
|
||||
* - avoid wrapping around at high addresses
|
||||
* - make sure that the returned pointer is not farther from base than 0x7fffffff
|
||||
*
|
||||
* @param base The beginning of a buffer to find the maximum offset from
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_MAX_PTR
|
||||
# if defined(OS390) && !defined(_LP64)
|
||||
/* We have 31-bit pointers. */
|
||||
# define U_MAX_PTR(base) ((void *)0x7fffffff)
|
||||
# elif defined(OS400)
|
||||
# define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
|
||||
# elif defined(__GNUC__) && __GNUC__ >= 4
|
||||
/*
|
||||
* Due to a compiler optimization bug, gcc 4 causes test failures when doing
|
||||
* this math arithmetic on pointers on some platforms. It seems like the
|
||||
* pointers are considered signed instead of unsigned. The uintptr_t type
|
||||
* isn't available on all platforms (i.e MSVC 6) and pointers aren't always
|
||||
* a scalar value (i.e. i5/OS see uprv_maximumPtr function).
|
||||
*/
|
||||
# define U_MAX_PTR(base) \
|
||||
((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
|
||||
? ((uintptr_t)(base)+0x7fffffffu) \
|
||||
: (uintptr_t)-1))
|
||||
# else
|
||||
# define U_MAX_PTR(base) \
|
||||
((char *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
|
||||
? ((char *)(base)+0x7fffffffu) \
|
||||
: (char *)-1))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
1855
source/common/rbbi.cpp
Normal file
1855
source/common/rbbi.cpp
Normal file
File diff suppressed because it is too large
Load diff
453
source/common/rbbicst.pl
Executable file
453
source/common/rbbicst.pl
Executable file
|
@ -0,0 +1,453 @@
|
|||
#**************************************************************************
|
||||
# Copyright (C) 2002-2005 International Business Machines Corporation *
|
||||
# and others. All rights reserved. *
|
||||
#**************************************************************************
|
||||
#
|
||||
# rbbicst Compile the RBBI rule parser state table data into initialized C data.
|
||||
# Usage:
|
||||
# cd icu/source/common
|
||||
# perl rbbicst.pl < rbbirpt.txt > rbbirpt.h
|
||||
# perl rbbicst.pl -j < rbbirpt.txt > RBBIRuleParseTable.java
|
||||
#
|
||||
# The output file, rbbirpt.h, is included by some of the .cpp rbbi
|
||||
# implementation files. This perl script is NOT run as part
|
||||
# of a normal ICU build. It is run by hand when needed, and the
|
||||
# rbbirpt.h generated file is put back into cvs.
|
||||
#
|
||||
# See rbbirpt.txt for a description of the input format for this script.
|
||||
#
|
||||
|
||||
if ($ARGV[0] eq "-j") {
|
||||
$javaOutput = 1;
|
||||
shift @ARGV;
|
||||
}
|
||||
|
||||
|
||||
$num_states = 1; # Always the state number for the line being compiled.
|
||||
$line_num = 0; # The line number in the input file.
|
||||
|
||||
$states{"pop"} = 255; # Add the "pop" to the list of defined state names.
|
||||
# This prevents any state from being labelled with "pop",
|
||||
# and resolves references to "pop" in the next state field.
|
||||
|
||||
line_loop: while (<>) {
|
||||
chomp();
|
||||
$line = $_;
|
||||
@fields = split();
|
||||
$line_num++;
|
||||
|
||||
# Remove # comments, which are any fields beginning with a #, plus all
|
||||
# that follow on the line.
|
||||
for ($i=0; $i<@fields; $i++) {
|
||||
if ($fields[$i] =~ /^#/) {
|
||||
@fields = @fields[0 .. $i-1];
|
||||
last;
|
||||
}
|
||||
}
|
||||
# ignore blank lines, and those with no fields left after stripping comments..
|
||||
if (@fields == 0) {
|
||||
next;
|
||||
}
|
||||
|
||||
#
|
||||
# State Label: handling.
|
||||
# Does the first token end with a ":"? If so, it's the name of a state.
|
||||
# Put in a hash, together with the current state number,
|
||||
# so that we can later look up the number from the name.
|
||||
#
|
||||
if ($fields[0] =~ /.*:$/) {
    # The first token ends with ":", so it names a state.  Record the name
    # together with the number of the state row being compiled so that
    # references to the name can be resolved to a number later.
    $state_name = $fields[0];
    $state_name =~ s/://;        # strip off the colon from the state name.

    if ($states{$state_name} != 0) {
        # Report the input line NUMBER.  ("$line-num" would interpolate the
        # whole text of the input line followed by the literal "-num".)
        print " rbbicst: at line $line_num duplicate definition of state $state_name\n";
    }
    $states{$state_name} = $num_states;
    $stateNames[$num_states] = $state_name;

    # if the label was the only thing on this line, go on to the next line,
    # otherwise assume that a state definition is on the same line and fall through.
    if (@fields == 1) {
        next line_loop;
    }
    shift @fields;               # shift off label field in preparation
                                 # for handling the rest of the line.
}
|
||||
|
||||
#
|
||||
# State Transition line.
|
||||
# syntax is this,
|
||||
# character [n] target-state [^push-state] [function-name]
|
||||
# where
|
||||
# [something] is an optional something
|
||||
# character is either a single quoted character e.g. '['
|
||||
# or a name of a character class, e.g. white_space
|
||||
#
|
||||
|
||||
$state_line_num[$num_states] = $line_num; # remember line number with each state
|
||||
# so we can make better error messages later.
|
||||
#
|
||||
# First field, character class or literal character for this transition.
|
||||
#
|
||||
if ($fields[0] =~ /^'.'$/) {
|
||||
# We've got a quoted literal character.
|
||||
$state_literal_chars[$num_states] = $fields[0];
|
||||
$state_literal_chars[$num_states] =~ s/'//g;
|
||||
} else {
|
||||
# We've got the name of a character class.
|
||||
$state_char_class[$num_states] = $fields[0];
|
||||
if ($fields[0] =~ /[\W]/) {
|
||||
print " rbbicsts: at line $line_num, bad character literal or character class name.\n";
|
||||
print " scanning $fields[0]\n";
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
shift @fields;
|
||||
|
||||
#
|
||||
# do the 'n' flag
|
||||
#
|
||||
$state_flag[$num_states] = $javaOutput? "false" : "FALSE";
|
||||
if ($fields[0] eq "n") {
|
||||
$state_flag[$num_states] = $javaOutput? "true": "TRUE";
|
||||
shift @fields;
|
||||
}
|
||||
|
||||
#
|
||||
# do the destination state.
|
||||
#
|
||||
$state_dest_state[$num_states] = $fields[0];
|
||||
if ($fields[0] eq "") {
|
||||
print " rbbicsts: at line $line_num, destination state missing.\n";
|
||||
exit(-1);
|
||||
}
|
||||
shift @fields;
|
||||
|
||||
#
|
||||
# do the push state, if present.
|
||||
#
|
||||
if ($fields[0] =~ /^\^/) {
|
||||
$fields[0] =~ s/^\^//;
|
||||
$state_push_state[$num_states] = $fields[0];
|
||||
if ($fields[0] eq "" ) {
|
||||
print " rbbicsts: at line $line_num, expected state after ^ (no spaces).\n";
|
||||
exit(-1);
|
||||
}
|
||||
shift @fields;
|
||||
}
|
||||
|
||||
#
|
||||
# Lastly, do the optional action name.
|
||||
#
|
||||
if ($fields[0] ne "") {
|
||||
$state_func_name[$num_states] = $fields[0];
|
||||
shift @fields;
|
||||
}
|
||||
|
||||
#
|
||||
# There should be no fields left on the line at this point.
|
||||
#
|
||||
if (@fields > 0) {
|
||||
print " rbbicsts: at line $line_num, unexpected extra stuff on input line.\n";
|
||||
print " scanning $fields[0]\n";
|
||||
}
|
||||
$num_states++;
|
||||
}
|
||||
|
||||
#
|
||||
# We've read in the whole file, now go back and output the
|
||||
# C source code for the state transition table.
|
||||
#
|
||||
# We read all states first, before writing anything, so that the state numbers
|
||||
# for the destination states are all available to be written.
|
||||
#
|
||||
|
||||
#
|
||||
# Make hashes for the names of the character classes and
|
||||
# for the names of the actions that appeared.
|
||||
#
|
||||
# Walk every compiled state row and record, as hash keys, each character
# class name and each action (function) name that was used.  Rows without an
# explicit action get the default action "doNOP".
for ($state=1; $state < $num_states; $state++) {
    if ($state_char_class[$state] ne "") {
        if ($charClasses{$state_char_class[$state]} == 0) {
            $charClasses{$state_char_class[$state]} = 1;
        }
    }
    if ($state_func_name[$state] eq "") {
        $state_func_name[$state] = "doNOP";
    }
    # Test the same key that is about to be set.  The guard used to index
    # with @state_action_name, an array that is never assigned, so it looked
    # up $actions{""} and was always true.
    if ($actions{$state_func_name[$state]} == 0) {
        $actions{$state_func_name[$state]} = 1;
    }
}
|
||||
|
||||
#
|
||||
# Check that all of the destination states have been defined
|
||||
#
|
||||
#
|
||||
$states{"exit"} = 0; # Predefined state name, terminates state machine.
|
||||
for ($state=1; $state<$num_states; $state++) {
|
||||
if ($states{$state_dest_state[$state]} == 0 && $state_dest_state[$state] ne "exit") {
|
||||
print "Error at line $state_line_num[$state]: target state \"$state_dest_state[$state]\" is not defined.\n";
|
||||
$errors++;
|
||||
}
|
||||
if ($state_push_state[$state] ne "" && $states{$state_push_state[$state]} == 0) {
|
||||
print "Error at line $state_line_num[$state]: target state \"$state_push_state[$state]\" is not defined.\n";
|
||||
$errors++;
|
||||
}
|
||||
}
|
||||
|
||||
die if ($errors>0);
|
||||
|
||||
#
|
||||
# Assign numbers to each of the character classes used.
|
||||
# Sets are numbered from 128 - 250
|
||||
# The values 0-127 in the state table are used for matching
|
||||
# individual ASCII characters (the only thing that can appear in the rules.)
|
||||
# The "set" names appearing in the code below (default, etc.) need special
|
||||
# handling because they do not correspond to a normal set of characters,
|
||||
# but trigger special handling by code in the state machine.
|
||||
#
|
||||
$i = 128;
|
||||
foreach $setName (sort keys %charClasses) {
|
||||
if ($setName eq "default") {
|
||||
$charClasses{$setName} = 255;}
|
||||
elsif ($setName eq "escaped") {
|
||||
$charClasses{$setName} = 254;}
|
||||
elsif ($setName eq "escapedP") {
|
||||
$charClasses{$setName} = 253;}
|
||||
elsif ($setName eq "eof") {
|
||||
$charClasses{$setName} = 252;}
|
||||
else {
|
||||
# Normal (single) character class. Number them.
|
||||
$charClasses{$setName} = $i;
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Current four-digit year, interpolated into the copyright notice of the
# generated file.  Element 5 of localtime's list is the year minus 1900.
my $year = (localtime)[5] + 1900;
|
||||
|
||||
if ($javaOutput) {
|
||||
print "/*\n";
|
||||
print " *******************************************************************************\n";
|
||||
print " * Copyright (C) 2003-$year,\n";
|
||||
print " * International Business Machines Corporation and others. All Rights Reserved.\n";
|
||||
print " *******************************************************************************\n";
|
||||
print " */\n";
|
||||
print " \n";
|
||||
print "package com.ibm.icu.text;\n";
|
||||
print " \n";
|
||||
print "/**\n";
|
||||
print " * Generated Java File. Do not edit by hand.\n";
|
||||
print " * This file contains the state table for the ICU Rule Based Break Iterator\n";
|
||||
print " * rule parser.\n";
|
||||
print " * It is generated by the Perl script \"rbbicst.pl\" from\n";
|
||||
print " * the rule parser state definitions file \"rbbirpt.txt\".\n";
|
||||
print " * \@internal \n";
|
||||
print " *\n";
|
||||
print " */\n";
|
||||
|
||||
print "class RBBIRuleParseTable\n";
|
||||
print "{\n";
|
||||
|
||||
#
|
||||
# Emit the constants for the actions to be performed.
|
||||
#
|
||||
$n = 1;
|
||||
foreach $act (sort keys %actions) {
|
||||
print " static final short $act = $n;\n";
|
||||
$n++;
|
||||
}
|
||||
print " \n";
|
||||
|
||||
#
|
||||
# Emit constants for char class names
|
||||
#
|
||||
foreach $setName (sort keys %charClasses) {
|
||||
print " static final short kRuleSet_$setName = $charClasses{$setName};\n";
|
||||
}
|
||||
print "\n\n";
|
||||
|
||||
|
||||
print " static class RBBIRuleTableElement { \n";
|
||||
print " short fAction; \n";
|
||||
print " short fCharClass; \n";
|
||||
print " short fNextState; \n";
|
||||
print " short fPushState; \n";
|
||||
print " boolean fNextChar; \n";
|
||||
print " String fStateName; \n";
|
||||
print " RBBIRuleTableElement(short a, int cc, int ns, int ps, boolean nc, String sn) { \n";
|
||||
print " fAction = a; \n";
|
||||
print " fCharClass = (short)cc; \n";
|
||||
print " fNextState = (short)ns; \n";
|
||||
print " fPushState = (short)ps; \n";
|
||||
print " fNextChar = nc; \n";
|
||||
print " fStateName = sn; \n";
|
||||
print " } \n";
|
||||
print " }; \n";
|
||||
print " \n";
|
||||
|
||||
|
||||
print " static RBBIRuleTableElement[] gRuleParseStateTable = { \n ";
|
||||
print " new RBBIRuleTableElement(doNOP, 0, 0,0, true, null ) // 0 \n"; #output the unused state 0.
|
||||
for ($state=1; $state < $num_states; $state++) {
|
||||
print " , new RBBIRuleTableElement($state_func_name[$state],";
|
||||
if ($state_literal_chars[$state] ne "") {
|
||||
$c = $state_literal_chars[$state];
|
||||
print("'$c', ");
|
||||
}else {
|
||||
print " $charClasses{$state_char_class[$state]},";
|
||||
}
|
||||
print " $states{$state_dest_state[$state]},";
|
||||
|
||||
# The push-state field is optional. If omitted, fill field with a zero, which flags
|
||||
# the state machine that there is no push state.
|
||||
if ($state_push_state[$state] eq "") {
|
||||
print "0, ";
|
||||
} else {
|
||||
print " $states{$state_push_state[$state]},";
|
||||
}
|
||||
print " $state_flag[$state], ";
|
||||
|
||||
# if this is the first row of the table for this state, put out the state name.
|
||||
if ($stateNames[$state] ne "") {
|
||||
print " \"$stateNames[$state]\") ";
|
||||
} else {
|
||||
print " null ) ";
|
||||
}
|
||||
|
||||
# Put out a comment showing the number (index) of this state row,
|
||||
print " // $state ";
|
||||
print "\n";
|
||||
}
|
||||
print " };\n";
|
||||
|
||||
print "}; \n";
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
#
|
||||
# C++ Output ...
|
||||
#
|
||||
|
||||
|
||||
print "//---------------------------------------------------------------------------------\n";
|
||||
print "//\n";
|
||||
print "// Generated Header File. Do not edit by hand.\n";
|
||||
print "// This file contains the state table for the ICU Rule Based Break Iterator\n";
|
||||
print "// rule parser.\n";
|
||||
print "// It is generated by the Perl script \"rbbicst.pl\" from\n";
|
||||
print "// the rule parser state definitions file \"rbbirpt.txt\".\n";
|
||||
print "//\n";
|
||||
print "// Copyright (C) 2002-$year International Business Machines Corporation \n";
|
||||
print "// and others. All rights reserved. \n";
|
||||
print "//\n";
|
||||
print "//---------------------------------------------------------------------------------\n";
|
||||
print "#ifndef RBBIRPT_H\n";
|
||||
print "#define RBBIRPT_H\n";
|
||||
print "\n";
|
||||
print "U_NAMESPACE_BEGIN\n";
|
||||
|
||||
#
|
||||
# Emit the constants for indices of Unicode Sets
|
||||
# Define one constant for each of the character classes encountered.
|
||||
# At the same time, store the index corresponding to the set name back into hash.
|
||||
#
|
||||
print "//\n";
|
||||
print "// Character classes for RBBI rule scanning.\n";
|
||||
print "//\n";
|
||||
foreach $setName (sort keys %charClasses) {
|
||||
if ($charClasses{$setName} < 250) {
|
||||
# Normal character class.
|
||||
print " static const uint8_t kRuleSet_$setName = $charClasses{$setName};\n";
|
||||
}
|
||||
}
|
||||
print "\n\n";
|
||||
|
||||
#
|
||||
# Emit the enum for the actions to be performed.
|
||||
#
|
||||
print "enum RBBI_RuleParseAction {\n";
|
||||
foreach $act (sort keys %actions) {
|
||||
print " $act,\n";
|
||||
}
|
||||
print " rbbiLastAction};\n\n";
|
||||
|
||||
#
|
||||
# Emit the struct definition for transition table elements.
|
||||
#
|
||||
print "//-------------------------------------------------------------------------------\n";
|
||||
print "//\n";
|
||||
print "// RBBIRuleTableEl represents the structure of a row in the transition table\n";
|
||||
print "// for the rule parser state machine.\n";
|
||||
print "//-------------------------------------------------------------------------------\n";
|
||||
print "struct RBBIRuleTableEl {\n";
|
||||
print " RBBI_RuleParseAction fAction;\n";
|
||||
print " uint8_t fCharClass; // 0-127: an individual ASCII character\n";
|
||||
print " // 128-255: character class index\n";
|
||||
print " uint8_t fNextState; // 0-250: normal next-stat numbers\n";
|
||||
print " // 255: pop next-state from stack.\n";
|
||||
print " uint8_t fPushState;\n";
|
||||
print " UBool fNextChar;\n";
|
||||
print "};\n\n";
|
||||
|
||||
#
|
||||
# emit the state transition table
|
||||
#
|
||||
print "static const struct RBBIRuleTableEl gRuleParseStateTable[] = {\n";
|
||||
print " {doNOP, 0, 0, 0, TRUE}\n"; # State 0 is a dummy. Real states start with index = 1.
|
||||
for ($state=1; $state < $num_states; $state++) {
|
||||
print " , {$state_func_name[$state],";
|
||||
if ($state_literal_chars[$state] ne "") {
|
||||
$c = $state_literal_chars[$state];
|
||||
printf(" %d /* $c */,", ord($c)); # use numeric value, so EBCDIC machines are ok.
|
||||
}else {
|
||||
print " $charClasses{$state_char_class[$state]},";
|
||||
}
|
||||
print " $states{$state_dest_state[$state]},";
|
||||
|
||||
# The push-state field is optional. If omitted, fill field with a zero, which flags
|
||||
# the state machine that there is no push state.
|
||||
if ($state_push_state[$state] eq "") {
|
||||
print "0, ";
|
||||
} else {
|
||||
print " $states{$state_push_state[$state]},";
|
||||
}
|
||||
print " $state_flag[$state]} ";
|
||||
|
||||
# Put out a C++ comment showing the number (index) of this state row,
|
||||
# and, if this is the first row of the table for this state, the state name.
|
||||
print " // $state ";
|
||||
if ($stateNames[$state] ne "") {
|
||||
print " $stateNames[$state]";
|
||||
}
|
||||
print "\n";
|
||||
};
|
||||
print " };\n";
|
||||
|
||||
|
||||
#
|
||||
# emit a mapping array from state numbers to state names.
|
||||
#
|
||||
# This array is used for producing debugging output from the rule parser.
|
||||
#
|
||||
print "#ifdef RBBI_DEBUG\n";
|
||||
print "static const char * const RBBIRuleStateNames[] = {";
|
||||
for ($state=0; $state<$num_states; $state++) {
|
||||
if ($stateNames[$state] ne "") {
|
||||
print " \"$stateNames[$state]\",\n";
|
||||
} else {
|
||||
print " 0,\n";
|
||||
}
|
||||
}
|
||||
print " 0};\n";
|
||||
print "#endif\n\n";
|
||||
|
||||
print "U_NAMESPACE_END\n";
|
||||
print "#endif\n";
|
||||
}
|
||||
|
||||
|
||||
|
450
source/common/rbbidata.cpp
Normal file
450
source/common/rbbidata.cpp
Normal file
|
@ -0,0 +1,450 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2008 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "rbbidata.h"
|
||||
#include "rbbirb.h"
|
||||
#include "utrie.h"
|
||||
#include "udatamem.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// Trie access folding function. Copied as-is from properties code in uchar.c
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
U_CDECL_BEGIN
|
||||
/*
 * Folding-offset callback installed on the RBBI category trie.
 * The argument is a 16-bit trie result: when bit 15 is set, bits 14..0
 * carry the folding offset; otherwise there is no offset and 0 is returned.
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    const uint32_t kHasOffsetBit = 0x8000;
    const uint32_t kOffsetMask   = 0x7fff;

    return ((data & kHasOffsetBit) != 0) ? (int32_t)(data & kOffsetMask) : 0;
}
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Constructors.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
// Wrap raw RBBI data. All of the set-up work is delegated to init().
// With this constructor the wrapper's destructor releases the data with
// uprv_free() (fDontFreeData is left FALSE by init()).
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status)
{
    this->init(data, status);
}
|
||||
|
||||
// Wrap raw RBBI data without taking ownership of it: the destructor will
// not free the header. The unnamed EDontAdopt parameter only selects this
// overload.
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status)
{
    this->init(data, status);
    // init() sets fDontFreeData to FALSE, so the flag can only be raised
    // once init() has returned.
    this->fDontFreeData = TRUE;
}
|
||||
|
||||
// Wrap RBBI data that lives inside an opened UDataMemory. The wrapper
// remembers udm and closes it with udata_close() in its destructor.
//
// udm is only dereferenced when the incoming status is a success code, so
// a caller that passes along a failed status (and possibly a NULL udm)
// does not crash here; init() is still called so that it can see the
// failure.
RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
    const RBBIDataHeader *d = NULL;
    if (!U_FAILURE(status)) {
        // The RBBI data begins right after the generic ICU data header.
        // Use dataHeader.headerSize (not info.size) so that the padding
        // added by udata_write is taken into consideration.
        d = (const RBBIDataHeader *)
            ((const char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize);
    }
    init(d, status);
    fUDataMem = udm;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// init(). Does most of the work of construction, shared between the
|
||||
// constructors.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
fHeader = data;
|
||||
if (fHeader->fMagic != 0xb1a0 ||
|
||||
!(fHeader->fFormatVersion[0] == 3 || // ICU 3.4
|
||||
*(int32_t *)fHeader->fFormatVersion == 1)) // ICU 3.2 and earlier.
|
||||
{
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
fDontFreeData = FALSE;
|
||||
fUDataMem = NULL;
|
||||
fReverseTable = NULL;
|
||||
fSafeFwdTable = NULL;
|
||||
fSafeRevTable = NULL;
|
||||
if (data->fFTableLen != 0) {
|
||||
fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
|
||||
}
|
||||
if (data->fRTableLen != 0) {
|
||||
fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
|
||||
}
|
||||
if (data->fSFTableLen != 0) {
|
||||
fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
|
||||
}
|
||||
if (data->fSRTableLen != 0) {
|
||||
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
|
||||
}
|
||||
|
||||
|
||||
utrie_unserialize(&fTrie,
|
||||
(uint8_t *)data + fHeader->fTrie,
|
||||
fHeader->fTrieLen,
|
||||
&status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
fTrie.getFoldingOffset=getFoldingOffset;
|
||||
|
||||
|
||||
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
|
||||
fRuleString.setTo(TRUE, fRuleSource, -1);
|
||||
U_ASSERT(data->fRuleSourceLen > 0);
|
||||
|
||||
fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
|
||||
fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t);
|
||||
|
||||
fRefCount = 1;
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
char *debugEnv = getenv("U_RBBIDEBUG");
|
||||
if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Destructor. Don't call this - use removeReference() instead.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
// Release the underlying data. Data that came from a UDataMemory is closed
// through udata_close(); otherwise the header is freed with uprv_free()
// unless the wrapper was told not to own it.
RBBIDataWrapper::~RBBIDataWrapper() {
    U_ASSERT(fRefCount == 0);

    if (fUDataMem != NULL) {
        udata_close(fUDataMem);
        return;
    }
    if (fDontFreeData == FALSE) {
        uprv_free((void *)fHeader);
    }
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Operator == Consider two RBBIDataWrappers to be equal if they
|
||||
// refer to the same underlying data. Although
|
||||
// the data wrappers are normally shared between
|
||||
// iterator instances, it's possible to independently
|
||||
// open the same data twice, and get two instances, which
|
||||
// should still be ==.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
// Two wrappers compare equal when they point at the same header, or when
// their data has the same total length and identical bytes.
UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
    const RBBIDataHeader *mine   = fHeader;
    const RBBIDataHeader *theirs = other.fHeader;

    if (mine == theirs) {
        return TRUE;
    }
    if (mine->fLength != theirs->fLength) {
        return FALSE;
    }
    return (uprv_memcmp(mine, theirs, mine->fLength) == 0) ? TRUE : FALSE;
}
|
||||
|
||||
int32_t RBBIDataWrapper::hashCode() {
|
||||
return fHeader->fFTableLen;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Reference Counting. A single RBBIDataWrapper object is shared among
|
||||
// however many RuleBasedBreakIterator instances are
|
||||
// referencing the same data.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
void RBBIDataWrapper::removeReference() {
|
||||
if (umtx_atomic_dec(&fRefCount) == 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Take one more reference (atomic increment) and hand back this wrapper so
// the call can be used directly in an assignment.
RBBIDataWrapper *RBBIDataWrapper::addReference()
{
    RBBIDataWrapper *self = this;
    umtx_atomic_inc(&self->fRefCount);
    return self;
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// getRuleSourceString
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
// Accessor for the rule source text, returned by reference to the
// wrapper's own fRuleString member.
const UnicodeString &RBBIDataWrapper::getRuleSourceString() const
{
    return this->fRuleString;
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// print - debugging function to dump the runtime data tables.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
// Debug dump of one state table: a heading, a column header with one
// column per character category, then one line per state row. A NULL
// table prints the headers followed by a "null table" marker.
void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
    const uint32_t catCount = fHeader->fCatCount;
    uint32_t       cat;
    uint32_t       state;

    RBBIDebugPrintf(" %s\n", heading);

    RBBIDebugPrintf("State | Acc LA TagIx");
    for (cat = 0; cat < catCount; cat++) {
        RBBIDebugPrintf("%3d ", cat);
    }
    RBBIDebugPrintf("\n------|---------------");
    for (cat = 0; cat < catCount; cat++) {
        RBBIDebugPrintf("----");
    }
    RBBIDebugPrintf("\n");

    if (table == NULL) {
        RBBIDebugPrintf(" N U L L T A B L E\n\n");
        return;
    }

    for (state = 0; state < table->fNumStates; state++) {
        // Rows are variable length; step through the raw bytes by fRowLen.
        RBBIStateTableRow *row =
            (RBBIStateTableRow *)(table->fTableData + (table->fRowLen * state));

        RBBIDebugPrintf("%4d | %3d %3d %3d ", state, row->fAccepting, row->fLookAhead, row->fTagIdx);
        for (cat = 0; cat < catCount; cat++) {
            RBBIDebugPrintf("%3d ", row->fNextState[cat]);
        }
        RBBIDebugPrintf("\n");
    }
    RBBIDebugPrintf("\n");
}
#endif
|
||||
|
||||
|
||||
#ifdef RBBI_DEBUG
// Debug dump of the whole data set: header summary, the four state
// tables, and the rule source text printed one code unit at a time.
void RBBIDataWrapper::printData() {
    const RBBIDataHeader *hdr = fHeader;

    RBBIDebugPrintf("RBBI Data at %p\n", (void *)hdr);
    RBBIDebugPrintf(" Version = {%d %d %d %d}\n",
                    hdr->fFormatVersion[0], hdr->fFormatVersion[1],
                    hdr->fFormatVersion[2], hdr->fFormatVersion[3]);
    RBBIDebugPrintf(" total length of data = %d\n", hdr->fLength);
    RBBIDebugPrintf(" number of character categories = %d\n\n", hdr->fCatCount);

    printTable("Forward State Transition Table", fForwardTable);
    printTable("Reverse State Transition Table", fReverseTable);
    printTable("Safe Forward State Transition Table", fSafeFwdTable);
    printTable("Safe Reverse State Transition Table", fSafeRevTable);

    RBBIDebugPrintf("\nOrignal Rules source:\n");
    // The rule source is NUL terminated.
    for (const UChar *p = fRuleSource; *p != 0; ++p) {
        RBBIDebugPrintf("%c", *p);
    }
    RBBIDebugPrintf("\n\n");
}
#endif
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
U_NAMESPACE_USE
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// ubrk_swap - byte swap and char encoding swap of RBBI data
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *status) {
|
||||
|
||||
if (status == NULL || U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
|
||||
*status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Check that the data header is for for break data.
|
||||
// (Header contents are defined in genbrk.cpp)
|
||||
//
|
||||
const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
|
||||
if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */
|
||||
pInfo->dataFormat[1]==0x72 &&
|
||||
pInfo->dataFormat[2]==0x6b &&
|
||||
pInfo->dataFormat[3]==0x20 &&
|
||||
pInfo->formatVersion[0]==3 )) {
|
||||
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
pInfo->dataFormat[2], pInfo->dataFormat[3],
|
||||
pInfo->formatVersion[0]);
|
||||
*status=U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific
|
||||
// RBBIDataHeader). This swap also conveniently gets us
|
||||
// the size of the ICU d.h., which lets us locate the start
|
||||
// of the RBBI specific data.
|
||||
//
|
||||
int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
|
||||
|
||||
|
||||
//
|
||||
// Get the RRBI Data Header, and check that it appears to be OK.
|
||||
//
|
||||
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
|
||||
// an int32_t with a value of 1. Starting with ICU 3.4,
|
||||
// RBBI's fDataFormat matches the dataFormat field from the
|
||||
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
|
||||
//
|
||||
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
|
||||
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
|
||||
UBool formatVersionOne = ds->readUInt32(*(int32_t *)rbbiDH->fFormatVersion) == 1;
|
||||
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
|
||||
!(formatVersionOne || rbbiDH->fFormatVersion[0] == 3) ||
|
||||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
|
||||
{
|
||||
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
|
||||
*status=U_UNSUPPORTED_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Prefight operation? Just return the size
|
||||
//
|
||||
int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
|
||||
int32_t totalSize = headerSize + breakDataLength;
|
||||
if (length < 0) {
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
//
|
||||
// Check that length passed in is consistent with length from RBBI data header.
|
||||
//
|
||||
if (length < totalSize) {
|
||||
udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
|
||||
breakDataLength);
|
||||
*status=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Swap the Data. Do the data itself first, then the RBBI Data Header, because
|
||||
// we need to reference the header to locate the data, and an
|
||||
// inplace swap of the header leaves it unusable.
|
||||
//
|
||||
uint8_t *outBytes = (uint8_t *)outData + headerSize;
|
||||
RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes;
|
||||
|
||||
int32_t tableStartOffset;
|
||||
int32_t tableLength;
|
||||
|
||||
//
|
||||
// If not swapping in place, zero out the output buffer before starting.
|
||||
// Individual tables and other data items within are aligned to 8 byte boundaries
|
||||
// when originally created. Any unused space between items needs to be zero.
|
||||
//
|
||||
if (inBytes != outBytes) {
|
||||
uprv_memset(outBytes, 0, breakDataLength);
|
||||
}
|
||||
|
||||
//
|
||||
// Each state table begins with several 32 bit fields. Calculate the size
|
||||
// in bytes of these.
|
||||
//
|
||||
int32_t topSize = offsetof(RBBIStateTable, fTableData);
|
||||
|
||||
// Forward state table.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fFTableLen);
|
||||
|
||||
if (tableLength > 0) {
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
}
|
||||
|
||||
// Reverse state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fRTableLen);
|
||||
|
||||
if (tableLength > 0) {
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
}
|
||||
|
||||
// Safe Forward state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fSFTableLen);
|
||||
|
||||
if (tableLength > 0) {
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
}
|
||||
|
||||
// Safe Reverse state table. Same layout as forward table, above.
|
||||
tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
|
||||
tableLength = ds->readUInt32(rbbiDH->fSRTableLen);
|
||||
|
||||
if (tableLength > 0) {
|
||||
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
|
||||
outBytes+tableStartOffset, status);
|
||||
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
|
||||
outBytes+tableStartOffset+topSize, status);
|
||||
}
|
||||
|
||||
// Trie table for character categories
|
||||
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fTrie), status);
|
||||
|
||||
// Source Rules Text. It's UChar data
|
||||
ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);
|
||||
|
||||
// Table of rule status values. It's all int_32 values
|
||||
ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
|
||||
|
||||
// And, last, the header.
|
||||
// For the old version one format, the entire header consists of int32_t values.
|
||||
// For the newer formats, the fDataFormat field is an array of four bytes.
|
||||
// Swap the whole thing as int32_t, then, for the newer format, re-swap the one field.
|
||||
//
|
||||
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
|
||||
if (formatVersionOne == FALSE) {
|
||||
ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
|
||||
}
|
||||
|
||||
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
198
source/common/rbbidata.h
Normal file
198
source/common/rbbidata.h
Normal file
|
@ -0,0 +1,198 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2005,2008 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: rbbidata.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* RBBI data formats Includes
|
||||
*
|
||||
* Structs that describe the format of the Binary RBBI data,
|
||||
* as it is stored in ICU's data file.
|
||||
*
|
||||
* RBBIDataWrapper - Instances of this class sit between the
|
||||
* raw data structs and the RuleBasedBreakIterator objects
|
||||
* that are created by applications. The wrapper class
|
||||
* provides reference counting for the underlying data,
|
||||
* and direct pointers to data that would not otherwise
|
||||
* be accessible without ugly pointer arithmetic. The
|
||||
* wrapper does not attempt to provide any higher level
|
||||
* abstractions for the data itself.
|
||||
*
|
||||
* There will be only one instance of RBBIDataWrapper for any
|
||||
* set of RBBI run time data being shared by instances
|
||||
* (clones) of RuleBasedBreakIterator.
|
||||
*/
|
||||
|
||||
#ifndef __RBBIDATA_H__
|
||||
#define __RBBIDATA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "udataswp.h"
|
||||
|
||||
/**
|
||||
* Swap RBBI data. See udataswp.h.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "utrie.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* The following structs map exactly onto the raw data from ICU common data file.
|
||||
*/
|
||||
/* Header found at the very start of a block of RBBI data. Every field */
/* except fFormatVersion is a uint32_t; the section offsets below are */
/* what RBBIDataWrapper::init() adds to the header address. */
struct RBBIDataHeader {
|
||||
uint32_t fMagic; /* == 0xb1a0 */
|
||||
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
|
||||
/* if there is one associated with this data. */
|
||||
/* (version originates in rbbi, is copied to UDataInfo) */
|
||||
/* For ICU 3.2 and earlier, this field was */
|
||||
/* uint32_t fVersion */
|
||||
/* with a value of 1. */
|
||||
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
|
||||
/* including all sections, not just the header. */
|
||||
uint32_t fCatCount; /* Number of character categories. */
|
||||
|
||||
/* */
|
||||
/* Offsets and sizes of each of the subsections within the RBBI data. */
|
||||
/* All offsets are bytes from the start of the RBBIDataHeader. */
|
||||
/* All sizes are in bytes. */
|
||||
/* */
|
||||
uint32_t fFTable; /* Offset to the forward state transition table. */
|
||||
uint32_t fFTableLen;
|
||||
uint32_t fRTable; /* Offset to the reverse state transition table. */
|
||||
uint32_t fRTableLen;
|
||||
uint32_t fSFTable; /* Offset to the safe point forward transition table */
|
||||
uint32_t fSFTableLen;
|
||||
uint32_t fSRTable; /* Offset to the safe point reverse transition table */
|
||||
uint32_t fSRTableLen;
|
||||
uint32_t fTrie; /* Offset to Trie data for character categories */
|
||||
uint32_t fTrieLen;
|
||||
uint32_t fRuleSource; /* Offset to the source for the break */
|
||||
uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
|
||||
uint32_t fStatusTable; /* Offset to the table of rule status values */
|
||||
uint32_t fStatusTableLen;
|
||||
|
||||
uint32_t fReserved[6]; /* Reserved for expansion */
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
/* One row of a state table. All fields are 16 bits wide, which is why */
/* ubrk_swap() can swap the row area of a table as an array of 16 bit units. */
struct RBBIStateTableRow {
|
||||
int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
|
||||
/* Value 0: not an accepting state. */
|
||||
/* -1: Unconditional Accepting state. */
|
||||
/* positive: Look-ahead match has completed. */
|
||||
/* Actual boundary position happened earlier */
|
||||
/* Value here == fLookAhead in earlier */
|
||||
/* state, at actual boundary pos. */
|
||||
int16_t fLookAhead; /* Non-zero if this row is for a state that */
|
||||
/* corresponds to a '/' in the rule source. */
|
||||
/* Value is the same as the fAccepting */
|
||||
/* value for the rule (which will appear */
|
||||
/* in a different state). */
|
||||
int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
|
||||
/* from a rule. Value is the index in the */
|
||||
/* StatusTable of the set of matching */
|
||||
/* tags (rule status values) */
|
||||
int16_t fReserved;
|
||||
uint16_t fNextState[2]; /* Next State, indexed by char category. */
|
||||
/* The [2] is a placeholder; the debug dump code */
|
||||
/* indexes it up to RBBIDataHeader::fCatCount. */
|
||||
/* CAUTION: see RBBITableBuilder::getTableSize() */
|
||||
/* before changing anything here. */
|
||||
};
|
||||
|
||||
|
||||
/* Header of one state table, followed directly by the rows. Row s starts */
/* at fTableData + fRowLen * s (see RBBIDataWrapper::printTable()). */
struct RBBIStateTable {
|
||||
uint32_t fNumStates; /* Number of states. */
|
||||
uint32_t fRowLen; /* Length of a state table row, in bytes. */
|
||||
uint32_t fFlags; /* Option Flags for this state table */
|
||||
uint32_t fReserved; /* reserved */
|
||||
char fTableData[4]; /* First RBBIStateTableRow begins here. */
|
||||
/* (making it char[] simplifies ugly address */
|
||||
/* arithmetic for indexing variable length rows.) */
|
||||
};
|
||||
|
||||
/* Flag values for a state table; presumably the bits stored in */
/* RBBIStateTable::fFlags - TODO confirm against the table builder. */
typedef enum {
|
||||
RBBI_LOOKAHEAD_HARD_BREAK = 1,
|
||||
RBBI_BOF_REQUIRED = 2
|
||||
} RBBIStateTableFlags;
|
||||
|
||||
|
||||
/* */
|
||||
/* The reference counting wrapper class */
|
||||
/* */
|
||||
/* Reference counted holder for one block of RBBI data. It caches direct */
/* pointers to the sections named in the RBBIDataHeader and releases the */
/* data when the last reference is removed. */
class RBBIDataWrapper : public UMemory {
|
||||
public:
|
||||
enum EDontAdopt { /* tag type selecting the non-owning constructor */
|
||||
kDontAdopt
|
||||
};
|
||||
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); /* data is freed with uprv_free() by the destructor */
|
||||
RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); /* data is never freed by the wrapper */
|
||||
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); /* udm is closed with udata_close() by the destructor */
|
||||
~RBBIDataWrapper(); /* do not call directly; use removeReference() */
|
||||
|
||||
void init(const RBBIDataHeader *data, UErrorCode &status); /* shared by all constructors */
|
||||
RBBIDataWrapper *addReference(); /* increments the count, returns this */
|
||||
void removeReference(); /* decrements the count, deletes this at zero */
|
||||
UBool operator ==(const RBBIDataWrapper &other) const; /* same header pointer, or same length and bytes */
|
||||
int32_t hashCode(); /* returns fHeader->fFTableLen */
|
||||
const UnicodeString &getRuleSourceString() const;
|
||||
#ifdef RBBI_DEBUG
|
||||
void printData();
|
||||
void printTable(const char *heading, const RBBIStateTable *table);
|
||||
#else
|
||||
#define printData()
|
||||
#define printTable(heading, table)
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* Pointers to items within the data */
|
||||
/* */
|
||||
const RBBIDataHeader *fHeader;
|
||||
const RBBIStateTable *fForwardTable;
|
||||
const RBBIStateTable *fReverseTable; /* NULL when the header gives a zero length */
|
||||
const RBBIStateTable *fSafeFwdTable; /* NULL when the header gives a zero length */
|
||||
const RBBIStateTable *fSafeRevTable; /* NULL when the header gives a zero length */
|
||||
const UChar *fRuleSource;
|
||||
const int32_t *fRuleStatusTable;
|
||||
|
||||
/* number of int32_t values in the rule status table. Used to sanity check indexing */
|
||||
int32_t fStatusMaxIdx;
|
||||
|
||||
UTrie fTrie; /* character category trie, filled in by utrie_unserialize() */
|
||||
|
||||
private:
|
||||
int32_t fRefCount; /* set to 1 by init(); changed only through the umtx atomic calls */
|
||||
UDataMemory *fUDataMem; /* non-NULL only for the UDataMemory constructor */
|
||||
UnicodeString fRuleString; /* read-only alias of fRuleSource */
|
||||
UBool fDontFreeData; /* TRUE: destructor must not uprv_free() fHeader */
|
||||
|
||||
RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
|
||||
RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
|
||||
};
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* C++ */
|
||||
|
||||
#endif
|
358
source/common/rbbinode.cpp
Normal file
358
source/common/rbbinode.cpp
Normal file
|
@ -0,0 +1,358 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2002-2008 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
//
|
||||
// File: rbbinode.cpp
|
||||
//
|
||||
// Implementation of class RBBINode, which represents a node in the
|
||||
// tree generated when parsing the Rules Based Break Iterator rules.
|
||||
//
|
||||
// This "Class" is actually closer to a struct.
|
||||
// Code using it is expected to directly access fields much of the time.
|
||||
//
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "uvector.h"
|
||||
|
||||
#include "rbbirb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
static int gLastSerial = 0;
|
||||
#endif
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// Constructor. Just set the fields to reasonable default values.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
// Build a fresh, unlinked node of the given type. Links and the input set
// start out NULL, numeric fields zero, flags FALSE, and the three position
// sets are allocated empty. Operator node types get their own precedence;
// everything else gets precZero.
RBBINode::RBBINode(NodeType t) : UMemory() {
#ifdef RBBI_DEBUG
    fSerialNum = ++gLastSerial;
#endif
    fType         = t;
    fParent       = NULL;
    fLeftChild    = NULL;
    fRightChild   = NULL;
    fInputSet     = NULL;
    fFirstPos     = 0;
    fLastPos      = 0;
    fNullable     = FALSE;
    fLookAheadEnd = FALSE;
    fVal          = 0;

    switch (t) {
    case opCat:
        fPrecedence = precOpCat;
        break;
    case opOr:
        fPrecedence = precOpOr;
        break;
    case opStart:
        fPrecedence = precStart;
        break;
    case opLParen:
        fPrecedence = precLParen;
        break;
    default:
        fPrecedence = precZero;
        break;
    }

    UErrorCode status = U_ZERO_ERROR;     // TODO - get a real status from somewhere
    fFirstPosSet = new UVector(status);
    fLastPosSet  = new UVector(status);
    fFollowPos   = new UVector(status);
}
|
||||
|
||||
|
||||
// Copy constructor. Copies the scalar attributes of the other node but
// none of its tree links: the new node has no parent and no children, and
// gets its own empty position sets. The fInputSet pointer is copied as-is
// (not cloned).
//
// fLookAheadEnd is copied along with the other attributes so that the new
// node does not carry an uninitialized flag.
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
#ifdef RBBI_DEBUG
    fSerialNum = ++gLastSerial;
#endif
    fType         = other.fType;
    fParent       = NULL;
    fLeftChild    = NULL;
    fRightChild   = NULL;
    fInputSet     = other.fInputSet;
    fPrecedence   = other.fPrecedence;
    fText         = other.fText;
    fFirstPos     = other.fFirstPos;
    fLastPos      = other.fLastPos;
    fNullable     = other.fNullable;
    fLookAheadEnd = other.fLookAheadEnd;
    fVal          = other.fVal;
    UErrorCode status = U_ZERO_ERROR;
    fFirstPosSet  = new UVector(status);   // TODO - get a real status from somewhere
    fLastPosSet   = new UVector(status);
    fFollowPos    = new UVector(status);
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// Destructor. Deletes both this node AND any child nodes,
|
||||
// except in the case of variable reference nodes. For
|
||||
// these, the l. child points back to the definition, which
|
||||
// is common for all references to the variable, meaning
|
||||
// it can't be deleted here.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
// Destroy this node, its input set and its position sets. Children are
// destroyed too, except under varRef and setRef nodes: several of those
// can point at the same children, whose storage is managed elsewhere.
RBBINode::~RBBINode() {
    delete fInputSet;
    fInputSet = NULL;

    if (fType != varRef && fType != setRef) {
        delete fLeftChild;
        fLeftChild = NULL;
        delete fRightChild;
        fRightChild = NULL;
    }

    delete fFirstPosSet;
    delete fLastPosSet;
    delete fFollowPos;
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// cloneTree Make a copy of the subtree rooted at this node.
|
||||
// Discard any variable references encountered along the way,
|
||||
// and replace with copies of the variable's definitions.
|
||||
// Used to replicate the expression underneath variable
|
||||
// references in preparation for generating the DFA tables.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
// Copy the subtree rooted at this node and return the root of the copy.
//
//   - A variable reference is skipped: the copy of the variable's
//     definition (its left child) is returned in its place.
//   - A uset node is not copied; the node itself is returned, so it ends
//     up shared by the original and the cloned tree.
//   - Any other node is copied with the copy constructor and its children
//     are cloned recursively.
//
// Returns NULL if the node could not be allocated. A child whose clone
// came back NULL is left unlinked instead of being dereferenced.
RBBINode *RBBINode::cloneTree() {
    if (fType == RBBINode::varRef) {
        return fLeftChild->cloneTree();
    }
    if (fType == RBBINode::uset) {
        return this;
    }

    RBBINode *n = new RBBINode(*this);
    if (n == NULL) {
        return NULL;
    }
    if (fLeftChild != NULL) {
        n->fLeftChild = fLeftChild->cloneTree();
        if (n->fLeftChild != NULL) {
            n->fLeftChild->fParent = n;
        }
    }
    if (fRightChild != NULL) {
        n->fRightChild = fRightChild->cloneTree();
        if (n->fRightChild != NULL) {
            n->fRightChild->fParent = n;
        }
    }
    return n;
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenVariables Walk a parse tree, replacing any variable
|
||||
// references with a copy of the variable's definition.
|
||||
// Aside from variables, the tree is not changed.
|
||||
//
|
||||
// Return the root of the tree. If the root was not a variable
|
||||
// reference, it remains unchanged - the root we started with
|
||||
// is the root we return. If, however, the root was a variable
|
||||
// reference, the root of the newly cloned replacement tree will
|
||||
// be returned, and the original tree deleted.
|
||||
//
|
||||
// This function works by recursively walking the tree
|
||||
// without doing anything until a variable reference is
|
||||
// found, then calling cloneTree() at that point. Any
|
||||
// nested references are handled by cloneTree(), not here.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
// Replace variable references in this subtree with copies of the
// variables' definitions and return the (possibly new) subtree root.
// A non-reference node returns itself after its children have been
// processed; a reference node is deleted and the clone of its definition
// is returned instead.
RBBINode *RBBINode::flattenVariables() {
    if (fType != varRef) {
        if (fLeftChild != NULL) {
            RBBINode *newLeft = fLeftChild->flattenVariables();
            fLeftChild = newLeft;
            newLeft->fParent = this;
        }
        if (fRightChild != NULL) {
            RBBINode *newRight = fRightChild->flattenVariables();
            fRightChild = newRight;
            newRight->fParent = this;
        }
        return this;
    }

    // Variable reference: the left child is the definition. cloneTree()
    // takes care of any references nested inside it.
    RBBINode *expansion = fLeftChild->cloneTree();
    delete this;
    return expansion;
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// flattenSets Walk the parse tree, replacing any nodes of type setRef
|
||||
// with a copy of the expression tree for the set. A set's
|
||||
// equivalent expression tree is precomputed and saved as
|
||||
// the left child of the uset node.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
void RBBINode::flattenSets() {
|
||||
U_ASSERT(fType != setRef);
|
||||
|
||||
if (fLeftChild != NULL) {
|
||||
if (fLeftChild->fType==setRef) {
|
||||
RBBINode *setRefNode = fLeftChild;
|
||||
RBBINode *usetNode = setRefNode->fLeftChild;
|
||||
RBBINode *replTree = usetNode->fLeftChild;
|
||||
fLeftChild = replTree->cloneTree();
|
||||
fLeftChild->fParent = this;
|
||||
delete setRefNode;
|
||||
} else {
|
||||
fLeftChild->flattenSets();
|
||||
}
|
||||
}
|
||||
|
||||
if (fRightChild != NULL) {
|
||||
if (fRightChild->fType==setRef) {
|
||||
RBBINode *setRefNode = fRightChild;
|
||||
RBBINode *usetNode = setRefNode->fLeftChild;
|
||||
RBBINode *replTree = usetNode->fLeftChild;
|
||||
fRightChild = replTree->cloneTree();
|
||||
fRightChild->fParent = this;
|
||||
delete setRefNode;
|
||||
} else {
|
||||
fRightChild->flattenSets();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// findNodes() Locate all the nodes of the specified type, starting
|
||||
// at the specified root.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
// Append to "dest" every node of type "kind" in the tree rooted at this node.
// Nodes are visited in pre-order: this node, then the left subtree, then the
// right subtree.  Nothing is done once "status" holds a failure code.
void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    // Bail out if an earlier step (including an earlier addElement) failed.
    if (U_FAILURE(status)) {
        return;
    }

    if (fType == kind) {
        dest->addElement(this, status);
    }

    RBBINode *const subtrees[2] = { fLeftChild, fRightChild };
    for (int32_t i = 0; i < 2; i++) {
        if (subtrees[i] != NULL) {
            subtrees[i]->findNodes(dest, kind, status);
        }
    }
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// print. Print out a single node, for debugging.
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBINode::printNode() {
|
||||
static const char * const nodeTypeNames[] = {
|
||||
"setRef",
|
||||
"uset",
|
||||
"varRef",
|
||||
"leafChar",
|
||||
"lookAhead",
|
||||
"tag",
|
||||
"endMark",
|
||||
"opStart",
|
||||
"opCat",
|
||||
"opOr",
|
||||
"opStar",
|
||||
"opPlus",
|
||||
"opQuestion",
|
||||
"opBreak",
|
||||
"opReverse",
|
||||
"opLParen"
|
||||
};
|
||||
|
||||
if (this==NULL) {
|
||||
RBBIDebugPrintf("%10p", (void *)this);
|
||||
} else {
|
||||
RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ",
|
||||
(void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
|
||||
fSerialNum, fFirstPos, fVal);
|
||||
if (fType == varRef) {
|
||||
RBBI_DEBUG_printUnicodeString(fText);
|
||||
}
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
// Debug aid: print the code units of "s" one at a time with "%c", then pad
// with spaces until at least "minWidth" columns have been written.
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
{
    const int len = s.length();
    int column = 0;

    while (column < len) {
        RBBIDebugPrintf("%c", s.charAt(column));
        ++column;
    }

    // column == len here; fill out the remainder of the field.
    for (; column < minWidth; ++column) {
        RBBIDebugPrintf(" ");
    }
}
|
||||
#endif
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//
|
||||
// print. Print out the tree of nodes rooted at "this"
|
||||
//
|
||||
//-------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
// Debug aid: print this node and then, recursively, its subtrees.
// When printHeading is true a column heading is written first.  The children
// of a varRef node are not descended into; children of every other node type
// are always printed.
void RBBINode::printTree(UBool printHeading) {
    if (printHeading) {
        RBBIDebugPrintf( "-------------------------------------------------------------------\n"
                         " Address type Parent LeftChild RightChild serial position value\n"
                         );
    }

    this->printNode();

    if (this == NULL || fType == varRef) {
        return;
    }

    if (fLeftChild != NULL) {
        fLeftChild->printTree(FALSE);
    }
    if (fRightChild != NULL) {
        fRightChild->printTree(FALSE);
    }
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
118
source/common/rbbinode.h
Normal file
118
source/common/rbbinode.h
Normal file
|
@ -0,0 +1,118 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2001-2006, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
#ifndef RBBINODE_H
|
||||
#define RBBINODE_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
//
|
||||
// class RBBINode
|
||||
//
|
||||
// Represents a node in the parse tree generated when reading
|
||||
// a rule file.
|
||||
//
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeSet;
|
||||
class UVector;
|
||||
|
||||
class RBBINode : public UMemory {
|
||||
public:
|
||||
enum NodeType {
|
||||
setRef,
|
||||
uset,
|
||||
varRef,
|
||||
leafChar,
|
||||
lookAhead,
|
||||
tag,
|
||||
endMark,
|
||||
opStart,
|
||||
opCat,
|
||||
opOr,
|
||||
opStar,
|
||||
opPlus,
|
||||
opQuestion,
|
||||
opBreak,
|
||||
opReverse,
|
||||
opLParen
|
||||
};
|
||||
|
||||
enum OpPrecedence {
|
||||
precZero,
|
||||
precStart,
|
||||
precLParen,
|
||||
precOpOr,
|
||||
precOpCat
|
||||
};
|
||||
|
||||
NodeType fType;
|
||||
RBBINode *fParent;
|
||||
RBBINode *fLeftChild;
|
||||
RBBINode *fRightChild;
|
||||
UnicodeSet *fInputSet; // For uset nodes only.
|
||||
OpPrecedence fPrecedence; // For binary ops only.
|
||||
|
||||
UnicodeString fText; // Text corresponding to this node.
|
||||
// May be lazily evaluated when (if) needed
|
||||
// for some node types.
|
||||
int fFirstPos; // Position in the rule source string of the
|
||||
// first text associated with the node.
|
||||
// If there's a left child, this will be the same
|
||||
// as that child's left pos.
|
||||
int fLastPos; // Last position in the rule source string
|
||||
// of any text associated with this node.
|
||||
// If there's a right child, this will be the same
|
||||
// as that child's last postion.
|
||||
|
||||
UBool fNullable; // See Aho.
|
||||
int32_t fVal; // For leafChar nodes, the value.
|
||||
// Values are the character category,
|
||||
// corresponds to columns in the final
|
||||
// state transition table.
|
||||
|
||||
UBool fLookAheadEnd; // For endMark nodes, set TRUE if
|
||||
// marking the end of a look-ahead rule.
|
||||
|
||||
UVector *fFirstPosSet;
|
||||
UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
|
||||
UVector *fFollowPos;
|
||||
|
||||
|
||||
RBBINode(NodeType t);
|
||||
RBBINode(const RBBINode &other);
|
||||
~RBBINode();
|
||||
|
||||
RBBINode *cloneTree();
|
||||
RBBINode *flattenVariables();
|
||||
void flattenSets();
|
||||
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
void printNode();
|
||||
void printTree(UBool withHeading);
|
||||
#endif
|
||||
|
||||
private:
|
||||
RBBINode &operator = (const RBBINode &other); // No defs.
|
||||
UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used.
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
int fSerialNum; // Debugging aids.
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
U_CFUNC void
|
||||
RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
323
source/common/rbbirb.cpp
Normal file
323
source/common/rbbirb.cpp
Normal file
|
@ -0,0 +1,323 @@
|
|||
//
|
||||
// file: rbbirb.cpp
|
||||
//
|
||||
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains the RBBIRuleBuilder class implementation. This is the main class for
|
||||
// building (compiling) break rules into the tables required by the runtime
|
||||
// RBBI engine.
|
||||
//
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/ubrk.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uchriter.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
#include "rbbirb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
#include "rbbiscan.h"
|
||||
#include "rbbisetb.h"
|
||||
#include "rbbitblb.h"
|
||||
#include "rbbidata.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
// Constructor.
|
||||
//
|
||||
//----------------------------------------------------------------------------------------
|
||||
// Set every member to a known empty state, clear the caller's parse-error
// struct (if one was supplied), and then, provided "status" is not already a
// failure, allocate the helper objects the builder owns.
// On return "status" is a failure code if anything went wrong; some of the
// owned pointers may then still be NULL.
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
                                 UParseError         *parseErr,
                                 UErrorCode          &status)
    : fDebugEnv(NULL),
      fStatus(&status),               // status itself is examined below
      fParseError(parseErr),
      fRules(rules),
      fScanner(NULL),
      fForwardTree(NULL),
      fReverseTree(NULL),
      fSafeFwdTree(NULL),
      fSafeRevTree(NULL),
      fDefaultTree(&fForwardTree),
      fChainRules(FALSE),
      fLBCMNoChain(FALSE),
      fLookAheadHardBreak(FALSE),
      fSetBuilder(NULL),
      fUSetNodes(NULL),
      fForwardTables(NULL),
      fReverseTables(NULL),
      fSafeFwdTables(NULL),
      fSafeRevTables(NULL),
      fRuleStatusVals(NULL)
{
#ifdef RBBI_DEBUG
    fDebugEnv = getenv("U_RBBIDEBUG");
#endif

    if (parseErr) {
        uprv_memset(parseErr, 0, sizeof(UParseError));
    }

    // Do not allocate anything if the caller handed us a failure.
    if (U_FAILURE(status)) {
        return;
    }

    // The UVector constructors may themselves set status.
    fUSetNodes      = new UVector(status);
    fRuleStatusVals = new UVector(status);
    fScanner        = new RBBIRuleScanner(this);
    fSetBuilder     = new RBBISetBuilder(this);
    if (U_FAILURE(status)) {
        return;
    }

    if (fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
// Destructor
|
||||
//
|
||||
//----------------------------------------------------------------------------------------
|
||||
// Release everything the builder owns: the uset nodes collected in
// fUSetNodes, the vectors, the set builder, the four table builders, the four
// parse trees and the scanner.
RBBIRuleBuilder::~RBBIRuleBuilder() {
    // fUSetNodes stays NULL when the constructor returned early (incoming
    // failure status) or when its allocation failed, so it must be checked
    // before it is walked.
    if (fUSetNodes != NULL) {
        // The walk ends at the first NULL returned by elementAt().
        for (int i = 0; ; i++) {
            RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
            if (n == NULL) {
                break;
            }
            delete n;
        }
    }

    delete fUSetNodes;
    delete fSetBuilder;
    delete fForwardTables;
    delete fReverseTables;
    delete fSafeFwdTables;
    delete fSafeRevTables;

    delete fForwardTree;
    delete fReverseTree;
    delete fSafeFwdTree;
    delete fSafeRevTree;
    delete fScanner;
    delete fRuleStatusVals;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
// flattenData() - Collect up the compiled RBBI rule data and put it into
|
||||
// the format for saving in ICU data files,
|
||||
// which is also the format needed by the RBBI runtime engine.
|
||||
//
|
||||
//----------------------------------------------------------------------------------------
|
||||
// Round i up to the next multiple of 8 (values already on an 8-byte
// boundary are returned unchanged).
static int32_t align8(int32_t i) {
    const int32_t lowBits = 7;
    return (i + lowBits) & ~lowBits;
}
|
||||
|
||||
// Build a single heap block holding the header, the four state tables, the
// trie, the rule status table and the stripped rule source, in that order.
//
// Returns the block (allocated with uprv_malloc), or NULL if *fStatus is
// already a failure or the allocation fails; in the latter case *fStatus is
// set to U_MEMORY_ALLOCATION_ERROR.  The four table builders and the set
// builder must already have been built.
RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    if (U_FAILURE(*fStatus)) {
        return NULL;
    }

    // Remove comments and whitespace from the rules to make it smaller.
    UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));

    // Size of each section, padded up to a multiple of 8 for better memory
    // alignment.  The rule source size includes room for a terminating NUL.
    int32_t headerSize       = align8(sizeof(RBBIDataHeader));
    int32_t forwardTableSize = align8(fForwardTables->getTableSize());
    int32_t reverseTableSize = align8(fReverseTables->getTableSize());
    int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
    int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
    int32_t trieSize         = align8(fSetBuilder->getTrieSize());
    int32_t statusTableSize  = align8(fRuleStatusVals->size() * sizeof(int32_t));
    int32_t rulesSize        = align8((strippedRules.length()+1) * sizeof(UChar));

    int32_t totalSize = headerSize + forwardTableSize + reverseTableSize
                        + safeFwdTableSize + safeRevTableSize
                        + statusTableSize + trieSize + rulesSize;

    RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
    if (data == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memset(data, 0, totalSize);

    data->fMagic            = 0xb1a0;
    data->fFormatVersion[0] = 3;
    data->fFormatVersion[1] = 1;
    data->fFormatVersion[2] = 0;
    data->fFormatVersion[3] = 0;
    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

    // Section offsets are relative to the start of the block.  The lengths
    // recorded for the state tables and the status table are the padded
    // sizes; fTrieLen and fRuleSourceLen are the unpadded data sizes
    // (fRuleSourceLen does not count the terminating NUL).
    data->fFTable        = headerSize;
    data->fFTableLen     = forwardTableSize;
    data->fRTable        = data->fFTable  + forwardTableSize;
    data->fRTableLen     = reverseTableSize;
    data->fSFTable       = data->fRTable  + reverseTableSize;
    data->fSFTableLen    = safeFwdTableSize;
    data->fSRTable       = data->fSFTable + safeFwdTableSize;
    data->fSRTableLen    = safeRevTableSize;

    data->fTrie          = data->fSRTable + safeRevTableSize;
    data->fTrieLen       = fSetBuilder->getTrieSize();
    data->fStatusTable   = data->fTrie    + trieSize;
    data->fStatusTableLen= statusTableSize;
    data->fRuleSource    = data->fStatusTable + statusTableSize;
    data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);

    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

    fForwardTables->exportTable((uint8_t *)data + data->fFTable);
    fReverseTables->exportTable((uint8_t *)data + data->fRTable);
    fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
    fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);

    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
    for (int32_t i = 0; i < fRuleStatusVals->size(); i++) {
        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
    }

    // The rule source is the last section of the block, so the capacity given
    // to extract() must not exceed the rulesSize bytes reserved for it.  The
    // capacity is in UChars; rulesSize already includes room for the NUL.
    strippedRules.extract((UChar *)((uint8_t *)data + data->fRuleSource),
                          rulesSize / (int32_t)sizeof(UChar), *fStatus);

    return data;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------------------
|
||||
//
|
||||
// createRuleBasedBreakIterator construct from source rules that are passed in
|
||||
// in a UnicodeString
|
||||
//
|
||||
//----------------------------------------------------------------------------------------
|
||||
// Compile "rules" and return a new RuleBasedBreakIterator built from the
// compiled data, or NULL with "status" set to a failure code.
// "parseError", if not NULL, is cleared by the builder's constructor.
BreakIterator *
RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
                                               UParseError         *parseError,
                                               UErrorCode          &status)
{
    //
    // Read the input rules, generate a parse tree, symbol table,
    // and list of all Unicode Sets referenced by the rules.
    //
    // The builder lives on the stack; its destructor releases all of the
    // compiler-side objects on every return path below.
    RBBIRuleBuilder builder(rules, parseError, status);
    if (U_FAILURE(status)) {   // covers both an incoming failure and a
        return NULL;           // failure while constructing the builder
    }
    builder.fScanner->parse();

    //
    // UnicodeSet processing.
    //    Munge the Unicode Sets to create a set of character categories.
    //    Generate the mapping tables (TRIE) from input 32-bit characters to
    //    the character categories.
    //
    builder.fSetBuilder->build();

    //
    // Generate the DFA state transition table.
    //
    builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
    builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
    builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
    builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
    if (U_SUCCESS(status)
        && (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
            builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL))
    {
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    // Before building the tables, check to make sure the status is ok.
    // (This also catches failures recorded by parse() and build() above.)
    if (U_FAILURE(status)) {
        delete builder.fForwardTables; builder.fForwardTables = NULL;
        delete builder.fReverseTables; builder.fReverseTables = NULL;
        delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
        delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
        return NULL;
    }

    builder.fForwardTables->build();
    builder.fReverseTables->build();
    builder.fSafeFwdTables->build();
    builder.fSafeRevTables->build();

#ifdef RBBI_DEBUG
    if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
        builder.fForwardTables->printRuleStatusTable();
    }
#endif

    //
    // Package up the compiled data into a memory image
    // in the run-time format.
    //
    RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
    if (U_FAILURE(*builder.fStatus)) {
        return NULL;
    }

    //
    // Create a break iterator from the compiled rules.
    // (Identical to creation from stored pre-compiled rules)
    //
    // status is checked after init in construction.
    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
    if (U_FAILURE(status)) {
        delete This;
        This = NULL;
    }
    else if(This == NULL) {
        // The iterator object itself could not be allocated, so its
        // constructor never ran and nothing has taken over the flattened
        // data; release it here so it does not leak.
        uprv_free(data);
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return This;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
211
source/common/rbbirb.h
Normal file
211
source/common/rbbirb.h
Normal file
|
@ -0,0 +1,211 @@
|
|||
//
|
||||
// rbbirb.h
|
||||
//
|
||||
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains declarations for several classes from the
|
||||
// Rule Based Break Iterator rule builder.
|
||||
//
|
||||
|
||||
|
||||
#ifndef RBBIRB_H
|
||||
#define RBBIRB_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "uhash.h"
|
||||
#include "uvector.h"
|
||||
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
|
||||
// looks up references to $variables within a set.
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class RBBIRuleScanner;
|
||||
struct RBBIRuleTableEl;
|
||||
class RBBISetBuilder;
|
||||
class RBBINode;
|
||||
class RBBITableBuilder;
|
||||
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// RBBISymbolTable. Implements SymbolTable interface that is used by the
|
||||
// UnicodeSet parser to resolve references to $variables.
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
|
||||
public: // of these structs for each entry.
|
||||
RBBISymbolTableEntry();
|
||||
UnicodeString key;
|
||||
RBBINode *val;
|
||||
~RBBISymbolTableEntry();
|
||||
|
||||
private:
|
||||
RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
|
||||
RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
class RBBISymbolTable : public UMemory, public SymbolTable {
|
||||
private:
|
||||
const UnicodeString &fRules;
|
||||
UHashtable *fHashTable;
|
||||
RBBIRuleScanner *fRuleScanner;
|
||||
|
||||
// These next two fields are part of the mechanism for passing references to
|
||||
// already-constructed UnicodeSets back to the UnicodeSet constructor
|
||||
// when the pattern includes $variable references.
|
||||
const UnicodeString ffffString; // = "/uffff"
|
||||
UnicodeSet *fCachedSetLookup;
|
||||
|
||||
public:
|
||||
// API inherited from class SymbolTable
|
||||
virtual const UnicodeString* lookup(const UnicodeString& s) const;
|
||||
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
|
||||
virtual UnicodeString parseReference(const UnicodeString& text,
|
||||
ParsePosition& pos, int32_t limit) const;
|
||||
|
||||
// Additional Functions
|
||||
RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
|
||||
virtual ~RBBISymbolTable();
|
||||
|
||||
virtual RBBINode *lookupNode(const UnicodeString &key) const;
|
||||
virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
virtual void rbbiSymtablePrint() const;
|
||||
#else
|
||||
// A do-nothing inline function for non-debug builds. Member funcs can't be empty
|
||||
// or the call sites won't compile.
|
||||
int32_t fFakeField;
|
||||
#define rbbiSymtablePrint() fFakeField=0;
|
||||
#endif
|
||||
|
||||
private:
|
||||
RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
|
||||
RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
class RBBIRuleBuilder : public UMemory {
|
||||
public:
|
||||
|
||||
// Create a rule based break iterator from a set of rules.
|
||||
// This function is the main entry point into the rule builder. The
|
||||
// public ICU API for creating RBBIs uses this function to do the actual work.
|
||||
//
|
||||
static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
|
||||
UParseError *parseError,
|
||||
UErrorCode &status);
|
||||
|
||||
public:
|
||||
// The "public" functions and data members that appear below are accessed
|
||||
// (and shared) by the various parts that make up the rule builder. They
|
||||
// are NOT intended to be accessed by anything outside of the
|
||||
// rule builder implementation.
|
||||
RBBIRuleBuilder(const UnicodeString &rules,
|
||||
UParseError *parseErr,
|
||||
UErrorCode &status
|
||||
);
|
||||
|
||||
virtual ~RBBIRuleBuilder();
|
||||
char *fDebugEnv; // controls debug trace output
|
||||
UErrorCode *fStatus; // Error reporting. Keeping status
|
||||
UParseError *fParseError; // here avoids passing it everywhere.
|
||||
const UnicodeString &fRules; // The rule string that we are compiling
|
||||
|
||||
RBBIRuleScanner *fScanner; // The scanner.
|
||||
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
|
||||
RBBINode *fReverseTree; // then manipulated by subsequent steps.
|
||||
RBBINode *fSafeFwdTree;
|
||||
RBBINode *fSafeRevTree;
|
||||
|
||||
RBBINode **fDefaultTree; // For rules not qualified with a !
|
||||
// the tree to which they belong to.
|
||||
|
||||
UBool fChainRules; // True for chained Unicode TR style rules.
|
||||
// False for traditional regexp rules.
|
||||
|
||||
UBool fLBCMNoChain; // True: suppress chaining of rules on
|
||||
// chars with LineBreak property == CM.
|
||||
|
||||
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
|
||||
// immediate break, no continuing for the
|
||||
// longest match.
|
||||
|
||||
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
|
||||
UVector *fUSetNodes; // Vector of all uset nodes.
|
||||
|
||||
RBBITableBuilder *fForwardTables; // State transition tables
|
||||
RBBITableBuilder *fReverseTables;
|
||||
RBBITableBuilder *fSafeFwdTables;
|
||||
RBBITableBuilder *fSafeRevTables;
|
||||
|
||||
UVector *fRuleStatusVals; // The values that can be returned
|
||||
// from getRuleStatus().
|
||||
|
||||
RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
|
||||
// data tables..
|
||||
private:
|
||||
RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
|
||||
RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
|
||||
// been encountered. The val Node will be of nodetype uset
|
||||
// and contain pointers to the actual UnicodeSets.
|
||||
// The Key is the source string for initializing the set.
|
||||
//
|
||||
// The hash table is used to avoid creating duplicate
|
||||
// unnamed (not $var references) UnicodeSets.
|
||||
//
|
||||
// Memory Management:
|
||||
// The Hash Table owns these RBBISetTableEl structs and
|
||||
// the key strings. It does NOT own the val nodes.
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
struct RBBISetTableEl {
|
||||
UnicodeString *key;
|
||||
RBBINode *val;
|
||||
};
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
// RBBIDebugPrintf Printf equivalent, for debugging output.
|
||||
// Conditional compilation of the implementation lets us
|
||||
// get rid of the stdio dependency in environments where it
|
||||
// is unavailable.
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
#include <stdio.h>
|
||||
#define RBBIDebugPrintf printf
|
||||
#define RBBIDebugPuts puts
|
||||
#else
|
||||
#undef RBBIDebugPrintf
|
||||
#define RBBIDebugPuts(arg)
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
|
||||
|
275
source/common/rbbirpt.h
Normal file
275
source/common/rbbirpt.h
Normal file
|
@ -0,0 +1,275 @@
|
|||
//---------------------------------------------------------------------------------
|
||||
//
|
||||
// Generated Header File. Do not edit by hand.
|
||||
// This file contains the state table for the ICU Rule Based Break Iterator
|
||||
// rule parser.
|
||||
// It is generated by the Perl script "rbbicst.pl" from
|
||||
// the rule parser state definitions file "rbbirpt.txt".
|
||||
//
|
||||
// Copyright (C) 2002-2005 International Business Machines Corporation
|
||||
// and others. All rights reserved.
|
||||
//
|
||||
//---------------------------------------------------------------------------------
|
||||
#ifndef RBBIRPT_H
|
||||
#define RBBIRPT_H
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
//
|
||||
// Character classes for RBBI rule scanning.
|
||||
//
|
||||
static const uint8_t kRuleSet_digit_char = 128;
|
||||
static const uint8_t kRuleSet_name_char = 129;
|
||||
static const uint8_t kRuleSet_name_start_char = 130;
|
||||
static const uint8_t kRuleSet_rule_char = 131;
|
||||
static const uint8_t kRuleSet_white_space = 132;
|
||||
|
||||
|
||||
enum RBBI_RuleParseAction {
|
||||
doCheckVarDef,
|
||||
doDotAny,
|
||||
doEndAssign,
|
||||
doEndOfRule,
|
||||
doEndVariableName,
|
||||
doExit,
|
||||
doExprCatOperator,
|
||||
doExprFinished,
|
||||
doExprOrOperator,
|
||||
doExprRParen,
|
||||
doExprStart,
|
||||
doLParen,
|
||||
doNOP,
|
||||
doOptionEnd,
|
||||
doOptionStart,
|
||||
doReverseDir,
|
||||
doRuleChar,
|
||||
doRuleError,
|
||||
doRuleErrorAssignExpr,
|
||||
doScanUnicodeSet,
|
||||
doSlash,
|
||||
doStartAssign,
|
||||
doStartTagValue,
|
||||
doStartVariableName,
|
||||
doTagDigit,
|
||||
doTagExpectedError,
|
||||
doTagValue,
|
||||
doUnaryOpPlus,
|
||||
doUnaryOpQuestion,
|
||||
doUnaryOpStar,
|
||||
doVariableNameExpectedErr,
|
||||
rbbiLastAction};
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// RBBIRuleTableEl represents the structure of a row in the transition table
|
||||
// for the rule parser state machine.
|
||||
//-------------------------------------------------------------------------------
|
||||
struct RBBIRuleTableEl {
|
||||
RBBI_RuleParseAction fAction;
|
||||
uint8_t fCharClass; // 0-127: an individual ASCII character
|
||||
// 128-255: character class index
|
||||
uint8_t fNextState; // 0-250: normal next-stat numbers
|
||||
// 255: pop next-state from stack.
|
||||
uint8_t fPushState;
|
||||
UBool fNextChar;
|
||||
};
|
||||
|
||||
// Transition table for the RBBI rule-parser state machine. This is the
// generated form of the state definitions in rbbirpt.txt (that file names the
// perl script "rbbicst.pl" as the generator), so grammar changes belong there.
//
// Row layout: {fAction, fCharClass, fNextState, fPushState, fNextChar}.
// The trailing comment on each row is its index; the first row of each state
// also carries the state name, and that index is the state's number.
// Row 0 is an unnumbered placeholder; the first real state ("start") is row 1.
static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
|
||||
{doNOP, 0, 0, 0, TRUE}
|
||||
, {doExprStart, 254, 21, 8, FALSE} // 1 start
|
||||
, {doNOP, 132, 1,0, TRUE} // 2
|
||||
, {doExprStart, 36 /* $ */, 80, 90, FALSE} // 3
|
||||
, {doNOP, 33 /* ! */, 11,0, TRUE} // 4
|
||||
, {doNOP, 59 /* ; */, 1,0, TRUE} // 5
|
||||
, {doNOP, 252, 0,0, FALSE} // 6
|
||||
, {doExprStart, 255, 21, 8, FALSE} // 7
|
||||
, {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 8 break-rule-end
|
||||
, {doNOP, 132, 8,0, TRUE} // 9
|
||||
, {doRuleError, 255, 95,0, FALSE} // 10
|
||||
, {doNOP, 33 /* ! */, 13,0, TRUE} // 11 rev-option
|
||||
, {doReverseDir, 255, 20, 8, FALSE} // 12
|
||||
, {doOptionStart, 130, 15,0, TRUE} // 13 option-scan1
|
||||
, {doRuleError, 255, 95,0, FALSE} // 14
|
||||
, {doNOP, 129, 15,0, TRUE} // 15 option-scan2
|
||||
, {doOptionEnd, 255, 17,0, FALSE} // 16
|
||||
, {doNOP, 59 /* ; */, 1,0, TRUE} // 17 option-scan3
|
||||
, {doNOP, 132, 17,0, TRUE} // 18
|
||||
, {doRuleError, 255, 95,0, FALSE} // 19
|
||||
, {doExprStart, 255, 21, 8, FALSE} // 20 reverse-rule
|
||||
, {doRuleChar, 254, 30,0, TRUE} // 21 term
|
||||
, {doNOP, 132, 21,0, TRUE} // 22
|
||||
, {doRuleChar, 131, 30,0, TRUE} // 23
|
||||
, {doNOP, 91 /* [ */, 86, 30, FALSE} // 24
|
||||
, {doLParen, 40 /* ( */, 21, 30, TRUE} // 25
|
||||
, {doNOP, 36 /* $ */, 80, 29, FALSE} // 26
|
||||
, {doDotAny, 46 /* . */, 30,0, TRUE} // 27
|
||||
, {doRuleError, 255, 95,0, FALSE} // 28
|
||||
, {doCheckVarDef, 255, 30,0, FALSE} // 29 term-var-ref
|
||||
, {doNOP, 132, 30,0, TRUE} // 30 expr-mod
|
||||
, {doUnaryOpStar, 42 /* * */, 35,0, TRUE} // 31
|
||||
, {doUnaryOpPlus, 43 /* + */, 35,0, TRUE} // 32
|
||||
, {doUnaryOpQuestion, 63 /* ? */, 35,0, TRUE} // 33
|
||||
, {doNOP, 255, 35,0, FALSE} // 34
|
||||
, {doExprCatOperator, 254, 21,0, FALSE} // 35 expr-cont
|
||||
, {doNOP, 132, 35,0, TRUE} // 36
|
||||
, {doExprCatOperator, 131, 21,0, FALSE} // 37
|
||||
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 38
|
||||
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 39
|
||||
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 40
|
||||
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 41
|
||||
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 42
|
||||
, {doExprCatOperator, 123 /* { */, 59,0, TRUE} // 43
|
||||
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 44
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 45
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 46
|
||||
, {doSlash, 47 /* / */, 49,0, TRUE} // 47 look-ahead
|
||||
, {doNOP, 255, 95,0, FALSE} // 48
|
||||
, {doExprCatOperator, 254, 21,0, FALSE} // 49 expr-cont-no-slash
|
||||
, {doNOP, 132, 35,0, TRUE} // 50 NOTE(review): next state is 35 (expr-cont), not 49; mirrors "white_space n expr-cont" in rbbirpt.txt - confirm intended
|
||||
, {doExprCatOperator, 131, 21,0, FALSE} // 51
|
||||
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 52
|
||||
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 53
|
||||
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 54
|
||||
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 55
|
||||
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 56
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 57
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 58
|
||||
, {doNOP, 132, 59,0, TRUE} // 59 tag-open
|
||||
, {doStartTagValue, 128, 62,0, FALSE} // 60
|
||||
, {doTagExpectedError, 255, 95,0, FALSE} // 61
|
||||
, {doNOP, 132, 66,0, TRUE} // 62 tag-value
|
||||
, {doNOP, 125 /* } */, 66,0, FALSE} // 63
|
||||
, {doTagDigit, 128, 62,0, TRUE} // 64
|
||||
, {doTagExpectedError, 255, 95,0, FALSE} // 65
|
||||
, {doNOP, 132, 66,0, TRUE} // 66 tag-close
|
||||
, {doTagValue, 125 /* } */, 69,0, TRUE} // 67
|
||||
, {doTagExpectedError, 255, 95,0, FALSE} // 68
|
||||
, {doExprCatOperator, 254, 21,0, FALSE} // 69 expr-cont-no-tag
|
||||
, {doNOP, 132, 69,0, TRUE} // 70
|
||||
, {doExprCatOperator, 131, 21,0, FALSE} // 71
|
||||
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 72
|
||||
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 73
|
||||
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 74
|
||||
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 75
|
||||
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 76
|
||||
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 77
|
||||
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 78
|
||||
, {doExprFinished, 255, 255,0, FALSE} // 79
|
||||
, {doStartVariableName, 36 /* $ */, 82,0, TRUE} // 80 scan-var-name
|
||||
, {doNOP, 255, 95,0, FALSE} // 81
|
||||
, {doNOP, 130, 84,0, TRUE} // 82 scan-var-start
|
||||
, {doVariableNameExpectedErr, 255, 95,0, FALSE} // 83
|
||||
, {doNOP, 129, 84,0, TRUE} // 84 scan-var-body
|
||||
, {doEndVariableName, 255, 255,0, FALSE} // 85
|
||||
, {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 86 scan-unicode-set
|
||||
, {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 87
|
||||
, {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 88
|
||||
, {doNOP, 255, 95,0, FALSE} // 89
|
||||
, {doNOP, 132, 90,0, TRUE} // 90 assign-or-rule
|
||||
, {doStartAssign, 61 /* = */, 21, 93, TRUE} // 91
|
||||
, {doNOP, 255, 29, 8, FALSE} // 92
|
||||
, {doEndAssign, 59 /* ; */, 1,0, TRUE} // 93 assign-end
|
||||
, {doRuleErrorAssignExpr, 255, 95,0, FALSE} // 94
|
||||
, {doExit, 255, 95,0, TRUE} // 95 errorDeath
|
||||
};
|
||||
#ifdef RBBI_DEBUG
|
||||
// Debug-only (RBBI_DEBUG) table of parser state names, indexed by row number
// of gRuleParseStateTable. An entry is the state's name on the first row of
// that state and 0 on every other row; one extra 0 follows the last row.
static const char * const RBBIRuleStateNames[] = { 0,
|
||||
"start",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"break-rule-end",
|
||||
0,
|
||||
0,
|
||||
"rev-option",
|
||||
0,
|
||||
"option-scan1",
|
||||
0,
|
||||
"option-scan2",
|
||||
0,
|
||||
"option-scan3",
|
||||
0,
|
||||
0,
|
||||
"reverse-rule",
|
||||
"term",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"term-var-ref",
|
||||
"expr-mod",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"expr-cont",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"look-ahead",
|
||||
0,
|
||||
"expr-cont-no-slash",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"tag-open",
|
||||
0,
|
||||
0,
|
||||
"tag-value",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"tag-close",
|
||||
0,
|
||||
0,
|
||||
"expr-cont-no-tag",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"scan-var-name",
|
||||
0,
|
||||
"scan-var-start",
|
||||
0,
|
||||
"scan-var-body",
|
||||
0,
|
||||
"scan-unicode-set",
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"assign-or-rule",
|
||||
0,
|
||||
0,
|
||||
"assign-end",
|
||||
0,
|
||||
"errorDeath",
|
||||
0};
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
315
source/common/rbbirpt.txt
Normal file
315
source/common/rbbirpt.txt
Normal file
|
@ -0,0 +1,315 @@
|
|||
|
||||
#*****************************************************************************
|
||||
#
|
||||
# Copyright (C) 2002-2003, International Business Machines Corporation and others.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
#*****************************************************************************
|
||||
#
|
||||
# file: rbbirpt.txt
|
||||
# ICU Break Iterator Rule Parser State Table
|
||||
#
|
||||
# This state table is used when reading and parsing a set of RBBI rules
|
||||
# The rule parser uses a state machine; the data in this file define the
|
||||
# state transitions that occur for each input character.
|
||||
#
|
||||
# *** This file defines the RBBI rule grammar. This is it.
|
||||
# *** The determination of what is accepted is here.
|
||||
#
|
||||
# This file is processed by a perl script "rbbicst.pl" to produce initialized C arrays
|
||||
# that are then built with the rule parser.
|
||||
#
|
||||
|
||||
#
|
||||
# Here is the syntax of the state definitions in this file:
|
||||
#
|
||||
#
|
||||
#StateName:
|
||||
# input-char n next-state ^push-state action
|
||||
# input-char n next-state ^push-state action
|
||||
# | | | | |
|
||||
# | | | | |--- action to be performed by state machine
|
||||
# | | | | See function RBBIRuleScanner::doParseActions()
|
||||
# | | | |
|
||||
# | | | |--- Push this named state onto the state stack.
|
||||
# | | | Later, when next state is specified as "pop",
|
||||
# | | | the pushed state will become the current state.
|
||||
# | | |
|
||||
# | | |--- Transition to this state if the current input character matches the input
|
||||
# | | character or char class in the left hand column. "pop" causes the next
|
||||
# | | state to be popped from the state stack.
|
||||
# | |
|
||||
# | |--- When making the state transition specified on this line, advance to the next
|
||||
# | character from the input only if 'n' appears here.
|
||||
# |
|
||||
# |--- Character or named character classes to test for. If the current character being scanned
|
||||
# matches, perform the actions and go to the state specified on this line.
|
||||
# The input character is tested sequentially, in the order written. The characters and
|
||||
# character classes tested for do not need to be mutually exclusive. The first match wins.
|
||||
#
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# start state, scan position is at the beginning of the rules file, or in between two rules.
|
||||
#
|
||||
start:
|
||||
escaped term ^break-rule-end doExprStart
|
||||
white_space n start
|
||||
'$' scan-var-name ^assign-or-rule doExprStart
|
||||
'!' n rev-option
|
||||
';' n start # ignore empty rules.
|
||||
eof exit
|
||||
default term ^break-rule-end doExprStart
|
||||
|
||||
#
|
||||
# break-rule-end: Returned from doing a break-rule expression.
|
||||
#
|
||||
break-rule-end:
|
||||
';' n start doEndOfRule
|
||||
white_space n break-rule-end
|
||||
default errorDeath doRuleError
|
||||
|
||||
|
||||
#
|
||||
# ! We've just scanned a '!', indicating either a !!key word flag or a
|
||||
# !Reverse rule.
|
||||
#
|
||||
rev-option:
|
||||
'!' n option-scan1
|
||||
default reverse-rule ^break-rule-end doReverseDir
|
||||
|
||||
option-scan1:
|
||||
name_start_char n option-scan2 doOptionStart
|
||||
default errorDeath doRuleError
|
||||
|
||||
option-scan2:
|
||||
name_char n option-scan2
|
||||
default option-scan3 doOptionEnd
|
||||
|
||||
option-scan3:
|
||||
';' n start
|
||||
white_space n option-scan3
|
||||
default errorDeath doRuleError
|
||||
|
||||
|
||||
reverse-rule:
|
||||
default term ^break-rule-end doExprStart
|
||||
|
||||
|
||||
#
|
||||
# term. Eat through a single rule character, or a composite thing, which
|
||||
# could be a parenthesized expression, a variable name, or a Unicode Set.
|
||||
#
|
||||
term:
|
||||
escaped n expr-mod doRuleChar
|
||||
white_space n term
|
||||
rule_char n expr-mod doRuleChar
|
||||
'[' scan-unicode-set ^expr-mod
|
||||
'(' n term ^expr-mod doLParen
|
||||
'$' scan-var-name ^term-var-ref
|
||||
'.' n expr-mod doDotAny
|
||||
default errorDeath doRuleError
|
||||
|
||||
|
||||
|
||||
#
|
||||
# term-var-ref We've just finished scanning a reference to a $variable.
|
||||
# Check that the variable was defined.
|
||||
# The variable name scanning is in common with assignment statements,
|
||||
# so the check can't be done there.
|
||||
term-var-ref:
|
||||
default expr-mod doCheckVarDef
|
||||
|
||||
|
||||
#
|
||||
# expr-mod We've just finished scanning a term, now look for the optional
|
||||
# trailing '*', '?', '+'
|
||||
#
|
||||
expr-mod:
|
||||
white_space n expr-mod
|
||||
'*' n expr-cont doUnaryOpStar
|
||||
'+' n expr-cont doUnaryOpPlus
|
||||
'?' n expr-cont doUnaryOpQuestion
|
||||
default expr-cont
|
||||
|
||||
|
||||
#
|
||||
# expr-cont Expression, continuation. At a point where additional terms are
|
||||
# allowed, but not required.
|
||||
#
|
||||
expr-cont:
|
||||
escaped term doExprCatOperator
|
||||
white_space n expr-cont
|
||||
rule_char term doExprCatOperator
|
||||
'[' term doExprCatOperator
|
||||
'(' term doExprCatOperator
|
||||
'$' term doExprCatOperator
|
||||
'.' term doExprCatOperator
|
||||
'/' look-ahead doExprCatOperator
|
||||
'{' n tag-open doExprCatOperator
|
||||
'|' n term doExprOrOperator
|
||||
')' n pop doExprRParen
|
||||
default pop doExprFinished
|
||||
|
||||
|
||||
#
|
||||
# look-ahead Scanning a '/', which identifies a break point, assuming that the
|
||||
# remainder of the expression matches.
|
||||
#
|
||||
# Generate a parse tree as if this was a special kind of input symbol
|
||||
# appearing in an otherwise normal concatenation expression.
|
||||
#
|
||||
look-ahead:
|
||||
'/' n expr-cont-no-slash doSlash
|
||||
default errorDeath
|
||||
|
||||
|
||||
#
|
||||
# expr-cont-no-slash Expression, continuation. At a point where additional terms are
|
||||
# allowed, but not required. Just like
|
||||
# expr-cont, above, except that no '/'
|
||||
# look-ahead symbol is permitted.
|
||||
#
|
||||
expr-cont-no-slash:
|
||||
escaped term doExprCatOperator
|
||||
white_space n expr-cont
|
||||
rule_char term doExprCatOperator
|
||||
'[' term doExprCatOperator
|
||||
'(' term doExprCatOperator
|
||||
'$' term doExprCatOperator
|
||||
'.' term doExprCatOperator
|
||||
'|' n term doExprOrOperator
|
||||
')' n pop doExprRParen
|
||||
default pop doExprFinished
|
||||
|
||||
|
||||
#
|
||||
# tags scanning a '{', the opening delimiter for a tag that identifies
|
||||
# the kind of match. Scan the whole {dddd} tag, where d=digit
|
||||
#
|
||||
tag-open:
|
||||
white_space n tag-open
|
||||
digit_char tag-value doStartTagValue
|
||||
default errorDeath doTagExpectedError
|
||||
|
||||
tag-value:
|
||||
white_space n tag-close
|
||||
'}' tag-close
|
||||
digit_char n tag-value doTagDigit
|
||||
default errorDeath doTagExpectedError
|
||||
|
||||
tag-close:
|
||||
white_space n tag-close
|
||||
'}' n expr-cont-no-tag doTagValue
|
||||
default errorDeath doTagExpectedError
|
||||
|
||||
|
||||
|
||||
#
|
||||
# expr-cont-no-tag Expression, continuation. At a point where additional terms are
|
||||
# allowed, but not required. Just like
|
||||
# expr-cont, above, except that no "{ddd}"
|
||||
# tagging is permitted.
|
||||
#
|
||||
expr-cont-no-tag:
|
||||
escaped term doExprCatOperator
|
||||
white_space n expr-cont-no-tag
|
||||
rule_char term doExprCatOperator
|
||||
'[' term doExprCatOperator
|
||||
'(' term doExprCatOperator
|
||||
'$' term doExprCatOperator
|
||||
'.' term doExprCatOperator
|
||||
'/' look-ahead doExprCatOperator
|
||||
'|' n term doExprOrOperator
|
||||
')' n pop doExprRParen
|
||||
default pop doExprFinished
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Variable Name Scanning.
|
||||
#
|
||||
# The state that branched to here must have pushed a return state
|
||||
# to go to after completion of the variable name scanning.
|
||||
#
|
||||
# The current input character must be the $ that introduces the name.
|
||||
# The $ is consumed here rather than in the state that first detected it
|
||||
# so that the doStartVariableName action only needs to happen in one
|
||||
# place (here), and the other states don't need to worry about it.
|
||||
#
|
||||
scan-var-name:
|
||||
'$' n scan-var-start doStartVariableName
|
||||
default errorDeath
|
||||
|
||||
|
||||
scan-var-start:
|
||||
name_start_char n scan-var-body
|
||||
default errorDeath doVariableNameExpectedErr
|
||||
|
||||
scan-var-body:
|
||||
name_char n scan-var-body
|
||||
default pop doEndVariableName
|
||||
|
||||
|
||||
|
||||
#
|
||||
# scan-unicode-set Unicode Sets are parsed by the UnicodeSet class.
|
||||
# Within the RBBI parser, after finding the first character
|
||||
# of a Unicode Set, we just hand the rule input at that
|
||||
# point off to the Unicode Set constructor, then pick
|
||||
# up parsing after the close of the set.
|
||||
#
|
||||
# The action for this state invokes the UnicodeSet parser.
|
||||
#
|
||||
scan-unicode-set:
|
||||
'[' n pop doScanUnicodeSet
|
||||
'p' n pop doScanUnicodeSet
|
||||
'P' n pop doScanUnicodeSet
|
||||
default errorDeath
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#
|
||||
# assign-or-rule. A $variable was encountered at the start of something, could be
|
||||
# either an assignment statement or a rule, depending on whether an '='
|
||||
# follows the variable name. We get to this state when the variable name
|
||||
# scanning does a return.
|
||||
#
|
||||
assign-or-rule:
|
||||
white_space n assign-or-rule
|
||||
'=' n term ^assign-end doStartAssign # variable was target of assignment
|
||||
default term-var-ref ^break-rule-end # variable was a term in a rule
|
||||
|
||||
|
||||
|
||||
#
|
||||
# assign-end This state is entered when the end of the expression on the
|
||||
# right hand side of an assignment is found. We get here via
|
||||
# a pop; this state is pushed when the '=' in an assignment is found.
|
||||
#
|
||||
# The only thing allowed at this point is a ';'. The RHS of an
|
||||
# assignment must look like a rule expression, and we come here
|
||||
# when what is being scanned no longer looks like an expression.
|
||||
#
|
||||
assign-end:
|
||||
';' n start doEndAssign
|
||||
default errorDeath doRuleErrorAssignExpr
|
||||
|
||||
|
||||
|
||||
#
|
||||
# errorDeath. This state is specified as the next state whenever a syntax error
|
||||
# in the source rules is detected. Barring bugs, the state machine will never
|
||||
# actually get here, but will stop because of the action associated with the error.
|
||||
# But, just in case, this state asks the state machine to exit.
|
||||
errorDeath:
|
||||
default n errorDeath doExit
|
||||
|
||||
|
1210
source/common/rbbiscan.cpp
Normal file
1210
source/common/rbbiscan.cpp
Normal file
File diff suppressed because it is too large
Load diff
162
source/common/rbbiscan.h
Normal file
162
source/common/rbbiscan.h
Normal file
|
@ -0,0 +1,162 @@
|
|||
//
|
||||
// rbbiscan.h
|
||||
//
|
||||
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
|
||||
// All Rights Reserved.
|
||||
//
|
||||
// This file contains declarations for class RBBIRuleScanner
|
||||
//
|
||||
|
||||
|
||||
#ifndef RBBISCAN_H
|
||||
#define RBBISCAN_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "uhash.h"
|
||||
#include "uvector.h"
|
||||
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
|
||||
// looks up references to $variables within a set.
|
||||
#include "rbbinode.h"
|
||||
//#include "rbbitblb.h"
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class RBBIRuleBuilder;
|
||||
class RBBISymbolTable;
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
//
|
||||
// class RBBIRuleScanner does the lowest level, character-at-a-time
|
||||
// scanning of break iterator rules.
|
||||
//
|
||||
// The output of the scanner is parse trees for
|
||||
// the rule expressions and a list of all Unicode Sets
|
||||
// encountered.
|
||||
//
|
||||
//--------------------------------------------------------------------------------
|
||||
|
||||
class RBBIRuleScanner : public UMemory {
|
||||
public:
|
||||
|
||||
enum {
|
||||
kStackSize = 100 // The size of the state stack for
|
||||
}; // rules parsing. Corresponds roughly
|
||||
// to the depth of parentheses nesting
|
||||
// that is allowed in the rules.
|
||||
|
||||
// One scanned rule character plus a flag recording whether it was escaped.
struct RBBIRuleChar {
|
||||
UChar32 fChar;
|
||||
UBool fEscaped;
|
||||
};
|
||||
|
||||
RBBIRuleScanner(RBBIRuleBuilder *rb);
|
||||
|
||||
|
||||
virtual ~RBBIRuleScanner();
|
||||
|
||||
void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
|
||||
// NOTE(review): declared void, so nothing is returned at end of input - confirm how the end is reported.
|
||||
|
||||
UBool push(const RBBIRuleChar &c); // Push (unget) one character.
|
||||
// Only a single character may be pushed.
|
||||
|
||||
void parse(); // Parse the rules, generating two parse
|
||||
// trees, one each for the forward and
|
||||
// reverse rules,
|
||||
// and a list of UnicodeSets encountered.
|
||||
|
||||
/**
|
||||
* Return a rules string without unnecessary
|
||||
* characters.
|
||||
*/
|
||||
static UnicodeString stripRules(const UnicodeString &rules);
|
||||
private:
|
||||
|
||||
UBool doParseActions(int32_t a);
|
||||
void error(UErrorCode e); // error reporting convenience function.
|
||||
void fixOpStack(RBBINode::OpPrecedence p);
|
||||
// NOTE(review): a stray comment fragment ("a character.") stood here; it looks left over from a removed declaration.
|
||||
void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
|
||||
|
||||
UChar32 nextCharLL();
|
||||
#ifdef RBBI_DEBUG
|
||||
void printNodeStack(const char *title);
|
||||
#endif
|
||||
RBBINode *pushNewNode(RBBINode::NodeType t);
|
||||
void scanSet();
|
||||
|
||||
|
||||
RBBIRuleBuilder *fRB; // The rule builder that we are part of.
|
||||
|
||||
int32_t fScanIndex; // Index of current character being processed
|
||||
// in the rule input string.
|
||||
int32_t fNextIndex; // Index of the next character, which
|
||||
// is the first character not yet scanned.
|
||||
UBool fQuoteMode; // Scan is in a 'quoted region'
|
||||
int32_t fLineNum; // Line number in input file.
|
||||
int32_t fCharNum; // Char position within the line.
|
||||
UChar32 fLastChar; // Previous char, needed to count CR-LF
|
||||
// as a single line, not two.
|
||||
|
||||
RBBIRuleChar fC; // Current char for parse state machine
|
||||
// processing.
|
||||
UnicodeString fVarName; // $variableName, valid when we've just
|
||||
// scanned one.
|
||||
|
||||
RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
|
||||
// parsing. index by p[state][char-class]
|
||||
|
||||
uint16_t fStack[kStackSize]; // State stack, holds state pushes
|
||||
int32_t fStackPtr; // and pops as specified in the state
|
||||
// transition rules.
|
||||
|
||||
RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
|
||||
// during the parse of a rule
|
||||
int32_t fNodeStackPtr;
|
||||
|
||||
|
||||
UBool fReverseRule; // True if the rule currently being scanned
|
||||
// is a reverse direction rule (if it
|
||||
// starts with a '!')
|
||||
|
||||
UBool fLookAheadRule; // True if the rule includes a '/'
|
||||
// somewhere within it.
|
||||
|
||||
RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
|
||||
// $variable symbols.
|
||||
|
||||
UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
|
||||
// the sets created while parsing rules.
|
||||
// The key is the string used for creating
|
||||
// the set.
|
||||
|
||||
UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
|
||||
// the scanning of RBBI rules. The
|
||||
// indices for these are assigned by the
|
||||
// perl script that builds the state tables.
|
||||
// See rbbirpt.h.
// NOTE(review): rbbirpt.h numbers the classes from 128; presumably
// the array index is (class value - 128) - confirm in rbbiscan.cpp.
|
||||
|
||||
int32_t fRuleNum; // Counts each rule as it is scanned.
|
||||
|
||||
int32_t fOptionStart; // Input index of start of a !!option
|
||||
// keyword, while being scanned.
|
||||
|
||||
// NOTE(review): named after the rule_char / white_space / name_char /
// name_start_char classes of rbbirpt.h; ownership and initialization are
// not visible in this header - confirm in rbbiscan.cpp.
UnicodeSet *gRuleSet_rule_char;
|
||||
UnicodeSet *gRuleSet_white_space;
|
||||
UnicodeSet *gRuleSet_name_char;
|
||||
UnicodeSet *gRuleSet_name_start_char;
|
||||
|
||||
RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
|
||||
RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
695
source/common/rbbisetb.cpp
Normal file
695
source/common/rbbisetb.cpp
Normal file
|
@ -0,0 +1,695 @@
|
|||
//
|
||||
// rbbisetb.cpp
|
||||
//
|
||||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2002-2008 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
//
|
||||
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
|
||||
// (part of the rule building process.)
|
||||
//
|
||||
// Starting with the rules parse tree from the scanner,
|
||||
//
|
||||
// - Enumerate the set of UnicodeSets that are referenced
|
||||
// by the RBBI rules.
|
||||
// - compute a set of non-overlapping character ranges
|
||||
// with all characters within a range belonging to the same
|
||||
// set of input unicode sets.
|
||||
// - Derive a set of non-overlapping UnicodeSet (like things)
|
||||
// that will correspond to columns in the state table for
|
||||
// the RBBI execution engine. All characters within one
|
||||
// of these sets belong to the same set of the original
|
||||
// UnicodeSets from the user's rules.
|
||||
// - construct the trie table that maps input characters
|
||||
// to the index of the matching non-overlapping set of set from
|
||||
// the previous step.
|
||||
//
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "utrie.h"
|
||||
#include "uvector.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
#include "rbbisetb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFoldedRBBIValue Call-back function used during building of Trie table.
|
||||
// Folding value: just store the offset (16 bits)
|
||||
// if there is any non-0 entry.
|
||||
// (It'd really be nice if the Trie builder would provide a
|
||||
// simple default, so this function could go away from here.)
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
|
||||
U_CDECL_BEGIN
|
||||
static uint32_t U_CALLCONV
|
||||
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
|
||||
uint32_t value;
|
||||
UChar32 limit;
|
||||
UBool inBlockZero;
|
||||
|
||||
limit=start+0x400;
|
||||
while(start<limit) {
|
||||
value=utrie_get32(trie, start, &inBlockZero);
|
||||
if(inBlockZero) {
|
||||
start+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(value!=0) {
|
||||
return (uint32_t)(offset|0x8000);
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// Constructor
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
// Remembers the owning rule builder, picks up its fStatus, and starts with
// no range list, no trie, zero trie size, zero groups and no {bof} seen.
RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
    : fRB(rb),
      fStatus(rb->fStatus),
      fRangeList(NULL),
      fTrie(NULL),
      fTrieSize(0),
      fGroupCount(0),
      fSawBOF(FALSE)
{
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// Destructor
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
// Releases the linked list of RangeDescriptors and closes the trie.
RBBISetBuilder::~RBBISetBuilder()
{
    // Read each node's successor before deleting the node itself.
    RangeDescriptor *current = fRangeList;
    while (current != NULL) {
        RangeDescriptor *following = current->fNext;
        delete current;
        current = following;
    }

    utrie_close(fTrie);
}
|
||||
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// build Build the list of non-overlapping character ranges
|
||||
// from the Unicode Sets.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void RBBISetBuilder::build() {
|
||||
RBBINode *usetNode;
|
||||
RangeDescriptor *rlRange;
|
||||
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
|
||||
|
||||
//
|
||||
// Initialize the process by creating a single range encompassing all characters
|
||||
// that is in no sets.
|
||||
//
|
||||
fRangeList = new RangeDescriptor(*fStatus); // will check for status here
|
||||
if (fRangeList == NULL) {
|
||||
*fStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
fRangeList->fStartChar = 0;
|
||||
fRangeList->fEndChar = 0x10ffff;
|
||||
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
|
||||
//
|
||||
// Find the set of non-overlapping ranges of characters
|
||||
//
|
||||
int ni;
|
||||
for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
|
||||
usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
|
||||
if (usetNode==NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
UnicodeSet *inputSet = usetNode->fInputSet;
|
||||
int32_t inputSetRangeCount = inputSet->getRangeCount();
|
||||
int inputSetRangeIndex = 0;
|
||||
rlRange = fRangeList;
|
||||
|
||||
for (;;) {
|
||||
if (inputSetRangeIndex >= inputSetRangeCount) {
|
||||
break;
|
||||
}
|
||||
UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex);
|
||||
UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex);
|
||||
|
||||
// skip over ranges from the range list that are completely
|
||||
// below the current range from the input unicode set.
|
||||
while (rlRange->fEndChar < inputSetRangeBegin) {
|
||||
rlRange = rlRange->fNext;
|
||||
}
|
||||
|
||||
// If the start of the range from the range list is before with
|
||||
// the start of the range from the unicode set, split the range list range
|
||||
// in two, with one part being before (wholly outside of) the unicode set
|
||||
// and the other containing the rest.
|
||||
// Then continue the loop; the post-split current range will then be skipped
|
||||
// over
|
||||
if (rlRange->fStartChar < inputSetRangeBegin) {
|
||||
rlRange->split(inputSetRangeBegin, *fStatus);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Same thing at the end of the ranges...
|
||||
// If the end of the range from the range list doesn't coincide with
|
||||
// the end of the range from the unicode set, split the range list
|
||||
// range in two. The first part of the split range will be
|
||||
// wholly inside the Unicode set.
|
||||
if (rlRange->fEndChar > inputSetRangeEnd) {
|
||||
rlRange->split(inputSetRangeEnd+1, *fStatus);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// The current rlRange is now entirely within the UnicodeSet range.
|
||||
// Add this unicode set to the list of sets for this rlRange
|
||||
if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
|
||||
rlRange->fIncludesSets->addElement(usetNode, *fStatus);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Advance over ranges that we are finished with.
|
||||
if (inputSetRangeEnd == rlRange->fEndChar) {
|
||||
inputSetRangeIndex++;
|
||||
}
|
||||
rlRange = rlRange->fNext;
|
||||
}
|
||||
}
|
||||
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
|
||||
|
||||
//
|
||||
// Group the above ranges, with each group consisting of one or more
|
||||
// ranges that are in exactly the same set of original UnicodeSets.
|
||||
// The groups are numbered, and these group numbers are the set of
|
||||
// input symbols recognized by the run-time state machine.
|
||||
//
|
||||
// Numbering: # 0 (state table column 0) is unused.
|
||||
// # 1 is reserved - table column 1 is for end-of-input
|
||||
// # 2 is reserved - table column 2 is for beginning-in-input
|
||||
// # 3 is the first range list.
|
||||
//
|
||||
RangeDescriptor *rlSearchRange;
|
||||
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
|
||||
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
|
||||
if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
|
||||
rlRange->fNum = rlSearchRange->fNum;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rlRange->fNum == 0) {
|
||||
fGroupCount ++;
|
||||
rlRange->fNum = fGroupCount+2;
|
||||
rlRange->setDictionaryFlag();
|
||||
addValToSets(rlRange->fIncludesSets, fGroupCount+2);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle input sets that contain the special string {eof}.
|
||||
// Column 1 of the state table is reserved for EOF on input.
|
||||
// Column 2 is reserved for before-the-start-input.
|
||||
// (This column can be optimized away later if there are no rule
|
||||
// references to {bof}.)
|
||||
// Add this column value (1 or 2) to the equivalent expression
|
||||
// subtree for each UnicodeSet that contains the string {eof}
|
||||
// Because {bof} and {eof} are not characters in the normal sense,
|
||||
// they don't affect the computation of ranges or TRIE.
|
||||
static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
|
||||
static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
|
||||
|
||||
UnicodeString eofString(eofUString);
|
||||
UnicodeString bofString(bofUString);
|
||||
for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
|
||||
usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
|
||||
if (usetNode==NULL) {
|
||||
break;
|
||||
}
|
||||
UnicodeSet *inputSet = usetNode->fInputSet;
|
||||
if (inputSet->contains(eofString)) {
|
||||
addValToSet(usetNode, 1);
|
||||
}
|
||||
if (inputSet->contains(bofString)) {
|
||||
addValToSet(usetNode, 2);
|
||||
fSawBOF = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
|
||||
|
||||
//
|
||||
// Build the Trie table for mapping UChar32 values to the corresponding
|
||||
// range group number
|
||||
//
|
||||
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in
|
||||
NULL, // Data array (utrie will allocate one)
|
||||
100000, // Max Data Length
|
||||
0, // Initial value for all code points
|
||||
0, // Lead surrogate unit value
|
||||
TRUE); // Keep Latin 1 in separately
|
||||
|
||||
|
||||
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
|
||||
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
int32_t RBBISetBuilder::getTrieSize() /*const*/ {
    // Size-only call: no output buffer is supplied (NULL buffer, zero capacity).
    // The value returned by utrie_serialize() is remembered in fTrieSize, which
    // serializeTrie() later passes as the buffer capacity. That update of
    // fTrieSize is why this function is not const.
    fTrieSize = utrie_serialize(fTrie,
                                NULL, 0,             // buffer, capacity
                                getFoldedRBBIValue,
                                TRUE,                // reduce to 16 bits
                                fStatus);
    return fTrieSize;
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// serializeTrie() Put the serialized trie at the specified address.
|
||||
// Trust the caller to have given us enough memory.
|
||||
// getTrieSize() MUST be called first.
|
||||
//
|
||||
//-----------------------------------------------------------------------------------
|
||||
void RBBISetBuilder::serializeTrie(uint8_t *where) {
    // The capacity handed to utrie_serialize() is fTrieSize, the value stored
    // by getTrieSize(); the caller is trusted to have provided at least that
    // much memory at "where". The returned length is not needed here.
    const UBool reduceTo16Bits = TRUE;
    utrie_serialize(fTrie, where, fTrieSize, getFoldedRBBIValue, reduceTo16Bits, fStatus);
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// addValToSets Add a runtime-mapped input value to each uset from a
|
||||
// list of uset nodes. (val corresponds to a state table column.)
|
||||
// For each of the original Unicode sets - which correspond
|
||||
// directly to uset nodes - a logically equivalent expression
|
||||
// is constructed in terms of the remapped runtime input
|
||||
// symbol set. This function adds one runtime input symbol to
|
||||
// a list of sets.
|
||||
//
|
||||
// The "logically equivalent expression" is the tree for an
|
||||
// or-ing together of all of the symbols that go into the set.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
    // Each element of "sets" is a pointer to a uset node; give every one of
    // them the same runtime input symbol.
    for (int32_t setIdx = 0; setIdx < sets->size(); ++setIdx) {
        addValToSet(static_cast<RBBINode *>(sets->elementAt(setIdx)), val);
    }
}
|
||||
|
||||
//
//  addValToSet     Add one runtime-mapped input value to a single uset node.
//
//                  A new leafChar node carrying the value is created. If the uset
//                  node has no left child yet, the leaf becomes its left child.
//                  Otherwise a new opOr node is inserted whose left child is the
//                  previous subtree and whose right child is the new leaf.
//
//                  On allocation failure *fStatus is set to
//                  U_MEMORY_ALLOCATION_ERROR and the tree is left unchanged.
//
void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
    RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
    if (leafNode == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    leafNode->fVal = (unsigned short)val;     // value is narrowed to unsigned short

    if (usetNode->fLeftChild == NULL) {
        // First input symbol for this set: hang the leaf directly off the uset node.
        usetNode->fLeftChild = leafNode;
        leafNode->fParent    = usetNode;
        return;
    }

    // There are already input symbols present for this set.
    // Set up an OR node, with the previous stuff as the left child
    //   and the new value as the right child.
    RBBINode *orNode = new RBBINode(RBBINode::opOr);
    if (orNode == NULL) {
        // The leaf has not been linked into the tree yet, so nothing else
        // owns it; release it here rather than leaking it.
        delete leafNode;
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    orNode->fLeftChild           = usetNode->fLeftChild;
    orNode->fRightChild          = leafNode;
    orNode->fLeftChild->fParent  = orNode;
    orNode->fRightChild->fParent = orNode;
    usetNode->fLeftChild         = orNode;
    orNode->fParent              = usetNode;
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getNumCharCategories
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
int32_t RBBISetBuilder::getNumCharCategories() const {
    // Category numbers 0, 1 and 2 are set aside (unused, end-of-input,
    // beginning-of-input); the range groups follow them.
    const int32_t reservedCategories = 3;
    return fGroupCount + reservedCategories;
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// sawBOF
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
UBool RBBISetBuilder::sawBOF() const {
    // fSawBOF is set while building, when an input set contains the string "bof".
    return fSawBOF;
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// getFirstChar Given a runtime RBBI character category, find
|
||||
// the first UChar32 that is in the set of chars
|
||||
// in the category.
|
||||
//------------------------------------------------------------------------
|
||||
UChar32 RBBISetBuilder::getFirstChar(int32_t category) const {
    // Walk the range list in order and report the start of the first range
    // whose number matches the requested category.
    for (const RangeDescriptor *range = fRangeList; range != NULL; range = range->fNext) {
        if (range->fNum == category) {
            return range->fStartChar;
        }
    }
    // No range carries this category number.
    return (UChar32)-1;
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRanges A debugging function.
|
||||
// dump out all of the range definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBISetBuilder::printRanges() {
|
||||
RangeDescriptor *rlRange;
|
||||
int i;
|
||||
|
||||
RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
|
||||
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
|
||||
RBBIDebugPrintf("%2i %4x-%4x ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
|
||||
|
||||
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
|
||||
UnicodeString setName = UNICODE_STRING("anon", 4);
|
||||
RBBINode *setRef = usetNode->fParent;
|
||||
if (setRef != NULL) {
|
||||
RBBINode *varRef = setRef->fParent;
|
||||
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
|
||||
setName = varRef->fText;
|
||||
}
|
||||
}
|
||||
RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printRangeGroups A debugging function.
|
||||
// dump out all of the range groups.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBISetBuilder::printRangeGroups() {
|
||||
RangeDescriptor *rlRange;
|
||||
RangeDescriptor *tRange;
|
||||
int i;
|
||||
int lastPrintedGroupNum = 0;
|
||||
|
||||
RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
|
||||
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
|
||||
int groupNum = rlRange->fNum & 0xbfff;
|
||||
if (groupNum > lastPrintedGroupNum) {
|
||||
lastPrintedGroupNum = groupNum;
|
||||
RBBIDebugPrintf("%2i ", groupNum);
|
||||
|
||||
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
|
||||
|
||||
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
|
||||
UnicodeString setName = UNICODE_STRING("anon", 4);
|
||||
RBBINode *setRef = usetNode->fParent;
|
||||
if (setRef != NULL) {
|
||||
RBBINode *varRef = setRef->fParent;
|
||||
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
|
||||
setName = varRef->fText;
|
||||
}
|
||||
}
|
||||
RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
|
||||
if (tRange->fNum == rlRange->fNum) {
|
||||
if (i++ % 5 == 0) {
|
||||
RBBIDebugPrintf("\n ");
|
||||
}
|
||||
RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar);
|
||||
}
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
// printSets A debugging function.
|
||||
// dump out all of the set definitions.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBISetBuilder::printSets() {
|
||||
int i;
|
||||
|
||||
RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
|
||||
for (i=0; ; i++) {
|
||||
RBBINode *usetNode;
|
||||
RBBINode *setRef;
|
||||
RBBINode *varRef;
|
||||
UnicodeString setName;
|
||||
|
||||
usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
|
||||
if (usetNode == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
RBBIDebugPrintf("%3d ", i);
|
||||
setName = UNICODE_STRING("anonymous", 9);
|
||||
setRef = usetNode->fParent;
|
||||
if (setRef != NULL) {
|
||||
varRef = setRef->fParent;
|
||||
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
|
||||
setName = varRef->fText;
|
||||
}
|
||||
}
|
||||
RBBI_DEBUG_printUnicodeString(setName);
|
||||
RBBIDebugPrintf(" ");
|
||||
RBBI_DEBUG_printUnicodeString(usetNode->fText);
|
||||
RBBIDebugPrintf("\n");
|
||||
if (usetNode->fLeftChild != NULL) {
|
||||
usetNode->fLeftChild->printTree(TRUE);
|
||||
}
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor copy constructor
|
||||
//
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status)
    : fStartChar(other.fStartChar),
      fEndChar(other.fEndChar),
      fNum(other.fNum),
      fIncludesSets(NULL),
      fNext(NULL)                  // the copy is not linked into any list
{
    // If an error was already pending on entry, it is restored after the
    // UVector has been constructed.
    const UErrorCode statusOnEntry = status;
    fIncludesSets = new UVector(status);
    if (U_FAILURE(statusOnEntry)) {
        status = statusOnEntry;
    }
    if (U_FAILURE(status)) {
        return;
    }
    if (fIncludesSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Copy the set-node pointers; the nodes themselves are shared, not cloned.
    UVector *sourceSets = other.fIncludesSets;
    for (int32_t idx = 0; idx < sourceSets->size(); idx++) {
        fIncludesSets->addElement(sourceSets->elementAt(idx), status);
    }
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor default constructor
|
||||
//
|
||||
//-------------------------------------------------------------------------------------
|
||||
RangeDescriptor::RangeDescriptor(UErrorCode &status)
    : fStartChar(0),
      fEndChar(0),
      fNum(0),
      fIncludesSets(NULL),
      fNext(NULL)
{
    // If an error was already pending on entry, it is restored after the
    // UVector has been constructed.
    const UErrorCode statusOnEntry = status;
    fIncludesSets = new UVector(status);
    if (U_FAILURE(statusOnEntry)) {
        status = statusOnEntry;
    }

    // With no error so far, a NULL vector means the allocation itself failed.
    if (U_SUCCESS(status) && fIncludesSets == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor Destructor
|
||||
//
|
||||
//-------------------------------------------------------------------------------------
|
||||
RangeDescriptor::~RangeDescriptor() {
    // Only the vector is deleted here; fNext is left untouched.
    delete fIncludesSets;
    fIncludesSets = NULL;
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor::split()
|
||||
//
|
||||
//-------------------------------------------------------------------------------------
|
||||
void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
    U_ASSERT(where>fStartChar && where<=fEndChar);

    // Start from a full copy of this range; it becomes the upper part.
    RangeDescriptor *upperPart = new RangeDescriptor(*this, status);
    if (upperPart == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_FAILURE(status)) {
        delete upperPart;
        return;
    }

    // Link the new descriptor into the list directly after this one.
    upperPart->fNext = this->fNext;
    this->fNext      = upperPart;

    // This range now ends just below "where"; the new one starts at "where".
    upperPart->fStartChar = where;
    this->fEndChar        = where - 1;
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
//
|
||||
// RangeDescriptor::setDictionaryFlag
|
||||
//
|
||||
// Character Category Numbers that include characters from
|
||||
// the original Unicode Set named "dictionary" have bit 14
|
||||
// set to 1. The RBBI runtime engine uses this to trigger
|
||||
// use of the word dictionary.
|
||||
//
|
||||
// This function looks through the Unicode Sets that it
|
||||
// (the range) includes, and sets the bit in fNum when
|
||||
// "dictionary" is among them.
|
||||
//
|
||||
// TODO: a faster way would be to find the set node for
|
||||
// "dictionary" just once, rather than looking it
|
||||
// up by name every time.
|
||||
//
|
||||
//-------------------------------------------------------------------------------------
|
||||
void RangeDescriptor::setDictionaryFlag() {
|
||||
int i;
|
||||
|
||||
for (i=0; i<this->fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
|
||||
UnicodeString setName;
|
||||
RBBINode *setRef = usetNode->fParent;
|
||||
if (setRef != NULL) {
|
||||
RBBINode *varRef = setRef->fParent;
|
||||
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
|
||||
setName = varRef->fText;
|
||||
}
|
||||
}
|
||||
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
|
||||
this->fNum |= 0x4000;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
130
source/common/rbbisetb.h
Normal file
130
source/common/rbbisetb.h
Normal file
|
@ -0,0 +1,130 @@
|
|||
//
|
||||
// rbbisetb.h
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2001-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RBBISETB_H
|
||||
#define RBBISETB_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "rbbirb.h"
|
||||
#include "uvector.h"
|
||||
|
||||
struct UNewTrie;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//
|
||||
// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
|
||||
// from the Unicode Sets appearing in the source RBBI rules, and
|
||||
// creates the TRIE table used to map from Unicode to the
|
||||
// character categories.
|
||||
//
|
||||
|
||||
|
||||
//
|
||||
// RangeDescriptor
|
||||
//
|
||||
// Each of the non-overlapping character ranges gets one of these descriptors.
|
||||
// All of them are strung together in a linked list, which is kept in order
|
||||
// (by character)
|
||||
//
|
||||
class RangeDescriptor : public UMemory {
|
||||
public:
|
||||
UChar32 fStartChar; // Start of range, unicode 32 bit value.
|
||||
UChar32 fEndChar; // End of range, unicode 32 bit value.
|
||||
int32_t fNum; // runtime-mapped input value for this range.
|
||||
UVector *fIncludesSets; // vector of the the original
|
||||
// Unicode sets that include this range.
|
||||
// (Contains ptrs to uset nodes)
|
||||
RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
|
||||
|
||||
RangeDescriptor(UErrorCode &status);
|
||||
RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
|
||||
~RangeDescriptor();
|
||||
void split(UChar32 where, UErrorCode &status); // Spit this range in two at "where", with
|
||||
// where appearing in the second (higher) part.
|
||||
void setDictionaryFlag(); // Check whether this range appears as part of
|
||||
// the Unicode set named "dictionary"
|
||||
|
||||
private:
|
||||
RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
|
||||
RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
|
||||
//
|
||||
// Starting with the rules parse tree from the scanner,
|
||||
//
|
||||
// - Enumerate the set of UnicodeSets that are referenced
|
||||
// by the RBBI rules.
|
||||
// - compute a derived set of non-overlapping UnicodeSets
|
||||
// that will correspond to columns in the state table for
|
||||
// the RBBI execution engine.
|
||||
// - construct the trie table that maps input characters
|
||||
// to set numbers in the non-overlapping set of sets.
|
||||
//
|
||||
|
||||
|
||||
class RBBISetBuilder : public UMemory {
|
||||
public:
|
||||
RBBISetBuilder(RBBIRuleBuilder *rb);
|
||||
~RBBISetBuilder();
|
||||
|
||||
void build();
|
||||
void addValToSets(UVector *sets, uint32_t val);
|
||||
void addValToSet (RBBINode *usetNode, uint32_t val);
|
||||
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
|
||||
// runtime state machine, which are the same as
|
||||
// columns in the DFA state table
|
||||
int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
|
||||
void serializeTrie(uint8_t *where); // write out the serialized Trie.
|
||||
UChar32 getFirstChar(int32_t val) const;
|
||||
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
|
||||
// character were encountered.
|
||||
#ifdef RBBI_DEBUG
|
||||
void printSets();
|
||||
void printRanges();
|
||||
void printRangeGroups();
|
||||
#else
|
||||
#define printSets()
|
||||
#define printRanges()
|
||||
#define printRangeGroups()
|
||||
#endif
|
||||
|
||||
private:
|
||||
void numberSets();
|
||||
|
||||
RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
|
||||
UErrorCode *fStatus;
|
||||
|
||||
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
|
||||
|
||||
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
|
||||
uint32_t fTrieSize; // the Unicode Sets.
|
||||
|
||||
// Groups correspond to character categories -
|
||||
// groups of ranges that are in the same original UnicodeSets.
|
||||
// fGroupCount is the index of the last used group.
|
||||
// fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
|
||||
// State table column 0 is not used. Column 1 is for end-of-input.
|
||||
// column 2 is for group 0. Funny counting.
|
||||
int32_t fGroupCount;
|
||||
|
||||
UBool fSawBOF;
|
||||
|
||||
RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
|
||||
RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
269
source/common/rbbistbl.cpp
Normal file
269
source/common/rbbistbl.cpp
Normal file
|
@ -0,0 +1,269 @@
|
|||
//
|
||||
// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
|
||||
//
|
||||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2002-2006 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/parsepos.h"
|
||||
|
||||
#include "umutex.h"
|
||||
|
||||
#include "rbbirb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
|
||||
// when the hash table is deleted.
|
||||
//
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
    // The hash table hands back an untyped pointer; restore the entry type so
    // that the entry's destructor runs.
    delete static_cast<U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *>(p);
}
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
    :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
{
    fCachedSetLookup = NULL;

    // Keys are hashed and compared as UnicodeStrings. uhash_open checks status.
    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
    if (U_SUCCESS(status)) {
        // Let the table delete its RBBISymbolTableEntry values itself.
        uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
    }
}
|
||||
|
||||
|
||||
|
||||
RBBISymbolTable::~RBBISymbolTable()
{
    // The constructor returns early when uhash_open() reports a failure, so
    // there may be no table to close. Closing a table also disposes of its
    // entries through the value deleter that the constructor installed.
    if (fHashTable != NULL) {
        uhash_close(fHashTable);
        fHashTable = NULL;
    }
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookup This function from the abstract symbol table interface
|
||||
// looks up a variable name and returns a UnicodeString
|
||||
// containing the substitution text.
|
||||
//
|
||||
// The variable name does NOT include the leading $.
|
||||
//
|
||||
const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
{
    // This function is const, but it has to update the cached set pointer
    // that lookupMatcher() reads.
    RBBISymbolTable *mutableThis = const_cast<RBBISymbolTable *>(this);

    RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
    if (entry == NULL) {
        return NULL;
    }

    // The left child of the variable reference node is the root of the
    // expression that the variable stands for.
    RBBINode *exprNode = entry->val->fLeftChild;

    if (exprNode->fType != RBBINode::setRef) {
        // The variable refers to something other than just a set.
        // Return the original source string for the expression.
        mutableThis->fCachedSetLookup = NULL;
        return &exprNode->fText;
    }

    // The $variable refers to a single UnicodeSet. Remember the set and
    // return the ffffString, which will subsequently be interpreted as a
    // stand-in character for the set by RBBISymbolTable::lookupMatcher().
    mutableThis->fCachedSetLookup = exprNode->fLeftChild->fInputSet;
    return &ffffString;
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
|
||||
// interface maps a single stand-in character to a
|
||||
// pointer to a Unicode Set. The Unicode Set code uses this
|
||||
// mechanism to get all references to the same $variable
|
||||
// name to refer to a single common Unicode Set instance.
|
||||
//
|
||||
// This implementation cheats a little, and does not maintain a map of stand-in chars
|
||||
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
|
||||
// constructor will always call this function right after calling lookup(),
|
||||
// and we just need to remember what set to return between these two calls.
|
||||
const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
{
    // Only the 0xffff stand-in character maps to a set.
    if (ch != 0xffff) {
        return NULL;
    }

    // Hand out the set remembered by lookup() and clear the cache, so the
    // same set is not returned a second time.
    UnicodeSet *cachedSet = fCachedSetLookup;
    const_cast<RBBISymbolTable *>(this)->fCachedSetLookup = 0;
    return cachedSet;
}
|
||||
|
||||
//
|
||||
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
|
||||
// looks for a $variable name in the source text.
|
||||
// It does not look it up, only scans for it.
|
||||
// It is used by the UnicodeSet parser.
|
||||
//
|
||||
// This implementation is lifted pretty much verbatim
|
||||
// from the rules based transliterator implementation.
|
||||
// I didn't see an obvious way of sharing it.
|
||||
//
|
||||
UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
                                              ParsePosition& pos, int32_t limit) const
{
    const int32_t start = pos.getIndex();
    UnicodeString result;

    // Advance "end" past the longest run of identifier characters; the first
    // character must additionally qualify as an identifier start.
    int32_t end = start;
    for (; end < limit; ++end) {
        const UChar c = text.charAt(end);
        if (!u_isIDPart(c)) {
            break;
        }
        if (end == start && !u_isIDStart(c)) {
            break;
        }
    }

    if (end == start) {
        // No valid name chars: indicate failure with an empty string and
        // leave the parse position where it was.
        return result;
    }

    pos.setIndex(end);
    text.extractBetween(start, end, result);
    return result;
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::lookupNode Given a key (a variable name), return the
|
||||
// corresponding RBBI Node. If there is no entry
|
||||
// in the table for this name, return NULL.
|
||||
//
|
||||
RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
    // A name that is not in the table yields NULL.
    const RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
    return (entry != NULL) ? entry->val : NULL;
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
|
||||
// Indicate an error if the name already exists -
|
||||
// this will only occur in the case of duplicate
|
||||
// variable assignments.
|
||||
//
|
||||
void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
|
||||
RBBISymbolTableEntry *e;
|
||||
/* test for buffer overflows */
|
||||
if (U_FAILURE(err)) {
|
||||
return;
|
||||
}
|
||||
e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
|
||||
if (e != NULL) {
|
||||
err = U_BRK_VARIABLE_REDFINITION;
|
||||
return;
|
||||
}
|
||||
|
||||
e = new RBBISymbolTableEntry;
|
||||
if (e == NULL) {
|
||||
err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
e->key = key;
|
||||
e->val = val;
|
||||
uhash_put( fHashTable, &e->key, e, &err);
|
||||
}
|
||||
|
||||
|
||||
// A new entry starts with an empty key and no node; RBBISymbolTable::addEntry()
// fills in both.
RBBISymbolTableEntry::RBBISymbolTableEntry()
    : UMemory(), key(), val(NULL)
{
}
|
||||
|
||||
RBBISymbolTableEntry::~RBBISymbolTableEntry() {
    // The "val" of a symbol table entry is a variable reference node.
    // The l. child of the val is the rhs expression from the assignment.
    // Unlike other node types, children of variable reference nodes are not
    //    automatically recursively deleted.  We do it manually here.
    //
    // The default constructor leaves val NULL, so an entry destroyed before a
    // node was attached must not be dereferenced.
    if (val != NULL) {
        delete val->fLeftChild;
        val->fLeftChild = NULL;

        delete val;
    }

    // Note:  the key UnicodeString is destructed by virtue of being in the object by value.
}
|
||||
|
||||
|
||||
//
|
||||
// RBBISymbolTable::rbbiSymtablePrint Debugging function, dump out the symbol table contents.
|
||||
//
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBISymbolTable::rbbiSymtablePrint() const {
|
||||
RBBIDebugPrintf("Variable Definitions\n"
|
||||
"Name Node Val String Val\n"
|
||||
"----------------------------------------------------------------------\n");
|
||||
|
||||
int32_t pos = -1;
|
||||
const UHashElement *e = NULL;
|
||||
for (;;) {
|
||||
e = uhash_nextElement(fHashTable, &pos);
|
||||
if (e == NULL ) {
|
||||
break;
|
||||
}
|
||||
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
|
||||
|
||||
RBBI_DEBUG_printUnicodeString(s->key, 15);
|
||||
RBBIDebugPrintf(" %8p ", (void *)s->val);
|
||||
RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
|
||||
RBBIDebugPrintf("\nParsed Variable Definitions\n");
|
||||
pos = -1;
|
||||
for (;;) {
|
||||
e = uhash_nextElement(fHashTable, &pos);
|
||||
if (e == NULL ) {
|
||||
break;
|
||||
}
|
||||
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
|
||||
RBBI_DEBUG_printUnicodeString(s->key);
|
||||
s->val->fLeftChild->printTree(TRUE);
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
1278
source/common/rbbitblb.cpp
Normal file
1278
source/common/rbbitblb.cpp
Normal file
File diff suppressed because it is too large
Load diff
127
source/common/rbbitblb.h
Normal file
127
source/common/rbbitblb.h
Normal file
|
@ -0,0 +1,127 @@
|
|||
//
|
||||
// rbbitblb.h
|
||||
//
|
||||
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2005, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RBBITBLB_H
|
||||
#define RBBITBLB_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class RBBIRuleScanner;
|
||||
class RBBIRuleBuilder;
|
||||
|
||||
//
|
||||
// class RBBITableBuilder is part of the RBBI rule compiler.
|
||||
// It builds the state transition table used by the RBBI runtime
|
||||
// from the expression syntax tree generated by the rule scanner.
|
||||
//
|
||||
// This class is part of the RBBI implementation only.
|
||||
// There is no user-visible public API here.
|
||||
//
|
||||
|
||||
// Declaration of the table builder: a public build/size/export interface,
// private computation helpers, optional debug printers, and private state.
class RBBITableBuilder : public UMemory {
|
||||
public:
|
||||
// NOTE(review): fTree below is a reference to an RBBINode pointer, so the
// constructor presumably binds it to *rootNode - confirm in rbbitblb.cpp.
RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode);
|
||||
~RBBITableBuilder();
|
||||
|
||||
void build();
|
||||
int32_t getTableSize() const; // Return the runtime size in bytes of
|
||||
// the built state table
|
||||
void exportTable(void *where); // fill in the runtime state table.
|
||||
// Sufficient memory must exist at
|
||||
// the specified location.
|
||||
|
||||
|
||||
private:
|
||||
// NOTE(review): the calc* names match the nullable / firstpos / lastpos /
// followpos functions of the classic direct regex-to-DFA construction
// (cf. "Aho's terminology" on fDStates) - confirm in rbbitblb.cpp.
void calcNullable(RBBINode *n);
|
||||
void calcFirstPos(RBBINode *n);
|
||||
void calcLastPos(RBBINode *n);
|
||||
void calcFollowPos(RBBINode *n);
|
||||
void calcChainedFollowPos(RBBINode *n);
|
||||
void bofFixup();
|
||||
void buildStateTable();
|
||||
void flagAcceptingStates();
|
||||
void flagLookAheadStates();
|
||||
void flagTaggedStates();
|
||||
void mergeRuleStatusVals();
|
||||
|
||||
// Set functions for UVector.
|
||||
// TODO: make a USet subclass of UVector
|
||||
|
||||
void setAdd(UVector *dest, UVector *source);
|
||||
UBool setEquals(UVector *a, UVector *b);
|
||||
|
||||
void sortedAdd(UVector **dest, int32_t val);
|
||||
|
||||
public:
|
||||
// Debug printing. With RBBI_DEBUG defined these are real member functions.
#ifdef RBBI_DEBUG
|
||||
void printSet(UVector *s);
|
||||
void printPosSets(RBBINode *n /* = NULL*/);
|
||||
void printStates();
|
||||
void printRuleStatusTable();
|
||||
#else
|
||||
// Without RBBI_DEBUG the same names are function-like macros with empty
// expansions, so calls to them compile to nothing. Being preprocessor
// macros, they are not scoped to this class and affect every use of these
// names after this point in any file that includes this header.
#define printSet(s)
|
||||
#define printPosSets(n)
|
||||
#define printStates()
|
||||
#define printRuleStatusTable()
|
||||
#endif
|
||||
|
||||
private:
|
||||
RBBIRuleBuilder *fRB;
|
||||
RBBINode *&fTree; // The root node of the parse tree to build a
|
||||
// table for.
|
||||
UErrorCode *fStatus;
|
||||
|
||||
UVector *fDStates; // D states (Aho's terminology)
|
||||
// Index is state number
|
||||
// Contents are RBBIStateDescriptor pointers.
|
||||
|
||||
|
||||
// Both copy operations are declared private so that code outside the class
// cannot copy a table builder.
RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
|
||||
RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
//
|
||||
// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
|
||||
// one for each state.
|
||||
// Per-state record: all data members are public; copying is disabled by the
// private copy constructor and assignment operator at the end.
class RBBIStateDescriptor : public UMemory {
|
||||
public:
|
||||
// NOTE(review): the five fields below carry no documentation in this header.
// The names suggest a "processed" mark, accepting / look-ahead values and
// rule-status tag data - confirm against rbbitblb.cpp before relying on it.
UBool fMarked;
|
||||
int32_t fAccepting;
|
||||
int32_t fLookAhead;
|
||||
UVector *fTagVals;
|
||||
int32_t fTagsIdx;
|
||||
UVector *fPositions; // Set of parse tree positions associated
|
||||
// with this state. Unordered (it's a set).
|
||||
// UVector contents are RBBINode *
|
||||
|
||||
UVector *fDtran; // Transitions out of this state.
|
||||
// indexed by input character
|
||||
// contents is int index of dest state
|
||||
// in RBBITableBuilder.fDStates
|
||||
|
||||
// Note that the error-code parameter is a pointer and is named fStatus,
// like a member, although it is only a constructor argument here.
RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus);
|
||||
~RBBIStateDescriptor();
|
||||
|
||||
private:
|
||||
RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class
|
||||
RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class
|
||||
};
|
||||
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
400
source/common/resbund.cpp
Normal file
400
source/common/resbund.cpp
Normal file
|
@ -0,0 +1,400 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File resbund.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile
|
||||
* based on code taken from scanForLocale. Added
|
||||
* constructor which attempts to read resource bundle
|
||||
* from a specific file, without searching other files.
|
||||
* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed
|
||||
* infinite loops in scanForFile and scanForLocale.
|
||||
* Modified getRawResourceData to not delete storage in
|
||||
* localeData and resourceData which it doesn't own.
|
||||
* Added Mac compatibility #ifdefs for tellp() and
|
||||
* ios::nocreate.
|
||||
* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of
|
||||
* the highly inefficient ostrstream objects.
|
||||
* 03/13/97 aliu Rewrote to load in entire resource bundle and store
|
||||
* it as a Hashtable of ResourceBundleData objects.
|
||||
* Added state table to govern parsing of files.
|
||||
* Modified to load locale index out of new file distinct
|
||||
* from default.txt.
|
||||
* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
|
||||
* Added support for custom file suffixes. Again, needed
|
||||
* to support timezone data. Improved error handling to
|
||||
* detect duplicate tags and subtags.
|
||||
* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling
|
||||
* of failing UErrorCode values on entry to API methods.
|
||||
* Fixed bugs in getArrayItem() for negative indices.
|
||||
* 04/29/97 aliu Update to use new Hashtable deletion protocol.
|
||||
* 05/06/97 aliu Flattened kTransitionTable for HP compiler.
|
||||
* Fixed usage of CharString.
|
||||
* 06/11/99 stephen Removed parsing of .txt files.
|
||||
* Reworked to use new binary format.
|
||||
* Cleaned up.
|
||||
* 06/14/99 stephen Removed methods taking a filename suffix.
|
||||
* 06/22/99 stephen Added missing T_FileStream_close in parse()
|
||||
* 11/09/99 weiv Added getLocale(), rewritten constructForLocale()
|
||||
* March 2000 weiv complete overhaul.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "umutex.h"
|
||||
|
||||
#include "uresimp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
* Implementation Notes
|
||||
*
|
||||
* Resource bundles are read in once, and thereafter cached.
|
||||
* ResourceBundle statically keeps track of which files have been
|
||||
* read, so we are guaranteed that each file is read at most once.
|
||||
* Resource bundles can be loaded from different data directories and
|
||||
* will be treated as distinct, even if they are for the same locale.
|
||||
*
|
||||
* Resource bundles are lightweight objects, which have pointers to
|
||||
* one or more shared Hashtable objects containing all the data.
|
||||
* Copying would be cheap, but there is no copy constructor, since
|
||||
* there wasn't one in the original API.
|
||||
*
|
||||
* The ResourceBundle parsing mechanism is implemented as a transition
|
||||
* network, for easy maintenance and modification. The network is
|
||||
* implemented as a matrix (instead of in code) to make this even
|
||||
* easier. The matrix contains Transition objects. Each Transition
|
||||
* object describes a destination node and an action to take before
|
||||
* moving to the destination node. The source node is encoded by the
|
||||
* index of the object in the array that contains it. The pieces
|
||||
* needed to understand the transition network are the enums for node
|
||||
* IDs and actions, the parse() method, which walks through the
|
||||
* network and implements the actions, and the network itself. The
|
||||
* network guarantees certain conditions, for example, that a new
|
||||
* resource will not be closed until one has been opened first; or
|
||||
* that data will not be stored into a TaggedList until a TaggedList
|
||||
* has been created. Nonetheless, the code in parse() does some
|
||||
* consistency checks as it runs the network, and fails with an
|
||||
* U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input
|
||||
* data has a bad format, an U_INVALID_FORMAT_ERROR is returned. If you
|
||||
* see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
|
||||
* it.
|
||||
*
|
||||
* Old functionality of multiple locales in a single file is still
|
||||
* supported. For this reason, LOCALE names override FILE names. If
|
||||
* data for en_US is located in the en.txt file, once it is loaded,
|
||||
* the code will not care where it came from (other than remembering
|
||||
* which directory it came from). However, if there is an en_US
|
||||
* resource in en_US.txt, that will take precedence. There is no
|
||||
* limit to the number or type of resources that can be stored in a
|
||||
* file, however, files are only searched in a specific way. If
|
||||
* en_US_CA is requested, then first en_US_CA.txt is searched, then
|
||||
* en_US.txt, then en.txt, then default.txt. So it only makes sense
|
||||
* to put certain locales in certain files. In this example, it would
|
||||
* be logical to put en_US_CA, en_US, and en into the en.txt file,
|
||||
* since they would be found there if asked for. The extreme example
|
||||
* is to place all locale resources into default.txt, which should
|
||||
* also work.
|
||||
*
|
||||
* Inheritance is implemented. For example, xx_YY_zz inherits as
|
||||
* follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented
|
||||
* as an array of hashtables. There will be from 1 to 4 hashtables in
|
||||
* the array.
|
||||
*
|
||||
* Fallback files are implemented. The fallback pattern is Language
|
||||
* Country Variant (LCV) -> LC -> L. Fallback is first done for the
|
||||
* requested locale. Then it is done for the default locale, as
|
||||
* returned by Locale::getDefault(). Then the special file
|
||||
* default.txt is searched for the default locale. The overall FILE
|
||||
* fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
|
||||
*
|
||||
* Note that although file name searching includes the default locale,
|
||||
* once a ResourceBundle object is constructed, the inheritance path
|
||||
* no longer includes the default locale. The path is LCV -> LC -> L
|
||||
* -> default.
|
||||
*
|
||||
* File parsing is lazy. Nothing is parsed unless it is called for by
|
||||
* someone. So when a ResourceBundle for xx_YY_zz is constructed,
|
||||
* only that locale is parsed (along with anything else in the same
|
||||
* file). Later, if the FooBar tag is asked for, and if it isn't
|
||||
* found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
|
||||
* so forth, until the chain is exhausted or the tag is found.
|
||||
*
|
||||
* Thread-safety is implemented around caches, both the cache that
|
||||
* stores all the resource data, and the cache that stores flags
|
||||
* indicating whether or not a file has been visited. These caches
|
||||
* delete their storage at static cleanup time, when the process
|
||||
* quits.
|
||||
*
|
||||
* ResourceBundle supports TableCollation as a special case. This
|
||||
* involves having special ResourceBundle objects which DO own their
|
||||
* data, since we don't want large collation rule strings in the
|
||||
* ResourceBundle cache (these are already cached in the
|
||||
* TableCollation cache). TableCollation files (.ctx files) have the
|
||||
* same format as normal resource data files, with a different
|
||||
* interpretation, from the standpoint of ResourceBundle. .ctx files
|
||||
* are loaded into otherwise ordinary ResourceBundle objects. They
|
||||
* don't inherit (that's implemented by TableCollation) and they own
|
||||
* their data (as mentioned above). However, they still support
|
||||
* possible multiple locales in a single .ctx file. (This is in
|
||||
* practice a bad idea, since you only want the one locale you're
|
||||
* looking for, and only one tag will be present
|
||||
* ("CollationElements"), so you don't need an inheritance chain of
|
||||
* multiple locales.) Up to 4 locale resources will be loaded from a
|
||||
* .ctx file; everything after the first 4 is ignored (parsed and
|
||||
* deleted). (Normal .txt files have no limit.) Instead of being
|
||||
* loaded into the cache, and then looked up as needed, the locale
|
||||
* resources are read straight into the ResourceBundle object.
|
||||
*
|
||||
* The Index, which used to reside in default.txt, has been moved to a
|
||||
* new file, index.txt. This file contains a slightly modified format
|
||||
* with the addition of the "InstalledLocales" tag; it looks like:
|
||||
*
|
||||
* Index {
|
||||
* InstalledLocales {
|
||||
* ar
|
||||
* ..
|
||||
* zh_TW
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
|
||||
|
||||
/**
 * Opens the bundle for the current default locale, passing a null
 * package path to ures_open(). Failures are reported through err.
 */
ResourceBundle::ResourceBundle(UErrorCode &err)
    : UObject(),
      fLocale(NULL)
{
    const char *defaultLocaleName = Locale::getDefault().getName();
    fResource = ures_open(NULL, defaultLocaleName, &err);
}
|
||||
|
||||
/**
 * Copy constructor. Takes its own copy of other's UResourceBundle via
 * ures_copyResb(); the lazily cached Locale is not copied and starts out NULL.
 */
ResourceBundle::ResourceBundle(const ResourceBundle &other)
    : UObject(other),
      fLocale(NULL)
{
    // A copy constructor cannot report errors, so the status is discarded.
    UErrorCode ignoredStatus = U_ZERO_ERROR;

    if (other.fResource == NULL) {
        /* Copying a bad resource bundle */
        fResource = NULL;
    } else {
        fResource = ures_copyResb(NULL, other.fResource, &ignoredStatus);
    }
}
|
||||
|
||||
/**
 * Wraps a copy of a C-API resource bundle.
 * @param res the bundle to copy with ures_copyResb(); if NULL, this object
 *            holds no resource
 * @param err receives any error raised by ures_copyResb()
 */
ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
    : UObject(),
      fLocale(NULL)
{
    if (res == NULL) {
        /* Copying a bad resource bundle */
        fResource = NULL;
    } else {
        fResource = ures_copyResb(NULL, res, &err);
    }
}
|
||||
|
||||
/**
 * Opens the bundle for the given locale from the given package path by
 * forwarding to ures_open(). Failures are reported through err.
 */
ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
    : UObject(),
      fLocale(NULL)
{
    const char *localeName = locale.getName();
    fResource = ures_open(path, localeName, &err);
}
|
||||
|
||||
|
||||
/**
 * Assignment operator.
 *
 * Releases the resource currently held, discards the lazily cached Locale,
 * and then takes a copy of other's underlying UResourceBundle.
 *
 * The cached Locale must be dropped here: it describes the resource being
 * replaced, and getLocale() only recomputes it while fLocale is NULL.
 * Without the reset, getLocale() would keep returning the locale of the
 * old resource after the assignment.
 *
 * @param other the bundle to copy; it may hold a NULL (bad) resource
 * @return a reference to this object
 */
ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
{
    if (this == &other) {
        return *this;
    }

    if (fResource != NULL) {
        ures_close(fResource);
        fResource = NULL;
    }

    // Invalidate the locale cached for the previous resource.
    if (fLocale != NULL) {
        delete fLocale;
        fLocale = NULL;
    }

    // operator= has no channel to report an error; the status is discarded.
    UErrorCode status = U_ZERO_ERROR;
    if (other.fResource != NULL) {
        fResource = ures_copyResb(NULL, other.fResource, &status);
    } else {
        /* Copying a bad resource bundle */
        fResource = NULL;
    }
    return *this;
}
|
||||
|
||||
/**
 * Destructor. Closes the owned UResourceBundle, if any, and frees the
 * lazily created Locale, if one was cached.
 */
ResourceBundle::~ResourceBundle()
{
    if (fResource != NULL) {
        ures_close(fResource);
    }
    if (fLocale != NULL) {
        delete fLocale;
    }
}
|
||||
|
||||
/**
 * Returns a heap-allocated copy of this bundle made with the copy
 * constructor. The caller receives the pointer produced by new.
 */
ResourceBundle *
ResourceBundle::clone() const {
    ResourceBundle *copy = new ResourceBundle(*this);
    return copy;
}
|
||||
|
||||
/**
 * Returns the string value of this resource as obtained from
 * ures_getString(). The result is built with the (isTerminated, text,
 * length) UnicodeString constructor around the returned buffer.
 */
UnicodeString ResourceBundle::getString(UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getString(fResource, &length, &status);

    return UnicodeString(TRUE, chars, length);
}
|
||||
|
||||
/**
 * Forwards to ures_getBinary(): returns the binary data pointer and
 * stores its length in len.
 */
const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
    const uint8_t *bytes = ures_getBinary(fResource, &len, &status);
    return bytes;
}
|
||||
|
||||
/**
 * Forwards to ures_getIntVector(): returns the integer vector pointer and
 * stores its element count in len.
 */
const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
    const int32_t *values = ures_getIntVector(fResource, &len, &status);
    return values;
}
|
||||
|
||||
/** Forwards to ures_getUInt() for this resource. */
uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
    uint32_t value = ures_getUInt(fResource, &status);
    return value;
}
|
||||
|
||||
/** Forwards to ures_getInt() for this resource. */
int32_t ResourceBundle::getInt(UErrorCode& status) const {
    int32_t value = ures_getInt(fResource, &status);
    return value;
}
|
||||
|
||||
const char *ResourceBundle::getName(void) const {
|
||||
return ures_getName(fResource);
|
||||
}
|
||||
|
||||
const char *ResourceBundle::getKey(void) const {
|
||||
return ures_getKey(fResource);
|
||||
}
|
||||
|
||||
/** Forwards to ures_getType() for this resource. */
UResType ResourceBundle::getType(void) const {
    UResType type = ures_getType(fResource);
    return type;
}
|
||||
|
||||
/** Forwards to ures_getSize() for this resource. */
int32_t ResourceBundle::getSize(void) const {
    int32_t size = ures_getSize(fResource);
    return size;
}
|
||||
|
||||
/** Forwards to ures_hasNext() for this resource. */
UBool ResourceBundle::hasNext(void) const {
    UBool more = ures_hasNext(fResource);
    return more;
}
|
||||
|
||||
/** Forwards to ures_resetIterator() for this resource. */
void ResourceBundle::resetIterator(void) {
|
||||
ures_resetIterator(fResource);
|
||||
}
|
||||
|
||||
/**
 * Returns the next sub-resource of this bundle as a new ResourceBundle.
 * The item is fetched into a stack-allocated UResourceBundle with
 * ures_getNextResource(), copied by the ResourceBundle constructor, and the
 * stack object is closed only when status indicates success.
 */
ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);

    ures_getNextResource(fResource, &stackBundle, &status);
    ResourceBundle result(&stackBundle, status);
    if (!U_FAILURE(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
|
||||
|
||||
/**
 * Returns the next string of this bundle via ures_getNextString(),
 * without asking for the item's key (a null key pointer is passed).
 */
UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
    int32_t length = 0;
    const UChar *chars = ures_getNextString(fResource, &length, NULL, &status);

    return UnicodeString(TRUE, chars, length);
}
|
||||
|
||||
/**
 * Returns the next string of this bundle via ures_getNextString();
 * key is passed through so the C API can fill in the item's key.
 */
UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
    int32_t length = 0;
    const UChar *chars = ures_getNextString(fResource, &length, key, &status);

    return UnicodeString(TRUE, chars, length);
}
|
||||
|
||||
/**
 * Returns the sub-resource at position indexR as a new ResourceBundle.
 * The item is fetched into a stack-allocated UResourceBundle with
 * ures_getByIndex(), copied by the ResourceBundle constructor, and the
 * stack object is closed only when status indicates success.
 */
ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);

    ures_getByIndex(fResource, indexR, &stackBundle, &status);
    ResourceBundle result(&stackBundle, status);
    if (!U_FAILURE(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
|
||||
|
||||
/**
 * Returns the string at position indexS, obtained with
 * ures_getStringByIndex().
 */
UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getStringByIndex(fResource, indexS, &length, &status);

    return UnicodeString(TRUE, chars, length);
}
|
||||
|
||||
/**
 * Returns the sub-resource stored under key as a new ResourceBundle.
 * The item is fetched into a stack-allocated UResourceBundle with
 * ures_getByKey(), copied by the ResourceBundle constructor, and the
 * stack object is closed only when status indicates success.
 */
ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);

    ures_getByKey(fResource, key, &stackBundle, &status);
    ResourceBundle result(&stackBundle, status);
    if (!U_FAILURE(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
|
||||
|
||||
/**
 * Like get(key, status), but the lookup is done with
 * ures_getByKeyWithFallback(). The stack-allocated temporary is closed
 * only when status indicates success.
 */
ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);

    ures_getByKeyWithFallback(fResource, key, &stackBundle, &status);
    ResourceBundle result(&stackBundle, status);
    if (!U_FAILURE(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
|
||||
/**
 * Returns the string stored under key, obtained with
 * ures_getStringByKey().
 */
UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getStringByKey(fResource, key, &length, &status);

    return UnicodeString(TRUE, chars, length);
}
|
||||
|
||||
const char*
|
||||
ResourceBundle::getVersionNumber() const
|
||||
{
|
||||
return ures_getVersionNumber(fResource);
|
||||
}
|
||||
|
||||
/** Forwards to ures_getVersion(), which fills in versionInfo. */
void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
|
||||
ures_getVersion(fResource, versionInfo);
|
||||
}
|
||||
|
||||
/**
 * Returns the Locale of this bundle, creating and caching it in fLocale on
 * first use from the name reported by ures_getLocale().
 *
 * The candidate Locale is allocated before the lock is taken and installed
 * under umtx_lock(NULL) only if fLocale is still NULL; otherwise the
 * candidate is deleted after the lock is released. If the allocation fails,
 * the default locale is returned and nothing is cached.
 */
const Locale &ResourceBundle::getLocale(void) const
|
||||
{
|
||||
UBool needInit;
|
||||
// NOTE(review): UMTX_CHECK presumably evaluates (fLocale == NULL) under the
// global mutex and stores the result in needInit - confirm in umutex.h.
UMTX_CHECK(NULL, (fLocale == NULL), needInit);
|
||||
if(needInit) {
|
||||
// The status from ures_getLocale() is not examined afterwards.
UErrorCode status = U_ZERO_ERROR;
|
||||
const char *localeName = ures_getLocale(fResource, &status);
|
||||
Locale *tLocale = new Locale(localeName);
|
||||
// Null pointer check
|
||||
if (tLocale == NULL) {
|
||||
return Locale::getDefault(); // Return default locale if one could not be created.
|
||||
}
|
||||
umtx_lock(NULL);
|
||||
ResourceBundle *me = (ResourceBundle *)this; // semantically const
|
||||
// Re-check under the lock: install the candidate only if fLocale is still NULL.
if (me->fLocale == NULL) {
|
||||
me->fLocale = tLocale;
|
||||
// Ownership moved to fLocale; clearing tLocale makes the delete below a no-op.
tLocale = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
// Frees the unused candidate when fLocale was already set at the re-check.
delete tLocale;
|
||||
}
|
||||
return *fLocale;
|
||||
}
|
||||
|
||||
/**
 * Returns, by value, a Locale built from the name that
 * ures_getLocaleByType() reports for the requested locale type.
 */
const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
{
    const char *localeName = ures_getLocaleByType(fResource, type, &status);
    return Locale(localeName);
}
|
||||
|
||||
//eof
|
||||
U_NAMESPACE_END
|
55
source/common/resbund_cnv.cpp
Normal file
55
source/common/resbund_cnv.cpp
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: resbund_cnv.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004aug25
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Character conversion functions moved here from resbund.cpp
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "uinvchar.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Opens the bundle for the given locale from a package path supplied as a
 * UnicodeString; the work is delegated to constructForLocale().
 */
ResourceBundle::ResourceBundle(const UnicodeString& path,
                               const Locale& locale,
                               UErrorCode& error)
    : UObject(),
      fLocale(NULL)
{
    constructForLocale(path, locale, error);
}
|
||||
|
||||
/**
 * Opens the bundle for the current default locale from a package path
 * supplied as a UnicodeString; the work is delegated to constructForLocale().
 */
ResourceBundle::ResourceBundle(const UnicodeString& path,
                               UErrorCode& error)
    : UObject(),
      fLocale(NULL)
{
    const Locale &defaultLocale = Locale::getDefault();
    constructForLocale(path, defaultLocale, error);
}
|
||||
|
||||
void
|
||||
ResourceBundle::constructForLocale(const UnicodeString& path,
|
||||
const Locale& locale,
|
||||
UErrorCode& error)
|
||||
{
|
||||
if (path.isEmpty()) {
|
||||
fResource = ures_open(NULL, locale.getName(), &error);
|
||||
}
|
||||
else {
|
||||
UnicodeString nullTerminatedPath(path);
|
||||
nullTerminatedPath.append((UChar)0);
|
||||
fResource = ures_openU(nullTerminatedPath.getBuffer(), locale.getName(), &error);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
160
source/common/ruleiter.cpp
Normal file
160
source/common/ruleiter.cpp
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2003-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: September 24 2003
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#include "ruleiter.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/symtable.h"
|
||||
#include "util.h"
|
||||
|
||||
/* \U87654321 or \ud800\udc00 */
|
||||
#define MAX_U_NOTATION_LEN 12
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Binds the iterator to the caller's text, parse position and optional
 * symbol table. No variable expansion is active initially (buf is 0).
 */
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText,
                                             const SymbolTable* theSym,
                                             ParsePosition& thePos)
    : text(theText),
      pos(thePos),
      sym(theSym),
      buf(0),
      bufPos(0)
{
}
|
||||
|
||||
/**
 * Returns true when no variable expansion is in progress and the parse
 * position equals the length of the text.
 */
UBool RuleCharacterIterator::atEnd() const {
    if (buf != 0) {
        return FALSE;   // still inside a variable expansion
    }
    return pos.getIndex() == text.length();
}
|
||||
|
||||
/**
 * Returns the next code point, applying the requested options, and
 * advances past it.
 *
 * @param options   bitwise OR of PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE
 * @param isEscaped set to TRUE if the returned code point came from a
 *                  backslash escape, otherwise FALSE
 * @param ec        if already a failure, DONE is returned immediately; set to
 *                  U_UNDEFINED_VARIABLE or U_MALFORMED_UNICODE_ESCAPE on error
 * @return the code point, or DONE at the end or on error
 */
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    const UBool expandVariables = (UBool)((options & PARSE_VARIABLES) != 0 && sym != 0);
    const UBool skipWhitespace  = (UBool)((options & SKIP_WHITESPACE) != 0);
    const UBool parseEscapes    = (UBool)((options & PARSE_ESCAPES) != 0);

    UChar32 ch = DONE;
    isEscaped = FALSE;

    for (;;) {
        ch = _current();
        _advance(UTF_CHAR_LENGTH(ch));

        // Variable reference: only recognized outside an active expansion.
        if (expandVariables && buf == 0 && ch == SymbolTable::SYMBOL_REF) {
            UnicodeString varName = sym->parseReference(text, pos, text.length());
            if (varName.length() == 0) {
                // Isolated SYMBOL_REF; hand it back as an ordinary character.
                // Caller must be prepared for this.
                return ch;
            }
            bufPos = 0;
            buf = sym->lookup(varName);
            if (buf == 0) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            if (buf->length() == 0) {
                // Empty variable value: nothing to iterate over.
                buf = 0;
            }
            continue;
        }

        if (skipWhitespace && uprv_isRuleWhiteSpace(ch)) {
            continue;
        }

        if (parseEscapes && ch == 0x5C /*'\\'*/) {
            UnicodeString escapeText;
            int32_t consumed = 0;
            ch = lookahead(escapeText, MAX_U_NOTATION_LEN).unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = TRUE;
            if (ch < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        break;
    }

    return ch;
}
|
||||
|
||||
/**
 * Saves the iterator state (text index, active expansion buffer and the
 * offset within it) into p for a later setPos().
 */
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos    = pos.getIndex();
    p.buf    = buf;
    p.bufPos = bufPos;
}
|
||||
|
||||
/**
 * Restores the iterator state (text index, active expansion buffer and the
 * offset within it) from p.
 */
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf    = p.buf;
    bufPos = p.bufPos;
}
|
||||
|
||||
/**
 * Advances past any run of rule white space at the current position when
 * SKIP_WHITESPACE is set in options; otherwise does nothing.
 */
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }

    UChar32 ch = _current();
    while (uprv_isRuleWhiteSpace(ch)) {
        _advance(UTF_CHAR_LENGTH(ch));
        ch = _current();
    }
}
|
||||
|
||||
/**
 * Extracts up to maxLookAhead code units into result, starting at the
 * current position of the active variable expansion if there is one, or of
 * the text otherwise. A negative maxLookAhead is replaced by 0x7FFFFFFF.
 *
 * @return a reference to result
 */
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t limit = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;

    if (buf == 0) {
        text.extract(pos.getIndex(), limit, result);
    } else {
        buf->extract(bufPos, limit, result);
    }
    return result;
}
|
||||
|
||||
/** Public entry point that advances by count code units via _advance(). */
void RuleCharacterIterator::jumpahead(int32_t count) {
|
||||
_advance(count);
|
||||
}
|
||||
|
||||
/*
|
||||
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
|
||||
int32_t b = pos.getIndex();
|
||||
text.extract(0, b, result);
|
||||
return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
|
||||
}
|
||||
*/
|
||||
|
||||
/**
 * Returns the code point at the current position without any option
 * processing: from the active variable expansion if there is one, otherwise
 * from the text, or DONE when the text index is at or past the end.
 */
UChar32 RuleCharacterIterator::_current() const {
    if (buf != 0) {
        return buf->char32At(bufPos);
    }

    int index = pos.getIndex();
    if (index < text.length()) {
        return text.char32At(index);
    }
    return (UChar32)DONE;
}
|
||||
|
||||
/**
 * Advances by count code units. Inside a variable expansion only bufPos
 * moves, and the expansion ends when bufPos reaches its length exactly.
 * Otherwise the text index moves and is clamped to the text length.
 */
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf == 0) {
        pos.setIndex(pos.getIndex() + count);
        const int32_t textLength = text.length();
        if (pos.getIndex() > textLength) {
            pos.setIndex(textLength);
        }
        return;
    }

    bufPos += count;
    if (bufPos == buf->length()) {
        buf = 0;    // expansion exhausted; resume reading from the text
    }
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
232
source/common/ruleiter.h
Normal file
232
source/common/ruleiter.h
Normal file
|
@ -0,0 +1,232 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2003-2007, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
* Created: September 24 2003
|
||||
* Since: ICU 2.8
|
||||
**********************************************************************
|
||||
*/
|
||||
#ifndef _RULEITER_H_
|
||||
#define _RULEITER_H_
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class UnicodeString;
|
||||
class ParsePosition;
|
||||
class SymbolTable;
|
||||
|
||||
/**
|
||||
* An iterator that returns 32-bit code points. This class is deliberately
|
||||
* <em>not</em> related to any of the ICU character iterator classes
|
||||
* in order to minimize complexity.
|
||||
* @author Alan Liu
|
||||
* @since ICU 2.8
|
||||
*/
|
||||
class RuleCharacterIterator : public UMemory {
|
||||
|
||||
// TODO: Ideas for later. (Do not implement if not needed, lest the
|
||||
// code coverage numbers go down due to unused methods.)
|
||||
// 1. Add a copy constructor, operator==() method.
|
||||
// 2. Rather than return DONE, throw an exception if the end
|
||||
// is reached -- this is an alternate usage model, probably not useful.
|
||||
|
||||
private:
|
||||
/**
|
||||
* Text being iterated.
|
||||
*/
|
||||
const UnicodeString& text;
|
||||
|
||||
/**
|
||||
* Position of iterator.
|
||||
*/
|
||||
ParsePosition& pos;
|
||||
|
||||
/**
|
||||
* Symbol table used to parse and dereference variables. May be 0.
|
||||
*/
|
||||
const SymbolTable* sym;
|
||||
|
||||
/**
|
||||
* Current variable expansion, or 0 if none.
|
||||
*/
|
||||
const UnicodeString* buf;
|
||||
|
||||
/**
|
||||
* Position within buf. Meaningless if buf == 0.
|
||||
*/
|
||||
int32_t bufPos;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Value returned when there are no more characters to iterate.
|
||||
*/
|
||||
enum { DONE = -1 };
|
||||
|
||||
/**
|
||||
* Bitmask option to enable parsing of variable names. If (options &
|
||||
* PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
|
||||
* its value. Variables are parsed using the SymbolTable API.
|
||||
*/
|
||||
enum { PARSE_VARIABLES = 1 };
|
||||
|
||||
/**
|
||||
* Bitmask option to enable parsing of escape sequences. If (options &
|
||||
* PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
|
||||
* to its value. Escapes are parsed using Utility.unescapeAt().
|
||||
*/
|
||||
enum { PARSE_ESCAPES = 2 };
|
||||
|
||||
/**
|
||||
* Bitmask option to enable skipping of whitespace. If (options &
|
||||
* SKIP_WHITESPACE) != 0, then whitespace characters will be silently
|
||||
* skipped, as if they were not present in the input. Whitespace
|
||||
* characters are defined by UCharacterProperty.isRuleWhiteSpace().
|
||||
*/
|
||||
enum { SKIP_WHITESPACE = 4 };
|
||||
|
||||
/**
|
||||
* Constructs an iterator over the given text, starting at the given
|
||||
* position.
|
||||
* @param text the text to be iterated
|
||||
* @param sym the symbol table, or null if there is none. If sym is null,
|
||||
* then variables will not be dereferenced, even if the PARSE_VARIABLES
|
||||
* option is set.
|
||||
* @param pos upon input, the index of the next character to return. If a
|
||||
* variable has been dereferenced, then pos will <em>not</em> increment as
|
||||
* characters of the variable value are iterated.
|
||||
*/
|
||||
RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
|
||||
ParsePosition& pos);
|
||||
|
||||
/**
|
||||
* Returns true if this iterator has no more characters to return.
|
||||
*/
|
||||
UBool atEnd() const;
|
||||
|
||||
/**
|
||||
* Returns the next character using the given options, or DONE if there
|
||||
* are no more characters, and advance the position to the next
|
||||
* character.
|
||||
* @param options one or more of the following options, bitwise-OR-ed
|
||||
* together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
|
||||
* @param isEscaped output parameter set to TRUE if the character
|
||||
* was escaped
|
||||
* @param ec input-output error code. An error will only be set by
|
||||
* this routine if options includes PARSE_VARIABLES and an unknown
|
||||
* variable name is seen, or if options includes PARSE_ESCAPES and
|
||||
* an invalid escape sequence is seen.
|
||||
* @return the current 32-bit code point, or DONE
|
||||
*/
|
||||
UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
|
||||
|
||||
/**
|
||||
* Returns true if this iterator is currently within a variable expansion.
|
||||
*/
|
||||
inline UBool inVariable() const;
|
||||
|
||||
/**
|
||||
* An opaque object representing the position of a RuleCharacterIterator.
|
||||
*/
|
||||
struct Pos : public UMemory {
|
||||
private:
|
||||
const UnicodeString* buf;
|
||||
int32_t pos;
|
||||
int32_t bufPos;
|
||||
friend class RuleCharacterIterator;
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets an object which, when later passed to setPos(), will
|
||||
* restore this iterator's position. Usage idiom:
|
||||
*
|
||||
* RuleCharacterIterator iterator = ...;
|
||||
* RuleCharacterIterator::Pos pos;
|
||||
* iterator.getPos(pos);
|
||||
* for (;;) {
|
||||
* iterator.getPos(pos);
|
||||
* int c = iterator.next(...);
|
||||
* ...
|
||||
* }
|
||||
* iterator.setPos(pos);
|
||||
*
|
||||
* @param p a position object to be set to this iterator's
|
||||
* current position.
|
||||
*/
|
||||
void getPos(Pos& p) const;
|
||||
|
||||
/**
|
||||
* Restores this iterator to the position it had when getPos()
|
||||
* set the given object.
|
||||
* @param p a position object previously set by getPos()
|
||||
*/
|
||||
void setPos(const Pos& p);
|
||||
|
||||
/**
|
||||
* Skips ahead past any ignored characters, as indicated by the given
|
||||
* options. This is useful in conjunction with the lookahead() method.
|
||||
*
|
||||
* Currently, this only has an effect for SKIP_WHITESPACE.
|
||||
* @param options one or more of the following options, bitwise-OR-ed
|
||||
* together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
|
||||
*/
|
||||
void skipIgnored(int32_t options);
|
||||
|
||||
/**
|
||||
* Returns a string containing the remainder of the characters to be
|
||||
* returned by this iterator, without any option processing. If the
|
||||
* iterator is currently within a variable expansion, this will only
|
||||
* extend to the end of the variable expansion. This method is provided
|
||||
* so that iterators may interoperate with string-based APIs. The typical
|
||||
* sequence of calls is to call skipIgnored(), then call lookahead(), then
|
||||
* parse the string returned by lookahead(), then call jumpahead() to
|
||||
* resynchronize the iterator.
|
||||
* @param result a string to receive the characters to be returned
|
||||
* by future calls to next()
|
||||
* @param maxLookAhead The maximum to copy into the result.
|
||||
* @return a reference to result
|
||||
*/
|
||||
UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
|
||||
|
||||
/**
|
||||
* Advances the position by the given number of 16-bit code units.
|
||||
* This is useful in conjunction with the lookahead() method.
|
||||
* @param count the number of 16-bit code units to jump over
|
||||
*/
|
||||
void jumpahead(int32_t count);
|
||||
|
||||
/**
|
||||
* Returns a string representation of this object, consisting of the
|
||||
* characters being iterated, with a '|' marking the current position.
|
||||
* Position within an expanded variable is <em>not</em> indicated.
|
||||
* @param result output parameter to receive a string
|
||||
* representation of this object
|
||||
*/
|
||||
// UnicodeString& toString(UnicodeString& result) const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Returns the current 32-bit code point without parsing escapes, parsing
|
||||
* variables, or skipping whitespace.
|
||||
* @return the current 32-bit code point
|
||||
*/
|
||||
UChar32 _current() const;
|
||||
|
||||
/**
|
||||
* Advances the position by the given amount.
|
||||
* @param count the number of 16-bit code units to advance past
|
||||
*/
|
||||
void _advance(int32_t count);
|
||||
};
|
||||
|
||||
inline UBool RuleCharacterIterator::inVariable() const {
|
||||
return buf != 0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // _RULEITER_H_
|
||||
//eof
|
115
source/common/schriter.cpp
Normal file
115
source/common/schriter.cpp
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 1998-2007, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
******************************************************************************
|
||||
*
|
||||
* File schriter.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/05/99 stephen Cleaned up.
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/schriter.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator)
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator()
|
||||
: UCharCharacterIterator(),
|
||||
text()
|
||||
{
|
||||
// NEVER DEFAULT CONSTRUCT!
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr)
|
||||
: UCharCharacterIterator(textStr.getBuffer(), textStr.length()),
|
||||
text(textStr)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.getBuffer();
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
|
||||
int32_t textPos)
|
||||
: UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos),
|
||||
text(textStr)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.getBuffer();
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
|
||||
int32_t textBegin,
|
||||
int32_t textEnd,
|
||||
int32_t textPos)
|
||||
: UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos),
|
||||
text(textStr)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.getBuffer();
|
||||
}
|
||||
|
||||
StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
|
||||
: UCharCharacterIterator(that),
|
||||
text(that.text)
|
||||
{
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.getBuffer();
|
||||
}
|
||||
|
||||
StringCharacterIterator::~StringCharacterIterator() {
|
||||
}
|
||||
|
||||
StringCharacterIterator&
|
||||
StringCharacterIterator::operator=(const StringCharacterIterator& that) {
|
||||
UCharCharacterIterator::operator=(that);
|
||||
text = that.text;
|
||||
// we had set the input parameter's array, now we need to set our copy's array
|
||||
UCharCharacterIterator::text = this->text.getBuffer();
|
||||
return *this;
|
||||
}
|
||||
|
||||
UBool
|
||||
StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
|
||||
if (this == &that) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// do not call UCharCharacterIterator::operator==()
|
||||
// because that checks for array pointer equality
|
||||
// while we compare UnicodeString objects
|
||||
|
||||
if (getDynamicClassID() != that.getDynamicClassID()) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
StringCharacterIterator& realThat = (StringCharacterIterator&)that;
|
||||
|
||||
return text == realThat.text
|
||||
&& pos == realThat.pos
|
||||
&& begin == realThat.begin
|
||||
&& end == realThat.end;
|
||||
}
|
||||
|
||||
CharacterIterator*
|
||||
StringCharacterIterator::clone() const {
|
||||
return new StringCharacterIterator(*this);
|
||||
}
|
||||
|
||||
void
|
||||
StringCharacterIterator::setText(const UnicodeString& newText) {
|
||||
text = newText;
|
||||
UCharCharacterIterator::setText(text.getBuffer(), text.length());
|
||||
}
|
||||
|
||||
void
|
||||
StringCharacterIterator::getText(UnicodeString& result) {
|
||||
result = text;
|
||||
}
|
||||
U_NAMESPACE_END
|
981
source/common/serv.cpp
Normal file
981
source/common/serv.cpp
Normal file
|
@ -0,0 +1,981 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2008, International Business Machines Corporation. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "serv.h"
|
||||
#include "umutex.h"
|
||||
|
||||
#undef SERVICE_REFCOUNT
|
||||
|
||||
// in case we use the refcount stuff
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */
|
||||
|
||||
ICUServiceKey::ICUServiceKey(const UnicodeString& id)
|
||||
: _id(id) {
|
||||
}
|
||||
|
||||
ICUServiceKey::~ICUServiceKey()
|
||||
{
|
||||
}
|
||||
|
||||
const UnicodeString&
|
||||
ICUServiceKey::getID() const
|
||||
{
|
||||
return _id;
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
ICUServiceKey::canonicalID(UnicodeString& result) const
|
||||
{
|
||||
return result.append(_id);
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
ICUServiceKey::currentID(UnicodeString& result) const
|
||||
{
|
||||
return canonicalID(result);
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
ICUServiceKey::currentDescriptor(UnicodeString& result) const
|
||||
{
|
||||
prefix(result);
|
||||
result.append(PREFIX_DELIMITER);
|
||||
return currentID(result);
|
||||
}
|
||||
|
||||
UBool
|
||||
ICUServiceKey::fallback()
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
UBool
|
||||
ICUServiceKey::isFallbackOf(const UnicodeString& id) const
|
||||
{
|
||||
return id == _id;
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
ICUServiceKey::prefix(UnicodeString& result) const
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Truncates result in place so that only the text before the first
 * PREFIX_DELIMITER remains. If result contains no delimiter it is
 * emptied.
 * @param result the descriptor to cut down to its prefix
 * @return result
 */
UnicodeString&
ICUServiceKey::parsePrefix(UnicodeString& result)
{
    const int32_t delimiterIndex = result.indexOf(PREFIX_DELIMITER);
    // No delimiter means there is no prefix at all: cut from offset 0.
    const int32_t cutFrom = (delimiterIndex < 0) ? 0 : delimiterIndex;
    result.remove(cutFrom);
    return result;
}
|
||||
|
||||
/**
 * Removes everything up to and including the first PREFIX_DELIMITER
 * from result, in place. If result contains no delimiter it is left
 * unchanged.
 * @param result the descriptor to cut down to its suffix
 * @return result
 */
UnicodeString&
ICUServiceKey::parseSuffix(UnicodeString& result)
{
    const int32_t delimiterIndex = result.indexOf(PREFIX_DELIMITER);
    if (delimiterIndex < 0) {
        return result;
    }
    result.remove(0, delimiterIndex + 1);
    return result;
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
UnicodeString&
|
||||
ICUServiceKey::debug(UnicodeString& result) const
|
||||
{
|
||||
debugClass(result);
|
||||
result.append(" id: ");
|
||||
result.append(_id);
|
||||
return result;
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
ICUServiceKey::debugClass(UnicodeString& result) const
|
||||
{
|
||||
return result.append("ICUServiceKey");
|
||||
}
|
||||
#endif
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey)
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible)
|
||||
: _instance(instanceToAdopt), _id(id), _visible(visible)
|
||||
{
|
||||
}
|
||||
|
||||
SimpleFactory::~SimpleFactory()
|
||||
{
|
||||
delete _instance;
|
||||
}
|
||||
|
||||
/**
 * Returns a clone of the adopted instance (made through
 * service->cloneInstance()) when the key's current ID equals this
 * factory's ID.
 * @param key     the key being resolved
 * @param service the service used to clone the instance
 * @param status  input-output error code; nothing is done if it
 *                already indicates a failure
 * @return the clone, or NULL if status is a failure or the IDs differ
 */
UObject*
SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString scratch;
    const UnicodeString& currentID = key.currentID(scratch);
    if (!(_id == currentID)) {
        return NULL;
    }
    return service->cloneInstance(_instance);
}
|
||||
|
||||
void
|
||||
SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
|
||||
{
|
||||
if (_visible) {
|
||||
result.put(_id, (void*)this, status); // cast away const
|
||||
} else {
|
||||
result.remove(_id);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sets result to this factory's ID when the factory is visible and id
 * equals that ID; otherwise sets result to bogus. The locale argument
 * is ignored.
 * @param id     the ID whose display name is requested
 * @param result receives the display name or is set to bogus
 * @return result
 */
UnicodeString&
SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const
{
    const UBool handlesID = _visible && _id == id;
    if (!handlesID) {
        result.setToBogus();
        return result;
    }
    result = _id;
    return result;
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
UnicodeString&
|
||||
SimpleFactory::debug(UnicodeString& toAppendTo) const
|
||||
{
|
||||
debugClass(toAppendTo);
|
||||
toAppendTo.append(" id: ");
|
||||
toAppendTo.append(_id);
|
||||
toAppendTo.append(", visible: ");
|
||||
toAppendTo.append(_visible ? "T" : "F");
|
||||
return toAppendTo;
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
SimpleFactory::debugClass(UnicodeString& toAppendTo) const
|
||||
{
|
||||
return toAppendTo.append("SimpleFactory");
|
||||
}
|
||||
#endif
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory)
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener)
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Record the actual id for this service in the cache, so we can return it
|
||||
// even if we succeed later with a different id.
|
||||
class CacheEntry : public UMemory {
|
||||
private:
|
||||
int32_t refcount;
|
||||
|
||||
public:
|
||||
UnicodeString actualDescriptor;
|
||||
UObject* service;
|
||||
|
||||
/**
|
||||
* Releases a reference to the shared resource.
|
||||
*/
|
||||
~CacheEntry() {
|
||||
delete service;
|
||||
}
|
||||
|
||||
CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service)
|
||||
: refcount(1), actualDescriptor(_actualDescriptor), service(_service) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiation creates an initial reference, so don't call this
|
||||
* unless you're creating a new pointer to this. Management of
|
||||
* that pointer will have to know how to deal with refcounts.
|
||||
* Returns this entry.
|
||||
*/
|
||||
CacheEntry* ref() {
|
||||
++refcount;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destruction removes a reference, so don't call this unless
|
||||
* you're removing a pointer to this somewhere. Management of that
|
||||
* pointer will have to know how to deal with refcounts. Once
|
||||
* the refcount drops to zero, the resource is released. Returns
|
||||
* NULL if the resource has been released, otherwise this entry.
|
||||
*/
|
||||
CacheEntry* unref() {
|
||||
if ((--refcount) == 0) {
|
||||
delete this;
|
||||
return NULL;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return TRUE if more than one reference to this entry exists,
|
||||
* i.e. the entry is shared.
|
||||
*/
|
||||
UBool isShared() const {
|
||||
return refcount > 1;
|
||||
}
|
||||
};
|
||||
|
||||
// UObjectDeleter for serviceCache
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV
|
||||
cacheDeleter(void* obj) {
|
||||
U_NAMESPACE_USE ((CacheEntry*)obj)->unref();
|
||||
}
|
||||
|
||||
/**
|
||||
* Deleter for UObjects
|
||||
*/
|
||||
static void U_CALLCONV
|
||||
deleteUObject(void *obj) {
|
||||
U_NAMESPACE_USE delete (UObject*) obj;
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
class DNCache : public UMemory {
|
||||
public:
|
||||
Hashtable cache;
|
||||
const Locale locale;
|
||||
|
||||
DNCache(const Locale& _locale)
|
||||
: cache(), locale(_locale)
|
||||
{
|
||||
// cache.setKeyDeleter(uhash_deleteUnicodeString);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
 * Heap-allocates a StringPair holding copies of the two strings.
 * @param displayName the display name to store
 * @param id          the ID to store
 * @param status      input-output error code; set to
 *                    U_MEMORY_ALLOCATION_ERROR if the pair cannot be
 *                    allocated or either copy is bogus
 * @return the new pair (owned by the caller), or NULL on failure
 */
StringPair*
StringPair::create(const UnicodeString& displayName,
                   const UnicodeString& id,
                   UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    StringPair* pair = new StringPair(displayName, id);
    if (pair != NULL && !pair->isBogus()) {
        return pair;
    }

    // Either the allocation or one of the string copies failed.
    status = U_MEMORY_ALLOCATION_ERROR;
    delete pair;
    return NULL;
}
|
||||
|
||||
UBool
|
||||
StringPair::isBogus() const {
|
||||
return displayName.isBogus() || id.isBogus();
|
||||
}
|
||||
|
||||
StringPair::StringPair(const UnicodeString& _displayName,
|
||||
const UnicodeString& _id)
|
||||
: displayName(_displayName)
|
||||
, id(_id)
|
||||
{
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV
|
||||
userv_deleteStringPair(void *obj) {
|
||||
U_NAMESPACE_USE delete (StringPair*) obj;
|
||||
}
|
||||
U_CDECL_END
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
ICUService::ICUService()
|
||||
: name()
|
||||
, lock(0)
|
||||
, timestamp(0)
|
||||
, factories(NULL)
|
||||
, serviceCache(NULL)
|
||||
, idCache(NULL)
|
||||
, dnCache(NULL)
|
||||
{
|
||||
umtx_init(&lock);
|
||||
}
|
||||
|
||||
ICUService::ICUService(const UnicodeString& newName)
|
||||
: name(newName)
|
||||
, lock(0)
|
||||
, timestamp(0)
|
||||
, factories(NULL)
|
||||
, serviceCache(NULL)
|
||||
, idCache(NULL)
|
||||
, dnCache(NULL)
|
||||
{
|
||||
umtx_init(&lock);
|
||||
}
|
||||
|
||||
ICUService::~ICUService()
|
||||
{
|
||||
{
|
||||
Mutex mutex(&lock);
|
||||
clearCaches();
|
||||
delete factories;
|
||||
factories = NULL;
|
||||
}
|
||||
umtx_destroy(&lock);
|
||||
}
|
||||
|
||||
UObject*
|
||||
ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const
|
||||
{
|
||||
return get(descriptor, NULL, status);
|
||||
}
|
||||
|
||||
/**
 * Builds a temporary key from descriptor via createKey(), resolves it
 * with getKey(), and deletes the key again.
 * @param descriptor   the descriptor to look up
 * @param actualReturn passed through to getKey(); may be NULL
 * @param status       input-output error code
 * @return whatever getKey() returns, or NULL if no key was created
 */
UObject*
ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const
{
    ICUServiceKey* key = createKey(&descriptor, status);
    if (key == NULL) {
        return NULL;
    }

    UObject* service = getKey(*key, actualReturn, status);
    delete key;
    return service;
}
|
||||
|
||||
UObject*
|
||||
ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const
|
||||
{
|
||||
return getKey(key, NULL, status);
|
||||
}
|
||||
|
||||
// this is a vector that subclasses of ICUService can override to further customize the result object
|
||||
// before returning it. All other public get functions should call this one.
|
||||
|
||||
UObject*
|
||||
ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const
|
||||
{
|
||||
return getKey(key, actualReturn, NULL, status);
|
||||
}
|
||||
|
||||
// make it possible to call reentrantly on systems that don't have reentrant mutexes.
|
||||
// we can use this simple approach since we know the situation where we're calling
|
||||
// reentrantly even without knowing the thread.
|
||||
class XMutex : public UMemory {
|
||||
public:
|
||||
inline XMutex(UMTX *mutex, UBool reentering)
|
||||
: fMutex(mutex)
|
||||
, fActive(!reentering)
|
||||
{
|
||||
if (fActive) umtx_lock(fMutex);
|
||||
}
|
||||
inline ~XMutex() {
|
||||
if (fActive) umtx_unlock(fMutex);
|
||||
}
|
||||
|
||||
private:
|
||||
UMTX *fMutex;
|
||||
UBool fActive;
|
||||
};
|
||||
|
||||
struct UVectorDeleter {
|
||||
UVector* _obj;
|
||||
UVectorDeleter() : _obj(NULL) {}
|
||||
~UVectorDeleter() { delete _obj; }
|
||||
};
|
||||
|
||||
// called only by factories, treat as private
|
||||
/**
 * Resolves key to a service object. The service cache is consulted
 * first; on a miss the registered factories are asked in list order.
 * If nothing matches, the key is advanced with fallback() and the
 * lookup repeats. When no factory matches at all, handleDefault() is
 * used.
 *
 * Called only by factories, treat as private.
 *
 * @param key          the key to resolve; fallback() is called on it
 *                     as the search proceeds
 * @param actualReturn if not NULL, receives the descriptor that was
 *                     actually matched, with a leading '/' removed
 * @param factory      if not NULL, the factory calling back into the
 *                     service: only factories after it in the list are
 *                     consulted, the lock is not re-acquired, and the
 *                     result is not put into the cache
 * @param status       input-output error code
 * @return a clone (via cloneInstance()) of the matched service object,
 *         the result of handleDefault() if nothing matched, or NULL on
 *         error
 */
UObject*
ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (isDefault()) {
        return handleDefault(key, actualReturn, status);
    }

    ICUService* ncthis = (ICUService*)this; // cast away semantic const

    CacheEntry* result = NULL;
    {
        // The factory list can't be modified until we're done,
        // otherwise we might update the cache with an invalid result.
        // The cache has to stay in synch with the factory list.
        // ICU doesn't have monitors so we can't use rw locks, so
        // we single-thread everything using this service, for now.

        // if factory is not null, we're calling from within the mutex,
        // and since some unix machines don't have reentrant mutexes we
        // need to make sure not to try to lock it again.
        XMutex mutex(&ncthis->lock, factory != NULL);

        if (serviceCache == NULL) {
            ncthis->serviceCache = new Hashtable(status);
            if (serviceCache == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }
            if (U_FAILURE(status)) {
                // Clear the member as well, so that a later call does
                // not use the deleted table.
                delete serviceCache;
                ncthis->serviceCache = NULL;
                return NULL;
            }
            serviceCache->setValueDeleter(cacheDeleter);
        }

        UnicodeString currentDescriptor;
        UVectorDeleter cacheDescriptorList;
        UBool putInCache = FALSE;

        int32_t startIndex = 0;
        int32_t limit = countFactories();
        UBool cacheResult = TRUE;

        if (factory != NULL) {
            for (int32_t i = 0; i < limit; ++i) {
                if (factory == (const ICUServiceFactory*)factories->elementAt(i)) {
                    startIndex = i + 1;
                    break;
                }
            }
            if (startIndex == 0) {
                // throw new InternalError("Factory " + factory + "not registered with service: " + this);
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return NULL;
            }
            cacheResult = FALSE;
        }

        do {
            currentDescriptor.remove();
            key.currentDescriptor(currentDescriptor);
            result = (CacheEntry*)serviceCache->get(currentDescriptor);
            if (result != NULL) {
                break;
            }

            // first test of cache failed, so we'll have to update
            // the cache if we eventually succeed-- that is, if we're
            // going to update the cache at all.
            putInCache = TRUE;

            int32_t index = startIndex;
            while (index < limit) {
                ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++);
                UObject* service = f->create(key, this, status);
                if (U_FAILURE(status)) {
                    delete service;
                    return NULL;
                }
                if (service != NULL) {
                    result = new CacheEntry(currentDescriptor, service);
                    if (result == NULL) {
                        delete service;
                        status = U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }

                    goto outerEnd;
                }
            }

            // prepare to load the cache with all additional ids that
            // will resolve to result, assuming we'll succeed.  We
            // don't want to keep querying on an id that's going to
            // fallback to the one that succeeded, we want to hit the
            // cache the first time next goaround.
            if (cacheDescriptorList._obj == NULL) {
                cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status);
                if (cacheDescriptorList._obj == NULL && U_SUCCESS(status)) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                }
                if (U_FAILURE(status)) {
                    return NULL;
                }
            }
            UnicodeString* idToCache = new UnicodeString(currentDescriptor);
            if (idToCache == NULL || idToCache->isBogus()) {
                delete idToCache;
                status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }

            cacheDescriptorList._obj->addElement(idToCache, status);
            if (U_FAILURE(status)) {
                // Same handling as getVisibleIDs(): an element whose
                // insertion failed is still ours to delete.
                delete idToCache;
                return NULL;
            }
        } while (key.fallback());
outerEnd:

        if (result != NULL) {
            if (putInCache && cacheResult) {
                // NOTE(review): this relies on the hashtable running its
                // value deleter (cacheDeleter -> unref()) on the value
                // when put() fails, so result must not be deleted again
                // here -- confirm against uhash_put().
                serviceCache->put(result->actualDescriptor, result, status);
                if (U_FAILURE(status)) {
                    return NULL;
                }

                if (cacheDescriptorList._obj != NULL) {
                    for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
                        UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
                        // Take the reference for this cache slot before
                        // the put, so that an unref() performed by a
                        // failing put leaves the count balanced.
                        result->ref();
                        serviceCache->put(*desc, result, status);
                        if (U_FAILURE(status)) {
                            // result is still referenced by the cache
                            // slots filled so far; do not delete it.
                            return NULL;
                        }

                        cacheDescriptorList._obj->removeElementAt(i);
                    }
                }
            }

            if (actualReturn != NULL) {
                // strip null prefix
                if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/)
                    actualReturn->remove();
                    actualReturn->append(result->actualDescriptor,
                        1,
                        result->actualDescriptor.length() - 1);
                } else {
                    *actualReturn = result->actualDescriptor;
                }

                if (actualReturn->isBogus()) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    // Only an entry that never went into the cache is
                    // ours to delete; a cached one is owned by the cache.
                    if (putInCache && !cacheResult) {
                        delete result;
                    }
                    return NULL;
                }
            }

            UObject* service = cloneInstance(result->service);
            if (putInCache && !cacheResult) {
                delete result;
            }
            return service;
        }
    }

    return handleDefault(key, actualReturn, status);
}
|
||||
|
||||
UObject*
|
||||
ICUService::handleDefault(const ICUServiceKey& /* key */, UnicodeString* /* actualIDReturn */, UErrorCode& /* status */) const
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
UVector&
|
||||
ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const {
|
||||
return getVisibleIDs(result, NULL, status);
|
||||
}
|
||||
|
||||
UVector&
|
||||
ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const
|
||||
{
|
||||
result.removeAllElements();
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
ICUService * ncthis = (ICUService*)this; // cast away semantic const
|
||||
{
|
||||
Mutex mutex(&ncthis->lock);
|
||||
const Hashtable* map = getVisibleIDMap(status);
|
||||
if (map != NULL) {
|
||||
ICUServiceKey* fallbackKey = createKey(matchID, status);
|
||||
|
||||
for (int32_t pos = -1;;) {
|
||||
const UHashElement* e = map->nextElement(pos);
|
||||
if (e == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
const UnicodeString* id = (const UnicodeString*)e->key.pointer;
|
||||
if (fallbackKey != NULL) {
|
||||
if (!fallbackKey->isFallbackOf(*id)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
UnicodeString* idClone = new UnicodeString(*id);
|
||||
if (idClone == NULL || idClone->isBogus()) {
|
||||
delete idClone;
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
result.addElement(idClone, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete idClone;
|
||||
break;
|
||||
}
|
||||
}
|
||||
delete fallbackKey;
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
result.removeAllElements();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Returns the map from visible ID to factory, building it on first use
 * by letting every registered factory update it, last factory first.
 *
 * Must only be called when the lock is already held.
 *
 * @param status input-output error code; set to
 *               U_MEMORY_ALLOCATION_ERROR if the map cannot be allocated
 * @return the map, or NULL if status is a failure on entry or the map
 *         could not be built
 */
const Hashtable*
ICUService::getVisibleIDMap(UErrorCode& status) const {
    if (U_FAILURE(status)) return NULL;

    // must only be called when lock is already held

    ICUService* ncthis = (ICUService*)this; // cast away semantic const
    if (idCache == NULL) {
        ncthis->idCache = new Hashtable(status);
        if (idCache == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            if (U_SUCCESS(status) && factories != NULL) {
                for (int32_t pos = factories->size(); --pos >= 0;) {
                    ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos);
                    f->updateVisibleIDs(*idCache, status);
                }
            }
            // Discard the table on any failure, including a failed
            // Hashtable constructor when there is no factory list, so a
            // broken table is never cached or returned.
            if (U_FAILURE(status)) {
                delete idCache;
                ncthis->idCache = NULL;
            }
        }
    }

    return idCache;
}
|
||||
|
||||
|
||||
UnicodeString&
|
||||
ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const
|
||||
{
|
||||
return getDisplayName(id, result, Locale::getDefault());
|
||||
}
|
||||
|
||||
/**
 * Looks up the display name for id in the given locale. The factory
 * registered for id in the visible-ID map is asked first; if there is
 * none, a key created from id is walked through its fallbacks and the
 * first factory found for a fallback ID is asked instead.
 *
 * @param id     the ID whose display name is wanted
 * @param result receives the display name; set to bogus if no factory
 *               is found or the visible-ID map is unavailable
 * @param locale the locale for the display name
 * @return result
 */
UnicodeString&
ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const
{
    {
        ICUService* ncthis = (ICUService*)this; // cast away semantic const
        UErrorCode status = U_ZERO_ERROR;
        Mutex mutex(&ncthis->lock);
        const Hashtable* map = getVisibleIDMap(status);
        if (map != NULL) {
            ICUServiceFactory* f = (ICUServiceFactory*)map->get(id);
            if (f != NULL) {
                f->getDisplayName(id, locale, result);
                return result;
            }

            // fallback
            status = U_ZERO_ERROR;
            ICUServiceKey* fallbackKey = createKey(&id, status);
            // createKey() may return NULL (for example when allocation
            // fails), so check the key before using it.
            while (fallbackKey != NULL && fallbackKey->fallback()) {
                UnicodeString us;
                fallbackKey->currentID(us);
                f = (ICUServiceFactory*)map->get(us);
                if (f != NULL) {
                    f->getDisplayName(id, locale, result);
                    delete fallbackKey;
                    return result;
                }
            }
            delete fallbackKey;
        }
    }
    result.setToBogus();
    return result;
}
|
||||
|
||||
UVector&
|
||||
ICUService::getDisplayNames(UVector& result, UErrorCode& status) const
|
||||
{
|
||||
return getDisplayNames(result, Locale::getDefault(), NULL, status);
|
||||
}
|
||||
|
||||
|
||||
UVector&
|
||||
ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const
|
||||
{
|
||||
return getDisplayNames(result, locale, NULL, status);
|
||||
}
|
||||
|
||||
/**
 * Fills result with one StringPair per cached display name, optionally
 * restricted to IDs that the key created from matchID reports as
 * fallbacks via isFallbackOf(). The display-name cache is rebuilt from
 * the visible-ID map when it is missing or was built for a different
 * locale.
 *
 * @param result  cleared, given userv_deleteStringPair as its deleter,
 *                then filled; left empty on failure
 * @param locale  the locale for the display names
 * @param matchID if not NULL, only matching IDs are returned
 * @param status  input-output error code
 * @return result
 */
UVector&
ICUService::getDisplayNames(UVector& result,
                            const Locale& locale,
                            const UnicodeString* matchID,
                            UErrorCode& status) const
{
    result.removeAllElements();
    result.setDeleter(userv_deleteStringPair);
    if (U_SUCCESS(status)) {
        ICUService* ncthis = (ICUService*)this; // cast away semantic const
        Mutex mutex(&ncthis->lock);

        if (dnCache != NULL && dnCache->locale != locale) {
            delete dnCache;
            ncthis->dnCache = NULL;
        }

        if (dnCache == NULL) {
            const Hashtable* m = getVisibleIDMap(status);
            if (m != NULL) {
                ncthis->dnCache = new DNCache(locale);
                if (dnCache == NULL) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return result;
                }

                int32_t pos = -1;
                const UHashElement* entry = NULL;
                while ((entry = m->nextElement(pos)) != NULL) {
                    const UnicodeString* id = (const UnicodeString*)entry->key.pointer;
                    ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer;
                    UnicodeString dname;
                    f->getDisplayName(*id, locale, dname);
                    if (dname.isBogus()) {
                        status = U_MEMORY_ALLOCATION_ERROR;
                    } else {
                        dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap
                        if (U_SUCCESS(status)) {
                            continue;
                        }
                    }
                    delete dnCache;
                    ncthis->dnCache = NULL;
                    return result;
                }
            }
        }
    }

    // dnCache is still NULL when status was a failure on entry and no
    // cache existed, or when the visible-ID map could not be obtained.
    // There is nothing to iterate then, and result is already empty.
    if (U_FAILURE(status) || dnCache == NULL) {
        return result;
    }

    ICUServiceKey* matchKey = createKey(matchID, status);
    /* To ensure that all elements in the hashtable are iterated, set pos to -1.
     * nextElement(pos) will skip the position at pos and begin the iteration
     * at the next position, which in this case will be 0.
     */
    int32_t pos = -1;
    const UHashElement *entry = NULL;
    while ((entry = dnCache->cache.nextElement(pos)) != NULL) {
        const UnicodeString* id = (const UnicodeString*)entry->value.pointer;
        if (matchKey != NULL && !matchKey->isFallbackOf(*id)) {
            continue;
        }
        const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
        // NOTE(review): StringPair::create() is declared as
        // (displayName, id) but is passed (id, display name) here.
        // Kept as is, since callers may depend on it -- confirm.
        StringPair* sp = StringPair::create(*id, *dn, status);
        if (U_SUCCESS(status)) {
            result.addElement(sp, status);
            if (U_FAILURE(status)) {
                // Same handling as getVisibleIDs(): an element whose
                // insertion failed is still ours to delete.
                delete sp;
            }
        }
        if (U_FAILURE(status)) {
            result.removeAllElements();
            break;
        }
    }
    delete matchKey;

    return result;
}
|
||||
|
||||
URegistryKey
|
||||
ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status)
|
||||
{
|
||||
return registerInstance(objToAdopt, id, TRUE, status);
|
||||
}
|
||||
|
||||
URegistryKey
|
||||
ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
|
||||
{
|
||||
ICUServiceKey* key = createKey(&id, status);
|
||||
if (key != NULL) {
|
||||
UnicodeString canonicalID;
|
||||
key->canonicalID(canonicalID);
|
||||
delete key;
|
||||
|
||||
ICUServiceFactory* f = createSimpleFactory(objToAdopt, canonicalID, visible, status);
|
||||
if (f != NULL) {
|
||||
return registerFactory(f, status);
|
||||
}
|
||||
}
|
||||
delete objToAdopt;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ICUServiceFactory*
|
||||
ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
|
||||
{
|
||||
if (U_SUCCESS(status)) {
|
||||
if ((objToAdopt != NULL) && (!id.isBogus())) {
|
||||
return new SimpleFactory(objToAdopt, id, visible);
|
||||
}
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Registers a factory at the front of the factory list, clears the
 * caches and notifies listeners.
 *
 * @param factoryToAdopt the factory to register; the service takes
 *                       ownership and deletes it on every failure path
 * @param status         input-output error code; set to
 *                       U_MEMORY_ALLOCATION_ERROR if the factory list
 *                       cannot be allocated
 * @return the factory as a registry key, or NULL if factoryToAdopt is
 *         NULL or registration failed
 */
URegistryKey
ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        // The factory was handed over for adoption, so release it. It
        // must not be reported as registered and listeners must not be
        // notified.
        delete factoryToAdopt;
        return NULL;
    }

    if (factoryToAdopt != NULL) {
        Mutex mutex(&lock);

        if (factories == NULL) {
            // Build the list in a local first, so that the member never
            // points at a deleted or half-constructed vector.
            UVector* newFactories = new UVector(deleteUObject, NULL, status);
            if (newFactories == NULL && U_SUCCESS(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
            if (U_FAILURE(status)) {
                delete newFactories;
                delete factoryToAdopt;
                return NULL;
            }
            factories = newFactories;
        }
        factories->insertElementAt(factoryToAdopt, 0, status);
        if (U_SUCCESS(status)) {
            clearCaches();
        } else {
            delete factoryToAdopt;
            factoryToAdopt = NULL;
        }
    }

    if (factoryToAdopt != NULL) {
        notifyChanged();
    }

    return (URegistryKey)factoryToAdopt;
}
|
||||
|
||||
UBool
|
||||
ICUService::unregister(URegistryKey rkey, UErrorCode& status)
|
||||
{
|
||||
ICUServiceFactory *factory = (ICUServiceFactory*)rkey;
|
||||
UBool result = FALSE;
|
||||
if (factory != NULL && factories != NULL) {
|
||||
Mutex mutex(&lock);
|
||||
|
||||
if (factories->removeElement(factory)) {
|
||||
clearCaches();
|
||||
result = TRUE;
|
||||
} else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
delete factory;
|
||||
}
|
||||
}
|
||||
if (result) {
|
||||
notifyChanged();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
ICUService::reset()
|
||||
{
|
||||
{
|
||||
Mutex mutex(&lock);
|
||||
reInitializeFactories();
|
||||
clearCaches();
|
||||
}
|
||||
notifyChanged();
|
||||
}
|
||||
|
||||
void
|
||||
ICUService::reInitializeFactories()
|
||||
{
|
||||
if (factories != NULL) {
|
||||
factories->removeAllElements();
|
||||
}
|
||||
}
|
||||
|
||||
UBool
|
||||
ICUService::isDefault() const
|
||||
{
|
||||
return countFactories() == 0;
|
||||
}
|
||||
|
||||
ICUServiceKey*
|
||||
ICUService::createKey(const UnicodeString* id, UErrorCode& status) const
|
||||
{
|
||||
return (U_FAILURE(status) || id == NULL) ? NULL : new ICUServiceKey(*id);
|
||||
}
|
||||
|
||||
void
|
||||
ICUService::clearCaches()
|
||||
{
|
||||
// callers synchronize before use
|
||||
++timestamp;
|
||||
delete dnCache;
|
||||
dnCache = NULL;
|
||||
delete idCache;
|
||||
idCache = NULL;
|
||||
delete serviceCache; serviceCache = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
ICUService::clearServiceCache()
|
||||
{
|
||||
// callers synchronize before use
|
||||
delete serviceCache; serviceCache = NULL;
|
||||
}
|
||||
|
||||
/**
 * Decide whether a listener may be registered with this service. The
 * listener is accepted when its dynamic class ID equals the static class ID
 * of ServiceListener.
 *
 * @param l the candidate listener.
 * @return TRUE if the class IDs match.
 */
UBool
ICUService::acceptsListener(const EventListener& l) const
{
    const UClassID actual = l.getDynamicClassID();
    return actual == ServiceListener::getStaticClassID();
}
|
||||
|
||||
void
|
||||
ICUService::notifyListener(EventListener& l) const
|
||||
{
|
||||
((ServiceListener&)l).serviceChanged(*this);
|
||||
}
|
||||
|
||||
/**
 * Append the name of this service to result.
 *
 * @param result the string to which the name is appended.
 * @return result, for convenience.
 */
UnicodeString&
ICUService::getName(UnicodeString& result) const
{
    result.append(name);
    return result;
}
|
||||
|
||||
/**
 * Return the number of factories currently held by this service.
 *
 * @return the size of the factory list, or 0 if no list has been created.
 */
int32_t
ICUService::countFactories() const
{
    if (factories == NULL) {
        return 0;
    }
    return factories->size();
}
|
||||
|
||||
/**
 * Return the current timestamp of this service. clearCaches() increments
 * the timestamp each time it runs.
 *
 * @return the current timestamp value.
 */
int32_t
ICUService::getTimestamp() const
{
    // The member is stored as uint32_t; this spells out the conversion the
    // return statement would otherwise perform implicitly.
    return (int32_t)timestamp;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
996
source/common/serv.h
Normal file
996
source/common/serv.h
Normal file
|
@ -0,0 +1,996 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2007, International Business Machines Corporation. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ICUSERV_H
|
||||
#define ICUSERV_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if UCONFIG_NO_SERVICE
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Allow the declaration of APIs with pointers to ICUService
|
||||
* even when service is removed from the build.
|
||||
*/
|
||||
class ICUService;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/umisc.h"
|
||||
|
||||
#include "hash.h"
|
||||
#include "uvector.h"
|
||||
#include "servnotf.h"
|
||||
|
||||
class ICUServiceTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ICUServiceKey;
|
||||
class ICUServiceFactory;
|
||||
class SimpleFactory;
|
||||
class ServiceListener;
|
||||
class ICUService;
|
||||
|
||||
class DNCache;
|
||||
|
||||
/*******************************************************************
|
||||
* ICUServiceKey
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ICUServiceKeys are used to communicate with factories to
|
||||
* generate an instance of the service. ICUServiceKeys define how
|
||||
* ids are canonicalized, provide both a current id and a current
|
||||
* descriptor to use in querying the cache and factories, and
|
||||
* determine the fallback strategy.</p>
|
||||
*
|
||||
* <p>ICUServiceKeys provide both a currentDescriptor and a currentID.
|
||||
* The descriptor contains an optional prefix, followed by '/'
|
||||
* and the currentID. Factories that handle complex keys,
|
||||
* for example number format factories that generate multiple
|
||||
* kinds of formatters for the same locale, use the descriptor
|
||||
* to provide a fully unique identifier for the service object,
|
||||
* while using the currentID (in this case, the locale string),
|
||||
* as the visible IDs that can be localized.</p>
|
||||
*
|
||||
* <p>The default implementation of ICUServiceKey has no fallbacks and
|
||||
* has no custom descriptors.</p>
|
||||
*/
|
||||
class U_COMMON_API ICUServiceKey : public UObject {
private:
    // ID string held by the key; presumably the value reported by getID()
    // (the definitions are not part of this header).
    const UnicodeString _id;

protected:
    // Delimiter between the prefix and the ID in a descriptor. The class
    // documentation describes it as '/'; the value is defined out of line.
    static const UChar PREFIX_DELIMITER;

public:
    /**
     * Build a key for the given ID.
     *
     * @param id the ID from which to construct the key.
     */
    ICUServiceKey(const UnicodeString& id);

    /**
     * Virtual destructor.
     */
    virtual ~ICUServiceKey();

    /**
     * Get the ID this key was originally constructed with.
     *
     * @return the ID used to construct this key.
     */
    virtual const UnicodeString& getID() const;

    /**
     * Get the canonical form of the original ID. The base implementation
     * appends the original ID to result.
     *
     * @param result the output parameter to which the ID is appended.
     * @return result, for convenience.
     */
    virtual UnicodeString& canonicalID(UnicodeString& result) const;

    /**
     * Get the (canonical) current ID. The base implementation appends the
     * canonical ID to result.
     *
     * @param result the output parameter to which the current ID is appended.
     * @return result, for convenience.
     */
    virtual UnicodeString& currentID(UnicodeString& result) const;

    /**
     * Get the current descriptor. The base implementation appends the
     * current descriptor to result.
     *
     * The descriptor fully identifies a service instance in the cache. A
     * factory may handle every descriptor for an ID or only one particular
     * descriptor, and may either parse the descriptor or use custom API on
     * the key to instantiate the service.
     *
     * @param result the output parameter to which the descriptor is appended.
     * @return result, for convenience.
     */
    virtual UnicodeString& currentDescriptor(UnicodeString& result) const;

    /**
     * Advance the key to its next fallback, if it has one. The current ID
     * changes when a fallback is taken. Implementations must never repeat a
     * current ID and must eventually return FALSE. The base implementation
     * has no fallbacks and always returns FALSE.
     *
     * @return TRUE if the key changed to a valid fallback value.
     */
    virtual UBool fallback();

    /**
     * Test whether a key created from id matches, or would eventually fall
     * back to match, the canonical ID of this key.
     *
     * @param id the ID to test.
     * @return TRUE if this key's canonical ID is a fallback of id.
     */
    virtual UBool isFallbackOf(const UnicodeString& id) const;

    /**
     * Get the prefix. The base implementation leaves result unchanged.
     *
     * @param result the output parameter to which the prefix is appended.
     * @return result, for convenience.
     */
    virtual UnicodeString& prefix(UnicodeString& result) const;

    /**
     * Utility that reduces a descriptor string to its prefix. Only the
     * (undelimited) prefix, if any, remains in result.
     *
     * @param result on entry a descriptor; on exit the prefix of that descriptor.
     * @return result, for convenience.
     */
    static UnicodeString& parsePrefix(UnicodeString& result);

    /**
     * Utility that reduces a descriptor string to its suffix. Only the
     * (undelimited) suffix, if any, remains in result.
     *
     * @param result on entry a descriptor; on exit the suffix of that descriptor.
     * @return result, for convenience.
     */
    static UnicodeString& parseSuffix(UnicodeString& result);

    /**
     * UObject RTTI boilerplate: class ID shared by all instances of this class.
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * UObject RTTI boilerplate: class ID of this object's actual class.
     */
    virtual UClassID getDynamicClassID() const;

#ifdef SERVICE_DEBUG
public:
    // Debugging aids, compiled only when SERVICE_DEBUG is defined.
    virtual UnicodeString& debug(UnicodeString& result) const;
    virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif

};
|
||||
|
||||
/*******************************************************************
|
||||
* ICUServiceFactory
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>An implementing ICUServiceFactory generates the service objects maintained by the
|
||||
* service. A factory generates a service object from a key,
|
||||
* updates id->factory mappings, and returns the display name for
|
||||
* a supported id.</p>
|
||||
*/
|
||||
class U_COMMON_API ICUServiceFactory : public UObject {
public:
    /**
     * Create a service object for the key if this factory supports it;
     * otherwise return NULL.
     *
     * A factory that supports the key may call the service's
     * getKey(ICUServiceKey&, UnicodeString*, const ICUServiceFactory*, UErrorCode&)
     * overload, passing itself as the factory, to obtain the object the
     * service would have produced before this factory was registered. That
     * call can change the key, so extract anything needed from the key
     * before making it.
     *
     * @param key     the service key.
     * @param service the service with which this factory is registered.
     * @param status  the error code status.
     * @return the service object, or NULL if the factory does not support the key.
     */
    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0;

    /**
     * Update result with the IDs (not descriptors) this factory publicly
     * handles. Result maps IDs to factories and, on entry, already holds
     * the visible mappings of previously registered factories.
     *
     * Together with getDisplayName this supports
     * ICUService::getDisplayNames. The factory decides which of its
     * supported IDs are visible and which of those get localized display
     * names; in most cases it maps every ID it supports to itself.
     *
     * @param result the mapping table to update.
     * @param status the error code status.
     */
    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0;

    /**
     * Store in result the display name of id in the given locale. The
     * argument is an ID, not a descriptor. If the ID is not visible, result
     * is set to bogus; an incoming bogus result stays bogus. The outcome is
     * undefined for an ID this factory does not support.
     *
     * @param id     a visible ID supported by this factory.
     * @param locale the locale for which to generate the localized display name.
     * @param result output parameter to hold the display name.
     * @return result, for convenience.
     */
    virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0;
};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>A default implementation of factory. This provides default
|
||||
* implementations for subclasses, and implements a singleton
|
||||
* factory that matches a single ID and returns a single
|
||||
* (possibly deferred-initialized) instance. This implements
|
||||
* updateVisibleIDs to add a mapping from its ID to itself
|
||||
* if visible is true, or to remove any existing mapping
|
||||
* for its ID if visible is false. No localization of display
|
||||
* names is performed.</p>
|
||||
*/
|
||||
class U_COMMON_API SimpleFactory : public ICUServiceFactory {
protected:
    // The adopted service instance, the single ID it is registered under,
    // and whether that ID is visible (see the constructor parameters).
    UObject* _instance;
    const UnicodeString _id;
    const UBool _visible;

public:
    /**
     * Construct a factory that maps one ID to one service instance. The
     * instance must not be NULL; the factory adopts it, and it must not be
     * changed after this call.
     *
     * @param instanceToAdopt the service instance to adopt.
     * @param id              the ID to assign to this service instance.
     * @param visible         if TRUE, the ID will be visible.
     */
    SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = TRUE);

    /**
     * Destructor.
     */
    virtual ~SimpleFactory();

    /**
     * Return a clone of the service instance when the factory's ID equals
     * the key's currentID. The service argument and the key's prefix are
     * ignored.
     *
     * @param key     the service key.
     * @param service the service with which this factory is registered.
     * @param status  the error code status.
     * @return the service object, or NULL if the factory does not support the key.
     */
    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;

    /**
     * Add a mapping from this factory's ID to this factory if the factory
     * is visible; otherwise remove the ID from result.
     *
     * @param result the mapping table to update.
     * @param status the error code status.
     */
    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;

    /**
     * Return the factory ID if it equals id and the factory is visible;
     * otherwise return the empty string. No localized ID information is
     * provided by this implementation.
     *
     * @param id     a visible ID supported by this factory.
     * @param locale the locale for which to generate the localized display name.
     * @param result output parameter to hold the display name.
     * @return result, for convenience.
     */
    virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;

    /**
     * UObject RTTI boilerplate: class ID shared by all instances of this class.
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * UObject RTTI boilerplate: class ID of this object's actual class.
     */
    virtual UClassID getDynamicClassID() const;

#ifdef SERVICE_DEBUG
public:
    // Debugging aids, compiled only when SERVICE_DEBUG is defined.
    virtual UnicodeString& debug(UnicodeString& toAppendTo) const;
    virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const;
#endif

};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>ServiceListener is the listener that ICUService provides by default.
|
||||
* ICUService will notify this listener when factories are added to
|
||||
* or removed from the service. Subclasses can provide
|
||||
* different listener interfaces that extend EventListener, and modify
|
||||
* acceptsListener and notifyListener as appropriate.</p>
|
||||
*/
|
||||
class U_COMMON_API ServiceListener : public EventListener {
public:
    /**
     * Callback invoked when the service changes. The listener is registered
     * with the service at the time of the call, and it must not modify the
     * notifier from within the call.
     *
     * @param service the service that changed.
     */
    virtual void serviceChanged(const ICUService& service) const = 0;

    /**
     * UObject RTTI boilerplate: class ID shared by all instances of this class.
     */
    static UClassID U_EXPORT2 getStaticClassID();

    /**
     * UObject RTTI boilerplate: class ID of this object's actual class.
     */
    virtual UClassID getDynamicClassID() const;

};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>A StringPair holds a displayName/ID pair. ICUService uses it
|
||||
* as the array elements returned by getDisplayNames.</p>
|
||||
*/
|
||||
class U_COMMON_API StringPair : public UMemory {
public:
    /**
     * The display name half of the pair.
     */
    const UnicodeString displayName;

    /**
     * The ID half of the pair.
     */
    const UnicodeString id;

    /**
     * Factory function: build a pair from a display name and an ID.
     * Instances can only be obtained through this function because the
     * constructor is private.
     *
     * @param displayName the display name.
     * @param id          the ID.
     * @param status      the error code status.
     * @return a StringPair if the creation was successful, otherwise NULL.
     */
    static StringPair* create(const UnicodeString& displayName,
                              const UnicodeString& id,
                              UErrorCode& status);

    /**
     * Check whether the pair is unusable.
     *
     * @return TRUE if either string of the pair is bogus.
     */
    UBool isBogus() const;

private:
    // Private so that construction goes through create().
    StringPair(const UnicodeString& displayName, const UnicodeString& id);
};
|
||||
|
||||
/*******************************************************************
|
||||
* ICUService
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>A Service provides access to service objects that implement a
|
||||
* particular service, e.g. transliterators. Users provide a String
|
||||
* id (for example, a locale string) to the service, and get back an
|
||||
* object for that id. Service objects can be any kind of object. A
|
||||
* new service object is returned for each query. The caller is
|
||||
* responsible for deleting it.</p>
|
||||
*
|
||||
* <p>Services 'canonicalize' the query ID and use the canonical ID to
|
||||
* query for the service. The service also defines a mechanism to
|
||||
* 'fallback' the ID multiple times. Clients can optionally request
|
||||
* the actual ID that was matched by a query when they use an ID to
|
||||
* retrieve a service object.</p>
|
||||
*
|
||||
* <p>Service objects are instantiated by ICUServiceFactory objects
|
||||
* registered with the service. The service queries each
|
||||
* ICUServiceFactory in turn, from most recently registered to
|
||||
* earliest registered, until one returns a service object. If none
|
||||
* responds with a service object, a fallback ID is generated, and the
|
||||
* process repeats until a service object is returned or until the ID
|
||||
* has no further fallbacks.</p>
|
||||
*
|
||||
* <p>In ICU 2.4, UObject (the base class of service instances) does
|
||||
* not define a polymorphic clone function. ICUService uses clones to
|
||||
* manage ownership. Thus, for now, ICUService defines an abstract
|
||||
* method, cloneInstance, that clients must implement to create clones
|
||||
* of the service instances. This may change in future releases of
|
||||
* ICU.</p>
|
||||
*
|
||||
* <p>ICUServiceFactories can be dynamically registered and
|
||||
* unregistered with the service. When registered, an
|
||||
* ICUServiceFactory is installed at the head of the factory list, and
|
||||
* so gets 'first crack' at any keys or fallback keys. When
|
||||
* unregistered, it is removed from the service and can no longer be
|
||||
* located through it. Service objects generated by this factory and
|
||||
* held by the client are unaffected.</p>
|
||||
*
|
||||
* <p>If a service has variants (e.g., the different variants of
|
||||
* BreakIterator) an ICUServiceFactory can use the prefix of the
|
||||
* ICUServiceKey to determine the variant of a service to generate.
|
||||
* If it does not support all variants, it can request
|
||||
* previously-registered factories to handle the ones it does not
|
||||
* support.</p>
|
||||
*
|
||||
* <p>ICUService uses ICUServiceKeys to query factories and perform
|
||||
* fallback. The ICUServiceKey defines the canonical form of the ID,
|
||||
* and implements the fallback strategy. Custom ICUServiceKeys can be
|
||||
* defined that parse complex IDs into components that
|
||||
* ICUServiceFactories can more easily use. The ICUServiceKey can
|
||||
* cache the results of this parsing to save repeated effort.
|
||||
* ICUService provides convenience APIs that take UnicodeStrings and
|
||||
* generate default ICUServiceKeys for use in querying.</p>
|
||||
*
|
||||
* <p>ICUService provides API to get the list of IDs publicly
|
||||
* supported by the service (although queries aren't restricted to
|
||||
* this list). This list contains only 'simple' IDs, and not fully
|
||||
* unique IDs. ICUServiceFactories are associated with each simple ID
|
||||
* and the responsible factory can also return a human-readable
|
||||
* localized version of the simple ID, for use in user interfaces.
|
||||
* ICUService can also provide an array of all the localized
|
||||
* visible IDs and their corresponding internal IDs.</p>
|
||||
*
|
||||
* <p>ICUService implements ICUNotifier, so that clients can register
|
||||
* to receive notification when factories are added or removed from
|
||||
* the service. ICUService provides a default EventListener
|
||||
* subinterface, ServiceListener, which can be registered with the
|
||||
* service. When the service changes, the ServiceListener's
|
||||
* serviceChanged method is called with the service as the
|
||||
* argument.</p>
|
||||
*
|
||||
* <p>The ICUService API is both rich and generic, and it is expected
|
||||
* that most implementations will statically 'wrap' ICUService to
|
||||
* present a more appropriate API-- for example, to declare the type
|
||||
* of the objects returned from get, to limit the factories that can
|
||||
* be registered with the service, or to define their own listener
|
||||
* interface with a custom callback method. They might also customize
|
||||
* ICUService by overriding it, for example, to customize the
|
||||
* ICUServiceKey and fallback strategy. ICULocaleService is a
|
||||
* subclass of ICUService that uses Locale names as IDs and uses
|
||||
* ICUServiceKeys that implement the standard resource bundle fallback
|
||||
* strategy. Most clients will wish to subclass it instead of
|
||||
* ICUService.</p>
|
||||
*/
|
||||
class U_COMMON_API ICUService : public ICUNotifier {
|
||||
protected:
|
||||
/**
|
||||
* Name useful for debugging.
|
||||
*/
|
||||
const UnicodeString name;
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* single lock used by this service.
|
||||
*/
|
||||
UMTX lock;
|
||||
|
||||
/**
|
||||
* Timestamp so iterators can be fail-fast.
|
||||
*/
|
||||
uint32_t timestamp;
|
||||
|
||||
/**
|
||||
* All the factories registered with this service.
|
||||
*/
|
||||
UVector* factories;
|
||||
|
||||
/**
|
||||
* The service cache.
|
||||
*/
|
||||
Hashtable* serviceCache;
|
||||
|
||||
/**
|
||||
* The ID cache.
|
||||
*/
|
||||
Hashtable* idCache;
|
||||
|
||||
/**
|
||||
* The name cache.
|
||||
*/
|
||||
DNCache* dnCache;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
public:
|
||||
/**
|
||||
* <p>Construct a new ICUService.</p>
|
||||
*/
|
||||
ICUService();
|
||||
|
||||
/**
|
||||
* <p>Construct with a name (useful for debugging).</p>
|
||||
*
|
||||
* @param name a name to use in debugging.
|
||||
*/
|
||||
ICUService(const UnicodeString& name);
|
||||
|
||||
/**
|
||||
* <p>Destructor.</p>
|
||||
*/
|
||||
virtual ~ICUService();
|
||||
|
||||
/**
|
||||
* <p>Return the name of this service. This will be the empty string if none was assigned.
|
||||
* Returns result as a convenience.</p>
|
||||
*
|
||||
* @param result an output parameter to contain the name of this service.
|
||||
* @return the name of this service.
|
||||
*/
|
||||
UnicodeString& getName(UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
|
||||
* createKey to create a key for the provided descriptor.</p>
|
||||
*
|
||||
* @param descriptor the descriptor.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
UObject* get(const UnicodeString& descriptor, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
|
||||
* createKey to create a key from the provided descriptor.</p>
|
||||
*
|
||||
* @param descriptor the descriptor.
|
||||
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&).</p>
|
||||
*
|
||||
* @param key the key.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
UObject* getKey(ICUServiceKey& key, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Given a key, return a service object, and, if actualReturn
|
||||
* is not NULL, the descriptor with which it was found in the
|
||||
* first element of actualReturn. If no service object matches
|
||||
* this key, returns NULL and leaves actualReturn unchanged.</p>
|
||||
*
|
||||
* <p>This queries the cache using the key's descriptor, and if no
|
||||
* object in the cache matches, tries the key on each
|
||||
* registered factory, in order. If none generates a service
|
||||
* object for the key, repeats the process with each fallback of
|
||||
* the key, until either a factory returns a service object, or the key
|
||||
* has no fallback. If no object is found, the result of handleDefault
|
||||
* is returned.</p>
|
||||
*
|
||||
* <p>Subclasses can override this method to further customize the
|
||||
* result before returning it.
|
||||
*
|
||||
* @param key the key.
|
||||
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>This version of getKey is only called by ICUServiceFactories within the scope
|
||||
* of a previous getKey call, to determine what previously-registered factories would
|
||||
* have returned. For details, see getKey(ICUServiceKey&, UErrorCode&). Subclasses
|
||||
* should not call it directly, but call through one of the other get functions.</p>
|
||||
*
|
||||
* @param key the key.
|
||||
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
|
||||
* @param factory the factory making the recursive call.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override for getVisibleIDs(String) that passes null
|
||||
* as the fallback, thus returning all visible IDs.</p>
|
||||
*
|
||||
* @param result a vector to hold the returned IDs.
|
||||
* @param status the error code status.
|
||||
* @return the result vector.
|
||||
*/
|
||||
UVector& getVisibleIDs(UVector& result, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Return a snapshot of the visible IDs for this service. This
|
||||
* list will not change as ICUServiceFactories are added or removed, but the
|
||||
* supported IDs will, so there is no guarantee that all and only
|
||||
* the IDs in the returned list will be visible and supported by the
|
||||
* service in subsequent calls.</p>
|
||||
*
|
||||
* <p>The IDs are returned as pointers to UnicodeStrings. The
|
||||
* caller owns the IDs. Previous contents of result are discarded before
|
||||
* new elements, if any, are added.</p>
|
||||
*
|
||||
* <p>matchID is passed to createKey to create a key. If the key
|
||||
* is not NULL, its isFallbackOf method is used to filter out IDs
|
||||
* that don't match the key or have it as a fallback.</p>
|
||||
*
|
||||
* @param result a vector to hold the returned IDs.
|
||||
* @param matchID an ID used to filter the result, or NULL if all IDs are desired.
|
||||
* @param status the error code status.
|
||||
* @return the result vector.
|
||||
*/
|
||||
UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override for getDisplayName(const UnicodeString&, const Locale&, UnicodeString&) that
|
||||
* uses the current default locale.</p>
|
||||
*
|
||||
* @param id the ID for which to retrieve the localized displayName.
|
||||
* @param result an output parameter to hold the display name.
|
||||
* @return the modified result.
|
||||
*/
|
||||
UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* <p>Given a visible ID, return the display name in the requested locale.
|
||||
* If there is no directly supported ID corresponding to this ID, result is
|
||||
* set to bogus.</p>
|
||||
*
|
||||
* @param id the ID for which to retrieve the localized displayName.
|
||||
* @param result an output parameter to hold the display name.
|
||||
* @param locale the locale in which to localize the ID.
|
||||
* @return the modified result.
|
||||
*/
|
||||
UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that
|
||||
* uses the current default Locale as the locale and NULL for
|
||||
* the matchID.</p>
|
||||
*
|
||||
* @param result a vector to hold the returned displayName/id StringPairs.
|
||||
* @param status the error code status.
|
||||
* @return the modified result vector.
|
||||
*/
|
||||
UVector& getDisplayNames(UVector& result, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that
|
||||
* uses NULL for the matchID.</p>
|
||||
*
|
||||
* @param result a vector to hold the returned displayName/id StringPairs.
|
||||
* @param locale the locale in which to localize the ID.
|
||||
* @param status the error code status.
|
||||
* @return the modified result vector.
|
||||
*/
|
||||
UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Return a snapshot of the mapping from display names to visible
|
||||
* IDs for this service. This set will not change as factories
|
||||
* are added or removed, but the supported IDs will, so there is
|
||||
* no guarantee that all and only the IDs in the returned map will
|
||||
* be visible and supported by the service in subsequent calls,
|
||||
* nor is there any guarantee that the current display names match
|
||||
* those in the result.</p>
|
||||
*
|
||||
* <p>The names are returned as pointers to StringPairs, which
|
||||
* contain both the displayName and the corresponding ID. The
|
||||
* caller owns the StringPairs. Previous contents of result are
|
||||
* discarded before new elements, if any, are added.</p>
|
||||
*
|
||||
* <p>matchID is passed to createKey to create a key. If the key
|
||||
* is not NULL, its isFallbackOf method is used to filter out IDs
|
||||
* that don't match the key or have it as a fallback.</p>
|
||||
*
|
||||
* @param result a vector to hold the returned displayName/id StringPairs.
|
||||
* @param locale the locale in which to localize the ID.
|
||||
* @param matchID an ID used to filter the result, or NULL if all IDs are desired.
|
||||
* @param status the error code status.
|
||||
* @return the result vector. */
|
||||
UVector& getDisplayNames(UVector& result,
|
||||
const Locale& locale,
|
||||
const UnicodeString* matchID,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>A convenience override of registerInstance(UObject*, const UnicodeString&, UBool)
|
||||
* that defaults visible to TRUE.</p>
|
||||
*
|
||||
* @param objToAdopt the object to register and adopt.
|
||||
* @param id the ID to assign to this object.
|
||||
* @param status the error code status.
|
||||
* @return a registry key that can be passed to unregister to unregister
|
||||
* (and discard) this instance.
|
||||
*/
|
||||
URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Register a service instance with the provided ID. The ID will be
|
||||
* canonicalized. The canonicalized ID will be returned by
|
||||
* getVisibleIDs if visible is TRUE. The service instance will be adopted and
|
||||
* must not be modified subsequent to this call.</p>
|
||||
*
|
||||
* <p>This issues a serviceChanged notification to registered listeners.</p>
|
||||
*
|
||||
* <p>This implementation wraps the object using
|
||||
* createSimpleFactory, and calls registerFactory.</p>
|
||||
*
|
||||
* @param objToAdopt the object to register and adopt.
|
||||
* @param id the ID to assign to this object.
|
||||
* @param visible TRUE if getVisibleIDs is to return this ID.
|
||||
* @param status the error code status.
|
||||
* @return a registry key that can be passed to unregister() to unregister
|
||||
* (and discard) this instance.
|
||||
*/
|
||||
virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Register an ICUServiceFactory. Returns a registry key that
|
||||
* can be used to unregister the factory. The factory
|
||||
* must not be modified subsequent to this call. The service owns
|
||||
* all registered factories. In case of an error, the factory is
|
||||
* deleted.</p>
|
||||
*
|
||||
* <p>This issues a serviceChanged notification to registered listeners.</p>
|
||||
*
|
||||
* <p>The default implementation accepts all factories.</p>
|
||||
*
|
||||
* @param factoryToAdopt the factory to register and adopt.
|
||||
* @param status the error code status.
|
||||
* @return a registry key that can be passed to unregister to unregister
|
||||
* (and discard) this factory.
|
||||
*/
|
||||
virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Unregister a factory using a registry key returned by
|
||||
* registerInstance or registerFactory. After a successful call,
|
||||
* the factory will be removed from the service factory list and
|
||||
* deleted, and the key becomes invalid.</p>
|
||||
*
|
||||
* <p>This issues a serviceChanged notification to registered
|
||||
* listeners.</p>
|
||||
*
|
||||
* @param rkey the registry key.
|
||||
* @param status the error code status.
|
||||
* @return TRUE if the call successfully unregistered the factory.
|
||||
*/
|
||||
virtual UBool unregister(URegistryKey rkey, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Reset the service to the default factories. The factory
|
||||
* lock is acquired and then reInitializeFactories is called.</p>
|
||||
*
|
||||
* <p>This issues a serviceChanged notification to registered listeners.</p>
|
||||
*/
|
||||
virtual void reset(void);
|
||||
|
||||
/**
|
||||
* <p>Return TRUE if the service is in its default state.</p>
|
||||
*
|
||||
* <p>The default implementation returns TRUE if there are no
|
||||
* factories registered.</p>
|
||||
*/
|
||||
virtual UBool isDefault(void) const;
|
||||
|
||||
/**
|
||||
* <p>Create a key from an ID. If ID is NULL, returns NULL.</p>
|
||||
*
|
||||
* <p>The default implementation creates an ICUServiceKey instance.
|
||||
* Subclasses can override to define more useful keys appropriate
|
||||
* to the factories they accept.</p>
|
||||
*
|
||||
* @param id a pointer to the ID for which to create a default ICUServiceKey.
|
||||
* @param status the error code status.
|
||||
* @return the ICUServiceKey corresponding to ID, or NULL.
|
||||
*/
|
||||
virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define
|
||||
* clone, so we need an instance-aware method that knows how to do this.
|
||||
* This is public so factories can call it, but should really be protected.</p>
|
||||
*
|
||||
* @param instance the service instance to clone.
|
||||
* @return a clone of the passed-in instance, or NULL if cloning was unsuccessful.
|
||||
*/
|
||||
virtual UObject* cloneInstance(UObject* instance) const = 0;
|
||||
|
||||
|
||||
/************************************************************************
|
||||
* Subclassing API
|
||||
*/
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* <p>Create a factory that wraps a single service object. Called by registerInstance.</p>
|
||||
*
|
||||
* <p>The default implementation returns an instance of SimpleFactory.</p>
|
||||
*
|
||||
* @param instanceToAdopt the service instance to adopt.
|
||||
* @param id the ID to assign to this service instance.
|
||||
* @param visible if TRUE, the ID will be visible.
|
||||
* @param status the error code status.
|
||||
* @return an instance of ICUServiceFactory that maps this instance to the provided ID.
|
||||
*/
|
||||
virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* <p>Reinitialize the factory list to its default state. After this call, isDefault()
|
||||
* must return TRUE.</p>
|
||||
*
|
||||
* <p>This issues a serviceChanged notification to registered listeners.</p>
|
||||
*
|
||||
* <p>The default implementation clears the factory list.
|
||||
* Subclasses can override to provide other default initialization
|
||||
* of the factory list. Subclasses must not call this method
|
||||
* directly, since it must only be called while holding write
|
||||
* access to the factory list.</p>
|
||||
*/
|
||||
virtual void reInitializeFactories(void);
|
||||
|
||||
/**
|
||||
* <p>Default handler for this service if no factory in the factory list
|
||||
* handled the key passed to getKey.</p>
|
||||
*
|
||||
* <p>The default implementation returns NULL.</p>
|
||||
*
|
||||
* @param key the key.
|
||||
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
|
||||
* @param status the error code status.
|
||||
* @return the service instance, or NULL.
|
||||
*/
|
||||
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Clear caches maintained by this service.</p>
|
||||
*
|
||||
* <p>Subclasses can override if they implement additional caches
|
||||
* that need to be cleared when the service changes. Subclasses
|
||||
* should generally not call this method directly, as it must only
|
||||
* be called while synchronized on the factory lock.</p>
|
||||
*/
|
||||
virtual void clearCaches(void);
|
||||
|
||||
/**
|
||||
* <p>Return true if the listener is accepted.</p>
|
||||
*
|
||||
* <p>The default implementation accepts the listener if it is
|
||||
* a ServiceListener. Subclasses can override this to accept
|
||||
* different listeners.</p>
|
||||
*
|
||||
* @param l the listener to test.
|
||||
* @return TRUE if the service accepts the listener.
|
||||
*/
|
||||
virtual UBool acceptsListener(const EventListener& l) const;
|
||||
|
||||
/**
|
||||
* <p>Notify the listener of a service change.</p>
|
||||
*
|
||||
* <p>The default implementation assumes a ServiceListener.
|
||||
* If acceptsListener has been overridden to accept different
|
||||
* listeners, this should be overridden as well.</p>
|
||||
*
|
||||
* @param l the listener to notify.
|
||||
*/
|
||||
virtual void notifyListener(EventListener& l) const;
|
||||
|
||||
/************************************************************************
|
||||
* Utilities for subclasses.
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>Clear only the service cache.</p>
|
||||
*
|
||||
* <p>This can be called by subclasses when a change affects the service
|
||||
* cache but not the ID caches, e.g., when the default locale changes
|
||||
* the resolution of IDs also changes, requiring the cache to be
|
||||
* flushed, but not the visible IDs themselves.</p>
|
||||
*/
|
||||
void clearServiceCache(void);
|
||||
|
||||
/**
|
||||
* <p>Return a map from visible IDs to factories.
|
||||
* This must only be called when the mutex is held.</p>
|
||||
*
|
||||
* @param status the error code status.
|
||||
* @return a Hashtable containing mappings from visible
|
||||
* IDs to factories.
|
||||
*/
|
||||
const Hashtable* getVisibleIDMap(UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* <p>Allow subclasses to read the time stamp.</p>
|
||||
*
|
||||
* @return the timestamp.
|
||||
*/
|
||||
int32_t getTimestamp(void) const;
|
||||
|
||||
/**
|
||||
* <p>Return the number of registered factories.</p>
|
||||
*
|
||||
* @return the number of factories registered at the time of the call.
|
||||
*/
|
||||
int32_t countFactories(void) const;
|
||||
|
||||
private:
|
||||
|
||||
friend class ::ICUServiceTest; // give tests access to countFactories.
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
/* ICUSERV_H */
|
||||
#endif
|
||||
|
187
source/common/servlk.cpp
Normal file
187
source/common/servlk.cpp
Normal file
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "uresimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "servloc.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "uhash.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Convenience overload: builds a key for any kind by delegating to the
 * four-argument createWithCanonicalFallback with KIND_ANY.
 */
LocaleKey*
LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
                                       const UnicodeString* canonicalFallbackID,
                                       UErrorCode& status)
{
    LocaleKey* key = LocaleKey::createWithCanonicalFallback(primaryID,
                                                            canonicalFallbackID,
                                                            KIND_ANY,
                                                            status);
    return key;
}
|
||||
|
||||
/**
 * Creates a heap-allocated LocaleKey for primaryID.
 *
 * Returns NULL without allocating when status already indicates failure or
 * when primaryID is NULL. Otherwise the primary ID is canonicalized with
 * LocaleUtility::canonicalLocaleString and handed to the constructor together
 * with the (already canonical) fallback ID and the kind.
 */
LocaleKey*
LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
                                       const UnicodeString* canonicalFallbackID,
                                       int32_t kind,
                                       UErrorCode& status)
{
    if (U_FAILURE(status) || primaryID == NULL) {
        return NULL;
    }

    UnicodeString canonicalPrimaryID;
    LocaleUtility::canonicalLocaleString(primaryID, canonicalPrimaryID);

    LocaleKey* key = new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind);
    return key;
}
|
||||
|
||||
/**
 * Constructs a key whose current ID starts out equal to the canonical
 * primary ID.
 *
 * The fallback ID is left bogus unless the primary ID is non-empty and a
 * fallback was supplied that differs from the primary ID.
 */
LocaleKey::LocaleKey(const UnicodeString& primaryID,
                     const UnicodeString& canonicalPrimaryID,
                     const UnicodeString* canonicalFallbackID,
                     int32_t kind)
    : ICUServiceKey(primaryID),
      _kind(kind),
      _primaryID(canonicalPrimaryID),
      _fallbackID(),
      _currentID()
{
    // Start with "no fallback"; only a usable, distinct fallback replaces it.
    _fallbackID.setToBogus();

    if (_primaryID.length() != 0 &&
        canonicalFallbackID != NULL &&
        _primaryID != *canonicalFallbackID) {
        _fallbackID = *canonicalFallbackID;
    }

    _currentID = _primaryID;
}
|
||||
|
||||
// Destructor. The body is empty: no explicit cleanup is performed here.
LocaleKey::~LocaleKey() {}
|
||||
|
||||
/**
 * Appends the decimal representation of the kind to result.
 * Nothing is appended when the kind is KIND_ANY.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::prefix(UnicodeString& result) const {
    if (_kind == KIND_ANY) {
        return result;
    }

    UChar digits[64];
    uprv_itou(digits, 64, _kind, 10, 0);
    UnicodeString kindText(digits);
    result.append(kindText);
    return result;
}
|
||||
|
||||
/**
 * Returns the kind code this key was constructed with.
 */
int32_t
LocaleKey::kind() const
{
    return this->_kind;
}
|
||||
|
||||
/**
 * Appends the canonical primary ID to result (existing contents are kept).
 *
 * @return the reference returned by the append call.
 */
UnicodeString&
LocaleKey::canonicalID(UnicodeString& result) const
{
    UnicodeString& appended = result.append(_primaryID);
    return appended;
}
|
||||
|
||||
/**
 * Appends the current ID to result. When the current ID is bogus
 * (fallback has been exhausted), result is returned unchanged.
 */
UnicodeString&
LocaleKey::currentID(UnicodeString& result) const
{
    if (_currentID.isBogus()) {
        return result;
    }
    result.append(_currentID);
    return result;
}
|
||||
|
||||
/**
 * Appends the current descriptor to result: the kind prefix, then
 * PREFIX_DELIMITER, then the current ID.
 *
 * When the current ID is bogus, result is set to bogus instead.
 */
UnicodeString&
LocaleKey::currentDescriptor(UnicodeString& result) const
{
    if (_currentID.isBogus()) {
        result.setToBogus();
        return result;
    }

    prefix(result).append(PREFIX_DELIMITER).append(_currentID);
    return result;
}
|
||||
|
||||
/**
 * Initializes result from the canonical primary ID via
 * LocaleUtility::initLocaleFromName and returns what that helper returns.
 */
Locale&
LocaleKey::canonicalLocale(Locale& result) const
{
    Locale& initialized = LocaleUtility::initLocaleFromName(_primaryID, result);
    return initialized;
}
|
||||
|
||||
/**
 * Initializes result from the current ID via
 * LocaleUtility::initLocaleFromName and returns what that helper returns.
 */
Locale&
LocaleKey::currentLocale(Locale& result) const
{
    Locale& initialized = LocaleUtility::initLocaleFromName(_currentID, result);
    return initialized;
}
|
||||
|
||||
/**
 * Advances the key to its next fallback, if any.
 *
 * Steps are tried in this order, and the first one that applies wins:
 *   1. drop everything from the last underscore of the current ID onward;
 *   2. switch to the fallback ID (which is then marked bogus so it is used once);
 *   3. truncate a non-empty current ID to the empty string.
 * If none applies, the current ID becomes bogus.
 *
 * @return TRUE if the current ID was changed to a new fallback, FALSE if the
 *         key was already exhausted or has just become exhausted.
 */
UBool
LocaleKey::fallback()
{
    if (_currentID.isBogus()) {
        return FALSE;
    }

    const int32_t lastUnderscore = _currentID.lastIndexOf(UNDERSCORE_CHAR);
    if (lastUnderscore != -1) {
        // Truncate whichever ID (primary or fallback) we are currently walking.
        _currentID.remove(lastUnderscore);
        return TRUE;
    }

    if (!_fallbackID.isBogus()) {
        _currentID = _fallbackID;
        _fallbackID.setToBogus();
        return TRUE;
    }

    if (_currentID.length() > 0) {
        // Final step: the empty ID.
        _currentID.remove(0);
        return TRUE;
    }

    _currentID.setToBogus();
    return FALSE;
}
|
||||
|
||||
/**
 * Tests whether this key's canonical primary ID is a leading component of id.
 *
 * A copy of id is first passed through parseSuffix (presumably stripping any
 * kind prefix -- confirm against ICUServiceKey). The result matches when it
 * starts with the primary ID and either ends there or continues with an
 * underscore.
 */
UBool
LocaleKey::isFallbackOf(const UnicodeString& id) const
{
    UnicodeString candidate(id);
    parseSuffix(candidate);

    if (candidate.indexOf(_primaryID) != 0) {
        return FALSE;
    }

    const int32_t primaryLength = _primaryID.length();
    return candidate.length() == primaryLength ||
           candidate.charAt(primaryLength) == UNDERSCORE_CHAR;
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
/**
 * Debug aid (SERVICE_DEBUG builds only): appends the base-class debug output
 * followed by this key's kind, primary ID, fallback ID and current ID.
 */
UnicodeString&
LocaleKey::debug(UnicodeString& result) const
{
    ICUServiceKey::debug(result);

    result.append(" kind: ").append(_kind);
    result.append(" primaryID: ").append(_primaryID);
    result.append(" fallbackID: ").append(_fallbackID);
    result.append(" currentID: ").append(_currentID);

    return result;
}
|
||||
|
||||
/**
 * Debug aid (SERVICE_DEBUG builds only): appends this class's name.
 */
UnicodeString&
LocaleKey::debugClass(UnicodeString& result) const
{
    UnicodeString& appended = result.append("LocaleKey ");
    return appended;
}
|
||||
#endif
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey)
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
151
source/common/servlkf.cpp
Normal file
151
source/common/servlkf.cpp
Normal file
|
@ -0,0 +1,151 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "uresimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "servloc.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "uhash.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
 * Constructs a factory with the given coverage and an empty name.
 */
LocaleKeyFactory::LocaleKeyFactory(int32_t coverage)
    : _name(),
      _coverage(coverage)
{}
|
||||
|
||||
/**
 * Constructs a factory with the given coverage and a copy of name.
 */
LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name)
    : _name(name),
      _coverage(coverage)
{}
|
||||
|
||||
// Destructor. The body is empty: no explicit cleanup is performed here.
LocaleKeyFactory::~LocaleKeyFactory() {}
|
||||
|
||||
/**
 * Creates a service object for key, or returns NULL if this factory does not
 * handle it.
 *
 * When handlesKey accepts the key, the key is treated as a LocaleKey and its
 * kind and current locale are forwarded to handleCreate.
 */
UObject*
LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
{
    if (!handlesKey(key, status)) {
        return NULL;
    }

    // The key is assumed to be a LocaleKey at this point; no runtime type
    // check is made here.
    const LocaleKey& localeKey = static_cast<const LocaleKey&>(key);
    const int32_t kind = localeKey.kind();

    Locale loc;
    localeKey.currentLocale(loc);

    return handleCreate(loc, kind, service, status);
}
|
||||
|
||||
/**
 * Returns TRUE when the key's current ID has a non-NULL entry in the table
 * returned by getSupportedIDs; FALSE otherwise, including when that table
 * is NULL.
 */
UBool
LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const
{
    const Hashtable* supported = getSupportedIDs(status);
    if (supported == NULL) {
        return FALSE;
    }

    UnicodeString currentId;
    key.currentID(currentId);
    return supported->get(currentId) != NULL;
}
|
||||
|
||||
void
|
||||
LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const {
|
||||
const Hashtable* supported = getSupportedIDs(status);
|
||||
if (supported) {
|
||||
UBool visible = (_coverage & 0x1) == 0;
|
||||
|
||||
const UHashElement* elem = NULL;
|
||||
int32_t pos = 0;
|
||||
while ((elem = supported->nextElement(pos)) != NULL) {
|
||||
const UnicodeString& id = *((const UnicodeString*)elem->key.pointer);
|
||||
if (!visible) {
|
||||
result.remove(id);
|
||||
} else {
|
||||
result.put(id, (void*)this, status); // this is dummy non-void marker used for set semantics
|
||||
if (U_FAILURE(status)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Produces the display name, localized for locale, of the locale named by id.
 *
 * If the low bit of _coverage is set (not visible), result is set to bogus.
 * No check is made that id is actually supported: the caller is assumed to
 * have routed the request here because some fallback of id is supported.
 */
UnicodeString&
LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
{
    if ((_coverage & 0x1) != 0) {
        result.setToBogus();
        return result;
    }

    Locale loc;
    LocaleUtility::initLocaleFromName(id, loc);
    return loc.getDisplayName(locale, result);
}
|
||||
|
||||
/**
 * Default creation hook: ignores all arguments and returns NULL.
 * Subclasses override this to build the actual service object.
 */
UObject*
LocaleKeyFactory::handleCreate(const Locale&     /* loc */,
                               int32_t           /* kind */,
                               const ICUService* /* service */,
                               UErrorCode&       /* status */) const
{
    return NULL;
}
|
||||
|
||||
//UBool
|
||||
//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const {
|
||||
// const Hashtable* ids = getSupportedIDs(status);
|
||||
// return ids && ids->get(id);
|
||||
//}
|
||||
|
||||
/**
 * Default implementation: ignores status and returns NULL, meaning this
 * factory reports no supported IDs.
 */
const Hashtable*
LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const
{
    return NULL;
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
/**
 * Debug aid (SERVICE_DEBUG builds only): appends the class name followed by
 * this factory's name and coverage.
 */
UnicodeString&
LocaleKeyFactory::debug(UnicodeString& result) const
{
    debugClass(result);

    result.append(", name: ").append(_name);
    result.append(", coverage: ").append(_coverage);

    return result;
}
|
||||
|
||||
/**
 * Debug aid (SERVICE_DEBUG builds only): appends this class's name.
 */
UnicodeString&
LocaleKeyFactory::debugClass(UnicodeString& result) const
{
    UnicodeString& appended = result.append("LocaleKeyFactory");
    return appended;
}
|
||||
#endif
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory)
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
550
source/common/servloc.h
Normal file
550
source/common/servloc.h
Normal file
|
@ -0,0 +1,550 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef ICULSERV_H
|
||||
#define ICULSERV_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if UCONFIG_NO_SERVICE
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* Allow the declaration of APIs with pointers to ICUService
|
||||
* even when service is removed from the build.
|
||||
*/
|
||||
class ICULocaleService;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/strenum.h"
|
||||
|
||||
#include "hash.h"
|
||||
#include "uvector.h"
|
||||
|
||||
#include "serv.h"
|
||||
#include "locutil.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ICULocaleService;
|
||||
|
||||
class LocaleKey;
|
||||
class LocaleKeyFactory;
|
||||
class SimpleLocaleKeyFactory;
|
||||
class ServiceListener;
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>A subclass of ICUServiceKey that implements a locale fallback mechanism.
|
||||
* The first locale to search for is the locale provided by the
|
||||
* client, and the fallback locale to search for is the current
|
||||
* default locale. If a prefix is present, the currentDescriptor
|
||||
* includes it before the locale proper, separated by "/". This
|
||||
* is the default key instantiated by ICULocaleService.</p>
|
||||
*
|
||||
* <p>Canonicalization adjusts the locale string so that the
|
||||
* section before the first underscore is in lower case, and the rest
|
||||
* is in upper case, with no trailing underscores.</p>
|
||||
*/
|
||||
|
||||
class U_COMMON_API LocaleKey : public ICUServiceKey {
|
||||
private:
|
||||
int32_t _kind;
|
||||
UnicodeString _primaryID;
|
||||
UnicodeString _fallbackID;
|
||||
UnicodeString _currentID;
|
||||
|
||||
public:
|
||||
enum {
|
||||
KIND_ANY = -1
|
||||
};
|
||||
|
||||
/**
|
||||
* Create a LocaleKey with canonical primary and fallback IDs.
|
||||
*/
|
||||
static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
|
||||
const UnicodeString* canonicalFallbackID,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Create a LocaleKey with canonical primary and fallback IDs.
|
||||
*/
|
||||
static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
|
||||
const UnicodeString* canonicalFallbackID,
|
||||
int32_t kind,
|
||||
UErrorCode& status);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* PrimaryID is the user's requested locale string,
|
||||
* canonicalPrimaryID is this string in canonical form,
|
||||
* fallbackID is the current default locale's string in
|
||||
* canonical form.
|
||||
*/
|
||||
LocaleKey(const UnicodeString& primaryID,
|
||||
const UnicodeString& canonicalPrimaryID,
|
||||
const UnicodeString* canonicalFallbackID,
|
||||
int32_t kind);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Append the prefix associated with the kind, or nothing if the kind is KIND_ANY.
|
||||
*/
|
||||
virtual UnicodeString& prefix(UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* Return the kind code associated with this key.
|
||||
*/
|
||||
virtual int32_t kind() const;
|
||||
|
||||
/**
|
||||
* Return the canonicalID.
|
||||
*/
|
||||
virtual UnicodeString& canonicalID(UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* Return the currentID.
|
||||
*/
|
||||
virtual UnicodeString& currentID(UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* Return the (canonical) current descriptor, or a bogus string if there is no current ID.
|
||||
*/
|
||||
virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
|
||||
|
||||
/**
|
||||
* Convenience method to return the locale corresponding to the (canonical) original ID.
|
||||
*/
|
||||
virtual Locale& canonicalLocale(Locale& result) const;
|
||||
|
||||
/**
|
||||
* Convenience method to return the locale corresponding to the (canonical) current ID.
|
||||
*/
|
||||
virtual Locale& currentLocale(Locale& result) const;
|
||||
|
||||
/**
|
||||
* <p>If the key has a fallback, modify the key and return true,
|
||||
* otherwise return false.</p>
|
||||
*
|
||||
* <p>First falls back through the primary ID, then through
|
||||
* the fallbackID. The final fallback is the empty string,
|
||||
* unless the primary id was the empty string, in which case
|
||||
* there is no fallback.</p>
|
||||
*/
|
||||
virtual UBool fallback();
|
||||
|
||||
/**
|
||||
* Return true if a key created from id matches, or would eventually
|
||||
* fallback to match, the canonical ID of this key.
|
||||
*/
|
||||
virtual UBool isFallbackOf(const UnicodeString& id) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* UObject boilerplate.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~LocaleKey();
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
public:
|
||||
virtual UnicodeString& debug(UnicodeString& result) const;
|
||||
virtual UnicodeString& debugClass(UnicodeString& result) const;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* A subclass of ICUServiceFactory that uses LocaleKeys, and is able to
|
||||
* 'cover' more specific locales with more general locales that it
|
||||
* supports.
|
||||
*
|
||||
* <p>Coverage may be either of the values VISIBLE or INVISIBLE.
|
||||
*
|
||||
* <p>'Visible' indicates that the specific locale(s) supported by
|
||||
* the factory are registered in getSupportedIDs, 'Invisible'
|
||||
* indicates that they are not.
|
||||
*
|
||||
* <p>Localization of visible ids is handled
|
||||
* by the handling factory, regardless of kind.
|
||||
*/
|
||||
class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory {
|
||||
protected:
|
||||
const UnicodeString _name;
|
||||
const int32_t _coverage;
|
||||
|
||||
public:
|
||||
enum {
|
||||
/**
|
||||
* Coverage value indicating that the factory makes
|
||||
* its locales visible, and does not cover more specific
|
||||
* locales.
|
||||
*/
|
||||
VISIBLE = 0,
|
||||
|
||||
/**
|
||||
* Coverage value indicating that the factory does not make
|
||||
* its locales visible, and does not cover more specific
|
||||
* locales.
|
||||
*/
|
||||
INVISIBLE = 1
|
||||
};
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~LocaleKeyFactory();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Constructor used by subclasses.
|
||||
*/
|
||||
LocaleKeyFactory(int32_t coverage);
|
||||
|
||||
/**
|
||||
* Constructor used by subclasses.
|
||||
*/
|
||||
LocaleKeyFactory(int32_t coverage, const UnicodeString& name);
|
||||
|
||||
/**
|
||||
* Implement superclass abstract method. This checks the currentID of
|
||||
* the key against the supported IDs, and passes the current locale and
|
||||
* kind off to handleCreate (which subclasses must implement).
|
||||
*/
|
||||
public:
|
||||
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
|
||||
|
||||
protected:
|
||||
virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Override of superclass method. This adjusts the result based
|
||||
* on the coverage rule for this factory.
|
||||
*/
|
||||
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Return a localized name for the locale represented by id.
|
||||
*/
|
||||
virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement
|
||||
* this instead of create. The default returns NULL.
|
||||
*/
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Return true if this id is one the factory supports (visible or
|
||||
* otherwise).
|
||||
*/
|
||||
// virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Return the set of ids that this factory supports (visible or
|
||||
* otherwise). This can be called often and might need to be
|
||||
* cached if it is expensive to create.
|
||||
*/
|
||||
virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* UObject boilerplate.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
public:
|
||||
virtual UnicodeString& debug(UnicodeString& result) const;
|
||||
virtual UnicodeString& debugClass(UnicodeString& result) const;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* A LocaleKeyFactory that just returns a single object for a kind/locale.
|
||||
*/
|
||||
|
||||
class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory {
|
||||
private:
|
||||
UObject* _obj;
|
||||
UnicodeString _id;
|
||||
const int32_t _kind;
|
||||
|
||||
public:
|
||||
SimpleLocaleKeyFactory(UObject* objToAdopt,
|
||||
const UnicodeString& locale,
|
||||
int32_t kind,
|
||||
int32_t coverage);
|
||||
|
||||
SimpleLocaleKeyFactory(UObject* objToAdopt,
|
||||
const Locale& locale,
|
||||
int32_t kind,
|
||||
int32_t coverage);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~SimpleLocaleKeyFactory();
|
||||
|
||||
/**
|
||||
* Override of superclass method. Returns the service object if kind/locale match. Service is not used.
|
||||
*/
|
||||
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Override of superclass method. This adjusts the result based
|
||||
* on the coverage rule for this factory.
|
||||
*/
|
||||
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Return true if this id is equal to the locale name.
|
||||
*/
|
||||
//virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
|
||||
|
||||
|
||||
public:
|
||||
/**
|
||||
* UObject boilerplate.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
public:
|
||||
virtual UnicodeString& debug(UnicodeString& result) const;
|
||||
virtual UnicodeString& debugClass(UnicodeString& result) const;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* A LocaleKeyFactory that creates a service based on the ICU locale data.
|
||||
* This is a base class for most ICU factories. Subclasses instantiate it
|
||||
* with a constructor that takes a bundle name, which determines the supported
|
||||
* IDs. Subclasses then override handleCreate to create the actual service
|
||||
* object. The default implementation returns a resource bundle.
|
||||
*/
|
||||
class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory
|
||||
{
|
||||
protected:
|
||||
UnicodeString _bundleName;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Convenience constructor that uses the main ICU bundle name.
|
||||
*/
|
||||
ICUResourceBundleFactory();
|
||||
|
||||
/**
|
||||
* A service factory based on ICU resource data in resources with
|
||||
* the given name. This should be a 'path' that can be passed to
|
||||
* ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL.
|
||||
* The empty string is equivalent to U_ICUDATA.
|
||||
*/
|
||||
ICUResourceBundleFactory(const UnicodeString& bundleName);
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
virtual ~ICUResourceBundleFactory();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Return the supported IDs. This is the set of all locale names in ICULocaleData.
|
||||
*/
|
||||
virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Create the service. The default implementation returns the resource bundle
|
||||
* for the locale, ignoring kind, and service.
|
||||
*/
|
||||
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
|
||||
|
||||
public:
|
||||
/**
|
||||
* UObject boilerplate.
|
||||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID();
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
public:
|
||||
virtual UnicodeString& debug(UnicodeString& result) const;
|
||||
virtual UnicodeString& debugClass(UnicodeString& result) const;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
/**
 * An ICUService whose lookups and registrations can be expressed with
 * Locale objects.  Keys are created with the current default locale as the
 * fallback; see validateFallbackLocale().
 */
class U_COMMON_API ICULocaleService : public ICUService
{
private:
    Locale fallbackLocale;            // default locale seen by the last validateFallbackLocale()
    UnicodeString fallbackLocaleName; // refreshed together with fallbackLocale
    UMTX llock;                       // held while the two fallback fields are checked/updated

public:
    /** Constructs an unnamed ICULocaleService. */
    ICULocaleService();

    /** Constructs an ICULocaleService with a name (useful for debugging). */
    ICULocaleService(const UnicodeString& name);

    /** Destructor. */
    virtual ~ICULocaleService();

#if 0
    // redeclare because of overload resolution rules?
    // no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char*
    // need some compiler flag to remove warnings
    UObject* get(const UnicodeString& descriptor, UErrorCode& status) const {
        return ICUService::get(descriptor, status);
    }

    UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const {
        return ICUService::get(descriptor, actualReturn, status);
    }
#endif

    /**
     * Locale convenience lookup.  Forwards to
     * get(locale, kind, actualReturn, status) with LocaleKey::KIND_ANY for
     * the kind and NULL for actualReturn.
     */
    UObject* get(const Locale& locale, UErrorCode& status) const;

    /**
     * Locale convenience lookup.  Forwards to
     * get(locale, kind, actualReturn, status) with NULL for actualReturn.
     */
    UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;

    /**
     * Locale convenience lookup.  Forwards to
     * get(locale, kind, actualReturn, status) with LocaleKey::KIND_ANY for
     * the kind.
     */
    UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;

    /**
     * Locale lookup.  Builds a key from the locale's name and kind via
     * createKey, calls getKey, and, when actualReturn is not NULL and a
     * service object was found, stores the actual ID reported by getKey
     * (with any prefix stripped) into *actualReturn as a Locale.
     */
    UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;

    /**
     * Locale convenience registration.  Forwards to
     * registerInstance(objToAdopt, locale, kind, coverage, status) with
     * LocaleKey::KIND_ANY for the kind and LocaleKeyFactory::VISIBLE for
     * the coverage.
     */
    virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);

    /**
     * Locale convenience registration.  Forwards to
     * registerInstance(objToAdopt, locale, kind, coverage, status) with
     * LocaleKeyFactory::VISIBLE for the coverage.
     */
    virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);

    /**
     * Locale registration.  Instantiates a SimpleLocaleKeyFactory for the
     * object and registers that factory.
     */
    virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);


    /**
     * (Stop compiler from complaining about hidden overrides.)
     * Since both UnicodeString and Locale have constructors that take const char*, adding a public
     * method that takes UnicodeString causes ambiguity at call sites that use const char*.
     * We really need a flag that is understood by all compilers that will suppress the warning about
     * hidden overrides.
     */
    virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status);

    /**
     * Locale convenience method.  Returns the standard enumeration of
     * service IDs.
     */
    virtual StringEnumeration* getAvailableLocales(void) const;

protected:

    /**
     * Returns the name of the current fallback locale.  If the default
     * locale has changed since the last call, the service cache is cleared.
     */
    const UnicodeString& validateFallbackLocale() const;

    /** Overrides the superclass createKey method. */
    virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;

    /** Additional createKey that also takes a kind. */
    virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;

    friend class ServiceEnumeration;
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
/* ICULSERV_H */
|
||||
#endif
|
||||
|
297
source/common/servls.cpp
Normal file
297
source/common/servls.cpp
Normal file
|
@ -0,0 +1,297 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "uresimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "servloc.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "uhash.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Unnamed service: the fallback locale starts as the current default locale
// and the lock guarding the fallback fields is initialized.
ICULocaleService::ICULocaleService()
    : fallbackLocale(Locale::getDefault()),
      llock(0)
{
    umtx_init(&llock);
}
|
||||
|
||||
// Named service: dname is handed to the ICUService base; the remaining
// setup matches the unnamed constructor.
ICULocaleService::ICULocaleService(const UnicodeString& dname)
    : ICUService(dname),
      fallbackLocale(Locale::getDefault()),
      llock(0)
{
    umtx_init(&llock);
}
|
||||
|
||||
// Releases the mutex set up by the constructors.
ICULocaleService::~ICULocaleService()
{
    umtx_destroy(&llock);
}
|
||||
|
||||
// Lookup for any kind, without reporting the locale that actually matched.
UObject*
ICULocaleService::get(const Locale& locale, UErrorCode& status) const
{
    Locale* const noActualLocale = NULL;
    return get(locale, LocaleKey::KIND_ANY, noActualLocale, status);
}
|
||||
|
||||
// Lookup for a specific kind, without reporting the locale that actually matched.
UObject*
ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const
{
    Locale* const noActualLocale = NULL;
    return get(locale, kind, noActualLocale, status);
}
|
||||
|
||||
// Lookup for any kind; the matched locale is reported through actualReturn.
UObject*
ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const
{
    const int32_t anyKind = LocaleKey::KIND_ANY;
    return get(locale, anyKind, actualReturn, status);
}
|
||||
|
||||
// Core locale lookup.
//
// Builds a key from the locale's name and the requested kind, asks getKey()
// for the service object and, if the caller supplied actualReturn and an
// object was found, converts the actual ID (suffix only) into *actualReturn.
// Returns NULL on failure; status is set to U_MEMORY_ALLOCATION_ERROR when
// the locale name could not be turned into a UnicodeString.
UObject*
ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString locName(locale.getName(), -1, US_INV);
    if (locName.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    ICUServiceKey* key = createKey(&locName, kind, status);
    if (key == NULL) {
        return NULL;
    }

    UObject* found = NULL;
    if (actualReturn != NULL) {
        UnicodeString actualID;
        found = getKey(*key, &actualID, status);
        if (found != NULL) {
            // Strip any prefix from the actual ID before building the Locale.
            key->parseSuffix(actualID);
            LocaleUtility::initLocaleFromName(actualID, *actualReturn);
        }
    } else {
        found = getKey(*key, status);
    }

    delete key;
    return found;
}
|
||||
|
||||
|
||||
// Registration by locale name: converts the name to a Locale and maps the
// visible flag onto the VISIBLE / INVISIBLE coverage values, using KIND_ANY.
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale,
                                   UBool visible, UErrorCode& status)
{
    Locale loc;
    LocaleUtility::initLocaleFromName(locale, loc);

    const int32_t coverage = visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE;
    return registerInstance(objToAdopt, loc, LocaleKey::KIND_ANY, coverage, status);
}
|
||||
|
||||
// Registration for any kind with VISIBLE coverage.
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status)
{
    const int32_t anyKind = LocaleKey::KIND_ANY;
    const int32_t coverage = LocaleKeyFactory::VISIBLE;
    return registerInstance(objToAdopt, locale, anyKind, coverage, status);
}
|
||||
|
||||
// Registration for a specific kind with VISIBLE coverage.
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status)
{
    const int32_t coverage = LocaleKeyFactory::VISIBLE;
    return registerInstance(objToAdopt, locale, kind, coverage, status);
}
|
||||
|
||||
// Wraps objToAdopt in a SimpleLocaleKeyFactory and registers that factory.
//
// objToAdopt is adopted: on success it is owned by the new factory.  If the
// factory cannot be allocated the object is deleted here, NULL is returned
// and status is set to U_MEMORY_ALLOCATION_ERROR (unless status already
// carries an earlier failure), so the caller can tell the registration did
// not happen.
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status)
{
    ICUServiceFactory* factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
    if (factory == NULL) {
        delete objToAdopt;
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return NULL;
    }
    return registerFactory(factory, status);
}
|
||||
|
||||
#if 0
// Disabled UnicodeString-based registerInstance overloads, kept for reference only.

URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status)
{
    return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
}

URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status)
{
    return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY,
                            visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE,
                            status);
}

URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status)
{
    ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
    if (factory != NULL) {
        return registerFactory(factory, status);
    }
    delete objToAdopt;
    return NULL;
}
#endif
|
||||
|
||||
class ServiceEnumeration : public StringEnumeration {
|
||||
private:
|
||||
const ICULocaleService* _service;
|
||||
int32_t _timestamp;
|
||||
UVector _ids;
|
||||
int32_t _pos;
|
||||
|
||||
private:
|
||||
ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
|
||||
: _service(service)
|
||||
, _timestamp(service->getTimestamp())
|
||||
, _ids(uhash_deleteUnicodeString, NULL, status)
|
||||
, _pos(0)
|
||||
{
|
||||
_service->getVisibleIDs(_ids, status);
|
||||
}
|
||||
|
||||
ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
|
||||
: _service(other._service)
|
||||
, _timestamp(other._timestamp)
|
||||
, _ids(uhash_deleteUnicodeString, NULL, status)
|
||||
, _pos(0)
|
||||
{
|
||||
if(U_SUCCESS(status)) {
|
||||
int32_t i, length;
|
||||
|
||||
length = other._ids.size();
|
||||
for(i = 0; i < length; ++i) {
|
||||
_ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
|
||||
}
|
||||
|
||||
if(U_SUCCESS(status)) {
|
||||
_pos = other._pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static ServiceEnumeration* create(const ICULocaleService* service) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ServiceEnumeration* result = new ServiceEnumeration(service, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
return result;
|
||||
}
|
||||
delete result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
virtual ~ServiceEnumeration() {}
|
||||
|
||||
virtual StringEnumeration *clone() const {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ServiceEnumeration *cl = new ServiceEnumeration(*this, status);
|
||||
if(U_FAILURE(status)) {
|
||||
delete cl;
|
||||
cl = NULL;
|
||||
}
|
||||
return cl;
|
||||
}
|
||||
|
||||
UBool upToDate(UErrorCode& status) const {
|
||||
if (U_SUCCESS(status)) {
|
||||
if (_timestamp == _service->getTimestamp()) {
|
||||
return TRUE;
|
||||
}
|
||||
status = U_ENUM_OUT_OF_SYNC_ERROR;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
virtual int32_t count(UErrorCode& status) const {
|
||||
return upToDate(status) ? _ids.size() : 0;
|
||||
}
|
||||
|
||||
virtual const UnicodeString* snext(UErrorCode& status) {
|
||||
if (upToDate(status) && (_pos < _ids.size())) {
|
||||
return (const UnicodeString*)_ids[_pos++];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
virtual void reset(UErrorCode& status) {
|
||||
if (status == U_ENUM_OUT_OF_SYNC_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
if (U_SUCCESS(status)) {
|
||||
_timestamp = _service->getTimestamp();
|
||||
_pos = 0;
|
||||
_service->getVisibleIDs(_ids, status);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static UClassID U_EXPORT2 getStaticClassID(void);
|
||||
virtual UClassID getDynamicClassID(void) const;
|
||||
};
|
||||
|
||||
// NOTE(review): presumably supplies the getStaticClassID()/getDynamicClassID()
// definitions that ServiceEnumeration declares but does not define here -
// confirm against the macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration)
|
||||
|
||||
// Returns a new ServiceEnumeration over this service's visible IDs
// (NULL if it could not be created).
StringEnumeration*
ICULocaleService::getAvailableLocales(void) const
{
    ServiceEnumeration* locales = ServiceEnumeration::create(this);
    return locales;
}
|
||||
|
||||
// Returns the cached fallback locale name.
//
// Under llock, compares the current default locale with the cached one; if
// they differ, the cached locale and its name are refreshed and the service
// cache is cleared.  The method is const for callers, so the update goes
// through a non-const alias of this.
const UnicodeString&
ICULocaleService::validateFallbackLocale() const
{
    const Locale& currentDefault = Locale::getDefault();
    ICULocaleService* self = (ICULocaleService*)this;
    {
        Mutex guard(&self->llock);
        if (currentDefault != fallbackLocale) {
            self->fallbackLocale = currentDefault;
            LocaleUtility::initNameFromLocale(currentDefault, self->fallbackLocaleName);
            self->clearServiceCache();
        }
    }
    return fallbackLocaleName;
}
|
||||
|
||||
// Creates a LocaleKey for id that falls back to the (validated) default locale.
ICUServiceKey*
ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const
{
    const UnicodeString* fallbackName = &validateFallbackLocale();
    return LocaleKey::createWithCanonicalFallback(id, fallbackName, status);
}
|
||||
|
||||
// Same as the two-argument createKey, but the key also carries a kind.
ICUServiceKey*
ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const
{
    const UnicodeString* fallbackName = &validateFallbackLocale();
    return LocaleKey::createWithCanonicalFallback(id, fallbackName, kind, status);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
118
source/common/servnotf.cpp
Normal file
118
source/common/servnotf.cpp
Normal file
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2006, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "servnotf.h"
|
||||
#ifdef NOTIFIER_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Nothing to release; defined out of line.
EventListener::~EventListener()
{
}
|
||||
// NOTE(review): presumably supplies EventListener's getStaticClassID()/
// getDynamicClassID() definitions - confirm against the macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener)
|
||||
|
||||
// Starts with no listener list and an initialized notification lock.
ICUNotifier::ICUNotifier(void)
    : notifyLock(0), listeners(NULL)
{
    umtx_init(&notifyLock);
}
|
||||
|
||||
// Deletes the listener list while holding the lock, then destroys the lock.
// The inner scope makes sure the Mutex guard is released before
// umtx_destroy() runs.
ICUNotifier::~ICUNotifier(void) {
    {
        Mutex lmx(&notifyLock);
        delete listeners;
        listeners = NULL;
    }
    umtx_destroy(&notifyLock);
}
|
||||
|
||||
|
||||
void
|
||||
ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
|
||||
{
|
||||
if (U_SUCCESS(status)) {
|
||||
if (l == NULL) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if (acceptsListener(*l)) {
|
||||
Mutex lmx(¬ifyLock);
|
||||
if (listeners == NULL) {
|
||||
listeners = new UVector(5, status);
|
||||
} else {
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
const EventListener* el = (const EventListener*)(listeners->elementAt(i));
|
||||
if (l == el) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
listeners->addElement((void*)l, status); // cast away const
|
||||
}
|
||||
#ifdef NOTIFIER_DEBUG
|
||||
else {
|
||||
fprintf(stderr, "Listener invalid for this notifier.");
|
||||
exit(1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
|
||||
{
|
||||
if (U_SUCCESS(status)) {
|
||||
if (l == NULL) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
Mutex lmx(¬ifyLock);
|
||||
if (listeners != NULL) {
|
||||
// identity equality check
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
const EventListener* el = (const EventListener*)listeners->elementAt(i);
|
||||
if (l == el) {
|
||||
listeners->removeElementAt(i);
|
||||
if (listeners->size() == 0) {
|
||||
delete listeners;
|
||||
listeners = NULL;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ICUNotifier::notifyChanged(void)
|
||||
{
|
||||
if (listeners != NULL) {
|
||||
Mutex lmx(¬ifyLock);
|
||||
if (listeners != NULL) {
|
||||
for (int i = 0, e = listeners->size(); i < e; ++i) {
|
||||
EventListener* el = (EventListener*)listeners->elementAt(i);
|
||||
notifyListener(*el);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
124
source/common/servnotf.h
Normal file
124
source/common/servnotf.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2004, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
#ifndef ICUNOTIF_H
|
||||
#define ICUNOTIF_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if UCONFIG_NO_SERVICE
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
 * Allow the declaration of APIs with pointers to ICUNotifier
 * even when the service framework is removed from the build.
 */
|
||||
class ICUNotifier;
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#else
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#include "mutex.h"
|
||||
#include "uvector.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Base class for listeners registered with an ICUNotifier.
class U_COMMON_API EventListener : public UObject {
public:
    virtual ~EventListener();

    // UObject boilerplate.
    static UClassID U_EXPORT2 getStaticClassID();

    virtual UClassID getDynamicClassID() const;

#ifdef SERVICE_DEBUG
    // Debug description; by default just the class name.
    virtual UnicodeString& debug(UnicodeString& result) const {
        return debugClass(result);
    }

    // NOTE(review): appends "Key" rather than a name matching this class -
    // looks like a copy-paste leftover; confirm before changing.
    virtual UnicodeString& debugClass(UnicodeString& result) const {
        return result.append("Key");
    }
#endif
};
|
||||
|
||||
/**
|
||||
* <p>Abstract implementation of a notification facility. Clients add
|
||||
* EventListeners with addListener and remove them with removeListener.
|
||||
* Notifiers call notifyChanged when they wish to notify listeners.
|
||||
* This queues the listener list on the notification thread, which
|
||||
* eventually dequeues the list and calls notifyListener on each
|
||||
* listener in the list.</p>
|
||||
*
|
||||
* <p>Subclasses override acceptsListener and notifyListener
|
||||
* to add type-safe notification. AcceptsListener should return
|
||||
* true if the listener is of the appropriate type; ICUNotifier
|
||||
* itself will ensure the listener is non-null and that the
|
||||
* identical listener is not already registered with the Notifier.
|
||||
* NotifyListener should cast the listener to the appropriate
|
||||
* type and call the appropriate method on the listener.
|
||||
*/
|
||||
|
||||
class U_COMMON_API ICUNotifier : public UMemory {
|
||||
private: UMTX notifyLock;
|
||||
private: UVector* listeners;
|
||||
|
||||
public:
|
||||
ICUNotifier(void);
|
||||
|
||||
virtual ~ICUNotifier(void);
|
||||
|
||||
/**
|
||||
* Add a listener to be notified when notifyChanged is called.
|
||||
* The listener must not be null. AcceptsListener must return
|
||||
* true for the listener. Attempts to concurrently
|
||||
* register the identical listener more than once will be
|
||||
* silently ignored.
|
||||
*/
|
||||
virtual void addListener(const EventListener* l, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Stop notifying this listener. The listener must
|
||||
* not be null. Attemps to remove a listener that is
|
||||
* not registered will be silently ignored.
|
||||
*/
|
||||
virtual void removeListener(const EventListener* l, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* ICU doesn't spawn its own threads. All listeners are notified in
|
||||
* the thread of the caller. Misbehaved listeners can therefore
|
||||
* indefinitely block the calling thread. Callers should beware of
|
||||
* deadlock situations.
|
||||
*/
|
||||
virtual void notifyChanged(void);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Subclasses implement this to return TRUE if the listener is
|
||||
* of the appropriate type.
|
||||
*/
|
||||
virtual UBool acceptsListener(const EventListener& l) const = 0;
|
||||
|
||||
/**
|
||||
* Subclasses implement this to notify the listener.
|
||||
*/
|
||||
virtual void notifyListener(EventListener& l) const = 0;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
/* ICUNOTIF_H */
|
||||
#endif
|
94
source/common/servrbf.cpp
Normal file
94
source/common/servrbf.cpp
Normal file
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "uresimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "servloc.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "uhash.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Visible factory with an empty bundle name.
ICUResourceBundleFactory::ICUResourceBundleFactory()
    : LocaleKeyFactory(VISIBLE),
      _bundleName()
{
}
|
||||
|
||||
// Visible factory over the resources named by bundleName (copied).
ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName)
    : LocaleKeyFactory(VISIBLE),
      _bundleName(bundleName)
{
}
|
||||
|
||||
// Nothing to release beyond the members' own destructors.
ICUResourceBundleFactory::~ICUResourceBundleFactory()
{
}
|
||||
|
||||
// Returns the available locale names for this factory's bundle name,
// or NULL if status already indicates failure.
const Hashtable*
ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }
    return LocaleUtility::getAvailableLocaleNames(_bundleName);
}
|
||||
|
||||
// Returns a new ResourceBundle for loc, opened from this factory's bundle name.
//
// kind and service are ignored.  Returns NULL if status already indicates
// failure, or if the bundle name does not fit (with its terminator) into the
// local 20-byte buffer; in the latter case status is left untouched.
UObject*
ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    // _bundleName is a package name
    // and should only contain invariant characters
    // ??? is it always true that the max length of the bundle name is 19?
    // who made this change? -- dlf
    char pkg[20];
    const int32_t capacity = (int32_t)sizeof(pkg);
    const int32_t length = _bundleName.extract(0, INT32_MAX, pkg, capacity, US_INV);
    if (length >= capacity) {
        return NULL;
    }
    return new ResourceBundle(pkg, loc, status);
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
// Appends the base-class description followed by ", bundle: <bundle name>".
UnicodeString&
ICUResourceBundleFactory::debug(UnicodeString& result) const
{
    LocaleKeyFactory::debug(result);
    result.append(", bundle: ");
    result.append(_bundleName);
    return result;
}
|
||||
|
||||
// Appends this class's name to result.
UnicodeString&
ICUResourceBundleFactory::debugClass(UnicodeString& result) const
{
    result.append("ICUResourceBundleFactory");
    return result;
}
|
||||
#endif
|
||||
|
||||
// NOTE(review): presumably supplies ICUResourceBundleFactory's
// getStaticClassID()/getDynamicClassID() definitions - confirm against the
// macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory)
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
122
source/common/servslkf.cpp
Normal file
122
source/common/servslkf.cpp
Normal file
|
@ -0,0 +1,122 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2001-2005, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
||||
#include "unicode/resbund.h"
|
||||
#include "uresimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "servloc.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "uhash.h"
|
||||
#include "charstr.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#define UNDERSCORE_CHAR ((UChar)0x005f)
|
||||
#define AT_SIGN_CHAR ((UChar)64)
|
||||
#define PERIOD_CHAR ((UChar)46)
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
******************************************************************
|
||||
*/
|
||||
|
||||
// Factory for a single object registered under a locale name.
// objToAdopt is adopted (deleted by the destructor); locale is stored as the ID.
SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
                                               const UnicodeString& locale,
                                               int32_t kind,
                                               int32_t coverage)
    : LocaleKeyFactory(coverage),
      _obj(objToAdopt),
      _id(locale),
      _kind(kind)
{
}
|
||||
|
||||
// Factory for a single object registered under a Locale.
// objToAdopt is adopted (deleted by the destructor); the ID is derived from
// the locale with LocaleUtility::initNameFromLocale.
SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
                                               const Locale& locale,
                                               int32_t kind,
                                               int32_t coverage)
    : LocaleKeyFactory(coverage),
      _obj(objToAdopt),
      _id(),
      _kind(kind)
{
    LocaleUtility::initNameFromLocale(locale, _id);
}
|
||||
|
||||
// Deletes the adopted object and clears the pointer.
SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory()
{
    delete _obj;
    _obj = NULL;
}
|
||||
|
||||
// Returns service->cloneInstance(_obj) when the key matches this factory:
// the factory's kind is KIND_ANY or equals the key's kind, and the key's
// current ID equals the stored ID.  Returns NULL otherwise, or when status
// already indicates failure.
UObject*
SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    const LocaleKey& lkey = (const LocaleKey&)key;
    const UBool kindMatches = (_kind == LocaleKey::KIND_ANY || _kind == lkey.kind());
    if (!kindMatches) {
        return NULL;
    }

    UnicodeString keyID;
    lkey.currentID(keyID);
    if (_id == keyID) {
        return service->cloneInstance(_obj);
    }
    return NULL;
}
|
||||
|
||||
//UBool
|
||||
//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const
|
||||
//{
|
||||
// return id == _id;
|
||||
//}
|
||||
|
||||
void
|
||||
SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
|
||||
{
|
||||
if (U_SUCCESS(status)) {
|
||||
if (_coverage & 0x1) {
|
||||
result.remove(_id);
|
||||
} else {
|
||||
result.put(_id, (void*)this, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SERVICE_DEBUG
|
||||
// Appends the base-class description followed by ", id: <id>, kind: <kind>".
// NOTE(review): _kind is passed straight to UnicodeString::append; verify
// which overload is selected and that it formats the number as intended.
UnicodeString&
SimpleLocaleKeyFactory::debug(UnicodeString& result) const
{
    LocaleKeyFactory::debug(result);

    result.append(", id: ");
    result.append(_id);

    result.append(", kind: ");
    result.append(_kind);

    return result;
}
|
||||
|
||||
// Appends this class's name to result.
UnicodeString&
SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const
{
    result.append("SimpleLocaleKeyFactory");
    return result;
}
|
||||
#endif
|
||||
|
||||
// NOTE(review): presumably supplies SimpleLocaleKeyFactory's
// getStaticClassID()/getDynamicClassID() definitions - confirm against the
// macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory)
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* !UCONFIG_NO_SERVICE */
|
||||
#endif
|
||||
|
||||
|
129
source/common/sprpimpl.h
Normal file
129
source/common/sprpimpl.h
Normal file
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003-2006, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: sprpimpl.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb1
|
||||
* created by: Ram Viswanadha
|
||||
*/
|
||||
|
||||
#ifndef SPRPIMPL_H
|
||||
#define SPRPIMPL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_IDNA
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/usprep.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "utrie.h"
|
||||
#include "udataswp.h"
|
||||
#include "ubidi_props.h"
|
||||
|
||||
#define _SPREP_DATA_TYPE "spp"
|
||||
|
||||
/*
 * StringPrep type codes.  The enumerators are consecutive from 0 and line
 * up with the entries of usprepTypeNames[] (when that table is compiled in).
 */
enum UStringPrepType {
    USPREP_UNASSIGNED = 0,
    USPREP_MAP        = 1,
    USPREP_PROHIBITED = 2,
    USPREP_DELETE     = 3,
    USPREP_TYPE_LIMIT = 4
};
|
||||
|
||||
typedef enum UStringPrepType UStringPrepType;
|
||||
|
||||
#ifdef USPREP_TYPE_NAMES_ARRAY
|
||||
/* Printable names for the UStringPrepType values, indexed by enumerator value. */
static const char* usprepTypeNames[] = {
    "UNASSIGNED",
    "MAP",
    "PROHIBITED",
    "DELETE",
    "TYPE_LIMIT"
};
|
||||
#endif
|
||||
|
||||
/* Option bit flags (distinct single bits, so they can be OR-ed together). */
enum {
    _SPREP_NORMALIZATION_ON = 0x1,
    _SPREP_CHECK_BIDI_ON    = 0x2
};
|
||||
|
||||
/* Limits used when interpreting StringPrep data values. */
enum {
    _SPREP_TYPE_THRESHOLD       = 0xFFF0,
    _SPREP_MAX_INDEX_VALUE      = 0x3FBF, /* 16319 */
    _SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
};
|
||||
|
||||
/* indexes[] value names */
|
||||
/* Slot numbers within the indexes[] array of a profile. */
enum {
    /* number of bytes in StringPrep trie */
    _SPREP_INDEX_TRIE_SIZE                  = 0,
    /* The array that contains the mapping */
    _SPREP_INDEX_MAPPING_DATA_SIZE          = 1,
    /* The index of Unicode version of last entry in NormalizationCorrections.txt */
    _SPREP_NORM_CORRECTNS_LAST_UNI_VERSION  = 2,
    /* The starting index of 1 UChar mapping index in the mapping data array */
    _SPREP_ONE_UCHAR_MAPPING_INDEX_START    = 3,
    /* The starting index of 2 UChars mapping index in the mapping data array */
    _SPREP_TWO_UCHARS_MAPPING_INDEX_START   = 4,
    /* The starting index of 3 UChars mapping index in the mapping data array */
    _SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5,
    /* The starting index of 4 UChars mapping index in the mapping data array */
    _SPREP_FOUR_UCHARS_MAPPING_INDEX_START  = 6,
    /* Bit set of options to turn on in the profile */
    _SPREP_OPTIONS                          = 7,
    /* total number of slots; changing this requires a new formatVersion */
    _SPREP_INDEX_TOP                        = 16
};
|
||||
|
||||
/*
 * A pair of C strings identifying a profile.
 * NOTE(review): presumably the profile name and the data path it is loaded
 * from, used together as a lookup key -- confirm against the code that
 * creates these keys; ownership of the strings is not visible here.
 */
typedef struct UStringPrepKey {
    char* name;
    char* path;
} UStringPrepKey;
|
||||
|
||||
struct UStringPrepProfile{
|
||||
int32_t indexes[_SPREP_INDEX_TOP];
|
||||
UTrie sprepTrie;
|
||||
const uint16_t* mappingData;
|
||||
UDataMemory* sprepData;
|
||||
const UBiDiProps *bdp; /* used only if checkBiDi is set */
|
||||
int32_t refCount;
|
||||
UBool isDataLoaded;
|
||||
UBool doNFKC;
|
||||
UBool checkBiDi;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper function for populating the UParseError struct
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_syntaxError(const UChar* rules,
|
||||
int32_t pos,
|
||||
int32_t rulesLen,
|
||||
UParseError* parseError);
|
||||
|
||||
|
||||
/**
|
||||
* Swap StringPrep .spp profile data. See udataswp.h.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
usprep_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_IDNA */
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
54
source/common/stringpiece.cpp
Normal file
54
source/common/stringpiece.cpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
// Copyright (C) 2009, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//
|
||||
// Copyright 2004 and onwards Google Inc.
|
||||
//
|
||||
// Author: wilsonh@google.com (Wilson Hsieh)
|
||||
//
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "cstring.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Wraps a NUL-terminated C string without copying it.
// A NULL pointer yields a piece with a NULL ptr_ and length 0.
StringPiece::StringPiece(const char* str) : ptr_(str), length_(0) {
  if (str != NULL) {
    length_ = static_cast<int32_t>(uprv_strlen(str));
  }
}
|
||||
|
||||
// Creates the suffix of x that begins at pos.
// pos is clamped into [0, x.length_], so an out-of-range pos gives either
// the whole of x (pos < 0) or an empty piece at the end of x.
StringPiece::StringPiece(const StringPiece& x, int32_t pos) {
  const int32_t offset = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
  ptr_ = x.ptr_ + offset;
  length_ = x.length_ - offset;
}
|
||||
|
||||
// Creates the sub-piece of x that begins at pos and is at most len long.
// pos is clamped into [0, x.length_]; len is then clamped into
// [0, number of bytes of x remaining after pos].
StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) {
  const int32_t offset = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
  const int32_t remaining = x.length_ - offset;
  ptr_ = x.ptr_ + offset;
  length_ = (len < 0) ? 0 : ((len > remaining) ? remaining : len);
}
|
||||
|
||||
/* Microsft Visual Studios <= 8.0 complains about redefinition of this
|
||||
* static const class variable. However, the C++ standard states that this
|
||||
* definition is correct. Perhaps there is a bug in the Microsoft compiler.
|
||||
* This is not an issue on any other compilers (that we know of) including
|
||||
* Visual Studios 9.0.
|
||||
* Cygwin with MSVC 9.0 also complains here about redefinition.
|
||||
*/
|
||||
#if (!defined(_MSC_VER) || (_MSC_VER >= 1500)) && !defined(CYGWINMSVC)
|
||||
const int32_t StringPiece::npos;
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_END
|
1408
source/common/triedict.cpp
Normal file
1408
source/common/triedict.cpp
Normal file
File diff suppressed because it is too large
Load diff
346
source/common/triedict.h
Normal file
346
source/common/triedict.h
Normal file
|
@ -0,0 +1,346 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2006, International Business Machines Corporation and others. *
|
||||
* All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef TRIEDICT_H
|
||||
#define TRIEDICT_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
struct UEnumeration;
|
||||
struct UDataSwapper;
|
||||
struct UDataMemory;
|
||||
|
||||
/**
|
||||
* <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
|
||||
*
|
||||
* @param ds Pointer to UDataSwapper containing global data about the
|
||||
* transformation and function pointers for handling primitive
|
||||
* types.
|
||||
* @param inData Pointer to the input data to be transformed or examined.
|
||||
* @param length Length of the data, counting bytes. May be -1 for preflighting.
|
||||
* If length>=0, then transform the data.
|
||||
* If length==-1, then only determine the length of the data.
|
||||
* The length cannot be determined from the data itself for all
|
||||
* types of data (e.g., not for simple arrays of integers).
|
||||
* @param outData Pointer to the output data buffer.
|
||||
* If length>=0 (transformation), then the output buffer must
|
||||
* have a capacity of at least length.
|
||||
* If length==-1, then outData will not be used and can be NULL.
|
||||
* @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
|
||||
* fulfill U_SUCCESS on input.
|
||||
* @return The actual length of the data.
|
||||
*
|
||||
* @see UDataSwapper
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
triedict_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class StringEnumeration;
|
||||
struct CompactTrieHeader;
|
||||
|
||||
/*******************************************************************
|
||||
* TrieWordDictionary
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>TrieWordDictionary is an abstract class that represents a word
|
||||
* dictionary based on a trie. The base protocol is read-only.
|
||||
* Subclasses may allow writing.</p>
|
||||
*/
|
||||
class U_COMMON_API TrieWordDictionary : public UMemory {
 public:

    /**
     * <p>Default constructor.</p>
     */
    TrieWordDictionary();

    /**
     * <p>Virtual destructor.</p>
     */
    virtual ~TrieWordDictionary();

    /**
     * <p>Find dictionary words that match the text.</p>
     *
     * <p>There is no separate start parameter. NOTE(review): matching
     * presumably begins at the current position of <code>text</code> --
     * confirm against the implementations.</p>
     *
     * @param text A UText representing the text. The
     * iterator is left after the longest prefix match in the dictionary.
     * @param maxLength The maximum number of code units to match.
     * @param lengths An array that is filled with the lengths of words that matched.
     * @param count Filled with the number of elements output in lengths.
     * @param limit The size of the lengths array; this limits the number of words output.
     * @return The number of characters in text that were matched.
     */
    virtual int32_t matches( UText *text,
                             int32_t maxLength,
                             int32_t *lengths,
                             int &count,
                             int limit ) const = 0;

    /**
     * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
     *
     * @param status A status code recording the success of the call.
     * @return A StringEnumeration that will iterate through the whole dictionary.
     * The caller is responsible for closing it. The order is unspecified.
     */
    virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;

};
|
||||
|
||||
/*******************************************************************
|
||||
* MutableTrieDictionary
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
|
||||
* added.</p>
|
||||
*/
|
||||
|
||||
struct TernaryNode; // Forwards declaration
|
||||
|
||||
class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
 private:
    /**
     * Root node of the trie.
     * @internal
     */
    TernaryNode *fTrie;

    /**
     * UText kept for the dictionary's own internal use.
     * @internal
     */
    UText *fIter;

    friend class CompactTrieDictionary;   // For fast conversion

 public:

    /**
     * <p>Constructor.</p>
     *
     * @param median A UChar around which the trie is balanced. Ideally it
     * begins at least one word that is near the median of the set of words
     * in the dictionary.
     * @param status A status code recording the success of the call.
     */
    MutableTrieDictionary( UChar median, UErrorCode &status );

    /**
     * <p>Virtual destructor.</p>
     */
    virtual ~MutableTrieDictionary();

    /**
     * <p>Find the dictionary words that match the text.</p>
     *
     * @param text A UText representing the text. The iterator is left
     * after the longest prefix match in the dictionary.
     * @param maxLength The maximum number of code units to match.
     * @param lengths An array that receives the lengths of the words that matched.
     * @param count Receives the number of elements written to lengths.
     * @param limit The size of the lengths array; this caps the number of words output.
     * @return The number of characters in text that were matched.
     */
    virtual int32_t matches( UText *text,
                             int32_t maxLength,
                             int32_t *lengths,
                             int &count,
                             int limit ) const;

    /**
     * <p>Return a StringEnumeration over all the words in the dictionary.</p>
     *
     * @param status A status code recording the success of the call.
     * @return A StringEnumeration that iterates through the whole dictionary.
     * The caller is responsible for closing it. The order is unspecified.
     */
    virtual StringEnumeration *openWords( UErrorCode &status ) const;

    /**
     * <p>Add one word to the dictionary.</p>
     *
     * @param word A UChar buffer containing the word.
     * @param length The length of the word.
     * @param status The resultant status.
     */
    virtual void addWord( const UChar *word,
                          int32_t length,
                          UErrorCode &status);

#if 0
    /**
     * <p>Add all strings from a UEnumeration to the dictionary.</p>
     *
     * @param words A UEnumeration that will return the desired words.
     * @param status The resultant status.
     */
    virtual void addWords( UEnumeration *words, UErrorCode &status );
#endif

 protected:
    /**
     * <p>Search the dictionary for matches.</p>
     *
     * @param text A UText representing the text. The iterator is left
     * after the longest prefix match in the dictionary.
     * @param maxLength The maximum number of code units to match.
     * @param lengths An array that receives the lengths of the words that matched.
     * @param count Receives the number of elements written to lengths.
     * @param limit The size of the lengths array; this caps the number of words output.
     * @param parent The parent of the current node.
     * @param pMatched The returned parent node matched the input.
     * @return The number of characters in text that were matched.
     */
    virtual int32_t search( UText *text,
                            int32_t maxLength,
                            int32_t *lengths,
                            int &count,
                            int limit,
                            TernaryNode *&parent,
                            UBool &pMatched ) const;

 private:
    /**
     * <p>Private constructor. The root node is not allocated.</p>
     *
     * @param status A status code recording the success of the call.
     */
    MutableTrieDictionary( UErrorCode &status );
};
|
||||
|
||||
/*******************************************************************
|
||||
* CompactTrieDictionary
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
|
||||
* to save space.</p>
|
||||
*/
|
||||
class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
 private:
    /**
     * The root node of the trie
     */
    const CompactTrieHeader *fData;

    /**
     * A UBool indicating whether or not we own the fData.
     */
    UBool fOwnData;

    /**
     * NOTE(review): presumably the UDataMemory adopted by the UDataMemory
     * constructor, if that constructor was used -- confirm in triedict.cpp.
     */
    UDataMemory *fUData;

 public:
    /**
     * <p>Construct a dictionary from a UDataMemory.</p>
     *
     * @param dataObj A pointer to a UDataMemory, which is adopted
     * @param status A status code giving the result of the constructor
     */
    CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);

    /**
     * <p>Construct a dictionary from raw saved data.</p>
     *
     * @param dataObj A pointer to the raw data, which is still owned by the caller
     * @param status A status code giving the result of the constructor
     */
    CompactTrieDictionary(const void *dataObj, UErrorCode &status);

    /**
     * <p>Construct a dictionary from a MutableTrieDictionary.</p>
     *
     * @param dict The dictionary to use as input.
     * @param status A status code recording the success of the call.
     */
    CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );

    /**
     * <p>Virtual destructor.</p>
     */
    virtual ~CompactTrieDictionary();

    /**
     * <p>Find dictionary words that match the text.</p>
     *
     * @param text A UText representing the text. The
     * iterator is left after the longest prefix match in the dictionary.
     * @param rangeEnd Corresponds to the base class parameter maxLength,
     * documented there as the maximum number of code units to match.
     * NOTE(review): the name suggests an end offset rather than a length --
     * confirm against the implementation.
     * @param lengths An array that is filled with the lengths of words that matched.
     * @param count Filled with the number of elements output in lengths.
     * @param limit The size of the lengths array; this limits the number of words output.
     * @return The number of characters in text that were matched.
     */
    virtual int32_t matches( UText *text,
                             int32_t rangeEnd,
                             int32_t *lengths,
                             int &count,
                             int limit ) const;

    /**
     * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
     *
     * @param status A status code recording the success of the call.
     * @return A StringEnumeration that will iterate through the whole dictionary.
     * The caller is responsible for closing it. The order is unspecified.
     */
    virtual StringEnumeration *openWords( UErrorCode &status ) const;

    /**
     * <p>Return the size of the compact data.</p>
     *
     * @return The size of the dictionary's compact data.
     */
    virtual uint32_t dataSize() const;

    /**
     * <p>Return a void * pointer to the compact data, platform-endian.</p>
     *
     * @return The data for the compact dictionary, suitable for passing to the
     * constructor.
     */
    virtual const void *data() const;

    /**
     * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
     *
     * @param status A status code recording the success of the call.
     * @return A MutableTrieDictionary with the same data as this dictionary
     */
    virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;

 private:

    /**
     * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
     *
     * @param dict The dictionary to convert.
     * @param status A status code recording the success of the call.
     * @return A single data blob starting with a CompactTrieHeader.
     */
    static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
                                                            UErrorCode &status );

};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/* TRIEDICT_H */
|
||||
#endif
|
236
source/common/uarrsort.c
Normal file
236
source/common/uarrsort.c
Normal file
|
@ -0,0 +1,236 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: uarrsort.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003aug04
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Internal function for sorting arrays.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "uarrsort.h"
|
||||
|
||||
/* Tuning constants for the sort implementations in this file. */
enum {
    /*
     * from Knuth: uprv_sortArray() uses insertion sort for arrays shorter than
     * this, and subQuickSort() hands ranges of at most this many items to it
     */
    MIN_QSORT = 9,

    /* largest item size, in bytes, for which the temporary items live on the stack */
    STACK_ITEM_SIZE = 200
};
|
||||
|
||||
/* UComparator convenience implementations ---------------------------------- */
|
||||
|
||||
/*
 * UComparator for arrays of uint16_t.
 * context is not used. Returns the difference left-right of the two values,
 * i.e. <0, 0 or >0 as left is less than, equal to or greater than right.
 */
U_CAPI int32_t U_EXPORT2
uprv_uint16Comparator(const void *context, const void *left, const void *right) {
    const uint16_t l=*(const uint16_t *)left;
    const uint16_t r=*(const uint16_t *)right;

    /* both values fit into 16 bits, so their int32_t difference cannot overflow */
    return (int32_t)l-(int32_t)r;
}
|
||||
|
||||
/*
 * UComparator for arrays of int32_t.
 * context is not used. Returns -1, 0 or 1 as left is less than, equal to or
 * greater than right.
 */
U_CAPI int32_t U_EXPORT2
uprv_int32Comparator(const void *context, const void *left, const void *right) {
    int32_t l=*(const int32_t *)left, r=*(const int32_t *)right;

    /*
     * compare directly because (l-r) would overflow the int32_t result
     * when the operands are far apart (e.g. INT32_MIN and 1), which is
     * undefined behavior and yields the wrong sign in practice
     */
    if(l<r) {
        return -1;
    } else if(l==r) {
        return 0;
    } else /* l>r */ {
        return 1;
    }
}
|
||||
|
||||
/*
 * UComparator for arrays of uint32_t.
 * context is not used. Returns -1, 0 or 1 as left is less than, equal to or
 * greater than right.
 */
U_CAPI int32_t U_EXPORT2
uprv_uint32Comparator(const void *context, const void *left, const void *right) {
    const uint32_t l=*(const uint32_t *)left;
    const uint32_t r=*(const uint32_t *)right;

    /*
     * combine the two relational results instead of subtracting,
     * because (l-r) would overflow the int32_t result
     */
    return (int32_t)(l>r)-(int32_t)(l<r);
}
|
||||
|
||||
/* Straight insertion sort from Knuth vol. III, pg. 81 ---------------------- */
|
||||
|
||||
static void
|
||||
doInsertionSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
|
||||
UComparator *cmp, const void *context, void *pv) {
|
||||
int32_t i, j;
|
||||
|
||||
for(j=start+1; j<limit; ++j) {
|
||||
/* v=array[j] */
|
||||
uprv_memcpy(pv, array+j*itemSize, itemSize);
|
||||
|
||||
for(i=j; i>start; --i) {
|
||||
if(/* v>=array[i-1] */ cmp(context, pv, array+(i-1)*itemSize)>=0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* array[i]=array[i-1]; */
|
||||
uprv_memcpy(array+i*itemSize, array+(i-1)*itemSize, itemSize);
|
||||
}
|
||||
|
||||
if(i!=j) {
|
||||
/* array[i]=v; */
|
||||
uprv_memcpy(array+i*itemSize, pv, itemSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
insertionSort(char *array, int32_t length, int32_t itemSize,
|
||||
UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
|
||||
UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1];
|
||||
void *pv;
|
||||
|
||||
/* allocate an intermediate item variable (v) */
|
||||
if(itemSize<=STACK_ITEM_SIZE) {
|
||||
pv=v;
|
||||
} else {
|
||||
pv=uprv_malloc(itemSize);
|
||||
if(pv==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
doInsertionSort(array, 0, length, itemSize, cmp, context, pv);
|
||||
|
||||
if(pv!=v) {
|
||||
uprv_free(pv);
|
||||
}
|
||||
}
|
||||
|
||||
/* QuickSort ---------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* This implementation is semi-recursive:
|
||||
* It recurses for the smaller sub-array to shorten the recursion depth,
|
||||
* and loops for the larger sub-array.
|
||||
*
|
||||
* Loosely after QuickSort algorithms in
|
||||
* Niklaus Wirth
|
||||
* Algorithmen und Datenstrukturen mit Modula-2
|
||||
* B.G. Teubner Stuttgart
|
||||
* 4. Auflage 1986
|
||||
* ISBN 3-519-02260-5
|
||||
*/
|
||||
static void
|
||||
subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
|
||||
UComparator *cmp, const void *context,
|
||||
void *px, void *pw) {
|
||||
int32_t left, right;
|
||||
|
||||
/* start and left are inclusive, limit and right are exclusive */
|
||||
do {
|
||||
if((start+MIN_QSORT)>=limit) {
|
||||
doInsertionSort(array, start, limit, itemSize, cmp, context, px);
|
||||
break;
|
||||
}
|
||||
|
||||
left=start;
|
||||
right=limit;
|
||||
|
||||
/* x=array[middle] */
|
||||
uprv_memcpy(px, array+((start+limit)/2)*itemSize, itemSize);
|
||||
|
||||
do {
|
||||
while(/* array[left]<x */
|
||||
cmp(context, array+left*itemSize, px)<0
|
||||
) {
|
||||
++left;
|
||||
}
|
||||
while(/* x<array[right-1] */
|
||||
cmp(context, px, array+(right-1)*itemSize)<0
|
||||
) {
|
||||
--right;
|
||||
}
|
||||
|
||||
/* swap array[left] and array[right-1] via w; ++left; --right */
|
||||
if(left<right) {
|
||||
--right;
|
||||
|
||||
if(left<right) {
|
||||
uprv_memcpy(pw, array+left*itemSize, itemSize);
|
||||
uprv_memcpy(array+left*itemSize, array+right*itemSize, itemSize);
|
||||
uprv_memcpy(array+right*itemSize, pw, itemSize);
|
||||
}
|
||||
|
||||
++left;
|
||||
}
|
||||
} while(left<right);
|
||||
|
||||
/* sort sub-arrays */
|
||||
if((right-start)<(limit-left)) {
|
||||
/* sort [start..right[ */
|
||||
if(start<(right-1)) {
|
||||
subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
|
||||
}
|
||||
|
||||
/* sort [left..limit[ */
|
||||
start=left;
|
||||
} else {
|
||||
/* sort [left..limit[ */
|
||||
if(left<(limit-1)) {
|
||||
subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
|
||||
}
|
||||
|
||||
/* sort [start..right[ */
|
||||
limit=right;
|
||||
}
|
||||
} while(start<(limit-1));
|
||||
}
|
||||
|
||||
static void
|
||||
quickSort(char *array, int32_t length, int32_t itemSize,
|
||||
UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
|
||||
UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1];
|
||||
void *p;
|
||||
|
||||
/* allocate two intermediate item variables (x and w) */
|
||||
if(itemSize<=STACK_ITEM_SIZE) {
|
||||
p=xw;
|
||||
} else {
|
||||
p=uprv_malloc(2*itemSize);
|
||||
if(p==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
subQuickSort(array, 0, length, itemSize,
|
||||
cmp, context, p, (char *)p+itemSize);
|
||||
|
||||
if(p!=xw) {
|
||||
uprv_free(p);
|
||||
}
|
||||
}
|
||||
|
||||
/* uprv_sortArray() API ----------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Check arguments, select an appropriate implementation,
|
||||
* cast the array to char * so that array+i*itemSize works.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_sortArray(void *array, int32_t length, int32_t itemSize,
|
||||
UComparator *cmp, const void *context,
|
||||
UBool sortStable, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
if(length<=1) {
|
||||
return;
|
||||
} else if(length<MIN_QSORT || sortStable) {
|
||||
insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode);
|
||||
/* could add heapSort or similar for stable sorting of longer arrays */
|
||||
} else {
|
||||
quickSort((char *)array, length, itemSize, cmp, context, pErrorCode);
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue