ICU-7074 copy r26745 from branch

X-SVN-Rev: 26766
This commit is contained in:
Steven R. Loomis 2009-10-08 00:05:40 +00:00
commit fed2240af3
3271 changed files with 2120060 additions and 0 deletions

69
.gitattributes vendored Normal file
View file

@ -0,0 +1,69 @@
* text=auto !eol
*.c text !eol
*.cc text !eol
*.classpath text !eol
*.cpp text !eol
*.css text !eol
*.dsp text !eol
*.dsw text !eol
*.filters text !eol
*.h text !eol
*.htm text !eol
*.html text !eol
*.in text !eol
*.java text !eol
*.launch text !eol
*.mak text !eol
*.md text !eol
*.MF text !eol
*.mk text !eol
*.pl text !eol
*.pm text !eol
*.project text !eol
*.properties text !eol
*.py text !eol
*.rc text !eol
*.sh text eol=lf
*.sln text !eol
*.stub text !eol
*.txt text !eol
*.ucm text !eol
*.vcproj text !eol
*.vcxproj text !eol
*.xml text !eol
*.xsl text !eol
*.xslt text !eol
Makefile text !eol
configure text !eol
LICENSE text !eol
README text !eol
*.bin -text
*.brk -text
*.cnv -text
*.icu -text
*.res -text
*.nrm -text
*.spp -text
*.tri2 -text
/icu4c.css -text
source/data/locales/pool.res -text
source/samples/ucnv/data02.bin -text
source/test/perf/README -text
source/test/testdata/TestFont1.otf -text
source/test/testdata/icu26_testtypes.res -text
source/test/testdata/icu26e_testtypes.res -text
source/test/testdata/importtest.bin -text
source/test/testdata/iscii.bin -text
source/test/testdata/uni-text.bin -text
# The following file types are stored in Git-LFS.
*.jar filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text

766
.gitignore vendored Normal file
View file

@ -0,0 +1,766 @@
/bin
/bin64
/include
/lib
/lib64
source/Doxyfile
source/Makefile
source/README
source/allinone/*.ncb
source/allinone/*.opt
source/allinone/*.suo
source/bin
source/common/*.ao
source/common/*.d
source/common/*.o
source/common/*.plg
source/common/Debug
source/common/Makefile
source/common/Release
source/common/common.res
source/common/common.vcproj.*.*.user
source/common/debug
source/common/icucfg.h
source/common/libicu*.*
source/common/release
source/common/svchook.mk
source/common/unicode/platform.h
source/common/x64
source/common/x86
source/config.cache
source/config.log
source/config.status
source/config/Makefile.inc
source/config/icu-config
source/config/icu-config.1
source/config/icucross.mk
source/config/pkgdata.inc
source/config/pkgdataMakefile
source/data/*.plg
source/data/Debug
source/data/Makefile
source/data/Release
source/data/icupkg.inc
source/data/in
source/data/makedata.vcproj.*.*.user
source/data/out
source/data/pkgdataMakefile
source/doc
source/extra/Makefile
source/extra/scrptrun/Makefile
source/extra/scrptrun/scrptrun.d
source/extra/scrptrun/srtest
source/extra/scrptrun/srtest.d
source/extra/uconv/*.d
source/extra/uconv/*.o
source/extra/uconv/Debug
source/extra/uconv/Makefile
source/extra/uconv/Release
source/extra/uconv/debug
source/extra/uconv/pkgdata.inc
source/extra/uconv/pkgdataMakefile
source/extra/uconv/pkgdatain.txt
source/extra/uconv/release
source/extra/uconv/resources/*.res
source/extra/uconv/uconv
source/extra/uconv/uconv.1
source/extra/uconv/uconv.plg
source/extra/uconv/uconv.vcproj.*.*.user
source/extra/uconv/uconvmsg
source/extra/uconv/x64
source/extra/uconv/x86
source/i18n/*.ao
source/i18n/*.d
source/i18n/*.o
source/i18n/*.plg
source/i18n/Debug
source/i18n/Makefile
source/i18n/Makefile.local
source/i18n/Release
source/i18n/debug
source/i18n/i18n.res
source/i18n/i18n.vcproj.*.*.user
source/i18n/release
source/i18n/x64
source/i18n/x86
source/icudefs.mk
source/io/*.ao
source/io/*.d
source/io/*.o
source/io/Debug
source/io/Makefile
source/io/Release
source/io/debug
source/io/io.res
source/io/io.vcproj.*.*.user
source/io/release
source/io/x64
source/io/x86
source/layout/*.ao
source/layout/*.d
source/layout/*.o
source/layout/*.pdb
source/layout/Debug
source/layout/Makefile
source/layout/Release
source/layout/debug
source/layout/layout.res
source/layout/layout.vcproj.*.*.user
source/layout/release
source/layout/x64
source/layout/x86
source/layoutex/*.ao
source/layoutex/*.d
source/layoutex/*.o
source/layoutex/*.pdb
source/layoutex/Debug
source/layoutex/Makefile
source/layoutex/Release
source/layoutex/debug
source/layoutex/layoutex.res
source/layoutex/layoutex.vcproj.*.*.user
source/layoutex/release
source/layoutex/x64
source/layoutex/x86
source/lib
source/samples/Makefile
source/samples/all/all.ncb
source/samples/all/all.suo
source/samples/break/Debug
source/samples/break/break.vcproj.*.*.user
source/samples/break/release
source/samples/cal/*.d
source/samples/cal/*.pdb
source/samples/cal/Debug
source/samples/cal/Makefile
source/samples/cal/Release
source/samples/cal/cal.vcproj.*.*.user
source/samples/cal/icucal
source/samples/cal/icucal.exe
source/samples/cal/release
source/samples/cal/x64
source/samples/cal/x86
source/samples/case/Debug
source/samples/case/case.vcproj.*.*.user
source/samples/case/release
source/samples/citer/Debug
source/samples/citer/citer.vcproj.*.*.user
source/samples/citer/release
source/samples/coll/Debug
source/samples/coll/coll.vcproj.*.*.user
source/samples/coll/release
source/samples/csdet/Debug
source/samples/csdet/Makefile
source/samples/csdet/csdet.vcproj.*.*.user
source/samples/csdet/release
source/samples/date/*.d
source/samples/date/*.pdb
source/samples/date/Debug
source/samples/date/Makefile
source/samples/date/Release
source/samples/date/date.vcproj.*.*.user
source/samples/date/icudate
source/samples/date/icudate.exe
source/samples/date/release
source/samples/date/x64
source/samples/date/x86
source/samples/datefmt/Debug
source/samples/datefmt/datefmt.vcproj.*.*.user
source/samples/datefmt/release
source/samples/layout/*.d
source/samples/layout/*.pdb
source/samples/layout/Debug
source/samples/layout/Makefile
source/samples/layout/Release
source/samples/layout/layout.vcproj.*.*.user
source/samples/layout/release
source/samples/layout/tmp
source/samples/legacy/Debug
source/samples/legacy/legacy.vcproj.*.*.user
source/samples/legacy/release
source/samples/msgfmt/Debug
source/samples/msgfmt/msgfmt.vcproj.*.*.user
source/samples/msgfmt/release
source/samples/numfmt/Debug
source/samples/numfmt/numfmt.vcproj.*.*.user
source/samples/numfmt/release
source/samples/props/Debug
source/samples/props/props.vcproj.*.*.user
source/samples/props/release
source/samples/strsrch/Debug
source/samples/strsrch/release
source/samples/strsrch/strsrch.vcproj.*.*.user
source/samples/translit/Debug
source/samples/translit/release
source/samples/translit/translit.vcproj.*.*.user
source/samples/uciter8/Debug
source/samples/uciter8/release
source/samples/uciter8/uciter8.vcproj.*.*.user
source/samples/ucnv/Debug
source/samples/ucnv/convsamp
source/samples/ucnv/convsamp.out
source/samples/ucnv/release
source/samples/ucnv/ucnv.vcproj.*.*.user
source/samples/udata/Debug
source/samples/udata/reader.vcproj.*.*.user
source/samples/udata/reader_Win32_Debug
source/samples/udata/reader_win32_release
source/samples/udata/release
source/samples/udata/writer.vcproj.*.*.user
source/samples/ufortune/Debug
source/samples/ufortune/Release
source/samples/ufortune/ufortune.vcproj.*.*.user
source/samples/ugrep/debug
source/samples/ugrep/release
source/samples/ugrep/ugrep.vcproj.*.*.user
source/samples/uresb/Debug
source/samples/uresb/release
source/samples/uresb/resources.vcproj.*.*.user
source/samples/uresb/uresb.vcproj.*.*.user
source/samples/ustring/Debug
source/samples/ustring/release
source/samples/ustring/ustring.vcproj.*.*.user
source/stubdata/*.ao
source/stubdata/*.d
source/stubdata/*.o
source/stubdata/*.plg
source/stubdata/Debug
source/stubdata/Makefile
source/stubdata/Release
source/stubdata/cygicudt*.*
source/stubdata/debug
source/stubdata/libicu*.*
source/stubdata/libsicu*.*
source/stubdata/release
source/stubdata/stubdata.vcproj.*.*.user
source/stubdata/stubdatabuilt.txt
source/stubdata/x64
source/stubdata/x86
source/test/Makefile
source/test/cintltst/*.d
source/test/cintltst/*.o
source/test/cintltst/*.plg
source/test/cintltst/Debug
source/test/cintltst/Makefile
source/test/cintltst/Release
source/test/cintltst/cintltst
source/test/cintltst/cintltst.exe
source/test/cintltst/cintltst.vcproj.*.*.user
source/test/cintltst/debug
source/test/cintltst/release
source/test/cintltst/x64
source/test/cintltst/x86
source/test/compat/Makefile
source/test/hdrtst/Makefile
source/test/intltest/*.d
source/test/intltest/*.o
source/test/intltest/*.plg
source/test/intltest/Debug
source/test/intltest/Makefile
source/test/intltest/Makefile.local
source/test/intltest/Release
source/test/intltest/debug
source/test/intltest/intltest
source/test/intltest/intltest.exe
source/test/intltest/intltest.vcproj.*.*.user
source/test/intltest/release
source/test/intltest/x64
source/test/intltest/x86
source/test/iotest/*.d
source/test/iotest/*.o
source/test/iotest/Debug
source/test/iotest/Makefile
source/test/iotest/Release
source/test/iotest/debug
source/test/iotest/iotest
source/test/iotest/iotest.exe
source/test/iotest/iotest.vcproj.*.*.user
source/test/iotest/release
source/test/iotest/x64
source/test/iotest/x86
source/test/letest/*.d
source/test/letest/*.o
source/test/letest/Debug
source/test/letest/Makefile
source/test/letest/Release
source/test/letest/debug
source/test/letest/letest
source/test/letest/letest.exe
source/test/letest/letest.vcproj.*.*.user
source/test/letest/release
source/test/letest/x64
source/test/letest/x86
source/test/perf/Makefile
source/test/perf/charperf/*.d
source/test/perf/charperf/*.o
source/test/perf/charperf/Debug
source/test/perf/charperf/Makefile
source/test/perf/charperf/Release
source/test/perf/charperf/charperf
source/test/perf/charperf/charperf.vcproj.*.*.user
source/test/perf/charperf/debug
source/test/perf/charperf/release
source/test/perf/collationperf/Makefile
source/test/perf/collperf/*.d
source/test/perf/collperf/*.o
source/test/perf/collperf/Debug
source/test/perf/collperf/Makefile
source/test/perf/collperf/Release
source/test/perf/collperf/collperf
source/test/perf/collperf/collperf.vcproj.*.*.user
source/test/perf/collperf/debug
source/test/perf/collperf/release
source/test/perf/convperf/Makefile
source/test/perf/convperf/convperf.vcproj.*.*.user
source/test/perf/convperf/debug
source/test/perf/convperf/release
source/test/perf/normperf/*.d
source/test/perf/normperf/*.o
source/test/perf/normperf/Debug
source/test/perf/normperf/Makefile
source/test/perf/normperf/Release
source/test/perf/normperf/debug
source/test/perf/normperf/normperf
source/test/perf/normperf/normperf.vcproj.*.*.user
source/test/perf/normperf/release
source/test/perf/perf.ncb
source/test/perf/perf.suo
source/test/perf/strsrchperf/Makefile
source/test/perf/ubrkperf/*.d
source/test/perf/ubrkperf/*.o
source/test/perf/ubrkperf/Debug
source/test/perf/ubrkperf/Makefile
source/test/perf/ubrkperf/Release
source/test/perf/ubrkperf/debug
source/test/perf/ubrkperf/release
source/test/perf/ubrkperf/ubrkperf
source/test/perf/ubrkperf/ubrkperf.vcproj.*.*.user
source/test/perf/unisetperf/*.d
source/test/perf/unisetperf/*.o
source/test/perf/unisetperf/Debug
source/test/perf/unisetperf/Makefile
source/test/perf/unisetperf/Release
source/test/perf/unisetperf/debug
source/test/perf/unisetperf/release
source/test/perf/unisetperf/unisetperf
source/test/perf/unisetperf/unisetperf.vcproj.*.*.user
source/test/perf/usetperf/*.d
source/test/perf/usetperf/*.o
source/test/perf/usetperf/Debug
source/test/perf/usetperf/Makefile
source/test/perf/usetperf/Release
source/test/perf/usetperf/debug
source/test/perf/usetperf/release
source/test/perf/usetperf/usetperf
source/test/perf/usetperf/usetperf.vcproj.*.*.user
source/test/perf/ustrperf/*.d
source/test/perf/ustrperf/*.o
source/test/perf/ustrperf/Debug
source/test/perf/ustrperf/Makefile
source/test/perf/ustrperf/Release
source/test/perf/ustrperf/charperf
source/test/perf/ustrperf/debug
source/test/perf/ustrperf/release
source/test/perf/ustrperf/stringperf.vcproj.*.*.user
source/test/perf/utfperf/*.d
source/test/perf/utfperf/*.o
source/test/perf/utfperf/Debug
source/test/perf/utfperf/Makefile
source/test/perf/utfperf/Release
source/test/perf/utfperf/debug
source/test/perf/utfperf/release
source/test/perf/utfperf/utfperf
source/test/perf/utfperf/utfperf.vcproj.*.*.user
source/test/perf/utrie2perf/Makefile
source/test/testdata/Makefile
source/test/testdata/out
source/test/testdata/pkgdata.inc
source/test/testdata/pkgdataMakefile
source/test/testmap/*.d
source/test/testmap/Debug
source/test/testmap/Makefile
source/test/testmap/Release
source/test/testmap/testmap
source/test/testmap/testmap.plg
source/test/thaitest/Makefile
source/test/threadtest/Makefile
source/tools/Makefile
source/tools/ctestfw/*.ao
source/tools/ctestfw/*.d
source/tools/ctestfw/*.o
source/tools/ctestfw/*.pdb
source/tools/ctestfw/*icutest*.dll
source/tools/ctestfw/*icutest*.exp
source/tools/ctestfw/*icutest*.lib
source/tools/ctestfw/*icutest*.lnk
source/tools/ctestfw/Debug
source/tools/ctestfw/Makefile
source/tools/ctestfw/Release
source/tools/ctestfw/ctestfw.vcproj.*.*.user
source/tools/ctestfw/debug
source/tools/ctestfw/libicutest*
source/tools/ctestfw/libsicutest*
source/tools/ctestfw/release
source/tools/ctestfw/x64
source/tools/ctestfw/x86
source/tools/dumpce/*.css
source/tools/dumpce/*.d
source/tools/dumpce/*.html
source/tools/dumpce/*.o
source/tools/dumpce/Makefile
source/tools/dumpce/dumpce
source/tools/genbidi/*.d
source/tools/genbidi/*.o
source/tools/genbidi/*.pdb
source/tools/genbidi/*.plg
source/tools/genbidi/Debug
source/tools/genbidi/Makefile
source/tools/genbidi/Release
source/tools/genbidi/debug
source/tools/genbidi/genbidi
source/tools/genbidi/genbidi.[0-9]
source/tools/genbidi/genbidi.vcproj.*.*.user
source/tools/genbidi/release
source/tools/genbidi/x64
source/tools/genbidi/x86
source/tools/genbrk/*.d
source/tools/genbrk/*.o
source/tools/genbrk/*.pdb
source/tools/genbrk/*.plg
source/tools/genbrk/Debug
source/tools/genbrk/Makefile
source/tools/genbrk/Release
source/tools/genbrk/debug
source/tools/genbrk/genbrk
source/tools/genbrk/genbrk.1
source/tools/genbrk/genbrk.vcproj.*.*.user
source/tools/genbrk/release
source/tools/genbrk/x64
source/tools/genbrk/x86
source/tools/gencase/*.d
source/tools/gencase/*.ncb
source/tools/gencase/*.o
source/tools/gencase/*.opt
source/tools/gencase/*.pdb
source/tools/gencase/*.plg
source/tools/gencase/Debug
source/tools/gencase/Makefile
source/tools/gencase/Release
source/tools/gencase/debug
source/tools/gencase/gencase
source/tools/gencase/gencase.[0-9]
source/tools/gencase/gencase.vcproj.*.*.user
source/tools/gencase/release
source/tools/gencase/x64
source/tools/gencase/x86
source/tools/genccode/*.d
source/tools/genccode/*.o
source/tools/genccode/*.pdb
source/tools/genccode/*.plg
source/tools/genccode/Debug
source/tools/genccode/Makefile
source/tools/genccode/Release
source/tools/genccode/debug
source/tools/genccode/genccode
source/tools/genccode/genccode.8
source/tools/genccode/genccode.vcproj.*.*.user
source/tools/genccode/release
source/tools/genccode/x64
source/tools/genccode/x86
source/tools/gencfu/*.d
source/tools/gencfu/*.o
source/tools/gencfu/*.pdb
source/tools/gencfu/Debug
source/tools/gencfu/Makefile
source/tools/gencfu/Release
source/tools/gencfu/debug
source/tools/gencfu/gencfu
source/tools/gencfu/gencfu.exe
source/tools/gencfu/gencfu.vcproj.*.*.user
source/tools/gencfu/release
source/tools/gencfu/x64
source/tools/gencfu/x86
source/tools/gencmn/*.d
source/tools/gencmn/*.o
source/tools/gencmn/*.pdb
source/tools/gencmn/*.plg
source/tools/gencmn/Debug
source/tools/gencmn/Makefile
source/tools/gencmn/Release
source/tools/gencmn/debug
source/tools/gencmn/gencmn
source/tools/gencmn/gencmn.8
source/tools/gencmn/gencmn.[0-9]
source/tools/gencmn/gencmn.vcproj.*.*.user
source/tools/gencmn/release
source/tools/gencmn/x64
source/tools/gencmn/x86
source/tools/gencnval/*.d
source/tools/gencnval/*.ncb
source/tools/gencnval/*.o
source/tools/gencnval/*.opt
source/tools/gencnval/*.pdb
source/tools/gencnval/*.plg
source/tools/gencnval/Debug
source/tools/gencnval/Makefile
source/tools/gencnval/Release
source/tools/gencnval/debug
source/tools/gencnval/gencnval
source/tools/gencnval/gencnval.1
source/tools/gencnval/gencnval.[0-9]
source/tools/gencnval/gencnval.vcproj.*.*.user
source/tools/gencnval/release
source/tools/gencnval/x64
source/tools/gencnval/x86
source/tools/genctd/*.d
source/tools/genctd/*.o
source/tools/genctd/*.pdb
source/tools/genctd/*.plg
source/tools/genctd/Debug
source/tools/genctd/Makefile
source/tools/genctd/Release
source/tools/genctd/debug
source/tools/genctd/genctd
source/tools/genctd/genctd.1
source/tools/genctd/genctd.vcproj.*.*.user
source/tools/genctd/release
source/tools/genctd/x64
source/tools/genctd/x86
source/tools/gendraft/udeprctd.h
source/tools/gendraft/udraft.h
source/tools/gendraft/uintrnal.h
source/tools/gendraft/usystem.h
source/tools/gennames/*.d
source/tools/gennames/*.ncb
source/tools/gennames/*.o
source/tools/gennames/*.opt
source/tools/gennames/*.pdb
source/tools/gennames/*.plg
source/tools/gennames/Debug
source/tools/gennames/Makefile
source/tools/gennames/Release
source/tools/gennames/debug
source/tools/gennames/gennames
source/tools/gennames/gennames.[0-9]
source/tools/gennames/gennames.vcproj.*.*.user
source/tools/gennames/release
source/tools/gennames/x64
source/tools/gennames/x86
source/tools/gennorm/*.d
source/tools/gennorm/*.o
source/tools/gennorm/*.pdb
source/tools/gennorm/*.plg
source/tools/gennorm/Debug
source/tools/gennorm/Makefile
source/tools/gennorm/Release
source/tools/gennorm/debug
source/tools/gennorm/gennorm
source/tools/gennorm/gennorm.[0-9]
source/tools/gennorm/gennorm.vcproj.*.*.user
source/tools/gennorm/release
source/tools/gennorm/x64
source/tools/gennorm/x86
source/tools/genpname/*.d
source/tools/genpname/*.o
source/tools/genpname/*.pdb
source/tools/genpname/*.plg
source/tools/genpname/Debug
source/tools/genpname/Makefile
source/tools/genpname/Release
source/tools/genpname/debug
source/tools/genpname/genpname
source/tools/genpname/genpname.vcproj.*.*.user
source/tools/genpname/release
source/tools/genpname/x64
source/tools/genpname/x86
source/tools/genprops/*.d
source/tools/genprops/*.ncb
source/tools/genprops/*.o
source/tools/genprops/*.opt
source/tools/genprops/*.pdb
source/tools/genprops/*.plg
source/tools/genprops/Debug
source/tools/genprops/Makefile
source/tools/genprops/Release
source/tools/genprops/debug
source/tools/genprops/genprops
source/tools/genprops/genprops.[0-9]
source/tools/genprops/genprops.vcproj.*.*.user
source/tools/genprops/release
source/tools/genprops/x64
source/tools/genprops/x86
source/tools/genrb/*.1
source/tools/genrb/*.d
source/tools/genrb/*.o
source/tools/genrb/*.pdb
source/tools/genrb/*.plg
source/tools/genrb/Debug
source/tools/genrb/Makefile
source/tools/genrb/Makefile.local
source/tools/genrb/Release
source/tools/genrb/debug
source/tools/genrb/derb
source/tools/genrb/derb.[0-9]
source/tools/genrb/derb.vcproj.*.*.user
source/tools/genrb/derb_*
source/tools/genrb/genrb
source/tools/genrb/genrb.[0-9]
source/tools/genrb/genrb.vcproj.*.*.user
source/tools/genrb/release
source/tools/genrb/temp
source/tools/genrb/x64
source/tools/genrb/x86
source/tools/genren/ICUunrenamed
source/tools/genren/Makefile.local
source/tools/genren/urename.*
source/tools/gensprep/*.8
source/tools/gensprep/*.d
source/tools/gensprep/*.o
source/tools/gensprep/*.pdb
source/tools/gensprep/Debug
source/tools/gensprep/Makefile
source/tools/gensprep/Release
source/tools/gensprep/debug
source/tools/gensprep/gensprep
source/tools/gensprep/gensprep.[0-9]
source/tools/gensprep/gensprep.plg
source/tools/gensprep/gensprep.vcproj.*.*.user
source/tools/gensprep/release
source/tools/gensprep/x64
source/tools/gensprep/x86
source/tools/gentest/*.d
source/tools/gentest/*.o
source/tools/gentest/*.pdb
source/tools/gentest/Debug
source/tools/gentest/Makefile
source/tools/gentest/Release
source/tools/gentest/debug
source/tools/gentest/gentest
source/tools/gentest/gentest.exe
source/tools/gentest/gentest.vcproj.*.*.user
source/tools/gentest/release
source/tools/gentest/x64
source/tools/gentest/x86
source/tools/genuca/*.d
source/tools/genuca/*.o
source/tools/genuca/*.pdb
source/tools/genuca/Debug
source/tools/genuca/Makefile
source/tools/genuca/Release
source/tools/genuca/debug
source/tools/genuca/genuca
source/tools/genuca/genuca.8
source/tools/genuca/genuca.vcproj.*.*.user
source/tools/genuca/release
source/tools/genuca/x64
source/tools/genuca/x86
source/tools/icupkg/*.8
source/tools/icupkg/*.d
source/tools/icupkg/*.ncb
source/tools/icupkg/*.o
source/tools/icupkg/*.opt
source/tools/icupkg/*.pdb
source/tools/icupkg/*.plg
source/tools/icupkg/Debug
source/tools/icupkg/Makefile
source/tools/icupkg/Release
source/tools/icupkg/debug
source/tools/icupkg/icupkg
source/tools/icupkg/icupkg.[0-9]
source/tools/icupkg/icupkg.vcproj.*.*.user
source/tools/icupkg/release
source/tools/icupkg/x64
source/tools/icupkg/x86
source/tools/icuswap/*.d
source/tools/icuswap/*.ncb
source/tools/icuswap/*.o
source/tools/icuswap/*.opt
source/tools/icuswap/*.pdb
source/tools/icuswap/*.plg
source/tools/icuswap/Debug
source/tools/icuswap/Makefile
source/tools/icuswap/Release
source/tools/icuswap/debug
source/tools/icuswap/icuswap
source/tools/icuswap/icuswap.[0-9]
source/tools/icuswap/icuswap.vcproj.*.*.user
source/tools/icuswap/release
source/tools/makeconv/*.1
source/tools/makeconv/*.d
source/tools/makeconv/*.o
source/tools/makeconv/*.pdb
source/tools/makeconv/*.plg
source/tools/makeconv/Debug
source/tools/makeconv/Makefile
source/tools/makeconv/Release
source/tools/makeconv/debug
source/tools/makeconv/makeconv
source/tools/makeconv/makeconv.[0-9]
source/tools/makeconv/makeconv.vcproj.*.*.user
source/tools/makeconv/release
source/tools/makeconv/x64
source/tools/makeconv/x86
source/tools/pkgdata/*.1
source/tools/pkgdata/*.d
source/tools/pkgdata/*.ncb
source/tools/pkgdata/*.o
source/tools/pkgdata/*.opt
source/tools/pkgdata/*.pdb
source/tools/pkgdata/Debug
source/tools/pkgdata/Makefile
source/tools/pkgdata/Release
source/tools/pkgdata/debug
source/tools/pkgdata/icupkg.inc
source/tools/pkgdata/pkgdata
source/tools/pkgdata/pkgdata.[0-9]
source/tools/pkgdata/pkgdata.vcproj.*.*.user
source/tools/pkgdata/release
source/tools/pkgdata/x64
source/tools/pkgdata/x86
source/tools/toolutil/*.ao
source/tools/toolutil/*.d
source/tools/toolutil/*.ncb
source/tools/toolutil/*.o
source/tools/toolutil/*.opt
source/tools/toolutil/*.pdb
source/tools/toolutil/*.plg
source/tools/toolutil/Debug
source/tools/toolutil/Makefile
source/tools/toolutil/Release
source/tools/toolutil/debug
source/tools/toolutil/libicu*
source/tools/toolutil/release
source/tools/toolutil/toolutil.vcproj.*.*.user
source/tools/toolutil/x64
source/tools/toolutil/x86
source/tools/tzcode/Makefile
source/tools/tzcode/ZoneMetaData.java
source/tools/tzcode/africa
source/tools/tzcode/antarctica
source/tools/tzcode/asia
source/tools/tzcode/australasia
source/tools/tzcode/backward
source/tools/tzcode/etcetera
source/tools/tzcode/europe
source/tools/tzcode/factory
source/tools/tzcode/icu_zone.txt
source/tools/tzcode/icuzdump
source/tools/tzcode/icuzdump.dSYM
source/tools/tzcode/icuzdumpout
source/tools/tzcode/iso3166.tab
source/tools/tzcode/leapseconds
source/tools/tzcode/northamerica
source/tools/tzcode/pacificnew
source/tools/tzcode/solar??
source/tools/tzcode/southamerica
source/tools/tzcode/systemv
source/tools/tzcode/tz2icu
source/tools/tzcode/tz?????????.tar.gz
source/tools/tzcode/tzorig
source/tools/tzcode/yearistype
source/tools/tzcode/yearistype.sh
source/tools/tzcode/zdumpout
source/tools/tzcode/zic
source/tools/tzcode/zone.tab
source/tools/tzcode/zoneinfo
source/tools/tzcode/zoneinfo.txt

117
APIChangeReport.html Normal file
View file

@ -0,0 +1,117 @@
<?xml version="1.0" encoding="UTF-8"?><!--
Copyright (C) 2009, International Business Machines Corporation, All Rights Reserved.
--><html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>ICU4C API Comparison: 4.2.1 with 4.3.1</title>
<link rel="stylesheet" href="icu4c.css" type="text/css">
</head>
<body>
<a name="_top"></a>
<h1>ICU4C API Comparison: 4.2.1 with 4.3.1</h1>
<ul>
<li>
<a href="#removed">Removed from 4.2.1</a>
</li>
<li>
<a href="#deprecated">Deprecated or Obsoleted in 4.3.1</a>
</li>
<li>
<a href="#changed">Changed in 4.3.1</a>
</li>
<li>
<a href="#promoted">Promoted to stable in 4.3.1</a>
</li>
<li>
<a href="#added">Added in 4.3.1</a>
</li>
<li>
<a href="#other">Other existing drafts in 4.3.1</a>
</li>
</ul>
<hr>
<a name="removed">
<h2>Removed from 4.2.1</h2>
</a>
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<a name="deprecated">
<h2>Deprecated or Obsoleted in 4.3.1</h2>
</a>
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<a name="changed">
<h2>Changed in 4.3.1 (old, new)</h2>
</a>
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<a name="promoted">
<h2>Promoted to stable in 4.3.1</h2>
</a>
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<a name="added">
<h2>Added in 4.3.1</h2>
</a>
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<a name="other">
<h2>Other existing drafts in 4.3.1</h2>
</a>
<div class="other">
<table class="genTable" BORDER="1">
<THEAD>
<tr>
<th>File</th><th>API</th><th>4.2.1</th><th>4.3.1</th>
</tr>
</THEAD>
</table>
</div>
<P></P>
<a href="#_top">(jump back to top)</a>
<hr>
<p>
<i><font size="-1">Contents generated by StableAPI tool on Wed Jul 22 11:58:09 GMT-08:00 2009<br>Copyright (C) 2009, International Business Machines Corporation, All Rights Reserved.</font></i>
</p>
</body>
</html>

102
as_is/os390/unpax-icu.sh Executable file
View file

@ -0,0 +1,102 @@
#!/bin/sh
# Copyright (C) 2001-2007, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Steven R. Loomis
# George Rhoten
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Set the following variable to the space-separated list of binary file
# suffixes (extensions). Archive entries whose suffix appears in this list are
# re-extracted later in this script without the EBCDIC conversion.
# Generic example list (disabled):
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
# ICU-specific binary files
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML'
# usage: print a one-line reminder of how to invoke this script.
# Takes no arguments; the message is written to standard output.
usage()
{
echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
# First make sure we have at least one argument and that it names a file we
# can read. Both failure paths print the usage text and exit with a non-zero
# status so that a caller can tell that nothing was extracted.
if [ $# -eq 0 ]; then
    usage
    exit 1
fi
tar_file=$1
# Quote the path so that an archive name containing blanks is tested as a
# single word.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi
echo ""
echo "Extracting from $tar_file ..."
echo ""
# Extract all files while converting them from ISO8859-1 to IBM-1047 (EBCDIC).
# The archive path is quoted so that a name containing blanks stays one word.
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag
echo ""
echo "Determining binary files ..."
echo ""
# When building in ASCII mode, text files are converted as ASCII, so the .txt
# and .ucm suffixes are added to the binary suffix list.
# An unset or empty ICU_ENABLE_ASCII_STRINGS is treated as 0 so that the
# numeric comparison does not print a diagnostic before taking the else branch.
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
else
    # Otherwise only the .txt files under ./icu that begin with a UTF-8 BOM
    # are added to the list of files to restore.
    for file in `find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'`; do
        # Dump the first three bytes as hex. Runs of blanks in the od output
        # are squeezed to one blank so that cut can pick fields 2-4 (the three
        # byte values; field 1 is the offset column), then lower-cased.
        bom8=`head -c 3 "$file"|\
            od -t x1|\
            head -n 1|\
            sed 's/  */ /g'|\
            cut -f2-4 -d ' '|\
            tr 'A-Z' 'a-z'`;
        #Find a converted UTF-8 BOM
        # (these are the byte values the BOM has after the EBCDIC conversion
        # performed during extraction)
        if [ "$bom8" = "57 8b ab" ]
        then
            binary_files="$binary_files $file";
        fi
    done
fi
# Walk every entry listed in the archive and collect the ones whose suffix is
# in $binary_suffixes; they are appended to $binary_files.
for i in $(pax -f "$tar_file" 2>/dev/null)
do
    case $i in
    */) ;;      # then this entry is a directory
    *.*)        # then this entry has a dot in the filename
        # Take everything after the LAST dot. This handles files like
        # NormalizationTest-3.2.0.txt as well as entries with any number of
        # dots in the path. The suffix does not depend on $j, so it is
        # computed once per entry rather than once per candidate suffix.
        suf=${i##*.}
        for j in $binary_suffixes
        do
            if [ "$suf" = "$j" ]
            then
                binary_files="$binary_files $i"
                break
            fi
        done
        ;;
    *) ;;       # then this entry does not have a dot in it
    esac
done
# Now see if a re-extract of binary files is necessary.
if [ -z "$binary_files" ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # $binary_files is intentionally unquoted: it is a blank-separated list
    # that must be split into one argument per file. The archive path is
    # quoted so that it always stays a single argument.
    rm $binary_files
    # Re-extract the same entries, this time without any codepage conversion.
    pax -rvf "$tar_file" $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."

View file

@ -0,0 +1,32 @@
# Copyright (C) 2006-2009, International Business Machines Corporation
# and others. All Rights Reserved.
#
# Use "test -x" instead of "test -f" most of the time,
# due to how executables are created in a different file system.
s/as_executable_p="test -f"/as_executable_p="test -x"/g
s/test -f "$ac_file"/test -x "$ac_file"/g
s/test -f $ac_dir\/install-sh/test -x $ac_dir\/install-sh/g
s/test -f $ac_dir\/install.sh/test -x $ac_dir\/install.sh/g
s/test -f $ac_dir\/shtool/test -x $ac_dir\/shtool/g
# Use the more efficient del instead of the rm command.
# The rules run in order: the forms that carry an -r flag are rewritten first,
# and the final rule catches any remaining plain "rm -f".
s/rm[ ]*-r[ ]*-f/del -f/g
s/rm[ ]*-f[ ]*-r/del -f/g
s/rm[ ]*-rf/del -f/g
s/rm[ ]*-fr/del -f/g
s/rm[ ]*-f/del -f/g
## Disabled rule: don't clean up some awk files, for debugging
#s/[ ]*del -f [^ ]*.awk/#&/
# The Bourne shell isn't always available on i5/OS; use qsh instead.
s/\/bin\/sh/\/usr\/bin\/qsh/g
# qsh has no diff; the equivalent is cmp.
s/ diff / cmp -s /g
## srl
# Trouble with redirects: delete every " >&$3" redirection.
s% >&$3%%g
# In front of the line that starts with "ac_cr=", insert a comment line, a
# "touch -C 819" command naming the awk files, and a blank line. The matched
# text itself is kept via "&".
# NOTE(review): "touch -C 819" presumably creates/tags those files with
# CCSID 819 (ASCII) so that awk can read them -- confirm against the qsh docs.
s%^ac_cr=%# AWK reads ASCII, not EBCDIC\
touch -C 819 $tmp/defines.awk $tmp/subs.awk $tmp/subs1.awk conf$$subs.awk\
\
&%
##OBSOLETE
#(REPLACED BY CPP in runConfigureICU) Use -c qpponly instead of -E to enable the preprocessor on the compiler
#s/\$CC -E/\$CC -c -qpponly/g

195
as_is/os400/unpax-icu.sh Executable file
View file

@ -0,0 +1,195 @@
#!/usr/bin/qsh
# Copyright (C) 2000-2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
# Record in QSH whether the script is running under qsh: 1 when QSH_VERSION
# is set to a non-empty value, 0 otherwise (a note is printed in that case).
# QSH is exported so that child processes can see the result.
if [ -n "$QSH_VERSION" ]; then
    QSH=1
    #echo "QSH version $QSH_VERSION"
else
    QSH=0
    echo "QSH not detected (QSH_VERSION not set) - just testing."
fi
export QSH
# Set the following variable to the space-separated list of binary file
# suffixes (extensions)
#****************************************************************************
# Generic example list (disabled):
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML'
# Space-separated list of path patterns for the ICU data directories. Each
# pattern is looked up in the archive further down; the ones that are found
# are collected in ebcdic_data.
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'
#****************************************************************************
# Function: usage
# Description: Prints out text that describes how to call this script
# Input: None
# Output: None
#****************************************************************************
usage() {
    # Print the one-line invocation hint; $0 is echoed so the hint shows
    # the name the script was started under.
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
#****************************************************************************
# check for no arguments
if [ $# -eq 0 ]; then
    usage
    # Exit non-zero so a caller can tell that nothing was extracted.
    exit 1
fi

# tar file is argument 1
tar_file=$1

# check that the file is valid
# (quoted so the readability test sees the path as a single word)
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    # Same as above: report the failure through the exit status.
    exit 1
fi
#****************************************************************************
# Determine which directories in the data_files list
# are included in the provided archive
#****************************************************************************
echo "Finding data_files ..."
# Probe the archive once per pattern in $data_files.  pax is run without
# -r/-w, so it only lists members; its output is discarded and only the exit
# status of the subshell is used.
# NOTE(review): presumably pax exits non-zero when the pattern matches no
# member of the archive - confirm on qsh.
# NOTE(review): ebcdic_data is not initialised anywhere earlier in this
# script, so a value inherited from the environment would be kept.
for data_dir in $data_files
do
if (pax -f $tar_file $data_dir >/dev/null 2>&1)
then
# Append the pattern to the list that is later handed to both pax passes.
ebcdic_data="$ebcdic_data `echo $data_dir`";
fi
done
#****************************************************************************
# Extract files. We do this in two passes. One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""
# extract everything as iso-8859-1 except these directories
# (-c inverts the pattern match, so the members named by $ebcdic_data are
# the ones skipped in this pass)
pax -C 819 -rcvf $tar_file $ebcdic_data
# extract files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
# Second pass: same patterns, no -c, so only the members named by
# $ebcdic_data are read, this time with -C 37.
# NOTE(review): if $ebcdic_data is empty this command has no pattern
# operand; presumably pax then extracts every member as CCSID 37 - confirm.
pax -C 37 -rvf $tar_file $ebcdic_data
#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Process BOMs
#
# Every *.txt file below ./icu is inspected.  The first three bytes of its
# first line are dumped as hex; if they equal the byte values listed in the
# test below, the file is queued in $binary_files for re-extraction with
# -C 819.  The queue is flushed every 200 names.
for file in `find ./icu \( -name \*.txt -print \)`; do
    # od separates its columns with runs of blanks.  Squeeze each run of ONE
    # OR MORE blanks to a single blank so that cut can address the byte
    # columns.  (A pattern that also matches zero blanks would insert a blank
    # between every character and the comparison below could never succeed.)
    bom8=`head -n 1 $file|\
          od -t x1|\
          head -n 1|\
          sed 's/[ ][ ]*/ /g'|\
          cut -f2-4 -d ' '|\
          tr 'A-Z' 'a-z'`;
    #Find a converted UTF-8 BOM
    if [ "$bom8" = "057 08b 0ab" -o "$bom8" = "57 8b ab" ]
    then
        # Drop the leading "./" so the name matches the archive member name.
        file="`echo $file | cut -d / -f2-`"
        if [ `echo $binary_files | wc -w` -lt 200 ]
        then
            bin_count=`expr $bin_count + 1`
            binary_files="$binary_files $file";
        else
            # Queue is full: restore the queued files now, then start a new
            # queue with the current file.
            echo "Restoring binary files by BOM ($bin_count)..."
            rm $binary_files;
            pax -C 819 -rvf $tar_file $binary_files;
            echo "Determining binary files by BOM ($bin_count)..."
            binary_files="$file";
            bin_count=`expr $bin_count + 1`
        fi
    fi
done
# Process special paths
#
# Walk every member name listed by pax and queue those whose extension is one
# of $binary_suffixes for re-extraction with -C 819.  The queue shares
# $binary_files and $bin_count with the BOM pass and is flushed every 200
# names.
for i in $(pax -f $tar_file 2>/dev/null)
do
    case $i in
    */)
        # then this entry is a directory
        ;;
    *.*)
        # then this entry has a dot in the filename
        # Use the text after the LAST dot as the suffix.  Stripping only up
        # to the first dot would leave "2.0.txt" for a name such as
        # "Test-3.2.0.txt" (or anything below a dotted directory), which can
        # never equal an entry of $binary_suffixes.  The suffix does not
        # depend on $j, so it is computed once per entry.
        suf=${i##*.}
        for j in $binary_suffixes
        do
            if [ "$suf" = "$j" ]
            then
                if [ `echo $binary_files | wc -w` -lt 200 ]
                then
                    binary_files="$binary_files $i";
                    bin_count=`expr $bin_count + 1`
                else
                    # Queue is full: restore what is queued, then start a
                    # new queue with the current entry.
                    echo "Restoring binary files by special paths ($bin_count) ..."
                    rm $binary_files;
                    pax -C 819 -rvf $tar_file $binary_files;
                    echo "Determining binary files by special paths ($bin_count) ..."
                    binary_files="$i";
                    bin_count=`expr $bin_count + 1`
                fi
                # A name has one suffix; no need to test the remaining ones.
                break
            fi
        done
        ;;
    *)
        # then this entry does not have a dot in it
        ;;
    esac
done
# Flush whatever is still queued in $binary_files after both detection
# passes: count the queued names and, if there are any, delete the converted
# copies and extract them again with -C 819.
pending=`echo $binary_files | wc -w`
if [ $pending -gt 0 ]
then
    echo "Restoring binary files ($bin_count) ..."
    rm $binary_files
    pax -C 819 -rvf $tar_file $binary_files
fi
#****************************************************************************
# Generate and run the configure script
#****************************************************************************
echo ""
echo "Generating qsh compatible configure ..."
echo ""
# Convert into a temporary file and replace the real configure only when sed
# succeeded.  Without the check a failed conversion would still delete
# configure and move an empty or partial file into its place.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp
then
    del -f icu/source/configure
    mv icu/source/configureTemp icu/source/configure
    chmod 755 icu/source/configure
else
    echo "Could not convert icu/source/configure; the original was left unchanged."
    rm -f icu/source/configureTemp
    exit 1
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."

447
icu4c.css Normal file
View file

@ -0,0 +1,447 @@
/*
* Default CSS style sheet for the ICU4C Open Source readme
* Copyright (C) 2005-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*/
/* Global styles */
body,p,li,ol,ul,th,td {
font-size: 10pt;
font-family: "Arial", "Helvetica", sans-serif;
}
body {
margin: 1em;
}
body.draft {
background-image: url(images/draftbg.png);
}
.mainbody {
padding: 1em;
}
/*
* Customize the headers to have less space around them than usual
*/
/* Page title: centred serif heading inside a solid 2px box. */
h1 {
    margin-bottom: .5em;
    margin-top: .5em;
    padding-bottom: .5em;
    padding-top: .5em;
    font-family: Georgia, "Times New Roman", Times, serif;
    border-width: 2px;
    border-style: solid;
    text-align: center;
    width: 100%;
    /* These two were each declared twice in this rule (20pt/200% and
       700/bold); only the last declaration of a property takes effect, so
       the overridden ones have been dropped. */
    font-size: 200%;
    font-weight: bold;
}
h2 {
border-top: 2px solid #22d;
border-left: 2px solid #22d;
margin-bottom: 0.5em;
padding-left: 4px;
margin-top: 12pt;
font-weight: 700;
font-size: 2em;
font-family: Georgia, "Times New Roman", Times, serif;
background-color: #eee;
page-break-before: always;
}
h2 a {
text-decoration: none;
color: black;
}
h2 a:hover {
color: blue;
text-decoration: underline;
}
/* Third-level heading: thin top rule, indented by 1em. */
h3 {
    border-top: 1px solid gray;
    color: #1e1c46;
    margin-bottom: 0pt;
    padding-left: 0;
    margin-left: 1em;
    /* margin-top used to be declared twice (12pt, then 0.2em); the later
       value is the one that applied and is the one kept. */
    margin-top: 0.2em;
    padding-bottom: 0.4em;
    font-size: 1.5em;
    font-family: Georgia, "Times New Roman", Times, serif;
}
h3 a {
text-decoration: none;
color: black;
}
h3 a:hover {
color: blue;
text-decoration: underline;
}
h4 {
margin-left: 1.5em;
margin-bottom: 0pt;
margin-top: 12pt;
font-size: 1.0em;
font-weight: bolder;
font-family: Georgia, "Times New Roman", Times, serif;
}
h4 a {
text-decoration: none;
color: black;
}
h4 a:hover {
color: blue;
text-decoration: underline;
}
h5, h6 {
margin-left: 1.8em;
margin-bottom: 0pt;
margin-top: 12pt;
padding-left: 0.75em;
font-size: x-small;
font-family: Georgia, "Times New Roman", Times, serif;
}
p,pre,table,ul,ol,dl {
margin-left: 2em;
}
/*
* Navigation sidebar on the left hand of most pages
*/
td.sidebar1 {
background-color: #99CCFF;
font-weight: 700;
margin-top: 0px;
margin-bottom: 0px;
padding-top: 1em;
padding-left: 0.2em;
white-space: nowrap;
}
td.sidebar2 {
background-color: #99CCFF;
margin-top: 0px;
margin-bottom: 0px;
margin-left: 0px;
padding-top: 1px;
padding-bottom: 1px;
padding-left: 1px;
padding-right: 0.5em;
white-space: nowrap;
text-decoration: none;
display: block;
}
td.sidebar2:hover {
background-color: #EEEEFF;
padding-top: 1px;
padding-bottom: 1px;
padding-left: 1px;
padding-right: 0.5em;
}
a.sidebar2 {
text-decoration: none;
display: block;
width: 100%;
}
a.sidebar2:link {
color: #000099;
display: block;
}
a.sidebar2:hover {
background-color: #EEEEFF;
display: block;
}
.underlinehover:hover {
background-color: #EEEEFF;
text-decoration: underline;
}
/* This is the faded header at the top */
td.fadedtop {
background-color: #006699;
background-image: url(http://www.icu-project.org/images/gr100.gif);
}
/* Related site on the left */
p.relatedsite {
color: White;
font-weight: 700;
font-size: 10pt;
margin-top: 1em;
margin-bottom: 0;
padding-left: 0.2em;
white-space: nowrap;
}
/* Related site on the left */
p.sidebar3 {
margin-top: 0.75em;
margin-bottom: 0;
padding-left: 0.8em;
}
a.sidebar3 {
font-size: 0.9em;
text-decoration: none;
}
a.sidebar3:link {
text-decoration: none;
color: White;
}
a.sidebar3:hover {
text-decoration: underline;
}
/* FAQ */
li.faq_contents {
font-weight: 500;
}
p.faq_q {
font-weight: 700;
margin-bottom: 0px;
}
p.faq_a {
margin-top: 0px;
}
/* News items */
table.newsItem {
padding-left: 1em;
padding-right: 1em;
border-width: medium;
}
th.newsItem {
background-color: #666666;
color: White;
}
td.newsItem {
background-color: #CCCCCC;
}
td.release-line,th.release-line {
padding-left: 0.5em;
padding-right: 0.5em;
white-space: nowrap;
border: 1px;
}
.note {
font-style: italic;
font-size: small;
margin-left: 1em;
}
samp {
margin-left: 1em;
margin-right: 2em;
border-style: groove;
padding: 1em;
display: block;
background-color: #EEEEEE
}
table.rtable caption {
margin-left: 2px;
margin-right: 2px;
padding: 3px;
font-weight: bold;
background-color: #dee2ff;
text-align: left;
}
table.rtable tr th {
background-color: #dee2ff;
text-align: left;
}
table.rtable tr td {
background-color: #c0c0fd;
padding: 3px;
}
table.rtable tr.broken td {
background-color: #fbb;
border: 1px dashed gray;
padding: 3px;
font-weight: bold;
}
table.rtable tr.rarely td {
background-color: #efe9c2;
padding: 3px;
font-style: italic;
}
/* APIChangeReport specific things */
.row0 {
background-color: white;
}
.row1 {
background-color: #dfd;
}
.verchange {
color: red;
font-weight: bold;
font-size: large;
}
.stabchange {
color: red;
font-size: large;
}
/* Large red warning text on a white background. */
.bigwarn {
    color: red;
    background-color: white;
    font-size: x-large;
    /* No blank is allowed between the number and its unit; "0.5 em" is not
       a valid length and the whole declaration would be ignored. */
    margin: 0.5em;
}
table.genTable {
border-collapse: collapse;
border: 1px solid black;
}
/* Every cell, including the version cells.  The class is spelled genTable
   exactly as in the other rules for this table; class selectors are matched
   case-sensitively outside quirks mode. */
table.genTable td {
    border: 1px solid gray;
    padding: 0.25em;
    font-size: small;
}
/* not version */
table.genTable td.file,
table.genTable td.proto {
border: none;
font-size: medium;
}
table.genTable td.file {
font-family: monospace;
font-weight: bold;
}
div.other .row0 {
background-color: white;
}
div.other .row1 {
background-color: #ddf;
}
table.docTable {
border-collapse: collapse;
border: 1px solid black;
}
/* 'everything inc version */
table.docTable td,
table.docTable th {
border: 1px solid gray;
padding: 0.25em;
font-size: small;
}
/* not version */
table.docTable td.file,
table.docTable td.proto {
border: none;
font-size: medium;
}
table.docTable td.file {
font-family: monospace;
font-weight: bold;
}
abbr {
border-bottom: 1px dashed #0B0;
}
h2.TOC {
page-break-before: auto;
}
body.readme {
}
caption {
font-weight: bold;
text-align: left
}
div.indent {
margin-left: 2em
}
ul.TOC {
list-style-type: none;
padding-left: 1em;
font-size: larger;
}
ul.TOC li a {
font-weight: bold;
}
ul.TOC li ul li a {
font-weight: normal;
list-style-type: none;
font-size: small;
}
ul.TOC li ul {
margin-left: 0;
padding-left: 2em;
font-weight: normal;
list-style-type: none;
}
pre.samp,samp {
margin-left: 1em;
border-style: groove;
padding: 1em;
display: block;
background-color: #EEEEEE
}

51
license.html Normal file
View file

@ -0,0 +1,51 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
<title>ICU License - ICU 1.8.1 and later</title>
</head>
<body BGCOLOR="#ffffff">
<h2>ICU License - ICU 1.8.1 and later</h2>
<p>COPYRIGHT AND PERMISSION NOTICE</p>
<p>
Copyright (c) 1995-2009 International Business Machines Corporation and others
</p>
<p>
All rights reserved.
</p>
<p>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies
of the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.
</p>
<p>
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
USE OR PERFORMANCE OF THIS SOFTWARE.
</p>
<p>
Except as contained in this notice, the name of a copyright holder shall not be
used in advertising or otherwise to promote the sale, use or other dealings in
this Software without prior written authorization of the copyright holder.
</p>
<hr>
<p><small>
All trademarks and registered trademarks mentioned herein are the property of their respective owners.
</small></p>
</body>
</html>

159
packaging/PACKAGES Normal file
View file

@ -0,0 +1,159 @@
Copyright (C) 2000-2003, International Business Machines
Corporation and others. All Rights Reserved.
ICU is packaged into a number of small, interdependent packages. This
file describes what these packages are, what their name should be
like, and what their contents are. It is useful as a reference and a
guide when packaging ICU on a new system.
+ List of ICU packages.
ICU is distributed as the following packages:
- ICU libraries. This package contains the runtime libraries needed by
applications that use ICU. All the other packages require this package
to be installed.
- ICU. This package contains the converters data, the timezones data,
and all the ICU tools.
- ICU locales. This package adds locales and break data.
- ICU development. This package contains the files necessary to build
applications that use ICU, i.e. header files, links to shared
libraries used by the linker, static libraries, etc... It also
contains sample applications and documentation.
- ICU docs. This package contains further documentation for ICU,
including a complete API reference.
- ICU data. This package contains the source for the compiled data
contained by the ICU package.
- ICU international data. This package contains the source for the
compiled data contained by the ICU locales package.
In this file, we will refer to Autoconf variables as in $(bindir). In
addition to these, we will use the following variables to denote
ICU-specific directories or information:
VERSION ICU's dotted version number, e.g. 1.6.0.1 as of this
writing.
ICUDATADIR The directory where portable ICU data are. This is
defined as $(datadir)/icu/$(VERSION).
ICULIBDIR The directory where platform-specific ICU data
are. This is defined as $(libdir)/icu/$(VERSION).
ICUSYSCONFDIR The directory where ICU configuration files are. This
is defined as $(sysconfdir)/icu.
When referring to libraries, .so will be used to denote the extension
of a shared library, and .a to denote the extension of a static
library. These extensions will actually be different on some platforms.
+ Configuration and compilation of ICU
ICU should be configured with the following options:
--with-data-packaging=files
--disable-rpath
--enable-shared
--enable-static
--without-samples
in addition to platform-specific settings (like a specific mandir or
sysconfdir). Note that the use of --disable-rpath assumes that the
packaging is made for a standard location, or that the package
installation/deinstallation will correctly manage the configuration
of the system's dynamic loader. This is the right way of doing things.
The configure script invocation should also be done with
CFLAGS="-O2"
set, as in:
$ CFLAGS="-O2" ./configure ...
The files packaging mode is chosen because it offers the maximum
flexibility. Packages can be split easily, and system administrators
can add converters, aliases, and other resources with little
effort. Ideally, the ICU build will be modified to allow for distributing a
libicudata.so with all the converters and locales, but indexes and aliases
as separate files. But for now, this is the easiest way to get started.
+ The ICU libraries package
The ICU libraries package is typically named `libicuXX' where XX is
the major number of ICU's libraries. This number is ICU's version
number multiplied by 10 and rounded down to the nearest integer (it is
also the value of the LIB_VERSION_MAJOR configure substitution
variable). For example, for ICU 1.6.0.1, it is 16, so the package name
is `libicu16'. The major version is part of the package name to allow
for the simultaneous installation of different ICU releases.
This package contains:
- All the shared libraries, and their major number symbolic link, but
not the .so symbolic link that is only used at link time (this one is
part of the development package). These are $(libdir)/libicu*.so.* and
$(libdir)/libustdio.so.* at the time of this writing.
+ The ICU package
The ICU package is simply named `icu'. It provides data used by the ICU
libraries package and commands to create and manipulate that data.
This package contains:
- The Unicode data files (uprops.dat and unames.dat as of this writing).
- The time zones data files (tz.dat).
- All the binary data files for converters (.cnv files).
- All the ICU commands.
- The manual pages for ICU commands and file formats.
+ The ICU locales package
The ICU locales package is named `icu-locales'. It provides data used by
internationalization support in ICU.
This package contains:
- All the data for locales in ICU (.dat files).
- All the break data for specific locales (.brk files).
+ The ICU development package
The ICU development package is named `libicu-dev'. It provides all
the files necessary to write applications that use ICU, along with
examples and some documentation.
This package contains:
- The /usr/include/unicode directory which contains all the ICU
headers.
- The .so symbolic links used by the linker to link against the
latest version of the libraries.
- A sample Makefile fragment that can be included by applications
using ICU, to facilitate their building, along with a platform-specific
configuration file included by this fragment.
- The sample applications from the ICU source tree, in an appropriate
location for the system that the package is installed on (for example,
on Debian, in /usr/share/doc/libicu-dev/examples).
This package depends on the ICU libraries package with the exact same
version, since it provides .so symbolic links to the latest libraries.
+ The ICU docs package
The ICU docs package is named `libicu-doc'. It contains the files
generated by doxygen when the `make doc' command is executed, in a
location appropriate for the system that the package is installed on.
+ The ICU data package
The ICU data package is named `icu-data'. It contains source files for
the data found in the ICU package. These files are installed in
$(ICUDATADIR).
+ The ICU international data package
The ICU international data package is named `icu-i18ndata'. It contains source files for
the data found in the ICU locales package. These files are installed in
$(ICUDATADIR).
----
Yves Arrouye <yves@realnames.com>

13
packaging/README Normal file
View file

@ -0,0 +1,13 @@
Copyright (C) 2000-2003, International Business Machines
Corporation and others. All Rights Reserved.
This directory contains information, input files and scripts for
packaging ICU using specific packaging tools. We assume that the
packager is familiar with the tools and procedures needed to build a
package for a given packaging method (for example, how to use
dpkg-buildpackage(1) on Debian GNU/Linux, or rpm(8) on distributions that
use RPM packages).
Please read the file PACKAGES if you are interested in packaging ICU
yourself. It describes what the different packages should be, and what
their contents are.

228
packaging/rpm/icu.spec Normal file
View file

@ -0,0 +1,228 @@
# Copyright (C) 2000-2005, International Business Machines
# Corporation and others. All Rights Reserved.
#
# RPM specification file for ICU.
#
# Neal Probert <nprobert@walid.com> is the current maintainer.
# Yves Arrouye <yves@realnames.com> is the original author.
# This file can be freely redistributed under the same license as ICU.
Name: icu
Version: 3.4
Release: 1
Requires: libicu34 >= %{version}
Summary: International Components for Unicode
Packager: Ian Holsman (CNET Networks) <ianh@cnet.com>
# "License" is the current name of the tag formerly spelled "Copyright".
License: X License
Group: System Environment/Libraries
Source: icu-%{version}.tgz
BuildRoot: /var/tmp/%{name}-%{version}
%description
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode and locale support. The library provides calendar support, conversions
for many character sets, language sensitive collation, date
and time formatting, support for many locales, message catalogs
and resources, message formatting, normalization, number and currency
formatting, time zones support, transliteration, word, line and
sentence breaking, etc.
This package contains the Unicode character database and derived
properties, along with converters and time zones data.
This package contains the runtime libraries for ICU. It does
not contain any of the data files needed at runtime and present in the
`icu' and `icu-locales` packages.
%package -n libicu34
Summary: International Components for Unicode (libraries)
Group: Development/Libraries
%description -n libicu34
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode support. This package contains the runtime libraries for ICU. It does
not contain any of the data files needed at runtime and present in the
`icu' and `icu-locales` packages.
%package -n libicu-devel
Summary: International Components for Unicode (development files)
Group: Development/Libraries
Requires: libicu34 = %{version}
%description -n libicu-devel
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode support. This package contains the development files for ICU.
%package locales
Summary: Locale data for ICU
Group: System Environment/Libraries
Requires: libicu34 >= %{version}
%description locales
The locale data are used by ICU to provide localization (l10n),
internationalization (i18n) and timezone support to ICU applications.
This package also contains break data for various languages,
and transliteration data.
%post
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
#ICU_DATA=/usr/share/icu/%{version}
#export ICU_DATA
%preun
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%post -n libicu34
ldconfig
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%preun -n libicu34
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%prep
%setup -q -n icu
%build
cd source
chmod a+x ./configure
CFLAGS="-O3" CXXFLAGS="-O" ./configure --prefix=/usr --sysconfdir=/etc --with-data-packaging=files --enable-shared --enable-static --disable-samples
echo 'CPPFLAGS += -DICU_DATA_DIR=\"/usr/share/icu/%{version}\"' >> icudefs.mk
make RPM_OPT_FLAGS="$RPM_OPT_FLAGS"
%install
rm -rf $RPM_BUILD_ROOT
cd source
make install DESTDIR=$RPM_BUILD_ROOT
%files
%defattr(-,root,root)
%doc readme.html
%doc license.html
/usr/share/icu/%{version}/license.html
/usr/share/icu/%{version}/icudt34l/*.cnv
/usr/share/icu/%{version}/icudt34l/*.icu
/usr/share/icu/%{version}/icudt34l/*.spp
/usr/bin/derb
/usr/bin/genbrk
/usr/bin/gencnval
/usr/bin/genrb
/usr/bin/icu-config
/usr/bin/makeconv
/usr/bin/pkgdata
/usr/bin/uconv
/usr/sbin/decmn
/usr/sbin/genccode
/usr/sbin/gencmn
/usr/sbin/gensprep
/usr/sbin/genuca
/usr/sbin/icuswap
/usr/share/icu/%{version}/mkinstalldirs
/usr/man/man1/derb.1.*
/usr/man/man1/gencnval.1.*
/usr/man/man1/genrb.1.*
/usr/man/man1/icu-config.1.*
/usr/man/man1/makeconv.1.*
/usr/man/man1/pkgdata.1.*
/usr/man/man1/uconv.1.*
/usr/man/man8/decmn.8.*
/usr/man/man8/genccode.8.*
/usr/man/man8/gencmn.8.*
/usr/man/man8/gensprep.8.*
/usr/man/man8/genuca.8.*
%files -n icu-locales
/usr/share/icu/%{version}/icudt34l/*.brk
/usr/share/icu/%{version}/icudt34l/*.res
/usr/share/icu/%{version}/icudt34l/coll/*.res
/usr/share/icu/%{version}/icudt34l/rbnf/*.res
/usr/share/icu/%{version}/icudt34l/translit/*.res
%files -n libicu34
%doc license.html
/usr/lib/libicui18n.so.34
/usr/lib/libicui18n.so.34.0
/usr/lib/libicutu.so.34
/usr/lib/libicutu.so.34.0
/usr/lib/libicuuc.so.34
/usr/lib/libicuuc.so.34.0
/usr/lib/libicudata.so.34
/usr/lib/libicudata.so.34.0
/usr/lib/libicuio.so.34
/usr/lib/libicuio.so.34.0
/usr/lib/libiculx.so.34
/usr/lib/libiculx.so.34.0
/usr/lib/libicule.so.34
/usr/lib/libicule.so.34.0
%files -n libicu-devel
%doc readme.html
%doc license.html
/usr/lib/libicui18n.so
/usr/lib/libsicui18n.a
/usr/lib/libicuuc.so
/usr/lib/libsicuuc.a
/usr/lib/libicutu.so
/usr/lib/libsicutu.a
/usr/lib/libicuio.so
/usr/lib/libsicuio.a
/usr/lib/libicudata.so
/usr/lib/libsicudata.a
/usr/lib/libicule.so
/usr/lib/libsicule.a
/usr/lib/libiculx.so
/usr/lib/libsiculx.a
/usr/include/unicode/*.h
/usr/include/layout/*.h
/usr/lib/icu/%{version}/Makefile.inc
/usr/lib/icu/Makefile.inc
/usr/share/icu/%{version}/config
/usr/share/doc/icu-%{version}/*
%changelog
* Mon Jun 07 2004 Alexei Dets <adets@idsk.com>
- update to 3.0
* Tue Aug 16 2003 Steven Loomis <srl@jtcsv.com>
- update to 2.6.1 - include license
* Thu Jun 05 2003 Steven Loomis <srl@jtcsv.com>
- Update to 2.6
* Fri Dec 27 2002 Steven Loomis <srl@jtcsv.com>
- Update to 2.4 spec
* Fri Sep 27 2002 Steven Loomis <srl@jtcsv.com>
- minor updates to 2.2 spec. Rpath is off by default, don't pass it as an option.
* Mon Sep 16 2002 Ian Holsman <ian@holsman.net>
- update to icu 2.2

1929
readme.html Normal file

File diff suppressed because it is too large Load diff

230
source/Doxyfile.in Normal file
View file

@ -0,0 +1,230 @@
# Doxyfile 1.3.7
# ********************************************************************
# * COPYRIGHT:
# * Copyright (c) 2004-2009, International Business Machines Corporation
# * and others. All Rights Reserved.
# ********************************************************************
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = "ICU @VERSION@"
PROJECT_NUMBER =
OUTPUT_DIRECTORY = doc
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = YES
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = YES
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
DISTRIBUTE_GROUP_DOC = YES
TAB_SIZE = 8
ALIASES = "memo=\par Note:\n" \
"draft=\xrefitem draft \"Draft\" \"Draft List\" This API may be changed in the future versions and was introduced in" \
"stable=\xrefitem stable \"Stable\" \"Stable List\"" \
"deprecated=\xrefitem deprecated \"Deprecated\" \"Deprecated List\"" \
"obsolete=\xrefitem obsolete \"Obsolete\" \"Obsolete List\"" \
"system=\xrefitem system \"System\" \"System List\" \n Do not use unless you know what you are doing." \
"internal=\xrefitem internal \"Internal\" \"Internal List\" Do not use. This API is for internal use only."
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = YES
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
# docset
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "ICU @VERSION@"
DOCSET_BUNDLE_ID = org.icu-project.icu4c
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = ./common/unicode/platform.h @srcdir@/common/unicode @srcdir@/i18n/unicode @srcdir@/io/unicode @srcdir@/layout/LEFontInstance.h @srcdir@/layout/LEGlyphStorage.h @srcdir@/layout/LELanguages.h @srcdir@/layout/LEScripts.h @srcdir@/layout/LESwaps.h @srcdir@/layout/LETypes.h @srcdir@/layout/LayoutEngine.h @srcdir@/layoutex/layout
FILE_PATTERNS = *.h
RECURSIVE = NO
EXCLUDE = @srcdir@/common/unicode/urename.h @srcdir@/common/unicode/udraft.h @srcdir@/common/unicode/udeprctd.h @srcdir@/common/unicode/uobslete.h @srcdir@/common/unicode/ppalmos.h
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = config*.h
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = YES
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW= U_NAMESPACE_BEGIN= U_NAMESPACE_END=
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE = "@builddir@/doc/html/icudocs.tag"
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 0
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = YES

247
source/Makefile.in Normal file
View file

@ -0,0 +1,247 @@
#******************************************************************************
#
# Copyright (C) 1998-2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
## Top-level Makefile.in for ICU
## Stephen F. Booth
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = .
include $(top_builddir)/icudefs.mk
docdir = $(datadir)/doc
docsubdir = $(PACKAGE)$(ICULIBDASHSUFFIX)/html
docsubsrchdir = $(docsubdir)/search
docfilesdir = doc/html
docfiles = $(docfilesdir)/*.gif $(docfilesdir)/*.png $(docfilesdir)/*.html $(docfilesdir)/*.css $(docfilesdir)/*.tag $(docfilesdir)/installdox
docsrchdir = $(docfilesdir)/search
docsrchfiles = $(docsrchdir)/*
##
## Build directory information
subdir = .
#AUTOCONF = @AUTOCONF@
## Optional directory setup
@LAYOUT_TRUE@LAYOUT = layout layoutex
@ICUIO_TRUE@ICUIO = io
@EXTRAS_TRUE@EXTRA = extra
@TESTS_TRUE@TEST = test
@SAMPLES_TRUE@SAMPLE = samples
DOXYGEN = @DOXYGEN@
DOCZIP = icu-docs.zip
## Files to remove for 'make clean'
CLEANFILES = *~
## Files built (autoconfed) and installed
INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config @platform_make_fragment@ $(EXTRA_DATA:%=$(DESTDIR)$(pkglibdir)/%)
## Files built (autoconfed) but not installed
LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk
DOCDIRS = common i18n
SUBDIRS = stubdata common i18n $(LAYOUT) tools data $(ICUIO) $(EXTRA) $(SAMPLE) $(TEST)
SECTION = 1
MANX_FILES = config/icu-config.$(SECTION)
ALL_MAN_FILES = $(MANX_FILES)
## Extra files to install [nothing at present]
EXTRA_DATA =
## List of phony targets
## Targets below name actions, not files; they are grouped by family.
.PHONY : all all-local all-recursive \
	install install-local install-recursive install-icu install-doc \
	install-udata install-udata-files install-udata-dlls \
	clean clean-local clean-recursive clean-recursive-with-twist \
	distclean distclean-local distclean-recursive \
	dist dist-local dist-recursive \
	check check-local check-recursive \
	doc tests
## Clear suffix list
.SUFFIXES :
## List of standard targets
all: all-local all-recursive
install: install-recursive install-local
clean: clean-recursive-with-twist clean-local
distclean : distclean-recursive distclean-local
dist: dist-recursive dist-local
check: all check-recursive
check-recursive: all
## Documentation targets.
## When $(DOXYGEN) is empty, "doc" only prints a hint on where to get Doxygen.
## Otherwise "doc" runs Doxygen (driven by the generated Doxyfile) whenever one
## of the listed headers changes, and $(DOCZIP) packages doc/html as a zip.
ifeq ($(DOXYGEN),)
doc:
	@echo you need Doxygen to generate documentation. Doxygen can be found on the Web
	@echo at http://www.doxygen.org/
else
doc: doc/html/index.html
doc/html/index.html: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/common/unicode/*.h $(srcdir)/i18n/unicode/*.h $(srcdir)/layout/unicode/*.h $(srcdir)/io/unicode/*.h)
	$(DOXYGEN)
## Doxyfile is produced from Doxyfile.in by config.status.
Doxyfile: $(srcdir)/Doxyfile.in
	CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(DOCZIP): doc
	-$(RMV) $(DOCZIP)
## Use && so that zip never runs from the wrong directory if the cd fails.
	( cd doc/html && zip -r ../../$(DOCZIP) * )
endif
LOCAL_SUBDIRS = $(SUBDIRS)
CLEAN_FIRST_SUBDIRS = tools
$(LIBDIR) $(BINDIR):
-$(MKINSTALLDIRS) $@
## Recursive targets
## Shared recipe for every "<target>-recursive" goal.
## $(LIBDIR) and $(BINDIR) are prerequisites, so they are created first.
## The recipe strips "-recursive" from the goal name to get <target>, then for
## each directory in $(LOCAL_SUBDIRS) runs "$(MAKE) RECURSIVE=YES <target>"
## inside it.  For the directory "." it runs "<target>-local" instead and
## remembers that "." was visited.  The first failing sub-make aborts the loop
## via "|| exit".  If "." was not in the list, "<target>-local" is run in the
## current directory after all subdirectories are done.
all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive: $(LIBDIR) $(BINDIR)
@dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
list='$(LOCAL_SUBDIRS)'; for subdir in $$list; do \
echo "$(MAKE)[$(MAKELEVEL)]: Making \`$$target' in \`$$subdir'"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-local"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) RECURSIVE=YES $$local_target) || exit; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) "$$target-local" || exit; \
fi
clean-recursive-with-twist:
$(MAKE) clean-recursive LOCAL_SUBDIRS='$(CLEAN_FIRST_SUBDIRS) $(filter-out $(CLEAN_FIRST_SUBDIRS),$(LOCAL_SUBDIRS))'
all-local: $(srcdir)/configure $(LOCAL_BUILT_FILES) $(INSTALLED_BUILT_FILES)
install-local: install-icu install-manx
## install-icu: install the platform make fragment, the helper scripts
## (mkinstalldirs, install-sh), license.html, icu-config, Makefile.inc and
## pkgdata.inc under $(DESTDIR), then refresh the "current", Makefile.inc and
## pkgdata.inc symlinks in the parent of $(pkglibdir).
install-icu: $(INSTALLED_BUILT_FILES)
	@$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/config
	@$(MKINSTALLDIRS) $(DESTDIR)$(pkglibdir)
	@$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
	@$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
	$(INSTALL_DATA) @platform_make_fragment@ $(DESTDIR)$(pkgdatadir)/config/@platform_make_fragment_name@
	$(INSTALL_SCRIPT) $(top_srcdir)/mkinstalldirs $(DESTDIR)$(pkgdatadir)/mkinstalldirs
	$(INSTALL_SCRIPT) $(top_srcdir)/install-sh $(DESTDIR)$(pkgdatadir)/install-sh
	$(INSTALL_DATA) $(top_srcdir)/../license.html $(DESTDIR)$(pkgdatadir)/license.html
	$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
	$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
	$(INSTALL_DATA) $(top_builddir)/config/pkgdata.inc $(DESTDIR)$(pkglibdir)/pkgdata.inc
## Guard the whole symlink step with the cd: if the directory change fails,
## nothing may be removed or linked in the build directory by mistake.
	cd $(DESTDIR)$(pkglibdir)/.. && { \
	    $(RM) current && ln -s $(VERSION) current; \
	    $(RM) Makefile.inc && ln -s current/Makefile.inc Makefile.inc; \
	    $(RM) pkgdata.inc && ln -s current/pkgdata.inc pkgdata.inc; \
	}
## install-doc: with no Doxygen configured this is an empty target.
## Otherwise it builds "doc", removes any previously installed copy of
## $(docsubdir), recreates the directory tree (including the search
## subdirectory) and copies the generated files into it.
ifeq ($(DOXYGEN),)
install-doc:
else
install-doc: doc
$(RM) -r $(DESTDIR)$(docdir)/$(docsubdir)
$(MKINSTALLDIRS) $(DESTDIR)$(docdir)/$(docsubsrchdir)
$(INSTALL_DATA) $(docfiles) $(DESTDIR)$(docdir)/$(docsubdir)
$(INSTALL_DATA) $(docsrchfiles) $(DESTDIR)$(docdir)/$(docsubsrchdir)
endif
$(DESTDIR)$(pkglibdir)/%: $(top_srcdir)/../data/%
$(INSTALL_DATA) $< $@
# Build the tests, but don't run them.
tests: all
$(MAKE) -C $(top_builddir)/test
dist-local:
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) Doxyfile doc $(DOCZIP)
distclean-local: clean-local
$(RMV) $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config
$(RMV) config.cache config.log config.status $(top_builddir)/config/icucross.mk
$(RMV) Makefile config/Makefile icudefs.mk $(LIBDIR) $(BINDIR)
## check-local: self-test of the generated configuration helpers.
## Runs "icu-config --selfcheck" and the "selfcheck" target of
## config/Makefile.inc (with SELFCHECK=1).  Each must print exactly "passed";
## otherwise a FAIL message is echoed and the recipe exits with status 1.
check-local: $(top_builddir)/config/icu-config $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc
@echo verifying that icu-config --selfcheck can operate
@test "passed" = "$(shell $(top_builddir)/config/icu-config --selfcheck 2>&1)" || (echo "FAIL: icu-config could not run properly." ; exit 1)
@echo verifying that $(MAKE) -f Makefile.inc selfcheck can operate
@test "passed" = "$(shell $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
@echo "PASS: config selfcheck OK"
#$(srcdir)/configure : $(srcdir)/configure.in $(top_srcdir)/aclocal.m4
# cd $(srcdir) && $(AUTOCONF)
icudefs.mk: $(srcdir)/icudefs.mk.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
## config/icucross.mk: make fragment describing this build tree.
## The first group of echo commands overwrites the file with
## CROSS_ICU_VERSION and TOOLEXEEXT, whose values are expanded now.
## The second group appends TOOLBINDIR, TOOLLIBDIR, INVOKE and PKGDATA_INVOKE.
## In those lines the single-quoted '$$(...)' pieces are written out as
## literal $(...) make references, so $(cross_buildroot) is resolved by
## whichever makefile later includes the generated file, not here.
config/icucross.mk: $(top_builddir)/icudefs.mk $(top_builddir)/Makefile
@echo rebuilding $@
@(echo "CROSS_ICU_VERSION=$(VERSION)" ;\
echo "TOOLEXEEXT=$(EXEEXT)" \
) > $@
@(echo 'TOOLBINDIR=$$(cross_buildroot)/bin' ;\
echo 'TOOLLIBDIR=$$(cross_buildroot)/lib' ;\
echo "INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(TOOLLIBDIR):$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$$$'"$(LDLIBRARYPATH_ENVVAR)" ;\
echo "PKGDATA_INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$(TOOLLIBDIR):$$$$'"$(LDLIBRARYPATH_ENVVAR) "'$$'"(PKGDATA_INVOKE_OPTS)" ;\
echo ) >> $@
Makefile: $(srcdir)/Makefile.in icudefs.mk $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(top_builddir)/config/Makefile.inc: $(srcdir)/config/Makefile.inc.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(top_builddir)/config/pkgdata.inc: icudefs.mk $(top_builddir)/config/pkgdataMakefile
cd $(top_builddir)/config; \
$(MAKE) -f pkgdataMakefile
$(top_builddir)/config/pkgdataMakefile:
cd $(top_builddir) \
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
## config/icu-config: assemble the icu-config script from its parts.
## Order of the pieces: icu-config-top, then Makefile.inc and the platform
## make fragment each filtered through make2sh.sed (lines containing '#M#'
## are dropped and adjacent duplicates removed), then icu-config-bottom and a
## "Rebuilt on" timestamp.  The file is made user-writable only while it is
## being assembled and is write-protected again at the end.
$(top_builddir)/config/icu-config: $(top_builddir)/Makefile $(top_srcdir)/config/icu-config-top $(top_srcdir)/config/icu-config-bottom $(top_builddir)/config/Makefile.inc @platform_make_fragment@ $(top_srcdir)/config/make2sh.sed
-$(RMV) $@
$(INSTALL_SCRIPT) $(top_srcdir)/config/icu-config-top $@
chmod u+w $@
@echo "# Following from @platform_make_fragment@" >> $@
sed -f $(top_srcdir)/config/make2sh.sed < $(top_builddir)/config/Makefile.inc | grep -v '#M#' | uniq >> $@
sed -f $(top_srcdir)/config/make2sh.sed < @platform_make_fragment@ | grep -v '#M#' | uniq >> $@
cat $(top_srcdir)/config/icu-config-bottom >> $@
echo "# Rebuilt on "`date` >> $@
chmod u-w $@
## config.status: deliberately does NOT regenerate anything.
## When configure or uversion.h is newer than config.status, this rule prints
## a notice asking the user to re-run runConfigureICU (or configure) and then
## fails the build with "exit 1".
config.status: $(srcdir)/configure $(srcdir)/common/unicode/uversion.h
@echo
@echo
@echo "*** config.status has become stale ***"
@echo " 'configure' and/or 'uversion.h' have changed, please"
@echo " do 'runConfigureICU' (or 'configure') again, as per"
@echo " the readme.html."
@echo
@echo
exit 1
install-manx: $(MANX_FILES)
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

483
source/aclocal.m4 vendored Normal file
View file

@ -0,0 +1,483 @@
# aclocal.m4 for ICU
# Copyright (c) 1999-2009, International Business Machines Corporation and
# others. All Rights Reserved.
# Stephen F. Booth
# @TOP@
# ICU_CHECK_MH_FRAG
# Selects the platform Makefile fragment name for the configured host and
# stores it in the cache variable icu_cv_host_frag.  The choice depends on
# the host triplet and, for several platforms, on whether GCC is in use.
# Hosts that match no specific pattern get mh-unknown.
AC_DEFUN(ICU_CHECK_MH_FRAG, [
    AC_CACHE_CHECK(
        [which Makefile fragment to use for ${host}],
        [icu_cv_host_frag],
        [
case "${host}" in
*-*-solaris*)
    if test "$GCC" = yes; then
        icu_cv_host_frag=mh-solaris-gcc
    else
        icu_cv_host_frag=mh-solaris
    fi ;;
alpha*-*-linux-gnu)
    if test "$GCC" = yes; then
        icu_cv_host_frag=mh-alpha-linux-gcc
    else
        icu_cv_host_frag=mh-alpha-linux-cc
    fi ;;
powerpc*-*-linux*)
    if test "$GCC" = yes; then
        icu_cv_host_frag=mh-linux
    else
        icu_cv_host_frag=mh-linux-va
    fi ;;
*-*-linux*|*-pc-gnu) icu_cv_host_frag=mh-linux ;;
*-*-cygwin|*-*-mingw32)
    if test "$GCC" = yes; then
        AC_TRY_COMPILE([
#ifndef __MINGW32__
#error This is not MinGW
#endif], [], icu_cv_host_frag=mh-mingw, icu_cv_host_frag=mh-cygwin)
    else
        icu_cv_host_frag=mh-cygwin-msvc
    fi ;;
*-*-*bsd*|*-*-dragonfly*) icu_cv_host_frag=mh-bsd-gcc ;;
*-*-aix*)
    if test "$GCC" = yes; then
        icu_cv_host_frag=mh-aix-gcc
    else
        icu_cv_host_frag=mh-aix-va
    fi ;;
*-*-hpux*)
    if test "$GCC" = yes; then
        icu_cv_host_frag=mh-hpux-gcc
    else
        case "$CXX" in
        *aCC) icu_cv_host_frag=mh-hpux-acc ;;
        # Any other HP-UX compiler has no dedicated fragment.  Use the same
        # fallback as an unrecognized host rather than leaving the cache
        # variable empty.
        *) icu_cv_host_frag=mh-unknown ;;
        esac
    fi ;;
*-*ibm-openedition*|*-*-os390*) icu_cv_host_frag=mh-os390 ;;
*-*-os400*) icu_cv_host_frag=mh-os400 ;;
*-apple-rhapsody*) icu_cv_host_frag=mh-darwin ;;
*-apple-darwin*) icu_cv_host_frag=mh-darwin ;;
*-*-beos|*-*-haiku) icu_cv_host_frag=mh-beos ;;
*-*-irix*) icu_cv_host_frag=mh-irix ;;
*-dec-osf*) icu_cv_host_frag=mh-alpha-osf ;;
*-*-nto*) icu_cv_host_frag=mh-qnx ;;
*-ncr-*) icu_cv_host_frag=mh-mpras ;;
*) icu_cv_host_frag=mh-unknown ;;
esac
        ]
    )
])
# ICU_CONDITIONAL - similar example taken from Automake 1.4
# Usage: ICU_CONDITIONAL(NAME, SHELL-CONDITION)
# Registers NAME_TRUE for substitution.  It is set to the empty string when
# SHELL-CONDITION succeeds and to '#' otherwise, so a Makefile.in line
# prefixed with @NAME_TRUE@ is commented out when the condition is false.
AC_DEFUN(ICU_CONDITIONAL,
[AC_SUBST($1_TRUE)
if $2; then $1_TRUE=; else $1_TRUE='#'; fi])
# ICU_PROG_LINK - Make sure that the linker is usable
# On Cygwin and MinGW hosts built without GCC, aborts configure when the
# "link" found on PATH identifies itself as the GNU coreutils tool, since
# that program is not a linker.  All other hosts are left alone.
AC_DEFUN(ICU_PROG_LINK,
[
case "${host}" in
*-*-cygwin*|*-*-mingw*)
    if test "$GCC" != yes; then
        if test -n "`link --version 2>&1 | grep 'GNU coreutils'`"; then
            AC_MSG_ERROR([link.exe is not a valid linker. Your PATH is incorrect.
Please follow the directions in ICU's readme.])
        fi
    fi
    ;;
esac])
# AC_SEARCH_LIBS_FIRST(FUNCTION, SEARCH-LIBS [, ACTION-IF-FOUND
# [, ACTION-IF-NOT-FOUND [, OTHER-LIBRARIES]]])
# Search for a library defining FUNC, then see if it's not already available.
# Order of the search, as implemented below:
#   1. Each library in SEARCH-LIBS is tried in turn by linking FUNCTION with
#      "-l<lib> OTHER-LIBRARIES" put in front of the saved LIBS; the first one
#      that links is recorded and the loop stops.
#   2. Only if none of them links is FUNCTION tried with no extra library,
#      which records "none required" on success.
# The outcome is cached in ac_cv_search_FUNCTION and LIBS is restored after
# the probe.  On success the recorded -l flag (if any) is prepended to LIBS
# and ACTION-IF-FOUND runs; otherwise ACTION-IF-NOT-FOUND runs.
AC_DEFUN(AC_SEARCH_LIBS_FIRST,
[AC_PREREQ([2.13])
AC_CACHE_CHECK([for library containing $1], [ac_cv_search_$1],
[ac_func_search_save_LIBS="$LIBS"
ac_cv_search_$1="no"
for i in $2; do
LIBS="-l$i $5 $ac_func_search_save_LIBS"
AC_TRY_LINK_FUNC([$1],
[ac_cv_search_$1="-l$i"
break])
done
if test "$ac_cv_search_$1" = "no"; then
AC_TRY_LINK_FUNC([$1], [ac_cv_search_$1="none required"])
fi
LIBS="$ac_func_search_save_LIBS"])
if test "$ac_cv_search_$1" != "no"; then
test "$ac_cv_search_$1" = "none required" || LIBS="$ac_cv_search_$1 $LIBS"
$3
else :
$4
fi])
# Check if we can build and use 64-bit libraries
AC_DEFUN(AC_CHECK_64BIT_LIBS,
[
BITS_REQ=nochange
ENABLE_64BIT_LIBS=unknown
## revisit this for cross-compile.
AC_ARG_ENABLE(64bit-libs,
[ --enable-64bit-libs (deprecated, use --with-library-bits) build 64-bit libraries [default= platform default]],
[echo "note, use --with-library-bits instead of --*-64bit-libs"
case "${enableval}" in
no|false|32) with_library_bits=32; ;;
yes|true|64) with_library_bits=64else32 ;;
nochange) with_library_bits=nochange; ;;
*) AC_MSG_ERROR(bad value ${enableval} for '--*-64bit-libs') ;;
esac] )
AC_ARG_WITH(library-bits,
[ --with-library-bits=bits specify how many bits to use for the library (32, 64, 64else32, nochange) [default=nochange]],
[case "${withval}" in
""|nochange) BITS_REQ=$withval ;;
32|64|64else32) BITS_REQ=$withval ;;
*) AC_MSG_ERROR(bad value ${withval} for --with-library-bits) ;;
esac])
# Bitness selection relies on running test programs, so refuse any explicit
# request for it when cross compiling.
if test "$cross_compiling" = "yes" && test "${BITS_REQ}" != "nochange"; then
    AC_MSG_ERROR([Don't specify bitness when cross compiling. See readme.html for help with cross compilation, and set compiler options manually.])
fi
DEFAULT_64BIT=no
AC_MSG_CHECKING([whether runnable 64 bit binaries are built by default])
AC_RUN_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
DEFAULT_64BIT=yes, DEFAULT_64BIT=no, DEFAULT_64BIT=unknown)
BITS_GOT=unknown
# 'OK' here means we can skip any further checking; everything is fine as-is.
BITS_OK=yes
# do we need to check for buildable/runnable 32 or 64 bit?
BITS_CHECK_32=no
BITS_CHECK_64=no
# later, can we run the 32/64 bit binaries so made?
BITS_RUN_32=no
BITS_RUN_64=no
if test "$DEFAULT_64BIT" = "yes"; then
# we get 64 bits by default.
BITS_GOT=64
case "$BITS_REQ" in
32)
# need to look for 32 bit support.
BITS_CHECK_32=yes
# not copa.
BITS_OK=no;;
# everyone else is happy.
nochange) ;;
*) ;;
esac
elif test "$DEFAULT_64BIT" = "no"; then
# not 64 bit by default.
BITS_GOT=32
case "$BITS_REQ" in
64|64else32)
BITS_CHECK_64=yes
#BITS_CHECK_32=yes
BITS_OK=no;;
nochange) ;;
*) ;;
esac
elif test "$DEFAULT_64BIT" = "unknown"; then
# cross compiling.
BITS_GOT=unknown
case "$BITS_REQ" in
64|64else32) BITS_OK=no
BITS_CHECK_32=yes
BITS_CHECK_64=yes ;;
32) BITS_OK=no;;
nochange) ;;
*) ;;
esac
fi
AC_MSG_RESULT($DEFAULT_64BIT);
if test "$BITS_OK" != "yes"; then
# not copa. back these up.
CFLAGS_OLD="${CFLAGS}"
CXXFLAGS_OLD="${CXXFLAGS}"
LDFLAGS_OLD="${LDFLAGS}"
ARFLAGS_OLD="${ARFLAGS}"
CFLAGS_32="${CFLAGS}"
CXXFLAGS_32="${CXXFLAGS}"
LDFLAGS_32="${LDFLAGS}"
ARFLAGS_32="${ARFLAGS}"
CFLAGS_64="${CFLAGS}"
CXXFLAGS_64="${CXXFLAGS}"
LDFLAGS_64="${LDFLAGS}"
ARFLAGS_64="${ARFLAGS}"
CAN_BUILD_64=unknown
CAN_BUILD_32=unknown
# These results can't be cached because this check sets compiler flags.
if test "$BITS_CHECK_64" = "yes"; then
AC_MSG_CHECKING([how to build 64-bit executables])
CAN_BUILD_64=no
####
# Find out if we think we can *build* for 64 bit. Doesn't check whether we can run it.
# Note, we don't have to actually check if the options work- we'll try them before using them.
# So, only try actually testing the options, if you are trying to decide between multiple options.
# On exit from the following clauses:
# if CAN_BUILD_64=yes:
# *FLAGS are assumed to contain the right settings for 64bit
# else if CAN_BUILD_64=no: (default)
# *FLAGS are assumed to be trashed, and will be reset from *FLAGS_OLD
if test "$GCC" = yes; then
CFLAGS="${CFLAGS} -m64"
CXXFLAGS="${CXXFLAGS} -m64"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
CAN_BUILD_64=yes, CAN_BUILD_64=no)
else
case "${host}" in
sparc*-*-solaris*)
    # 1. try -m64
    CFLAGS="${CFLAGS} -m64"
    CXXFLAGS="${CXXFLAGS} -m64"
    AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
        CAN_BUILD_64=yes, CAN_BUILD_64=no)
    if test "$CAN_BUILD_64" != yes; then
        # Nope. Back out changes.  Each variable is restored from its own
        # backup so that C-only flags never leak into CXXFLAGS.
        CFLAGS="${CFLAGS_OLD}"
        CXXFLAGS="${CXXFLAGS_OLD}"
        # 2. try xarch=v9 [deprecated]
        ## TODO: cross compile: the following won't work.
        SPARCV9=`isainfo -n 2>&1 | grep sparcv9`
        SOL64=`$CXX -xarch=v9 2>&1 && $CC -xarch=v9 2>&1 | grep -v usage:`
        # "Warning: -xarch=v9 is deprecated, use -m64 to create 64-bit programs"
        if test -z "$SOL64" && test -n "$SPARCV9"; then
            CFLAGS="${CFLAGS} -xtarget=ultra -xarch=v9"
            CXXFLAGS="${CXXFLAGS} -xtarget=ultra -xarch=v9"
            LDFLAGS="${LDFLAGS} -xtarget=ultra -xarch=v9"
            CAN_BUILD_64=yes
        fi
    fi
    ;;
i386-*-solaris*)
    # 1. try -m64
    CFLAGS="${CFLAGS} -m64"
    CXXFLAGS="${CXXFLAGS} -m64"
    AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
        CAN_BUILD_64=yes, CAN_BUILD_64=no)
    if test "$CAN_BUILD_64" != yes; then
        # Nope. back out changes.
        CFLAGS="${CFLAGS_OLD}"
        CXXFLAGS="${CXXFLAGS_OLD}"
        # 2. try the older compiler option
        ## TODO: cross compile problem
        # Ask the system for its native instruction set, mirroring the
        # sparcv9 probe in the SPARC branch.  Without this assignment the
        # AMD64 test below could never succeed.
        AMD64=`isainfo -n 2>&1 | grep amd64`
        SOL64=`$CXX -xtarget=generic64 2>&1 && $CC -xtarget=generic64 2>&1 | grep -v usage:`
        if test -z "$SOL64" && test -n "$AMD64"; then
            CFLAGS="${CFLAGS} -xtarget=generic64"
            CXXFLAGS="${CXXFLAGS} -xtarget=generic64"
            CAN_BUILD_64=yes
        fi
    fi
    ;;
ia64-*-linux*)
# check for ecc/ecpc compiler support
## TODO: cross compiler problem
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Intel`"; then
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Itanium`"; then
CAN_BUILD_64=yes
fi
fi
;;
*-*-cygwin)
# vcvarsamd64.bat should have been used to enable 64-bit builds.
# We only do this check to display the correct answer.
## TODO: cross compiler problem
if test -n "`$CXX -help 2>&1 | grep 'for x64'`"; then
CAN_BUILD_64=yes
fi
;;
*-*-aix*|powerpc64-*-linux*)
CFLAGS="${CFLAGS} -q64"
CXXFLAGS="${CXXFLAGS} -q64"
LDFLAGS="${LDFLAGS} -q64"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
CAN_BUILD_64=yes, CAN_BUILD_64=no)
if test "$CAN_BUILD_64" = yes; then
# worked- set other options.
case "${host}" in
*-*-aix*)
# tell AIX what executable mode to use.
ARFLAGS="${ARFLAGS} -X64"
esac
fi
;;
*-*-hpux*)
# First we try the newer +DD64, if that doesn't work,
# try other options.
CFLAGS="${CFLAGS} +DD64"
CXXFLAGS="${CXXFLAGS} +DD64"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
CAN_BUILD_64=yes, CAN_BUILD_64=no)
if test "$CAN_BUILD_64" != yes; then
# reset
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
# append
CFLAGS="${CFLAGS} +DA2.0W"
CXXFLAGS="${CXXFLAGS} +DA2.0W"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
CAN_BUILD_64=yes, CAN_BUILD_64=no)
fi
;;
*-*ibm-openedition*|*-*-os390*)
CFLAGS="${CFLAGS} -Wc,lp64"
CXXFLAGS="${CXXFLAGS} -Wc,lp64"
LDFLAGS="${LDFLAGS} -Wl,lp64"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==64)?0:1;},
CAN_BUILD_64=yes, CAN_BUILD_64=no)
;;
*)
# unknown platform.
;;
esac
fi
AC_MSG_RESULT($CAN_BUILD_64)
if test "$CAN_BUILD_64" = yes; then
AC_MSG_CHECKING([whether runnable 64-bit binaries are being built ])
AC_TRY_RUN(int main(void) {return (sizeof(void*)*8==64)?0:1;},
BITS_RUN_64=yes, BITS_RUN_64=no, BITS_RUN_64=unknown)
AC_MSG_RESULT($BITS_RUN_64);
CFLAGS_64="${CFLAGS}"
CXXFLAGS_64="${CXXFLAGS}"
LDFLAGS_64="${LDFLAGS}"
ARFLAGS_64="${ARFLAGS}"
fi
# put it back.
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
LDFLAGS="${LDFLAGS_OLD}"
ARFLAGS="${ARFLAGS_OLD}"
fi
if test "$BITS_CHECK_32" = "yes"; then
# see comment under 'if BITS_CHECK_64', above.
AC_MSG_CHECKING([how to build 32-bit executables])
if test "$GCC" = yes; then
CFLAGS="${CFLAGS} -m32"
CXXFLAGS="${CXXFLAGS} -m32"
AC_COMPILE_IFELSE(int main(void) {return (sizeof(void*)*8==32)?0:1;},
CAN_BUILD_32=yes, CAN_BUILD_32=no)
fi
AC_MSG_RESULT($CAN_BUILD_32)
if test "$CAN_BUILD_32" = yes; then
AC_MSG_CHECKING([whether runnable 32-bit binaries are being built ])
AC_TRY_RUN(int main(void) {return (sizeof(void*)*8==32)?0:1;},
BITS_RUN_32=yes, BITS_RUN_32=no, BITS_RUN_32=unknown)
AC_MSG_RESULT($BITS_RUN_32);
CFLAGS_32="${CFLAGS}"
CXXFLAGS_32="${CXXFLAGS}"
LDFLAGS_32="${LDFLAGS}"
ARFLAGS_32="${ARFLAGS}"
fi
# put it back.
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
LDFLAGS="${LDFLAGS_OLD}"
ARFLAGS="${ARFLAGS_OLD}"
fi
##
# OK. Now, we've tested for 32 and 64 bitness. Let's see what we'll do.
#
# First, implement 64else32
if test "$BITS_REQ" = "64else32"; then
if test "$BITS_RUN_64" = "yes"; then
BITS_REQ=64
else
# no changes.
BITS_OK=yes
fi
fi
# Apply the outcome: adopt the flag set recorded for the requested bitness
# when binaries built with it were shown to run.  If the request could not
# be satisfied and nothing earlier marked it as acceptable, stop configure.
if test "$BITS_REQ" = "32" && test "$BITS_RUN_32" = "yes"; then
    CFLAGS="${CFLAGS_32}"
    CXXFLAGS="${CXXFLAGS_32}"
    LDFLAGS="${LDFLAGS_32}"
    ARFLAGS="${ARFLAGS_32}"
    BITS_OK=yes
elif test "$BITS_REQ" = "64" && test "$BITS_RUN_64" = "yes"; then
    CFLAGS="${CFLAGS_64}"
    CXXFLAGS="${CXXFLAGS_64}"
    LDFLAGS="${LDFLAGS_64}"
    ARFLAGS="${ARFLAGS_64}"
    BITS_OK=yes
elif test "$BITS_OK" != "yes"; then
    AC_MSG_ERROR([Requested $BITS_REQ bit binaries but could not compile and execute them. See readme.html for help with cross compilation, and set compiler options manually.])
fi
fi
])
# Strict compilation options.
# Adds warning flags to CFLAGS and CXXFLAGS unless --disable-strict (or
# --enable-strict=no) is given.  GCC and G++ receive the -W flag sets below,
# plus -D__STDC__=0 on Solaris.  On Cygwin hosts with a non-GNU compiler
# whose /help output starts with "Microsoft", /W4 is added instead.
AC_DEFUN(AC_CHECK_STRICT_COMPILE,
[
    AC_MSG_CHECKING([whether strict compiling is on])
    AC_ARG_ENABLE(strict,[ --enable-strict compile with strict compiler options [default=yes]], [
        case "$enableval" in
        no) ac_use_strict_options=no ;;
        *)  ac_use_strict_options=yes ;;
        esac
    ], [ac_use_strict_options=yes])
    AC_MSG_RESULT($ac_use_strict_options)

    if test "$ac_use_strict_options" = yes; then
        # C compiler flags
        if test "$GCC" = yes; then
            CFLAGS="$CFLAGS -Wall -ansi -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings -Wno-long-long"
            case "${host}" in
            *-*-solaris*) CFLAGS="$CFLAGS -D__STDC__=0" ;;
            esac
        else
            case "${host}" in
            *-*-cygwin)
                if test "`$CC /help 2>&1 | head -c9`" = "Microsoft"; then
                    CFLAGS="$CFLAGS /W4"
                fi
                ;;
            esac
        fi

        # C++ compiler flags
        if test "$GXX" = yes; then
            CXXFLAGS="$CXXFLAGS -W -Wall -ansi -pedantic -Wpointer-arith -Wwrite-strings -Wno-long-long"
            case "${host}" in
            *-*-solaris*) CXXFLAGS="$CXXFLAGS -D__STDC__=0" ;;
            esac
        else
            case "${host}" in
            *-*-cygwin)
                if test "`$CXX /help 2>&1 | head -c9`" = "Microsoft"; then
                    CXXFLAGS="$CXXFLAGS /W4"
                fi
                ;;
            esac
        fi
    fi
])

View file

@ -0,0 +1,562 @@
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "common", "..\common\common.vcproj", "{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}"
ProjectSection(ProjectDependencies) = postProject
{203EC78A-0531-43F0-A636-285439BDE025} = {203EC78A-0531-43F0-A636-285439BDE025}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw\ctestfw.vcproj", "{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbrk", "..\tools\genbrk\genbrk.vcproj", "{C2BE5000-7501-4E87-9724-B8D82494FAE6}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genccode", "..\tools\genccode\genccode.vcproj", "{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencmn", "..\tools\gencmn\gencmn.vcproj", "{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencnval", "..\tools\gencnval\gencnval.vcproj", "{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennames", "..\tools\gennames\gennames.vcproj", "{F5281B04-A9E0-4680-BBA8-1D7F7D115458}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennorm", "..\tools\gennorm\gennorm.vcproj", "{F5213103-6CBE-46E6-B4CC-2570B6837D86}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genpname", "..\tools\genpname\genpname.vcproj", "{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genprops", "..\tools\genprops\genprops.vcproj", "{6F744648-D15F-478A-90C6-58E353B5DDB3}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genrb", "..\tools\genrb\genrb.vcproj", "{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gentest", "..\tools\gentest\gentest.vcproj", "{77C78066-746F-4EA6-B3FE-B8C8A4A97891}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genuca", "..\tools\genuca\genuca.vcproj", "{86829694-A375-4C58-B4EA-96EF514E3225}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "i18n", "..\i18n\i18n.vcproj", "{0178B127-6269-407D-B112-93877BB62776}"
ProjectSection(ProjectDependencies) = postProject
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "intltest", "..\test\intltest\intltest.vcproj", "{73632960-B3A6-464D-83A3-4B43365F19B8}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layout", "..\layout\layout.vcproj", "{C920062A-0647-4553-A3B2-37C58065664B}"
ProjectSection(ProjectDependencies) = postProject
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layoutex", "..\layoutex\layoutex.vcproj", "{37FC2C7F-1904-4811-8955-2F478830EAD1}"
ProjectSection(ProjectDependencies) = postProject
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makeconv", "..\tools\makeconv\makeconv.vcproj", "{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makedata", "..\data\makedata.vcproj", "{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}"
ProjectSection(ProjectDependencies) = postProject
{C2BE5000-7501-4E87-9724-B8D82494FAE6} = {C2BE5000-7501-4E87-9724-B8D82494FAE6}
{F5213103-6CBE-46E6-B4CC-2570B6837D86} = {F5213103-6CBE-46E6-B4CC-2570B6837D86}
{F5281B04-A9E0-4680-BBA8-1D7F7D115458} = {F5281B04-A9E0-4680-BBA8-1D7F7D115458}
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A} = {97521D06-EC47-45D4-8BD0-9E16B3F93B2A}
{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB} = {DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF} = {8B41752B-5A52-41E4-B7E0-07921C0CC6BF}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C} = {F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}
{6F744648-D15F-478A-90C6-58E353B5DDB3} = {6F744648-D15F-478A-90C6-58E353B5DDB3}
{DB312A49-12A9-4E07-9E96-451DC2D8FF61} = {DB312A49-12A9-4E07-9E96-451DC2D8FF61}
{DB312A49-12A9-4E07-9E96-451DC2D8FF62} = {DB312A49-12A9-4E07-9E96-451DC2D8FF62}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC} = {62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}
{73632960-B3A6-464D-83A3-4B43365F19B8} = {73632960-B3A6-464D-83A3-4B43365F19B8}
{77C78066-746F-4EA6-B3FE-B8C8A4A97891} = {77C78066-746F-4EA6-B3FE-B8C8A4A97891}
{37FC2C7F-1904-4811-8955-2F478830EAD1} = {37FC2C7F-1904-4811-8955-2F478830EAD1}
{E4993E82-D68A-46CA-BAE0-9D35E172E46F} = {E4993E82-D68A-46CA-BAE0-9D35E172E46F}
{67351485-4D18-4245-BE39-A7EF0675ACD2} = {67351485-4D18-4245-BE39-A7EF0675ACD2}
{203EC78A-0531-43F0-A636-285439BDE025} = {203EC78A-0531-43F0-A636-285439BDE025}
{DBA4088D-F6F9-4F8F-8820-082A4765C16C} = {DBA4088D-F6F9-4F8F-8820-082A4765C16C}
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F} = {A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}
{86829694-A375-4C58-B4EA-96EF514E3225} = {86829694-A375-4C58-B4EA-96EF514E3225}
{3D1246AE-1B32-479B-BECA-AEFA97BE2321} = {3D1246AE-1B32-479B-BECA-AEFA97BE2321}
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057} = {691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}
{631C23CE-6C1D-4875-88F0-85E0A42B36EA} = {631C23CE-6C1D-4875-88F0-85E0A42B36EA}
{D3065ADB-8820-4CC7-9B6C-9510833961A3} = {D3065ADB-8820-4CC7-9B6C-9510833961A3}
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547} = {FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}
{9D4211F7-2C77-439C-82F0-30A4E43BA569} = {9D4211F7-2C77-439C-82F0-30A4E43BA569}
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC} = {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pkgdata", "..\tools\pkgdata\pkgdata.vcproj", "{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC} = {62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F} = {A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547} = {FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stubdata", "..\stubdata\stubdata.vcproj", "{203EC78A-0531-43F0-A636-285439BDE025}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toolutil", "..\tools\toolutil\toolutil.vcproj", "{6B231032-3CB5-4EED-9210-810D666A23A0}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "uconv", "..\extra\uconv\uconv.vcproj", "{DBA4088D-F6F9-4F8F-8820-082A4765C16C}"
ProjectSection(ProjectDependencies) = postProject
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A} = {97521D06-EC47-45D4-8BD0-9E16B3F93B2A}
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC} = {4C8454FE-81D3-4CA3-9927-29BA96F03DAC}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "io", "..\io\io.vcproj", "{C2B04507-2521-4801-BF0D-5FD79D6D518C}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gensprep", "..\tools\gensprep\gensprep.vcproj", "{631C23CE-6C1D-4875-88F0-85E0A42B36EA}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\iotest.vcproj", "{E4993E82-D68A-46CA-BAE0-9D35E172E46F}"
ProjectSection(ProjectDependencies) = postProject
{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbidi", "..\tools\genbidi\genbidi.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF62}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencase", "..\tools\gencase\gencase.vcproj", "{DB312A49-12A9-4E07-9E96-451DC2D8FF61}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genctd", "..\tools\genctd\genctd.vcproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
ProjectSection(ProjectDependencies) = postProject
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{C920062A-0647-4553-A3B2-37C58065664B} = {C920062A-0647-4553-A3B2-37C58065664B}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47} = {ECA6B435-B4FA-4F9F-BF95-F451D078FC47}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
{37FC2C7F-1904-4811-8955-2F478830EAD1} = {37FC2C7F-1904-4811-8955-2F478830EAD1}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\gencfu.vcproj", "{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}"
ProjectSection(ProjectDependencies) = postProject
{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.ActiveCfg = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.Build.0 = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.ActiveCfg = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.Build.0 = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.ActiveCfg = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.Build.0 = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.ActiveCfg = Release|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.Build.0 = Release|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.ActiveCfg = Debug|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.Build.0 = Debug|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.ActiveCfg = Debug|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.Build.0 = Debug|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.ActiveCfg = Release|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.Build.0 = Release|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.ActiveCfg = Release|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.Build.0 = Release|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.ActiveCfg = Debug|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.Build.0 = Debug|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.ActiveCfg = Debug|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.Build.0 = Debug|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.ActiveCfg = Release|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.Build.0 = Release|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.ActiveCfg = Release|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.Build.0 = Release|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.ActiveCfg = Debug|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.Build.0 = Debug|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.ActiveCfg = Debug|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.Build.0 = Debug|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.ActiveCfg = Release|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.Build.0 = Release|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.ActiveCfg = Release|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.Build.0 = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.ActiveCfg = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.Build.0 = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.ActiveCfg = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.Build.0 = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.ActiveCfg = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.Build.0 = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.ActiveCfg = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.Build.0 = Release|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.ActiveCfg = Debug|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.Build.0 = Debug|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.ActiveCfg = Debug|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.Build.0 = Debug|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.ActiveCfg = Release|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.Build.0 = Release|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.ActiveCfg = Release|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.Build.0 = Release|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.ActiveCfg = Debug|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.Build.0 = Debug|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.ActiveCfg = Debug|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.Build.0 = Debug|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.ActiveCfg = Release|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.Build.0 = Release|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.ActiveCfg = Release|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.Build.0 = Release|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.ActiveCfg = Debug|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.Build.0 = Debug|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.ActiveCfg = Debug|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.Build.0 = Debug|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.ActiveCfg = Release|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.Build.0 = Release|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.ActiveCfg = Release|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.Build.0 = Release|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.ActiveCfg = Debug|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.Build.0 = Debug|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.ActiveCfg = Debug|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.Build.0 = Debug|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.ActiveCfg = Release|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.Build.0 = Release|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.ActiveCfg = Release|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.Build.0 = Release|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.ActiveCfg = Debug|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.Build.0 = Debug|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.ActiveCfg = Debug|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.Build.0 = Debug|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.ActiveCfg = Release|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.Build.0 = Release|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.ActiveCfg = Release|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.Build.0 = Release|x64
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|Win32.ActiveCfg = Debug|Win32
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|Win32.Build.0 = Debug|Win32
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|x64.ActiveCfg = Debug|x64
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Debug|x64.Build.0 = Debug|x64
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|Win32.ActiveCfg = Release|Win32
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|Win32.Build.0 = Release|Win32
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|x64.ActiveCfg = Release|x64
{F5281B04-A9E0-4680-BBA8-1D7F7D115458}.Release|x64.Build.0 = Release|x64
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|Win32.ActiveCfg = Debug|Win32
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|Win32.Build.0 = Debug|Win32
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|x64.ActiveCfg = Debug|x64
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Debug|x64.Build.0 = Debug|x64
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|Win32.ActiveCfg = Release|Win32
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|Win32.Build.0 = Release|Win32
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|x64.ActiveCfg = Release|x64
{F5213103-6CBE-46E6-B4CC-2570B6837D86}.Release|x64.Build.0 = Release|x64
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|Win32.ActiveCfg = Debug|Win32
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|Win32.Build.0 = Debug|Win32
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|x64.ActiveCfg = Debug|x64
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Debug|x64.Build.0 = Debug|x64
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|Win32.ActiveCfg = Release|Win32
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|Win32.Build.0 = Release|Win32
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|x64.ActiveCfg = Release|x64
{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}.Release|x64.Build.0 = Release|x64
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|Win32.ActiveCfg = Debug|Win32
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|Win32.Build.0 = Debug|Win32
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|x64.ActiveCfg = Debug|x64
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Debug|x64.Build.0 = Debug|x64
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|Win32.ActiveCfg = Release|Win32
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|Win32.Build.0 = Release|Win32
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|x64.ActiveCfg = Release|x64
{6F744648-D15F-478A-90C6-58E353B5DDB3}.Release|x64.Build.0 = Release|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.ActiveCfg = Debug|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.Build.0 = Debug|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.ActiveCfg = Debug|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.Build.0 = Debug|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.ActiveCfg = Release|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.Build.0 = Release|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.ActiveCfg = Release|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.Build.0 = Release|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.ActiveCfg = Debug|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.Build.0 = Debug|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.ActiveCfg = Debug|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.Build.0 = Debug|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.ActiveCfg = Release|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.Build.0 = Release|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.ActiveCfg = Release|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|Win32.ActiveCfg = Debug|Win32
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|Win32.Build.0 = Debug|Win32
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|x64.ActiveCfg = Debug|x64
{86829694-A375-4C58-B4EA-96EF514E3225}.Debug|x64.Build.0 = Debug|x64
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|Win32.ActiveCfg = Release|Win32
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|Win32.Build.0 = Release|Win32
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|x64.ActiveCfg = Release|x64
{86829694-A375-4C58-B4EA-96EF514E3225}.Release|x64.Build.0 = Release|x64
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.Build.0 = Debug|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.ActiveCfg = Release|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.Build.0 = Release|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.ActiveCfg = Release|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.Build.0 = Release|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.ActiveCfg = Debug|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.Build.0 = Debug|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.ActiveCfg = Debug|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.Build.0 = Debug|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.ActiveCfg = Release|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.Build.0 = Release|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.ActiveCfg = Release|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.Build.0 = Release|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.ActiveCfg = Debug|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.Build.0 = Debug|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.ActiveCfg = Debug|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.Build.0 = Debug|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.ActiveCfg = Release|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.Build.0 = Release|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.ActiveCfg = Release|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.Build.0 = Release|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.ActiveCfg = Debug|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.Build.0 = Debug|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.ActiveCfg = Debug|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.Build.0 = Debug|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.ActiveCfg = Release|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.Build.0 = Release|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.ActiveCfg = Release|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.Build.0 = Release|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.ActiveCfg = Debug|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.Build.0 = Debug|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.ActiveCfg = Debug|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.Build.0 = Debug|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.ActiveCfg = Release|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.Build.0 = Release|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.ActiveCfg = Release|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.Build.0 = Release|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.ActiveCfg = Debug|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.Build.0 = Debug|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.ActiveCfg = Debug|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.Build.0 = Debug|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.ActiveCfg = Release|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.Build.0 = Release|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.ActiveCfg = Release|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.Build.0 = Release|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.ActiveCfg = Debug|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.Build.0 = Debug|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.ActiveCfg = Debug|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.Build.0 = Debug|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.ActiveCfg = Release|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.Build.0 = Release|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.ActiveCfg = Release|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.Build.0 = Release|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.ActiveCfg = Debug|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.Build.0 = Debug|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.ActiveCfg = Debug|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.Build.0 = Debug|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.ActiveCfg = Release|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.Build.0 = Release|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.ActiveCfg = Release|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.Build.0 = Release|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.ActiveCfg = Debug|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.Build.0 = Debug|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.ActiveCfg = Debug|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.Build.0 = Debug|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.ActiveCfg = Release|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.Build.0 = Release|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.ActiveCfg = Release|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.Build.0 = Release|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.ActiveCfg = Debug|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.Build.0 = Debug|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.ActiveCfg = Debug|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.Build.0 = Debug|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.ActiveCfg = Release|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.Build.0 = Release|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.ActiveCfg = Release|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.Build.0 = Release|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.ActiveCfg = Debug|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.Build.0 = Debug|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.ActiveCfg = Debug|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.Build.0 = Debug|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.ActiveCfg = Release|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.Build.0 = Release|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.ActiveCfg = Release|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.Build.0 = Release|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.ActiveCfg = Debug|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.Build.0 = Debug|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.ActiveCfg = Debug|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.Build.0 = Debug|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.ActiveCfg = Release|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.Build.0 = Release|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.ActiveCfg = Release|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.Build.0 = Release|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|Win32.ActiveCfg = Debug|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|Win32.Build.0 = Debug|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|x64.ActiveCfg = Debug|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Debug|x64.Build.0 = Debug|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|Win32.ActiveCfg = Release|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|Win32.Build.0 = Release|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|x64.ActiveCfg = Release|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF62}.Release|x64.Build.0 = Release|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|Win32.ActiveCfg = Debug|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|Win32.Build.0 = Debug|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|x64.ActiveCfg = Debug|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Debug|x64.Build.0 = Debug|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|Win32.ActiveCfg = Release|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|Win32.Build.0 = Release|Win32
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|x64.ActiveCfg = Release|x64
{DB312A49-12A9-4E07-9E96-451DC2D8FF61}.Release|x64.Build.0 = Release|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.ActiveCfg = Debug|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.Build.0 = Debug|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.ActiveCfg = Debug|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.Build.0 = Debug|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.ActiveCfg = Release|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.Build.0 = Release|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.ActiveCfg = Release|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.Build.0 = Release|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.ActiveCfg = Debug|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.Build.0 = Debug|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.ActiveCfg = Debug|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.Build.0 = Debug|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.ActiveCfg = Release|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.Build.0 = Release|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.ActiveCfg = Release|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.Build.0 = Release|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.ActiveCfg = Debug|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.Build.0 = Debug|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.ActiveCfg = Debug|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.Build.0 = Debug|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.ActiveCfg = Release|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.Build.0 = Release|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.ActiveCfg = Release|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.Build.0 = Release|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.ActiveCfg = Debug|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.Build.0 = Debug|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.ActiveCfg = Debug|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.Build.0 = Debug|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.ActiveCfg = Release|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

205
source/common/Makefile.in Normal file
View file

@ -0,0 +1,205 @@
#******************************************************************************
#
# Copyright (C) 1999-2009, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
## Makefile.in for ICU - icuuc.so
## Stephen F. Booth
## Source directory information
# NOTE(review): the @srcdir@/@top_srcdir@ placeholders are presumably filled in
# when config.status turns this Makefile.in into a Makefile - confirm.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ..
## All the flags and other definitions are included here.
include $(top_builddir)/icudefs.mk
## Build directory information
subdir = common
# for service hook
# LOCALSVC_CPP is the optional source file that is looked for in
# $(top_srcdir)/common; SVC_HOOK_INC is the generated makefile fragment that
# records whether it was found.
LOCALSVC_CPP=localsvc.cpp
SVC_HOOK_INC=$(top_builddir)/common/svchook.mk
## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) $(SVC_HOOK_INC)
## Target information
# TARGET (static archive) is only defined when ENABLE_STATIC is non-empty;
# the shared-library targets are only defined when ENABLE_SHARED is non-empty.
TARGET_STUBNAME=$(COMMON_STUBNAME)
ifneq ($(ENABLE_STATIC),)
TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
endif
ifneq ($(ENABLE_SHARED),)
SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
# common.res is added as a prerequisite of the shared object only when
# ENABLE_SO_VERSION_DATA is 1.
ifeq ($(ENABLE_SO_VERSION_DATA),1)
SO_VERSION_DATA = common.res
endif
# Additional batch target, built only when OS390BATCH is 1.
ifeq ($(OS390BATCH),1)
BATCH_TARGET = $(BATCH_COMMON_TARGET)
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
endif # OS390BATCH
endif # ENABLE_SHARED
ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET)
DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
# When the build directory differs from the source directory, also search the
# build-side common directory for headers.
ifneq ($(top_builddir),$(top_srcdir))
CPPFLAGS += -I$(top_builddir)/common
endif
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/i18n $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
DEFS += -DU_COMMON_IMPLEMENTATION
LDFLAGS += $(LDFLAGSICUUC)
# $(LIBICUDT) is either stub data or the real DLL common data.
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
# Object files that make up the common library. The generated
# $(SVC_HOOK_INC) fragment (included below) may append one more object.
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o uinit.o uobject.o cmemory.o \
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o \
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucat.o locmap.o uloc.o locid.o locutil.o \
bytestream.o stringpiece.o \
ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o
## Header files to install
# Headers are taken from both the source tree and the build tree.
HEADERS = $(srcdir)/unicode/*.h unicode/*.h
# Same list as OBJECTS with the .o suffix replaced by .$(STATIC_O) / .d.
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
DEPS = $(OBJECTS:.o=.d)
# Optional fragments: the leading '-' makes a missing file a non-error.
-include Makefile.local
-include $(SVC_HOOK_INC)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local install-library install-headers dist \
dist-local check check-local
## Clear suffix list
# An empty .SUFFIXES line removes the default suffix list.
.SUFFIXES :
## List of standard targets
# Each public target forwards to its *-local counterpart; 'check' also
# depends on 'all'.
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
# Building everything also requires the generated unicode/platform.h.
all-local: $(ALL_TARGETS) unicode/platform.h
install-local: install-headers install-library
# Install the built libraries into $(DESTDIR)$(libdir).
#  - The static archive is installed when ENABLE_STATIC is non-empty.
#  - When ENABLE_SHARED is non-empty, $(FINAL_SO_TARGET) is installed; if the
#    $(SO_TARGET) name differs from it, a symlink with that name is created,
#    and (nested inside that case) likewise for $(MIDDLE_SO_TARGET).
#  - When IMPORT_LIB_EXT is non-empty (still inside the ENABLE_SHARED case),
#    $(FINAL_IMPORT_LIB) is installed with symlinks for the $(IMPORT_LIB) and
#    $(MIDDLE_IMPORT_LIB) names where those differ.
install-library: all-local
$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
ifneq ($(ENABLE_STATIC),)
$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
endif
ifneq ($(ENABLE_SHARED),)
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
endif
endif
ifneq ($(IMPORT_LIB_EXT),)
$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
endif
ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
endif
endif
endif
# Generate the service-hook makefile fragment.
# If $(top_srcdir)/common/$(LOCALSVC_CPP) exists, the fragment is (re)created
# with a CPPFLAGS line defining U_LOCAL_SERVICE_HOOK=1 and an OBJECTS line
# adding the matching .o file. The leading '-' and the trailing 'true' keep
# the rule from failing when the file does not exist.
# The final echo always appends a marker comment, so the fragment exists
# afterwards in either case.
$(SVC_HOOK_INC):
@echo generating $@
@-test -f $(top_srcdir)/common/$(LOCALSVC_CPP) && ( echo "have $(LOCALSVC_CPP) - U_LOCAL_SERVICE_HOOK=1" ; \
echo 'CPPFLAGS +=-DU_LOCAL_SERVICE_HOOK=1' > $@ ; \
echo 'OBJECTS += $(LOCALSVC_CPP:%.cpp=%.o)' >> $@ \
) ; true
@echo "# Autogenerated by Makefile" >> $@
# Install every file matched by $(HEADERS) into $(DESTDIR)$(includedir)/unicode.
# Each install command is echoed first; the loop stops at the first failing
# install ('|| exit').
install-headers:
$(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode
@for file in $(HEADERS); do \
echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \
$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \
done
# Nothing to do for 'dist' in this directory.
dist-local:
# Remove $(CLEANFILES) (skipped when that list is empty), then the objects,
# the library targets and the optional version data.
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA)
# In addition to clean-local, remove the files generated in this directory.
distclean-local: clean-local
$(RMV) Makefile icucfg.h unicode/platform.h $(SVC_HOOK_INC)
# Nothing to do for 'check' in this directory.
check-local:
# Regenerate this Makefile by running config.status from the top build
# directory whenever Makefile.in or config.status changes. The rule also lists
# $(SVC_HOOK_INC) as a prerequisite, so that fragment is generated first.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(SVC_HOOK_INC)
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
# Regenerate unicode/platform.h from its .in template the same way.
unicode/platform.h: $(srcdir)/unicode/platform.h.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
# Static library: archive all static objects, then run ranlib on the result.
ifneq ($(ENABLE_STATIC),)
$(TARGET): $(STATIC_OBJECTS)
$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
$(RANLIB) $@
endif
# Shared library: link all objects (plus the optional version data) against
# $(LIBS).
ifneq ($(ENABLE_SHARED),)
$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
# The batch target links the same objects against $(BATCH_LIBS) instead.
ifeq ($(OS390BATCH),1)
$(BATCH_TARGET):$(OBJECTS)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
endif # OS390BATCH
endif # ENABLE_SHARED
# Pull in the per-object dependency files ($(DEPS)):
#  - when make was invoked without explicit goals, or
#  - when at least one goal does not end in "clean" (patsubst blanks out every
#    goal matching %clean; anything left over means a non-clean goal).
# The leading '-' makes missing dependency files a non-error.
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

723
source/common/bmpset.cpp Normal file
View file

@ -0,0 +1,723 @@
/*
******************************************************************************
*
* Copyright (C) 2007-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "bmpset.h"
U_NAMESPACE_BEGIN
/*
 * Build the lookup tables for the inversion list
 * parentList[0..parentListLength-1].
 * Only the pointer and the length are stored; the list is not copied.
 */
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    // All tables start out cleared before initBits() and overrideIllegal()
    // fill them in.
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));

    /*
     * Record, for binary searches, the list indexes for
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first 3-byte-UTF-8 code point; lower code points are
     * looked up in the bit tables.
     * Each search starts at the index found for the previous boundary.
     * The last pair of indexes is for finding supplementary code points.
     */
    const int32_t lastIndex=listLength-1;
    int32_t boundaryIndex=findCodePoint(0x800, 0, lastIndex);
    list4kStarts[0]=boundaryIndex;
    int32_t block;
    for(block=1; block<=0x10; ++block) {
        boundaryIndex=findCodePoint(block<<12, boundaryIndex, lastIndex);
        list4kStarts[block]=boundaryIndex;
    }
    list4kStarts[0x11]=lastIndex;

    initBits();
    overrideIllegal();
}
/*
 * Clone the precomputed tables of otherBMPSet, but refer to a different
 * inversion list (newParentList/newParentListLength).
 * The tables are copied verbatim and nothing is recomputed from the new list.
 */
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        list(newParentList), listLength(newParentListLength) {
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
}
// Nothing is released here; the list pointer handed to the constructors
// is not freed by this class.
BMPSet::~BMPSet() {}
/*
 * Set bits in a bit rectangle in "vertical" bit organization:
 * for each value v in start..limit-1, set bit (v>>6) in table[v&0x3f].
 *
 * @param table 64 words of 32 bits each; bits are only ever set, never cleared.
 * @param start first value to set (inclusive)
 * @param limit end of the range (exclusive)
 *
 * It must be start<limit<=0x800.
 *
 * All shifts are done on unsigned 32-bit values, and the bit for the trailing
 * partial column is only computed when that column is non-empty, so that
 * no shift overflows a signed int (lead==31) or uses a shift count of 32
 * (limit==0x800).
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    int32_t lead=start>>6;      // bit position, 0..31
    int32_t trail=start&0x3f;   // table index, 0..63

    // The bit for the column that contains start.
    uint32_t bits=(uint32_t)1<<lead;
    if((start+1)==limit) {  // Single-value shortcut.
        table[trail]|=bits;
        return;
    }

    int32_t limitLead=limit>>6;     // 0..32; 32 only if limit==0x800
    int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
    } else {
        // Partial vertical bit column,
        // followed by a bit rectangle,
        // followed by another partial vertical bit column.
        if(trail>0) {
            do {
                table[trail++]|=bits;
            } while(trail<64);
            ++lead;
        }
        if(lead<limitLead) {
            // Here lead<=31, so the unsigned shift is well-defined.
            bits=~(((uint32_t)1<<lead)-1);
            if(limitLead<0x20) {
                bits&=((uint32_t)1<<limitLead)-1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
            }
        }
        // limitTrail>0 together with limit<=0x800 implies limitLead<=31.
        // If limit==0x800 then limitLead==32 and limitTrail==0, and there is
        // no trailing column to set.
        if(limitTrail>0) {
            bits=(uint32_t)1<<limitLead;
            for(trail=0; trail<limitTrail; ++trail) {
                table[trail]|=bits;
            }
        }
    }
}
/*
 * Fill asciiBytes[], table7FF[] and bmpBlockBits[] from the inversion list.
 *
 * list[] is read as consecutive start/limit pairs (limit exclusive);
 * when a start has no following entry, 0x110000 is used as its limit.
 * The three phases walk the list once, in order:
 *   1. ranges below U+0080 set bytes in asciiBytes[],
 *   2. ranges below U+0800 set bits in table7FF[],
 *   3. ranges below U+10000 set per-64-block bits in bmpBlockBits[].
 * A range that straddles a phase boundary is carried over in start/limit.
 *
 * The "mixed block" marker 0x10001 is shifted as an unsigned 32-bit value
 * because the shift count can be as large as 15, which would overflow a
 * signed int.
 */
void BMPSet::initBits() {
    UChar32 start, limit;
    int32_t listIndex=0;

    // Set asciiBytes[].
    do {
        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
        if(start>=0x80) {
            break;
        }
        do {
            asciiBytes[start++]=1;
        } while(start<limit && start<0x80);
    } while(limit<=0x80);

    // Set table7FF[].
    while(start<0x800) {
        set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
        if(limit>0x800) {
            // The rest of this range belongs to the next phase.
            start=0x800;
            break;
        }

        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
    }

    // Set bmpBlockBits[].
    int32_t minStart=0x800;
    while(start<0x10000) {
        if(limit>0x10000) {
            limit=0x10000;
        }

        if(start<minStart) {
            start=minStart;
        }
        if(start<limit) {  // Else: Another range entirely in a known mixed-value block.
            if(start&0x3f) {
                // Mixed-value block of 64 code points.
                start>>=6;
                bmpBlockBits[start&0x3f]|=(uint32_t)0x10001<<(start>>6);
                start=(start+1)<<6;  // Round up to the next block boundary.
                minStart=start;      // Ignore further ranges in this block.
            }
            if(start<limit) {
                if(start<(limit&~0x3f)) {
                    // Multiple all-ones blocks of 64 code points each.
                    set32x64Bits(bmpBlockBits, start>>6, limit>>6);
                }

                if(limit&0x3f) {
                    // Mixed-value block of 64 code points.
                    limit>>=6;
                    bmpBlockBits[limit&0x3f]|=(uint32_t)0x10001<<(limit>>6);
                    limit=(limit+1)<<6;  // Round up to the next block boundary.
                    minStart=limit;      // Ignore further ranges in this block.
                }
            }
        }

        if(limit==0x10000) {
            break;
        }

        start=list[listIndex++];
        if(listIndex<listLength) {
            limit=list[listIndex++];
        } else {
            limit=0x110000;
        }
    }
}
/*
 * Make the table entries for illegal UTF-8 input agree with the result of
 * contains(U+FFFD), so that validity checking at runtime is a plain table
 * lookup.
 *
 * Entries that were cleared by the constructor and never touched by
 * initBits() (asciiBytes[] trail bytes, table7FF[] 0..7F,
 * bmpBlockBits[] 0..7FF) only need work when contains(FFFD) is true.
 * The surrogate range D800..DFFF may have been set by initBits(), so its
 * bits are rewritten in both cases.
 */
void BMPSet::overrideIllegal() {
    // Both bits (lead and lead+16) of lead byte 0xED.
    const uint32_t surrogateMask=~(0x10001<<0xd);
    int32_t i;

    if(!containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
        // contains(FFFD)==FALSE: only the surrogates need to be cleared.
        for(i=32; i<64; ++i) {  // Second half of 4k block.
            bmpBlockBits[i]&=surrogateMask;
        }
        return;
    }

    // contains(FFFD)==TRUE
    // Trail bytes in lead position.
    for(i=0x80; i<0xc0; ++i) {
        asciiBytes[i]=1;
    }

    // Lead bytes 0xC0 and 0xC1.
    const uint32_t c0c1Bits=3;
    for(i=0; i<64; ++i) {
        table7FF[i]|=c0c1Bits;
    }

    // Lead byte 0xE0, first half of 4k block.
    const uint32_t e0Bit=1;
    for(i=0; i<32; ++i) {
        bmpBlockBits[i]|=e0Bit;
    }

    // Lead byte 0xED, second half of 4k block:
    // clear the "mixed" bit and set the "all contained" bit.
    const uint32_t edBit=1<<0xd;
    for(i=32; i<64; ++i) {
        bmpBlockBits[i]=(bmpBlockBits[i]&surrogateMask)|edBit;
    }
}
/*
 * Binary search in list[] restricted to the index range lo..hi.
 * Returns the smallest index i in lo..hi such that c < list[i].
 * Assumes list[len - 1] == HIGH and that c is legal (0..HIGH-1).
 *
 * Examples:
 *                                     findCodePoint(c)
 *   set              list[]         c=0 1 3 4 7 8
 *   ===              ==============   ===========
 *   []               [110000]         0 0 0 0 0 0
 *   [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
 *   [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
 *   [:Any:]          [0, 110000]      1 1 1 1 1 1
 */
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    // c lies before the first candidate entry.
    if(c<list[lo]) {
        return lo;
    }
    // High runner test: c is often after the last range, so checking
    // for that up front pays off. Also covers an empty search range.
    if(lo>=hi || c>=list[hi-1]) {
        return hi;
    }
    // Loop invariants: list[lo] <= c < list[hi].
    // The search is finished once the midpoint collapses onto lo.
    int32_t mid;
    while((mid=(lo+hi)>>1)!=lo) {
        if(c<list[mid]) {
            hi=mid;
        } else {
            lo=mid;
        }
    }
    return hi;
}
/*
 * Test whether code point c is in the set, using the fastest table
 * available for the range that c falls into.
 */
UBool
BMPSet::contains(UChar32 c) const {
    // U+0000..U+007F: one byte per code point.
    if((uint32_t)c<=0x7f) {
        return (UBool)asciiBytes[c];
    }
    // U+0080..U+07FF: one bit per code point, organized vertically.
    if((uint32_t)c<=0x7ff) {
        const uint32_t leadBit=(uint32_t)1<<(c>>6);
        return (UBool)((table7FF[c&0x3f]&leadBit)!=0);
    }
    // U+0800..U+FFFF except surrogates: two bits per block of 64 code points.
    if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
        const int lead=c>>12;
        const uint32_t blockBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        if(blockBits>1) {
            // Mixed block: look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
        }
        // All 64 code points with the same bits 15..6
        // are either in the set or not.
        return (UBool)blockBits;
    }
    // Surrogate or supplementary code point.
    if((uint32_t)c<=0x10ffff) {
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    }
    // Out-of-range code points get FALSE, consistent with long-standing
    // behavior of UnicodeSet::contains(c).
    return FALSE;
}
/*
 * Span forward over the UTF-16 text s..limit while each code point's
 * set membership matches spanCondition (nonzero: in the set; zero: not in
 * the set). Returns a pointer to the first code unit that ends the span,
 * or a pointer at/after limit when everything was spanned.
 *
 * The first code unit is read before any comparison with limit, so the
 * caller must pass s<limit.
 *
 * Check for sufficient length for trail unit for each surrogate pair.
 * Handle single surrogates as surrogate code points as usual in ICU.
 *
 * The two loops are identical except that every membership test is inverted.
 */
const UChar *
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
do {
c=*s;
if(c<=0x7f) {
// One byte per code point.
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
// One bit per code point: bit (c>>6) of table7FF[c&0x3f].
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
// Non-surrogate BMP code point: two bits per block of 64 code points.
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
// (c is a trail surrogate, or a lead surrogate that is last in the
// text or not followed by a trail surrogate)
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
// Skip the trail surrogate; the loop increment skips the lead.
++s;
}
} while(++s<limit);
} else {
// span not
do {
c=*s;
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
++s;
}
} while(++s<limit);
}
return s;
}
/*
 * Symmetrical with span().
 *
 * Span backward from limit towards s over the UTF-16 text s..limit while
 * each code point's set membership matches spanCondition (nonzero: in the
 * set; zero: not in the set). Returns a pointer to the start of the spanned
 * trailing substring; that is s itself when the whole text was spanned.
 *
 * limit is decremented before the first read, so the caller must pass s<limit.
 */
const UChar *
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
for(;;) {
// Step back onto the code unit to test.
c=*(--limit);
if(c<=0x7f) {
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
// (c is a lead surrogate, or a trail surrogate that is first in the
// text or not preceded by a lead surrogate)
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
// Also step over the lead surrogate.
--limit;
}
if(s==limit) {
// Everything was spanned.
return s;
}
}
} else {
// span not
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
--limit;
}
if(s==limit) {
return s;
}
}
}
// A break leaves limit on the last code unit of the code point that ended
// the span, so the span starts one code unit after it.
return limit+1;
}
/*
 * Span forward over the UTF-8 bytes s[0..length-1] while each code point's
 * set membership matches spanCondition. Returns a pointer to the first byte
 * of the sequence that ends the span, or to the end of the spanned text.
 *
 * The first and the last byte are read unconditionally, so the caller must
 * pass length>0.
 *
 * Precheck for sufficient trail bytes at end of string only once per span.
 * Check validity.
 */
const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length;
uint8_t b=*s;
if((int8_t)b>=0) {
// Initial all-ASCII span.
if(spanCondition) {
do {
if(!asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
}
// Remaining length after the ASCII prefix.
length=(int32_t)(limit-s);
}
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
// limit0 is the value returned when the loop below reaches limit.
const uint8_t *limit0=limit;
/*
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
* or runs into a lead byte.
* In the span loop compare s with limit only once
* per multi-byte character.
*
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
* including it if that is part of the span, otherwise set limit0 to before
* the truncated sequence.
*/
b=*(limit-1);
if((int8_t)b<0) {
// b>=0x80: lead or trail byte
if(b<0xc0) {
// single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2;
// asciiBytes[0x80] holds the result of contains(FFFD).
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes
limit-=3;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
} else {
// lead byte with no trail bytes
--limit;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
}
uint8_t t1, t2, t3;
while(s<limit) {
b=*s;
if(b<0xc0) {
// ASCII; or trail bytes with the result of contains(FFFD).
if(spanCondition) {
do {
if(!asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
} else {
do {
if(asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
}
}
++s; // Advance past the lead byte.
if(b>=0xe0) {
if(b<0xf0) {
if( /* handle U+0000..U+FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
) {
// Keep only the low 4 bits of the 3-byte lead byte.
b&=0xf;
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
if(twoBits<=1) {
// All 64 code points with this lead byte and middle trail byte
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return s-1;
}
} else {
// Look up the code point in its 4k block of code points.
UChar32 c=(b<<12)|(t1<<6)|t2;
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
return s-1;
}
}
s+=2;
continue;
}
} else if( /* handle U+10000..U+10FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
) {
// Give an illegal sequence the same value as the result of contains(FFFD).
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80]
) != spanCondition
) {
return s-1;
}
s+=3;
continue;
}
} else /* 0xc0<=b<0xe0 */ {
if( /* handle U+0000..U+07FF inline */
(t1=(uint8_t)(*s-0x80)) <= 0x3f
) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
return s-1;
}
++s;
continue;
}
}
// Reached only when the trail bytes did not validate above.
// Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) {
return s-1;
}
}
return limit0;
}
/*
 * Span backward from the end of the UTF-8 bytes s[0..length-1] while each
 * code point's set membership matches spanCondition. Returns the index at
 * which the spanned trailing substring starts (0 when everything was spanned).
 *
 * s[length-1] is read unconditionally, so the caller must pass length>0.
 *
 * While going backwards through UTF-8 optimize only for ASCII.
 * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
 * possible to tell from the last byte in a multi-byte sequence how many
 * preceding bytes there should be. Therefore, going backwards through UTF-8
 * is much harder than going forward.
 */
int32_t
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
uint8_t b;
do {
b=s[--length];
if((int8_t)b>=0) {
// ASCII sub-span
if(spanCondition) {
do {
if(!asciiBytes[b]) {
// length indexes the byte that ended the span.
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
}
}
// prev indexes the last byte of the non-ASCII sequence tested next.
int32_t prev=length;
UChar32 c;
if(b<0xc0) {
// trail byte: collect a multi-byte character
c=utf8_prevCharSafeBody(s, 0, &length, b, -1);
// A negative result is treated like U+FFFD.
if(c<0) {
c=0xfffd;
}
} else {
// lead byte in last-trail position
c=0xfffd;
}
// c is a valid code point, not ASCII, not a surrogate
if(c<=0x7ff) {
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
return prev+1;
}
} else if(c<=0xffff) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return prev+1;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
return prev+1;
}
}
} else {
// Supplementary code point.
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
return prev+1;
}
}
} while(length>0);
return 0;
}
U_NAMESPACE_END

161
source/common/bmpset.h Normal file
View file

@ -0,0 +1,161 @@
/*
******************************************************************************
*
* Copyright (C) 2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#ifndef __BMPSET_H__
#define __BMPSET_H__
#include "unicode/utypes.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN
/*
* Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly.
*
* ASCII: Look up bytes.
* 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges.
* Supplementary characters: Call contains() on the parent set.
*/
class BMPSet : public UMemory {
public:
/*
* Constructs a BMPSet from an inversion list and its length.
* NOTE(review): presumably parentList is the parent UnicodeSet's inversion list
* that is kept in the list/listLength members below -- confirm in bmpset.cpp.
*/
BMPSet(const int32_t *parentList, int32_t parentListLength);
/*
* Constructs a BMPSet from another BMPSet together with a new inversion list.
* NOTE(review): presumably used when the parent set is cloned so that the copy
* refers to the clone's own list -- confirm in bmpset.cpp.
*/
BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
virtual ~BMPSet();
/*
* Tests whether the code point c is contained in the set.
*/
virtual UBool contains(UChar32 c) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which starts the span.
*/
const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The start of the span, as an int32_t index rather than a pointer.
*/
int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
private:
/*
* NOTE(review): presumably fills the lookup tables declared below from the
* inversion list -- confirm in bmpset.cpp.
*/
void initBits();
/*
* NOTE(review): presumably applies the contains(FFFD) values that the table
* comments below describe for non-shortest forms and surrogates -- confirm.
*/
void overrideIllegal();
/**
* Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
* binary search is restricted for finding code points in a certain range.
*
* For restricting the search for finding in the range start..end,
* pass in
* lo=findCodePoint(start) and
* hi=findCodePoint(end)
* with 0<=lo<=hi<len.
* findCodePoint(c) defaults to lo=0 and hi=len-1.
*
* @param c a character in a subrange of MIN_VALUE..MAX_VALUE
* @param lo The lowest index to be returned.
* @param hi The highest index to be returned.
* @return the smallest integer i in the range lo..hi,
* inclusive, such that c < list[i]
*/
int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
/*
* Returns the low bit of findCodePoint(c, lo, hi); defined inline after the class.
*/
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/*
* One byte per ASCII character, or trail byte in lead position.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
*/
UBool asciiBytes[0xc0];
/*
* One bit per code point from U+0000..U+07FF.
* The bits are organized vertically; consecutive code points
* correspond to the same bit positions in consecutive table words.
* With code point parts
* lead=c{10..6}
* trail=c{5..0}
* it is set.contains(c)==(table7FF[trail] bit lead)
*
* Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
* for faster validity checking at runtime.
*/
uint32_t table7FF[64];
/*
* One bit per 64 BMP code points.
* The bits are organized vertically; consecutive 64-code point blocks
* correspond to the same bit position in consecutive table words.
* With code point parts
* lead=c{15..12}
* t1=c{11..6}
* test bits (lead+16) and lead in bmpBlockBits[t1].
* If the upper bit is 0, then the lower bit indicates if contains(c)
* for all code points in the 64-block.
* If the upper bit is 1, then the block is mixed and set.contains(c)
* must be called.
*
* Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
* the result of contains(FFFD) for faster validity checking at runtime.
*/
uint32_t bmpBlockBits[64];
/*
* Inversion list indexes for restricted binary searches in
* findCodePoint(), from
* findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
* U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
* always looked up in the bit tables.
* The last pair of indexes is for finding supplementary code points.
*/
int32_t list4kStarts[18];
/*
* The inversion list of the parent set, for the slower contains() implementation
* for mixed BMP blocks and for supplementary code points.
* The list is terminated with list[listLength-1]=0x110000.
*/
const int32_t *list;
/* Number of entries in list, including the terminating 0x110000. */
int32_t listLength;
};
/*
 * Slow-path membership test: runs the restricted binary search and reports
 * the low bit of the index it returns.
 */
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
    const int32_t listIndex = findCodePoint(c, lo, hi);
    return (UBool)(listIndex & 1);
}
U_NAMESPACE_END
#endif

290
source/common/brkeng.cpp Normal file
View file

@ -0,0 +1,290 @@
/**
************************************************************************************
* Copyright (C) 2006-2007, International Business Machines Corporation and others. *
* All Rights Reserved. *
************************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "dictbe.h"
#include "triedict.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ures.h"
#include "unicode/udata.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
#include "uvector.h"
#include "umutex.h"
#include "uresimp.h"
#include "ubrkimpl.h"
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
// Default constructor: the body is empty, the base class sets up no state here.
LanguageBreakEngine::LanguageBreakEngine() {
}
// Destructor: the body is empty, nothing is released here.
LanguageBreakEngine::~LanguageBreakEngine() {
}
/*
******************************************************************
*/
// Default constructor: the body is empty, the base class sets up no state here.
LanguageBreakFactory::LanguageBreakFactory() {
}
// Destructor: the body is empty, nothing is released here.
LanguageBreakFactory::~LanguageBreakFactory() {
}
/*
******************************************************************
*/
/*
 * Constructor. Clears every per-break-type slot of fHandled; the error code
 * argument is not used.
 */
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    int32_t slot = 0;
    while (slot < slotCount) {
        fHandled[slot] = 0;
        ++slot;
    }
}
/*
 * Destructor. Deletes every set that was allocated in fHandled.
 */
UnhandledEngine::~UnhandledEngine() {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    for (int32_t slot = 0; slot < slotCount; ++slot) {
        UnicodeSet *handled = fHandled[slot];
        if (handled == 0) {
            continue;   // this break type never had a set allocated
        }
        delete handled;
    }
}
/*
 * Reports whether c is in the set recorded for breakType.
 * Returns FALSE for an out-of-range breakType or when no set has been
 * allocated for that break type.
 */
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= slotCount) {
        return FALSE;
    }
    const UnicodeSet *handled = fHandled[breakType];
    if (handled == 0) {
        return FALSE;
    }
    return handled->contains(c) ? TRUE : FALSE;
}
/*
 * Moves the text iterator across the run of characters contained in the set
 * recorded for breakType. No breaks are ever reported.
 *
 * @param text       The text to scan; its iteration position is advanced
 *                   (or moved backwards when reverse is set).
 * @param startPos   Lower limit of the scan when reverse is set.
 * @param endPos     Upper limit of the scan when reverse is not set.
 * @param reverse    Scan backwards instead of forwards.
 * @param breakType  Index into fHandled; out-of-range values are ignored.
 * @return Always 0; the foundBreaks argument is not touched.
 */
int32_t
UnhandledEngine::findBreaks( UText *text,
                             int32_t startPos,
                             int32_t endPos,
                             UBool reverse,
                             int32_t breakType,
                             UStack &/*foundBreaks*/ ) const {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= slotCount) {
        return 0;
    }
    // handles() tolerates a break type for which handleCharacter() has not
    // yet allocated a set; do the same here instead of dereferencing NULL.
    const UnicodeSet *handled = fHandled[breakType];
    if (handled == 0) {
        return 0;
    }
    UChar32 c = utext_current32(text);
    if (reverse) {
        while ((int32_t)utext_getNativeIndex(text) > startPos && handled->contains(c)) {
            c = utext_previous32(text);
        }
    }
    else {
        while ((int32_t)utext_getNativeIndex(text) < endPos && handled->contains(c)) {
            utext_next32(text); // TODO: recast loop to work with post-increment operations.
            c = utext_current32(text);
        }
    }
    return 0;
}
/*
 * Records that this engine handles c for breakType. The set for breakType is
 * allocated on first use; if c is not yet in it, the set is given the
 * UCHAR_SCRIPT property value of c via applyIntPropertyValue().
 * Out-of-range break types and allocation failure are silently ignored.
 */
void
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= slotCount) {
        return;
    }
    UnicodeSet *&handled = fHandled[breakType];
    if (handled == 0) {
        handled = new UnicodeSet();
        if (handled == 0) {
            return;
        }
    }
    if (handled->contains(c)) {
        return;
    }
    // Apply the entire script of the character.
    UErrorCode status = U_ZERO_ERROR;
    int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
    handled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
}
/*
******************************************************************
*/
/*
 * Constructor. Starts with no engine stack; the error code argument is not used.
 */
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/)
        : fEngines(0) {
}
/*
 * Destructor. Deletes the engine stack if one was created.
 */
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
    if (fEngines == 0) {
        return;
    }
    delete fEngines;
}
U_NAMESPACE_END
U_CDECL_BEGIN
/* Element deleter handed to the engine UStack: deletes obj as a LanguageBreakEngine. */
static void U_CALLCONV _deleteEngine(void *obj) {
    const U_NAMESPACE_QUALIFIER LanguageBreakEngine *engine =
        (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj;
    delete engine;
}
U_CDECL_END
U_NAMESPACE_BEGIN
/*
 * Returns an engine that handles (c, breakType).
 *
 * The engines already on fEngines are searched first, newest to oldest, under
 * the global mutex. If none matches, loadEngineFor() is called outside the
 * lock, the stack is searched again under the lock, and the new engine is
 * pushed only if no matching engine appeared in the meantime.
 *
 * @return a matching engine, or NULL if the engine stack could not be created
 *         or no engine could be loaded.
 */
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
UBool needsInit;
int32_t i;
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
// TODO: The global mutex should not be used.
// The global mutex should only be used for short periods.
// A ICULanguageBreakFactory specific mutex should be used.
umtx_lock(NULL);
needsInit = (UBool)(fEngines == NULL);
if (!needsInit) {
// Search from the most recently pushed engine downwards.
i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
break;
}
lbe = NULL;
}
}
umtx_unlock(NULL);
if (lbe != NULL) {
return lbe;
}
if (needsInit) {
// Allocate the stack outside the lock, then install it under the lock
// unless fEngines was set in the meantime.
UStack *engines = new UStack(_deleteEngine, NULL, status);
if (U_SUCCESS(status) && engines == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
else if (U_FAILURE(status)) {
delete engines;
engines = NULL;
}
else {
umtx_lock(NULL);
if (fEngines == NULL) {
fEngines = engines;
engines = NULL;
}
umtx_unlock(NULL);
// Non-NULL only if fEngines was already set; discard the spare stack.
delete engines;
}
}
// NOTE(review): fEngines is read here without holding the mutex.
if (fEngines == NULL) {
return NULL;
}
// We didn't find an engine the first time through, or there was no
// stack. Create an engine.
const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
// Now get the lock, and see if someone else has created it in the
// meantime
umtx_lock(NULL);
i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
break;
}
lbe = NULL;
}
if (lbe == NULL && newlbe != NULL) {
// NOTE(review): status is not examined after push(); if the push fails the
// engine is still returned but is presumably not owned by the stack -- confirm.
fEngines->push((void *)newlbe, status);
lbe = newlbe;
newlbe = NULL;
}
umtx_unlock(NULL);
// Non-NULL only when an existing engine was found instead; discard ours.
delete newlbe;
return lbe;
}
/*
 * Creates a dictionary-based engine for the script of c.
 *
 * The script is looked up with uscript_getScript(), its dictionary is loaded
 * with loadDictionaryFor(), and an engine is created for USCRIPT_THAI only.
 * The dictionary is deleted here when no engine object results.
 *
 * @return the new engine, or NULL if the script lookup fails, no dictionary
 *         is available, the script has no engine, or engine creation fails.
 */
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    UErrorCode status = U_ZERO_ERROR;
    const UScriptCode script = uscript_getScript(c, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    const CompactTrieDictionary *dictionary = loadDictionaryFor(script, breakType);
    if (dictionary == NULL) {
        return NULL;
    }
    if (script != USCRIPT_THAI) {
        // No engine exists for this script; the dictionary is not needed.
        delete dictionary;
        return NULL;
    }
    const LanguageBreakEngine *thaiEngine = new ThaiBreakEngine(dictionary, status);
    if (thaiEngine == NULL) {
        delete dictionary;
        return NULL;
    }
    if (U_FAILURE(status)) {
        delete thaiEngine;
        return NULL;
    }
    return thaiEngine;
}
/*
 * Loads the break dictionary for a script.
 *
 * The dictionary file name is read from the "dictionaries" table of the root
 * bundle in the brkitr tree, keyed by the script's short name. The name is
 * split at the first '.' into a base name and an extension, which are passed
 * to udata_open().
 *
 * @param script The script whose dictionary is wanted.
 * @return a new CompactTrieDictionary, or NULL if the resource lookup fails,
 *         the name or extension does not fit the local buffers, the data
 *         cannot be opened, or the dictionary cannot be created.
 */
const CompactTrieDictionary *
ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
    UErrorCode status = U_ZERO_ERROR;
    char dictnbuff[256];
    char ext[4]={'\0'};
    dictnbuff[0] = 0;
    // Open root from brkitr tree.
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
    int32_t dictnlength = 0;
    const UChar *dictfname = ures_getString(b, &dictnlength, &status);
    if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
        dictnlength = 0;
        status = U_BUFFER_OVERFLOW_ERROR;
    }
    if (U_SUCCESS(status) && dictfname) {
        UChar* extStart=u_strchr(dictfname, 0x002e);
        int len = 0;
        if(extStart!=NULL){
            // Copy exactly the extension plus its NUL. A fixed-size copy would
            // read past the end of a short extension and would leave ext
            // unterminated for a long one.
            int32_t extLength = u_strlen(extStart+1);
            if ((size_t)extLength >= sizeof(ext)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
            else {
                len = (int)(extStart-dictfname);
                u_UCharsToChars(extStart+1, ext, extLength+1); // includes the NUL
                u_UCharsToChars(dictfname, dictnbuff, len);
            }
        }
        dictnbuff[len]=0; // nul terminate
    }
    ures_close(b);
    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    const CompactTrieDictionary *dict = new CompactTrieDictionary(file, status);
    if (dict == NULL) {
        // The constructor never ran, so nothing took over the data memory.
        udata_close(file);
        return NULL;
    }
    if (U_FAILURE(status)) {
        // NOTE(review): presumably the dictionary adopted file once it was
        // constructed, so deleting it releases the data -- confirm in triedict.cpp.
        delete dict;
        return NULL;
    }
    return dict;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

292
source/common/brkeng.h Normal file
View file

@ -0,0 +1,292 @@
/**
************************************************************************************
* Copyright (C) 2006-2007, International Business Machines Corporation and others. *
* All Rights Reserved. *
************************************************************************************
*/
#ifndef BRKENG_H
#define BRKENG_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
#include "unicode/uscript.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
class UStack;
class CompactTrieDictionary;
/*******************************************************************
* LanguageBreakEngine
*/
/**
* <p>LanguageBreakEngines implement language-specific knowledge for
* finding text boundaries within a run of characters belonging to a
* specific set. The boundaries will be of a specific kind, e.g. word,
* line, etc.</p>
*
* <p>LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.</p>
*/
class LanguageBreakEngine : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakEngine();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks The caller's UStack for the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* LanguageBreakFactory
*/
/**
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
* that can determine breaks for characters in a specific set, if
* such an object can be found.</p>
*
* <p>If a LanguageBreakFactory is to be shared between threads,
* appropriate synchronization must be used; there is none internal
* to the factory.</p>
*
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
* normally be shared between threads without synchronization, unless
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
*
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
* it returns when it itself is deleted, unless the specific subclass of
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
* not be deleted until the LanguageBreakEngines it has returned are no
* longer needed.</p>
*/
class LanguageBreakFactory : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakFactory();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* <p>The method is not const, so an implementation may change the
* factory's state when it is called.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
};
/*******************************************************************
* UnhandledEngine
*/
/**
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
* handles characters that no other LanguageBreakEngine is available to
* handle. It is told the character and the type of break; at its
* discretion it may handle more than the specified character (e.g.,
* the entire script to which that character belongs).</p>
*
* <p>UnhandledEngines may not be shared between threads without
* external synchronization.</p>
*/
class UnhandledEngine : public LanguageBreakEngine {
private:
/**
* The sets of characters handled, for each break type.
* The break type is used directly as the index, so only break types
* 0..3 can be recorded.
* @internal
*/
UnicodeSet *fHandled[4];
public:
/**
* <p>Constructor.</p>
*
* @param status An error code.
* NOTE(review): the implementation in brkeng.cpp leaves this parameter unnamed
* and does not use it.
*/
UnhandledEngine(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~UnhandledEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text (TODO: UText). The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks The caller's UStack for the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
*
* @param c A character which the engine should handle
* @param breakType The type of text break for which the engine should handle c
*/
virtual void handleCharacter(UChar32 c, int32_t breakType);
};
/*******************************************************************
* ICULanguageBreakFactory
*/
/**
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
* data in the ICU data file.</p>
*/
class ICULanguageBreakFactory : public LanguageBreakFactory {
private:
/**
* The stack of break engines created by this factory.
* It starts out as 0 and is created on demand by getEngineFor().
* @internal
*/
UStack *fEngines;
public:
/**
* <p>Standard constructor.</p>
*
* @param status An error code.
* NOTE(review): the implementation in brkeng.cpp leaves this parameter unnamed
* and does not use it.
*/
ICULanguageBreakFactory(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ICULanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
protected:
/**
* <p>Create a LanguageBreakEngine for the set of characters to which
* the supplied character belongs, for the specified break type.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
/**
* <p>Create a CompactTrieDictionary for the specified script and break type.</p>
*
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
* @param breakType The kind of text break for which a dictionary is
* sought.
* @return A CompactTrieDictionary with the desired characteristics, or 0.
*/
virtual const CompactTrieDictionary *loadDictionaryFor(UScriptCode script, int32_t breakType);
};
U_NAMESPACE_END
/* BRKENG_H */
#endif

462
source/common/brkiter.cpp Normal file
View file

@ -0,0 +1,462 @@
/*
*******************************************************************************
* Copyright (C) 1997-2009, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* File TXTBDRY.CPP
*
* Modification History:
*
* Date Name Description
* 02/18/97 aliu Converted from OpenClass. Added DONE.
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
*****************************************************************************************
*/
// *****************************************************************************
// This file was generated from the java source file BreakIterator.java
// *****************************************************************************
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "ucln_cmn.h"
#include "cstring.h"
#include "umutex.h"
#include "servloc.h"
#include "locbased.h"
#include "uresimp.h"
#include "uassert.h"
#include "ubrkimpl.h"
// *****************************************************************************
// class BreakIterator
// This class implements methods for finding the location of boundaries in text.
// Instances of BreakIterator maintain a current position and scan over text
// returning the index of characters where boundaries occur.
// *****************************************************************************
U_NAMESPACE_BEGIN
// -------------------------------------
/*
 * Builds a RuleBasedBreakIterator from the compiled rules data named in the
 * "boundaries" table of the brkitr resource bundle for a locale.
 *
 * @param loc    The locale whose break rules are wanted. When opening its
 *               bundle reports U_USING_DEFAULT_WARNING, the root bundle is
 *               opened instead.
 * @param type   Key inside "boundaries" ("word", "line", ...).
 * @param kind   Break type stored on the result with setBreakType().
 * @param status In/out error code; nothing is done if it already indicates
 *               a failure.
 * @return the new iterator, or NULL with status set on failure.
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY];
    // Must start at 0: ures_getString() may leave it untouched on failure,
    // and it is read unconditionally below.
    int32_t size = 0;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;
    if (U_FAILURE(status))
        return NULL;
    fnbuff[0] = 0;
    actualLocale[0] = 0;
    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);
    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
       brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }
    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }
        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            uprv_strncpy(actualLocale,
                         ures_getLocale(brkName, &status),
                         sizeof(actualLocale)/sizeof(actualLocale[0]));
            // strncpy does not terminate the destination when the source fills it.
            actualLocale[sizeof(actualLocale)/sizeof(actualLocale[0]) - 1] = 0;
            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                // Copy exactly the extension plus its NUL. A fixed-size copy
                // would read past the end of a short extension and would
                // leave ext unterminated for a long one.
                int32_t extLength = u_strlen(extStart+1);
                if ((size_t)extLength >= sizeof(ext)) {
                    status = U_BUFFER_OVERFLOW_ERROR;
                }
                else {
                    len = (int)(extStart-brkfname);
                    u_UCharsToChars(extStart+1, ext, extLength+1); // includes the NUL
                    u_UCharsToChars(brkfname, fnbuff, len);
                }
            }
            fnbuff[len]=0; // nul terminate
        }
    }
    ures_close(brkRules);
    ures_close(brkName);
    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }
    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);
    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }
    ures_close(b);
    if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }
    if (result == NULL) {
        // The constructor never ran, so the data memory is still ours to close.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return result;
}
// Creates a break iterator for word breaks.
BreakIterator* U_EXPORT2
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
{
    // Word breaks are the UBRK_WORD kind of createInstance().
    BreakIterator *wordIterator = createInstance(key, UBRK_WORD, status);
    return wordIterator;
}
// -------------------------------------
// Creates a break iterator for line breaks.
BreakIterator* U_EXPORT2
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
{
    // Line breaks are the UBRK_LINE kind of createInstance().
    BreakIterator *lineIterator = createInstance(key, UBRK_LINE, status);
    return lineIterator;
}
// -------------------------------------
// Creates a break iterator for character breaks.
BreakIterator* U_EXPORT2
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
{
    // Character breaks are the UBRK_CHARACTER kind of createInstance().
    BreakIterator *characterIterator = createInstance(key, UBRK_CHARACTER, status);
    return characterIterator;
}
// -------------------------------------
// Creates a break iterator for sentence breaks.
BreakIterator* U_EXPORT2
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
{
    // Sentence breaks are the UBRK_SENTENCE kind of createInstance().
    BreakIterator *sentenceIterator = createInstance(key, UBRK_SENTENCE, status);
    return sentenceIterator;
}
// -------------------------------------
// Creates a break iterator for title casing breaks.
BreakIterator* U_EXPORT2
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
{
    // Title-casing breaks are the UBRK_TITLE kind of createInstance().
    BreakIterator *titleIterator = createInstance(key, UBRK_TITLE, status);
    return titleIterator;
}
// -------------------------------------
// Gets all the available locales that has localized text boundary data.
const Locale* U_EXPORT2
BreakIterator::getAvailableLocales(int32_t& count)
{
    // Forwarded unchanged to Locale::getAvailableLocales(), which also fills in count.
    const Locale *locales = Locale::getAvailableLocales(count);
    return locales;
}
// -------------------------------------
// Gets the objectLocale display name in the default locale language.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
                              UnicodeString& name)
{
    // Forwarded to the one-argument Locale::getDisplayName().
    UnicodeString &displayName = objectLocale.getDisplayName(name);
    return displayName;
}
// -------------------------------------
// Gets the objectLocale display name in the displayLocale language.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
                              const Locale& displayLocale,
                              UnicodeString& name)
{
    // Forwarded to the two-argument Locale::getDisplayName().
    UnicodeString &displayName = objectLocale.getDisplayName(displayLocale, name);
    return displayName;
}
// ------------------------------------------
//
// Default constructor and destructor
//
//-------------------------------------------
BreakIterator::BreakIterator()
{
    // Clear the buffer-clone flag and make both locale IDs empty strings.
    fBufferClone = FALSE;
    actualLocale[0] = 0;
    validLocale[0] = 0;
}
// Destructor: the body is empty, nothing is released here.
BreakIterator::~BreakIterator()
{
}
// ------------------------------------------
//
// Registration
//
//-------------------------------------------
#if !UCONFIG_NO_SERVICE
// -------------------------------------
// Service factory whose handleCreate() builds iterators with BreakIterator::makeInstance().
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
protected:
    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
        BreakIterator *created = BreakIterator::makeInstance(loc, kind, status);
        return created;
    }
};
// -------------------------------------
// Locale service for break iterators; registers one ICUBreakIteratorFactory on construction.
class ICUBreakIteratorService : public ICULocaleService {
public:
    ICUBreakIteratorService()
        : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
    {
        UErrorCode status = U_ZERO_ERROR;
        registerFactory(new ICUBreakIteratorFactory(), status);
    }

    // Clones a registered instance by calling BreakIterator::clone() on it.
    virtual UObject* cloneInstance(UObject* instance) const {
        BreakIterator *original = (BreakIterator*)instance;
        return original->clone();
    }

    // Builds an iterator for the key's current locale and kind with makeInstance().
    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
        LocaleKey& localeKey = (LocaleKey&)key;
        int32_t kind = localeKey.kind();
        Locale current;
        localeKey.currentLocale(current);
        return BreakIterator::makeInstance(current, kind, status);
    }

    // The service counts as default while it holds exactly one factory.
    virtual UBool isDefault() const {
        const int32_t factoryCount = countFactories();
        return factoryCount == 1;
    }
};
// -------------------------------------
U_NAMESPACE_END
// defined in ucln_cmn.h
// The break iterator service: NULL until getService() creates it, and reset
// to NULL by breakiterator_cleanup().
static U_NAMESPACE_QUALIFIER ICULocaleService* gService = NULL;
/**
* Release all static memory held by breakiterator.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV breakiterator_cleanup(void) {
#if !UCONFIG_NO_SERVICE
    // Drop the service object, if any, and forget the pointer.
    if (gService != NULL) {
        delete gService;
        gService = NULL;
    }
#endif
    // Always reports TRUE.
    return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
/*
 * Returns the break iterator service, creating it on first use.
 * The candidate service is allocated outside the global mutex and installed
 * under it only if gService is still NULL; the cleanup function is registered
 * at the same moment. A candidate that was not installed is deleted.
 * NOTE(review): the result of new is not checked, so this can return NULL;
 * the callers in this file test for that.
 */
static ICULocaleService*
getService(void)
{
UBool needsInit;
UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit);
if (needsInit) {
ICULocaleService *tService = new ICUBreakIteratorService();
umtx_lock(NULL);
if (gService == NULL) {
gService = tService;
tService = NULL;
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
}
umtx_unlock(NULL);
// Non-NULL only if gService was already set; discard the spare.
delete tService;
}
return gService;
}
// -------------------------------------
/* Reports whether gService has been created, reading it through UMTX_CHECK. */
static inline UBool
hasService(void)
{
    UBool servicePresent;
    UMTX_CHECK(NULL, gService != NULL, servicePresent);
    return servicePresent;
}
// -------------------------------------
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    // Registration goes through the service; without one, report an
    // allocation failure and hand back no key.
    ICULocaleService *service = getService();
    if (service != NULL) {
        return service->registerInstance(toAdopt, locale, kind, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
// -------------------------------------
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    // Nothing is attempted when the caller already has a failure.
    if (U_FAILURE(status)) {
        return FALSE;
    }
    // Without a service there is nothing to unregister from.
    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    return gService->unregister(key, status);
}
// -------------------------------------
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales(void)
{
    // The enumeration comes from the service; NULL when no service exists.
    ICULocaleService *service = getService();
    if (service != NULL) {
        return service->getAvailableLocales();
    }
    return NULL;
}
#endif /* UCONFIG_NO_SERVICE */
// -------------------------------------
/*
 * Creates a break iterator of the given kind for a locale.
 * When services are compiled in and the service object already exists, the
 * iterator is obtained from the service; otherwise it is built directly with
 * makeInstance(). Returns NULL if status already indicates a failure.
 */
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
if (U_FAILURE(status)) {
return NULL;
}
#if !UCONFIG_NO_SERVICE
if (hasService()) {
Locale actualLoc("");
BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
// TODO: The way the service code works in ICU 2.8 is that if
// there is a real registered break iterator, the actualLoc
// will be populated, but if the handleDefault path is taken
// (because nothing is registered that can handle the
// requested locale) then the actualLoc comes back empty. In
// that case, the returned object already has its actual/valid
// locale data populated (by makeInstance, which is what
// handleDefault calls), so we don't touch it. YES, A COMMENT
// THIS LONG is a sign of bad code -- so the action item is to
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
}
return result;
}
else
#endif
// With services compiled out this block is unconditional; otherwise it is
// the else branch of the hasService() test above.
{
return makeInstance(loc, kind, status);
}
}
// -------------------------------------
/*
 * Builds a break iterator directly from resource data, bypassing the service.
 * The kind selects the key passed to buildInstance(); an unknown kind sets
 * U_ILLEGAL_ARGUMENT_ERROR. Returns NULL whenever status indicates a failure.
 */
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }
    // Map the break kind to the name of its entry in the rules data.
    const char *ruleType = NULL;
    switch (kind) {
    case UBRK_CHARACTER: ruleType = "grapheme"; break;
    case UBRK_WORD:      ruleType = "word";     break;
    case UBRK_LINE:      ruleType = "line";     break;
    case UBRK_SENTENCE:  ruleType = "sentence"; break;
    case UBRK_TITLE:     ruleType = "title";    break;
    default:                                    break;
    }
    if (ruleType == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    BreakIterator *built = BreakIterator::buildInstance(loc, ruleType, kind, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    return built;
}
/* Returns the locale of the requested type through a U_LOCALE_BASED helper built on this object. */
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    U_LOCALE_BASED(localeHelper, *this);
    return localeHelper.getLocale(type, status);
}
/* Returns the locale ID of the requested type through a U_LOCALE_BASED helper built on this object. */
const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
    U_LOCALE_BASED(localeHelper, *this);
    return localeHelper.getLocaleID(type, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
//eof

View file

@ -0,0 +1,65 @@
// Copyright (C) 2009, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
// Default implementation: offers the caller's scratch buffer whenever it can
// hold min_capacity bytes (and min_capacity is at least 1); otherwise reports
// a capacity of 0 and returns NULL.
char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t /*desired_capacity_hint*/,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) {
    if (min_capacity >= 1 && scratch_capacity >= min_capacity) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = 0;
    return NULL;
}
// Default implementation does nothing.
void ByteSink::Flush() {}
// Wraps the caller's buffer. A negative capacity is treated as 0; the sink
// starts empty and not overflowed.
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
: outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity), size_(0), overflowed_(false) {
}
void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) {
return;
}
int32_t available = capacity_ - size_;
if (n > available) {
n = available;
overflowed_ = true;
}
if (n > 0 && bytes != (outbuf_ + size_)) {
uprv_memcpy(outbuf_ + size_, bytes, n);
}
size_ += n;
}
// Offers the unused tail of the output buffer when it can hold min_capacity
// bytes, otherwise the caller's scratch buffer. Reports a capacity of 0 and
// returns NULL when min_capacity is below 1 or the scratch buffer is smaller
// than min_capacity.
char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t /*desired_capacity_hint*/,
                                            char* scratch,
                                            int32_t scratch_capacity,
                                            int32_t* result_capacity) {
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
        *result_capacity = 0;
        return NULL;
    }
    const int32_t room = capacity_ - size_;
    if (room < min_capacity) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = room;
    return outbuf_ + size_;
}
U_NAMESPACE_END

611
source/common/caniter.cpp Normal file
View file

@ -0,0 +1,611 @@
/*
*****************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and *
* others. All Rights Reserved. *
*****************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/uset.h"
#include "unicode/ustring.h"
#include "hash.h"
#include "unormimp.h"
#include "unicode/caniter.h"
#include "unicode/normlzr.h"
#include "unicode/uchar.h"
#include "cmemory.h"
/**
* This class allows one to iterate through all the strings that are canonically equivalent to a given
* string. For example, here are some sample results:
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
1: \u0041\u030A\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
2: \u0041\u030A\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
3: \u0041\u030A\u1E0B\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
4: \u0041\u030A\u1E11\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
5: \u00C5\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
6: \u00C5\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
7: \u00C5\u1E0B\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
8: \u00C5\u1E11\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
9: \u212B\u0064\u0307\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
10: \u212B\u0064\u0327\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
11: \u212B\u1E0B\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
12: \u212B\u1E11\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
* since it has not been optimized for that situation.
*@author M. Davis
*@draft
*/
// public
U_NAMESPACE_BEGIN
// TODO: add boilerplate methods.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
/**
*@param source string to get results for
*/
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
    pieces(NULL),
    pieces_length(0),
    pieces_lengths(NULL),
    current(NULL),
    current_length(0)
{
    // Until a source has been installed successfully there is nothing to
    // iterate over. Setting the flag here keeps next() from reading an
    // indeterminate value when status is already a failure on entry or when
    // setSource() bails out before it initializes the flag itself.
    done = TRUE;
    if(U_SUCCESS(status)) {
        setSource(sourceStr, status);
    }
}
CanonicalIterator::~CanonicalIterator()
{
    // All heap storage owned by the iterator is released by cleanPieces().
    cleanPieces();
}
/*
 * Releases the per-segment equivalence tables and the odometer state and
 * returns the object to the "no pieces" state.
 */
void CanonicalIterator::cleanPieces() {
    if(pieces != NULL) {
        // Each entry is an array created with new UnicodeString[]; entries
        // may be NULL when initialization stopped part-way.
        for(int32_t i = 0; i < pieces_length; i++) {
            if(pieces[i] != NULL) {
                delete[] pieces[i];
            }
        }
        uprv_free(pieces);
        pieces = NULL;
    }
    // Reset the count even when the table pointer is NULL: after a partial
    // allocation failure the count could otherwise stay non-zero and make
    // next() index a NULL table.
    pieces_length = 0;
    if(pieces_lengths != NULL) {
        uprv_free(pieces_lengths);
        pieces_lengths = NULL;
    }
    if(current != NULL) {
        uprv_free(current);
        current = NULL;
    }
    // Same reasoning as for pieces_length above.
    current_length = 0;
}
/**
*@return gets the source: NOTE: it is the NFD form of source
*/
UnicodeString CanonicalIterator::getSource()
{
    // Returned by value; 'source' holds the NFD form produced by setSource().
    return source;
}
/**
* Resets the iterator so that one can start again from the beginning.
*/
void CanonicalIterator::reset() {
done = FALSE;
for (int i = 0; i < current_length; ++i) {
current[i] = 0;
}
}
/**
*@return the next string that is canonically equivalent. The value null is returned when
* the iteration is done.
*/
UnicodeString CanonicalIterator::next() {
    // Once the iteration is exhausted, a bogus string is the end marker.
    if (done) {
        buffer.setToBogus();
        return buffer;
    }

    // Build the result from the currently selected alternative of each piece.
    buffer.remove();
    for (int32_t p = 0; p < pieces_length; ++p) {
        buffer.append(pieces[p][current[p]]);
    }

    // Advance the selection like an odometer, rightmost position first:
    // bump a position, and on wrap-around reset it and carry to the left.
    int32_t pos = current_length - 1;
    while (pos >= 0) {
        if (++current[pos] < pieces_lengths[pos]) {
            break;              // this position still has alternatives left
        }
        current[pos] = 0;
        --pos;
    }
    if (pos < 0) {
        done = TRUE;            // carried past the leftmost position
    }
    return buffer;
}
/**
 *@param newSource the source string to iterate against. This allows the same iterator to be used
 * while changing the source string, saving object creation.
 *@param status set to a failure code if normalization or memory allocation fails.
 */
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
int32_t list_length = 0;
UChar32 cp = 0;
int32_t start = 0;
int32_t i = 0;
UnicodeString *list = NULL;
Normalizer::normalize(newSource, UNORM_NFD, 0, source, status);
if(U_FAILURE(status)) {
return;
}
done = FALSE;
cleanPieces();
// catch degenerate case
if (newSource.length() == 0) {
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
pieces_length = 1;
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
current_length = 1;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
current[0] = 0;
pieces[0] = new UnicodeString[1];
pieces_lengths[0] = 1;
if (pieces[0] == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
return;
}
list = new UnicodeString[source.length()];
if (list == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
// i should initialy be the number of code units at the
// start of the string
i = UTF16_CHAR_LENGTH(source.char32At(0));
//int32_t i = 1;
// find the segments
// This code iterates through the source string and
// extracts segments that end up on a codepoint that
// doesn't start any decompositions. (Analysis is done
// on the NFD form - see above).
for (; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
cp = source.char32At(i);
if (unorm_isCanonSafeStart(cp)) {
source.extract(start, i-start, list[list_length++]); // add up to i
start = i;
}
}
source.extract(start, i-start, list[list_length++]); // add last one
// allocate the arrays, and find the strings that are CE to each segment
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
pieces_length = list_length;
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current_length = list_length;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
for (i = 0; i < current_length; i++) {
current[i] = 0;
}
// for each segment, get all the combinations that can produce
// it after NFD normalization
for (i = 0; i < pieces_length; ++i) {
//if (PROGRESS) printf("SEGMENT\n");
pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
}
delete[] list;
return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
if (list != NULL) {
delete[] list;
}
cleanPieces();
}
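/**
 * Dumb recursive implementation of permutation.
 * TODO: optimize
 * @param source the string to find permutations for
 * @param skipZeros if TRUE, characters with canonical combining class zero (other than the first) are not permuted
 * @param result the hashtable that receives the permutations (nothing is returned)
 * @param status set to a failure code on error
 */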
// Adds permutations of the code points of 'source' to 'result', keyed by the
// permuted string; the stored values are heap-allocated UnicodeString objects.
// With skipZeros set, only the first code point and code points with a
// non-zero combining class are tried in the leading position.
// Returns early, leaving 'status' set, on any failure.
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;
// optimization:
// if zero or one character, just return a set with it
// we check for length <= 2 first (one code point occupies at most two
// UTF-16 code units) to keep from counting code points all the time
if (source.length() <= 2 && source.countChar32() <= 1) {
UnicodeString *toPut = new UnicodeString(source);
/* test for NULL */
if (toPut == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
result->put(source, toPut, status);
return;
}
// otherwise iterate through the string, and recursively permute all the other characters
UChar32 cp;
// scratch table, cleared and reused for every leading code point
Hashtable subpermute(status);
if(U_FAILURE(status)) {
return;
}
subpermute.setValueDeleter(uhash_deleteUnicodeString);
for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
cp = source.char32At(i);
const UHashElement *ne = NULL;
int32_t el = -1;
// work on a copy: the replace() below removes cp from it
UnicodeString subPermuteString = source;
// optimization:
// if the character is canonical combining class zero,
// don't permute it
if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
continue;
}
subpermute.removeAll();
// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.replace(i, UTF16_CHAR_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
/* Stop on any failure reported by the recursive call */
if(U_FAILURE(status)) {
return;
}
// The replace() above is destructive, but it operates on the local copy
// subPermuteString, so 'source' itself is left untouched.
// prefix this character to all of them
ne = subpermute.nextElement(el);
while (ne != NULL) {
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
UnicodeString *chStr = new UnicodeString(cp);
//test for NULL
if (chStr == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
result->put(*chStr, chStr, status);
ne = subpermute.nextElement(el);
}
}
//return result;
}
// privates
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
// Returns a new[]-allocated array of the strings found to be canonically
// equivalent to 'segment' and stores the array length in result_len.
// On any failure NULL is returned, 'status' holds a failure code and
// result_len is left unmodified. The array is released with delete[]
// (see cleanPieces()).
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
Hashtable result(status);
Hashtable permutations(status);
Hashtable basic(status);
if (U_FAILURE(status)) {
return 0;
}
result.setValueDeleter(uhash_deleteUnicodeString);
permutations.setValueDeleter(uhash_deleteUnicodeString);
basic.setValueDeleter(uhash_deleteUnicodeString);
// The segment is copied into a fixed-size stack buffer.
// NOTE(review): presumably extract() reports an error through 'status' for
// segments longer than 256 code units - confirm against UnicodeString::extract.
UChar USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
// 'basic' receives the segment plus the variants built by getEquivalents2()
getEquivalents2(&basic, USeg, segLen, status);
// now get all the permutations
// add only the ones that are canonically equivalent
// TODO: optimize by not permuting any class zero.
const UHashElement *ne = NULL;
int32_t el = -1;
//Iterator it = basic.iterator();
ne = basic.nextElement(el);
//while (it.hasNext())
while (ne != NULL) {
//String item = (String) it.next();
// copied because permute() takes a non-const reference
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
permutations.removeAll();
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
const UHashElement *ne2 = NULL;
int32_t el2 = -1;
//Iterator it2 = permutations.iterator();
ne2 = permutations.nextElement(el2);
//while (it2.hasNext())
while (ne2 != NULL) {
//String possible = (String) it2.next();
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString attempt;
// keep a permutation only if its NFD form is the segment itself
Normalizer::normalize(possible, UNORM_NFD, 0, attempt, status);
// TODO: check if operator == is semantically the same as attempt.equals(segment)
if (attempt==segment) {
//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
result.put(possible, new UnicodeString(possible), status); //add(possible);
} else {
//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
}
ne2 = permutations.nextElement(el2);
}
ne = basic.nextElement(el);
}
/* Give up if any step above reported a failure */
if(U_FAILURE(status)) {
return 0;
}
// convert into a String[] to clean up storage
//String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL;
int32_t resultCount;
if((resultCount = result.count())) {
finalResult = new UnicodeString[resultCount];
if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
}
else {
// an empty result set is reported as an error
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
//result.toArray(finalResult);
// copy the collected strings by value into the output array
result_len = 0;
el = -1;
ne = result.nextElement(el);
while(ne != NULL) {
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
ne = result.nextElement(el);
}
return finalResult;
}
/*
 * Adds to fillinResult the segment itself plus every string obtained by
 * replacing a decomposition found inside the segment with a character from
 * the canonical start set of the code point at that position (recursively,
 * through extract()).
 *
 * @param fillinResult table that receives the strings (key == value text)
 * @param segment      the segment text (not NUL-terminated)
 * @param segLen       number of UChars in segment
 * @param status       in/out error code
 * @return fillinResult, or NULL if status is or becomes a failure
 */
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

    // the segment is always one of its own equivalents
    UnicodeString toPut(segment, segLen);
    fillinResult->put(toPut, new UnicodeString(toPut), status);

    USerializedSet starts;

    // cycle through all the characters
    // cp  is the current code point of the segment and drives the outer loop;
    // cp2 walks the start set. They must be distinct variables: the outer
    // loop increment needs the length of the *segment* code point, and
    // reusing cp for the set walk made i advance by the wrong amount.
    UChar32 cp, cp2, end = 0;
    int32_t i = 0, j;
    for (i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        UTF_GET_CHAR(segment, 0, i, segLen, cp);
        if (!unorm_getCanonStartSet(cp, &starts)) {
            continue;
        }

        // if so, see which decompositions match
        // (visits every code point of every range of the serialized set:
        // when cp2 runs past 'end' the next range is fetched)
        for(j = 0, cp2 = end+1; cp2 <= end || uset_getSerializedRange(&starts, j++, &cp2, &end); ++cp2) {
            Hashtable remainder(status);
            remainder.setValueDeleter(uhash_deleteUnicodeString);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = -1;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Report failures raised anywhere above */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
/**
 * See if the decomposition of comp is at segment starting at segmentPos
 * (with canonical rearrangement!)
 * If so, take the remainder, and return the equivalents
 */
/*
 * Tests whether the canonical decomposition of comp can be found in segment
 * starting at segmentPos (its code points may be interleaved with others).
 * If so, the equivalents of the left-over text are added to fillinResult.
 *
 * @param fillinResult table that receives the equivalents of the remainder
 * @param comp         candidate composite code point
 * @param segment      segment text (not NUL-terminated)
 * @param segLen       number of UChars in segment
 * @param segmentPos   index in segment where matching starts
 * @param status       in/out error code
 * @return fillinResult on a match; NULL if there is no match, if the
 *         intermediate text does not fit the fixed-size work buffer, or if
 *         status is or becomes a failure
 */
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
    //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
    //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);

    if (U_FAILURE(status)) {
        return NULL;
    }

    const int32_t bufSize = 256;
    int32_t bufLen = 0;
    UChar temp[bufSize];

    int32_t inputLen = 0, decompLen;
    UChar stackBuffer[4];
    const UChar *decomp;

    // temp starts with comp itself; the unmatched remainder is appended below
    U16_APPEND_UNSAFE(temp, inputLen, comp);
    decomp = unorm_getCanonicalDecomposition(comp, stackBuffer, &decompLen);
    if(decomp == NULL) {
        /* no decomposition: comp stands for itself, copy it from temp */
        stackBuffer[0] = temp[0];
        if(inputLen > 1) {
            stackBuffer[1] = temp[1];
        }
        decomp = stackBuffer;
        decompLen = inputLen;
    }

    UChar *buff = temp+inputLen;
    // buff lives inside temp, after comp, so it has less room than bufSize.
    const int32_t buffCapacity = bufSize - inputLen;

    // See if it matches the start of segment (at segmentPos)
    UBool ok = FALSE;
    UChar32 cp;
    int32_t decompPos = 0;
    UChar32 decompCp;
    UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);

    int32_t i;
    UBool overflow = FALSE;

    i = segmentPos;
    while(i < segLen) {
        UTF_NEXT_CHAR(segment, i, segLen, cp);

        if (cp == decompCp) { // if equal, eat another cp from decomp
            //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));

            if (decompPos == decompLen) { // done, have all decomp characters!
                // the rest of the segment becomes part of the remainder
                if (segLen - i > buffCapacity - bufLen) {
                    return NULL; // does not fit: treat as "no match"
                }
                uprv_memcpy(buff+bufLen, segment+i, (segLen-i)*sizeof(UChar));
                bufLen+=segLen-i;

                ok = TRUE;
                break;
            }
            UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp);
        } else {
            //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));

            // brute force approach: keep the unmatched code point
            if (bufLen >= buffCapacity) {
                return NULL; // does not fit: treat as "no match"
            }
            U16_APPEND(buff, bufLen, buffCapacity, cp, overflow);
            if(overflow) {
                /*
                 * ### TODO handle buffer overflow by reallocating.
                 * Until then a candidate whose remainder does not fit is
                 * rejected instead of being continued with a dropped
                 * code point.
                 * (original note: markus 20020929)
                 */
                return NULL;
            }

            /* TODO: optimize
            // since we know that the classes are monotonically increasing, after zero
            // e.g. 0 5 7 9 0 3
            // we can do an optimization
            // there are only a few cases that work: zero, less, same, greater
            // if both classes are the same, we fail
            // if the decomp class < the segment class, we fail

            segClass = getClass(cp);
            if (decompClass <= segClass) return null;
            */
        }
    }
    if (!ok)
        return NULL; // we failed, characters left over

    //if (PROGRESS) printf("Matches\n");

    if (bufLen == 0) {
        fillinResult->put(UnicodeString(), new UnicodeString(), status);
        return fillinResult; // succeed, but no remainder
    }

    // brute force approach
    // check to make sure result is canonically equivalent:
    // NFD(comp + remainder) must be exactly the matched part of the segment
    int32_t tempLen = inputLen + bufLen;

    UChar trial[bufSize];
    int32_t trialLen = unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status);

    // Compare the lengths first: memcmp alone would read parts of trial that
    // unorm_decompose() never wrote when the decomposition is shorter, and
    // would accept a longer decomposition that merely starts with the segment.
    if(U_FAILURE(status)
        || trialLen != (segLen - segmentPos)
        || uprv_memcmp(segment+segmentPos, trial, (segLen - segmentPos)*sizeof(UChar)) != 0)
    {
        return NULL;
    }

    return getEquivalents2(fillinResult, buff, bufLen, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -0,0 +1,96 @@
/*
**********************************************************************
* Copyright (C) 1999-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/chariter.h"
U_NAMESPACE_BEGIN
ForwardCharacterIterator::~ForwardCharacterIterator()
{
    // nothing to release at this level
}
ForwardCharacterIterator::ForwardCharacterIterator() : UObject() {
    // default construction: only the UObject base needs initializing
}
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {
    // copy construction: forwards to the UObject copy constructor
}
CharacterIterator::CharacterIterator()
    : textLength(0),
      pos(0),
      begin(0),
      end(0)
{
    // an iterator over empty text
}
CharacterIterator::CharacterIterator(int32_t length)
    : textLength(length),
      pos(0),
      begin(0),
      end(length)
{
    // A negative length is treated as empty text.
    if (textLength < 0) {
        end = 0;
        textLength = 0;
    }
}
CharacterIterator::CharacterIterator(int32_t length, int32_t position)
    : textLength(length),
      pos(position),
      begin(0),
      end(length)
{
    // A negative length is treated as empty text.
    if (textLength < 0) {
        end = 0;
        textLength = 0;
    }
    // Pin the start position into [0, end].
    pos = (pos < 0) ? 0 : ((pos > end) ? end : pos);
}
CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
    : textLength(length),
      pos(position),
      begin(textBegin),
      end(textEnd)
{
    // Each value is pinned against the ones normalized before it, so the
    // order of these statements matters.
    if (textLength < 0) {
        textLength = 0;
    }
    // begin in [0, textLength]
    begin = (begin < 0) ? 0 : ((begin > textLength) ? textLength : begin);
    // end not before begin and not past textLength
    end = (end < begin) ? begin : ((end > textLength) ? textLength : end);
    // pos in [begin, end]
    pos = (pos < begin) ? begin : ((pos > end) ? end : pos);
}
CharacterIterator::CharacterIterator(const CharacterIterator &that)
    : ForwardCharacterIterator(that),
      textLength(that.textLength),
      pos(that.pos),
      begin(that.begin),
      end(that.end)
{
    // member-wise copy of the iteration range and position
}
CharacterIterator &
CharacterIterator::operator=(const CharacterIterator &that)
{
    // Assign the base-class part first, then copy the range and position.
    ForwardCharacterIterator::operator=(that);
    begin      = that.begin;
    end        = that.end;
    pos        = that.pos;
    textLength = that.textLength;
    return *this;
}
// implementing first[32]PostInc() directly in a subclass should be faster
// but these implementations make subclassing a little easier
UChar
CharacterIterator::firstPostInc(void)
{
    // Convenience default: rewind, then read-and-advance.
    setToStart();
    const UChar first = nextPostInc();
    return first;
}
UChar32
CharacterIterator::first32PostInc(void)
{
    // Convenience default: rewind, then read-and-advance by code point.
    setToStart();
    const UChar32 first = next32PostInc();
    return first;
}
U_NAMESPACE_END

88
source/common/charstr.h Normal file
View file

@ -0,0 +1,88 @@
/*
**********************************************************************
* Copyright (c) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/19/2001 aliu Creation.
**********************************************************************
*/
#ifndef CHARSTRING_H
#define CHARSTRING_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "cmemory.h"
//--------------------------------------------------------------------
// class CharString
//
// This is a tiny wrapper class that is used internally to make a
// UnicodeString look like a const char*. It can be allocated on the
// stack. It only creates a heap buffer if it needs to.
//--------------------------------------------------------------------
U_NAMESPACE_BEGIN
class U_COMMON_API CharString : public UMemory {
public:

#if !UCONFIG_NO_CONVERSION
    // Constructor (disabled; kept for reference)
    //     @param  str    The unicode string to be converted to char *
    //     @param  codepage   The char * code page.  "" for invariant conversion.
    //                        NULL for default code page.
//    inline CharString(const UnicodeString& str, const char *codepage);
#endif

    // Converts str with the invariant-character conversion (US_INV).
    inline CharString(const UnicodeString& str);

    // Releases the heap buffer, if one had to be allocated.
    inline ~CharString();

    // Access to the converted characters.
    inline operator const char*() const { return ptr; }

private:
    char buf[128];   // in-object storage for short strings
    char* ptr;       // points at buf or at a heap buffer

    // forbid copying of this class (declared, never defined)
    CharString(const CharString &other);
    CharString &operator=(const CharString &other);
};
#if !UCONFIG_NO_CONVERSION
// PLEASE DON'T USE THIS FUNCTION.
// We don't want the static dependency on conversion or the performance hit that comes from a codepage conversion.
/*
inline CharString::CharString(const UnicodeString& str, const char *codepage) {
int32_t len;
ptr = buf;
len = str.extract(0, 0x7FFFFFFF, buf ,sizeof(buf)-1, codepage);
if (len >= (int32_t)(sizeof(buf)-1)) {
ptr = (char *)uprv_malloc(len+1);
str.extract(0, 0x7FFFFFFF, ptr, len+1, codepage);
}
}*/
#endif
// Converts str to chars with the invariant-character conversion. Short
// results live in the in-object buffer; longer ones in a heap buffer that the
// destructor frees.
inline CharString::CharString(const UnicodeString& str) {
    int32_t len;
    ptr = buf;
    // First try the in-object buffer; one byte is held back for the NUL.
    len = str.extract(0, 0x7FFFFFFF, buf, (int32_t)(sizeof(buf)-1), US_INV);
    if (len >= (int32_t)(sizeof(buf)-1)) {
        // Does not fit (or fits without room for the terminator):
        // convert again into a heap buffer of the exact size.
        char *heap = (char *)uprv_malloc(len+1);
        if (heap != NULL) {
            ptr = heap;
            str.extract(0, 0x7FFFFFFF, ptr, len+1, US_INV);
        } else {
            // Out of memory: expose a valid empty string rather than a NULL
            // pointer, which users of operator const char*() do not expect.
            buf[0] = 0;
        }
    }
}
inline CharString::~CharString() {
    // Only a heap buffer needs freeing; the in-object buffer goes away
    // together with the object.
    const UBool onHeap = (ptr != buf);
    if (onHeap) {
        uprv_free(ptr);
    }
}
U_NAMESPACE_END
#endif
//eof

124
source/common/cmemory.c Normal file
View file

@ -0,0 +1,124 @@
/*
******************************************************************************
*
* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File cmemory.c ICU Heap allocation.
* All ICU heap allocation, both for C and C++ new of ICU
* class types, comes through these functions.
*
* If you have a need to replace ICU allocation, this is the
* place to do it.
*
* Note that uprv_malloc(0) returns a non-NULL pointer, and
* that a subsequent free of that pointer value is a NOP.
*
******************************************************************************
*/
#include "unicode/uclean.h"
#include "cmemory.h"
#include <stdlib.h>
/* uprv_malloc(0) returns a pointer to this read-only data.
 * uprv_realloc() and uprv_free() recognize the address and never pass it
 * to the real heap functions. */
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
/* Function Pointers for user-supplied heap functions.
 * They are installed by u_setMemoryFunctions(); while a pointer is NULL the
 * corresponding C library function (malloc/realloc/free) is used.
 * pContext is passed back unchanged as the first argument of every call. */
static const void *pContext;
static UMemAllocFn *pAlloc;
static UMemReallocFn *pRealloc;
static UMemFreeFn *pFree;
/* Flag indicating whether any heap allocations have happened.
 * Used to prevent changing out the heap functions after allocations have been made.
 * Set by uprv_malloc()/uprv_realloc() for non-zero sizes, cleared by cmemory_cleanup(). */
static UBool gHeapInUse;
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
    /* Zero-size requests never reach the heap: they all share one
     * static, read-only block. */
    if (s == 0) {
        return (void *)zeroMem;
    }
    gHeapInUse = TRUE;
    /* Use the user-supplied allocator when one is installed. */
    return pAlloc ? (*pAlloc)(pContext, s) : malloc(s);
}
U_CAPI void * U_EXPORT2
uprv_realloc(void * buffer, size_t size) {
    /* Growing the shared zero-size block is just a fresh allocation. */
    if (buffer == zeroMem) {
        return uprv_malloc(size);
    }
    /* Shrinking to zero frees the block and hands back the shared one. */
    if (size == 0) {
        if (pFree) {
            (*pFree)(pContext, buffer);
        } else {
            free(buffer);
        }
        return (void *)zeroMem;
    }
    /* Ordinary resize through the user-supplied or the C library function. */
    gHeapInUse = TRUE;
    return pRealloc ? (*pRealloc)(pContext, buffer, size) : realloc(buffer, size);
}
U_CAPI void U_EXPORT2
uprv_free(void *buffer) {
    /* The shared zero-size block is static data and must not be freed. */
    if (buffer == zeroMem) {
        return;
    }
    if (pFree) {
        (*pFree)(pContext, buffer);
    } else {
        free(buffer);
    }
}
U_CAPI void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
{
    /* Standard ICU convention: do nothing if an error is already pending. */
    if (U_FAILURE(*status)) {
        return;
    }
    /* All three functions must be supplied together. */
    if (!a || !r || !f) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* Swapping allocators after memory has been handed out would pair a
     * block with the wrong free function, so it is refused. */
    if (gHeapInUse) {
        *status = U_INVALID_STATE_ERROR;
        return;
    }
    pAlloc   = a;
    pRealloc = r;
    pFree    = f;
    pContext = context;
}
U_CFUNC UBool cmemory_cleanup(void) {
    /* Forget any user-supplied heap functions and their context ... */
    pAlloc   = NULL;
    pRealloc = NULL;
    pFree    = NULL;
    pContext = NULL;
    /* ... and mark the heap as unused again. */
    gHeapInUse = FALSE;
    return TRUE;
}
/*
 * cmemory_inUse
 * Return TRUE if ICU has made any non-zero-size heap allocation since
 * start-up or since the last cmemory_cleanup().
 * Used by u_setMutexFunctions() and similar to verify that ICU has not
 * been used, that it is in a pristine initial state.
 */
U_CFUNC UBool cmemory_inUse(void) {
    return gHeapInUse;
}

94
source/common/cmemory.h Normal file
View file

@ -0,0 +1,94 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CMEMORY.H
*
* Contains stdlib.h/string.h memory functions
*
* @author Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 6/20/98 Bertrand Created.
* 05/03/99 stephen Changed from functions to macros.
*
******************************************************************************
*/
#ifndef CMEMORY_H
#define CMEMORY_H
#include "unicode/utypes.h"
#include <stddef.h>
#include <string.h>
/* Wrappers for the <string.h> memory functions. U_STANDARD_CPP_NAMESPACE is
 * placed in front of each call so that the name is qualified as the
 * configuration requires. */
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
/* ICU heap allocation. */
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s);
/* ICU heap reallocation. */
U_CAPI void * U_EXPORT2
uprv_realloc(void *mem, size_t size);
/* Releases memory obtained from uprv_malloc() or uprv_realloc(). */
U_CAPI void U_EXPORT2
uprv_free(void *mem);
/**
* This should align the memory properly on any machine.
* This is very useful for the safeClone functions.
*/
typedef union {
long t1;
double t2;
void *t3;
} UAlignedMemory;
/**
 * Get the least significant bits of a pointer (a memory address).
 * For example, with a mask of 3, the macro gets the 2 least significant bits,
 * which will be 0 if the pointer is 32-bit (4-byte) aligned.
 *
 * ptrdiff_t is the most appropriate integer type to cast to.
 * size_t should work too, since on most (or all?) platforms it has the same
 * width as ptrdiff_t.
 */
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
/**
 * Get the number of bytes that a pointer is off by from
 * the previous UAlignedMemory-aligned pointer.
 * The result is 0 for a pointer that is already aligned.
 * Note: sizeof(UAlignedMemory) - 1 is used as a bit mask, which yields the
 * offset only when sizeof(UAlignedMemory) is a power of two.
 */
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
/**
 * Get the number of bytes to add to a pointer
 * in order to get the next UAlignedMemory-aligned address.
 * Note: for a pointer that is already aligned the result is
 * sizeof(UAlignedMemory), not 0, i.e. the macro always moves forward.
 */
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
/**
* Indicate whether the ICU allocation functions have been used.
* This is used to determine whether ICU is in an initial, unused state.
*/
U_CFUNC UBool
cmemory_inUse(void);
/**
* Heap clean up function, called from u_cleanup()
* Clears any user heap functions from u_setMemoryFunctions()
* Does NOT deallocate any remaining allocated memory.
*/
U_CFUNC UBool
cmemory_cleanup(void);
#endif

108
source/common/common.rc Normal file
View file

@ -0,0 +1,108 @@
// Do not edit with Microsoft Developer Studio Resource Editor.
// It will permanently substitute version numbers that are intended to be
// picked up by the pre-processor during each build.
// Copyright (c) 2001-2009 International Business Machines
// Corporation and others. All Rights Reserved.
//
#include "msvcres.h"
#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include <winresrc.h>
/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
#pragma code_page(1252)
#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// TEXTINCLUDE
//
1 TEXTINCLUDE
BEGIN
"msvcres.h\0"
END
2 TEXTINCLUDE
BEGIN
"#include <winresrc.h>\0"
END
3 TEXTINCLUDE
BEGIN
"\r\n"
"\0"
END
#endif // APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
// STR turns its argument into a string literal.
#define STR(s) #s
// Builds the "a, b, c, d" version string. The extra macro level matters:
// the arguments are macro-expanded before they reach STR, so version
// macros are stringized as their values rather than as their names.
#define CommaVersionString(a, b, c, d) STR(a) ", " STR(b) ", " STR(c) ", " STR(d) "\0"
VS_VERSION_INFO VERSIONINFO
FILEVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
PRODUCTVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
#else
FILEFLAGS 0x0L
#endif
FILEOS VOS__WINDOWS32
FILETYPE VFT_DLL
FILESUBTYPE 0x0L
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "00000000"
BEGIN
VALUE "Comments", ICU_WEBSITE "\0"
VALUE "CompanyName", "IBM Corporation and others\0"
VALUE "FileDescription", "IBM ICU Common DLL\0"
VALUE "FileVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
VALUE "LegalCopyright", U_COPYRIGHT_STRING "\0"
#ifdef _DEBUG
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT "d.dll\0"
#else
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT ".dll\0"
#endif
VALUE "PrivateBuild", "\0"
VALUE "ProductName", "International Components for Unicode\0"
VALUE "ProductVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
VALUE "SpecialBuild", "\0"
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x000, 0000
END
END
/////////////////////////////////////////////////////////////////////////////
#ifndef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 3 resource.
//
/////////////////////////////////////////////////////////////////////////////
#endif // not APSTUDIO_INVOKED

4444
source/common/common.vcproj Normal file

File diff suppressed because it is too large Load diff

84
source/common/cpputils.h Normal file
View file

@ -0,0 +1,84 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2006, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cpputils.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*/
#ifndef CPPUTILS_H
#define CPPUTILS_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "cmemory.h"
/*==========================================================================*/
/* Array copy utility functions */
/*==========================================================================*/
static
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
double* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
int8_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
int16_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
int32_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void
uprv_arrayCopy(const UChar *src, int32_t srcStart,
UChar *dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
/**
* Copy an array of UnicodeString OBJECTS (not pointers).
* @internal
*/
static inline void
uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
{ while(count-- > 0) *dst++ = *src++; }
/**
* Copy an array of UnicodeString OBJECTS (not pointers).
* @internal
*/
static inline void
uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
#endif /* CPPUTILS_H */

328
source/common/cstring.c Normal file
View file

@ -0,0 +1,328 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/18/98 hshih Created
* 09/08/98 stephen Added include for ctype, for Mac Port
* 11/15/99 helena Integrated S/390 IEEE changes.
******************************************************************************
*/
#include <stdlib.h>
#include <stdio.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
/*
* We hardcode case conversion for invariant characters to match our expectation
* and the compiler execution charset.
* This prevents problems on systems
* - with non-default casing behavior, like Turkish system locales where
* tolower('I') maps to dotless i and toupper('i') maps to dotted I
* - where there are no lowercase Latin characters at all, or using different
* codes (some old EBCDIC codepages)
*
* This works because the compiler usually runs on a platform where the execution
* charset includes all of the invariant characters at their expected
* code positions, so that the char * string literals in ICU code match
* the char literals here.
*
* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
* and the set of uppercase Latin letters is discontiguous as well.
*/
/*
 * Maps an invariant-character lowercase Latin letter to its uppercase form
 * in the compiler's execution charset; every other value is returned as is.
 */
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
    /* Distance from a lowercase letter to its uppercase counterpart. */
    const int upperDelta = 'A'-'a';
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* The lowercase letters form three separate runs in EBCDIC. */
    const int isLower = ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z');
#else
    const int isLower = ('a'<=c && c<='z');
#endif
    return isLower ? (char)(c+upperDelta) : c;
}
#if 0
/*
 * Compiled out (#if 0) because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
/* EBCDIC: the uppercase letters form three separate runs (A-I, J-R, S-Z). */
if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
c=(char)(c+('a'-'A'));
}
#else
/* Generic branch: uses char literals, so it follows the execution charset. */
if('A'<=c && c<='Z') {
c=(char)(c+('a'-'A'));
}
#endif
return c;
}
#endif
/*
 * Lowercases an ASCII uppercase letter given by code value (0x41..0x5a)
 * by adding 0x20; any other value is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    if(c<0x41 || 0x5a<c) {
        return c;
    }
    return (char)(c+0x20);
}
/*
 * Lowercases an EBCDIC uppercase letter given by code value. The uppercase
 * letters occupy 0xc1..0xc9, 0xd1..0xd9 and 0xe2..0xe9; the lowercase form
 * is 0x40 below. Any other value is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    /* Compare as an unsigned byte so that codes >= 0x80 are not negative. */
    const uint8_t b = (uint8_t)c;

    if((0xc1<=b && b<=0xc9) || (0xd1<=b && b<=0xd9) || (0xe2<=b && b<=0xe9)) {
        return (char)(c-0x40);
    }
    return c;
}
/*
 * Lowercases a NUL-terminated string in place with uprv_tolower() and
 * returns the pointer it was given. A NULL argument is returned as is.
 */
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
    char *p;

    if (str == NULL) {
        return str;
    }
    for (p = str; ; ++p) {
        /* The terminator is passed through the mapping too, then ends the loop. */
        *p = (char)uprv_tolower(*p);
        if (*p == 0) {
            break;
        }
    }
    return str;
}
/*
 * Uppercases a NUL-terminated string in place with uprv_toupper() and
 * returns the pointer it was given. A NULL argument is returned as is.
 */
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
    char *p;

    if (str == NULL) {
        return str;
    }
    for (p = str; ; ++p) {
        /* The terminator is passed through the mapping too, then ends the loop. */
        *p = (char)uprv_toupper(*p);
        if (*p == 0) {
            break;
        }
    }
    return str;
}
/*
 * Takes an int32_t and fills in a char* string with that number "radix"-based.
 *
 * In radix 10 a negative value is written with a leading '-'. In every other
 * radix the value is formatted as its unsigned 32-bit bit pattern.
 *
 * The caller's buffer must be large enough for the result plus the NUL:
 * at most 12 chars in radix 10 ("-2147483648" plus NUL) and at most
 * 33 chars in radix 2 (32 digits plus NUL).
 *
 * @param buffer destination for the NUL-terminated result
 * @param v      value to convert
 * @param radix  number base, 2..16 (checked with U_ASSERT)
 * @return the length of the string (not including the NUL)
 */
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
    /* Worst case is radix 2: 32 digits plus the terminating NUL. */
    char tbuf[33];
    int32_t tbx;
    uint8_t digit;
    int32_t length = 0;
    uint32_t uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint32_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we consider numbers to be signed. */
        /* Negate in unsigned arithmetic: -v would overflow for INT32_MIN. */
        uval = (uint32_t)0 - uval;
        buffer[length++] = '-';
    }
    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval = uval / radix;
    } while (uval != 0);
    /* copy converted number into user buffer */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}
/*
 * Takes an int64_t and fills in a char* string with that number "radix"-based.
 *
 * In radix 10 a negative value is written with a leading '-'. In every other
 * radix the value is formatted as its unsigned 64-bit bit pattern.
 *
 * The caller's buffer must be large enough for the result plus the NUL:
 * at most 21 chars in radix 10 ("-9223372036854775808" plus NUL) and at most
 * 65 chars in radix 2 (64 digits plus NUL).
 *
 * @param buffer destination for the NUL-terminated result
 * @param v      value to convert
 * @param radix  number base, 2..16 (checked with U_ASSERT)
 * @return the length of the string, not including the terminating NUL
 */
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
    /* Worst case is radix 2: 64 digits plus the terminating NUL. */
    char tbuf[65];
    int32_t tbx;
    uint8_t digit;
    int32_t length = 0;
    uint64_t uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint64_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we consider numbers to be signed. */
        /* Negate in unsigned arithmetic: -v would overflow for INT64_MIN. */
        uval = (uint64_t)0 - uval;
        buffer[length++] = '-';
    }
    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval = uval / radix;
    } while (uval != 0);
    /* copy converted number into user buffer */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}
/*
 * Parses integerString in the given radix with uprv_strtoul() and returns
 * the result converted to int32_t. The end-of-parse position is not
 * reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *parseEnd;
    unsigned long parsed;

    parsed = uprv_strtoul(integerString, &parseEnd, radix);
    return parsed;
}
/*
 * Compares two strings after mapping each character with uprv_tolower().
 * A NULL string orders before any non-NULL string; two NULLs are equal.
 * Returns 0 when equal, a negative value when str1 orders first and a
 * positive value when str2 orders first.
 */
U_CAPI int U_EXPORT2
T_CString_stricmp(const char *str1, const char *str2) {
    int rc;
    unsigned char c1, c2;

    if(str1==NULL || str2==NULL) {
        if(str1==str2) {
            return 0;
        }
        return (str1==NULL) ? -1 : 1;
    }

    /* compare non-NULL strings lexically with lowercase */
    for(;; ++str1, ++str2) {
        c1=(unsigned char)*str1;
        c2=(unsigned char)*str2;
        if(c1==0 || c2==0) {
            /* At least one string ended; the shorter one orders first. */
            if(c1==c2) {
                return 0;
            }
            return (c1==0) ? -1 : 1;
        }
        /* compare non-zero characters with lowercase */
        rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
        if(rc!=0) {
            return rc;
        }
    }
}
/*
 * Like T_CString_stricmp(), but compares at most n characters.
 * A NULL string orders before any non-NULL string; two NULLs are equal.
 * Returns 0 when the first n characters (or both whole strings) match.
 */
U_CAPI int U_EXPORT2
T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) {
    int rc;
    unsigned char c1, c2;

    if(str1==NULL || str2==NULL) {
        if(str1==str2) {
            return 0;
        }
        return (str1==NULL) ? -1 : 1;
    }

    /* compare non-NULL strings lexically with lowercase */
    for(; n!=0; --n, ++str1, ++str2) {
        c1=(unsigned char)*str1;
        c2=(unsigned char)*str2;
        if(c1==0 || c2==0) {
            /* At least one string ended; the shorter one orders first. */
            if(c1==c2) {
                return 0;
            }
            return (c1==0) ? -1 : 1;
        }
        /* compare non-zero characters with lowercase */
        rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
        if(rc!=0) {
            return rc;
        }
    }
    /* n characters compared without finding a difference */
    return 0;
}
/*
 * Returns a uprv_malloc()ed copy of the NUL-terminated string src,
 * or NULL if the allocation fails. src itself is not checked for NULL.
 */
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
    /* Size in bytes including the terminating NUL. */
    const size_t numBytes = uprv_strlen(src) + 1;
    char *copy = (char *) uprv_malloc(numBytes);

    if (copy == NULL) {
        return NULL;
    }
    uprv_memcpy(copy, src, numBytes);
    return copy;
}
/*
 * Allocates n+1 bytes with uprv_malloc(), copies n bytes from src and
 * stores a NUL at offset n. A negative n delegates to uprv_strdup().
 * Returns NULL if the allocation fails.
 */
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
    char *copy;

    if(n < 0) {
        return uprv_strdup(src);
    }
    copy = (char*)uprv_malloc(n+1);
    if (copy != NULL) {
        uprv_memcpy(copy, src, n);
        copy[n] = 0;
    }
    return copy;
}

120
source/common/cstring.h Normal file
View file

@ -0,0 +1,120 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2005, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.H
*
* Contains CString interface
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/17/98 hshih Created.
* 05/03/99 stephen Changed from functions to macros.
* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
*
******************************************************************************
*/
#ifndef CSTRING_H
#define CSTRING_H 1
#include "unicode/utypes.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
U_CAPI char U_EXPORT2
uprv_toupper(char c);
U_CAPI char U_EXPORT2
uprv_asciitolower(char c);
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_tolower uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_tolower uprv_ebcdictolower
#else
# error U_CHARSET_FAMILY is not valid
#endif
#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
#ifdef U_WINDOWS
# if defined(__BORLANDC__)
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE stricmp(str1, str2)
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE strnicmp(str1, str2, n)
# else
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2)
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE _strnicmp(str1, str2, n)
# endif
#elif defined(POSIX)
# define uprv_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2)
# define uprv_strnicmp(str1, str2, n) U_STANDARD_CPP_NAMESPACE strncasecmp(str1, str2, n)
#else
# define uprv_stricmp(str1, str2) T_CString_stricmp(str1, str2)
# define uprv_strnicmp(str1, str2, n) T_CString_strnicmp(str1, str2, n)
#endif
/* Conversion from a digit to the character with radix base from 2-19 */
/* May need to use U_UPPER_ORDINAL*/
/* Values 0..9 map to '0'+value; larger values map to 'A'+(value-10). */
/* The argument is evaluated more than once, so it must not have side effects. */
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src);
/**
* uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
* Terminate with a null at offset n. If n is negative, works like uprv_strdup
* @param src
* @param n length of the input string, not including null.
* @return new string (owned by caller, use uprv_free to free).
* @internal
*/
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n);
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str);
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str);
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix);
U_CAPI int U_EXPORT2
T_CString_stricmp(const char *str1, const char *str2);
U_CAPI int U_EXPORT2
T_CString_strnicmp(const char *str1, const char *str2, uint32_t n);
#endif /* ! CSTRING_H */

53
source/common/cwchar.c Normal file
View file

@ -0,0 +1,53 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !U_HAVE_WCSCPY
#include "cwchar.h"
/* Appends src, including its terminator, to the end of dst; returns dst. */
U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
    wchar_t *tail=dst;

    /* find the terminator of dst */
    while(*tail!=0) {
        ++tail;
    }
    /* copy up to and including the terminator of src */
    for(;;) {
        *tail=*src;
        if(*tail==0) {
            break;
        }
        ++tail;
        ++src;
    }
    return dst;
}
/* Copies src, including its terminator, into dst; returns dst. */
U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
    wchar_t *out=dst;

    for(;;) {
        *out=*src;
        if(*out==0) {
            break;
        }
        ++out;
        ++src;
    }
    return dst;
}
/* Returns the number of wchar_t units in src before its terminator. */
U_CAPI size_t uprv_wcslen(const wchar_t *src) {
    const wchar_t *end;

    for(end=src; *end!=0; ++end) {
        /* scan forward to the terminator */
    }
    return end-src;
}
#endif

56
source/common/cwchar.h Normal file
View file

@ -0,0 +1,56 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*
* This file contains ICU-internal definitions of wchar_t operations.
* These definitions were moved here from cstring.h so that fewer
* ICU implementation files include wchar.h.
*/
#ifndef __CWCHAR_H__
#define __CWCHAR_H__
#include <string.h>
#include <stdlib.h>
#include "unicode/utypes.h"
/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
#if U_HAVE_WCHAR_H
# include <wchar.h>
#endif
/*===========================================================================*/
/* Wide-character functions */
/*===========================================================================*/
/* The following are not available on all systems, defined in wchar.h or string.h. */
#if U_HAVE_WCSCPY
# define uprv_wcscpy wcscpy
# define uprv_wcscat wcscat
# define uprv_wcslen wcslen
#else
U_CAPI wchar_t* U_EXPORT2
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
U_CAPI wchar_t* U_EXPORT2
uprv_wcscat(wchar_t *dst, const wchar_t *src);
U_CAPI size_t U_EXPORT2
uprv_wcslen(const wchar_t *src);
#endif
/* The following are part of the ANSI C standard, defined in stdlib.h . */
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
#endif

427
source/common/dictbe.cpp Normal file
View file

@ -0,0 +1,427 @@
/**
*******************************************************************************
* Copyright (C) 2006-2008, International Business Machines Corporation and others. *
* All Rights Reserved. *
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "dictbe.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ubrk.h"
#include "uvector.h"
#include "triedict.h"
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
/*DictionaryBreakEngine::DictionaryBreakEngine() {
fTypes = 0;
}*/
// Stores the bitmap of break types this engine handles (bit n set = type n).
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes)
    : fTypes(breakTypes)
{
}
// Nothing to release here; the body is intentionally empty.
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
// Returns TRUE when breakType is one of the types in fTypes and the
// character c belongs to the engine's character set.
UBool
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
    // Only types 0..31 can be represented in the 32-bit fTypes bitmap.
    if (breakType < 0 || breakType >= 32) {
        return FALSE;
    }
    const uint32_t typeBit = (uint32_t)1 << breakType;
    if ((typeBit & fTypes) == 0) {
        return FALSE;
    }
    return fSet.contains(c) ? TRUE : FALSE;
}
// Finds the run of characters around the current text position that belong
// to fSet, and, if breakType is one of the types in fTypes, hands that run to
// divideUpDictionaryRange() to fill foundBreaks.
//
// Forward: advances from the current position while the index is below endPos
// and the character is in fSet. Reverse: steps backwards while the index is
// above startPos and the character is in fSet.
//
// Returns the value from divideUpDictionaryRange(), or 0 when the break type
// is not handled (in which case the text position is left where the scan
// stopped and is not explicitly reset).
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const {
int32_t result = 0;
// Find the span of characters included in the set.
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
if (reverse) {
UBool isDict = fSet.contains(c);
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
c = utext_previous32(text);
isDict = fSet.contains(c);
}
// If the scan stopped on a non-dictionary character, the range starts one past it.
rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1);
rangeEnd = start + 1;
}
else {
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
}
// Same type test as handles(): breakType must be 0..31 and its bit set in fTypes.
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
// Put the text position back where the scan stopped.
utext_setNativeIndex(text, current);
}
return result;
}
// Replaces the engine's character set with a copy of the given set and
// compacts the copy.
void
DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
fSet = set;
// Compact for caching
fSet.compact();
}
/*void
DictionaryBreakEngine::setBreakTypes( uint32_t breakTypes ) {
fTypes = breakTypes;
}*/
/*
******************************************************************
*/
// Helper class for improving readability of the Thai word break
// algorithm. The implementation is completely inline.
// One PossibleWord caches the dictionary candidates found at a single text
// offset, so repeated queries at the same offset do not consult the
// dictionary again.
// List size, limited by the maximum number of words in the dictionary
// that form a nested sequence.
#define POSSIBLE_WORD_LIST_MAX 20
class PossibleWord {
private:
// list of word candidate lengths, in increasing length order
int32_t lengths[POSSIBLE_WORD_LIST_MAX];
int count; // Count of candidates
int32_t prefix; // The longest match with a dictionary word
int32_t offset; // Offset in the text of these candidates (-1 until first use)
int mark; // Index into lengths[] of the preferred candidate
int current; // Index into lengths[] of the candidate we're currently looking at
public:
PossibleWord();
~PossibleWord();
// Fill the list of candidates if needed, select the longest, and return the number found
int candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd );
// Select the currently marked candidate, point after it in the text, and
// return its length
int32_t acceptMarked( UText *text );
// Back up from the current candidate to the next shorter one; return TRUE if that exists
// and point the text after it
UBool backUp( UText *text );
// Return the longest prefix this candidate location shares with a dictionary word
int32_t longestPrefix();
// Mark the current candidate as the one we like
void markCurrent();
};
// Starts with an offset of -1, which candidates() compares against the
// text position it is asked about. The other members are set by candidates().
inline
PossibleWord::PossibleWord()
    : offset(-1)
{
}
// Nothing to release; the body is intentionally empty.
inline
PossibleWord::~PossibleWord() {
}
// Returns the number of dictionary words that start at the current text
// position. The dictionary is consulted only when the position differs from
// the cached offset. On return the text is positioned after the longest
// candidate (or back at the start if a fresh lookup found none), and both
// `current` and `mark` select the longest candidate.
inline int
PossibleWord::candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd ) {
// TODO: If getIndex is too slow, use offset < 0 and add discardAll()
int32_t start = (int32_t)utext_getNativeIndex(text);
if (start != offset) {
offset = start;
// matches() fills lengths[] and count; its return value is kept as the longest prefix.
prefix = dict->matches(text, rangeEnd-start, lengths, count, sizeof(lengths)/sizeof(lengths[0]));
// Dictionary leaves text after longest prefix, not longest word. Back up.
if (count <= 0) {
utext_setNativeIndex(text, start);
}
}
if (count > 0) {
// lengths[] is in increasing order, so the last entry is the longest word.
utext_setNativeIndex(text, start+lengths[count-1]);
}
current = count-1;
mark = current;
return count;
}
// Positions the text just after the marked candidate and returns that
// candidate's length.
inline int32_t
PossibleWord::acceptMarked( UText *text ) {
    const int32_t markedLength = lengths[mark];
    utext_setNativeIndex(text, offset + markedLength);
    return lengths[mark];
}
// Moves to the next shorter candidate, if there is one, and positions the
// text just after it. Returns FALSE when already at the shortest candidate.
inline UBool
PossibleWord::backUp( UText *text ) {
    if (current <= 0) {
        return FALSE;
    }
    current -= 1;
    utext_setNativeIndex(text, offset + lengths[current]);
    return TRUE;
}
// Returns the prefix value stored by the most recent dictionary lookup in candidates().
inline int32_t
PossibleWord::longestPrefix() {
return prefix;
}
// Remembers the candidate currently being examined as the preferred one,
// to be used by a later acceptMarked().
inline void
PossibleWord::markCurrent() {
mark = current;
}
// Tuning constants for the Thai word-break heuristics.
// How many words in a row are "good enough"?
#define THAI_LOOKAHEAD 3
// Will not combine a non-word with a preceding dictionary word longer than this
#define THAI_ROOT_COMBINE_THRESHOLD 3
// Will not combine a non-word that shares at least this much prefix with a
// dictionary word, with a preceding word
#define THAI_PREFIX_COMBINE_THRESHOLD 3
// Elision character
#define THAI_PAIYANNOI 0x0E2F
// Repeat character
#define THAI_MAIYAMOK 0x0E46
// Minimum word size
#define THAI_MIN_WORD 2
// Minimum number of characters for two words
#define THAI_MIN_WORD_SPAN (THAI_MIN_WORD * 2)
// Builds an engine for word and line breaks (UBRK_WORD and UBRK_LINE bits)
// that adopts the given dictionary, and sets up the character sets used by
// divideUpDictionaryRange(). Pattern parsing errors are reported in status.
ThaiBreakEngine::ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
fDictionary(adoptDictionary)
{
// Characters this engine handles: Thai script with LineBreak=SA.
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fThaiWordSet);
}
// Marks within that set, plus U+0020; a word is never ended just before one of these.
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
// Characters that may end a word: the Thai word set minus the ones removed below.
fEndWordSet = fThaiWordSet;
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
// Characters that may begin a word.
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
// Suffix characters that can be attached to the preceding word.
fSuffixSet.add(THAI_PAIYANNOI);
fSuffixSet.add(THAI_MAIYAMOK);
// Compact for caching.
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
fSuffixSet.compact();
}
// Deletes the dictionary that was adopted by the constructor.
ThaiBreakEngine::~ThaiBreakEngine() {
delete fDictionary;
}
// Splits the text range [rangeStart, rangeEnd) into words and pushes the end
// position of each word onto foundBreaks.
//
// At each position the dictionary is asked for candidate words. With several
// candidates, up to THAI_LOOKAHEAD words of look-ahead are used to choose
// between them. Text that is not a dictionary word is either attached to the
// preceding short word or skipped until a plausible word start is found.
// Combining marks and the suffix characters PAIYANNOI and MAIYAMOK are kept
// with the word before them.
//
// Returns the number of words found, not counting a final break that falls
// at or after rangeEnd (that break is popped again before returning).
// Returns 0 without touching the text if the range is shorter than
// THAI_MIN_WORD_SPAN.
int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
uint32_t wordsFound = 0;
int32_t wordLength;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
// Ring of candidate lists, indexed by (word number % THAI_LOOKAHEAD).
PossibleWord words[THAI_LOOKAHEAD];
UChar32 uc;
utext_setNativeIndex(text, rangeStart);
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
wordLength = 0;
// Look for candidate words at the current position
int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// If we found exactly one, use that
if (candidates == 1) {
wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// If there was more than one, see which one can take us forward the most words
else if (candidates > 1) {
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
do {
int wordsMatched = 1;
if (words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
// See if any of the possible second words is followed by a third word
do {
// If we find a third word, stop right away
if (words[(wordsFound+2)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
goto foundBest;
}
}
while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(text));
}
}
while (words[wordsFound%THAI_LOOKAHEAD].backUp(text));
foundBest:
wordLength = words[wordsFound%THAI_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// We come here after having either found a word or not. We look ahead to the
// next word. If it's not a dictionary word, we will combine it with the word we
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& (wordLength == 0
|| words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
// Look for a plausible word boundary
//TODO: This section will need a rework for UText.
int32_t remaining = rangeEnd - (current+wordLength);
UChar32 pc = utext_current32(text);
int32_t chars = 0;
for (;;) {
utext_next32(text);
uc = utext_current32(text);
// TODO: Here we're counting on the fact that the SA languages are all
// in the BMP. This should get fixed with the UText rework.
chars += 1;
if (--remaining <= 0) {
break;
}
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
// NOTE: In the original Apple code, checked that the next
// two characters after uc were not 0x0E4C THANTHAKHAT before
// checking the dictionary. That is just a performance filter,
// but it's not clear it's faster than checking the trie.
int candidates = words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// candidates() moved the text; return to the position being examined.
utext_setNativeIndex(text, current+wordLength+chars);
if (candidates > 0) {
break;
}
}
pc = uc;
}
// Bump the word count if there wasn't already one
if (wordLength <= 0) {
wordsFound += 1;
}
// Update the length with the passed-over characters
wordLength += chars;
}
else {
// Back up to where we were for next iteration
utext_setNativeIndex(text, current+wordLength);
}
}
// Never stop before a combining mark.
int32_t currPos;
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
wordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
// resynch continues to function. For example, one of the suffix characters
// could be a typo in the middle of a word.
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& fSuffixSet.contains(uc = utext_current32(text))) {
if (uc == THAI_PAIYANNOI) {
if (!fSuffixSet.contains(utext_previous32(text))) {
// Skip over previous end and PAIYANNOI
utext_next32(text);
utext_next32(text);
wordLength += 1; // Add PAIYANNOI to word
uc = utext_current32(text); // Fetch next character
}
else {
// Restore prior position
utext_next32(text);
}
}
if (uc == THAI_MAIYAMOK) {
if (utext_previous32(text) != THAI_MAIYAMOK) {
// Skip over previous end and MAIYAMOK
utext_next32(text);
utext_next32(text);
wordLength += 1; // Add MAIYAMOK to word
}
else {
// Restore prior position
utext_next32(text);
}
}
}
else {
utext_setNativeIndex(text, current+wordLength);
}
}
// Did we find a word on this iteration? If so, push it on the break stack
if (wordLength > 0) {
foundBreaks.push((current+wordLength), status);
}
}
// Don't return a break for the end of the dictionary range if there is one there.
if (foundBreaks.peeki() >= rangeEnd) {
(void) foundBreaks.popi();
wordsFound -= 1;
}
return wordsFound;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

193
source/common/dictbe.h Normal file
View file

@ -0,0 +1,193 @@
/**
*******************************************************************************
* Copyright (C) 2006, International Business Machines Corporation and others. *
* All Rights Reserved. *
*******************************************************************************
*/
#ifndef DICTBE_H
#define DICTBE_H
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/utext.h"
#include "brkeng.h"
U_NAMESPACE_BEGIN
class TrieWordDictionary;
/*******************************************************************
* DictionaryBreakEngine
*/
/**
* <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
* dictionary to determine language-specific breaks.</p>
*
* <p>After it is constructed a DictionaryBreakEngine may be shared between
* threads without synchronization.</p>
*/
class DictionaryBreakEngine : public LanguageBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fSet;
/**
* The set of break types handled by this engine, as a bitmap
* (bit n set means break type n is handled)
* @internal
*/
uint32_t fTypes;
/**
* <p>Default constructor. Declared private, so it is not available to
* subclasses or clients.</p>
*
*/
DictionaryBreakEngine();
public:
/**
* <p>Constructor setting the break types handled.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
DictionaryBreakEngine( uint32_t breakTypes );
/**
* <p>Virtual destructor.</p>
*/
virtual ~DictionaryBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles( UChar32 c, int32_t breakType ) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A UStack onto which the breaks found, if any, are pushed
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
protected:
/**
* <p>Set the character set handled by this engine.</p>
*
* @param set A UnicodeSet of the set of characters handled by the engine
*/
virtual void setCharacters( const UnicodeSet &set );
/**
* <p>Set the break types handled by this engine.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
// virtual void setBreakTypes( uint32_t breakTypes );
/**
* <p>Divide up a range of known dictionary characters.</p>
* <p>Pure virtual: each concrete engine supplies its own implementation.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack onto which the break positions are pushed
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* ThaiBreakEngine
*/
/**
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
* TrieWordDictionary and heuristics to determine Thai-specific breaks.</p>
*
* <p>After it is constructed a ThaiBreakEngine may be shared between
* threads without synchronization.</p>
*/
class ThaiBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fThaiWordSet;
/** Characters that may end a word. @internal */
UnicodeSet fEndWordSet;
/** Characters that may begin a word. @internal */
UnicodeSet fBeginWordSet;
/** Suffix characters (PAIYANNOI, MAIYAMOK) that attach to the preceding word. @internal */
UnicodeSet fSuffixSet;
/** Mark characters; a word is never ended just before one of these. @internal */
UnicodeSet fMarkSet;
/** The adopted dictionary; deleted by the destructor. @internal */
const TrieWordDictionary *fDictionary;
public:
/**
* <p>Constructor.</p>
*
* @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
* engine is deleted.
* @param status Receives an error code if setting up the engine's
* character sets fails.
*/
ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ThaiBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack onto which the break positions are pushed
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
U_NAMESPACE_END
/* DICTBE_H */
#endif

61
source/common/dtintrv.cpp Normal file
View file

@ -0,0 +1,61 @@
/*******************************************************************************
* Copyright (C) 2008, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File DTINTRV.CPP
*
*******************************************************************************
*/
#include "unicode/dtintrv.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
//DateInterval::DateInterval(){}
/**
 * Construct an interval from its two end points.
 * @param from value stored as fromDate
 * @param to   value stored as toDate
 */
DateInterval::DateInterval(UDate from, UDate to) : fromDate(from), toDate(to) {
}
/** Destructor; there is nothing to release. */
DateInterval::~DateInterval() {
}
/**
 * Copy constructor: copies both end points of other.
 * @param other the interval to copy
 */
DateInterval::DateInterval(const DateInterval& other)
    : UObject(other),
      fromDate(other.fromDate),
      toDate(other.toDate)
{
}
/**
 * Assignment: copies both end points of other. Self-assignment is a no-op.
 * @param other the interval to copy
 * @return *this
 */
DateInterval&
DateInterval::operator=(const DateInterval& other) {
    if (this == &other) {
        return *this;
    }
    fromDate = other.fromDate;
    toDate = other.toDate;
    return *this;
}
/**
 * Create a heap-allocated copy of this interval via the copy constructor.
 * @return the new object (result of operator new)
 */
DateInterval*
DateInterval::clone() const {
    DateInterval* copy = new DateInterval(*this);
    return copy;
}
/**
 * Equality: both end points must compare equal.
 * @param other the interval to compare with
 * @return TRUE when fromDate and toDate both match, FALSE otherwise
 */
UBool
DateInterval::operator==(const DateInterval& other) const {
    if (fromDate == other.fromDate) {
        return toDate == other.toDate;
    }
    return FALSE;
}
U_NAMESPACE_END

View file

@ -0,0 +1,34 @@
/*
*******************************************************************************
*
* Copyright (C) 2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: errorcode.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009mar10
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/errorcode.h"
U_NAMESPACE_BEGIN
/**
 * Reset the stored error code to U_ZERO_ERROR.
 * @return the error code that was stored before the reset
 */
UErrorCode ErrorCode::reset() {
    const UErrorCode previous = errorCode;
    errorCode = U_ZERO_ERROR;
    return previous;
}
/**
 * Invoke handleFailure() if isFailure() reports a failure; otherwise do nothing.
 */
void ErrorCode::assertSuccess() const {
    if(!isFailure()) {
        return;
    }
    handleFailure();
}
U_NAMESPACE_END

207
source/common/hash.h Normal file
View file

@ -0,0 +1,207 @@
/*
******************************************************************************
* Copyright (C) 1997-2006, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
* 03/28/00 aliu Creation.
******************************************************************************
*/
#ifndef HASH_H
#define HASH_H
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
/**
* Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
* hashtable implemented in C. Hashtable is designed to be idiomatic and
* easy-to-use in C++.
*
* Hashtable is an INTERNAL CLASS.
*/
class U_COMMON_API Hashtable : public UMemory {
/* Pointer to the table in use: initialized to 0 by every constructor and set
* to &hashObj by init() only when uhash_init() succeeds. */
UHashtable* hash;
/* Embedded storage for the underlying UHashtable. */
UHashtable hashObj;
/* Shared constructor helper: initializes hashObj and, on success, installs
* uhash_deleteUnicodeString as the key deleter. */
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
public:
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
* @param valueComp Comparator for comparing the values
* @param status Error code
*/
Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
/**
* Construct a hashtable
* @param status Error code
*/
Hashtable(UErrorCode& status);
/**
* Construct a hashtable, _disregarding any error_. Use this constructor
* with caution.
*/
Hashtable();
/**
* Non-virtual destructor; make this virtual if Hashtable is subclassed
* in the future.
*/
~Hashtable();
/*
* The members below are thin inline wrappers: each forwards to the uhash_*
* function of the corresponding name, passing the internal table pointer.
* put() and puti() hand the table a heap-allocated copy of the key
* (new UnicodeString(key)); the lookup and removal methods pass the
* address of the caller's key.
*/
UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
int32_t count() const;
void* put(const UnicodeString& key, void* value, UErrorCode& status);
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
void removeAll(void);
const UHashElement* find(const UnicodeString& key) const;
const UHashElement* nextElement(int32_t& pos) const;
/* "Compartor" [sic] in the next two names is part of the existing interface;
* they forward to uhash_setKeyComparator / uhash_setValueComparator. */
UKeyComparator* setKeyCompartor(UKeyComparator*keyComp);
UValueComparator* setValueCompartor(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const;
private:
Hashtable(const Hashtable &other); // forbid copying of this class
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
};
/*********************************************************************
* Implementation
********************************************************************/
/**
 * Constructor helper. Does nothing if status already indicates failure.
 * Otherwise initializes hashObj with uhash_init(); only when that succeeds
 * is hash pointed at hashObj and uhash_deleteUnicodeString installed as the
 * key deleter.
 * @param keyHash   key hashing function passed to uhash_init
 * @param keyComp   key comparator passed to uhash_init
 * @param valueComp value comparator passed to uhash_init
 * @param status    input-output error code
 */
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
    }
    if (U_FAILURE(status)) {
        return;
    }
    hash = &hashObj;
    uhash_setKeyDeleter(hash, uhash_deleteUnicodeString);
}
/**
 * Construct a table hashed with uhash_hashUnicodeString and compared with
 * the caller-supplied key and value comparators.
 */
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
                            UErrorCode& status)
    : hash(NULL)
{
    init(uhash_hashUnicodeString, keyComp, valueComp, status);
}
/**
 * Construct a table keyed by UnicodeString. With ignoreKeyCase set, the
 * caseless hash and compare functions are used; otherwise the plain
 * UnicodeString ones. No value comparator is installed.
 */
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
    : hash(NULL)
{
    if (ignoreKeyCase) {
        init(uhash_hashCaselessUnicodeString,
             uhash_compareCaselessUnicodeString,
             NULL,
             status);
    } else {
        init(uhash_hashUnicodeString,
             uhash_compareUnicodeString,
             NULL,
             status);
    }
}
/**
 * Construct a case-sensitive table keyed by UnicodeString, with no value
 * comparator.
 */
inline Hashtable::Hashtable(UErrorCode& status) : hash(NULL) {
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
/**
 * Construct a case-sensitive table keyed by UnicodeString. Any
 * initialization error is recorded in a local status and discarded, so on
 * failure hash stays null.
 */
inline Hashtable::Hashtable() : hash(NULL) {
    UErrorCode ignoredStatus = U_ZERO_ERROR;
    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, ignoredStatus);
}
/** Close the underlying table, if initialization ever succeeded. */
inline Hashtable::~Hashtable() {
    if (hash == NULL) {
        return;
    }
    uhash_close(hash);
}
/** Forward to uhash_setValueDeleter() and return its result. */
inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
    UObjectDeleter *result = uhash_setValueDeleter(hash, fn);
    return result;
}
/** Forward to uhash_count() and return its result. */
inline int32_t Hashtable::count() const {
    const int32_t n = uhash_count(hash);
    return n;
}
inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
return uhash_put(hash, new UnicodeString(key), value, &status);
}
/**
 * Forward to uhash_puti() with a heap-allocated copy of key.
 * @return whatever uhash_puti() returns
 */
inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
    UnicodeString *keyCopy = new UnicodeString(key);
    return uhash_puti(hash, keyCopy, value, &status);
}
/** Forward to uhash_get() with the address of key and return its result. */
inline void* Hashtable::get(const UnicodeString& key) const {
    void *value = uhash_get(hash, &key);
    return value;
}
/** Forward to uhash_geti() with the address of key and return its result. */
inline int32_t Hashtable::geti(const UnicodeString& key) const {
    const int32_t value = uhash_geti(hash, &key);
    return value;
}
/** Forward to uhash_remove() with the address of key and return its result. */
inline void* Hashtable::remove(const UnicodeString& key) {
    void *removed = uhash_remove(hash, &key);
    return removed;
}
/** Forward to uhash_removei() with the address of key and return its result. */
inline int32_t Hashtable::removei(const UnicodeString& key) {
    const int32_t removed = uhash_removei(hash, &key);
    return removed;
}
/** Forward to uhash_find() with the address of key and return its result. */
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
    const UHashElement *element = uhash_find(hash, &key);
    return element;
}
/**
 * Forward to uhash_nextElement(), passing the address of pos so the callee
 * can update the caller's variable.
 */
inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
    const UHashElement *element = uhash_nextElement(hash, &pos);
    return element;
}
/** Forward to uhash_removeAll() on the underlying table. */
inline void Hashtable::removeAll(void)
{
    uhash_removeAll(hash);
}
/** Forward to uhash_setKeyComparator() and return its result. */
inline UKeyComparator* Hashtable::setKeyCompartor(UKeyComparator*keyComp){
    UKeyComparator *result = uhash_setKeyComparator(hash, keyComp);
    return result;
}
/** Forward to uhash_setValueComparator() and return its result. */
inline UValueComparator* Hashtable::setValueCompartor(UValueComparator* valueComp){
    UValueComparator *result = uhash_setValueComparator(hash, valueComp);
    return result;
}
/** Forward to uhash_equals() with this table and that's table; return its result. */
inline UBool Hashtable::equals(const Hashtable& that)const{
    const UBool same = uhash_equals(hash, that.hash);
    return same;
}
U_NAMESPACE_END
#endif

61
source/common/icucfg.h.in Normal file
View file

@ -0,0 +1,61 @@
/* common/icucfg.h.in. Generated automatically from configure.in by autoheader. */
/* Define if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define if your processor stores words with the most significant
byte first (like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Copyright (c) 1999-2000, International Business Machines Corporation and
others. All Rights Reserved. */
/* Define to signed char if not in <sys/types.h> */
#undef int8_t
/* Define to unsigned char if not in <sys/types.h> */
#undef uint8_t
/* Define to signed short if not in <sys/types.h> */
#undef int16_t
/* Define to unsigned short if not in <sys/types.h> */
#undef uint16_t
/* Define to signed long if not in <sys/types.h> */
#undef int32_t
/* Define to unsigned long if not in <sys/types.h> */
#undef uint32_t
/* Define to signed char if not in <sys/types.h> */
#undef bool_t
/* Define if your system has <wchar.h> */
#undef HAVE_WCHAR_H
/* Define to the size of wchar_t */
#undef SIZEOF_WCHAR_T
/* Define if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define if you have the cma library (-lcma). */
#undef HAVE_LIBCMA
/* Define if you have the dl library (-ldl). */
#undef HAVE_LIBDL
/* Define if you have the dld library (-ldld). */
#undef HAVE_LIBDLD
/* Define if you have the m library (-lm). */
#undef HAVE_LIBM
/* Define if you have the pthread library (-lpthread). */
#undef HAVE_LIBPTHREAD
/* Define if you have the pthreads library (-lpthreads). */
#undef HAVE_LIBPTHREADS
/* Define if you have the wcs library (-lwcs). */
#undef HAVE_LIBWCS

View file

@ -0,0 +1,83 @@
/*
******************************************************************************
*
* Copyright (C) 2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/icudataver.h"
#include "unicode/uversion.h"
#include "unicode/ures.h"
#include "uresimp.h" /* for ures_getVersionByKey */
#include "cmemory.h"
/*
* Determines if icustd is in the data.
*/
static UBool hasICUSTDBundle();
static UBool hasICUSTDBundle() {
UErrorCode status = U_ZERO_ERROR;
UBool result = TRUE;
UResourceBundle *icustdbundle = ures_openDirect(NULL, U_ICU_STD_BUNDLE, &status);
if (U_SUCCESS(status)) {
result = TRUE;
} else {
result = FALSE;
}
ures_close(icustdbundle);
return result;
}
/*
 * Reads the version stored under U_ICU_DATA_KEY in the U_ICU_VERSION_BUNDLE
 * resource bundle into dataVersionFillin.
 * Does nothing if *status already indicates failure or if dataVersionFillin
 * is NULL. Errors from opening or reading the bundle are reported in *status.
 */
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
    UResourceBundle *versionBundle;

    if (U_FAILURE(*status) || dataVersionFillin == NULL) {
        return;
    }

    versionBundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE, status);
    if (U_SUCCESS(*status)) {
        ures_getVersionByKey(versionBundle, U_ICU_DATA_KEY, dataVersionFillin, status);
    }
    ures_close(versionBundle);
}
/*
 * Compares the version of the loaded data (see u_getDataVersion) with the
 * version this library was built with (U_ICU_DATA_VERSION).
 *
 * dataVersionFillin: if not NULL, receives the data version that was read.
 * isModifiedFillin:  if not NULL, set to FALSE when the U_ICU_STD_BUNDLE
 *                    bundle can be opened and TRUE otherwise.
 * status:            input-output error code; must not be NULL.
 *
 * Returns FALSE when the data version compares greater than or equal to the
 * wired-in version. Returns TRUE otherwise, including when *status indicates
 * failure on entry or the data version cannot be read; in those failure
 * cases neither fill-in is written.
 */
U_CAPI UBool U_EXPORT2 u_isDataOlder(UVersionInfo dataVersionFillin, UBool *isModifiedFillin, UErrorCode *status) {
    UBool result = TRUE;
    UVersionInfo dataVersion;
    UVersionInfo wiredVersion;

    if (U_FAILURE(*status)) {
        return result;
    }

    u_getDataVersion(dataVersion, status);
    if (U_SUCCESS(*status)) {
        u_versionFromString(wiredVersion, U_ICU_DATA_VERSION);

        if (uprv_memcmp(dataVersion, wiredVersion, sizeof(UVersionInfo)) >= 0) {
            result = FALSE;
        }

        if (dataVersionFillin != NULL) {
            uprv_memcpy(dataVersionFillin, dataVersion, sizeof(UVersionInfo));
        }

        /* Treat this out-parameter as optional, like dataVersionFillin,
         * instead of dereferencing a possibly NULL pointer. */
        if (isModifiedFillin != NULL) {
            if (hasICUSTDBundle()) {
                *isModifiedFillin = FALSE;
            } else {
                *isModifiedFillin = TRUE;
            }
        }
    }

    return result;
}

25
source/common/localsvc.h Normal file
View file

@ -0,0 +1,25 @@
/*
***************************************************************************
* Copyright (C) 2006 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#ifndef LOCALSVC_H
#define LOCALSVC_H
#include "unicode/utypes.h"
#if U_LOCAL_SERVICE_HOOK
/**
* Prototype for user-supplied service hook. This function is expected to return
* a type of factory object specific to the requested service.
*
* This header only declares the function, and only when
* U_LOCAL_SERVICE_HOOK is non-zero; no definition appears here.
*
* @param what service-specific string identifying the specific user hook
* @param status error status
* @return a service-specific hook, or NULL on failure.
*/
U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
#endif
#endif

View file

@ -0,0 +1,46 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#include "locbased.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
/**
 * Build a Locale from the ID returned by getLocaleID(). When that ID is
 * null (failure status or unsupported type), the Locale is built from the
 * empty string instead.
 */
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    const char* localeID = getLocaleID(type, status);
    if (localeID == 0) {
        localeID = "";
    }
    return Locale(localeID);
}
/**
 * Return the aliased valid or actual locale ID.
 * @param type   ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
 * @param status input-output error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *               for any other type
 * @return the selected ID pointer, or NULL if status indicated failure on
 *         entry or type is not one of the two supported values
 */
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (type == ULOC_VALID_LOCALE) {
        return valid;
    }
    if (type == ULOC_ACTUAL_LOCALE) {
        return actual;
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != 0) {
uprv_strcpy(valid, validID);
}
if (actualID != 0) {
uprv_strcpy(actual, actualID);
}
}
U_NAMESPACE_END

97
source/common/locbased.h Normal file
View file

@ -0,0 +1,97 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#ifndef LOCBASED_H
#define LOCBASED_H
#include "unicode/locid.h"
#include "unicode/uobject.h"
/**
* Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and
* `actualLocale'.
*
* The expansion declares a LocaleBased variable named varname and already
* ends with a semicolon.
*
* @param varname name of the LocaleBased variable to declare
* @param objname expression naming the object that owns the two members
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale);
U_NAMESPACE_BEGIN
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
* ICU 3.0 time frame in favor of an integrated approach with the
* services framework.
* @since ICU 2.8
*/
class U_COMMON_API LocaleBased : public UMemory {
public:
/**
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
* @param validAlias pointer stored as the valid locale ID
* @param actualAlias pointer stored as the actual locale ID
*/
inline LocaleBased(char* validAlias, char* actualAlias);
/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
* The inline implementation casts away const to store them.
* @param validAlias pointer stored as the valid locale ID
* @param actualAlias pointer stored as the actual locale ID
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
/**
* Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Return the locale ID for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale ID
*/
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Set the locale meta-data for the service object wrapped by this
* object. If either parameter is zero, it is ignored.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual);
private:
/* Aliased pointers supplied to the constructor; this class declares no
* destructor, so it does not free them. */
char* valid;
char* actual;
};
/** Store the two pointers as aliases; nothing is copied. */
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias)
    : valid(validAlias),
      actual(actualAlias)
{
}
/**
 * Store the two pointers as aliases; nothing is copied. The members are
 * non-const, so constness is deliberately cast away here.
 */
inline LocaleBased::LocaleBased(const char* validAlias, const char* actualAlias)
    : valid(const_cast<char*>(validAlias)),
      actual(const_cast<char*>(actualAlias))
{
}
U_NAMESPACE_END
#endif

1340
source/common/locid.cpp Normal file

File diff suppressed because it is too large Load diff

893
source/common/locmap.c Normal file
View file

@ -0,0 +1,893 @@
/*
**********************************************************************
* Copyright (C) 1996-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* Provides functionality for mapping between
* LCID and Posix IDs or ICU locale to codepage
*
* Note: All classes and code in this file are
* intended for internal use only.
*
* Methods of interest:
* unsigned long convertToLCID(const char*);
* const char* convertToPosix(unsigned long);
*
* Kathleen Wilson, 4/30/96
*
* Date Name Description
* 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
* setId() method and safety check against
* MAX_ID_LENGTH.
* 04/23/99 stephen Added C wrapper for convertToPosix.
* 09/18/00 george Removed the memory leaks.
* 08/23/01 george Convert to C
*/
#include "locmap.h"
#include "cstring.h"
/*
* Note:
* The mapping from Win32 locale ID numbers to POSIX locale strings should
* be the faster one.
*
* Many LCID values come from winnt.h
* Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
*/
/*
////////////////////////////////////////////////
//
// Internal Classes for LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/
/* One LCID <--> POSIX pair: a host (Win32) locale ID number and the POSIX
* locale string it is paired with. */
typedef struct ILcidPosixElement
{
const uint32_t hostID; /* host locale ID number (LCID) */
const char * const posixID; /* POSIX locale ID string */
} ILcidPosixElement;
/* A counted array of ILcidPosixElement entries; ILCID_POSIX_MAP builds one
* of these from a static element array. */
typedef struct ILcidPosixMap
{
const uint32_t numRegions; /* number of entries in regionMaps */
const struct ILcidPosixElement* const regionMaps; /* the entries */
} ILcidPosixMap;
/*
/////////////////////////////////////////////////
//
// Easy macros to make the LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/
/*
The standard one language/one country mapping for LCID.
The first element must be the language, and the following
elements are the language with the country.

Expands to a complete static array definition named languageID with two
entries: {LANGUAGE_LCID(hostID), "languageID"} followed by
{hostID, "posixID"}. The strings are produced by stringifying the macro
arguments, so languageID serves as both the variable name and the language
string. The expansion ends with a semicolon, so uses are not followed by one.
*/
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement languageID[] = { \
{LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
{hostID, #posixID}, \
};
/*
Build one ILcidPosixMap initializer from a static ILcidPosixElement array:
the element count is computed with sizeof, followed by the array itself.
This macro supposes that the language string name is the same as the global
variable name, and that the first element in the ILcidPosixElement array is
just the language. The argument must be a real array, not a pointer.
*/
#define ILCID_POSIX_MAP(_posixID) \
    {sizeof(_posixID) / sizeof((_posixID)[0]), _posixID}
/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
////////////////////////////////////////////
*/
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
static const ILcidPosixElement ar[] = {
{0x01, "ar"},
{0x3801, "ar_AE"},
{0x3c01, "ar_BH"},
{0x1401, "ar_DZ"},
{0x0c01, "ar_EG"},
{0x0801, "ar_IQ"},
{0x2c01, "ar_JO"},
{0x3401, "ar_KW"},
{0x3001, "ar_LB"},
{0x1001, "ar_LY"},
{0x1801, "ar_MA"},
{0x2001, "ar_OM"},
{0x4001, "ar_QA"},
{0x0401, "ar_SA"},
{0x2801, "ar_SY"},
{0x1c01, "ar_TN"},
{0x2401, "ar_YE"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
static const ILcidPosixElement az[] = {
{0x2c, "az"},
{0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
{0x082c, "az_Cyrl"}, /* Cyrillic based */
{0x042c, "az_Latn_AZ"}, /* Latin based */
{0x042c, "az_Latn"}, /* Latin based */
{0x042c, "az_AZ"} /* Latin based */
};
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
static const ILcidPosixElement ber[] = {
{0x5f, "ber"},
{0x045f, "ber_Arab_DZ"},
{0x045f, "ber_Arab"},
{0x085f, "ber_Latn_DZ"},
{0x085f, "ber_Latn"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
static const ILcidPosixElement bn[] = {
{0x45, "bn"},
{0x0845, "bn_BD"},
{0x0445, "bn_IN"}
};
static const ILcidPosixElement bo[] = {
{0x51, "bo"},
{0x0851, "bo_BT"},
{0x0451, "bo_CN"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
static const ILcidPosixElement cs_CZ[] = {
{0x05, "cs"},
{0x0405, "cs_CZ"},
};
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
static const ILcidPosixElement de[] = {
{0x07, "de"},
{0x0c07, "de_AT"},
{0x0807, "de_CH"},
{0x0407, "de_DE"},
{0x1407, "de_LI"},
{0x1007, "de_LU"},
{0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
{0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
static const ILcidPosixElement en[] = {
{0x09, "en"},
{0x0c09, "en_AU"},
{0x2809, "en_BZ"},
{0x1009, "en_CA"},
{0x0809, "en_GB"},
{0x1809, "en_IE"},
{0x4009, "en_IN"},
{0x2009, "en_JM"},
{0x4409, "en_MY"},
{0x1409, "en_NZ"},
{0x3409, "en_PH"},
{0x4809, "en_SG"},
{0x2C09, "en_TT"},
{0x0409, "en_US"},
{0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
{0x1c09, "en_ZA"},
{0x3009, "en_ZW"},
{0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
{0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
{0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
{0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
{0x0409, "en_UM"} /* Alias for en_US. Leave last. */
};
static const ILcidPosixElement en_US_POSIX[] = {
{0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
};
static const ILcidPosixElement es[] = {
{0x0a, "es"},
{0x2c0a, "es_AR"},
{0x400a, "es_BO"},
{0x340a, "es_CL"},
{0x240a, "es_CO"},
{0x140a, "es_CR"},
{0x1c0a, "es_DO"},
{0x300a, "es_EC"},
{0x0c0a, "es_ES"}, /*Modern sort.*/
{0x100a, "es_GT"},
{0x480a, "es_HN"},
{0x080a, "es_MX"},
{0x4c0a, "es_NI"},
{0x180a, "es_PA"},
{0x280a, "es_PE"},
{0x500a, "es_PR"},
{0x3c0a, "es_PY"},
{0x440a, "es_SV"},
{0x540a, "es_US"},
{0x380a, "es_UY"},
{0x200a, "es_VE"},
{0x040a, "es_ES@collation=traditional"},
{0x040a, "es@collation=traditional"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
/* ISO-639 doesn't distinguish between Persian and Dari.*/
static const ILcidPosixElement fa[] = {
{0x29, "fa"},
{0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
{0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
};
/* duplicate for roundtripping */
static const ILcidPosixElement fa_AF[] = {
{0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
{0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
};
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
static const ILcidPosixElement fr[] = {
{0x0c, "fr"},
{0x080c, "fr_BE"},
{0x0c0c, "fr_CA"},
{0x240c, "fr_CD"},
{0x100c, "fr_CH"},
{0x300c, "fr_CI"},
{0x2c0c, "fr_CM"},
{0x040c, "fr_FR"},
{0x3c0c, "fr_HT"},
{0x140c, "fr_LU"},
{0x380c, "fr_MA"},
{0x180c, "fr_MC"},
{0x340c, "fr_ML"},
{0x200c, "fr_RE"},
{0x280c, "fr_SN"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
/* This LCID is really two different locales.*/
static const ILcidPosixElement ga[] = {
{0x3c, "ga"},
{0x3c, "gd"},
{0x083c, "ga_IE"}, /* Gaelic (Ireland) */
{0x043c, "gd_GB"} /* Gaelic (Scotland) */
};
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha, ha_NG) /* ha_Latn_NG? */
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
/* This LCID is really four different locales.*/
static const ILcidPosixElement hr[] = {
{0x1a, "hr"},
{0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
{0x141a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
{0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
{0x141a, "bs"}, /* Bosnian */
{0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
{0x201a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
{0x101a, "hr_BA"}, /* Croatian in Bosnia */
{0x041a, "hr_HR"}, /* Croatian*/
{0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
{0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
{0x081a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
{0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
{0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
{0x0c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
{0x0c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
};
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
static const ILcidPosixElement it[] = {
{0x10, "it"},
{0x0810, "it_CH"},
{0x0410, "it_IT"}
};
static const ILcidPosixElement iu[] = {
{0x5d, "iu"},
{0x045d, "iu_Cans_CA"},
{0x045d, "iu_Cans"},
{0x085d, "iu_Latn_CA"},
{0x085d, "iu_Latn"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
static const ILcidPosixElement ko[] = {
{0x12, "ko"},
{0x0812, "ko_KP"},
{0x0412, "ko_KR"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
static const ILcidPosixElement ks[] = { /* We could add PK and CN too */
{0x60, "ks"},
{0x0860, "ks_IN"}, /* Documentation doesn't mention script */
{0x0460, "ks_Arab_IN"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
static const ILcidPosixElement mn[] = {
{0x50, "mn"},
{0x0850, "mn_CN"},
{0x0450, "mn_MN"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
static const ILcidPosixElement ms[] = {
{0x3e, "ms"},
{0x083e, "ms_BN"}, /* Brunei Darussalam*/
{0x043e, "ms_MY"} /* Malaysia*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
static const ILcidPosixElement ne[] = {
{0x61, "ne"},
{0x0861, "ne_IN"}, /* India*/
{0x0461, "ne_NP"} /* Nepal*/
};
static const ILcidPosixElement nl[] = {
{0x13, "nl"},
{0x0813, "nl_BE"},
{0x0413, "nl_NL"}
};
/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
static const ILcidPosixElement no[] = {
{0x14, "nb"}, /* really nb */
{0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
{0x0414, "no"}, /* really nb_NO */
{0x0414, "no_NO"}, /* really nb_NO */
{0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
{0x0814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
{0x0814, "no_NO_NY"}/* really nn_NO */
};
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x0472, om, om_ET) /* TODO: Verify the country */
/* Declared as or_IN to get around compiler errors*/
static const ILcidPosixElement or_IN[] = {
{0x48, "or"},
{0x0448, "or_IN"},
};
static const ILcidPosixElement pa[] = {
{0x46, "pa"},
{0x0446, "pa_IN"},
{0x0846, "pa_PK"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
static const ILcidPosixElement pt[] = {
{0x16, "pt"},
{0x0416, "pt_BR"},
{0x0816, "pt_PT"}
};
static const ILcidPosixElement qu[] = {
{0x6b, "qu"},
{0x046b, "qu_BO"},
{0x086b, "qu_EC"},
{0x0C6b, "qu_PE"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut, qut_GT) /* qut is an ISO-639-3 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
static const ILcidPosixElement root[] = {
{0x00, "root"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
static const ILcidPosixElement sd[] = {
{0x59, "sd"},
{0x0459, "sd_IN"},
{0x0859, "sd_PK"}
};
static const ILcidPosixElement se[] = {
{0x3b, "se"},
{0x0c3b, "se_FI"},
{0x043b, "se_NO"},
{0x083b, "se_SE"},
{0x183b, "sma_NO"},
{0x1c3b, "sma_SE"},
{0x103b, "smj_NO"},
{0x143b, "smj_SE"},
{0x243b, "smn_FI"},
{0x203b, "sms_FI"},
};
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
ILCID_POSIX_ELEMENT_ARRAY(0x0477, so, so_ET) /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
static const ILcidPosixElement sv[] = {
{0x1d, "sv"},
{0x081d, "sv_FI"},
{0x041d, "sv_SE"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0428, tg, tg_TJ) /* Cyrillic based by default */
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
static const ILcidPosixElement ti[] = {
{0x73, "ti"},
{0x0873, "ti_ER"},
{0x0473, "ti_ET"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn, tn_BW)
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug, ug_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
static const ILcidPosixElement ur[] = {
{0x20, "ur"},
{0x0820, "ur_IN"},
{0x0420, "ur_PK"}
};
static const ILcidPosixElement uz[] = {
{0x43, "uz"},
{0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
{0x0843, "uz_Cyrl"}, /* Cyrillic based */
{0x0843, "uz_UZ"}, /* Cyrillic based */
{0x0443, "uz_Latn_UZ"}, /* Latin based */
{0x0443, "uz_Latn"} /* Latin based */
};
ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve, ve_ZA) /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
static const ILcidPosixElement wen[] = {
{0x2E, "wen"},
{0x042E, "wen_DE"},
{0x042E, "hsb_DE"},
{0x082E, "dsb_DE"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
static const ILcidPosixElement zh[] = {
{0x04, "zh"},
{0x0804, "zh_Hans_CN"},
{0x0804, "zh_Hans"},
{0x0804, "zh_CN"},
{0x0c04, "zh_Hant_HK"},
{0x0c04, "zh_HK"},
{0x1404, "zh_Hant_MO"},
{0x1404, "zh_MO"},
{0x1004, "zh_Hans_SG"},
{0x1004, "zh_SG"},
{0x0404, "zh_Hant_TW"},
{0x0404, "zh_Hant"},
{0x0404, "zh_TW"},
{0x30404,"zh_Hant_TW"}, /* Bopomofo order */
{0x30404,"zh_TW"}, /* Bopomofo order */
{0x20404,"zh_Hant_TW@collation=stroke"},
{0x20404,"zh_TW@collation=stroke"},
{0x20804,"zh_Hans_CN@collation=stroke"},
{0x20804,"zh_CN@collation=stroke"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
/* This must be static and grouped by LCID. */
/* non-existent ISO-639-2 codes */
/*
0x466 Edo
0x467 Fulfulde - Nigeria
0x486 K'iche - Guatemala (mapped above as qut, an ISO-639-3 code)
0x430 Sutu
*/
static const ILcidPosixMap gPosixIDmap[] = {
ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
ILCID_POSIX_MAP(ber), /* ber Berber/Tamazight 0x5f */
ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
ILCID_POSIX_MAP(br), /* br Breton 0x7e */
ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
ILCID_POSIX_MAP(cs_CZ), /* cs Czech 0x05 */
ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
ILCID_POSIX_MAP(da), /* da Danish 0x06 */
ILCID_POSIX_MAP(de), /* de German 0x07 */
ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
ILCID_POSIX_MAP(el), /* el Greek 0x08 */
ILCID_POSIX_MAP(en), /* en English 0x09 */
ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
ILCID_POSIX_MAP(fr), /* fr French 0x0c */
ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
ILCID_POSIX_MAP(it), /* it Italian 0x10 */
ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
ILCID_POSIX_MAP(la), /* la Latin 0x76 */
ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
ILCID_POSIX_MAP(root), /* root 0x00 */
ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
ILCID_POSIX_MAP(se), /* se Sami 0x3b */
/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
ILCID_POSIX_MAP(so), /* so Somali 0x77 */
ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
ILCID_POSIX_MAP(th), /* th Thai 0x1e */
ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
ILCID_POSIX_MAP(wen), /* wen Sorbian 0x2e */
ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
};
static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
/**
 * Returns the length of the common prefix of two NUL-terminated IDs,
 * i.e. the number of leading characters that are identical in both.
 * Comparison stops at the first difference or at the end of id1.
 * Internal helper of getHostID().
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t common = 0;

    for (;;) {
        if (id1[common] == 0 || id1[common] != id2[common]) {
            break;
        }
        common++;
    }
    return common;
}
/**
 * Searches one language entry for the Windows LCID of a POSIX locale ID.
 *
 * @param this_0  the language entry whose region list is searched.
 * @param posixID the POSIX style locale ID.
 * @param status  left untouched on an exact match; set to
 *                U_USING_FALLBACK_WARNING when only a shorter listed ID that
 *                ends at a '_' or '@' boundary of posixID matches; set to
 *                U_ILLEGAL_ARGUMENT_ERROR when nothing in this entry matches.
 * @return the LCID of the exact or best match, or the LCID of the first
 *         (language-only) element when there is no match.
 */
static uint32_t
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
{
    const int32_t requestedLen = (int32_t)uprv_strlen(posixID);
    int32_t bestLen = 0;     /* length of the longest fully matched listed ID so far */
    int32_t bestRegion = 0;  /* index of that ID within regionMaps */
    uint32_t region;

    for (region = 0; region < this_0->numRegions; region++) {
        const char *candidate = this_0->regionMaps[region].posixID;
        int32_t matched = idCmp(posixID, candidate);

        /* Only a listed ID that is entirely a prefix of posixID, and longer
           than the best one so far, is of interest. */
        if (matched <= bestLen || candidate[matched] != 0) {
            continue;
        }
        if (matched == requestedLen) {
            /* Exact match */
            return this_0->regionMaps[region].hostID;
        }
        bestLen = matched;
        bestRegion = (int32_t)region;
    }

    /* Something unusual like en_ZZ was requested: fall back to the best
       prefix, but only when it ends on a field boundary of posixID so that
       "sid" is not taken for "si". */
    if ((posixID[bestLen] == '_' || posixID[bestLen] == '@')
        && this_0->regionMaps[bestRegion].posixID[bestLen] == 0)
    {
        *status = U_USING_FALLBACK_WARNING;
        return this_0->regionMaps[bestRegion].hostID;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return this_0->regionMaps[0].hostID;
}
/**
 * Looks up the POSIX ID for a Windows LCID within one language entry.
 *
 * @param this_0 the language entry whose region list is searched.
 * @param hostID the full LCID to look for.
 * @return the posixID of the first element whose hostID equals hostID, or
 *         the posixID of the first (language-only) element if none matches.
 */
static const char*
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
{
    uint32_t i;

    /* Valid indices are 0 .. numRegions-1; the bound must be exclusive so
       that the element one past the end of regionMaps is never read. */
    for (i = 0; i < this_0->numRegions; i++)
    {
        if (this_0->regionMaps[i].hostID == hostID)
        {
            return this_0->regionMaps[i].posixID;
        }
    }
    /* If you get here, then no matching region was found,
       so return the language id with the wild card region. */
    return this_0->regionMaps[0].posixID;
}
/*
//////////////////////////////////////
//
// LCID --> POSIX
//
/////////////////////////////////////
*/
/**
 * Converts a Windows LCID to a POSIX locale ID.
 *
 * @param hostid the LCID to convert.
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR when the language part of
 *               hostid is not in gPosixIDmap; otherwise left untouched.
 * @return the POSIX ID from the matching language entry, or NULL when the
 *         language is unknown.
 */
U_CAPI const char *
uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
{
    const uint16_t primaryLang = LANGUAGE_LCID(hostid);
    const ILcidPosixMap *entry = gPosixIDmap;
    const ILcidPosixMap *const limit = gPosixIDmap + gLocaleCount;

    /* The first element of every entry carries the language-only LCID. */
    for (; entry != limit; ++entry)
    {
        if (entry->regionMaps->hostID == primaryLang)
        {
            return getPosixID(entry, hostid);
        }
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
/*
//////////////////////////////////////
//
// POSIX --> LCID
// This should only be called from uloc_getLCID.
// The locale ID must be in canonical form.
// langID is separate so that this file doesn't depend on the uloc_* API.
//
/////////////////////////////////////
*/
/**
 * Converts a POSIX locale ID to a Windows LCID.
 *
 * @param langID  the language part of the locale ID; used for the binary
 *                search over gPosixIDmap.
 * @param posixID the full POSIX locale ID.
 * @param status  set to U_USING_FALLBACK_WARNING when only a less specific
 *                ID matched, or to U_ILLEGAL_ARGUMENT_ERROR when nothing
 *                matched. Not modified on an exact match or when the
 *                arguments are NULL or shorter than two characters.
 * @return the LCID, or 0 when the arguments are incomplete or nothing matched.
 */
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{
uint32_t low = 0;
uint32_t high = gLocaleCount;
uint32_t mid = high;
uint32_t oldmid = 0;
int32_t compVal;
uint32_t value = 0;
/* (uint32_t)-1 marks "no fallback candidate seen yet". */
uint32_t fallbackValue = (uint32_t)-1;
UErrorCode myStatus;
uint32_t idx;
/* Check for incomplete id. */
if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
return 0;
}
/*Binary search for the map entry for normal cases */
/* The search compares langID with the first posixID of each entry. Because
   low is set to mid (not mid+1), the interval can stop shrinking; the
   mid == oldmid test detects that and ends the search. */
while (high > low) /*binary search*/{
mid = (high+low) >> 1; /*Finds median*/
if (mid == oldmid)
break;
compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
if (compVal < 0){
high = mid;
}
else if (compVal > 0){
low = mid;
}
else /*we found it*/{
/* The result and status for this language come from getHostID() alone. */
return getHostID(&gPosixIDmap[mid], posixID, status);
}
oldmid = mid;
}
/*
* Sometimes we can't do a binary search on posixID because some LCIDs
* go to different locales. We hit one of those special cases.
*/
/* Linear scan over every entry: an exact match returns immediately, while
   the last fallback match seen is remembered for use after the loop. */
for (idx = 0; idx < gLocaleCount; idx++ ) {
myStatus = U_ZERO_ERROR;
value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
if (myStatus == U_ZERO_ERROR) {
return value;
}
else if (myStatus == U_USING_FALLBACK_WARNING) {
fallbackValue = value;
}
}
if (fallbackValue != (uint32_t)-1) {
*status = U_USING_FALLBACK_WARNING;
return fallbackValue;
}
/* no match found */
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0; /* return international (root) */
}

37
source/common/locmap.h Normal file
View file

@ -0,0 +1,37 @@
/*
******************************************************************************
*
* Copyright (C) 1996-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File locmap.h : Locale Mapping Classes
*
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
* 3/11/97 aliu Added setId().
* 4/20/99 Madhu Added T_convertToPosix()
* 09/18/00 george Removed the memory leaks.
* 08/23/01 george Convert to C
*============================================================================
*/
#ifndef LOCMAP_H
#define LOCMAP_H
#include "unicode/utypes.h"
/* Extracts the low 10 bits (mask 0x03FF) of an LCID as a uint16_t.
   The argument and the whole expansion are parenthesized so that compound
   arguments such as (a | b) and surrounding operators bind as expected. */
#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
U_CAPI const char *uprv_convertToPosix(uint32_t hostid, UErrorCode* status);
/* Don't call this function directly. Use uloc_getLCID instead. */
U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
#endif /* LOCMAP_H */

267
source/common/locutil.cpp Normal file
View file

@ -0,0 +1,267 @@
/**
*******************************************************************************
* Copyright (C) 2002-2006, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
#include "unicode/resbund.h"
#include "cmemory.h"
#include "ustrfmt.h"
#include "locutil.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#include "umutex.h"
// see LocaleUtility::getAvailableLocaleNames
static U_NAMESPACE_QUALIFIER Hashtable * LocaleUtility_cache = NULL;
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
/*
******************************************************************
*/
/**
* Release all static memory held by Locale Utility.
*/
U_CDECL_BEGIN
// Cleanup callback: frees the top-level cache of available locale names and
// clears the global pointer. Always reports success.
static UBool U_CALLCONV service_cleanup(void) {
    // deleting a null pointer is a no-op, so no guard is needed
    delete LocaleUtility_cache;
    LocaleUtility_cache = NULL;
    return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
// Copies *id into result and fixes only the letter case of its leading part:
// ASCII letters before the first '_' are lowercased, and ASCII letters from
// there up to the end of the fixed range are uppercased. A NULL id makes
// result bogus. Returns result.
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
    if (id == NULL) {
        result.setToBogus();
        return result;
    }

    // Fix case only (no other changes). In 3.0 this was changed to stop at
    // the first '@' or '.' instead of running to the end of the string; the
    // change makes the tests pass but is probably structurally incorrect.
    // [alan 3.0]
    // TODO: Doug, you might want to revise this...
    // NOTE(review): when the string has a '.' but no '@', the '.' does not
    // limit the range (atPos is -1, so dotPos < limit is false) -- confirm
    // whether that is intended.
    result = *id;

    const int32_t atPos = result.indexOf(AT_SIGN_CHAR);
    const int32_t dotPos = result.indexOf(PERIOD_CHAR);
    int32_t limit = atPos;
    if (dotPos >= 0 && dotPos < limit) {
        limit = dotPos;
    }
    if (limit < 0) {
        limit = result.length();
    }

    int32_t langLimit = result.indexOf(UNDERSCORE_CHAR);
    if (langLimit < 0) {
        langLimit = limit;
    }

    int32_t pos = 0;
    // language part: A-Z -> a-z
    while (pos < langLimit) {
        const UChar c = result.charAt(pos);
        if (c >= 0x0041 && c <= 0x005a) {
            result.setCharAt(pos, (UChar)(c + 0x20));
        }
        ++pos;
    }
    // remainder up to the limit: a-z -> A-Z
    while (pos < limit) {
        const UChar c = result.charAt(pos);
        if (c >= 0x0061 && c <= 0x007a) {
            result.setCharAt(pos, (UChar)(c - 0x20));
        }
        ++pos;
    }
    return result;

#if 0
    // This code does a proper full level 2 canonicalization of id.
    // It's nasty to go from UChar to char to char to UChar -- but
    // that's what you have to do to use the uloc_canonicalize
    // function on UnicodeStrings.
    // I ended up doing the alternate fix (see above) not for
    // performance reasons, although performance will certainly be
    // better, but because doing a full level 2 canonicalization
    // causes some tests to fail. [alan 3.0]
    // TODO: Doug, you might want to revisit this...
    result.setToBogus();
    if (id != 0) {
        int32_t buflen = id->length() + 8; // space for NUL
        char* buf = (char*) uprv_malloc(buflen);
        char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
        if (buf != 0 && canon != 0) {
            U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
            UErrorCode ec = U_ZERO_ERROR;
            uloc_canonicalize(buf, canon, buflen, &ec);
            if (U_SUCCESS(ec)) {
                result = UnicodeString(canon);
            }
        }
        uprv_free(buf);
        uprv_free(canon);
    }
    return result;
#endif
}
// Sets result to the Locale named by id and returns result. A bogus id, or
// one that does not fit the 128-char conversion buffer, makes result bogus.
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
    enum { BUFLEN = 128 };  // larger than ever needed

    if (id.isBogus() || id.length() >= BUFLEN) {
        result.setToBogus();
        return result;
    }

    /*
     * A Locale is created from a char * name, so the UnicodeString has to
     * be converted first.
     *
     * Problem: locale IDs may contain '@', which is a variant character and
     * cannot go through invariant-character conversion.
     *
     * Hack: ICU code can handle locale IDs with multiple encodings of '@'
     * (at least for EBCDIC; it's not known to be a problem for ASCII-based
     * systems), so everything else is converted with the regular
     * invariant-character conversion and each U+0040 is written manually as
     * the compiler's char constant '@'. That compile-time constant may not
     * match the runtime encoding of '@', but it should be one of the
     * encodings which ICU recognizes.
     *
     * There should be at most one '@' in a locale ID.
     */
    char buffer[BUFLEN];
    int32_t start = 0;
    int32_t at = id.indexOf((UChar)0x40, start);

    while (at >= 0) {
        // normal invariant-character conversion for the text before this @
        id.extract(start, at - start, buffer + start, BUFLEN - start, US_INV);
        // manually "convert" U+0040 at id[at] into '@' at buffer[at]
        buffer[at] = '@';
        start = at + 1;
        at = id.indexOf((UChar)0x40, start);
    }
    // no further @: convert the rest of the string
    id.extract(start, INT32_MAX, buffer + start, BUFLEN - start, US_INV);

    result = Locale::createFromName(buffer);
    return result;
}
// Appends the name of locale to result (result is not cleared first), or
// makes result bogus when locale is bogus. Returns result.
UnicodeString&
LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
{
    if (!locale.isBogus()) {
        const UnicodeString name(locale.getName(), -1, US_INV);
        result.append(name);
    } else {
        result.setToBogus();
    }
    return result;
}
// Returns the set of locale names available for the resource bundle path
// bundleID, as a Hashtable whose keys are the names. The table is created on
// first request and cached; it is owned by the cache, so callers must not
// delete it. Returns NULL on failure (e.g. out of memory, or the available
// locales could not be enumerated).
const Hashtable*
LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
{
    // LocaleUtility_cache is a hash-of-hashes. The top-level keys
    // are path strings ('bundleID') passed to
    // ures_openAvailableLocales. The top-level values are
    // second-level hashes. The second-level keys are result strings
    // from ures_openAvailableLocales. The second-level values are
    // garbage ((void*)1 or other random pointer).
    UErrorCode status = U_ZERO_ERROR;
    Hashtable* cache;
    umtx_lock(NULL);
    cache = LocaleUtility_cache;
    umtx_unlock(NULL);

    if (cache == NULL) {
        cache = new Hashtable(status);
        if (cache == NULL || U_FAILURE(status)) {
            // catastrophic failure; e.g. out of memory.
            // Do not leak a table whose construction reported an error.
            delete cache;
            return NULL;
        }
        cache->setValueDeleter(uhash_deleteHashtable);
        Hashtable* h; // set this to final LocaleUtility_cache value
        umtx_lock(NULL);
        h = LocaleUtility_cache;
        if (h == NULL) {
            LocaleUtility_cache = h = cache;
            cache = NULL;
            ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
        }
        umtx_unlock(NULL);
        // Non-NULL only if another thread installed its cache first.
        delete cache;
        cache = h;
    }

    U_ASSERT(cache != NULL);

    Hashtable* htp;
    umtx_lock(NULL);
    htp = (Hashtable*) cache->get(bundleID);
    umtx_unlock(NULL);

    if (htp == NULL) {
        htp = new Hashtable(status);
        if (htp == NULL || U_FAILURE(status)) {
            // Never hand out (or leak) a table that failed to initialize.
            delete htp;
            return NULL;
        }

        CharString cbundleID(bundleID);
        const char* path = (const char*) cbundleID;
        if (*path == 0) path = NULL; // empty string => NULL
        UEnumeration *uenum = ures_openAvailableLocales(path, &status);
        for (;;) {
            const UChar* id = uenum_unext(uenum, NULL, &status);
            if (id == NULL) {
                break;
            }
            htp->put(UnicodeString(id), (void*)htp, status);
        }
        uenum_close(uenum);
        if (U_FAILURE(status)) {
            delete htp;
            return NULL;
        }

        // The table was built outside the lock, so another thread may have
        // cached one for the same bundleID in the meantime. Putting ours
        // would make the cache's value deleter destroy the table that the
        // other thread already returned to its caller; keep theirs instead.
        umtx_lock(NULL);
        Hashtable* existing = (Hashtable*) cache->get(bundleID);
        if (existing == NULL) {
            cache->put(bundleID, (void*)htp, status);
        }
        umtx_unlock(NULL);
        if (existing != NULL) {
            delete htp;
            htp = existing;
        }
    }
    return htp;
}
// Returns whether child starts with root and either equals root or continues
// with an underscore right after it.
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
    if (child.indexOf(root) != 0) {
        return FALSE;
    }
    const int32_t rootLen = root.length();
    return child.length() == rootLen ||
           child.charAt(rootLen) == UNDERSCORE_CHAR;
}
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

37
source/common/locutil.h Normal file
View file

@ -0,0 +1,37 @@
/**
*******************************************************************************
* Copyright (C) 2002-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#ifndef LOCUTIL_H
#define LOCUTIL_H
#include "unicode/utypes.h"
#include "hash.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
U_NAMESPACE_BEGIN
// temporary utility functions, till I know where to find them
// in header so tests can also access them
// Static helpers for converting between locale ID strings and Locale
// objects, and for querying the locales available for a resource bundle.
class U_COMMON_API LocaleUtility {
public:
// Copies *id into result with only the letter case of the leading
// language/region part fixed; result becomes bogus when id is NULL.
static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
// Sets result to the Locale named by id; result becomes bogus when id is
// bogus or too long for the internal conversion buffer.
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
// Appends the name of locale to result; result becomes bogus when locale is bogus.
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
// Returns a cached table whose keys are the locale names available for
// bundleID, or NULL on failure.
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
// Returns whether child equals root or starts with root followed by '_'.
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
};
U_NAMESPACE_END
#endif
#endif

20
source/common/msvcres.h Normal file
View file

@ -0,0 +1,20 @@
//{{NO_DEPENDENCIES}}
// Copyright (c) 2003-2005 International Business Machines
// Corporation and others. All Rights Reserved.
//
// Used by common.rc and other .rc files.
//Do not edit with Microsoft Developer Studio because it will modify this
//header the wrong way. This is here to prevent Visual Studio .NET from
//unnecessarily building the resource files when it's not needed.
//
/*
These are defined before unicode/uversion.h in order to prevent
STLPort's broken stddef.h from being used when rc.exe parses this file.
*/
#define _STLP_OUTERMOST_HEADER_ID 0
#define _STLP_WINCE 1
#include "unicode/uversion.h"
#define ICU_WEBSITE "http://ibm.com/software/globalization/icu/"

18
source/common/mutex.cpp Normal file
View file

@ -0,0 +1,18 @@
/**
*******************************************************************************
* Copyright (C) 2008, International Business Machines Corporation. *
* All Rights Reserved. *
*******************************************************************************
*/
#include "unicode/utypes.h"
#if UCONFIG_NO_SERVICE
/* If UCONFIG_NO_SERVICE, then there is no invocation of Mutex elsewhere in
common, so add one here to force an export */
#include "mutex.h"
static Mutex *aMutex = 0;
/* UCONFIG_NO_SERVICE */
#endif

77
source/common/mutex.h Normal file
View file

@ -0,0 +1,77 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
//----------------------------------------------------------------------------
// File: mutex.h
//
// Lightweight C++ wrapper for umtx_ C mutex functions
//
// Author: Alan Liu 1/31/97
// History:
// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
// 04/07/1999 srl refocused as a thin wrapper
//
//----------------------------------------------------------------------------
#ifndef MUTEX_H
#define MUTEX_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------
// Code that accesses shared static or global data should
// instantiate a Mutex object while doing so. You should make your own
// private mutex where possible.
// For example:
//
// UMTX myMutex;
//
// void Function(int arg1, int arg2)
// {
// static Object* foo; // Shared read-write object
// Mutex mutex(&myMutex); // or no args for the global lock
// foo->Method();
// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
// }
//
// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
// returning a Mutex. This is a common mistake which silently slips through the
// compiler!!
//
// Scoped lock: the constructor locks the given UMTX and the destructor
// unlocks it, so the lock is held for the lifetime of the object.
class U_COMMON_API Mutex : public UMemory {
public:
// Locks *mutex; with no argument (NULL) the global lock is used.
inline Mutex(UMTX *mutex = NULL);
// Unlocks the mutex that the constructor locked.
inline ~Mutex();
private:
UMTX *fMutex; // the mutex held by this object; not owned
Mutex(const Mutex &other); // forbid copying of this class
Mutex &operator=(const Mutex &other); // forbid copying of this class
};
// Remembers the mutex pointer and immediately acquires the lock via umtx_lock().
inline Mutex::Mutex(UMTX *mutex)
: fMutex(mutex)
{
umtx_lock(fMutex);
}
// Releases the lock taken by the constructor via umtx_unlock().
inline Mutex::~Mutex()
{
umtx_unlock(fMutex);
}
U_NAMESPACE_END
#endif // MUTEX_H
//eof

611
source/common/normlzr.cpp Normal file
View file

@ -0,0 +1,611 @@
/*
*************************************************************************
* COPYRIGHT:
* Copyright (c) 1996-2005, International Business Machines Corporation and
* others. All Rights Reserved.
*************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/uiter.h"
#include "unicode/normlzr.h"
#include "cmemory.h"
#include "unormimp.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------
// Creates a normalizer over str using the given mode and no options.
// The newly allocated StringCharacterIterator is handed to init(), which
// either stores it or deletes it.
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
UObject(), fUMode(mode), fOptions(0),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init(new StringCharacterIterator(str));
}
// Creates a normalizer over the UChar array str of the given length, using
// the given mode and no options. The newly allocated UCharCharacterIterator
// is handed to init(), which either stores it or deletes it.
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
UObject(), fUMode(mode), fOptions(0),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init(new UCharCharacterIterator(str, length));
}
// Creates a normalizer over a clone of iter, using the given mode and no
// options; the caller's iterator is not retained.
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
UObject(), fUMode(mode), fOptions(0),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init(iter.clone());
}
// Copy constructor: copies mode, options, indices and buffer state, and
// iterates over a clone of the other normalizer's character iterator.
// NOTE(review): copy.text is dereferenced without a NULL check, although
// init() leaves text NULL when uprv_malloc() fails -- confirm whether that
// case needs handling here.
Normalizer::Normalizer(const Normalizer &copy) :
UObject(copy), fUMode(copy.fUMode), fOptions(copy.fOptions),
currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
buffer(copy.buffer), bufferPos(copy.bufferPos)
{
init(((CharacterIterator *)(copy.text->context))->clone());
}
static const UChar _NUL=0;
// Allocates the UCharIterator wrapper and attaches iter to it. Takes
// ownership of iter: it is deleted when the wrapper cannot be allocated
// (text stays NULL) and is replaced by an empty iterator when the
// normalization data is not available.
void
Normalizer::init(CharacterIterator *iter) {
    UErrorCode errorCode=U_ZERO_ERROR;

    text=(UCharIterator *)uprv_malloc(sizeof(UCharIterator));
    if(text==NULL) {
        delete iter;
        return;
    }

    if(!unorm_haveData(&errorCode)) {
        // no data: iterate over an empty string instead
        delete iter;
        iter=new UCharCharacterIterator(&_NUL, 0);
    }
    uiter_setCharacterIterator(text, iter);
}
// Deletes the wrapped character iterator and frees the UCharIterator
// wrapper, if one was allocated.
Normalizer::~Normalizer()
{
    if(text==NULL) {
        return;
    }
    CharacterIterator *iter=(CharacterIterator *)text->context;
    delete iter;
    uprv_free(text);
}
// Returns a newly allocated copy of this normalizer (NULL if allocation
// fails with a non-throwing operator new). The caller owns the result.
//
// The former "this!=0" test was removed: calling a member function through
// a null pointer is undefined behavior, so compilers may assume the test is
// always true and drop it; it never provided real protection.
Normalizer*
Normalizer::clone() const
{
    return new Normalizer(*this);
}
/**
 * Generates a hash code for this iterator: the sum of the source iterator's
 * hash code, the mode, the options, the buffer's hash code and the three
 * position fields.
 */
int32_t Normalizer::hashCode() const
{
    const CharacterIterator *source=(CharacterIterator *)(text->context);
    int32_t hash=source->hashCode();
    hash+=fUMode;
    hash+=fOptions;
    hash+=buffer.hashCode();
    hash+=bufferPos;
    hash+=currentIndex;
    hash+=nextIndex;
    return hash;
}
// Two normalizers are equal when they are the same object, or when mode,
// options, source iterator, buffer, buffer position and next index all
// compare equal. currentIndex is not part of the comparison.
UBool Normalizer::operator==(const Normalizer& that) const
{
    if(this==&that) {
        return TRUE;
    }
    if(fUMode!=that.fUMode || fOptions!=that.fOptions) {
        return FALSE;
    }
    const CharacterIterator &mine=*((CharacterIterator *)(text->context));
    const CharacterIterator &theirs=*((CharacterIterator *)(that.text->context));
    return
        mine==theirs &&
        buffer==that.buffer &&
        bufferPos==that.bufferPos &&
        nextIndex==that.nextIndex;
}
//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------
// Normalizes source into result according to mode and options.
// A bogus source or an already failed status makes result bogus (and sets
// U_ILLEGAL_ARGUMENT_ERROR if status was still a success). source and result
// may be the same object. On any failure result is set to bogus.
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source,
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // when source and result are the same object, write to a temporary
    UnicodeString scratch;
    const UBool aliased=(&source==&result);
    UnicodeString &target=aliased ? scratch : result;

    UChar *dst=target.getBuffer(source.length());
    int32_t outLen=unorm_internalNormalize(dst, target.getCapacity(),
                                           source.getBuffer(), source.length(),
                                           mode, options,
                                           &status);
    target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // first pass reported the needed length; retry with that capacity
        status=U_ZERO_ERROR;
        dst=target.getBuffer(outLen);
        outLen=unorm_internalNormalize(dst, target.getCapacity(),
                                       source.getBuffer(), source.length(),
                                       mode, options,
                                       &status);
        target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);
    }

    if(aliased) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}
// Composes source into result (compatibility composition when compat is
// set). A bogus source or an already failed status makes result bogus (and
// sets U_ILLEGAL_ARGUMENT_ERROR if status was still a success). source and
// result may be the same object. On any failure result is set to bogus.
void U_EXPORT2
Normalizer::compose(const UnicodeString& source,
                    UBool compat, int32_t options,
                    UnicodeString& result,
                    UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // when source and result are the same object, write to a temporary
    UnicodeString scratch;
    const UBool aliased=(&source==&result);
    UnicodeString &target=aliased ? scratch : result;

    UChar *dst=target.getBuffer(source.length());
    int32_t outLen=unorm_compose(dst, target.getCapacity(),
                                 source.getBuffer(), source.length(),
                                 compat, options,
                                 &status);
    target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // first pass reported the needed length; retry with that capacity
        status=U_ZERO_ERROR;
        dst=target.getBuffer(outLen);
        outLen=unorm_compose(dst, target.getCapacity(),
                             source.getBuffer(), source.length(),
                             compat, options,
                             &status);
        target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);
    }

    if(aliased) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}
// Decomposes source into result (compatibility decomposition when compat is
// set). A bogus source or an already failed status makes result bogus (and
// sets U_ILLEGAL_ARGUMENT_ERROR if status was still a success). source and
// result may be the same object. On any failure result is set to bogus.
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source,
                      UBool compat, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    if(source.isBogus() || U_FAILURE(status)) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }

    // when source and result are the same object, write to a temporary
    UnicodeString scratch;
    const UBool aliased=(&source==&result);
    UnicodeString &target=aliased ? scratch : result;

    UChar *dst=target.getBuffer(source.length());
    int32_t outLen=unorm_decompose(dst, target.getCapacity(),
                                   source.getBuffer(), source.length(),
                                   compat, options,
                                   &status);
    target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);

    if(status==U_BUFFER_OVERFLOW_ERROR) {
        // first pass reported the needed length; retry with that capacity
        status=U_ZERO_ERROR;
        dst=target.getBuffer(outLen);
        outLen=unorm_decompose(dst, target.getCapacity(),
                               source.getBuffer(), source.length(),
                               compat, options,
                               &status);
        target.releaseBuffer(U_SUCCESS(status) ? outLen : 0);
    }

    if(aliased) {
        result=scratch;
    }
    if(U_FAILURE(status)) {
        result.setToBogus();
    }
}
// Concatenates left and right into result via unorm_concatenate() with the
// given mode and options, and returns result.
// A bogus input or an already failed errorCode makes result bogus (and sets
// U_ILLEGAL_ARGUMENT_ERROR if errorCode was still a success). result may be
// the same object as left or right. On any failure result is set to bogus.
UnicodeString & U_EXPORT2
Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
    } else {
        UnicodeString localDest;
        UnicodeString *dest;

        if(&left!=&result && &right!=&result) {
            dest=&result;
        } else {
            // the source and result strings are the same object, use a temporary one
            dest=&localDest;
        }

        UChar *buffer=dest->getBuffer(left.length()+right.length());
        int32_t length=unorm_concatenate(left.getBuffer(), left.length(),
                                         right.getBuffer(), right.length(),
                                         buffer, dest->getCapacity(),
                                         mode, options,
                                         &errorCode);
        dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
            // The first pass reported the needed length; retry with that
            // capacity. The outer 'length' is reused here rather than
            // declaring a second variable that shadows it.
            errorCode=U_ZERO_ERROR;
            buffer=dest->getBuffer(length);
            length=unorm_concatenate(left.getBuffer(), left.length(),
                                     right.getBuffer(), right.length(),
                                     buffer, dest->getCapacity(),
                                     mode, options,
                                     &errorCode);
            dest->releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
        }

        if(dest==&localDest) {
            result=*dest;
        }
        if(U_FAILURE(errorCode)) {
            result.setToBogus();
        }
    }
    return result;
}
//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------
/**
 * Return the current character in the normalized text, or DONE when the
 * buffer is exhausted and no further text can be normalized.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}
/**
 * Return the next character in the normalized text and advance
 * the iteration position past it. If the end
 * of the text has already been reached, {@link #DONE} is returned.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos);
    bufferPos+=UTF_CHAR_LENGTH(c);
    return c;
}
/**
 * Return the previous character in the normalized text and move
 * the iteration position back before it. If the beginning
 * of the text has already been reached, {@link #DONE} is returned.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos-1);
    bufferPos-=UTF_CHAR_LENGTH(c);
    return c;
}
/**
 * Move the input iterator to its start, point both input indexes at that
 * position and discard any buffered normalized text.
 */
void Normalizer::reset() {
    nextIndex=text->move(text, 0, UITER_START);
    currentIndex=nextIndex;
    clearBuffer();
}
/**
 * Position the input iterator at the given index, point both input indexes
 * at the resulting position and discard any buffered normalized text.
 * Nothing is normalized here.
 */
void
Normalizer::setIndexOnly(int32_t index) {
    nextIndex=text->move(text, index, UITER_ZERO); // validates index
    currentIndex=nextIndex;
    clearBuffer();
}
/**
 * Return the first character in the normalized text. This resets
 * the <tt>Normalizer's</tt> position to the beginning of the text.
 */
UChar32 Normalizer::first() {
reset();
return next();
}
/**
 * Return the last character in the normalized text. This resets
 * the <tt>Normalizer's</tt> position to be just before the
 * input text corresponding to that normalized character.
 */
UChar32 Normalizer::last() {
    // Start from the limit of the input and walk backwards from there.
    nextIndex=text->move(text, 0, UITER_LIMIT);
    currentIndex=nextIndex;
    clearBuffer();
    return previous();
}
/**
 * Retrieve the current iteration position in the input text that is
 * being normalized. This method is useful in applications such as
 * searching, where you need to be able to determine the position in
 * the input text that corresponds to a given normalized output character.
 * <p>
 * <b>Note:</b> This method reports a position in the <em>input</em>, while
 * {@link #next} and {@link #previous} iterate through characters in the
 * <em>output</em>. This means that there is not necessarily a one-to-one
 * correspondence between characters returned by <tt>next</tt> and
 * <tt>previous</tt> and the indices passed to and returned from
 * <tt>setIndex</tt> and {@link #getIndex}.
 *
 * @return currentIndex while buffered output remains to be read,
 *         otherwise nextIndex.
 */
int32_t Normalizer::getIndex() const {
    return bufferPos<buffer.length() ? currentIndex : nextIndex;
}
/**
 * Retrieve the index of the start of the input text. This is the begin index
 * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating.
 */
int32_t Normalizer::startIndex() const {
return text->getIndex(text, UITER_START);
}
/**
 * Retrieve the index of the end of the input text. This is the end index
 * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
 * over which this <tt>Normalizer</tt> is iterating.
 */
int32_t Normalizer::endIndex() const {
return text->getIndex(text, UITER_LIMIT);
}
//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------
/**
 * Store the normalization mode that later nextNormalize() and
 * previousNormalize() calls pass on to unorm_next()/unorm_previous().
 * The buffer and the iteration position are not touched here.
 */
void
Normalizer::setMode(UNormalizationMode newMode)
{
fUMode = newMode;
}
/**
 * Return the normalization mode currently stored in this object.
 */
UNormalizationMode
Normalizer::getUMode() const
{
return fUMode;
}
/**
 * Turn the given option bit(s) on or off in the stored option set.
 * @param option bit mask of the option(s) to change
 * @param value  TRUE sets the bits, FALSE clears them
 */
void
Normalizer::setOption(int32_t option,
                      UBool value)
{
    fOptions = value ? (fOptions | option) : (fOptions & (~option));
}
/**
 * Test whether any of the given option bit(s) is set.
 * @param option bit mask to test against the stored options
 * @return TRUE if at least one bit of option is set in the stored options
 */
UBool
Normalizer::getOption(int32_t option) const
{
return (fOptions & option) != 0;
}
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the input text->
*/
void
Normalizer::setText(const UnicodeString& newText,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = new StringCharacterIterator(newText);
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the string.
*/
void
Normalizer::setText(const CharacterIterator& newText,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = newText.clone();
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
void
Normalizer::setText(const UChar* newText,
int32_t length,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete (CharacterIterator *)(text->context);
text->context = newIter;
reset();
}
/**
 * Copies the text under iteration into the UnicodeString referred to by "result".
 * The work is delegated to the CharacterIterator stored in text->context.
 * @param result Receives a copy of the text under iteration.
 */
void
Normalizer::getText(UnicodeString& result)
{
((CharacterIterator *)(text->context))->getText(result);
}
//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------
/**
 * Discard the buffered normalized text and reset the read position
 * within the buffer to 0.
 */
void Normalizer::clearBuffer() {
buffer.remove();
bufferPos=0;
}
/**
 * Normalize the next piece of input, starting at nextIndex, into the buffer.
 *
 * On return currentIndex holds the input index where this piece started
 * and nextIndex the input index just after it; bufferPos is 0.
 *
 * @return TRUE if normalization succeeded and produced a non-empty buffer;
 *         FALSE at the end of the input or on error.
 */
UBool
Normalizer::nextNormalize() {
UChar *p;
int32_t length;
UErrorCode errorCode;
clearBuffer();
/* the piece normalized now begins where the previous one ended */
currentIndex=nextIndex;
text->move(text, nextIndex, UITER_ZERO);
if(!text->hasNext(text)) {
return FALSE;
}
errorCode=U_ZERO_ERROR;
/* first attempt: write into the buffer's existing storage
   (-1 presumably requests the current capacity - confirm against UnicodeString::getBuffer) */
p=buffer.getBuffer(-1);
length=unorm_next(text, p, buffer.getCapacity(),
fUMode, fOptions,
TRUE, 0,
&errorCode);
/* on failure, leave the buffer empty rather than with partial output */
buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* second attempt: rewind the iterator and retry with a buffer of at
   least the length reported by the first call */
errorCode=U_ZERO_ERROR;
text->move(text, nextIndex, UITER_ZERO);
p=buffer.getBuffer(length);
length=unorm_next(text, p, buffer.getCapacity(),
fUMode, fOptions,
TRUE, 0,
&errorCode);
buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
/* remember how far the iterator got in the input */
nextIndex=text->getIndex(text, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}
/**
 * Normalize the piece of input that ends at currentIndex into the buffer.
 *
 * On return nextIndex holds the input index where this piece ended,
 * currentIndex the input index where it starts, and bufferPos is set to the
 * end of the buffer so that previous() can read it backwards.
 *
 * @return TRUE if normalization succeeded and produced a non-empty buffer;
 *         FALSE at the start of the input or on error.
 */
UBool
Normalizer::previousNormalize() {
UChar *p;
int32_t length;
UErrorCode errorCode;
clearBuffer();
/* the piece normalized now ends where the previous one started */
nextIndex=currentIndex;
text->move(text, currentIndex, UITER_ZERO);
if(!text->hasPrevious(text)) {
return FALSE;
}
errorCode=U_ZERO_ERROR;
/* first attempt: write into the buffer's existing storage
   (-1 presumably requests the current capacity - confirm against UnicodeString::getBuffer) */
p=buffer.getBuffer(-1);
length=unorm_previous(text, p, buffer.getCapacity(),
fUMode, fOptions,
TRUE, 0,
&errorCode);
/* on failure, leave the buffer empty rather than with partial output */
buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
/* second attempt: rewind the iterator and retry with a buffer of at
   least the length reported by the first call */
errorCode=U_ZERO_ERROR;
text->move(text, currentIndex, UITER_ZERO);
p=buffer.getBuffer(length);
length=unorm_previous(text, p, buffer.getCapacity(),
fUMode, fOptions,
TRUE, 0,
&errorCode);
buffer.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
}
/* backward iteration reads the buffer from its end */
bufferPos=buffer.length();
currentIndex=text->getIndex(text, UITER_CURRENT);
return U_SUCCESS(errorCode) && !buffer.isEmpty();
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

View file

@ -0,0 +1,21 @@
/*
**********************************************************************
* Copyright (C) 2003-2003, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/parsepos.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition)
/** Destructor; defined out of line here, with nothing to release. */
ParsePosition::~ParsePosition() {}
/**
 * Create a heap-allocated copy of this object via the copy constructor.
 * @return the new object; the result of new is returned unchecked, and the
 *         caller presumably takes ownership - confirm against the header.
 */
ParsePosition *
ParsePosition::clone() const {
return new ParsePosition(*this);
}
U_NAMESPACE_END

758
source/common/propname.cpp Normal file
View file

@ -0,0 +1,758 @@
/*
**********************************************************************
* Copyright (c) 2002-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "ucln_cmn.h"
#include "uarrsort.h"
U_CDECL_BEGIN
/**
 * Get the next non-ignorable ASCII character from a property name
 * and lowercase it.
 *
 * The advance count includes every ignored character plus the returned
 * character itself; at the end of the string it also counts the
 * terminating NUL and the character part of the result is 0.
 *
 * @return ((advance count for the name)<<8)|character
 */
static inline int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Ignore delimiters '-', '_', and ASCII White_Space */
    do {
        ch=name[consumed++];
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));

    if(ch==0) {
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_asciitolower((char)ch);
}
/**
 * Get the next non-ignorable EBCDIC character from a property name
 * and lowercase it.
 *
 * The advance count includes every ignored character plus the returned
 * character itself; at the end of the string it also counts the
 * terminating NUL and the character part of the result is 0.
 *
 * @return ((advance count for the name)<<8)|character
 */
static inline int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
    do {
        ch=name[consumed++];
    } while(ch==0x60 || ch==0x6d ||
            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 || ch==0x0b || ch==0x0c || ch==0x0d);

    if(ch==0) {
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_ebcdictolower((char)ch);
}
/**
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* each result packs (advance count<<8)|lowercased character */
        const int32_t packed1=getASCIIPropertyNameChar(name1);
        const int32_t packed2=getASCIIPropertyNameChar(name2);
        const int32_t ch1=packed1&0xff;
        const int32_t ch2=packed2&0xff;

        /* If we reach the ends of both strings then they match */
        if((ch1|ch2)==0) {
            return 0;
        }

        /* Compare the lowercased characters; advance counts may differ freely */
        if(ch1!=ch2) {
            return ch1-ch2;
        }

        name1+=packed1>>8;
        name2+=packed2>>8;
    }
}
/**
 * Loose comparison of two EBCDIC property names: case, '-', '_' and
 * EBCDIC white space are ignored, as implemented by
 * getEBCDICPropertyNameChar().
 *
 * @return 0 if the names match loosely, otherwise the difference of the
 *         first pair of lowercased characters that differ
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* each result packs (advance count<<8)|lowercased character */
        const int32_t packed1=getEBCDICPropertyNameChar(name1);
        const int32_t packed2=getEBCDICPropertyNameChar(name2);
        const int32_t ch1=packed1&0xff;
        const int32_t ch2=packed2&0xff;

        /* If we reach the ends of both strings then they match */
        if((ch1|ch2)==0) {
            return 0;
        }

        /* Compare the lowercased characters; advance counts may differ freely */
        if(ch1!=ch2) {
            return ch1-ch2;
        }

        name1+=packed1>>8;
        name2+=packed2>>8;
    }
}
U_CDECL_END
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------
// PropertyAliases implementation
/**
 * Pick one name out of a name group.
 *
 * A name group is an array of Offsets to strings; the last entry of the
 * group is stored negated to mark the end of the list.
 *
 * @param offset offset of the name group; 0 yields NULL
 * @param choice index of the wanted name within the group
 * @return the selected name, or NULL if choice is negative, lies beyond
 *         the end of the group, or selects an entry whose offset is 0
 */
const char*
PropertyAliases::chooseNameInGroup(Offset offset,
                                   UPropertyNameChoice choice) const {
    const int32_t wanted = choice;
    if (offset == 0 || wanted < 0) {
        return NULL;
    }
    const Offset* group = (const Offset*) getPointer(offset);
    // Every entry before the wanted one must not be the (negative) last entry.
    for (int32_t i = 0; i < wanted; ++i) {
        if (group[i] < 0) {
            return NULL;
        }
    }
    Offset entry = group[wanted];
    if (entry < 0) {
        // the wanted entry is the end marker: recover the real offset
        entry = -entry;
    }
    return (const char*) getPointerNull(entry);
}
/**
 * Look up the ValueMap of a property through the enum=>value-map table
 * at enumToValue_offset.
 * @return the ValueMap, or NULL if the table yields offset 0 for prop
 */
const ValueMap*
PropertyAliases::getValueMap(EnumValue prop) const {
    NonContiguousEnumToOffset* propToMap =
        (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
    const Offset mapOffset = propToMap->getOffset(prop);
    if (!mapOffset) {
        return NULL;
    }
    return (const ValueMap*) getPointerNull(mapOffset);
}
/**
 * Return a name of a property: look up the property's name group in the
 * table at enumToName_offset, then select the requested name from the
 * group with chooseNameInGroup().
 */
inline const char*
PropertyAliases::getPropertyName(EnumValue prop,
UPropertyNameChoice choice) const {
NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
return chooseNameInGroup(e2n->getOffset(prop), choice);
}
/**
 * Map a property name or alias to its enum value using the NameToEnum
 * table at nameToEnum_offset. The result for an unknown alias is whatever
 * NameToEnum::getEnum() returns (not visible here).
 */
inline EnumValue
PropertyAliases::getPropertyEnum(const char* alias) const {
NameToEnum* n2e = (NameToEnum*) getPointer(nameToEnum_offset);
return n2e->getEnum(alias, *this);
}
/**
 * Return a name of a property value.
 *
 * The property's ValueMap supplies the enum=>name-group table, either the
 * contiguous form (enumToName_offset non-zero) or the non-contiguous form;
 * the requested name is then selected from the group.
 *
 * @return the name, or NULL if the property has no ValueMap or
 *         chooseNameInGroup() finds no such name
 */
inline const char*
PropertyAliases::getPropertyValueName(EnumValue prop,
                                      EnumValue value,
                                      UPropertyNameChoice choice) const {
    const ValueMap* valueMap = getValueMap(prop);
    if (valueMap == NULL) {
        return NULL;
    }
    Offset groupOffset;
    if (valueMap->enumToName_offset != 0) {
        EnumToOffset* contiguous =
            (EnumToOffset*) getPointer(valueMap->enumToName_offset);
        groupOffset = contiguous->getOffset(value);
    } else {
        NonContiguousEnumToOffset* sparse =
            (NonContiguousEnumToOffset*) getPointer(valueMap->ncEnumToName_offset);
        groupOffset = sparse->getOffset(value);
    }
    return chooseNameInGroup(groupOffset, choice);
}
/**
 * Map a property value name or alias to its enum value, using the
 * NameToEnum table of the property's ValueMap.
 * @return UCHAR_INVALID_CODE if the property has no ValueMap; otherwise
 *         whatever NameToEnum::getEnum() returns for the alias
 */
inline EnumValue
PropertyAliases::getPropertyValueEnum(EnumValue prop,
const char* alias) const {
const ValueMap* vm = getValueMap(prop);
if (!vm) return UCHAR_INVALID_CODE;
NameToEnum* n2e = (NameToEnum*) getPointer(vm->nameToEnum_offset);
return n2e->getEnum(alias, *this);
}
U_NAMESPACE_END
U_NAMESPACE_USE
//----------------------------------------------------------------------
// UDataMemory structures
static const PropertyAliases* PNAME = NULL;
static UDataMemory* UDATA = NULL;
//----------------------------------------------------------------------
// UDataMemory loading/unloading
/**
 * udata callback to verify the property names (pnames) data.
 */
U_CDECL_BEGIN
static UBool U_CALLCONV
isPNameAcceptable(void* /*context*/,
                  const char* /*type*/, const char* /*name*/,
                  const UDataInfo* info) {
    /* the data must have been built for this platform's endianness and charset */
    if (info->size < sizeof(UDataInfo) ||
        info->isBigEndian != U_IS_BIG_ENDIAN ||
        info->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }
    /* the data format signature must match */
    if (info->dataFormat[0] != PNAME_SIG_0 ||
        info->dataFormat[1] != PNAME_SIG_1 ||
        info->dataFormat[2] != PNAME_SIG_2 ||
        info->dataFormat[3] != PNAME_SIG_3) {
        return FALSE;
    }
    /* only the major format version is checked */
    if (info->formatVersion[0] != PNAME_FORMAT_VERSION) {
        return FALSE;
    }
    return TRUE;
}
/**
 * Cleanup callback registered by _load(): close the data if it is open
 * and reset both file-level pointers so that a later load() starts over.
 * @return always TRUE
 */
static UBool U_CALLCONV pname_cleanup(void) {
    if (UDATA != NULL) {
        udata_close(UDATA);
    }
    UDATA = NULL;
    PNAME = NULL;
    return TRUE;
}
U_CDECL_END
/**
* Load the property names data. Caller should check that data is
* not loaded BEFORE calling this function. Returns TRUE if the load
* succeeds.
*/
static UBool _load() {
UErrorCode ec = U_ZERO_ERROR;
/* open the data outside the lock; several threads may do this at once */
UDataMemory* data =
udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
isPNameAcceptable, 0, &ec);
if (U_SUCCESS(ec)) {
/* NULL presumably selects ICU's global mutex - confirm in umutex.h */
umtx_lock(NULL);
if (UDATA == NULL) {
/* no other caller has stored data yet: keep ours and register the cleanup */
UDATA = data;
PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
ucln_common_registerCleanup(UCLN_COMMON_PNAME, pname_cleanup);
/* ownership moved to UDATA; prevents the close below */
data = NULL;
}
umtx_unlock(NULL);
}
/* still set only if the data was opened but UDATA had already been set: close our copy */
if (data) {
udata_close(data);
}
return PNAME!=NULL;
}
/**
* Inline function that expands to code that does a lazy load of the
* property names data. If the data is already loaded, avoids an
* unnecessary function call. If the data is not loaded, call _load()
* to load it, and return TRUE if the load succeeds.
*/
static inline UBool load() {
UBool f;
/* UMTX_CHECK presumably evaluates (PNAME!=NULL) under the given mutex and
   stores the result in f - confirm against umutex.h */
UMTX_CHECK(NULL, (PNAME!=NULL), f);
/* _load() is only called when the data is not yet available */
return f || _load();
}
//----------------------------------------------------------------------
// Public API implementation
// The C API is just a thin wrapper. Each function obtains a pointer
// to the singleton PropertyAliases, and calls the appropriate method
// on it. If it cannot obtain a pointer, because valid data is not
// available, then it returns NULL or UCHAR_INVALID_CODE.
/**
 * C API wrapper: return a name of the given property, or NULL if the
 * property names data cannot be loaded.
 */
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice) {
    if (!load()) {
        return NULL;
    }
    return PNAME->getPropertyName(property, nameChoice);
}
/**
 * C API wrapper: map a property name or alias to its UProperty, or return
 * UCHAR_INVALID_CODE if the property names data cannot be loaded.
 */
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
    if (!load()) {
        return UCHAR_INVALID_CODE;
    }
    return (UProperty) PNAME->getPropertyEnum(alias);
}
/**
 * C API wrapper: return a name of the given value of the given property,
 * or NULL if the property names data cannot be loaded.
 */
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice) {
    if (!load()) {
        return NULL;
    }
    return PNAME->getPropertyValueName(property, value, nameChoice);
}
/**
 * C API wrapper: map a property value name or alias to its enum value,
 * or return UCHAR_INVALID_CODE if the property names data cannot be loaded.
 */
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias) {
    if (!load()) {
        return (int32_t)UCHAR_INVALID_CODE;
    }
    return PNAME->getPropertyValueEnum(property, alias);
}
/* data swapping ------------------------------------------------------------ */
/*
* Sub-structure-swappers use the temp array (which is as large as the
* actual data) for intermediate storage,
* as well as to indicate if a particular structure has been swapped already.
* The temp array is initially reset to all 0.
* pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays.
*/
/**
 * Swap one EnumToOffset structure located at byte offset pos.
 *
 * The matching region of temp records enumStart/enumLimit in platform
 * byte order once the structure has been handled, so that a structure
 * referenced more than once is only swapped once.
 *
 * @param ds         the swapper
 * @param inBytes    start of the input pnames data (after the header)
 * @param length     number of available bytes, or negative for preflighting
 *                   (only the size is computed then)
 * @param outBytes   start of the output pnames data
 * @param temp       zero-initialized scratch array as large as the data
 * @param pos        byte offset of this structure in inBytes/outBytes/temp
 * @param pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the structure does
 *                   not fit into length bytes
 * @return the size of the structure in bytes, or 0 on error
 */
int32_t
EnumToOffset::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const EnumToOffset *inMap;
    EnumToOffset *outMap, *tempMap;
    int32_t size;

    tempMap=(EnumToOffset *)(temp+pos);
    if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const EnumToOffset *)(inBytes+pos);
    outMap=(EnumToOffset *)(outBytes+pos);

    tempMap->enumStart=udata_readInt32(ds, inMap->enumStart);
    tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The structure must lie completely inside the available data.
         * (This used to be reported only if length was also smaller than
         * sizeof(PropertyAliases), which upname_swap() has already ruled
         * out, so an oversized structure was never detected.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n"
                                 " for pnames.icu EnumToOffset{%d..%d} at %d\n",
                             length, tempMap->enumStart, tempMap->enumLimit, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap enumStart and enumLimit */
        ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode);

        /* swap _offsetArray[] */
        ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset),
                        outMap->getOffsetArray(), pErrorCode);
    }

    return size;
}
/**
 * Swap one NonContiguousEnumToOffset structure located at byte offset pos.
 *
 * The matching region of temp records the count in platform byte order
 * once the structure has been handled, so that a structure referenced
 * more than once is only swapped once.
 *
 * @param ds         the swapper
 * @param inBytes    start of the input pnames data (after the header)
 * @param length     number of available bytes, or negative for preflighting
 *                   (only the size is computed then)
 * @param outBytes   start of the output pnames data
 * @param temp       zero-initialized scratch array as large as the data
 * @param pos        byte offset of this structure in inBytes/outBytes/temp
 * @param pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the structure does
 *                   not fit into length bytes
 * @return the size of the structure in bytes, or 0 on error
 */
int32_t
NonContiguousEnumToOffset::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const NonContiguousEnumToOffset *inMap;
    NonContiguousEnumToOffset *outMap, *tempMap;
    int32_t size;

    tempMap=(NonContiguousEnumToOffset *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NonContiguousEnumToOffset *)(inBytes+pos);
    outMap=(NonContiguousEnumToOffset *)(outBytes+pos);

    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The structure must lie completely inside the available data.
         * (This used to be reported only if length was also smaller than
         * sizeof(PropertyAliases), which upname_swap() has already ruled
         * out, so an oversized structure was never detected.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n"
                                 " for pnames.icu NonContiguousEnumToOffset[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count and _enumArray[] */
        const int32_t enumBytes=(int32_t)((1+tempMap->count)*sizeof(EnumValue));
        ds->swapArray32(ds, inMap, enumBytes,
                        outMap, pErrorCode);

        /* swap _offsetArray[], which follows count and _enumArray[] */
        const int32_t offsetsPos=pos+enumBytes;
        ds->swapArray16(ds, inBytes+offsetsPos, tempMap->count*sizeof(Offset),
                        outBytes+offsetsPos, pErrorCode);
    }

    return size;
}
/**
 * Sort row used by NameToEnum::swap(): the offset of a name string plus
 * the row's index before sorting, so that the enum array can be permuted
 * the same way as the names.
 */
struct NameAndIndex {
Offset name, index;
};
U_CDECL_BEGIN
/** Signature shared by uprv_compareASCIIPropertyNames() and uprv_compareEBCDICPropertyNames(). */
typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
/** Context handed to upname_compareRows() through uprv_sortArray(). */
struct CompareContext {
const char *chars; /* base address that NameAndIndex::name offsets are added to */
PropNameCompareFn *propCompare; /* comparison function for the output charset family */
};
/**
 * uprv_sortArray() comparator for NameAndIndex rows: compares the two name
 * strings the rows refer to, using the comparison function from the context.
 */
static int32_t U_CALLCONV
upname_compareRows(const void *context, const void *left, const void *right) {
    const CompareContext *cmp=(const CompareContext *)context;
    const NameAndIndex *leftRow=(const NameAndIndex *)left;
    const NameAndIndex *rightRow=(const NameAndIndex *)right;
    return cmp->propCompare(cmp->chars+leftRow->name,
                            cmp->chars+rightRow->name);
}
U_CDECL_END
/**
 * Swap one NameToEnum structure located at byte offset pos.
 *
 * The enum and name arrays are sorted by name. If the input and output
 * charset families differ, the sort order of the names changes, so the
 * arrays are re-sorted for the output charset while being swapped.
 * The strings in outBytes must already have been swapped by the caller.
 *
 * The matching region of temp records the count in platform byte order
 * once the structure has been handled, and also serves as scratch space
 * for sorting.
 *
 * @param ds         the swapper
 * @param inBytes    start of the input pnames data (after the header)
 * @param length     number of available bytes, or negative for preflighting
 *                   (only the size is computed then)
 * @param outBytes   start of the output pnames data
 * @param temp       zero-initialized scratch array as large as the data
 * @param pos        byte offset of this structure in inBytes/outBytes/temp
 * @param pErrorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the structure does
 *                   not fit into length bytes; may also receive an error
 *                   from uprv_sortArray()
 * @return the size of the structure in bytes, or 0 on error
 */
int32_t
NameToEnum::swap(const UDataSwapper *ds,
                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
                   uint8_t *temp, int32_t pos,
                   UErrorCode *pErrorCode) {
    const NameToEnum *inMap;
    NameToEnum *outMap, *tempMap;

    const EnumValue *inEnumArray;
    EnumValue *outEnumArray;

    const Offset *inNameArray;
    Offset *outNameArray;

    NameAndIndex *sortArray;
    CompareContext cmp;

    int32_t i, size, oldIndex;

    tempMap=(NameToEnum *)(temp+pos);
    if(tempMap->count!=0) {
        /* this map was swapped already */
        size=tempMap->getSize();
        return size;
    }

    inMap=(const NameToEnum *)(inBytes+pos);
    outMap=(NameToEnum *)(outBytes+pos);

    tempMap->count=udata_readInt32(ds, inMap->count);
    size=tempMap->getSize();

    if(length>=0) {
        /*
         * The structure must lie completely inside the available data.
         * (This used to be reported only if length was also smaller than
         * sizeof(PropertyAliases), which upname_swap() has already ruled
         * out, so an oversized structure was never detected.)
         */
        if(length<(pos+size)) {
            udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
                                 " for pnames.icu NameToEnum[%d] at %d\n",
                             length, tempMap->count, pos);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* swap count */
        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);

        inEnumArray=inMap->getEnumArray();
        outEnumArray=outMap->getEnumArray();

        /* the name array directly follows the enum array */
        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
        outNameArray=(Offset *)(outEnumArray+tempMap->count);

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap the enum/name arrays */
            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
            return size;
        }

        /*
         * The name and enum arrays are sorted by names and must be resorted
         * if inCharset!=outCharset.
         * We use the corresponding part of the temp array to sort an array
         * of pairs of name offsets and sorting indexes.
         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
         *
         * The outBytes must already contain the swapped strings.
         */
        sortArray=(NameAndIndex *)tempMap->getEnumArray();
        for(i=0; i<tempMap->count; ++i) {
            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
            sortArray[i].index=(Offset)i;
        }

        /*
         * use a stable sort to avoid shuffling of equal strings,
         * which makes testing harder
         */
        cmp.chars=(const char *)outBytes;
        if (ds->outCharset==U_ASCII_FAMILY) {
            cmp.propCompare=uprv_compareASCIIPropertyNames;
        }
        else {
            cmp.propCompare=uprv_compareEBCDICPropertyNames;
        }
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
                       upname_compareRows, &cmp,
                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
                             tempMap->count);
            return 0;
        }

        /* copy/swap/permutate _enumArray[] and _nameArray[] */
        if(inEnumArray!=outEnumArray) {
            for(i=0; i<tempMap->count; ++i) {
                oldIndex=sortArray[i].index;
                ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
                ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
            }
        } else {
            /*
             * in-place swapping: need to permutate into a temporary array
             * and then copy back to not destroy the data
             */
            EnumValue *tempEnumArray;
            Offset *oldIndexes;

            /* write name offsets directly from sortArray */
            for(i=0; i<tempMap->count; ++i) {
                ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
            }

            /*
             * compress the oldIndexes into a separate array to make space for tempEnumArray
             * the tempMap _nameArray becomes oldIndexes[], getting the index
             * values from the 2D sortArray[],
             * while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
             * this saves us allocating more memory
             *
             * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
             * and because the nameArray[] can be used for oldIndexes[]
             */
            tempEnumArray=(EnumValue *)sortArray;
            oldIndexes=(Offset *)(sortArray+tempMap->count);

            /* copy sortArray[].index values into oldIndexes[] */
            for(i=0; i<tempMap->count; ++i) {
                oldIndexes[i]=sortArray[i].index;
            }

            /* permutate inEnumArray[] into tempEnumArray[] */
            for(i=0; i<tempMap->count; ++i) {
                ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
            }

            /* copy tempEnumArray[] to outEnumArray[] */
            uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
        }
    }

    return size;
}
/**
 * Swap the body of pnames.icu (everything after the udata header).
 *
 * The header fields are first read into a local PropertyAliases in
 * platform byte order. If length is negative (preflighting), nothing is
 * written and only the total size is returned. Otherwise the header,
 * name groups, strings, the three top-level maps, every map reachable
 * from the ValueMaps, and finally the ValueMap array itself are swapped.
 *
 * @param ds         the swapper
 * @param inBytes    start of the input pnames data (after the header)
 * @param length     number of available bytes, or negative for preflighting
 * @param outBytes   start of the output pnames data; may equal inBytes
 * @param pErrorCode receives U_INDEX_OUTOFBOUNDS_ERROR if length is smaller
 *                   than total_size, U_MEMORY_ALLOCATION_ERROR if the scratch
 *                   array cannot be allocated, or errors from sub-swappers
 * @return total_size from the data, or 0 on the two errors detected here
 */
int32_t
PropertyAliases::swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
UErrorCode *pErrorCode) {
const PropertyAliases *inAliases;
PropertyAliases *outAliases;
PropertyAliases aliases;
const ValueMap *inValueMaps;
ValueMap *outValueMaps;
ValueMap valueMap;
uint8_t *temp;
int32_t i;
inAliases=(const PropertyAliases *)inBytes;
outAliases=(PropertyAliases *)outBytes;
/* read the input PropertyAliases - all 16-bit values */
for(i=0; i<(int32_t)sizeof(PropertyAliases)/2; ++i) {
((uint16_t *)&aliases)[i]=ds->readUInt16(((const uint16_t *)inBytes)[i]);
}
/* NOTE(review): the offsets and total_size read above are used below without
   range checks beyond length>=total_size - confirm the data is trusted */
if(length>=0) {
if(length<aliases.total_size) {
udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n",
length);
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
/* copy the data for inaccessible bytes */
if(inBytes!=outBytes) {
uprv_memcpy(outBytes, inBytes, aliases.total_size);
}
/* swap the PropertyAliases class fields */
ds->swapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode);
/* swap the name groups */
/* (the name group pool is taken to extend up to the start of the string pool) */
ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset,
aliases.stringPool_offset-aliases.nameGroupPool_offset,
outBytes+aliases.nameGroupPool_offset, pErrorCode);
/* swap the strings */
/* (the string pool is taken to extend to the end of the data;
   this must happen before NameToEnum::swap(), which sorts by the output strings) */
udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset,
aliases.total_size-aliases.stringPool_offset,
outBytes+aliases.stringPool_offset, pErrorCode);
/*
* alloc uint8_t temp[total_size] and reset it
* swap each top-level struct, put at least the count fields into temp
* use subclass-specific swap() functions
* enumerate value maps, for each
* if temp does not have count!=0 yet
* read count, put it into temp
* swap the array(s)
* resort strings in name->enum maps
* swap value maps
*/
temp=(uint8_t *)uprv_malloc(aliases.total_size);
if(temp==NULL) {
udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n",
aliases.total_size);
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return 0;
}
uprv_memset(temp, 0, aliases.total_size);
/* the return values (sizes) of the sub-swappers are not needed here;
   failures are reported through pErrorCode only */
/* swap properties->name groups map */
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp, aliases.enumToName_offset, pErrorCode);
/* swap name->properties map */
NameToEnum::swap(ds, inBytes, length, outBytes,
temp, aliases.nameToEnum_offset, pErrorCode);
/* swap properties->value maps map */
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp, aliases.enumToValue_offset, pErrorCode);
/* enumerate all ValueMaps and swap them */
inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset);
outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset);
for(i=0; i<aliases.valueMap_count; ++i) {
/* read this ValueMap's offsets in platform byte order; the input is still unswapped
   even for in-place swapping because the array itself is swapped after the loop */
valueMap.enumToName_offset=udata_readInt16(ds, inValueMaps[i].enumToName_offset);
valueMap.ncEnumToName_offset=udata_readInt16(ds, inValueMaps[i].ncEnumToName_offset);
valueMap.nameToEnum_offset=udata_readInt16(ds, inValueMaps[i].nameToEnum_offset);
/* the enum=>name map is either contiguous or non-contiguous */
if(valueMap.enumToName_offset!=0) {
EnumToOffset::swap(ds, inBytes, length, outBytes,
temp, valueMap.enumToName_offset,
pErrorCode);
} else if(valueMap.ncEnumToName_offset!=0) {
NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
temp, valueMap.ncEnumToName_offset,
pErrorCode);
}
if(valueMap.nameToEnum_offset!=0) {
NameToEnum::swap(ds, inBytes, length, outBytes,
temp, valueMap.nameToEnum_offset,
pErrorCode);
}
}
/* swap the ValueMaps array itself */
ds->swapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap),
outValueMaps, pErrorCode);
/* name groups and strings were swapped above */
/* release temp */
uprv_free(temp);
}
return aliases.total_size;
}
/**
 * Swap pnames.icu: swap the udata header, verify that the data is
 * format "pnam" version 1, then swap the body with PropertyAliases::swap().
 *
 * @param ds         the swapper
 * @param inData     start of the input data including the udata header
 * @param length     number of input bytes, or negative for preflighting
 * @param outData    start of the output data
 * @param pErrorCode receives U_UNSUPPORTED_ERROR for unrecognized data and
 *                   U_INDEX_OUTOFBOUNDS_ERROR if fewer than
 *                   sizeof(PropertyAliases) bytes follow the header
 * @return header size plus the result of PropertyAliases::swap(), or 0 on error
 */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
    const UBool isPnamesV1=
        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1;
    if(!isPnamesV1) {
        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* when not preflighting, the body must at least hold the fixed fields */
    if(length>=0) {
        length-=headerSize;
        if(length<(int32_t)sizeof(PropertyAliases)) {
            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* the body starts right after the header in both buffers */
    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
    uint8_t *outBytes=(uint8_t *)outData+headerSize;
    return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
}
//eof

515
source/common/propname.h Normal file
View file

@ -0,0 +1,515 @@
/*
**********************************************************************
* Copyright (c) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
**********************************************************************
*/
#ifndef PROPNAME_H
#define PROPNAME_H
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "udataswp.h"
#include "uprops.h"
/*
* This header defines the in-memory layout of the property names data
* structure representing the UCD data files PropertyAliases.txt and
* PropertyValueAliases.txt. It is used by:
* propname.cpp - reads data
* genpname - creates data
*/
/* low-level char * property name comparison -------------------------------- */
U_CDECL_BEGIN
/**
* \var uprv_comparePropertyNames
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
/* Same loose comparison for names in an EBCDIC charset. */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
/* uprv_comparePropertyNames maps to the variant for the platform charset family. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
#else
# error U_CHARSET_FAMILY is not valid
#endif
U_CDECL_END
/* UDataMemory structure and signatures ------------------------------------- */
#define PNAME_DATA_NAME "pnames"
#define PNAME_DATA_TYPE "icu"
/* Fields in UDataInfo: */
/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
#define PNAME_SIG_0 ((uint8_t)0x70) /* p */
#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
#define PNAME_FORMAT_VERSION ((int8_t)1) /* formatVersion[0] */
/**
* Swap pnames.icu. See udataswp.h.
* @internal
*/
/* Parameters follow the common data swapper function signature; length<0 requests preflighting. */
U_CAPI int32_t U_EXPORT2
upname_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#ifdef XP_CPLUSPLUS
class Builder;
U_NAMESPACE_BEGIN
/**
* An offset from the start of the pnames data to a contained entity.
* This must be a signed value, since negative offsets are used as an
* end-of-list marker. Offsets to actual objects are non-zero. A
* zero offset indicates an absent entry; this corresponds to aliases
* marked "n/a" in the original Unicode data files.
*/
typedef int16_t Offset; /* must be signed */
#define MAX_OFFSET 0x7FFF
/**
 * A generic value for a property or property value. Typically an
 * enum from uchar.h, but sometimes a non-enum value. It must be
 * large enough to accommodate the largest enum value, which as of this
 * writing is the largest general category mask. Need not be signed
 * but may be. Typically it doesn't matter, since the caller will
 * cast it to the proper type before use. Takes the special value
 * UCHAR_INVALID_CODE for invalid input.
 */
typedef int32_t EnumValue;
/* ---------------------------------------------------------------------- */
/* ValueMap */
/**
* For any top-level property that has named values (binary and
* enumerated properties), there is a ValueMap object. This object
* maps from enum values to two other maps. One goes from value enums
* to value names. The other goes from value names to value enums.
*
* The value enum values may be contiguous or disjoint. If they are
* contiguous then the enumToName_offset is nonzero, and the
* ncEnumToName_offset is zero. Vice versa if the value enums are
* disjoint.
*
* There are n of these objects, where n is the number of binary
* properties + the number of enumerated properties.
*/
struct ValueMap {
/* The members mirror the binary data: three consecutive 16-bit Offsets. */
/* Do not reorder, add or remove members without changing the data format. */
/* -- begin pnames data -- */
/* Enum=>name EnumToOffset / NonContiguousEnumToOffset objects. */
/* Exactly one of these will be nonzero. */
Offset enumToName_offset; /* contiguous enums: offset of an EnumToOffset, else 0 */
Offset ncEnumToName_offset; /* disjoint enums: offset of a NonContiguousEnumToOffset, else 0 */
Offset nameToEnum_offset; /* Name=>enum data */
/* -- end pnames data -- */
};
/* ---------------------------------------------------------------------- */
/* PropertyAliases class */
/**
* A class encapsulating access to the memory-mapped data representing
* property aliases and property value aliases (pnames). The class
* MUST have no v-table and declares certain methods inline -- small
* methods and methods that are called from only one point.
*
* The data members in this class correspond to the in-memory layout
* of the header of the pnames data.
*/
class PropertyAliases {
/* -- begin pnames data -- */
/* The members below mirror the in-memory header; do not reorder them. */
/* Enum=>name EnumToOffset object for binary and enumerated */
/* properties */
Offset enumToName_offset;
/* Name=>enum data for binary & enumerated properties */
Offset nameToEnum_offset;
/* Enum=>offset EnumToOffset object mapping enumerated properties */
/* to ValueMap objects */
Offset enumToValue_offset;
/* The following are needed by external readers of this data. */
/* We don't use them ourselves. */
int16_t total_size; /* size in bytes excluding the udata header */
Offset valueMap_offset; /* offset to start of array */
int16_t valueMap_count; /* number of entries */
Offset nameGroupPool_offset; /* offset to start of array */
int16_t nameGroupPool_count; /* number of entries (not groups) */
Offset stringPool_offset; /* offset to start of pool */
int16_t stringPool_count; /* number of strings (not size in bytes) */
/* -- end pnames data -- */
friend class ::Builder;
const ValueMap* getValueMap(EnumValue prop) const;
const char* chooseNameInGroup(Offset offset,
UPropertyNameChoice choice) const;
public:
/* Turn an Offset, counted in bytes from the start of this object, */
/* into a pointer. */
inline const int8_t* getPointer(Offset o) const {
return ((const int8_t*) this) + o;
}
/* Same as getPointer(), except that a zero offset yields NULL. */
inline const int8_t* getPointerNull(Offset o) const {
return o ? getPointer(o) : NULL;
}
inline const char* getPropertyName(EnumValue prop,
UPropertyNameChoice choice) const;
inline EnumValue getPropertyEnum(const char* alias) const;
inline const char* getPropertyValueName(EnumValue prop, EnumValue value,
UPropertyNameChoice choice) const;
inline EnumValue getPropertyValueEnum(EnumValue prop,
const char* alias) const;
static int32_t
swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
UErrorCode *pErrorCode);
};
/* ---------------------------------------------------------------------- */
/* EnumToOffset */
/**
* A generic map from enum values to Offsets. The enum values must be
* contiguous, from enumStart to enumLimit. The Offset values may
* point to anything.
*/
class EnumToOffset {
/* -- begin pnames data -- */
EnumValue enumStart;
EnumValue enumLimit;
Offset _offsetArray; /* [array of enumLimit-enumStart] */
/* -- end pnames data -- */
friend class ::Builder;
/* _offsetArray is only the first element; the accessors below expose */
/* it as the start of the whole array. */
Offset* getOffsetArray() {
return &_offsetArray;
}
const Offset* getOffsetArray() const {
return &_offsetArray;
}
/* Size in bytes of an object with n offsets. sizeof(EnumToOffset) */
/* already contains the first Offset, hence (n - 1). */
static int32_t getSize(int32_t n) {
return sizeof(EnumToOffset) + sizeof(Offset) * (n - 1);
}
int32_t getSize() {
return getSize(enumLimit - enumStart);
}
public:
/* Return the Offset stored for enumProbe, or 0 when enumProbe is */
/* outside [enumStart, enumLimit). */
Offset getOffset(EnumValue enumProbe) const {
if (enumProbe < enumStart ||
enumProbe >= enumLimit) {
return 0; /* not found */
}
const Offset* p = getOffsetArray();
return p[enumProbe - enumStart];
}
static int32_t
swap(const UDataSwapper *ds,
const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
uint8_t *temp, int32_t pos,
UErrorCode *pErrorCode);
};
/* ---------------------------------------------------------------------- */
/* NonContiguousEnumToOffset */
/**
* A generic map from enum values to Offsets. The enum values may be
* disjoint. If they are contiguous, an EnumToOffset should be used
* instead. The Offset values may point to anything.
*/
class NonContiguousEnumToOffset {
    /* -- begin pnames data -- */
    /* The members below mirror the in-memory layout; do not reorder them. */
    int32_t count;
    EnumValue _enumArray; /* [array of count] */
    /* Offset _offsetArray; // [array of count] after enumValue[count-1] */
    /* -- end pnames data -- */
    friend class ::Builder;

    /* _enumArray is only the first element of the EnumValue array. */
    EnumValue* getEnumArray() {
        return &_enumArray;
    }
    const EnumValue* getEnumArray() const {
        return &_enumArray;
    }

    /* The Offset array starts right after the count EnumValues. */
    Offset* getOffsetArray() {
        return (Offset*) (getEnumArray() + count);
    }
    const Offset* getOffsetArray() const {
        /* cast to a const pointer so that constness is not stripped */
        return (const Offset*) (getEnumArray() + count);
    }

    /* Size in bytes of an object with n (EnumValue, Offset) pairs. */
    static int32_t getSize(int32_t n) {
        return sizeof(int32_t) + (sizeof(EnumValue) + sizeof(Offset)) * n;
    }
    int32_t getSize() {
        return getSize(count);
    }
public:
    /*
     * Return the Offset paired with enumProbe, or 0 if there is none.
     * The early exit on e[i] > enumProbe only works if the enum array
     * is in ascending order.
     */
    Offset getOffset(EnumValue enumProbe) const {
        const EnumValue* e = getEnumArray();
        const Offset* p = getOffsetArray();
        /* linear search; binary later if warranted */
        /* (binary is not faster for short lists) */
        for (int32_t i=0; i<count; ++i) {
            if (e[i] < enumProbe) continue;
            if (e[i] > enumProbe) break;
            return p[i];
        }
        return 0; /* not found */
    }
    static int32_t
    swap(const UDataSwapper *ds,
         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
         uint8_t *temp, int32_t pos,
         UErrorCode *pErrorCode);
};
/* ---------------------------------------------------------------------- */
/* NameToEnum */
/**
* A map from names to enum values.
*/
class NameToEnum {
    /* -- begin pnames data -- */
    /* The members below mirror the in-memory layout; do not reorder them. */
    int32_t count;       /* number of entries */
    EnumValue _enumArray; /* [array of count] EnumValues */
    /* Offset _nameArray; // [array of count] offsets to names */
    /* -- end pnames data -- */
    friend class ::Builder;

    /* _enumArray is only the first element of the EnumValue array. */
    EnumValue* getEnumArray() {
        return &_enumArray;
    }
    const EnumValue* getEnumArray() const {
        return &_enumArray;
    }

    /* The name Offset array starts right after the count EnumValues. */
    Offset* getNameArray() {
        return (Offset*) (getEnumArray() + count);
    }
    const Offset* getNameArray() const {
        /* cast to a const pointer so that constness is not stripped */
        return (const Offset*) (getEnumArray() + count);
    }

    /* Size in bytes of an object with n (EnumValue, name Offset) pairs. */
    static int32_t getSize(int32_t n) {
        return sizeof(int32_t) + (sizeof(Offset) + sizeof(EnumValue)) * n;
    }
    int32_t getSize() {
        return getSize(count);
    }
public:
    /*
     * Return the enum value whose name matches alias according to
     * uprv_comparePropertyNames(), or UCHAR_INVALID_CODE if none does.
     * Name offsets are resolved relative to data.
     * The early exit on c < 0 only works if the names are stored in
     * ascending uprv_comparePropertyNames() order.
     */
    EnumValue getEnum(const char* alias, const PropertyAliases& data) const {
        const Offset* n = getNameArray();
        const EnumValue* e = getEnumArray();
        /* linear search; binary later if warranted */
        /* (binary is not faster for short lists) */
        for (int32_t i=0; i<count; ++i) {
            const char* name = (const char*) data.getPointer(n[i]);
            int32_t c = uprv_comparePropertyNames(alias, name);
            if (c > 0) continue;
            if (c < 0) break;
            return e[i];
        }
        return UCHAR_INVALID_CODE;
    }
    static int32_t
    swap(const UDataSwapper *ds,
         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
         uint8_t *temp, int32_t pos,
         UErrorCode *pErrorCode);
};
/*----------------------------------------------------------------------
*
* In-memory layout. THIS IS NOT A STANDALONE DOCUMENT. It goes
* together with above C++ declarations and gives an overview.
*
* See above for definitions of Offset and EnumValue. Also, refer to
* above class declarations for the "bottom line" on data layout.
*
* Sizes:
* '*_offset' is an Offset (see above)
* 'count' members are typically int32_t (see above declarations)
* 'enumArray' is an array of EnumValue (see above)
* 'offsetArray' is an array of Offset (see above)
* 'nameArray' is an array of Offset (see above)
* 'enum*' is an EnumValue (see above)
* '*Array [x n]' means that *Array has n elements
*
* References:
* Instead of pointers, this flat data structure contains offsets.
* All offsets are relative to the start of 'header'. A notation
* is used to indicate what structure each offset points to:
* 'foo (>x)' the offset(s) in foo point to structure x
*
* Structures:
* Each structure is assigned a number, except for the header,
* which is called 'header'. The numbers are not contiguous
* for historical reasons. Some structures have sub-parts
* that are denoted with a letter, e.g., "5a".
*
* BEGIN LAYOUT
* ============
* header:
* enumToName_offset (>0)
* nameToEnum_offset (>2)
* enumToValue_offset (>3)
* (alignment padding built into header)
*
* The header also contains the following, used by "external readers"
* like ICU4J and icuswap.
*
* // The following are needed by external readers of this data.
* // We don't use them ourselves.
* int16_t total_size; // size in bytes excluding the udata header
* Offset valueMap_offset; // offset to start of array
* int16_t valueMap_count; // number of entries
* Offset nameGroupPool_offset; // offset to start of array
* int16_t nameGroupPool_count; // number of entries (not groups)
* Offset stringPool_offset; // offset to start of pool
* int16_t stringPool_count; // number of strings (not size in bytes)
*
* 0: # NonContiguousEnumToOffset obj for props => name groups
* count
* enumArray [x count]
* offsetArray [x count] (>98)
*
* => pad to next 4-byte boundary
*
* (1: omitted -- no longer used)
*
* 2: # NameToEnum obj for binary & enumerated props
* count
* enumArray [x count]
* nameArray [x count] (>99)
*
* => pad to next 4-byte boundary
*
* 3: # NonContiguousEnumToOffset obj for enumerated props => ValueMaps
* count
* enumArray [x count]
* offsetArray [x count] (>4)
*
* => pad to next 4-byte boundary
*
* 4: # ValueMap array [x one for each enumerated prop i]
* enumToName_offset (>5a +2*i) one of these two is NULL, one is not
* ncEnumToName_offset (>5b +2*i)
* nameToEnums_offset (>6 +2*i)
*
* => pad to next 4-byte boundary
*
* for each enumerated prop (either 5a or 5b):
*
* 5a: # EnumToOffset for enumerated prop's values => name groups
* enumStart
* enumLimit
* offsetArray [x enumLimit - enumStart] (>98)
*
* => pad to next 4-byte boundary
*
* 5b: # NonContiguousEnumToOffset for enumerated prop's values => name groups
* count
* enumArray [x count]
* offsetArray [x count] (>98)
*
* => pad to next 4-byte boundary
*
* 6: # NameToEnum for enumerated prop's values
* count
* enumArray [x count]
* nameArray [x count] (>99)
*
* => pad to next 4-byte boundary
*
* 98: # name group pool {NGP}
* [array of Offset values] (>99)
*
* 99: # string pool {SP}
* [pool of nul-terminated char* strings]
*/
U_NAMESPACE_END
#endif /* C++ */
#endif

557
source/common/propsvec.c Normal file
View file

@ -0,0 +1,557 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: propsvec.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store bits (Unicode character properties) in bit set vectors.
*/
#include <stdlib.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "utrie.h"
#include "utrie2.h"
#include "uarrsort.h"
#include "propsvec.h"
/* Builder state for a table of properties vectors keyed by code point range. */
struct UPropsVectors {
uint32_t *v; /* row storage: maxRows rows of "columns" uint32_t cells each */
int32_t columns; /* number of columns, plus two for start & limit values */
int32_t maxRows; /* number of rows that v currently has room for */
int32_t rows; /* number of rows in use */
int32_t prevRow; /* search optimization: remember last row seen */
UBool isCompacted; /* set by upvec_compact(); the row structure is gone afterwards */
};
/*
 * Row capacities: upvec_open() allocates UPVEC_INITIAL_ROWS rows;
 * upvec_setValue() grows the array to UPVEC_MEDIUM_ROWS and then to
 * UPVEC_MAX_ROWS when it runs out of room.
 */
#define UPVEC_INITIAL_ROWS (1<<12)
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
/*
 * Create a properties-vectors builder with the given number of value columns.
 * The new object holds one all-zero row for [0..0x110000[ followed by one
 * all-zero row for each special pseudo code point.
 * Returns NULL and sets *pErrorCode on illegal arguments or allocation failure.
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    UPropsVectors *vectors;
    uint32_t *storage, *cell;
    uint32_t special;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(columns<1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* each stored row is prefixed with its range start and limit */
    columns+=2;

    vectors=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    storage=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
    if(vectors==NULL || storage==NULL) {
        uprv_free(vectors);
        uprv_free(storage);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    uprv_memset(vectors, 0, sizeof(UPropsVectors));
    vectors->v=storage;
    vectors->columns=columns;
    vectors->maxRows=UPVEC_INITIAL_ROWS;
    vectors->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);

    /* clear all initial rows, then fill in their range boundaries */
    uprv_memset(storage, 0, vectors->rows*columns*4);

    /* the all-Unicode row */
    storage[0]=0;
    storage[1]=0x110000;

    /* one single-code point row per special value */
    cell=storage+columns;
    for(special=UPVEC_FIRST_SPECIAL_CP; special<=UPVEC_MAX_CP; ++special) {
        cell[0]=special;
        cell[1]=special+1;
        cell+=columns;
    }
    return vectors;
}
/* Release the row storage and the object itself; a NULL pv is ignored. */
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv) {
    if(pv==NULL) {
        return;
    }
    uprv_free(pv->v);
    uprv_free(pv);
}
/*
 * Find the row whose range [row[0], row[1][ contains rangeStart.
 * The row returned by the previous call and the few rows after it are
 * tried first; otherwise a binary search over all rows is done.
 * Except when the previous row matches again, pv->prevRow is updated to
 * the index of the row that is returned.
 * Returns a pointer to the first cell (the range start) of the row.
 */
static uint32_t *
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
uint32_t *row;
int32_t columns, i, start, limit, prevRow, rows;
columns=pv->columns;
/* NOTE(review): "rows" is assigned here but never read in this function. */
rows=limit=pv->rows;
prevRow=pv->prevRow;
/* check the vicinity of the last-seen row (start searching with an unrolled loop) */
row=pv->v+prevRow*columns;
if(rangeStart>=(UChar32)row[0]) {
if(rangeStart<(UChar32)row[1]) {
/* same row as last seen */
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* the condition above advanced row by one row as a side effect */
/* next row after the last one */
pv->prevRow=prevRow+1;
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* row has now been advanced a second time */
/* second row after the last one */
pv->prevRow=prevRow+2;
return row;
} else if((rangeStart-(UChar32)row[1])<10) {
/* we are close, continue looping */
prevRow+=2;
do {
++prevRow;
row+=columns;
} while(rangeStart>=(UChar32)row[1]);
pv->prevRow=prevRow;
return row;
}
} else if(rangeStart<(UChar32)pv->v[1]) {
/* the very first row */
pv->prevRow=0;
return pv->v;
}
/* do a binary search for the start of the range */
start=0;
while(start<limit-1) {
i=(start+limit)/2;
row=pv->v+i*columns;
if(rangeStart<(UChar32)row[0]) {
limit=i;
} else if(rangeStart<(UChar32)row[1]) {
pv->prevRow=i;
return row;
} else {
start=i;
}
}
/* must be found because all ranges together always cover all of Unicode */
pv->prevRow=start;
return pv->v+start*columns;
}
/*
 * For all code points in [start..end], replace the bits selected by mask
 * in the given value column with value&mask.
 * The first and/or last affected row is split when the input range covers
 * it only partially and its masked value would change.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL pv or out-of-range arguments,
 * U_NO_WRITE_PERMISSION if pv has been compacted,
 * U_MEMORY_ALLOCATION_ERROR if the row array cannot be enlarged, and
 * U_INTERNAL_PROGRAM_ERROR if more rows are needed while the array is
 * already at UPVEC_MAX_ROWS.
 */
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode) {
uint32_t *firstRow, *lastRow;
int32_t columns;
UChar32 limit;
UBool splitFirstRow, splitLastRow;
/* argument checking */
if(U_FAILURE(*pErrorCode)) {
return;
}
if( pv==NULL ||
start<0 || start>end || end>UPVEC_MAX_CP ||
column<0 || column>=(pv->columns-2)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pv->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
limit=end+1;
/* initialize */
columns=pv->columns;
column+=2; /* skip range start and limit columns */
value&=mask;
/* find the rows whose ranges overlap with the input range */
/* find the first and last rows, always successful */
firstRow=_findRow(pv, start);
lastRow=_findRow(pv, end);
/*
* Rows need to be split if they partially overlap with the
* input range (only possible for the first and last rows)
* and if their value differs from the input value.
*/
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
/* split first/last rows if necessary */
if(splitFirstRow || splitLastRow) {
int32_t count, rows;
rows=pv->rows;
/* the two split flags are 0 or 1 and are added up as the number of new rows */
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
uint32_t *newVectors;
int32_t newMaxRows;
if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
newMaxRows=UPVEC_MEDIUM_ROWS;
} else if(pv->maxRows<UPVEC_MAX_ROWS) {
newMaxRows=UPVEC_MAX_ROWS;
} else {
/* Implementation bug, or UPVEC_MAX_ROWS too low. */
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return;
}
newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
if(newVectors==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(newVectors, pv->v, rows*columns*4);
/* re-base the row pointers onto the new array before the old one is freed */
firstRow=newVectors+(firstRow-pv->v);
lastRow=newVectors+(lastRow-pv->v);
uprv_free(pv->v);
pv->v=newVectors;
pv->maxRows=newMaxRows;
}
/* count the number of row cells to move after the last row, and move them */
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
if(count>0) {
uprv_memmove(
lastRow+(1+splitFirstRow+splitLastRow)*columns,
lastRow+columns,
count*4);
}
pv->rows=rows+splitFirstRow+splitLastRow;
/* split the first row, and move the firstRow pointer to the second part */
if(splitFirstRow) {
/* copy all affected rows up one and move the lastRow pointer */
count = (int32_t)((lastRow-firstRow)+columns);
uprv_memmove(firstRow+columns, firstRow, count*4);
lastRow+=columns;
/* split the range and move the firstRow pointer */
firstRow[1]=firstRow[columns]=(uint32_t)start;
firstRow+=columns;
}
/* split the last row */
if(splitLastRow) {
/* copy the last row data */
uprv_memcpy(lastRow+columns, lastRow, columns*4);
/* split the range: lastRow now ends at limit and its copy starts at limit */
lastRow[1]=lastRow[columns]=(uint32_t)limit;
}
}
/* set the "row last seen" to the last row for the range */
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
/* set the input value in all remaining rows */
/* from here on firstRow and lastRow point at the target cell of their rows */
firstRow+=column;
lastRow+=column;
mask=~mask;
for(;;) {
*firstRow=(*firstRow&mask)|value;
if(firstRow==lastRow) {
break;
}
firstRow+=columns;
}
}
/*
 * Return the value stored for code point c in the given value column.
 * Returns 0 after compaction and for out-of-range c or column.
 * The row lookup updates the search cache in pv, hence the cast that
 * removes const.
 */
U_CAPI uint32_t U_EXPORT2
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
    const uint32_t *found;

    if(pv->isCompacted) {
        return 0;
    }
    if(c<0 || c>UPVEC_MAX_CP) {
        return 0;
    }
    if(column<0 || column>=(pv->columns-2)) {
        return 0;
    }
    found=_findRow((UPropsVectors *)pv, c);
    return found[2+column];
}
/*
 * Return a pointer to the value cells of row rowIndex and optionally
 * report the row's inclusive code point range.
 * Returns NULL after compaction or if rowIndex is out of range.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
    uint32_t *cells;

    if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
        return NULL;
    }
    cells=pv->v+rowIndex*pv->columns;
    if(pRangeStart!=NULL) {
        *pRangeStart=(UChar32)cells[0];
    }
    if(pRangeEnd!=NULL) {
        /* the stored limit is exclusive */
        *pRangeEnd=(UChar32)cells[1]-1;
    }
    /* skip the start and limit cells */
    return cells+2;
}
/*
 * Sort comparator for two rows; context is the UPropsVectors.
 * The value columns are compared first, then the start and limit columns,
 * so that rows with equal values end up adjacent.
 */
static int32_t U_CALLCONV
upvec_compareRows(const void *context, const void *l, const void *r) {
    const UPropsVectors *pv=(const UPropsVectors *)context;
    const uint32_t *lhs=(const uint32_t *)l;
    const uint32_t *rhs=(const uint32_t *)r;
    const int32_t columns=pv->columns; /* includes start/limit columns */
    int32_t remaining=columns;
    int32_t idx=2; /* begin after start/limit, wrap around to them at the end */

    do {
        if(lhs[idx]<rhs[idx]) {
            return -1;
        }
        if(lhs[idx]>rhs[idx]) {
            return 1;
        }
        ++idx;
        if(idx==columns) {
            idx=0;
        }
        --remaining;
    } while(remaining>0);
    return 0;
}
/*
 * Sort the rows by value, reduce the array in place to its unique value
 * vectors (without start/limit cells), and report the rows to the handler.
 *
 * The handler is called first for each special-value row, then once with
 * UPVEC_START_REAL_VALUES_CP and the length of the compacted array,
 * then for each row of real code points with the index of its vector
 * in the compacted array.
 *
 * Does nothing if pv is already compacted. The isCompacted flag is set
 * before any work is done, so it stays set if sorting or the handler fails.
 * On success pv->rows is the number of unique vectors.
 */
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
uint32_t *row;
int32_t i, columns, valueColumns, rows, count;
UChar32 start, limit;
/* argument checking */
if(U_FAILURE(*pErrorCode)) {
return;
}
if(handler==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pv->isCompacted) {
return;
}
/* Set the flag now: Sorting and compacting destroys the builder data structure. */
pv->isCompacted=TRUE;
rows=pv->rows;
columns=pv->columns;
valueColumns=columns-2; /* not counting start & limit */
/* sort the properties vectors to find unique vector values */
uprv_sortArray(pv->v, rows, columns*4,
upvec_compareRows, pv, FALSE, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
/*
* Find and set the special values.
* This has to do almost the same work as the compaction below,
* to find the indexes where the special-value rows will move.
*/
row=pv->v;
/* count is the compacted index of the current unique vector; negative means none yet */
count=-valueColumns;
for(i=0; i<rows; ++i) {
start=(UChar32)row[0];
/* count a new values vector if it is different from the current one */
/* row-valueColumns is where the value cells of the previous row start */
if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
count+=valueColumns;
}
if(start>=UPVEC_FIRST_SPECIAL_CP) {
handler(context, start, start, count, row+2, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
row+=columns;
}
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
count+=valueColumns;
/* Call the handler once more to signal the start of delivering real values. */
handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
count, row-valueColumns, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
/*
* Move vector contents up to a contiguous array with only unique
* vector values, and call the handler function for each vector.
*
* This destroys the Properties Vector structure and replaces it
* with an array of just vector values.
*/
row=pv->v;
count=-valueColumns;
for(i=0; i<rows; ++i) {
/* fetch these first before memmove() may overwrite them */
start=(UChar32)row[0];
limit=(UChar32)row[1];
/* add a new values vector if it is different from the current one */
/* pv->v+count is the most recently written compacted vector */
if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
count+=valueColumns;
uprv_memmove(pv->v+count, row+2, valueColumns*4);
}
if(start<UPVEC_FIRST_SPECIAL_CP) {
handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
row+=columns;
}
/* count is at the beginning of the last vector, add one to include that last vector */
pv->rows=count/valueColumns+1;
}
/*
 * Return the compacted vectors array and optionally its dimensions.
 * Returns NULL, without touching the output parameters, if pv has not
 * been compacted.
 */
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
    if(pv->isCompacted) {
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            /* report value columns only, without start & limit */
            *pColumns=pv->columns-2;
        }
        return pv->v;
    }
    return NULL;
}
/*
 * Return a newly allocated copy of the compacted vectors array and
 * optionally its dimensions.
 * Returns NULL with U_ILLEGAL_ARGUMENT_ERROR if pv has not been compacted,
 * or with U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    uint32_t *copy;
    int32_t size;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(!pv->isCompacted) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* size in bytes of rows x value columns of uint32_t */
    size=pv->rows*(pv->columns-2)*4;
    copy=(uint32_t *)uprv_malloc(size);
    if(copy!=NULL) {
        uprv_memcpy(copy, pv->v, size);
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            *pColumns=pv->columns-2;
        }
    } else {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return copy;
}
/*
 * Compact pv with upvec_compactToUTrie2Handler, then freeze the resulting
 * trie with 16-bit values.
 * On failure the trie is closed and NULL is returned.
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context ctx={ NULL };

    upvec_compact(pv, upvec_compactToUTrie2Handler, &ctx, pErrorCode);
    utrie2_freeze(ctx.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
    if(!U_FAILURE(*pErrorCode)) {
        return ctx.trie;
    }
    utrie2_close(ctx.trie);
    return NULL;
}
/*
* TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
* some 16-bit field and builds and returns a UTrie2.
*/
/*
 * upvec_compact() handler; context is a UPVecToUTrieContext.
 * Remembers the row index of the initial value, opens the UNewTrie when
 * real values begin, and stores rowIndex for each real range.
 * Other special pseudo code points are ignored.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrieHandler(void *context,
                            UChar32 start, UChar32 end,
                            int32_t rowIndex, uint32_t *row, int32_t columns,
                            UErrorCode *pErrorCode) {
    UPVecToUTrieContext *ctx=(UPVecToUTrieContext *)context;

    if(start<UPVEC_FIRST_SPECIAL_CP) {
        /* real code points: utrie_setRange32() takes an exclusive limit */
        if(!utrie_setRange32(ctx->newTrie, start, end+1, (uint32_t)rowIndex, TRUE)) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        }
        return;
    }

    if(start==UPVEC_INITIAL_VALUE_CP) {
        ctx->initialValue=rowIndex;
        return;
    }

    if(start!=UPVEC_START_REAL_VALUES_CP) {
        return;
    }

    if(rowIndex>0xffff) {
        /* too many rows for a 16-bit trie */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return;
    }
    ctx->newTrie=utrie_open(NULL, NULL, ctx->capacity,
                            ctx->initialValue, ctx->initialValue,
                            ctx->latin1Linear);
    if(ctx->newTrie==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
/*
 * upvec_compact() handler; context is a UPVecToUTrie2Context.
 * Remembers the row indexes of the initial and error values, opens the
 * UTrie2 when real values begin, and stores rowIndex for each real range.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
                             UChar32 start, UChar32 end,
                             int32_t rowIndex, uint32_t *row, int32_t columns,
                             UErrorCode *pErrorCode) {
    UPVecToUTrie2Context *ctx=(UPVecToUTrie2Context *)context;

    if(start<UPVEC_FIRST_SPECIAL_CP) {
        /* real code points: utrie2_setRange32() takes an inclusive end */
        utrie2_setRange32(ctx->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
        return;
    }

    if(start==UPVEC_INITIAL_VALUE_CP) {
        ctx->initialValue=rowIndex;
    } else if(start==UPVEC_ERROR_VALUE_CP) {
        ctx->errorValue=rowIndex;
    } else if(start==UPVEC_START_REAL_VALUES_CP) {
        /* recorded even if it turns out to be too large below */
        ctx->maxValue=rowIndex;
        if(rowIndex>0xffff) {
            /* too many rows for a 16-bit trie */
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        } else {
            ctx->trie=utrie2_open(ctx->initialValue,
                                  ctx->errorValue, pErrorCode);
        }
    }
}

191
source/common/propsvec.h Normal file
View file

@ -0,0 +1,191 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: propsvec.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store bits (Unicode character properties) in bit set vectors.
*/
#ifndef __UPROPSVEC_H__
#define __UPROPSVEC_H__
#include "unicode/utypes.h"
#include "utrie.h"
#include "utrie2.h"
U_CDECL_BEGIN
/**
* Unicode Properties Vectors associated with code point ranges.
*
* Rows of uint32_t integers in a contiguous array store
* the range limits and the properties vectors.
*
* Logically, each row has a certain number of uint32_t values,
* which is set via the upvec_open() "columns" parameter.
*
* Internally, two additional columns are stored.
* In each internal row,
* row[0] contains the start code point and
* row[1] contains the limit code point,
* which is the start of the next range.
*
* Initially, there is only one "normal" row for
* range [0..0x110000[ with values 0.
* There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
*
* It would be possible to store only one range boundary per row,
* but self-contained rows allow to later sort them by contents.
*/
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
/*
* Special pseudo code points for storing the initialValue and the errorValue,
* which are used to initialize a UTrie2 or similar.
*/
#define UPVEC_FIRST_SPECIAL_CP 0x110000
#define UPVEC_INITIAL_VALUE_CP 0x110000
#define UPVEC_ERROR_VALUE_CP 0x110001
#define UPVEC_MAX_CP 0x110001
/*
* Special pseudo code point used in upvec_compact() signalling the end of
* delivering special values and the beginning of delivering real ones.
* Stable value, unlike UPVEC_MAX_CP which might grow over time.
*/
#define UPVEC_START_REAL_VALUES_CP 0x200000
/*
* Open a UPropsVectors object.
* @param columns Number of value integers (uint32_t) per row.
*/
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv);
/*
* In rows for code points [start..end], select the column,
* reset the mask bits and set the value bits (ANDed with the mask).
*
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
*/
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode);
/*
* Logically const but must not be used on the same pv concurrently!
* Always returns 0 if called after upvec_compact().
*/
U_CAPI uint32_t U_EXPORT2
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
/*
* pRangeStart and pRangeEnd can be NULL.
* @return NULL if rowIndex out of range and for illegal arguments,
* or if called after upvec_compact()
*/
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
UChar32 *pRangeStart, UChar32 *pRangeEnd);
/*
* Compact the vectors:
* - modify the memory
* - keep only unique vectors
* - store them contiguously from the beginning of the memory
* - for each (non-unique) row, call the handler function
*
* The handler's rowIndex is the index of the row in the compacted
* memory block.
* (Therefore, it starts at 0 and increases in increments of the columns value.)
*
* In a first phase, only special values are delivered (each exactly once),
* with start==end both equalling a special pseudo code point.
* Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
* where rowIndex is the length of the compacted array,
* and the row is arbitrary (but not NULL).
* Then, in the second phase, the handler is called for each row of real values.
*/
typedef void U_CALLCONV
UPVecCompactHandler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
/*
* Get the vectors array after calling upvec_compact().
* The caller must not modify nor release the returned array.
* Returns NULL if called before upvec_compact().
*/
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
/*
* Get a clone of the vectors array after calling upvec_compact().
* The caller owns the returned array and must uprv_free() it.
* Returns NULL if called before upvec_compact().
*/
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
/*
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
* vectors array, and freeze the trie.
*/
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
/*
 * Context for upvec_compactToUTrieHandler().
 * capacity and latin1Linear are only read by the handler, which passes
 * them to utrie_open(); newTrie and initialValue are set by the handler.
 */
struct UPVecToUTrieContext {
UNewTrie *newTrie; /* opened by the handler when real values begin */
int32_t capacity; /* passed through to utrie_open() */
int32_t initialValue; /* row index delivered for UPVEC_INITIAL_VALUE_CP */
UBool latin1Linear; /* passed through to utrie_open() */
};
typedef struct UPVecToUTrieContext UPVecToUTrieContext;
/* context=UPVecToUTrieContext, creates the trie and stores the rowIndex values */
U_CAPI void U_CALLCONV
upvec_compactToUTrieHandler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
/*
 * Context for upvec_compactToUTrie2Handler(); all fields are set by the handler.
 */
struct UPVecToUTrie2Context {
UTrie2 *trie; /* opened by the handler when real values begin */
int32_t initialValue; /* row index delivered for UPVEC_INITIAL_VALUE_CP */
int32_t errorValue; /* row index delivered for UPVEC_ERROR_VALUE_CP */
int32_t maxValue; /* row index delivered with UPVEC_START_REAL_VALUES_CP */
};
typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
U_CDECL_END
#endif

580
source/common/punycode.c Normal file
View file

@ -0,0 +1,580 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: punycode.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan31
* created by: Markus W. Scherer
*/
/* This ICU code derived from: */
/*
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/
Disclaimer and license
Regarding this entire document or any portion of it (including
the pseudocode and C code), the author makes no guarantees and
is not responsible for any damage resulting from its use. The
author grants irrevocable permission to anyone to use, modify,
and distribute it in any way that does not diminish the rights
of anyone else to use, modify, and distribute it, provided that
redistributed derivative works do not contain misleading author or
version information. Derivative works need not be licensed under
similar terms.
*/
/*
* ICU modifications:
* - ICU data types and coding conventions
* - ICU string buffer handling with implicit source lengths
* and destination preflighting
* - UTF-16 handling
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "ustr_imp.h"
#include "cstring.h"
#include "cmemory.h"
#include "punycode.h"
#include "unicode/ustring.h"
/* Punycode ----------------------------------------------------------------- */
/* Punycode parameters for Bootstring */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 0x80
/* "Basic" Unicode/ASCII code points */
#define _HYPHEN 0X2d
#define DELIMITER _HYPHEN
#define _ZERO_ 0X30
#define _NINE 0x39
#define _SMALL_A 0X61
#define _SMALL_Z 0X7a
#define _CAPITAL_A 0X41
#define _CAPITAL_Z 0X5a
#define IS_BASIC(c) ((c)<0x80)
#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)
/**
 * digitToBasic() maps a digit value in the range 0 to BASE-1 to the
 * basic code point that represents it.
 * Values 0..25 become letters, lowercase unless the uppercase flag is
 * nonzero; values 26..35 become the ASCII digits 0..9.
 */
static U_INLINE char
digitToBasic(int32_t digit, UBool uppercase) {
    if(digit>=26) {
        /* 26..35 map to ASCII 0..9 */
        return (char)((_ZERO_-26)+digit);
    }
    /* 0..25 map to ASCII A..Z or a..z */
    return uppercase ? (char)(_CAPITAL_A+digit) : (char)(_SMALL_A+digit);
}
/**
 * basicToDigit[] contains the numeric value of a basic code
 * point (for use in representing integers) in the range 0 to
 * BASE-1, or -1 if b does not represent a value.
 * Indexed by byte value: '0'..'9' map to 26..35, and both
 * 'A'..'Z' and 'a'..'z' map to 0..25.
 */
static const int8_t
basicToDigit[256]={
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/**
 * Forces an ASCII letter into the requested case.
 *
 * @param b         Character to map; anything that is not an ASCII letter
 *                  of the opposite case is returned unchanged.
 * @param uppercase Nonzero to map 'a'..'z' to 'A'..'Z',
 *                  zero to map 'A'..'Z' to 'a'..'z'.
 * @return The case-mapped character.
 */
static U_INLINE char
asciiCaseMap(char b, UBool uppercase) {
    /* distance between the lowercase and uppercase ASCII letter ranges */
    const int32_t caseDistance=_SMALL_A-_CAPITAL_A;

    if(uppercase && _SMALL_A<=b && b<=_SMALL_Z) {
        return (char)(b-caseDistance);
    }
    if(!uppercase && _CAPITAL_A<=b && b<=_CAPITAL_Z) {
        return (char)(b+caseDistance);
    }
    return b;
}
/* Punycode-specific Bootstring code ---------------------------------------- */

/*
 * The code below leaves out the {parts} of the spec's pseudo-algorithm
 * that the Punycode parameter set does not use.
 */

/**
 * Bias adaptation function.
 *
 * @param delta     Delta that was just encoded or decoded.
 * @param length    Number of code points handled so far, including the
 *                  current one; used as a divisor, so it must not be 0.
 * @param firstTime Nonzero for the first delta of a string, which is
 *                  scaled down by DAMP instead of by 2.
 * @return The new bias.
 */
static int32_t
adaptBias(int32_t delta, int32_t length, UBool firstTime) {
    int32_t steps=0;

    delta/=(firstTime ? DAMP : 2);
    delta+=delta/length;

    /* divide delta down until it is within range, counting BASE per division */
    while(delta>((BASE-TMIN)*TMAX)/2) {
        delta/=(BASE-TMIN);
        steps+=BASE;
    }

    return steps+(((BASE-TMIN+1)*delta)/(delta+SKEW));
}
/* Maximum number of input code points accepted by u_strToPunycode() (size of its stack buffer). */
#define MAX_CP_COUNT 200

/**
 * Encodes a UTF-16 string as Punycode.
 *
 * Basic (below 0x80) code points are written to dest first, followed by a
 * delimiter if there was at least one, followed by the generalized
 * variable-length integers that encode the remaining code points.
 *
 * The complete output length is always counted: characters are stored only
 * while they fit into dest, but destLength keeps increasing, so that the
 * final u_terminateUChars() call sees the full length.
 *
 * @param src          Input string; must not contain unpaired surrogates.
 * @param srcLength    Number of UChars in src, or -1 if src is NUL-terminated.
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Capacity of dest in UChars.
 * @param caseFlags    Optional array indexed by src UChar index. A nonzero
 *                     flag uppercases a basic code point, or is recorded in
 *                     the case of the last digit for a non-basic one.
 *                     NULL means no case mapping.
 * @param pErrorCode   In/out error code. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                     U_INDEX_OUTOFBOUNDS_ERROR for more than MAX_CP_COUNT code points,
 *                     U_INVALID_CHAR_FOUND for an unpaired surrogate,
 *                     U_INTERNAL_PROGRAM_ERROR if delta would overflow.
 * @return 0 on error, otherwise the result of
 *         u_terminateUChars(dest, destCapacity, destLength, pErrorCode).
 */
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
                UChar *dest, int32_t destCapacity,
                const UBool *caseFlags,
                UErrorCode *pErrorCode) {

    int32_t cpBuffer[MAX_CP_COUNT];
    int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
    UChar c, c2;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /*
     * Handle the basic code points and
     * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
     */
    srcCPCount=destLength=0;
    if(srcLength==-1) {
        /* NUL-terminated input */
        for(j=0; /* no condition */; ++j) {
            if((c=src[j])==0) {
                break;
            }
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            if(IS_BASIC(c)) {
                /* basic code points are stored as 0 so that they sort below INITIAL_N */
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                /* shift as unsigned: moving a 1 into the sign bit of a signed int is undefined */
                n=(int32_t)((uint32_t)(caseFlags!=NULL && caseFlags[j])<<31);
                if(UTF_IS_SINGLE(c)) {
                    n|=c;
                } else if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(c2=src[j+1])) {
                    ++j;
                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    } else {
        /* length-specified input */
        for(j=0; j<srcLength; ++j) {
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            c=src[j];
            if(IS_BASIC(c)) {
                /*
                 * Record the code point whether or not it fits into dest;
                 * srcCPCount must cover every input code point for the
                 * main encoding loop below to be correct.
                 */
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                /* shift as unsigned: moving a 1 into the sign bit of a signed int is undefined */
                n=(int32_t)((uint32_t)(caseFlags!=NULL && caseFlags[j])<<31);
                if(UTF_IS_SINGLE(c)) {
                    n|=c;
                } else if(UTF_IS_LEAD(c) && (j+1)<srcLength && UTF_IS_TRAIL(c2=src[j+1])) {
                    ++j;
                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    }

    /* Finish the basic string - if it is not empty - with a delimiter. */
    basicLength=destLength;
    if(basicLength>0) {
        if(destLength<destCapacity) {
            dest[destLength]=DELIMITER;
        }
        ++destLength;
    }

    /*
     * handledCPCount is the number of code points that have been handled
     * basicLength is the number of basic code points
     * destLength is the number of chars that have been output
     */

    /* Initialize the state: */
    n=INITIAL_N;
    delta=0;
    bias=INITIAL_BIAS;

    /* Main encoding loop: */
    for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
        /*
         * All non-basic code points < n have been handled already.
         * Find the next larger one:
         */
        for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(n<=q && q<m) {
                m=q;
            }
        }

        /*
         * Increase delta enough to advance the decoder's
         * <n,i> state to <m,0>, but guard against overflow:
         */
        if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
            return 0;
        }
        delta+=(m-n)*(handledCPCount+1);
        n=m;

        /* Encode a sequence of same code points n */
        for(j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(q<n) {
                ++delta;
            } else if(q==n) {
                /* Represent delta as a generalized variable-length integer: */
                for(q=delta, k=BASE; /* no condition */; k+=BASE) {
                    /*
                     * Threshold for this digit position. The upper bound is
                     * tested as k>=bias+TMAX (rather than t>TMAX) for
                     * conformance with draft-ietf-idn-punycode-03.txt.
                     */
                    t=k-bias;
                    if(t<TMIN) {
                        t=TMIN;
                    } else if(k>=(bias+TMAX)) {
                        t=TMAX;
                    }

                    if(q<t) {
                        break;
                    }

                    /* store only if there is room, but always count the digit */
                    if(destLength<destCapacity) {
                        dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0);
                    }
                    ++destLength;
                    q=(q-t)/(BASE-t);
                }

                /* last digit; its case carries the case flag from the sign bit */
                if(destLength<destCapacity) {
                    dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
                }
                ++destLength;
                bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
                delta=0;
                ++handledCPCount;
            }
        }

        ++delta;
        ++n;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/**
 * Decodes a Punycode string into UTF-16.
 *
 * Everything before the last delimiter is copied as basic code points;
 * the digits after it are decoded into (code point, position) insertions.
 * Insertions are stored only while the result fits into dest, but the
 * output length is always counted in full.
 *
 * @param src          Input Punycode string.
 * @param srcLength    Number of UChars in src, or -1 if src is NUL-terminated.
 * @param dest         Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity Capacity of dest in UChars (and of caseFlags, if not NULL).
 * @param caseFlags    Optional output array, parallel to dest, that receives
 *                     whether the corresponding input letter was uppercase.
 *                     FALSE is stored for trail surrogates.
 * @param pErrorCode   In/out error code. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                     U_INVALID_CHAR_FOUND for a non-basic character before the
 *                     last delimiter or a non-digit character after it,
 *                     U_ILLEGAL_CHAR_FOUND for a truncated or overflowing delta
 *                     sequence or a result that is not a valid code point.
 * @return 0 on error, otherwise the result of
 *         u_terminateUChars(dest, destCapacity, destLength, pErrorCode).
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * Look up the digit value without truncating the 16-bit UChar:
             * a non-basic character must be rejected, not mistaken for
             * the ASCII character that shares its low byte.
             */
            b=src[in++];
            digit= IS_BASIC(b) ? basicToDigit[b] : -1;
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;

            /*
             * Threshold for this digit position. The upper bound is
             * tested as k>=bias+TMAX (rather than t>TMAX) for
             * conformance with draft-ietf-idn-punycode-03.txt.
             */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || UTF_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=UTF_CHAR_LENGTH(n);
        /* "<=": an insertion that exactly fills the buffer still fits */
        if((destLength+cpLength)<=destCapacity) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                UTF_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=UTF16_LEAD(n);
                dest[codeUnitIndex+1]=UTF16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
#endif /* #if !UCONFIG_NO_IDNA */

118
source/common/punycode.h Normal file
View file

@ -0,0 +1,118 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: punycode.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan31
* created by: Markus W. Scherer
*/
/* This ICU code derived from: */
/*
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/
*/
#ifndef __PUNYCODE_H__
#define __PUNYCODE_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
/**
* u_strToPunycode() converts Unicode to Punycode.
*
* The input string must not contain single, unpaired surrogates.
* The output will be represented as an array of ASCII code points.
*
* The output string is NUL-terminated according to normal ICU
* string output rules.
*
* @param src Input Unicode string.
* This function handles a limited number of code points
* (the limit is >=64).
* U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
* @param dest Output Punycode array.
* @param destCapacity Size of dest.
* @param caseFlags Vector of boolean values, one per input UChar,
* indicating that the corresponding character is to be
* marked for the decoder optionally
* uppercasing (TRUE) or lowercasing (FALSE)
* the character.
* ASCII characters are output directly in the case as marked.
* Flags corresponding to trail surrogates are ignored.
* If caseFlags==NULL then input characters are not
* case-mapped.
* @param pErrorCode ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if src contains
* unmatched single surrogates.
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
* too many code points.
* @return Number of ASCII characters in the Punycode output (dest).
*
* @see u_strFromPunycode
*/
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
const UBool *caseFlags,
UErrorCode *pErrorCode);
/**
* u_strFromPunycode() converts Punycode to Unicode.
* The Unicode string will be at most as long (in UChars)
* as the Punycode string (in chars).
*
* @param src Input Punycode string.
* @param srcLength Length of src, or -1 if NUL-terminated
* @param dest Output Unicode string buffer.
* @param destCapacity Size of dest in number of UChars,
* and of caseFlags in numbers of UBools.
* @param caseFlags Output array for case flags as
* defined by the Punycode string.
* The caller should uppercase (TRUE) or lowercase (FALSE)
* the corresponding character in dest.
* For supplementary characters, only the lead surrogate
* is marked, and FALSE is stored for the trail surrogate.
* This is redundant and not necessary for ASCII characters
* because they are already in the case indicated.
* Can be NULL if the case flags are not needed.
* @param pErrorCode ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if a non-ASCII character
* precedes the last delimiter ('-'),
* or if an invalid character (not a-zA-Z0-9) is found
* after the last delimiter.
* U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
* @return Number of UChars written to dest.
*
* @see u_strToPunycode
*/
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
UBool *caseFlags,
UErrorCode *pErrorCode);
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

1949
source/common/putil.c Normal file

File diff suppressed because it is too large Load diff

277
source/common/putilimp.h Normal file
View file

@ -0,0 +1,277 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putilimp.h
*
* Date Name Description
* 10/17/04 grhoten Move internal functions from putil.h to this file.
******************************************************************************
*/
#ifndef PUTILIMP_H
#define PUTILIMP_H
#include "unicode/utypes.h"
#include "unicode/putil.h"
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
/**
* Platform utilities isolate the platform dependencies of the
* library. For each platform to which this code is ported, these
* functions may have to be re-implemented.
*/
/**
* Floating point utility to determine if a double is Not a Number (NaN).
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d);
/**
* Floating point utility to determine if a double has an infinite value.
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d);
/**
* Floating point utility to determine if a double has a positive infinite value.
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
/**
* Floating point utility to determine if a double has a negative infinite value.
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
/**
* Floating point utility that returns a Not a Number (NaN) value.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_getNaN(void);
/**
* Floating point utility that returns an infinite value.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_getInfinity(void);
/**
* Floating point utility to truncate a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_trunc(double d);
/**
* Floating point utility to calculate the floor of a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_floor(double d);
/**
* Floating point utility to calculate the ceiling of a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_ceil(double d);
/**
* Floating point utility to calculate the absolute value of a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_fabs(double d);
/**
* Floating point utility to calculate the fractional and integer parts of a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger);
/**
* Floating point utility to calculate the remainder of a double divided by another double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y);
/**
* Floating point utility to calculate d to the power of exponent (d^exponent).
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent);
/**
* Floating point utility to calculate 10 to the power of exponent (10^exponent).
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent);
/**
* Floating point utility to calculate the maximum value of two doubles.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y);
/**
* Floating point utility to calculate the minimum value of two doubles.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y);
/**
* Private utility to calculate the maximum value of two integers.
* @internal
*/
U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
/**
* Private utility to calculate the minimum value of two integers.
* @internal
*/
U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
/**
* uprv_isNegative(number) tests the sign of number by reading one byte of its
* storage as a signed char and checking whether that byte is negative:
* the first byte on big-endian platforms, the last byte otherwise.
* The argument must be an lvalue because its address is taken.
*/
#if U_IS_BIG_ENDIAN
# define uprv_isNegative(number) (*((signed char *)&(number))<0)
#else
# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
#endif
/**
* Return the largest positive number that can be represented by an integer
* type of arbitrary bit length.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void);
/**
* Floating point utility to calculate the logarithm of a double.
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_log(double d);
/**
* Performs conventional rounding, e.g. uprv_floor(x + 0.5).
* @param x the double number
* @return the rounded double
* @internal
*/
U_INTERNAL double U_EXPORT2 uprv_round(double x);
#if 0
/**
* Returns the number of digits after the decimal point in a double number x.
*
* @param x the double number
* @return the number of digits after the decimal point in a double number x.
* @internal
*/
/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
#endif
/**
* Time zone utilities
*
* Wrappers for C runtime library functions relating to timezones.
* The uprv_tzset() function (similar to tzset) uses the current setting
* of the environment variable TZ to assign values to three global
* variables: daylight, timezone, and tzname. These variables have the
* following meanings, and are declared in &lt;time.h&gt;.
*
* daylight Nonzero if daylight-saving-time zone (DST) is specified
* in TZ; otherwise, 0. Default value is 1.
* timezone Difference in seconds between coordinated universal
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
* tzname(0) Three-letter time-zone name derived from TZ environment
* variable. E.g., "PST".
* tzname(1) Three-letter DST zone name derived from TZ environment
* variable. E.g., "PDT". If DST zone is omitted from TZ,
* tzname(1) is an empty string.
*
* Notes: For example, to set the TZ environment variable to correspond
* to the current time zone in Germany, you can use one of the
* following statements:
*
* set TZ=GST1GDT
* set TZ=GST+1GDT
*
* If the TZ value is not set, uprv_tzset() attempts to use the time zone
* information specified by the operating system. Under Windows NT
* and Windows 95, this information is specified in the Control Panel's
* Date/Time application.
* @internal
*/
U_INTERNAL void U_EXPORT2 uprv_tzset(void);
/**
* Difference in seconds between coordinated universal
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
* @return the difference in seconds between coordinated universal time and local time.
* @internal
*/
U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void);
/**
* tzname(0) Three-letter time-zone name derived from TZ environment
* variable. E.g., "PST".
* tzname(1) Three-letter DST zone name derived from TZ environment
* variable. E.g., "PDT". If DST zone is omitted from TZ,
* tzname(1) is an empty string.
* @internal
*/
U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
/**
* Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
* @return the UTC time measured in milliseconds
* @internal
*/
U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
/**
* Determine whether a pathname is absolute or not, as defined by the platform.
* @param path Pathname to test
* @return TRUE if the path is absolute
* @internal (ICU 3.0)
*/
U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
/**
* Use U_MAX_PTR instead of this function.
* @param base pointer to test
* @return the largest possible pointer greater than the base
* @internal (ICU 3.8)
*/
U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
/**
* Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
* In fact, buffer sizes must not exceed 2GB so that the difference between
* the buffer limit and the buffer start can be expressed in an int32_t.
*
* The definition of U_MAX_PTR must fulfill the following conditions:
* - return the largest possible pointer greater than base
* - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
* - avoid wrapping around at high addresses
* - make sure that the returned pointer is not farther from base than 0x7fffffff
*
* @param base The beginning of a buffer to find the maximum offset from
* @internal
*/
#ifndef U_MAX_PTR
# if defined(OS390) && !defined(_LP64)
/* We have 31-bit pointers. */
# define U_MAX_PTR(base) ((void *)0x7fffffff)
# elif defined(OS400)
/* Delegate to the uprv_maximumPtr() function on this platform. */
# define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
# elif defined(__GNUC__) && __GNUC__ >= 4
/*
* Due to a compiler optimization bug, gcc 4 causes test failures when doing
* this math arithmetic on pointers on some platforms. It seems like the
* pointers are considered signed instead of unsigned. The uintptr_t type
* isn't available on all platforms (i.e MSVC 6) and pointers aren't always
* a scalar value (i.e. i5/OS see uprv_maximumPtr function).
*/
/* Integer form: base+0x7fffffff, or the all-ones value if that sum wraps around. */
# define U_MAX_PTR(base) \
((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
? ((uintptr_t)(base)+0x7fffffffu) \
: (uintptr_t)-1))
# else
/* Generic form: the same computation done with char * arithmetic. */
# define U_MAX_PTR(base) \
((char *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
? ((char *)(base)+0x7fffffffu) \
: (char *)-1))
# endif
#endif
#endif

1855
source/common/rbbi.cpp Normal file

File diff suppressed because it is too large Load diff

453
source/common/rbbicst.pl Executable file
View file

@ -0,0 +1,453 @@
#**************************************************************************
# Copyright (C) 2002-2005 International Business Machines Corporation *
# and others. All rights reserved. *
#**************************************************************************
#
# rbbicst Compile the RBBI rule parser state table data into initialized C data.
# Usage:
# cd icu/source/common
# perl rbbicst.pl < rbbirpt.txt > rbbirpt.h
# perl rbbicst.pl -j < rbbirpt.txt > RBBIRuleParseTable.java
#
# The output file, rbbirpt.h, is included by some of the .cpp rbbi
# implementation files. This perl script is NOT run as part
# of a normal ICU build. It is run by hand when needed, and the
# rbbirpt.h generated file is put back into cvs.
#
# See rbbirpt.txt for a description of the input format for this script.
#
if ($ARGV[0] eq "-j") {
$javaOutput = 1;
shift @ARGV;
}
$num_states = 1; # Always the state number for the line being compiled.
$line_num = 0; # The line number in the input file.
$states{"pop"} = 255; # Add the "pop" to the list of defined state names.
# This prevents any state from being labelled with "pop",
# and resolves references to "pop" in the next state field.
line_loop: while (<>) {
chomp();
$line = $_;
@fields = split();
$line_num++;
# Remove # comments, which are any fields beginning with a #, plus all
# that follow on the line.
for ($i=0; $i<@fields; $i++) {
if ($fields[$i] =~ /^#/) {
@fields = @fields[0 .. $i-1];
last;
}
}
# ignore blank lines, and those with no fields left after stripping comments..
if (@fields == 0) {
next;
}
#
# State Label: handling.
# Does the first token end with a ":"? If so, it's the name of a state.
# Put in a hash, together with the current state number,
# so that we can later look up the number from the name.
#
# A repeated label is reported but is not fatal; the later definition
# replaces the earlier one in %states.
#
if ($fields[0] =~ /.*:$/) {
    $state_name = $fields[0];
    $state_name =~ s/://; # strip off the colon from the state name.
    if ($states{$state_name} != 0) {
        # Report the input line number ($line_num), not the line text.
        print " rbbicst: at line $line_num duplicate definition of state $state_name\n";
    }
    $states{$state_name} = $num_states;
    $stateNames[$num_states] = $state_name;
    # if the label was the only thing on this line, go on to the next line,
    # otherwise assume that a state definition is on the same line and fall through.
    if (@fields == 1) {
        next line_loop;
    }
    shift @fields; # shift off label field in preparation
                   # for handling the rest of the line.
}
#
# State Transition line.
# syntax is this,
# character [n] target-state [^push-state] [function-name]
# where
# [something] is an optional something
# character is either a single quoted character e.g. '['
# or a name of a character class, e.g. white_space
#
$state_line_num[$num_states] = $line_num; # remember line number with each state
# so we can make better error messages later.
#
# First field, character class or literal character for this transition.
#
if ($fields[0] =~ /^'.'$/) {
# We've got a quoted literal character.
$state_literal_chars[$num_states] = $fields[0];
$state_literal_chars[$num_states] =~ s/'//g;
} else {
# We've got the name of a character class.
$state_char_class[$num_states] = $fields[0];
if ($fields[0] =~ /[\W]/) {
print " rbbicsts: at line $line_num, bad character literal or character class name.\n";
print " scanning $fields[0]\n";
exit(-1);
}
}
shift @fields;
#
# do the 'n' flag
#
$state_flag[$num_states] = $javaOutput? "false" : "FALSE";
if ($fields[0] eq "n") {
$state_flag[$num_states] = $javaOutput? "true": "TRUE";
shift @fields;
}
#
# do the destination state.
#
$state_dest_state[$num_states] = $fields[0];
if ($fields[0] eq "") {
print " rbbicsts: at line $line_num, destination state missing.\n";
exit(-1);
}
shift @fields;
#
# do the push state, if present.
#
if ($fields[0] =~ /^\^/) {
$fields[0] =~ s/^\^//;
$state_push_state[$num_states] = $fields[0];
if ($fields[0] eq "" ) {
print " rbbicsts: at line $line_num, expected state after ^ (no spaces).\n";
exit(-1);
}
shift @fields;
}
#
# Lastly, do the optional action name.
#
if ($fields[0] ne "") {
$state_func_name[$num_states] = $fields[0];
shift @fields;
}
#
# There should be no fields left on the line at this point.
#
if (@fields > 0) {
print " rbbicsts: at line $line_num, unexpected extra stuff on input line.\n";
print " scanning $fields[0]\n";
}
$num_states++;
}
#
# We've read in the whole file, now go back and output the
# C source code for the state transition table.
#
# We read all states first, before writing anything, so that the state numbers
# for the destination states are all available to be written.
#
#
# Make hashes for the names of the character classes and
# for the names of the actions that appeared.
# A state without an explicit action gets the default action "doNOP".
#
for ($state=1; $state < $num_states; $state++) {
    if ($state_char_class[$state] ne "") {
        if ($charClasses{$state_char_class[$state]} == 0) {
            $charClasses{$state_char_class[$state]} = 1;
        }
    }
    if ($state_func_name[$state] eq "") {
        $state_func_name[$state] = "doNOP";
    }
    # Test the same key that is stored below (@state_func_name).
    if ($actions{$state_func_name[$state]} == 0) {
        $actions{$state_func_name[$state]} = 1;
    }
}
#
# Check that all of the destination states have been defined
#
#
$states{"exit"} = 0; # Predefined state name, terminates state machine.
for ($state=1; $state<$num_states; $state++) {
if ($states{$state_dest_state[$state]} == 0 && $state_dest_state[$state] ne "exit") {
print "Error at line $state_line_num[$state]: target state \"$state_dest_state[$state]\" is not defined.\n";
$errors++;
}
if ($state_push_state[$state] ne "" && $states{$state_push_state[$state]} == 0) {
print "Error at line $state_line_num[$state]: target state \"$state_push_state[$state]\" is not defined.\n";
$errors++;
}
}
die if ($errors>0);
#
# Assign numbers to each of the character classes used.
# Sets are numbered from 128 - 250
# The values 0-127 in the state table are used for matching
# individual ASCII characters (the only thing that can appear in the rules.)
# The "set" names appearing in the code below (default, etc.) need special
# handling because they do not correspond to a normal set of characters,
# but trigger special handling by code in the state machine.
#
$i = 128;
foreach $setName (sort keys %charClasses) {
if ($setName eq "default") {
$charClasses{$setName} = 255;}
elsif ($setName eq "escaped") {
$charClasses{$setName} = 254;}
elsif ($setName eq "escapedP") {
$charClasses{$setName} = 253;}
elsif ($setName eq "eof") {
$charClasses{$setName} = 252;}
else {
# Normal (single) character class. Number them.
$charClasses{$setName} = $i;
$i++;
}
}
my ($sec, $min, $hour, , $day, $mon, $year, $wday, $yday, $isdst) = localtime;
$year += 1900;
if ($javaOutput) {
print "/*\n";
print " *******************************************************************************\n";
print " * Copyright (C) 2003-$year,\n";
print " * International Business Machines Corporation and others. All Rights Reserved.\n";
print " *******************************************************************************\n";
print " */\n";
print " \n";
print "package com.ibm.icu.text;\n";
print " \n";
print "/**\n";
print " * Generated Java File. Do not edit by hand.\n";
print " * This file contains the state table for the ICU Rule Based Break Iterator\n";
print " * rule parser.\n";
print " * It is generated by the Perl script \"rbbicst.pl\" from\n";
print " * the rule parser state definitions file \"rbbirpt.txt\".\n";
print " * \@internal \n";
print " *\n";
print " */\n";
print "class RBBIRuleParseTable\n";
print "{\n";
#
# Emit the constants for the actions to be performed.
#
$n = 1;
foreach $act (sort keys %actions) {
print " static final short $act = $n;\n";
$n++;
}
print " \n";
#
# Emit constants for char class names
#
foreach $setName (sort keys %charClasses) {
print " static final short kRuleSet_$setName = $charClasses{$setName};\n";
}
print "\n\n";
print " static class RBBIRuleTableElement { \n";
print " short fAction; \n";
print " short fCharClass; \n";
print " short fNextState; \n";
print " short fPushState; \n";
print " boolean fNextChar; \n";
print " String fStateName; \n";
print " RBBIRuleTableElement(short a, int cc, int ns, int ps, boolean nc, String sn) { \n";
print " fAction = a; \n";
print " fCharClass = (short)cc; \n";
print " fNextState = (short)ns; \n";
print " fPushState = (short)ps; \n";
print " fNextChar = nc; \n";
print " fStateName = sn; \n";
print " } \n";
print " }; \n";
print " \n";
print " static RBBIRuleTableElement[] gRuleParseStateTable = { \n ";
print " new RBBIRuleTableElement(doNOP, 0, 0,0, true, null ) // 0 \n"; #output the unused state 0.
for ($state=1; $state < $num_states; $state++) {
print " , new RBBIRuleTableElement($state_func_name[$state],";
if ($state_literal_chars[$state] ne "") {
$c = $state_literal_chars[$state];
print("'$c', ");
}else {
print " $charClasses{$state_char_class[$state]},";
}
print " $states{$state_dest_state[$state]},";
# The push-state field is optional. If omitted, fill field with a zero, which flags
# the state machine that there is no push state.
if ($state_push_state[$state] eq "") {
print "0, ";
} else {
print " $states{$state_push_state[$state]},";
}
print " $state_flag[$state], ";
# if this is the first row of the table for this state, put out the state name.
if ($stateNames[$state] ne "") {
print " \"$stateNames[$state]\") ";
} else {
print " null ) ";
}
# Put out a comment showing the number (index) of this state row,
print " // $state ";
print "\n";
}
print " };\n";
print "}; \n";
}
else
{
#
# C++ Output ...
#
print "//---------------------------------------------------------------------------------\n";
print "//\n";
print "// Generated Header File. Do not edit by hand.\n";
print "// This file contains the state table for the ICU Rule Based Break Iterator\n";
print "// rule parser.\n";
print "// It is generated by the Perl script \"rbbicst.pl\" from\n";
print "// the rule parser state definitions file \"rbbirpt.txt\".\n";
print "//\n";
print "// Copyright (C) 2002-$year International Business Machines Corporation \n";
print "// and others. All rights reserved. \n";
print "//\n";
print "//---------------------------------------------------------------------------------\n";
print "#ifndef RBBIRPT_H\n";
print "#define RBBIRPT_H\n";
print "\n";
print "U_NAMESPACE_BEGIN\n";
#
# Emit the constants for indicies of Unicode Sets
# Define one constant for each of the character classes encountered.
# At the same time, store the index corresponding to the set name back into hash.
#
print "//\n";
print "// Character classes for RBBI rule scanning.\n";
print "//\n";
foreach $setName (sort keys %charClasses) {
if ($charClasses{$setName} < 250) {
# Normal character class.
print " static const uint8_t kRuleSet_$setName = $charClasses{$setName};\n";
}
}
print "\n\n";
#
# Emit the enum for the actions to be performed.
#
print "enum RBBI_RuleParseAction {\n";
foreach $act (sort keys %actions) {
print " $act,\n";
}
print " rbbiLastAction};\n\n";
#
# Emit the struct definition for transtion table elements.
#
print "//-------------------------------------------------------------------------------\n";
print "//\n";
print "// RBBIRuleTableEl represents the structure of a row in the transition table\n";
print "// for the rule parser state machine.\n";
print "//-------------------------------------------------------------------------------\n";
print "struct RBBIRuleTableEl {\n";
print " RBBI_RuleParseAction fAction;\n";
print " uint8_t fCharClass; // 0-127: an individual ASCII character\n";
print " // 128-255: character class index\n";
print " uint8_t fNextState; // 0-250: normal next-stat numbers\n";
print " // 255: pop next-state from stack.\n";
print " uint8_t fPushState;\n";
print " UBool fNextChar;\n";
print "};\n\n";
#
# emit the state transition table
#
print "static const struct RBBIRuleTableEl gRuleParseStateTable[] = {\n";
print " {doNOP, 0, 0, 0, TRUE}\n"; # State 0 is a dummy. Real states start with index = 1.
for ($state=1; $state < $num_states; $state++) {
print " , {$state_func_name[$state],";
if ($state_literal_chars[$state] ne "") {
$c = $state_literal_chars[$state];
printf(" %d /* $c */,", ord($c)); # use numeric value, so EBCDIC machines are ok.
}else {
print " $charClasses{$state_char_class[$state]},";
}
print " $states{$state_dest_state[$state]},";
# The push-state field is optional. If omitted, fill field with a zero, which flags
# the state machine that there is no push state.
if ($state_push_state[$state] eq "") {
print "0, ";
} else {
print " $states{$state_push_state[$state]},";
}
print " $state_flag[$state]} ";
# Put out a C++ comment showing the number (index) of this state row,
# and, if this is the first row of the table for this state, the state name.
print " // $state ";
if ($stateNames[$state] ne "") {
print " $stateNames[$state]";
}
print "\n";
};
print " };\n";
#
# emit a mapping array from state numbers to state names.
#
# This array is used for producing debugging output from the rule parser.
#
print "#ifdef RBBI_DEBUG\n";
print "static const char * const RBBIRuleStateNames[] = {";
for ($state=0; $state<$num_states; $state++) {
if ($stateNames[$state] ne "") {
print " \"$stateNames[$state]\",\n";
} else {
print " 0,\n";
}
}
print " 0};\n";
print "#endif\n\n";
print "U_NAMESPACE_END\n";
print "#endif\n";
}

450
source/common/rbbidata.cpp Normal file
View file

@ -0,0 +1,450 @@
/*
***************************************************************************
* Copyright (C) 1999-2008 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/utypes.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "utrie.h"
#include "udatamem.h"
#include "cmemory.h"
#include "cstring.h"
#include "umutex.h"
#include "uassert.h"
//-----------------------------------------------------------------------------------
//
// Trie access folding function. Copied as-is from properties code in uchar.c
//
//-----------------------------------------------------------------------------------
U_CDECL_BEGIN
/*
 * Trie folding callback.
 *
 * @param data  a 16-bit trie result value.
 * @return      bits 14..0 of data when bit 15 is set, otherwise 0.
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    const uint32_t kHasOffsetFlag = 0x8000;   /* bit 15: a folding offset is present   */
    const uint32_t kOffsetMask    = 0x7fff;   /* bits 14..0: the folding offset itself */

    return ((data & kHasOffsetFlag) != 0) ? (int32_t)(data & kOffsetMask) : 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN
//-----------------------------------------------------------------------------
//
// Constructors.
//
//-----------------------------------------------------------------------------
// Construct a wrapper around the RBBI data image at 'data'.
// All set-up, including validation of the image, is delegated to init();
// any problem is reported through 'status'.
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) {
init(data, status);
}
// Construct a wrapper around 'data' without taking ownership of it:
// with fDontFreeData set, the destructor does not uprv_free() the image.
// The unnamed EDontAdopt parameter only selects this overload.
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) {
init(data, status);
// Must follow init(), which sets fDontFreeData to FALSE when it accepts the data.
fDontFreeData = TRUE;
}
// Construct a wrapper around RBBI data held in a UDataMemory.
// The wrapper keeps 'udm'; the destructor closes it with udata_close().
RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
    // The RBBI image starts right after the generic ICU data header.
    // headerSize (rather than info.size) is used for the skip because it takes
    // into consideration the padding added in by udata_write.
    char *imageStart = (char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize;
    const RBBIDataHeader *rbbiHeader = (const RBBIDataHeader *)imageStart;

    init(rbbiHeader, status);

    // Assigned after init(), because init() sets fUDataMem to NULL when it
    // accepts the data.
    fUDataMem = udm;
}
//-----------------------------------------------------------------------------
//
// init(). Does most of the work of construction, shared between the
// constructors.
//
//-----------------------------------------------------------------------------
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
fHeader = data;
if (fHeader->fMagic != 0xb1a0 ||
!(fHeader->fFormatVersion[0] == 3 || // ICU 3.4
*(int32_t *)fHeader->fFormatVersion == 1)) // ICU 3.2 and earlier.
{
status = U_INVALID_FORMAT_ERROR;
return;
}
fDontFreeData = FALSE;
fUDataMem = NULL;
fReverseTable = NULL;
fSafeFwdTable = NULL;
fSafeRevTable = NULL;
if (data->fFTableLen != 0) {
fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
}
if (data->fRTableLen != 0) {
fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
}
if (data->fSFTableLen != 0) {
fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
}
if (data->fSRTableLen != 0) {
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
}
utrie_unserialize(&fTrie,
(uint8_t *)data + fHeader->fTrie,
fHeader->fTrieLen,
&status);
if (U_FAILURE(status)) {
return;
}
fTrie.getFoldingOffset=getFoldingOffset;
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
fRuleString.setTo(TRUE, fRuleSource, -1);
U_ASSERT(data->fRuleSourceLen > 0);
fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t);
fRefCount = 1;
#ifdef RBBI_DEBUG
char *debugEnv = getenv("U_RBBIDEBUG");
if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
#endif
}
//-----------------------------------------------------------------------------
//
// Destructor. Don't call this - use removeReference() instead.
//
//-----------------------------------------------------------------------------
RBBIDataWrapper::~RBBIDataWrapper() {
    U_ASSERT(fRefCount == 0);

    // Data that came in through a UDataMemory is released by closing it.
    if (fUDataMem != NULL) {
        udata_close(fUDataMem);
        return;
    }

    // Otherwise the image is freed here, unless the wrapper was told not to.
    if (fDontFreeData) {
        return;
    }
    uprv_free((void *)fHeader);
}
//-----------------------------------------------------------------------------
//
// Operator == Consider two RBBIDataWrappers to be equal if they
// refer to the same underlying data. Although
// the data wrappers are normally shared between
// iterator instances, it's possible to independently
// open the same data twice, and get two instances, which
// should still be ==.
//
//-----------------------------------------------------------------------------
UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
    const RBBIDataHeader *mine   = fHeader;
    const RBBIDataHeader *theirs = other.fHeader;

    // Same underlying image: trivially equal.
    if (mine == theirs) {
        return TRUE;
    }

    // Different images are equal only when they have the same total length
    // and identical bytes over that whole length.
    if (mine->fLength != theirs->fLength) {
        return FALSE;
    }
    return (uprv_memcmp(mine, theirs, mine->fLength) == 0) ? TRUE : FALSE;
}
// Hash code for the wrapped data: simply the byte length of the forward state
// table as recorded in the header.  Wrappers that compare equal with
// operator== have byte-identical headers and therefore the same hash code.
int32_t RBBIDataWrapper::hashCode() {
return fHeader->fFTableLen;
}
//-----------------------------------------------------------------------------
//
// Reference Counting. A single RBBIDataWrapper object is shared among
// however many RulesBasedBreakIterator instances are
// referencing the same data.
//
//-----------------------------------------------------------------------------
// Drop one reference.  The wrapper deletes itself when the count reaches zero,
// so the caller must not touch the object after this call.
void RBBIDataWrapper::removeReference() {
    if (umtx_atomic_dec(&fRefCount) != 0) {
        return;             // still referenced elsewhere
    }
    delete this;
}
// Add one reference (atomic increment of fRefCount) and return this wrapper,
// so the call can be used directly in an assignment.
RBBIDataWrapper *RBBIDataWrapper::addReference() {
umtx_atomic_inc(&fRefCount);
return this;
}
//-----------------------------------------------------------------------------
//
// getRuleSourceString
//
//-----------------------------------------------------------------------------
// Return the break rule source text as a UnicodeString.
// The returned reference is to a member (fRuleString, set up by init()).
const UnicodeString &RBBIDataWrapper::getRuleSourceString() const {
return fRuleString;
}
//-----------------------------------------------------------------------------
//
// print - debugging function to dump the runtime data tables.
//
//-----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Dump one state table: a heading, a header line with one column per character
// category, then one line per state.  A NULL table prints a marker line only.
void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
    uint32_t category;
    uint32_t state;

    RBBIDebugPrintf(" %s\n", heading);

    // Column headings.
    RBBIDebugPrintf("State | Acc LA TagIx");
    for (category = 0; category < fHeader->fCatCount; category++) {
        RBBIDebugPrintf("%3d ", category);
    }

    // Separator line.
    RBBIDebugPrintf("\n------|---------------");
    for (category = 0; category < fHeader->fCatCount; category++) {
        RBBIDebugPrintf("----");
    }
    RBBIDebugPrintf("\n");

    if (table == NULL) {
        RBBIDebugPrintf(" N U L L T A B L E\n\n");
        return;
    }

    // Rows are fRowLen bytes apart inside fTableData.
    for (state = 0; state < table->fNumStates; state++) {
        const char *rowStart = table->fTableData + (table->fRowLen * state);
        RBBIStateTableRow *row = (RBBIStateTableRow *)rowStart;

        RBBIDebugPrintf("%4d | %3d %3d %3d ", state, row->fAccepting, row->fLookAhead, row->fTagIdx);
        for (category = 0; category < fHeader->fCatCount; category++) {
            RBBIDebugPrintf("%3d ", row->fNextState[category]);
        }
        RBBIDebugPrintf("\n");
    }
    RBBIDebugPrintf("\n");
}
#endif
#ifdef RBBI_DEBUG
// Dump the header summary, all four state tables and the rule source text.
void RBBIDataWrapper::printData() {
    const RBBIDataHeader *hdr = fHeader;

    RBBIDebugPrintf("RBBI Data at %p\n", (void *)hdr);
    RBBIDebugPrintf(" Version = {%d %d %d %d}\n",
                    hdr->fFormatVersion[0], hdr->fFormatVersion[1],
                    hdr->fFormatVersion[2], hdr->fFormatVersion[3]);
    RBBIDebugPrintf(" total length of data = %d\n", hdr->fLength);
    RBBIDebugPrintf(" number of character categories = %d\n\n", hdr->fCatCount);

    printTable("Forward State Transition Table", fForwardTable);
    printTable("Reverse State Transition Table", fReverseTable);
    printTable("Safe Forward State Transition Table", fSafeFwdTable);
    printTable("Safe Reverse State Transition Table", fSafeRevTable);

    // The rule source is NUL terminated; print it one code unit at a time.
    RBBIDebugPrintf("\nOrignal Rules source:\n");
    for (const UChar *p = fRuleSource; *p != 0; ++p) {
        RBBIDebugPrintf("%c", *p);
    }
    RBBIDebugPrintf("\n\n");
}
#endif
U_NAMESPACE_END
U_NAMESPACE_USE
//-----------------------------------------------------------------------------
//
// ubrk_swap - byte swap and char encoding swap of RBBI data
//
//-----------------------------------------------------------------------------
//
//  ubrk_swap   Swap the byte order / charset family of one RBBI data item.
//
//  @param ds       the swapper describing input and output platform properties.
//  @param inData   the item, starting with the generic ICU data header.
//  @param length   number of bytes at inData, or -1 to preflight (compute the
//                  size only; nothing is written to outData).
//  @param outData  destination buffer; may be the same as inData (in-place swap).
//  @param status   in/out ICU error code.
//  @return         total size in bytes (ICU data header + RBBI data) on success
//                  or when preflighting; 0 on any error.
//
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
          UErrorCode *status) {

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //
    //  Check that the data header is for break data.
    //    (Header contents are defined in genbrk.cpp)
    //
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
    if(!(  pInfo->dataFormat[0]==0x42 &&   /* dataFormat="Brk " */
           pInfo->dataFormat[1]==0x72 &&
           pInfo->dataFormat[2]==0x6b &&
           pInfo->dataFormat[3]==0x20 &&
           pInfo->formatVersion[0]==3  )) {
        udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Swap the data header.  (This is the generic ICU Data Header, not the RBBI Specific
    //                         RBBIDataHeader).  This swap also conveniently gets us
    //                         the size of the ICU d.h., which lets us locate the start
    //                         of the RBBI specific data.
    //
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
    if (U_FAILURE(*status)) {
        // Without a valid header size the RBBI data cannot be located.
        // Stop here so the original error code is preserved and nothing is
        // read from a bogus offset.
        return 0;
    }

    //
    // Get the RBBI Data Header, and check that it appears to be OK.
    //
    //    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
    //           an int32_t with a value of 1.  Starting with ICU 3.4,
    //           RBBI's fDataFormat matches the dataFormat field from the
    //           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
    //
    const uint8_t  *inBytes =(const uint8_t *)inData+headerSize;
    RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
    UBool           formatVersionOne = ds->readUInt32(*(int32_t *)rbbiDH->fFormatVersion) == 1;
    if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
        !(formatVersionOne || rbbiDH->fFormatVersion[0] == 3) ||
        ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader))
    {
        udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Preflight operation?  Just return the size
    //
    int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
    int32_t totalSize = headerSize + breakDataLength;
    if (length < 0) {
        return totalSize;
    }

    //
    // Check that length passed in is consistent with length from RBBI data header.
    //
    if (length < totalSize) {
        udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
                         breakDataLength);
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    //
    // Swap the Data.  Do the data itself first, then the RBBI Data Header, because
    //                 we need to reference the header to locate the data, and an
    //                 inplace swap of the header leaves it unusable.
    //
    uint8_t         *outBytes = (uint8_t *)outData + headerSize;
    RBBIDataHeader  *outputDH = (RBBIDataHeader *)outBytes;

    int32_t   tableStartOffset;
    int32_t   tableLength;

    //
    // If not swapping in place, zero out the output buffer before starting.
    //    Individual tables and other data items within are aligned to 8 byte boundaries
    //    when originally created.  Any unused space between items needs to be zero.
    //
    if (inBytes != outBytes) {
        uprv_memset(outBytes, 0, breakDataLength);
    }

    //
    // Each state table begins with several 32 bit fields.  Calculate the size
    //   in bytes of these.
    //
    int32_t         topSize = offsetof(RBBIStateTable, fTableData);

    // Forward state table.
    //   The leading 32 bit fields are swapped as 32 bit values, the rows
    //   that follow as 16 bit values.
    tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
    tableLength      = ds->readUInt32(rbbiDH->fFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
    tableLength      = ds->readUInt32(rbbiDH->fRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Safe Forward state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
    tableLength      = ds->readUInt32(rbbiDH->fSFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Safe Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
    tableLength      = ds->readUInt32(rbbiDH->fSRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Trie table for character categories
    utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
                   outBytes+ds->readUInt32(rbbiDH->fTrie), status);

    // Source Rules Text.  It's UChar data
    ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
                        outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);

    // Table of rule status values.  It's all int32_t values
    ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
                        outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);

    // And, last, the header.
    //   For the old version one format, the entire header consists of int32_t values.
    //   For the newer formats, the fDataFormat field is an array of four bytes.
    //   Swap the whole thing as int32_t, then, for the newer format, re-swap the one field.
    //
    ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
    if (formatVersionOne == FALSE) {
        ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);
    }

    return totalSize;
}
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

198
source/common/rbbidata.h Normal file
View file

@ -0,0 +1,198 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2005,2008 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: rbbidata.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* RBBI data formats Includes
*
* Structs that describes the format of the Binary RBBI data,
* as it is stored in ICU's data file.
*
* RBBIDataWrapper - Instances of this class sit between the
* raw data structs and the RulesBasedBreakIterator objects
* that are created by applications. The wrapper class
* provides reference counting for the underlying data,
* and direct pointers to data that would not otherwise
* be accessible without ugly pointer arithmetic. The
* wrapper does not attempt to provide any higher level
* abstractions for the data itself.
*
* There will be only one instance of RBBIDataWrapper for any
* set of RBBI run time data being shared by instances
* (clones) of RulesBasedBreakIterator.
*/
#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__
#include "unicode/utypes.h"
#include "unicode/udata.h"
#include "udataswp.h"
/**
* Swap RBBI data. See udataswp.h.
*
* @param ds the swapper for the input and output platforms.
* @param inData the data item to swap, beginning with the ICU data header.
* @param length number of bytes at inData, or -1 to preflight, in which
* case only the required size is computed and returned.
* @param outData the output buffer; must not be NULL when length > 0.
* @param pErrorCode in/out ICU error code; the function returns 0 at once if
* this is NULL or already indicates a failure.
* @return the total size in bytes of the data item (ICU data header plus RBBI
* data), or 0 on error.
* @internal
*/
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#ifdef XP_CPLUSPLUS
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "utrie.h"
U_NAMESPACE_BEGIN
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
/* Header found at the very start of a block of binary RBBI data. */
/* It identifies the data and locates each of the sections that follow. */
struct RBBIDataHeader {
uint32_t fMagic; /* == 0xb1a0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */
/* */
/* Offsets and sizes of each of the subsections within the RBBI data. */
/* All offsets are bytes from the start of the RBBIDataHeader. */
/* All sizes are in bytes. */
/* A table whose length is zero is not present in the data. */
/* */
uint32_t fFTable; /* Offset to the forward state transition table. */
uint32_t fFTableLen;
uint32_t fRTable; /* Offset to the reverse state transition table. */
uint32_t fRTableLen;
uint32_t fSFTable; /* Offset to the safe point forward transition table */
uint32_t fSFTableLen;
uint32_t fSRTable; /* Offset to the safe point reverse transition table */
uint32_t fSRTableLen;
uint32_t fTrie; /* Offset to Trie data for character categories */
uint32_t fTrieLen;
uint32_t fRuleSource; /* Offset to the source for the break */
uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
uint32_t fStatusTable; /* Offset to the table of rule status values */
uint32_t fStatusTableLen;
uint32_t fReserved[6]; /* Reserved for expansion */
};
/* One row (one state) of a state transition table. */
/* Rows are variable length: fNextState really holds one entry per character */
/* category, and consecutive rows are RBBIStateTable::fRowLen bytes apart. */
struct RBBIStateTableRow {
int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
/* Value 0: not an accepting state. */
/* -1: Unconditional Accepting state. */
/* positive: Look-ahead match has completed. */
/* Actual boundary position happened earlier */
/* Value here == fLookAhead in earlier */
/* state, at actual boundary pos. */
int16_t fLookAhead; /* Non-zero if this row is for a state that */
/* corresponds to a '/' in the rule source. */
/* Value is the same as the fAccepting */
/* value for the rule (which will appear */
/* in a different state). */
int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
/* from a rule. Value is the index in the */
/* StatusTable of the set of matching */
/* tags (rule status values) */
int16_t fReserved;
uint16_t fNextState[2]; /* Next State, indexed by char category. */
/* Array Size is fNumCols from the */
/* state table header. */
/* NOTE(review): RBBIStateTable declares no */
/* fNumCols field; confirm which count */
/* is meant here. */
/* CAUTION: see RBBITableBuilder::getTableSize() */
/* before changing anything here. */
};
/* Header of one state transition table, immediately followed by its rows. */
/* Row number s starts at fTableData + s * fRowLen. */
struct RBBIStateTable {
uint32_t fNumStates; /* Number of states. */
uint32_t fRowLen; /* Length of a state table row, in bytes. */
uint32_t fFlags; /* Option Flags for this state table */
uint32_t fReserved; /* reserved */
char fTableData[4]; /* First RBBIStateTableRow begins here. */
/* (making it char[] simplifies ugly address */
/* arithmetic for indexing variable length rows.) */
};
/* Flag values for a state table. */
/* NOTE(review): presumably bit values for RBBIStateTable::fFlags - confirm */
/* against the table builder and the runtime engine. */
typedef enum {
RBBI_LOOKAHEAD_HARD_BREAK = 1,
RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;
/* */
/* The reference counting wrapper class */
/* */
class RBBIDataWrapper : public UMemory {
public:
/* Tag type selecting the constructor that does not take ownership of the data. */
enum EDontAdopt {
kDontAdopt
};
/* Wrap raw RBBI data; the destructor frees it with uprv_free(). */
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
/* Wrap raw RBBI data that the destructor must not free. */
RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
/* Wrap RBBI data held in a UDataMemory; the destructor closes the UDataMemory. */
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
/* Do not call directly - use removeReference() instead. */
~RBBIDataWrapper();
/* Set-up code shared by the constructors. */
void init(const RBBIDataHeader *data, UErrorCode &status);
/* Reference counting. removeReference() deletes the wrapper when the count reaches zero. */
RBBIDataWrapper *addReference();
void removeReference();
/* Two wrappers are equal when they refer to the same, or to byte-identical, data. */
UBool operator ==(const RBBIDataWrapper &other) const;
int32_t hashCode();
const UnicodeString &getRuleSourceString() const;
/* Debugging dumps. Without RBBI_DEBUG they are macros that expand to nothing. */
#ifdef RBBI_DEBUG
void printData();
void printTable(const char *heading, const RBBIStateTable *table);
#else
#define printData()
#define printTable(heading, table)
#endif
/* */
/* Pointers to items within the data */
/* */
const RBBIDataHeader *fHeader;
const RBBIStateTable *fForwardTable;
const RBBIStateTable *fReverseTable;
const RBBIStateTable *fSafeFwdTable;
const RBBIStateTable *fSafeRevTable;
const UChar *fRuleSource;
const int32_t *fRuleStatusTable;
/* number of int32_t values in the rule status table. Used to sanity check indexing */
int32_t fStatusMaxIdx;
/* Trie for character categories, unserialized from the data by init(). */
UTrie fTrie;
private:
int32_t fRefCount; /* Set to 1 by a successful init(). */
UDataMemory *fUDataMem; /* Non-NULL when the data came from a UDataMemory. */
UnicodeString fRuleString; /* String wrapper around fRuleSource. */
UBool fDontFreeData; /* TRUE: the destructor must not uprv_free() the data. */
RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
};
U_NAMESPACE_END
#endif /* C++ */
#endif

358
source/common/rbbinode.cpp Normal file
View file

@ -0,0 +1,358 @@
/*
***************************************************************************
* Copyright (C) 2002-2008 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
//
// File: rbbinode.cpp
//
// Implementation of class RBBINode, which represents a node in the
// tree generated when parsing the Rules Based Break Iterator rules.
//
// This "Class" is actually closer to a struct.
// Code using it is expected to directly access fields much of the time.
//
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/parsepos.h"
#include "uvector.h"
#include "rbbirb.h"
#include "rbbinode.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
#ifdef RBBI_DEBUG
// Debug builds only: the most recently issued node serial number.
// Each RBBINode constructor pre-increments it to obtain the node's fSerialNum.
static int gLastSerial = 0;
#endif
//-------------------------------------------------------------------------
//
// Constructor. Just set the fields to reasonable default values.
//
//-------------------------------------------------------------------------
RBBINode::RBBINode(NodeType t) : UMemory() {
#ifdef RBBI_DEBUG
    fSerialNum    = ++gLastSerial;
#endif
    // Identity and tree links.
    fType         = t;
    fParent       = NULL;
    fLeftChild    = NULL;
    fRightChild   = NULL;

    // Node payload.
    fInputSet     = NULL;
    fFirstPos     = 0;
    fLastPos      = 0;
    fNullable     = FALSE;
    fLookAheadEnd = FALSE;
    fVal          = 0;

    // Only the operator node types below carry a non-zero precedence.
    switch (t) {
    case opCat:
        fPrecedence = precOpCat;
        break;
    case opOr:
        fPrecedence = precOpOr;
        break;
    case opStart:
        fPrecedence = precStart;
        break;
    case opLParen:
        fPrecedence = precLParen;
        break;
    default:
        fPrecedence = precZero;
        break;
    }

    // Position sets, filled in later by code outside this class.
    UErrorCode status = U_ZERO_ERROR;
    fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
    fLastPosSet   = new UVector(status);
    fFollowPos    = new UVector(status);
}
// Copy constructor.  Copies the contents of a single node only:
//   - the new node is not linked into any tree (parent and children are NULL),
//   - it gets fresh, empty position sets,
//   - the fInputSet pointer is copied as-is, not cloned.
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
#ifdef RBBI_DEBUG
    fSerialNum   = ++gLastSerial;
#endif
    fType         = other.fType;
    fParent       = NULL;
    fLeftChild    = NULL;
    fRightChild   = NULL;
    fInputSet     = other.fInputSet;
    fPrecedence   = other.fPrecedence;
    fText         = other.fText;
    fFirstPos     = other.fFirstPos;
    fLastPos      = other.fLastPos;
    fNullable     = other.fNullable;
    fVal          = other.fVal;
    // Previously left uninitialized in copies, so a cloned node carried an
    // indeterminate look-ahead flag.
    fLookAheadEnd = other.fLookAheadEnd;
    UErrorCode     status = U_ZERO_ERROR;
    fFirstPosSet  = new UVector(status);   // TODO - get a real status from somewhere
    fLastPosSet   = new UVector(status);
    fFollowPos    = new UVector(status);
}
//-------------------------------------------------------------------------
//
// Destructor. Deletes both this node AND any child nodes,
// except in the case of variable reference nodes. For
// these, the l. child points back to the definition, which
// is common for all references to the variable, meaning
// it can't be deleted here.
//
//-------------------------------------------------------------------------
RBBINode::~RBBINode() {
    delete fInputSet;
    fInputSet = NULL;

    // varRef and setRef nodes do not own what they point at: multiple
    // instances point to the same "children", whose storage is handled
    // elsewhere.  Every other node type owns, and deletes, its subtrees.
    const UBool childrenAreShared = (fType == varRef || fType == setRef);
    if (!childrenAreShared) {
        delete fLeftChild;
        fLeftChild = NULL;
        delete fRightChild;
        fRightChild = NULL;
    }

    delete fFirstPosSet;
    delete fLastPosSet;
    delete fFollowPos;
}
//-------------------------------------------------------------------------
//
// cloneTree Make a copy of the subtree rooted at this node.
// Discard any variable references encountered along the way,
// and replace with copies of the variable's definitions.
// Used to replicate the expression underneath variable
// references in preparation for generating the DFA tables.
//
//-------------------------------------------------------------------------
// Returns the root of the copied subtree, or NULL if the node for the root
// could not be allocated.  uset nodes are not copied; the original node itself
// is returned for them.
RBBINode *RBBINode::cloneTree() {
    RBBINode    *n;

    if (fType == RBBINode::varRef) {
        // If the current node is a variable reference, skip over it
        //   and clone the definition of the variable instead.
        n = fLeftChild->cloneTree();
    } else if (fType == RBBINode::uset) {
        n = this;
    } else {
        n = new RBBINode(*this);
        // Check for null pointer.
        if (n != NULL) {
            // Cloning a child can fail too (it returns NULL when out of
            // memory), so the result is checked before it is dereferenced.
            if (fLeftChild != NULL) {
                n->fLeftChild = fLeftChild->cloneTree();
                if (n->fLeftChild != NULL) {
                    n->fLeftChild->fParent = n;
                }
            }
            if (fRightChild != NULL) {
                n->fRightChild = fRightChild->cloneTree();
                if (n->fRightChild != NULL) {
                    n->fRightChild->fParent = n;
                }
            }
        }
    }
    return n;
}
//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
// references with a copy of the variable's definition.
// Aside from variables, the tree is not changed.
//
// Return the root of the tree. If the root was not a variable
// reference, it remains unchanged - the root we started with
// is the root we return. If, however, the root was a variable
// reference, the root of the newly cloned replacement tree will
// be returned, and the original tree deleted.
//
// This function works by recursively walking the tree
// without doing anything until a variable reference is
// found, then calling cloneTree() at that point. Any
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
// Returns the (possibly new) root of this subtree.  When this node is itself a
// variable reference it is deleted and a clone of the variable's definition is
// returned instead; that clone is NULL if it could not be allocated.
RBBINode *RBBINode::flattenVariables() {
    if (fType == varRef) {
        RBBINode *retNode = fLeftChild->cloneTree();
        delete this;
        return retNode;
    }

    // A flattened child may come back as NULL (failed clone of a variable
    // definition), so it is checked before its parent link is written.
    if (fLeftChild != NULL) {
        fLeftChild = fLeftChild->flattenVariables();
        if (fLeftChild != NULL) {
            fLeftChild->fParent = this;
        }
    }
    if (fRightChild != NULL) {
        fRightChild = fRightChild->flattenVariables();
        if (fRightChild != NULL) {
            fRightChild->fParent = this;
        }
    }
    return this;
}
//-------------------------------------------------------------------------
//
// flattenSets Walk the parse tree, replacing any nodes of type setRef
// with a copy of the expression tree for the set. A set's
// equivalent expression tree is precomputed and saved as
// the left child of the uset node.
//
//-------------------------------------------------------------------------
void RBBINode::flattenSets() {
U_ASSERT(fType != setRef);
if (fLeftChild != NULL) {
if (fLeftChild->fType==setRef) {
RBBINode *setRefNode = fLeftChild;
RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild;
fLeftChild = replTree->cloneTree();
fLeftChild->fParent = this;
delete setRefNode;
} else {
fLeftChild->flattenSets();
}
}
if (fRightChild != NULL) {
if (fRightChild->fType==setRef) {
RBBINode *setRefNode = fRightChild;
RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild;
fRightChild = replTree->cloneTree();
fRightChild->fParent = this;
delete setRefNode;
} else {
fRightChild->flattenSets();
}
}
}
//-------------------------------------------------------------------------
//
// findNodes() Locate all the nodes of the specified type, starting
// at the specified root.
//
//-------------------------------------------------------------------------
// Appends to 'dest' every node of type 'kind' in the subtree rooted at this
// node: this node first, then the left subtree, then the right subtree.
// Does nothing once 'status' indicates a failure.
void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    if (fType == kind) {
        dest->addElement(this, status);
    }

    RBBINode *const children[] = { fLeftChild, fRightChild };
    for (int32_t i = 0; i < 2; i++) {
        if (children[i] != NULL) {
            children[i]->findNodes(dest, kind, status);
        }
    }
}
//-------------------------------------------------------------------------
//
// print. Print out a single node, for debugging.
//
//-------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBINode::printNode() {
    // Display names, in the same order as the NodeType enum.
    static const char * const kTypeLabels[] = {
        "setRef",
        "uset",
        "varRef",
        "leafChar",
        "lookAhead",
        "tag",
        "endMark",
        "opStart",
        "opCat",
        "opOr",
        "opStar",
        "opPlus",
        "opQuestion",
        "opBreak",
        "opReverse",
        "opLParen"
    };

    // A NULL node prints as just its (null) address.
    if (this==NULL) {
        RBBIDebugPrintf("%10p", (void *)this);
        RBBIDebugPrintf("\n");
        return;
    }

    RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ",
        (void *)this, kTypeLabels[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
        fSerialNum, fFirstPos, fVal);

    // Variable references additionally show their text.
    if (fType == varRef) {
        RBBI_DEBUG_printUnicodeString(fText);
    }
    RBBIDebugPrintf("\n");
}
#endif
#ifdef RBBI_DEBUG
// Debug-print a UnicodeString one code unit at a time, then pad the output
// with spaces up to minWidth columns.
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
{
    const int len = s.length();

    int pos = 0;
    while (pos < len) {
        RBBIDebugPrintf("%c", s.charAt(pos));
        ++pos;
    }

    for (int col = len; col < minWidth; ++col) {
        RBBIDebugPrintf(" ");
    }
}
#endif
//-------------------------------------------------------------------------
//
// print. Print out the tree of nodes rooted at "this"
//
//-------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBINode::printTree(UBool printHeading) {
    if (printHeading) {
        RBBIDebugPrintf( "-------------------------------------------------------------------\n"
                         " Address type Parent LeftChild RightChild serial position value\n"
                       );
    }
    this->printNode();

    // Nothing below a NULL node.  The definition hanging under a variable
    // reference is not dumped either; children of all other node types are.
    if (this == NULL || fType == varRef) {
        return;
    }
    if (fLeftChild != NULL) {
        fLeftChild->printTree(FALSE);
    }
    if (fRightChild != NULL) {
        fRightChild->printTree(FALSE);
    }
}
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

118
source/common/rbbinode.h Normal file
View file

@ -0,0 +1,118 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2001-2006, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#ifndef RBBINODE_H
#define RBBINODE_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
//
// class RBBINode
//
// Represents a node in the parse tree generated when reading
// a rule file.
//
U_NAMESPACE_BEGIN
class UnicodeSet;
class UVector;
class RBBINode : public UMemory {
public:
// Kinds of parse tree node.
// NOTE: the debug name table in RBBINode::printNode() lists names in this same order.
enum NodeType {
setRef,
uset,
varRef,
leafChar,
lookAhead,
tag,
endMark,
opStart,
opCat,
opOr,
opStar,
opPlus,
opQuestion,
opBreak,
opReverse,
opLParen
};
// Operator precedence values. The constructor assigns a non-zero precedence
// only to opStart, opLParen, opOr and opCat nodes.
enum OpPrecedence {
precZero,
precStart,
precLParen,
precOpOr,
precOpCat
};
NodeType fType;
RBBINode *fParent;
RBBINode *fLeftChild;
RBBINode *fRightChild;
UnicodeSet *fInputSet; // For uset nodes only.
OpPrecedence fPrecedence; // For binary ops only.
UnicodeString fText; // Text corresponding to this node.
// May be lazily evaluated when (if) needed
// for some node types.
int fFirstPos; // Position in the rule source string of the
// first text associated with the node.
// If there's a left child, this will be the same
// as that child's left pos.
int fLastPos; // Last position in the rule source string
// of any text associated with this node.
// If there's a right child, this will be the same
// as that child's last position.
UBool fNullable; // See Aho.
int32_t fVal; // For leafChar nodes, the value.
// Values are the character category,
// corresponds to columns in the final
// state transition table.
UBool fLookAheadEnd; // For endMark nodes, set TRUE if
// marking the end of a look-ahead rule.
// Position sets. Each node allocates its own, initially empty, vectors
// in its constructor and deletes them in its destructor.
UVector *fFirstPosSet;
UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
UVector *fFollowPos;
RBBINode(NodeType t);
RBBINode(const RBBINode &other); // Copies one node only; the copy has no parent or children.
~RBBINode(); // Also deletes child nodes, except for varRef and setRef nodes.
RBBINode *cloneTree(); // Copy the subtree rooted at this node.
RBBINode *flattenVariables(); // Replace variable references with copies of their definitions.
void flattenSets(); // Replace setRef nodes with copies of the sets' expression trees.
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
#ifdef RBBI_DEBUG
void printNode();
void printTree(UBool withHeading);
#endif
private:
RBBINode &operator = (const RBBINode &other); // No defs.
UBool operator == (const RBBINode &other); // Private, so these functions won't accidentally be used.
#ifdef RBBI_DEBUG
int fSerialNum; // Debugging aids.
#endif
};
#ifdef RBBI_DEBUG
// Debugging aid: print the contents of a UnicodeString, padded with spaces
// to at least minWidth columns.
U_CFUNC void
RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
#endif
U_NAMESPACE_END
#endif

323
source/common/rbbirb.cpp Normal file
View file

@ -0,0 +1,323 @@
//
// file: rbbirb.cpp
//
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains the RBBIRuleBuilder class implementation. This is the main class for
// building (compiling) break rules into the tables required by the runtime
// RBBI engine.
//
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/rbbi.h"
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
#include "cmemory.h"
#include "cstring.h"
#include "rbbirb.h"
#include "rbbinode.h"
#include "rbbiscan.h"
#include "rbbisetb.h"
#include "rbbitblb.h"
#include "rbbidata.h"
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------------------
//
// Constructor.
//
//----------------------------------------------------------------------------------------
// Set every member to a known state first, then (only if the incoming status
// is good) allocate the helper objects the rule compiler needs.
//
//   rules     The rule source text; held by reference, not copied.
//   parseErr  Optional parse-error record; zeroed here when non-NULL.
//   status    In/out ICU error code.  If it already indicates failure nothing
//             is allocated and all owned pointers stay NULL.
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
                                 UParseError         *parseErr,
                                 UErrorCode          &status)
    : fDebugEnv(NULL),
      fStatus(&status),               // the status itself is examined in the body
      fParseError(parseErr),
      fRules(rules),
      fScanner(NULL),
      fForwardTree(NULL),
      fReverseTree(NULL),
      fSafeFwdTree(NULL),
      fSafeRevTree(NULL),
      fDefaultTree(&fForwardTree),
      fChainRules(FALSE),
      fLBCMNoChain(FALSE),
      fLookAheadHardBreak(FALSE),
      fSetBuilder(NULL),
      fUSetNodes(NULL),
      fForwardTables(NULL),
      fReverseTables(NULL),
      fSafeFwdTables(NULL),
      fSafeRevTables(NULL),
      fRuleStatusVals(NULL)
{
#ifdef RBBI_DEBUG
    fDebugEnv = getenv("U_RBBIDEBUG");
#endif

    // The caller's parse-error record is cleared even when status is already bad.
    if (parseErr != NULL) {
        uprv_memset(parseErr, 0, sizeof(UParseError));
    }

    if (U_FAILURE(status)) {
        return;
    }

    // The UVector constructors may themselves record a failure in status.
    fUSetNodes      = new UVector(status);
    fRuleStatusVals = new UVector(status);
    fScanner        = new RBBIRuleScanner(this);
    fSetBuilder     = new RBBISetBuilder(this);

    // Report a failed allocation only if no earlier error has been recorded.
    if (U_SUCCESS(status) &&
        (fSetBuilder == NULL || fScanner == NULL ||
         fUSetNodes == NULL  || fRuleStatusVals == NULL)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
//----------------------------------------------------------------------------------------
//
// Destructor
//
//----------------------------------------------------------------------------------------
// Release everything the builder owns: the uset nodes collected in fUSetNodes,
// the helper objects, the table builders and the parse trees.
RBBIRuleBuilder::~RBBIRuleBuilder() {
    // The constructor leaves fUSetNodes NULL when it is entered with a failing
    // status (it returns before allocating anything) or when the allocation
    // itself fails, so the vector must not be dereferenced unconditionally.
    if (fUSetNodes != NULL) {
        int i;
        // Walk the vector until elementAt() yields NULL, deleting each node.
        for (i=0; ; i++) {
            RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
            if (n==NULL) {
                break;
            }
            delete n;
        }
    }

    // delete on a NULL pointer is a no-op, so the remaining members need no guards.
    delete fUSetNodes;
    delete fSetBuilder;
    delete fForwardTables;
    delete fReverseTables;
    delete fSafeFwdTables;
    delete fSafeRevTables;

    delete fForwardTree;
    delete fReverseTree;
    delete fSafeFwdTree;
    delete fSafeRevTree;
    delete fScanner;
    delete fRuleStatusVals;
}
//----------------------------------------------------------------------------------------
//
// flattenData() - Collect up the compiled RBBI rule data and put it into
// the format for saving in ICU data files,
// which is also the format needed by the RBBI runtime engine.
//
//----------------------------------------------------------------------------------------
// Round i up to the next multiple of 8; values already on an 8-byte boundary
// are returned unchanged.
static int32_t align8(int32_t i) {
    const int32_t lowBits = 7;
    return (i + lowBits) & ~lowBits;
}
// Build the flattened, runtime-format image of the compiled rules.
//
// Returns a block obtained from uprv_malloc() laid out as: header, forward
// table, reverse table, safe-forward table, safe-reverse table, trie, rule
// status table, stripped rule source.  Returns NULL, with *fStatus set, if
// *fStatus already indicates failure on entry, if the allocation fails, or if
// copying the rule source fails.
RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    int32_t    i;

    if (U_FAILURE(*fStatus)) {
        return NULL;
    }

    // Remove comments and whitespace from the rules to make it smaller.
    UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));

    // Calculate the size of each section in the data.
    //   Sizes here are padded up to a multiple of 8 for better memory alignment.
    //   As the assignments below show, the table and status-table lengths
    //   stored in the header are these padded sizes; only fTrieLen and
    //   fRuleSourceLen hold the unpadded byte counts.
    //
    int32_t headerSize        = align8(sizeof(RBBIDataHeader));
    int32_t forwardTableSize  = align8(fForwardTables->getTableSize());
    int32_t reverseTableSize  = align8(fReverseTables->getTableSize());
    int32_t safeFwdTableSize  = align8(fSafeFwdTables->getTableSize());
    int32_t safeRevTableSize  = align8(fSafeRevTables->getTableSize());
    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
    // +1 leaves room for a terminating NUL after the rule text.
    int32_t rulesSize         = align8((strippedRules.length()+1) * sizeof(UChar));

    int32_t         totalSize = headerSize + forwardTableSize + reverseTableSize
                                + safeFwdTableSize + safeRevTableSize
                                + statusTableSize + trieSize + rulesSize;

    RBBIDataHeader  *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
    if (data == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // Zero everything so that the padding bytes between sections are deterministic.
    uprv_memset(data, 0, totalSize);

    data->fMagic            = 0xb1a0;
    data->fFormatVersion[0] = 3;
    data->fFormatVersion[1] = 1;
    data->fFormatVersion[2] = 0;
    data->fFormatVersion[3] = 0;
    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

    // Each section offset is the previous section's offset plus its padded size.
    data->fFTable        = headerSize;
    data->fFTableLen     = forwardTableSize;
    data->fRTable        = data->fFTable  + forwardTableSize;
    data->fRTableLen     = reverseTableSize;
    data->fSFTable       = data->fRTable  + reverseTableSize;
    data->fSFTableLen    = safeFwdTableSize;
    data->fSRTable       = data->fSFTable + safeFwdTableSize;
    data->fSRTableLen    = safeRevTableSize;

    data->fTrie          = data->fSRTable + safeRevTableSize;
    data->fTrieLen       = fSetBuilder->getTrieSize();
    data->fStatusTable   = data->fTrie    + trieSize;
    data->fStatusTableLen= statusTableSize;
    data->fRuleSource    = data->fStatusTable + statusTableSize;
    data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);

    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

    fForwardTables->exportTable((uint8_t *)data + data->fFTable);
    fReverseTables->exportTable((uint8_t *)data + data->fRTable);
    fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
    fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);

    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
    for (i=0; i<fRuleStatusVals->size(); i++) {
        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
    }

    // The rule-source region is rulesSize bytes, i.e. rulesSize/sizeof(UChar)
    // UChars.  Pass exactly that as the capacity rather than one more than the
    // region holds; it is still at least length()+1, so the text and its
    // terminating NUL always fit.
    strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource),
                          rulesSize / (int32_t)sizeof(UChar), *fStatus);
    if (U_FAILURE(*fStatus)) {
        // Callers treat a failing status as "no data was produced", so do not
        // hand back (and thereby leak) a block they will never free.
        uprv_free(data);
        return NULL;
    }

    return data;
}
//----------------------------------------------------------------------------------------
//
// createRuleBasedBreakIterator construct from source rules that are passed in
// in a UnicodeString
//
//----------------------------------------------------------------------------------------
// Compile the given rule source and wrap the result in a RuleBasedBreakIterator.
//
//   rules       The rule source text.
//   parseError  Optional; passed to the builder, which zeroes it when non-NULL.
//   status      In/out ICU error code.
//
// Returns the new iterator, or NULL with status set to a failure code.
BreakIterator *
RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString    &rules,
                                    UParseError      *parseError,
                                    UErrorCode       &status)
{
    // status checked below

    //
    // Read the input rules, generate a parse tree, symbol table,
    // and list of all Unicode Sets referenced by the rules.
    //
    RBBIRuleBuilder  builder(rules, parseError, status);
    if (U_FAILURE(status)) { // status checked here because the build steps below don't
        return NULL;
    }
    builder.fScanner->parse();

    //
    // UnicodeSet processing.
    //    Munge the Unicode Sets to create a set of character categories.
    //    Generate the mapping tables (TRIE) from input 32-bit characters to
    //    the character categories.
    //
    builder.fSetBuilder->build();

    //
    //   Generate the DFA state transition table.
    //
    builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
    builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
    builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
    builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
    if (U_SUCCESS(status)
        && (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
            builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL))
    {
        status = U_MEMORY_ALLOCATION_ERROR;
    }

    // Before building the tables, check to make sure the status is ok.
    // This also catches errors recorded by the parse and set-building steps above.
    if (U_FAILURE(status)) {
        delete builder.fForwardTables; builder.fForwardTables = NULL;
        delete builder.fReverseTables; builder.fReverseTables = NULL;
        delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
        delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
        return NULL;
    }

    builder.fForwardTables->build();
    builder.fReverseTables->build();
    builder.fSafeFwdTables->build();
    builder.fSafeRevTables->build();

#ifdef RBBI_DEBUG
    if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
        builder.fForwardTables->printRuleStatusTable();
    }
#endif

    //
    //   Package up the compiled data into a memory image
    //      in the run-time format.
    //
    RBBIDataHeader *data = builder.flattenData(); // may be NULL on error
    if (U_FAILURE(*builder.fStatus)) {
        // Nothing has taken ownership of the image yet; release it (a NULL
        // pointer is fine here) instead of dropping it on the floor.
        uprv_free(data);
        return NULL;
    }

    //
    //  Create a break iterator from the compiled rules.
    //     (Identical to creation from stored pre-compiled rules)
    //
    // status is checked after init in construction.
    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
    if (U_FAILURE(status)) {
        // The iterator was handed `data`; what happens to it on this path is
        // left to the iterator, exactly as before.
        // NOTE(review): confirm the iterator releases the image when its
        // construction fails.
        delete This;
        This = NULL;
    }
    else if(This == NULL) { // allocation of the iterator itself failed
        // No iterator exists to adopt the image, so free it here.
        uprv_free(data);
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return This;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

211
source/common/rbbirb.h Normal file
View file

@ -0,0 +1,211 @@
//
// rbbirb.h
//
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains declarations for several classes from the
// Rule Based Break Iterator rule builder.
//
#ifndef RBBIRB_H
#define RBBIRB_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
U_NAMESPACE_BEGIN
class RBBIRuleScanner;
struct RBBIRuleTableEl;
class RBBISetBuilder;
class RBBINode;
class RBBITableBuilder;
//--------------------------------------------------------------------------------
//
// RBBISymbolTable. Implements SymbolTable interface that is used by the
// UnicodeSet parser to resolve references to $variables.
//
//--------------------------------------------------------------------------------
// One entry of the RBBI symbol table: a (key, val) pair stored in the
// symbol table's hash table.  Copying is disabled by declaring the copy
// constructor and assignment operator private.
class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
public: // of these structs for each entry.
RBBISymbolTableEntry();
// The string this entry is looked up by.
// NOTE(review): presumably the $variable name - confirm against addEntry().
UnicodeString key;
// The parse-tree node associated with key.
RBBINode *val;
~RBBISymbolTableEntry();
private:
RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
};
// Symbol table for the rule builder.  Implements the SymbolTable interface
// (lookup / lookupMatcher / parseReference) and adds node-based lookup and
// insertion keyed by a UnicodeString.  Copying is disabled.
class RBBISymbolTable : public UMemory, public SymbolTable {
private:
// Reference to the rule source text supplied at construction.
const UnicodeString &fRules;
// Hash table holding the entries.
UHashtable *fHashTable;
// The scanner supplied at construction.
RBBIRuleScanner *fRuleScanner;
// These next two fields are part of the mechanism for passing references to
// already-constructed UnicodeSets back to the UnicodeSet constructor
// when the pattern includes $variable references.
const UnicodeString ffffString; // = "/uffff"
UnicodeSet *fCachedSetLookup;
public:
// API inherited from class SymbolTable
virtual const UnicodeString* lookup(const UnicodeString& s) const;
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
virtual UnicodeString parseReference(const UnicodeString& text,
ParsePosition& pos, int32_t limit) const;
// Additional Functions
RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
virtual ~RBBISymbolTable();
// Look up the node stored under key.
virtual RBBINode *lookupNode(const UnicodeString &key) const;
// Add a (key, val) entry; errors are reported through err.
virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
#ifdef RBBI_DEBUG
virtual void rbbiSymtablePrint() const;
#else
// Non-debug builds have no rbbiSymtablePrint() member.  Instead the macro
// below rewrites every "rbbiSymtablePrint()" into a harmless assignment to
// fFakeField, so call sites written as obj->rbbiSymtablePrint() still compile.
// Being a preprocessor macro, it is not scoped to this class: it applies to
// all text that follows this header in the translation unit.
int32_t fFakeField;
#define rbbiSymtablePrint() fFakeField=0;
#endif
private:
RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
};
//--------------------------------------------------------------------------------
//
// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
//
//--------------------------------------------------------------------------------
// Top-level state for one rule compilation.  A builder owns the scanner, the
// set builder, the four parse trees and the four table builders; its
// destructor deletes all of them.  Copying is disabled.
class RBBIRuleBuilder : public UMemory {
public:
// Create a rule based break iterator from a set of rules.
// This function is the main entry point into the rule builder. The
// public ICU API for creating RBBIs uses this function to do the actual work.
// Returns NULL and sets status to a failure code on error.
//
static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
UParseError *parseError,
UErrorCode &status);
public:
// The "public" functions and data members that appear below are accessed
// (and shared) by the various parts that make up the rule builder. They
// are NOT intended to be accessed by anything outside of the
// rule builder implementation.
// Note: rules is held by reference (see fRules) and status by pointer
// (see fStatus), so both must outlive the builder.
RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status
);
virtual ~RBBIRuleBuilder();
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.
const UnicodeString &fRules; // The rule string that we are compiling
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
RBBINode *fReverseTree; // then manipulated by subsequent steps.
RBBINode *fSafeFwdTree;
RBBINode *fSafeRevTree;
RBBINode **fDefaultTree; // For rules not qualified with a !
// the tree to which they belong.
UBool fChainRules; // True for chained Unicode TR style rules.
// False for traditional regexp rules.
UBool fLBCMNoChain; // True: suppress chaining of rules on
// chars with LineBreak property == CM.
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
// immediate break, no continuing for the
// longest match.
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.
RBBITableBuilder *fForwardTables; // State transition tables
RBBITableBuilder *fReverseTables;
RBBITableBuilder *fSafeFwdTables;
RBBITableBuilder *fSafeRevTables;
UVector *fRuleStatusVals; // The values that can be returned
// from getRuleStatus().
RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
// data tables..
private:
RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
};
//----------------------------------------------------------------------------
//
// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
// been encountered. The val Node will be of nodetype uset
// and contain pointers to the actual UnicodeSets.
// The Key is the source string for initializing the set.
//
// The hash table is used to avoid creating duplicate
// unnamed (not $var references) UnicodeSets.
//
// Memory Management:
// The Hash Table owns these RBBISetTableEl structs and
// the key strings. It does NOT own the val nodes.
//
//----------------------------------------------------------------------------
struct RBBISetTableEl {
// Source string for initializing the set; owned by the hash table.
UnicodeString *key;
// The uset node for this set; NOT owned by the hash table.
RBBINode *val;
};
//----------------------------------------------------------------------------
//
// RBBIDebugPrintf Printf equivalent, for debugging output.
// Conditional compilation of the implementation lets us
// get rid of the stdio dependency in environments where it
// is unavailable.
//
//----------------------------------------------------------------------------
// Debug builds: both macros are plain aliases for the stdio functions.
#ifdef RBBI_DEBUG
#include <stdio.h>
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
// Non-debug builds: RBBIDebugPuts(arg) expands to nothing, while
// RBBIDebugPrintf is deliberately left undefined (no empty replacement is
// provided), so any use of RBBIDebugPrintf must itself sit inside
// #ifdef RBBI_DEBUG.
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
U_NAMESPACE_END
#endif

275
source/common/rbbirpt.h Normal file
View file

@ -0,0 +1,275 @@
//---------------------------------------------------------------------------------
//
// Generated Header File. Do not edit by hand.
// This file contains the state table for the ICU Rule Based Break Iterator
// rule parser.
// It is generated by the Perl script "rbbicst.pl" from
// the rule parser state definitions file "rbbirpt.txt".
//
// Copyright (C) 2002-2005 International Business Machines Corporation
// and others. All rights reserved.
//
//---------------------------------------------------------------------------------
#ifndef RBBIRPT_H
#define RBBIRPT_H
U_NAMESPACE_BEGIN
//
// Character classes for RBBI rule scanning.
// These values appear in the fCharClass column of gRuleParseStateTable;
// they fall in the 128-255 range that RBBIRuleTableEl reserves for
// character class indexes rather than individual ASCII characters.
// (Generated file: lasting changes belong in rbbicst.pl / rbbirpt.txt.)
//
static const uint8_t kRuleSet_digit_char = 128;
static const uint8_t kRuleSet_name_char = 129;
static const uint8_t kRuleSet_name_start_char = 130;
static const uint8_t kRuleSet_rule_char = 131;
static const uint8_t kRuleSet_white_space = 132;
// Action codes for the rule parser state machine.  Each row of
// gRuleParseStateTable names one of these in its fAction field; rbbirpt.txt
// states that the actions are carried out by RBBIRuleScanner::doParseActions().
// rbbiLastAction is the final enumerator.
// NOTE(review): it appears to serve as an end marker / count - confirm.
enum RBBI_RuleParseAction {
doCheckVarDef,
doDotAny,
doEndAssign,
doEndOfRule,
doEndVariableName,
doExit,
doExprCatOperator,
doExprFinished,
doExprOrOperator,
doExprRParen,
doExprStart,
doLParen,
doNOP,
doOptionEnd,
doOptionStart,
doReverseDir,
doRuleChar,
doRuleError,
doRuleErrorAssignExpr,
doScanUnicodeSet,
doSlash,
doStartAssign,
doStartTagValue,
doStartVariableName,
doTagDigit,
doTagExpectedError,
doTagValue,
doUnaryOpPlus,
doUnaryOpQuestion,
doUnaryOpStar,
doVariableNameExpectedErr,
rbbiLastAction};
//-------------------------------------------------------------------------------
//
// RBBIRuleTableEl represents the structure of a row in the transition table
// for the rule parser state machine.
//-------------------------------------------------------------------------------
struct RBBIRuleTableEl {
// Action to perform when this row matches.
RBBI_RuleParseAction fAction;
uint8_t fCharClass; // 0-127: an individual ASCII character
// 128-255: character class index
uint8_t fNextState; // 0-250: normal next-state numbers
// 255: pop next-state from stack.
// State to push onto the state stack when this row matches; 0 in rows
// whose rbbirpt.txt line has no ^push-state.
uint8_t fPushState;
// TRUE in rows whose rbbirpt.txt line carries the 'n' flag, i.e. advance
// to the next input character when taking this transition.
UBool fNextChar;
};
// The rule parser's transition table, generated from rbbirpt.txt.
//
// Each row is { action, char class, next state, push state, advance-input }.
// The trailing comment gives the row number, and the state name on the first
// row of each state; a state's rows are consecutive and are listed in the
// same order as its lines in rbbirpt.txt.
//
// Special values, as seen by comparing rows with rbbirpt.txt:
//   char class 254  "escaped"  (e.g. row 1)
//   char class 252  "eof"      (row 6)
//   char class 255  "default"  (e.g. row 7)
//   next state 255  "pop"      (e.g. row 45)
//   next state 0    "exit"     (row 6)
// Row 0 is not the start of any named state.
static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
{doNOP, 0, 0, 0, TRUE}
, {doExprStart, 254, 21, 8, FALSE} // 1 start
, {doNOP, 132, 1,0, TRUE} // 2
, {doExprStart, 36 /* $ */, 80, 90, FALSE} // 3
, {doNOP, 33 /* ! */, 11,0, TRUE} // 4
, {doNOP, 59 /* ; */, 1,0, TRUE} // 5
, {doNOP, 252, 0,0, FALSE} // 6
, {doExprStart, 255, 21, 8, FALSE} // 7
, {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 8 break-rule-end
, {doNOP, 132, 8,0, TRUE} // 9
, {doRuleError, 255, 95,0, FALSE} // 10
, {doNOP, 33 /* ! */, 13,0, TRUE} // 11 rev-option
, {doReverseDir, 255, 20, 8, FALSE} // 12
, {doOptionStart, 130, 15,0, TRUE} // 13 option-scan1
, {doRuleError, 255, 95,0, FALSE} // 14
, {doNOP, 129, 15,0, TRUE} // 15 option-scan2
, {doOptionEnd, 255, 17,0, FALSE} // 16
, {doNOP, 59 /* ; */, 1,0, TRUE} // 17 option-scan3
, {doNOP, 132, 17,0, TRUE} // 18
, {doRuleError, 255, 95,0, FALSE} // 19
, {doExprStart, 255, 21, 8, FALSE} // 20 reverse-rule
, {doRuleChar, 254, 30,0, TRUE} // 21 term
, {doNOP, 132, 21,0, TRUE} // 22
, {doRuleChar, 131, 30,0, TRUE} // 23
, {doNOP, 91 /* [ */, 86, 30, FALSE} // 24
, {doLParen, 40 /* ( */, 21, 30, TRUE} // 25
, {doNOP, 36 /* $ */, 80, 29, FALSE} // 26
, {doDotAny, 46 /* . */, 30,0, TRUE} // 27
, {doRuleError, 255, 95,0, FALSE} // 28
, {doCheckVarDef, 255, 30,0, FALSE} // 29 term-var-ref
, {doNOP, 132, 30,0, TRUE} // 30 expr-mod
, {doUnaryOpStar, 42 /* * */, 35,0, TRUE} // 31
, {doUnaryOpPlus, 43 /* + */, 35,0, TRUE} // 32
, {doUnaryOpQuestion, 63 /* ? */, 35,0, TRUE} // 33
, {doNOP, 255, 35,0, FALSE} // 34
, {doExprCatOperator, 254, 21,0, FALSE} // 35 expr-cont
, {doNOP, 132, 35,0, TRUE} // 36
, {doExprCatOperator, 131, 21,0, FALSE} // 37
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 38
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 39
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 40
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 41
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 42
, {doExprCatOperator, 123 /* { */, 59,0, TRUE} // 43
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 44
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 45
, {doExprFinished, 255, 255,0, FALSE} // 46
, {doSlash, 47 /* / */, 49,0, TRUE} // 47 look-ahead
, {doNOP, 255, 95,0, FALSE} // 48
, {doExprCatOperator, 254, 21,0, FALSE} // 49 expr-cont-no-slash
, {doNOP, 132, 35,0, TRUE} // 50
, {doExprCatOperator, 131, 21,0, FALSE} // 51
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 52
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 53
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 54
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 55
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 56
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 57
, {doExprFinished, 255, 255,0, FALSE} // 58
, {doNOP, 132, 59,0, TRUE} // 59 tag-open
, {doStartTagValue, 128, 62,0, FALSE} // 60
, {doTagExpectedError, 255, 95,0, FALSE} // 61
, {doNOP, 132, 66,0, TRUE} // 62 tag-value
, {doNOP, 125 /* } */, 66,0, FALSE} // 63
, {doTagDigit, 128, 62,0, TRUE} // 64
, {doTagExpectedError, 255, 95,0, FALSE} // 65
, {doNOP, 132, 66,0, TRUE} // 66 tag-close
, {doTagValue, 125 /* } */, 69,0, TRUE} // 67
, {doTagExpectedError, 255, 95,0, FALSE} // 68
, {doExprCatOperator, 254, 21,0, FALSE} // 69 expr-cont-no-tag
, {doNOP, 132, 69,0, TRUE} // 70
, {doExprCatOperator, 131, 21,0, FALSE} // 71
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 72
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 73
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 74
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 75
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 76
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 77
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 78
, {doExprFinished, 255, 255,0, FALSE} // 79
, {doStartVariableName, 36 /* $ */, 82,0, TRUE} // 80 scan-var-name
, {doNOP, 255, 95,0, FALSE} // 81
, {doNOP, 130, 84,0, TRUE} // 82 scan-var-start
, {doVariableNameExpectedErr, 255, 95,0, FALSE} // 83
, {doNOP, 129, 84,0, TRUE} // 84 scan-var-body
, {doEndVariableName, 255, 255,0, FALSE} // 85
, {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 86 scan-unicode-set
, {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 87
, {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 88
, {doNOP, 255, 95,0, FALSE} // 89
, {doNOP, 132, 90,0, TRUE} // 90 assign-or-rule
, {doStartAssign, 61 /* = */, 21, 93, TRUE} // 91
, {doNOP, 255, 29, 8, FALSE} // 92
, {doEndAssign, 59 /* ; */, 1,0, TRUE} // 93 assign-end
, {doRuleErrorAssignExpr, 255, 95,0, FALSE} // 94
, {doExit, 255, 95,0, TRUE} // 95 errorDeath
};
#ifdef RBBI_DEBUG
// Debug-only state names, indexed by row number of gRuleParseStateTable.
// Only the first row of each state carries its name (e.g. index 1 is "start",
// index 8 is "break-rule-end"); every other index holds 0.  One extra 0 entry
// follows the last table row.
static const char * const RBBIRuleStateNames[] = { 0,
"start",
0,
0,
0,
0,
0,
0,
"break-rule-end",
0,
0,
"rev-option",
0,
"option-scan1",
0,
"option-scan2",
0,
"option-scan3",
0,
0,
"reverse-rule",
"term",
0,
0,
0,
0,
0,
0,
0,
"term-var-ref",
"expr-mod",
0,
0,
0,
0,
"expr-cont",
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
"look-ahead",
0,
"expr-cont-no-slash",
0,
0,
0,
0,
0,
0,
0,
0,
0,
"tag-open",
0,
0,
"tag-value",
0,
0,
0,
"tag-close",
0,
0,
"expr-cont-no-tag",
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
"scan-var-name",
0,
"scan-var-start",
0,
"scan-var-body",
0,
"scan-unicode-set",
0,
0,
0,
"assign-or-rule",
0,
0,
"assign-end",
0,
"errorDeath",
0};
#endif
U_NAMESPACE_END
#endif

315
source/common/rbbirpt.txt Normal file
View file

@ -0,0 +1,315 @@
#*****************************************************************************
#
# Copyright (C) 2002-2003, International Business Machines Corporation and others.
# All Rights Reserved.
#
#*****************************************************************************
#
# file: rbbirpt.txt
# ICU Break Iterator Rule Parser State Table
#
# This state table is used when reading and parsing a set of RBBI rules
# The rule parser uses a state machine; the data in this file define the
# state transitions that occur for each input character.
#
# *** This file defines the RBBI rule grammar. This is it.
# *** The determination of what is accepted is here.
#
# This file is processed by a perl script "rbbicst.pl" to produce initialized C arrays
# that are then built with the rule parser.
#
#
# Here is the syntax of the state definitions in this file:
#
#
#StateName:
# input-char n next-state ^push-state action
# input-char n next-state ^push-state action
# | | | | |
# | | | | |--- action to be performed by state machine
# | | | | See function RBBIRuleScanner::doParseActions()
# | | | |
# | | | |--- Push this named state onto the state stack.
# | | | Later, when next state is specified as "pop",
# | | | the pushed state will become the current state.
# | | |
# | | |--- Transition to this state if the current input character matches the input
# | | character or char class in the left hand column. "pop" causes the next
# | | state to be popped from the state stack.
# | |
# | |--- When making the state transition specified on this line, advance to the next
# | character from the input only if 'n' appears here.
# |
# |--- Character or named character classes to test for. If the current character being scanned
# matches, perform the actions and go to the state specified on this line.
# The input character is tested sequentially, in the order written. The characters and
# character classes tested for do not need to be mutually exclusive. The first match wins.
#
#
# start state, scan position is at the beginning of the rules file, or in between two rules.
#
start:
escaped term ^break-rule-end doExprStart
white_space n start
'$' scan-var-name ^assign-or-rule doExprStart
'!' n rev-option
';' n start # ignore empty rules.
eof exit
default term ^break-rule-end doExprStart
#
# break-rule-end: Returned from doing a break-rule expression.
#
break-rule-end:
';' n start doEndOfRule
white_space n break-rule-end
default errorDeath doRuleError
#
# ! We've just scanned a '!', indicating either a !!key word flag or a
# !Reverse rule.
#
rev-option:
'!' n option-scan1
default reverse-rule ^break-rule-end doReverseDir
option-scan1:
name_start_char n option-scan2 doOptionStart
default errorDeath doRuleError
option-scan2:
name_char n option-scan2
default option-scan3 doOptionEnd
option-scan3:
';' n start
white_space n option-scan3
default errorDeath doRuleError
reverse-rule:
default term ^break-rule-end doExprStart
#
# term. Eat through a single rule character, or a composite thing, which
# could be a parenthesized expression, a variable name, or a Unicode Set.
#
term:
escaped n expr-mod doRuleChar
white_space n term
rule_char n expr-mod doRuleChar
'[' scan-unicode-set ^expr-mod
'(' n term ^expr-mod doLParen
'$' scan-var-name ^term-var-ref
'.' n expr-mod doDotAny
default errorDeath doRuleError
#
# term-var-ref We've just finished scanning a reference to a $variable.
# Check that the variable was defined.
# The variable name scanning is in common with assignment statements,
# so the check can't be done there.
term-var-ref:
default expr-mod doCheckVarDef
#
# expr-mod We've just finished scanning a term, now look for the optional
# trailing '*', '?', '+'
#
expr-mod:
white_space n expr-mod
'*' n expr-cont doUnaryOpStar
'+' n expr-cont doUnaryOpPlus
'?' n expr-cont doUnaryOpQuestion
default expr-cont
#
# expr-cont Expression, continuation. At a point where additional terms are
# allowed, but not required.
#
expr-cont:
escaped term doExprCatOperator
white_space n expr-cont
rule_char term doExprCatOperator
'[' term doExprCatOperator
'(' term doExprCatOperator
'$' term doExprCatOperator
'.' term doExprCatOperator
'/' look-ahead doExprCatOperator
'{' n tag-open doExprCatOperator
'|' n term doExprOrOperator
')' n pop doExprRParen
default pop doExprFinished
#
# look-ahead Scanning a '/', which identifies a break point, assuming that the
# remainder of the expression matches.
#
# Generate a parse tree as if this was a special kind of input symbol
# appearing in an otherwise normal concatenation expression.
#
look-ahead:
'/' n expr-cont-no-slash doSlash
default errorDeath
#
# expr-cont-no-slash Expression, continuation. At a point where additional terms are
# allowed, but not required. Just like
# expr-cont, above, except that no '/'
# look-ahead symbol is permitted.
#
expr-cont-no-slash:
escaped term doExprCatOperator
white_space n expr-cont
rule_char term doExprCatOperator
'[' term doExprCatOperator
'(' term doExprCatOperator
'$' term doExprCatOperator
'.' term doExprCatOperator
'|' n term doExprOrOperator
')' n pop doExprRParen
default pop doExprFinished
#
# tags scanning a '{', the opening delimiter for a tag that identifies
# the kind of match. Scan the whole {dddd} tag, where d=digit
#
tag-open:
white_space n tag-open
digit_char tag-value doStartTagValue
default errorDeath doTagExpectedError
tag-value:
white_space n tag-close
'}' tag-close
digit_char n tag-value doTagDigit
default errorDeath doTagExpectedError
tag-close:
white_space n tag-close
'}' n expr-cont-no-tag doTagValue
default errorDeath doTagExpectedError
#
# expr-cont-no-tag Expression, continuation. At a point where additional terms are
# allowed, but not required. Just like
# expr-cont, above, except that no "{ddd}"
# tagging is permitted.
#
expr-cont-no-tag:
escaped term doExprCatOperator
white_space n expr-cont-no-tag
rule_char term doExprCatOperator
'[' term doExprCatOperator
'(' term doExprCatOperator
'$' term doExprCatOperator
'.' term doExprCatOperator
'/' look-ahead doExprCatOperator
'|' n term doExprOrOperator
')' n pop doExprRParen
default pop doExprFinished
#
# Variable Name Scanning.
#
# The state that branched to here must have pushed a return state
# to go to after completion of the variable name scanning.
#
# The current input character must be the $ that introduces the name.
# The $ is consumed here rather than in the state that first detected it
# so that the doStartVariableName action only needs to happen in one
# place (here), and the other states don't need to worry about it.
#
scan-var-name:
'$' n scan-var-start doStartVariableName
default errorDeath
scan-var-start:
name_start_char n scan-var-body
default errorDeath doVariableNameExpectedErr
scan-var-body:
name_char n scan-var-body
default pop doEndVariableName
#
# scan-unicode-set Unicode Sets are parsed by the UnicodeSet class.
# Within the RBBI parser, after finding the first character
# of a Unicode Set, we just hand the rule input at that
# point off to the Unicode Set constructor, then pick
# up parsing after the close of the set.
#
# The action for this state invokes the UnicodeSet parser.
#
scan-unicode-set:
'[' n pop doScanUnicodeSet
'p' n pop doScanUnicodeSet
'P' n pop doScanUnicodeSet
default errorDeath
#
# assign-or-rule. A $variable was encountered at the start of something, could be
# either an assignment statement or a rule, depending on whether an '='
# follows the variable name. We get to this state when the variable name
# scanning does a return.
#
assign-or-rule:
white_space n assign-or-rule
'=' n term ^assign-end doStartAssign # variable was target of assignment
default term-var-ref ^break-rule-end # variable was a term in a rule
#
# assign-end This state is entered when the end of the expression on the
# right hand side of an assignment is found. We get here via
# a pop; this state is pushed when the '=' in an assignment is found.
#
# The only thing allowed at this point is a ';'. The RHS of an
# assignment must look like a rule expression, and we come here
# when what is being scanned no longer looks like an expression.
#
assign-end:
';' n start doEndAssign
default errorDeath doRuleErrorAssignExpr
#
# errorDeath. This state is specified as the next state whenever a syntax error
# in the source rules is detected. Barring bugs, the state machine will never
# actually get here, but will stop because of the action associated with the error.
# But, just in case, this state asks the state machine to exit.
errorDeath:
default n errorDeath doExit

1210
source/common/rbbiscan.cpp Normal file

File diff suppressed because it is too large Load diff

162
source/common/rbbiscan.h Normal file
View file

@ -0,0 +1,162 @@
//
// rbbiscan.h
//
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains declarations for class RBBIRuleScanner
//
#ifndef RBBISCAN_H
#define RBBISCAN_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
#include "rbbinode.h"
//#include "rbbitblb.h"
U_NAMESPACE_BEGIN
class RBBIRuleBuilder;
class RBBISymbolTable;
//--------------------------------------------------------------------------------
//
// class RBBIRuleScanner does the lowest level, character-at-a-time
// scanning of break iterator rules.
//
// The output of the scanner is parse trees for
// the rule expressions and a list of all Unicode Sets
// encountered.
//
//--------------------------------------------------------------------------------
class RBBIRuleScanner : public UMemory {
public:
// The size of the state stack for rules parsing. Corresponds roughly
// to the depth of parentheses nesting that is allowed in the rules.
// The same constant also sizes the node stack (fNodeStack) below.
enum {
kStackSize = 100
};
// One character of rule input as seen by the parser.
struct RBBIRuleChar {
UChar32 fChar;
UBool fEscaped;
};
RBBIRuleScanner(RBBIRuleBuilder *rb);
virtual ~RBBIRuleScanner();
void nextChar(RBBIRuleChar &c); // Get the next char from the input stream;
// the result is delivered through c.
// NOTE(review): an older remark here said
// "return false if at end", but the function
// is declared void - confirm in rbbiscan.cpp
// how end of input is signalled.
UBool push(const RBBIRuleChar &c); // Push (unget) one character.
// Only a single character may be pushed.
void parse(); // Parse the rules, generating two parse
// trees, one each for the forward and
// reverse rules,
// and a list of UnicodeSets encountered.
/**
* Return a rules string without unnecessary
* characters.
*/
static UnicodeString stripRules(const UnicodeString &rules);
private:
UBool doParseActions(int32_t a);
void error(UErrorCode e); // error reporting convenience function.
void fixOpStack(RBBINode::OpPrecedence p);
// NOTE(review): a dangling comment fragment ("a character.") used to sit
// here; its declaration appears to have been removed.
void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
UChar32 nextCharLL();
#ifdef RBBI_DEBUG
void printNodeStack(const char *title);
#endif
RBBINode *pushNewNode(RBBINode::NodeType t);
void scanSet();
RBBIRuleBuilder *fRB; // The rule builder that we are part of.
int32_t fScanIndex; // Index of current character being processed
// in the rule input string.
int32_t fNextIndex; // Index of the next character, which
// is the first character not yet scanned.
UBool fQuoteMode; // Scan is in a 'quoted region'
int32_t fLineNum; // Line number in input file.
int32_t fCharNum; // Char position within the line.
UChar32 fLastChar; // Previous char, needed to count CR-LF
// as a single line, not two.
RBBIRuleChar fC; // Current char for parse state machine
// processing.
UnicodeString fVarName; // $variableName, valid when we've just
// scanned one.
RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
// parsing. index by p[state][char-class]
uint16_t fStack[kStackSize]; // State stack, holds state pushes
int32_t fStackPtr; // and pops as specified in the state
// transition rules.
RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
// during the parse of a rule
int32_t fNodeStackPtr;
UBool fReverseRule; // True if the rule currently being scanned
// is a reverse direction rule (if it
// starts with a '!')
UBool fLookAheadRule; // True if the rule includes a '/'
// somewhere within it.
RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
// $variable symbols.
UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
// the sets created while parsing rules.
// The key is the string used for creating
// the set.
UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
// the scanning of RBBI rules. The
// indices for these are assigned by the
// perl script that builds the state tables.
// See rbbirpt.h.
int32_t fRuleNum; // Counts each rule as it is scanned.
int32_t fOptionStart; // Input index of start of a !!option
// keyword, while being scanned.
// NOTE(review): despite the 'g' prefix these four are per-instance members,
// not globals. Presumably they point at character-class sets used while
// scanning - confirm ownership and initialisation in rbbiscan.cpp.
UnicodeSet *gRuleSet_rule_char;
UnicodeSet *gRuleSet_white_space;
UnicodeSet *gRuleSet_name_char;
UnicodeSet *gRuleSet_name_start_char;
RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
};
U_NAMESPACE_END
#endif

695
source/common/rbbisetb.cpp Normal file
View file

@ -0,0 +1,695 @@
//
// rbbisetb.cpp
//
/*
***************************************************************************
* Copyright (C) 2002-2008 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
//
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
// (part of the rule building process.)
//
// Starting with the rules parse tree from the scanner,
//
// - Enumerate the set of UnicodeSets that are referenced
// by the RBBI rules.
// - compute a set of non-overlapping character ranges
// with all characters within a range belonging to the same
// set of input Unicode sets.
// - Derive a set of non-overlapping UnicodeSet (like things)
// that will correspond to columns in the state table for
// the RBBI execution engine. All characters within one
// of these sets belong to the same set of the original
// UnicodeSets from the user's rules.
// - construct the trie table that maps input characters
// to the index of the matching non-overlapping set of set from
// the previous step.
//
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uniset.h"
#include "utrie.h"
#include "uvector.h"
#include "uassert.h"
#include "cmemory.h"
#include "cstring.h"
#include "rbbisetb.h"
#include "rbbinode.h"
//------------------------------------------------------------------------
//
// getFoldedRBBIValue Call-back function used during building of Trie table.
// Folding value: just store the offset (16 bits)
// if there is any non-0 entry.
// (It'd really be nice if the Trie builder would provide a
// simple default, so this function could go away from here.)
//
//------------------------------------------------------------------------
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    // Examine the 0x400 code points beginning at 'start'.  As soon as one of
    // them maps to a non-zero value the folded value is the block offset with
    // bit 15 set; if every one of them is zero the folded value is 0.
    const UChar32 blockLimit = start + 0x400;
    UChar32       cp         = start;

    while (cp < blockLimit) {
        UBool          allZeroBlock;
        const uint32_t mapped = utrie_get32(trie, cp, &allZeroBlock);

        if (allZeroBlock) {
            // The whole data block is known to be zero; hop over it in one step.
            cp += UTRIE_DATA_BLOCK_LENGTH;
            continue;
        }
        if (mapped != 0) {
            return (uint32_t)(offset|0x8000);
        }
        ++cp;
    }
    return 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN
//------------------------------------------------------------------------
//
// Constructor
//
//------------------------------------------------------------------------
RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
    : fRB(rb),
      fStatus(rb->fStatus),     // share the owning rule builder's status slot
      fRangeList(0),            // no ranges until build() runs
      fTrie(0),                 // trie is created by build()
      fTrieSize(0),
      fGroupCount(0),
      fSawBOF(FALSE)
{
}
//------------------------------------------------------------------------
//
// Destructor
//
//------------------------------------------------------------------------
RBBISetBuilder::~RBBISetBuilder()
{
    // Free every node of the singly linked RangeDescriptor list.  The link is
    // read before the node is deleted so that freed memory is never touched.
    RangeDescriptor *current = fRangeList;
    while (current != NULL) {
        RangeDescriptor *following = current->fNext;
        delete current;
        current = following;
    }

    utrie_close(fTrie);
}
//------------------------------------------------------------------------
//
// build Build the list of non-overlapping character ranges
// from the Unicode Sets.
//
//------------------------------------------------------------------------
void RBBISetBuilder::build() {
    RBBINode        *usetNode;
    RangeDescriptor *rlRange;

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}

    //
    //  Initialize the process by creating a single range encompassing all characters
    //  that is in no sets.
    //
    fRangeList = new RangeDescriptor(*fStatus); // the constructor reports its own failures via *fStatus
    if (fRangeList == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    fRangeList->fStartChar = 0;
    fRangeList->fEndChar   = 0x10ffff;

    if (U_FAILURE(*fStatus)) {
        return;
    }

    //
    //  Find the set of non-overlapping ranges of characters
    //
    int  ni;
    for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
        usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
        if (usetNode==NULL) {
            // A NULL element marks the end of the list of set nodes.
            break;
        }

        UnicodeSet      *inputSet             = usetNode->fInputSet;
        int32_t          inputSetRangeCount   = inputSet->getRangeCount();
        int              inputSetRangeIndex   = 0;
                         rlRange              = fRangeList;

        for (;;) {
            if (inputSetRangeIndex >= inputSetRangeCount) {
                break;
            }
            UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
            UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);

            // skip over ranges from the range list that are completely
            //   below the current range from the input unicode set.
            while (rlRange->fEndChar < inputSetRangeBegin) {
                rlRange = rlRange->fNext;
            }

            // If the start of the range from the range list is before
            //   the start of the range from the unicode set, split the range list range
            //   in two, with one part being before (wholly outside of) the unicode set
            //   and the other containing the rest.
            //   Then continue the loop; the post-split current range will then be skipped
            //     over
            if (rlRange->fStartChar < inputSetRangeBegin) {
                rlRange->split(inputSetRangeBegin, *fStatus);
                if (U_FAILURE(*fStatus)) {
                    return;
                }
                continue;
            }

            // Same thing at the end of the ranges...
            // If the end of the range from the range list doesn't coincide with
            //   the end of the range from the unicode set, split the range list
            //   range in two.  The first part of the split range will be
            //   wholly inside the Unicode set.
            if (rlRange->fEndChar > inputSetRangeEnd) {
                rlRange->split(inputSetRangeEnd+1, *fStatus);
                if (U_FAILURE(*fStatus)) {
                    return;
                }
            }

            // The current rlRange is now entirely within the UnicodeSet range.
            // Add this unicode set to the list of sets for this rlRange
            if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
                rlRange->fIncludesSets->addElement(usetNode, *fStatus);
                if (U_FAILURE(*fStatus)) {
                    return;
                }
            }

            // Advance over ranges that we are finished with.
            if (inputSetRangeEnd == rlRange->fEndChar) {
                inputSetRangeIndex++;
            }
            rlRange = rlRange->fNext;
        }
    }

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}

    //
    //  Group the above ranges, with each group consisting of one or more
    //    ranges that are in exactly the same set of original UnicodeSets.
    //    The groups are numbered, and these group numbers are the set of
    //    input symbols recognized by the run-time state machine.
    //
    //    Numbering: # 0  (state table column 0) is unused.
    //               # 1  is reserved - table column 1 is for end-of-input
    //               # 2  is reserved - table column 2 is for beginning-of-input
    //               # 3  is the first range list.
    //
    RangeDescriptor *rlSearchRange;
    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
        // Reuse the number of any earlier range that belongs to the same sets.
        for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
            if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
                rlRange->fNum = rlSearchRange->fNum;
                break;
            }
        }
        if (rlRange->fNum == 0) {
            // No earlier range matched: start a new group.
            fGroupCount ++;
            rlRange->fNum = fGroupCount+2;
            rlRange->setDictionaryFlag();
            addValToSets(rlRange->fIncludesSets, fGroupCount+2);
        }
    }

    // Handle input sets that contain the special strings {eof} and {bof}.
    //   Column 1 of the state table is reserved for EOF on input.
    //   Column 2 is reserved for before-the-start-input.
    //            (This column can be optimized away later if there are no rule
    //             references to {bof}.)
    //   Add this column value (1 or 2) to the equivalent expression
    //     subtree for each UnicodeSet that contains the string {eof} or {bof}.
    //   Because {bof} and {eof} are not characters in the normal sense,
    //     they don't affect the computation of ranges or the TRIE.
    static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
    static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};

    UnicodeString eofString(eofUString);
    UnicodeString bofString(bofUString);
    for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
        usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
        if (usetNode==NULL) {
            break;
        }
        UnicodeSet      *inputSet = usetNode->fInputSet;
        if (inputSet->contains(eofString)) {
            addValToSet(usetNode, 1);
        }
        if (inputSet->contains(bofString)) {
            addValToSet(usetNode, 2);
            fSawBOF = TRUE;
        }
    }

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}

    //
    // Build the Trie table for mapping UChar32 values to the corresponding
    //   range group number
    //
    fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
                       NULL,    //  Data array  (utrie will allocate one)
                       100000,  //  Max Data Length
                       0,       //  Initial value for all code points
                       0,       //  Lead surrogate unit value
                       TRUE);   //  Keep Latin 1 in separately
    if (fTrie == NULL) {
        // utrie_open() takes no UErrorCode, so a failed open is visible only
        // through its NULL return.  Record it instead of silently producing
        // a builder without a trie.
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
        // A FALSE return means the range could not be stored; surface that
        // rather than serializing an incomplete mapping later.
        if (!utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE)) {
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }
}
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
//
//-----------------------------------------------------------------------------------
int32_t RBBISetBuilder::getTrieSize() /*const*/ {
    // Ask the trie serializer for a length only: no output buffer and a
    // capacity of zero.  The result is cached in fTrieSize, which
    // serializeTrie() later passes back as the buffer capacity.
    void *const   noBuffer   = NULL;
    const int32_t noCapacity = 0;

    fTrieSize = utrie_serialize(fTrie, noBuffer, noCapacity,
                                getFoldedRBBIValue,
                                TRUE,            // Reduce to 16 bits
                                fStatus);
    // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
    return fTrieSize;
}
//-----------------------------------------------------------------------------------
//
// serializeTrie() Put the serialized trie at the specified address.
// Trust the caller to have given us enough memory.
// getTrieSize() MUST be called first.
//
//-----------------------------------------------------------------------------------
void RBBISetBuilder::serializeTrie(uint8_t *where) {
    // Write the trie to caller-supplied storage.  The capacity handed to the
    // serializer is the size cached by getTrieSize(), so that function must
    // have been called first and 'where' must be at least that large.
    const int32_t capacity = fTrieSize;
    utrie_serialize(fTrie, where, capacity,
                    getFoldedRBBIValue,
                    TRUE,            // Reduce to 16 bits
                    fStatus);
}
//------------------------------------------------------------------------
//
// addValToSets Add a runtime-mapped input value to each uset from a
// list of uset nodes. (val corresponds to a state table column.)
// For each of the original Unicode sets - which correspond
// directly to uset nodes - a logically equivalent expression
// is constructed in terms of the remapped runtime input
// symbol set. This function adds one runtime input symbol to
// a list of sets.
//
// The "logically equivalent expression" is the tree for an
// or-ing together of all of the symbols that go into the set.
//
//------------------------------------------------------------------------
void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
    // Apply addValToSet() to every uset node held in the vector.
    // addValToSet() only touches the nodes' child links, never the vector,
    // so the element count can be read once up front.
    const int32_t nodeCount = sets->size();
    for (int32_t i = 0; i < nodeCount; ++i) {
        addValToSet((RBBINode *)sets->elementAt(i), val);
    }
}
//
//  addValToSet     Add one runtime input symbol (state table column number)
//                  to the expression subtree hanging off a single uset node.
//
//                  The first value becomes the node's left child directly;
//                  each later value is OR-ed onto what is already there.
//                  On allocation failure *fStatus is set to
//                  U_MEMORY_ALLOCATION_ERROR and the tree is left unchanged.
//
void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
    RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
    if (leafNode == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    leafNode->fVal = (unsigned short)val;
    if (usetNode->fLeftChild == NULL) {
        usetNode->fLeftChild = leafNode;
        leafNode->fParent    = usetNode;
    } else {
        // There are already input symbols present for this set.
        // Set up an OR node, with the previous stuff as the left child
        //   and the new value as the right child.
        RBBINode *orNode = new RBBINode(RBBINode::opOr);
        if (orNode == NULL) {
            // The leaf has not been linked into the tree yet, so nothing else
            // owns it; free it here or it would leak.
            delete leafNode;
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        orNode->fLeftChild  = usetNode->fLeftChild;
        orNode->fRightChild = leafNode;
        orNode->fLeftChild->fParent  = orNode;
        orNode->fRightChild->fParent = orNode;
        usetNode->fLeftChild = orNode;
        orNode->fParent = usetNode;
    }
}
//------------------------------------------------------------------------
//
// getNumCharCategories
//
//------------------------------------------------------------------------
int32_t RBBISetBuilder::getNumCharCategories() const {
    // Three category numbers (0, 1 and 2) precede the numbered groups.
    const int32_t reservedCategories = 3;
    return fGroupCount + reservedCategories;
}
//------------------------------------------------------------------------
//
// sawBOF
//
//------------------------------------------------------------------------
UBool RBBISetBuilder::sawBOF() const {
    // Report the flag recorded by the builder; this accessor has no side effects.
    const UBool bofWasReferenced = fSawBOF;
    return bofWasReferenced;
}
//------------------------------------------------------------------------
//
// getFirstChar Given a runtime RBBI character category, find
// the first UChar32 that is in the set of chars
// in the category.
//------------------------------------------------------------------------
UChar32 RBBISetBuilder::getFirstChar(int32_t category) const {
    // Return the start of the first range in the list that carries the
    // requested category number, or -1 when no range carries it.
    for (const RangeDescriptor *range = fRangeList; range != 0; range = range->fNext) {
        if (range->fNum == category) {
            return range->fStartChar;
        }
    }
    return (UChar32)-1;
}
//------------------------------------------------------------------------
//
// printRanges A debugging function.
// dump out all of the range definitions.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printRanges() {
RangeDescriptor *rlRange;
int i;
RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
RBBIDebugPrintf("%2i %4x-%4x ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
UnicodeString setName = UNICODE_STRING("anon", 4);
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
}
}
RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
}
RBBIDebugPrintf("\n");
}
}
#endif
//------------------------------------------------------------------------
//
// printRangeGroups A debugging function.
// dump out all of the range groups.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printRangeGroups() {
RangeDescriptor *rlRange;
RangeDescriptor *tRange;
int i;
int lastPrintedGroupNum = 0;
RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
int groupNum = rlRange->fNum & 0xbfff;
if (groupNum > lastPrintedGroupNum) {
lastPrintedGroupNum = groupNum;
RBBIDebugPrintf("%2i ", groupNum);
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
UnicodeString setName = UNICODE_STRING("anon", 4);
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
}
}
RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
}
i = 0;
for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
if (tRange->fNum == rlRange->fNum) {
if (i++ % 5 == 0) {
RBBIDebugPrintf("\n ");
}
RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar);
}
}
RBBIDebugPrintf("\n");
}
}
RBBIDebugPrintf("\n");
}
#endif
//------------------------------------------------------------------------
//
// printSets A debugging function.
// dump out all of the set definitions.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printSets() {
    RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");

    // Walk the rule builder's uset nodes until a NULL element ends the list.
    int index = 0;
    RBBINode *usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(index);
    while (usetNode != NULL) {
        RBBIDebugPrintf("%3d ", index);

        // Named sets are reached through a variable-reference grandparent;
        // anything else is reported as "anonymous".
        UnicodeString setName = UNICODE_STRING("anonymous", 9);
        RBBINode *setRef = usetNode->fParent;
        RBBINode *varRef = (setRef != NULL) ? setRef->fParent : NULL;
        if (varRef != NULL && varRef->fType == RBBINode::varRef) {
            setName = varRef->fText;
        }

        RBBI_DEBUG_printUnicodeString(setName);
        RBBIDebugPrintf(" ");
        RBBI_DEBUG_printUnicodeString(usetNode->fText);
        RBBIDebugPrintf("\n");

        // Dump the expression subtree hanging off the set node, when present.
        if (usetNode->fLeftChild != NULL) {
            usetNode->fLeftChild->printTree(TRUE);
        }

        index++;
        usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(index);
    }
    RBBIDebugPrintf("\n");
}
#endif
//-------------------------------------------------------------------------------------
//
// RangeDescriptor copy constructor
//
//-------------------------------------------------------------------------------------
RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
    // Copy the scalar fields; the copy is not linked into any list.
    fStartChar = other.fStartChar;
    fEndChar   = other.fEndChar;
    fNum       = other.fNum;
    fNext      = NULL;

    // Allocate our own vector.  An error already present on entry is restored
    // afterwards so the vector constructor cannot replace it.
    const UErrorCode statusOnEntry = status;
    fIncludesSets = new UVector(status);
    if (U_FAILURE(statusOnEntry)) {
        status = statusOnEntry;
    }
    if (U_FAILURE(status)) {
        return;
    }
    if (fIncludesSets == 0) {
        // Allocation of the vector itself failed.
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Copy the element pointers across, one by one.
    const int32_t count = other.fIncludesSets->size();
    for (int32_t ix = 0; ix < count; ++ix) {
        fIncludesSets->addElement(other.fIncludesSets->elementAt(ix), status);
    }
}
//-------------------------------------------------------------------------------------
//
// RangeDescriptor default constructor
//
//-------------------------------------------------------------------------------------
RangeDescriptor::RangeDescriptor(UErrorCode &status) {
    // Start as an empty, unnumbered, unlinked range.
    fStartChar = 0;
    fEndChar   = 0;
    fNum       = 0;
    fNext      = NULL;

    // Allocate the set vector.  An error already present on entry is restored
    // afterwards so the vector constructor cannot replace it.
    const UErrorCode statusOnEntry = status;
    fIncludesSets = new UVector(status);
    if (U_FAILURE(statusOnEntry)) {
        status = statusOnEntry;
    }

    // With no error recorded, the only failure left to detect is that the
    // vector could not be allocated at all.
    if (U_SUCCESS(status) && fIncludesSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
//-------------------------------------------------------------------------------------
//
// RangeDescriptor Destructor
//
//-------------------------------------------------------------------------------------
RangeDescriptor::~RangeDescriptor() {
    // The descriptor owns only the vector; fNext is not followed or freed here.
    UVector *owned = fIncludesSets;
    fIncludesSets = NULL;
    delete owned;
}
//-------------------------------------------------------------------------------------
//
// RangeDescriptor::split()
//
//-------------------------------------------------------------------------------------
void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
    U_ASSERT(where>fStartChar && where<=fEndChar);

    // The upper half starts life as a copy of this range (same number, same
    // set list) and is then trimmed to begin at 'where'.
    RangeDescriptor *upperHalf = new RangeDescriptor(*this, status);
    if (upperHalf == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_FAILURE(status)) {
        delete upperHalf;
        return;
    }

    // Link the new range in directly after this one, then shrink this range
    // so that it ends just below 'where'.
    upperHalf->fStartChar = where;
    upperHalf->fNext      = this->fNext;
    this->fNext           = upperHalf;
    this->fEndChar        = where-1;
}
//-------------------------------------------------------------------------------------
//
// RangeDescriptor::setDictionaryFlag
//
// Character Category Numbers that include characters from
// the original Unicode Set named "dictionary" have bit 14
// set to 1. The RBBI runtime engine uses this to trigger
// use of the word dictionary.
//
// This function looks through the Unicode Sets that it
// (the range) includes, and sets the bit in fNum when
// "dictionary" is among them.
//
// TODO: a faster way would be to find the set node for
// "dictionary" just once, rather than looking it
// up by name every time.
//
//-------------------------------------------------------------------------------------
void RangeDescriptor::setDictionaryFlag() {
int i;
for (i=0; i<this->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
UnicodeString setName;
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
}
}
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
this->fNum |= 0x4000;
break;
}
}
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

130
source/common/rbbisetb.h Normal file
View file

@ -0,0 +1,130 @@
//
// rbbisetb.h
/*
**********************************************************************
* Copyright (c) 2001-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef RBBISETB_H
#define RBBISETB_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "rbbirb.h"
#include "uvector.h"
struct UNewTrie;
U_NAMESPACE_BEGIN
//
// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
// from the Unicode Sets appearing in the source RBBI rules, and
// creates the TRIE table used to map from Unicode to the
// character categories.
//
//
// RangeDescriptor
//
// Each of the non-overlapping character ranges gets one of these descriptors.
// All of them are strung together in a linked list, which is kept in order
// (by character)
//
class RangeDescriptor : public UMemory {
public:
UChar32 fStartChar; // Start of range, unicode 32 bit value.
UChar32 fEndChar; // End of range (inclusive), unicode 32 bit value.
int32_t fNum; // runtime-mapped input value for this range.
// Bit 14 (0x4000) is set by setDictionaryFlag().
UVector *fIncludesSets; // vector of the original
// Unicode sets that include this range.
// (Contains ptrs to uset nodes)
RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
RangeDescriptor(UErrorCode &status);
RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
~RangeDescriptor();
void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with
// where appearing in the second (higher) part.
void setDictionaryFlag(); // Check whether this range appears as part of
// the Unicode set named "dictionary"
private:
RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
};
//
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
//
// Starting with the rules parse tree from the scanner,
//
// - Enumerate the set of UnicodeSets that are referenced
// by the RBBI rules.
// - compute a derived set of non-overlapping UnicodeSets
// that will correspond to columns in the state table for
// the RBBI execution engine.
// - construct the trie table that maps input characters
// to set numbers in the non-overlapping set of sets.
//
class RBBISetBuilder : public UMemory {
public:
RBBISetBuilder(RBBIRuleBuilder *rb);
~RBBISetBuilder();
void build();
void addValToSets(UVector *sets, uint32_t val);
void addValToSet (RBBINode *usetNode, uint32_t val);
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
// runtime state machine, which are the same as
// columns in the DFA state table
int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
// Caches the size for serializeTrie().
void serializeTrie(uint8_t *where); // write out the serialized Trie.
// getTrieSize() must be called first.
UChar32 getFirstChar(int32_t val) const; // First code point of the first range whose
// category number equals val, or -1 if none.
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
#ifdef RBBI_DEBUG
void printSets();
void printRanges();
void printRangeGroups();
#else
// Without RBBI_DEBUG the debug printers expand to nothing.
#define printSets()
#define printRanges()
#define printRangeGroups()
#endif
private:
// NOTE(review): no definition of numberSets() appears in rbbisetb.cpp;
// it looks like an unused leftover declaration - confirm before relying on it.
void numberSets();
RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
UErrorCode *fStatus; // Status slot taken from the rule builder (rb->fStatus).
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
uint32_t fTrieSize; // the Unicode Sets.
// Groups correspond to character categories -
// groups of ranges that are in the same original UnicodeSets.
// fGroupCount is the number of groups assigned so far by build().
// State table column 0 is not used, column 1 is for end-of-input and
// column 2 is for beginning-of-input, so the first group is numbered 3
// and the last is fGroupCount+2. getNumCharCategories() therefore
// returns fGroupCount+3.
int32_t fGroupCount;
UBool fSawBOF;
RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
};
U_NAMESPACE_END
#endif

269
source/common/rbbistbl.cpp Normal file
View file

@ -0,0 +1,269 @@
//
// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
//
/*
***************************************************************************
* Copyright (C) 2002-2006 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/parsepos.h"
#include "umutex.h"
#include "rbbirb.h"
#include "rbbinode.h"
//
// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
// when the hash table is deleted.
//
U_CDECL_BEGIN
static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
    // The hash table stores its values as untyped pointers; restore the real
    // type so that the entry's destructor runs.
    delete (U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *)p;
}
U_CDECL_END
U_NAMESPACE_BEGIN
RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
    :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
{
    fCachedSetLookup = NULL;
    fHashTable       = NULL;

    // Keys are UnicodeStrings with no key deleter; uhash_open() checks the
    // incoming status itself.
    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
    if (U_SUCCESS(status)) {
        // Values are RBBISymbolTableEntry objects, destroyed through the
        // deleter above.
        uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
    }
}
RBBISymbolTable::~RBBISymbolTable()
{
    // Closing the table also runs the value deleter installed by the
    // constructor on each stored entry.
    UHashtable *table = fHashTable;
    uhash_close(table);
}
//
// RBBISymbolTable::lookup This function from the abstract symbol table interface
// looks up a variable name and returns a UnicodeString
// containing the substitution text.
//
// The variable name does NOT include the leading $.
//
const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
{
    // This method is const in the interface but has to update the one-entry
    // set cache, so constness is cast away for that single field.
    RBBISymbolTable *mutableThis = (RBBISymbolTable *)this;

    RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
    if (entry == NULL) {
        // Unknown variable name.
        return NULL;
    }

    // The left child of the variable-reference node is the root of the
    // expression assigned to the variable.
    RBBINode *exprNode = entry->val->fLeftChild;

    if (exprNode->fType != RBBINode::setRef) {
        // The variable refers to something other than just a set:
        // return the original source string for the expression.
        mutableThis->fCachedSetLookup = NULL;
        return &exprNode->fText;
    }

    // The $variable refers to a single UnicodeSet.  Remember the set and
    // return ffffString, which lookupMatcher() subsequently interprets as a
    // stand-in character for that set.
    RBBINode *usetNode = exprNode->fLeftChild;
    mutableThis->fCachedSetLookup = usetNode->fInputSet;
    return &ffffString;
}
//
// RBBISymbolTable::lookupMatcher This function from the abstract symbol table
// interface maps a single stand-in character to a
// pointer to a Unicode Set. The Unicode Set code uses this
// mechanism to get all references to the same $variable
// name to refer to a single common Unicode Set instance.
//
// This implementation cheats a little, and does not maintain a map of stand-in chars
// to sets. Instead, it takes advantage of the fact that the UnicodeSet
// constructor will always call this function right after calling lookup(),
// and we just need to remember what set to return between these two calls.
const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
{
    // Only the 0xffff stand-in character produced by lookup() maps to a set.
    if (ch != 0xffff) {
        return NULL;
    }

    // Hand back the set remembered by the preceding lookup() and clear the
    // cache, so that each remembered set is returned at most once.
    UnicodeSet *remembered = fCachedSetLookup;
    ((RBBISymbolTable *)this)->fCachedSetLookup = 0;   // cast off const
    return remembered;
}
//
// RBBISymbolTable::parseReference This function from the abstract symbol table interface
// looks for a $variable name in the source text.
// It does not look it up, only scans for it.
// It is used by the UnicodeSet parser.
//
// This implementation is lifted pretty much verbatim
// from the rules based transliterator implementation.
// I didn't see an obvious way of sharing it.
//
UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
                                              ParsePosition& pos, int32_t limit) const
{
    const int32_t begin = pos.getIndex();

    // Advance 'end' over the longest run of identifier characters: the first
    // must be an ID-start character, and every one must be an ID-part character.
    int32_t end = begin;
    for (; end < limit; ++end) {
        UChar c = text.charAt(end);
        if ((end==begin && !u_isIDStart(c)) || !u_isIDPart(c)) {
            break;
        }
    }

    UnicodeString name;
    if (end != begin) {
        // A name was found: consume it and copy it out.
        pos.setIndex(end);
        text.extractBetween(begin, end, name);
    }
    // When no valid name characters were found, 'name' is still empty and
    // 'pos' is untouched; the empty string indicates failure.
    return name;
}
//
//  RBBISymbolTable::lookupNode   Returns the RBBI node stored under the given
//                                variable name, or NULL when the table has no
//                                entry for that name.
//
RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
    const RBBISymbolTableEntry *entry =
        (const RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
    if (entry == NULL) {
        return NULL;
    }
    return entry->val;
}
//
// RBBISymbolTable::addEntry Add a new entry to the symbol table.
// Indicate an error if the name already exists -
// this will only occur in the case of duplicate
// variable assignments.
//
void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
RBBISymbolTableEntry *e;
/* test for buffer overflows */
if (U_FAILURE(err)) {
return;
}
e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
if (e != NULL) {
err = U_BRK_VARIABLE_REDFINITION;
return;
}
e = new RBBISymbolTableEntry;
if (e == NULL) {
err = U_MEMORY_ALLOCATION_ERROR;
return;
}
e->key = key;
e->val = val;
uhash_put( fHashTable, &e->key, e, &err);
}
// Creates an entry with an empty key and no node attached.
RBBISymbolTableEntry::RBBISymbolTableEntry()
    : UMemory(),
      key(),
      val(NULL)
{
}
// Destroys the entry together with the node it holds.
//
// The "val" of a symbol table entry is a variable reference node whose left child
// is the right-hand-side expression of the assignment.  Unlike other node types,
// the children of a variable reference node are not deleted recursively, so the
// left child is deleted by hand before the node itself.
RBBISymbolTableEntry::~RBBISymbolTableEntry() {
    // The constructor leaves val as NULL, so an entry that never received
    // a node must not be dereferenced here.
    if (val != NULL) {
        delete val->fLeftChild;
        val->fLeftChild = NULL;
        delete val;
    }
    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
}
//
// RBBISymbolTable::rbbiSymtablePrint Debugging function; dumps out the symbol table contents.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
RBBIDebugPrintf("Variable Definitions\n"
"Name Node Val String Val\n"
"----------------------------------------------------------------------\n");
int32_t pos = -1;
const UHashElement *e = NULL;
for (;;) {
e = uhash_nextElement(fHashTable, &pos);
if (e == NULL ) {
break;
}
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
RBBI_DEBUG_printUnicodeString(s->key, 15);
RBBIDebugPrintf(" %8p ", (void *)s->val);
RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
RBBIDebugPrintf("\n");
}
RBBIDebugPrintf("\nParsed Variable Definitions\n");
pos = -1;
for (;;) {
e = uhash_nextElement(fHashTable, &pos);
if (e == NULL ) {
break;
}
RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer;
RBBI_DEBUG_printUnicodeString(s->key);
s->val->fLeftChild->printTree(TRUE);
RBBIDebugPrintf("\n");
}
}
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

1278
source/common/rbbitblb.cpp Normal file

File diff suppressed because it is too large Load diff

127
source/common/rbbitblb.h Normal file
View file

@ -0,0 +1,127 @@
//
// rbbitblb.h
//
/*
**********************************************************************
* Copyright (c) 2002-2005, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef RBBITBLB_H
#define RBBITBLB_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "rbbinode.h"
U_NAMESPACE_BEGIN
class RBBIRuleScanner;
class RBBIRuleBuilder;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
// It builds the state transition table used by the RBBI runtime
// from the expression syntax tree generated by the rule scanner.
//
// This class is part of the RBBI implementation only.
// There is no user-visible public API here.
//
class RBBITableBuilder : public UMemory {
public:
// Creates a builder bound to a rule builder and to the root of a parse tree.
// NOTE(review): fTree below is a reference to an RBBINode pointer, so the
// pointer passed as rootNode presumably has to outlive this object - confirm
// against the constructor in rbbitblb.cpp.
RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode);
~RBBITableBuilder();
// Runs the table construction.  The individual steps are presumably the
// private functions declared below - the implementation is not visible here.
void build();
int32_t getTableSize() const; // Return the runtime size in bytes of
// the built state table
void exportTable(void *where); // fill in the runtime state table.
// Sufficient memory must exist at
// the specified location.
private:
// Per-node computations over the parse tree.  The names follow the usual
// nullable / firstpos / lastpos / followpos terminology for building a DFA
// from a syntax tree; NOTE(review): confirm details in rbbitblb.cpp.
void calcNullable(RBBINode *n);
void calcFirstPos(RBBINode *n);
void calcLastPos(RBBINode *n);
void calcFollowPos(RBBINode *n);
void calcChainedFollowPos(RBBINode *n);
void bofFixup();
// State table construction and post-processing passes.
void buildStateTable();
void flagAcceptingStates();
void flagLookAheadStates();
void flagTaggedStates();
void mergeRuleStatusVals();
// Set functions for UVector.
// TODO: make a USet subclass of UVector
void setAdd(UVector *dest, UVector *source);
UBool setEquals(UVector *a, UVector *b);
void sortedAdd(UVector **dest, int32_t val);
public:
// Debug printing.  When RBBI_DEBUG is not defined, the names are defined as
// empty function-like macros instead, so calls to them expand to nothing.
#ifdef RBBI_DEBUG
void printSet(UVector *s);
void printPosSets(RBBINode *n /* = NULL*/);
void printStates();
void printRuleStatusTable();
#else
#define printSet(s)
#define printPosSets(n)
#define printStates()
#define printRuleStatusTable()
#endif
private:
RBBIRuleBuilder *fRB;
RBBINode *&fTree; // The root node of the parse tree to build a
// table for.
UErrorCode *fStatus;
UVector *fDStates; // D states (Aho's terminology)
// Index is state number
// Contents are RBBIStateDescriptor pointers.
// Declared private to prevent copying.
RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class
};
//
// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
// one for each state.
class RBBIStateDescriptor : public UMemory {
public:
// NOTE(review): the meaning of the following flag and index fields is not
// documented in this header; see their use in rbbitblb.cpp before relying on them.
UBool fMarked;
int32_t fAccepting;
int32_t fLookAhead;
UVector *fTagVals;
int32_t fTagsIdx;
UVector *fPositions; // Set of parse tree positions associated
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
UVector *fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates
// NOTE(review): maxInputSymbol presumably sizes fDtran, and errors are
// presumably reported through fStatus - confirm in rbbitblb.cpp.
RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus);
~RBBIStateDescriptor();
private:
// Declared private to prevent copying.
RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class
RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class
};
U_NAMESPACE_END
#endif

400
source/common/resbund.cpp Normal file
View file

@ -0,0 +1,400 @@
/*
**********************************************************************
* Copyright (C) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File resbund.cpp
*
* Modification History:
*
* Date Name Description
* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile
* based on code taken from scanForLocale. Added
* constructor which attempts to read resource bundle
* from a specific file, without searching other files.
* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed
* infinite loops in scanForFile and scanForLocale.
* Modified getRawResourceData to not delete storage in
* localeData and resourceData which it doesn't own.
* Added Mac compatibility #ifdefs for tellp() and
* ios::nocreate.
* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of
* the highly inefficient ostrstream objects.
* 03/13/97 aliu Rewrote to load in entire resource bundle and store
* it as a Hashtable of ResourceBundleData objects.
* Added state table to govern parsing of files.
* Modified to load locale index out of new file distinct
* from default.txt.
* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
* Added support for custom file suffixes. Again, needed
* to support timezone data. Improved error handling to
* detect duplicate tags and subtags.
* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling
* of failing UErrorCode values on entry to API methods.
* Fixed bugs in getArrayItem() for negative indices.
* 04/29/97 aliu Update to use new Hashtable deletion protocol.
* 05/06/97 aliu Flattened kTransitionTable for HP compiler.
* Fixed usage of CharString.
* 06/11/99 stephen Removed parsing of .txt files.
* Reworked to use new binary format.
* Cleaned up.
* 06/14/99 stephen Removed methods taking a filename suffix.
* 06/22/99 stephen Added missing T_FileStream_close in parse()
* 11/09/99 weiv Added getLocale(), rewritten constructForLocale()
* March 2000 weiv complete overhaul.
******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/resbund.h"
#include "umutex.h"
#include "uresimp.h"
U_NAMESPACE_BEGIN
/*-----------------------------------------------------------------------------
* Implementation Notes
*
* Resource bundles are read in once, and thereafter cached.
* ResourceBundle statically keeps track of which files have been
* read, so we are guaranteed that each file is read at most once.
* Resource bundles can be loaded from different data directories and
* will be treated as distinct, even if they are for the same locale.
*
* Resource bundles are lightweight objects, which have pointers to
* one or more shared Hashtable objects containing all the data.
* Copying would be cheap, but there is no copy constructor, since
* there wasn't one in the original API.
*
* The ResourceBundle parsing mechanism is implemented as a transition
* network, for easy maintenance and modification. The network is
* implemented as a matrix (instead of in code) to make this even
* easier. The matrix contains Transition objects. Each Transition
* object describes a destination node and an action to take before
* moving to the destination node. The source node is encoded by the
* index of the object in the array that contains it. The pieces
* needed to understand the transition network are the enums for node
* IDs and actions, the parse() method, which walks through the
* network and implements the actions, and the network itself. The
* network guarantees certain conditions, for example, that a new
* resource will not be closed until one has been opened first; or
* that data will not be stored into a TaggedList until a TaggedList
* has been created. Nonetheless, the code in parse() does some
* consistency checks as it runs the network, and fails with a
* U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input
* data has a bad format, a U_INVALID_FORMAT_ERROR is returned. If you
* see a U_INTERNAL_PROGRAM_ERROR, the transition matrix has a bug in
* it.
*
* Old functionality of multiple locales in a single file is still
* supported. For this reason, LOCALE names override FILE names. If
* data for en_US is located in the en.txt file, once it is loaded,
* the code will not care where it came from (other than remembering
* which directory it came from). However, if there is an en_US
* resource in en_US.txt, that will take precedence. There is no
* limit to the number or type of resources that can be stored in a
* file, however, files are only searched in a specific way. If
* en_US_CA is requested, then first en_US_CA.txt is searched, then
* en_US.txt, then en.txt, then default.txt. So it only makes sense
* to put certain locales in certain files. In this example, it would
* be logical to put en_US_CA, en_US, and en into the en.txt file,
* since they would be found there if asked for. The extreme example
* is to place all locale resources into default.txt, which should
* also work.
*
* Inheritance is implemented. For example, xx_YY_zz inherits as
* follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented
* as an array of hashtables. There will be from 1 to 4 hashtables in
* the array.
*
* Fallback files are implemented. The fallback pattern is Language
* Country Variant (LCV) -> LC -> L. Fallback is first done for the
* requested locale. Then it is done for the default locale, as
* returned by Locale::getDefault(). Then the special file
* default.txt is searched for the default locale. The overall FILE
* fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
*
* Note that although file name searching includes the default locale,
* once a ResourceBundle object is constructed, the inheritance path
* no longer includes the default locale. The path is LCV -> LC -> L
* -> default.
*
* File parsing is lazy. Nothing is parsed unless it is called for by
* someone. So when a ResourceBundle for xx_YY_zz is constructed,
* only that locale is parsed (along with anything else in the same
* file). Later, if the FooBar tag is asked for, and if it isn't
* found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
* so forth, until the chain is exhausted or the tag is found.
*
* Thread-safety is implemented around caches, both the cache that
* stores all the resource data, and the cache that stores flags
* indicating whether or not a file has been visited. These caches
* delete their storage at static cleanup time, when the process
* quits.
*
* ResourceBundle supports TableCollation as a special case. This
* involves having special ResourceBundle objects which DO own their
* data, since we don't want large collation rule strings in the
* ResourceBundle cache (these are already cached in the
* TableCollation cache). TableCollation files (.ctx files) have the
* same format as normal resource data files, with a different
* interpretation, from the standpoint of ResourceBundle. .ctx files
* are loaded into otherwise ordinary ResourceBundle objects. They
* don't inherit (that's implemented by TableCollation) and they own
* their data (as mentioned above). However, they still support
* possible multiple locales in a single .ctx file. (This is in
* practice a bad idea, since you only want the one locale you're
* looking for, and only one tag will be present
* ("CollationElements"), so you don't need an inheritance chain of
* multiple locales.) Up to 4 locale resources will be loaded from a
* .ctx file; everything after the first 4 is ignored (parsed and
* deleted). (Normal .txt files have no limit.) Instead of being
* loaded into the cache, and then looked up as needed, the locale
* resources are read straight into the ResourceBundle object.
*
* The Index, which used to reside in default.txt, has been moved to a
* new file, index.txt. This file contains a slightly modified format
* with the addition of the "InstalledLocales" tag; it looks like:
*
* Index {
* InstalledLocales {
* ar
* ..
* zh_TW
* }
* }
*/
//-----------------------------------------------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
// Opens the bundle for the default locale, with no explicit package path.
// Failures from ures_open() are reported through err.
ResourceBundle::ResourceBundle(UErrorCode &err)
    : UObject(),
      fLocale(NULL)
{
    const char *defaultLocaleName = Locale::getDefault().getName();
    fResource = ures_open(0, defaultLocaleName, &err);
}
// Copy constructor.  Duplicates the underlying UResourceBundle of other, if it
// has one.  The cached Locale is not copied; it starts out as NULL.
// Any error from the copy is discarded.
ResourceBundle::ResourceBundle(const ResourceBundle &other)
    : UObject(other),
      fLocale(NULL)
{
    // Copying a bad (resource-less) bundle simply yields another bad bundle.
    fResource = NULL;
    if (other.fResource) {
        UErrorCode status = U_ZERO_ERROR;
        fResource = ures_copyResb(0, other.fResource, &status);
    }
}
// Wraps a copy of the given C-level resource bundle.
// A NULL res produces a bundle with no resource; err is left untouched in that case.
ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
    : UObject(),
      fLocale(NULL)
{
    if (!res) {
        /* Copying a bad resource bundle */
        fResource = NULL;
        return;
    }
    fResource = ures_copyResb(0, res, &err);
}
// Opens the bundle for the given locale from the given package path.
// Failures from ures_open() are reported through err.
ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
    : UObject(),
      fLocale(NULL)
{
    const char *localeName = locale.getName();
    fResource = ures_open(path, localeName, &err);
}
// Assignment operator.  Releases the resource currently held and replaces it
// with a copy of other's resource (or NULL if other has none).
// Self-assignment is a no-op.  Any error from the copy is discarded.
ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
{
    if(this == &other) {
        return *this;
    }
    if(fResource != 0) {
        ures_close(fResource);
        fResource = NULL;
    }
    // The lazily created Locale describes the resource that was just released.
    // Drop it so that getLocale() recomputes it for the new resource instead of
    // returning the locale of the previous one.
    if(fLocale != NULL) {
        delete fLocale;
        fLocale = NULL;
    }
    UErrorCode status = U_ZERO_ERROR;
    if (other.fResource) {
        fResource = ures_copyResb(0, other.fResource, &status);
    } else {
        /* Copying a bad resource bundle */
        fResource = NULL;
    }
    return *this;
}
// Closes the wrapped resource, if any, and frees the cached Locale.
ResourceBundle::~ResourceBundle()
{
    if (fResource != NULL) {
        ures_close(fResource);
    }
    // delete on a NULL pointer has no effect, so no explicit check is needed.
    delete fLocale;
}
// Returns a heap-allocated copy of this bundle, made with the copy constructor.
ResourceBundle *
ResourceBundle::clone() const {
    ResourceBundle *copy = new ResourceBundle(*this);
    return copy;
}
// Returns the string value of this resource, as obtained from ures_getString().
// The result is built with the (isTerminated, text, length) UnicodeString
// constructor; errors are reported through status.
UnicodeString ResourceBundle::getString(UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getString(fResource, &length, &status);
    return UnicodeString(TRUE, chars, length);
}
// Forwards to ures_getBinary() on the wrapped resource.
// len receives the length reported by that call; errors go to status.
const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
    const uint8_t *bytes = ures_getBinary(fResource, &len, &status);
    return bytes;
}
// Forwards to ures_getIntVector() on the wrapped resource.
// len receives the length reported by that call; errors go to status.
const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
    const int32_t *values = ures_getIntVector(fResource, &len, &status);
    return values;
}
// Forwards to ures_getUInt() on the wrapped resource; errors go to status.
uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
    uint32_t value = ures_getUInt(fResource, &status);
    return value;
}
// Forwards to ures_getInt() on the wrapped resource; errors go to status.
int32_t ResourceBundle::getInt(UErrorCode& status) const {
    int32_t value = ures_getInt(fResource, &status);
    return value;
}
// Forwards to ures_getName() on the wrapped resource.
const char *ResourceBundle::getName(void) const {
    const char *name = ures_getName(fResource);
    return name;
}
// Forwards to ures_getKey() on the wrapped resource.
const char *ResourceBundle::getKey(void) const {
    const char *key = ures_getKey(fResource);
    return key;
}
// Forwards to ures_getType() on the wrapped resource.
UResType ResourceBundle::getType(void) const {
    UResType type = ures_getType(fResource);
    return type;
}
// Forwards to ures_getSize() on the wrapped resource.
int32_t ResourceBundle::getSize(void) const {
    int32_t size = ures_getSize(fResource);
    return size;
}
// Forwards to ures_hasNext() on the wrapped resource.
UBool ResourceBundle::hasNext(void) const {
    UBool more = ures_hasNext(fResource);
    return more;
}
// Forwards to ures_resetIterator() on the wrapped resource.
void ResourceBundle::resetIterator(void)
{
    ures_resetIterator(fResource);
}
// Returns the next child resource of this bundle as a new ResourceBundle.
// The child is fetched into a stack-allocated UResourceBundle, copied into the
// result object, and the stack object is closed again if status reports success.
ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);
    ures_getNextResource(fResource, &stackBundle, &status);

    // The constructor makes its own copy of the stack object.
    ResourceBundle result(&stackBundle, status);

    if (U_SUCCESS(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
// Returns the next string from ures_getNextString(), without asking for its key.
// Errors are reported through status.
UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
    int32_t length = 0;
    const UChar *chars = ures_getNextString(fResource, &length, 0, &status);
    return UnicodeString(TRUE, chars, length);
}
// Returns the next string from ures_getNextString(); key is passed through to
// that call so it can report the string's key.  Errors are reported through status.
UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
    int32_t length = 0;
    const UChar *chars = ures_getNextString(fResource, &length, key, &status);
    return UnicodeString(TRUE, chars, length);
}
// Returns the child resource at position indexR as a new ResourceBundle.
// The child is fetched into a stack-allocated UResourceBundle, copied into the
// result object, and the stack object is closed again if status reports success.
ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);
    ures_getByIndex(fResource, indexR, &stackBundle, &status);

    // The constructor makes its own copy of the stack object.
    ResourceBundle result(&stackBundle, status);

    if (U_SUCCESS(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
// Returns the string at position indexS, as obtained from ures_getStringByIndex().
// Errors are reported through status.
UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getStringByIndex(fResource, indexS, &length, &status);
    return UnicodeString(TRUE, chars, length);
}
// Returns the child resource stored under key as a new ResourceBundle.
// The child is fetched into a stack-allocated UResourceBundle, copied into the
// result object, and the stack object is closed again if status reports success.
ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);
    ures_getByKey(fResource, key, &stackBundle, &status);

    // The constructor makes its own copy of the stack object.
    ResourceBundle result(&stackBundle, status);

    if (U_SUCCESS(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
// Same shape as get(key), but the lookup goes through ures_getByKeyWithFallback().
// The child is fetched into a stack-allocated UResourceBundle, copied into the
// result object, and the stack object is closed again if status reports success.
ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
    UResourceBundle stackBundle;
    ures_initStackObject(&stackBundle);
    ures_getByKeyWithFallback(fResource, key, &stackBundle, &status);

    // The constructor makes its own copy of the stack object.
    ResourceBundle result(&stackBundle, status);

    if (U_SUCCESS(status)) {
        ures_close(&stackBundle);
    }
    return result;
}
// Returns the string stored under key, as obtained from ures_getStringByKey().
// Errors are reported through status.
UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
    int32_t length = 0;
    const UChar *chars = ures_getStringByKey(fResource, key, &length, &status);
    return UnicodeString(TRUE, chars, length);
}
// Forwards to ures_getVersionNumber() on the wrapped resource.
const char*
ResourceBundle::getVersionNumber() const
{
    const char *version = ures_getVersionNumber(fResource);
    return version;
}
// Forwards to ures_getVersion(), which fills in versionInfo for the wrapped resource.
void ResourceBundle::getVersion(UVersionInfo versionInfo) const
{
    ures_getVersion(fResource, versionInfo);
}
// Returns the Locale of this bundle, creating and caching it in fLocale on
// first use.  If the Locale object cannot be allocated, the default locale is
// returned instead and nothing is cached.
const Locale &ResourceBundle::getLocale(void) const
{
UBool needInit;
// First check whether a Locale has been cached already.
// NOTE(review): UMTX_CHECK presumably evaluates the condition in a
// thread-safe way using the global mutex (NULL) - confirm in umutex.h.
UMTX_CHECK(NULL, (fLocale == NULL), needInit);
if(needInit) {
// NOTE(review): the status set by ures_getLocale() is not examined here.
UErrorCode status = U_ZERO_ERROR;
const char *localeName = ures_getLocale(fResource, &status);
// The candidate Locale is built before the lock is taken.
Locale *tLocale = new Locale(localeName);
// Null pointer check
if (tLocale == NULL) {
return Locale::getDefault(); // Return default locale if one could not be created.
}
umtx_lock(NULL);
ResourceBundle *me = (ResourceBundle *)this; // semantically const
// Re-check under the lock: fLocale may have been set since the first check.
if (me->fLocale == NULL) {
me->fLocale = tLocale;
tLocale = NULL;
}
umtx_unlock(NULL);
// tLocale is NULL if it was installed above; otherwise the unused candidate is freed.
delete tLocale;
}
return *fLocale;
}
// Returns, by value, a Locale built from the name that ures_getLocaleByType()
// reports for the requested locale type.  Errors are reported through status.
const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
{
    const char *localeName = ures_getLocaleByType(fResource, type, &status);
    return localeName;
}
//eof
U_NAMESPACE_END

View file

@ -0,0 +1,55 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2006, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: resbund_cnv.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2004aug25
* created by: Markus W. Scherer
*
* Character conversion functions moved here from resbund.cpp
*/
#include "unicode/utypes.h"
#include "unicode/resbund.h"
#include "uinvchar.h"
U_NAMESPACE_BEGIN
// Opens the bundle for the given locale from a package path given as a UnicodeString.
// The work is done by constructForLocale(); failures are reported through error.
ResourceBundle::ResourceBundle(const UnicodeString& path,
                               const Locale&        locale,
                               UErrorCode&          error)
    : UObject(),
      fLocale(NULL)
{
    constructForLocale(path, locale, error);
}
// Opens the bundle for the default locale from a package path given as a UnicodeString.
// The work is done by constructForLocale(); failures are reported through error.
ResourceBundle::ResourceBundle(const UnicodeString& path,
                               UErrorCode&          error)
    : UObject(),
      fLocale(NULL)
{
    const Locale &defaultLocale = Locale::getDefault();
    constructForLocale(path, defaultLocale, error);
}
void
ResourceBundle::constructForLocale(const UnicodeString& path,
const Locale& locale,
UErrorCode& error)
{
if (path.isEmpty()) {
fResource = ures_open(NULL, locale.getName(), &error);
}
else {
UnicodeString nullTerminatedPath(path);
nullTerminatedPath.append((UChar)0);
fResource = ures_openU(nullTerminatedPath.getBuffer(), locale.getName(), &error);
}
}
U_NAMESPACE_END

160
source/common/ruleiter.cpp Normal file
View file

@ -0,0 +1,160 @@
/*
**********************************************************************
* Copyright (c) 2003-2007, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
#include "ruleiter.h"
#include "unicode/parsepos.h"
#include "unicode/unistr.h"
#include "unicode/symtable.h"
#include "util.h"
/* \U87654321 or \ud800\udc00 */
#define MAX_U_NOTATION_LEN 12
U_NAMESPACE_BEGIN
// Binds the iterator to the given text, symbol table and parse position.
// The text and position are held by reference; no variable expansion is active initially.
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
                                             ParsePosition& thePos)
    : text(theText),
      pos(thePos),
      sym(theSym),
      buf(0),
      bufPos(0)
{
}
// True when no variable expansion is in progress and the position has
// reached the end of the text.
UBool RuleCharacterIterator::atEnd() const {
    if (buf != 0) {
        return FALSE;
    }
    return pos.getIndex() == text.length();
}
// Returns the next code point, applying the processing selected by options
// (PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE), and advances past it.
// isEscaped is set to TRUE only when the returned code point came from an
// escape sequence.  Returns DONE at the end of input, when ec already holds a
// failure on entry, or after setting ec to U_UNDEFINED_VARIABLE or
// U_MALFORMED_UNICODE_ESCAPE.
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    const UBool wantVariables  = (options & PARSE_VARIABLES) != 0;
    const UBool wantEscapes    = (options & PARSE_ESCAPES) != 0;
    const UBool skipWhitespace = (options & SKIP_WHITESPACE) != 0;

    isEscaped = FALSE;
    UChar32 cp = DONE;

    for (;;) {
        cp = _current();
        _advance(UTF_CHAR_LENGTH(cp));

        // A variable reference is only recognized in the main text,
        // not inside the value of another variable.
        if (cp == SymbolTable::SYMBOL_REF && buf == 0 && wantVariables && sym != 0) {
            UnicodeString varName = sym->parseReference(text, pos, text.length());
            // If the name is empty there was an isolated SYMBOL_REF;
            // return it.  The caller must be prepared for this.
            if (varName.length() == 0) {
                break;
            }
            bufPos = 0;
            buf = sym->lookup(varName);
            if (buf == 0) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            // A variable with an empty value contributes nothing.
            if (buf->length() == 0) {
                buf = 0;
            }
            continue;
        }

        if (skipWhitespace && uprv_isRuleWhiteSpace(cp)) {
            continue;
        }

        if (cp == 0x5C /*'\\'*/ && wantEscapes) {
            // Decode the escape from a bounded look-ahead window, then skip
            // over as many code units as the decoder consumed.
            UnicodeString escapeText;
            int32_t consumed = 0;
            cp = lookahead(escapeText, MAX_U_NOTATION_LEN).unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = TRUE;
            if (cp < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        break;
    }

    return cp;
}
// Records the iterator's current state (text index, active variable buffer and
// the offset within it) in p, for a later setPos().
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos    = pos.getIndex();
    p.buf    = buf;
    p.bufPos = bufPos;
}
// Restores the state captured in p: text index, active variable buffer and
// the offset within it.
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf    = p.buf;
    bufPos = p.bufPos;
}
// Advances past any run of rule white space at the current position.
// Has no effect unless options includes SKIP_WHITESPACE.
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    UChar32 cp = _current();
    while (uprv_isRuleWhiteSpace(cp)) {
        _advance(UTF_CHAR_LENGTH(cp));
        cp = _current();
    }
}
// Copies up to maxLookAhead code units, starting at the current position, into
// result and returns result.  The source is the active variable value if there
// is one, otherwise the main text.  A negative maxLookAhead means "no limit".
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t count = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;

    if (buf == 0) {
        text.extract(pos.getIndex(), count, result);
    } else {
        buf->extract(bufPos, count, result);
    }
    return result;
}
// Public entry point for skipping ahead by count code units; delegates to _advance().
void RuleCharacterIterator::jumpahead(int32_t count)
{
    _advance(count);
}
/*
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
int32_t b = pos.getIndex();
text.extract(0, b, result);
return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
}
*/
// Returns the code point at the current position without any option processing:
// from the active variable value if there is one, otherwise from the main text.
// Returns DONE when the main text is exhausted.
UChar32 RuleCharacterIterator::_current() const {
    if (buf != 0) {
        return buf->char32At(bufPos);
    }

    int index = pos.getIndex();
    if (index < text.length()) {
        return text.char32At(index);
    }
    return (UChar32)DONE;
}
// Moves forward by count code units.
// Inside a variable value, the offset within the value is advanced and the
// expansion ends when the offset lands exactly on the value's length.
// Otherwise the text position is advanced, clamped to the end of the text.
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf == 0) {
        const int32_t textLength = text.length();
        int32_t newIndex = pos.getIndex() + count;
        if (newIndex > textLength) {
            newIndex = textLength;
        }
        pos.setIndex(newIndex);
        return;
    }

    bufPos += count;
    if (bufPos == buf->length()) {
        buf = 0;
    }
}
U_NAMESPACE_END
//eof

232
source/common/ruleiter.h Normal file
View file

@ -0,0 +1,232 @@
/*
**********************************************************************
* Copyright (c) 2003-2007, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
#ifndef _RULEITER_H_
#define _RULEITER_H_
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
class UnicodeString;
class ParsePosition;
class SymbolTable;
/**
* An iterator that returns 32-bit code points. This class is deliberately
* <em>not</em> related to any of the ICU character iterator classes
* in order to minimize complexity.
* @author Alan Liu
* @since ICU 2.8
*/
class RuleCharacterIterator : public UMemory {
// TODO: Ideas for later. (Do not implement if not needed, lest the
// code coverage numbers go down due to unused methods.)
// 1. Add a copy constructor, operator==() method.
// 2. Rather than return DONE, throw an exception if the end
// is reached -- this is an alternate usage model, probably not useful.
private:
/**
* Text being iterated. Held by reference, not copied.
*/
const UnicodeString& text;
/**
* Position of iterator. Held by reference, so changes made while
* iterating are visible through the caller's ParsePosition object.
*/
ParsePosition& pos;
/**
* Symbol table used to parse and dereference variables. May be 0.
*/
const SymbolTable* sym;
/**
* Current variable expansion, or 0 if none.
*/
const UnicodeString* buf;
/**
* Position within buf. Meaningless if buf == 0.
*/
int32_t bufPos;
public:
/**
* Value returned when there are no more characters to iterate.
*/
enum { DONE = -1 };
/**
* Bitmask option to enable parsing of variable names. If (options &
* PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
* its value. Variables are parsed using the SymbolTable API.
*/
enum { PARSE_VARIABLES = 1 };
/**
* Bitmask option to enable parsing of escape sequences. If (options &
* PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
* to its value. Escapes are parsed using Utility.unescapeAt().
*/
enum { PARSE_ESCAPES = 2 };
/**
* Bitmask option to enable skipping of whitespace. If (options &
* SKIP_WHITESPACE) != 0, then whitespace characters will be silently
* skipped, as if they were not present in the input. Whitespace
* characters are defined by UCharacterProperty.isRuleWhiteSpace().
*/
enum { SKIP_WHITESPACE = 4 };
/**
* Constructs an iterator over the given text, starting at the given
* position.
* @param text the text to be iterated
* @param sym the symbol table, or null if there is none. If sym is null,
* then variables will not be dereferenced, even if the PARSE_VARIABLES
* option is set.
* @param pos upon input, the index of the next character to return. If a
* variable has been dereferenced, then pos will <em>not</em> increment as
* characters of the variable value are iterated.
*/
RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
ParsePosition& pos);
/**
* Returns true if this iterator has no more characters to return.
*/
UBool atEnd() const;
/**
* Returns the next character using the given options, or DONE if there
* are no more characters, and advance the position to the next
* character.
* @param options one or more of the following options, bitwise-OR-ed
* together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
* @param isEscaped output parameter set to TRUE if the character
* was escaped
* @param ec input-output error code. An error will only be set by
* this routine if options includes PARSE_VARIABLES and an unknown
* variable name is seen, or if options includes PARSE_ESCAPES and
* an invalid escape sequence is seen.
* @return the current 32-bit code point, or DONE
*/
UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
/**
* Returns true if this iterator is currently within a variable expansion.
*/
inline UBool inVariable() const;
/**
* An opaque object representing the position of a RuleCharacterIterator.
* Its fields are private; only RuleCharacterIterator (a friend) can
* read or write them.
*/
struct Pos : public UMemory {
private:
const UnicodeString* buf;
int32_t pos;
int32_t bufPos;
friend class RuleCharacterIterator;
};
/**
* Sets an object which, when later passed to setPos(), will
* restore this iterator's position. Usage idiom:
*
* RuleCharacterIterator iterator = ...;
* RuleCharacterIterator::Pos pos;
* iterator.getPos(pos);
* for (;;) {
* iterator.getPos(pos);
* int c = iterator.next(...);
* ...
* }
* iterator.setPos(pos);
*
* @param p a position object to be set to this iterator's
* current position.
*/
void getPos(Pos& p) const;
/**
* Restores this iterator to the position it had when getPos()
* set the given object.
* @param p a position object previously set by getPos()
*/
void setPos(const Pos& p);
/**
* Skips ahead past any ignored characters, as indicated by the given
* options. This is useful in conjunction with the lookahead() method.
*
* Currently, this only has an effect for SKIP_WHITESPACE.
* @param options one or more of the following options, bitwise-OR-ed
* together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
*/
void skipIgnored(int32_t options);
/**
* Returns a string containing the remainder of the characters to be
* returned by this iterator, without any option processing. If the
* iterator is currently within a variable expansion, this will only
* extend to the end of the variable expansion. This method is provided
* so that iterators may interoperate with string-based APIs. The typical
* sequence of calls is to call skipIgnored(), then call lookahead(), then
* parse the string returned by lookahead(), then call jumpahead() to
* resynchronize the iterator.
* @param result a string to receive the characters to be returned
* by future calls to next()
* @param maxLookAhead The maximum to copy into the result.
* The default of -1 means no limit.
* @return a reference to result
*/
UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
/**
* Advances the position by the given number of 16-bit code units.
* This is useful in conjunction with the lookahead() method.
* @param count the number of 16-bit code units to jump over
*/
void jumpahead(int32_t count);
/**
* Returns a string representation of this object, consisting of the
* characters being iterated, with a '|' marking the current position.
* Position within an expanded variable is <em>not</em> indicated.
* (The declaration below is commented out, so this method is not
* currently part of the class.)
* @param result output parameter to receive a string
* representation of this object
*/
// UnicodeString& toString(UnicodeString& result) const;
private:
/**
* Returns the current 32-bit code point without parsing escapes, parsing
* variables, or skipping whitespace.
* @return the current 32-bit code point
*/
UChar32 _current() const;
/**
* Advances the position by the given amount.
* @param count the number of 16-bit code units to advance past
*/
void _advance(int32_t count);
};
// A variable expansion is in progress exactly when buf points at a value string.
inline UBool RuleCharacterIterator::inVariable() const
{
    const UBool expanding = (buf != 0);
    return expanding;
}
U_NAMESPACE_END
#endif // _RULEITER_H_
//eof

115
source/common/schriter.cpp Normal file
View file

@ -0,0 +1,115 @@
/*
******************************************************************************
* Copyright (C) 1998-2007, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*
* File schriter.cpp
*
* Modification History:
*
* Date Name Description
* 05/05/99 stephen Cleaned up.
******************************************************************************
*/
#include "unicode/chariter.h"
#include "unicode/schriter.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator)
// Default constructor: default-constructs both the base iterator and the
// owned string. Not meant to be used; see the warning below.
StringCharacterIterator::StringCharacterIterator()
    : UCharCharacterIterator(), text() {
    // NEVER DEFAULT CONSTRUCT!
}
// Constructs an iterator over a private copy of textStr.
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr)
    : UCharCharacterIterator(textStr.getBuffer(), textStr.length()),
      text(textStr)
{
    // The base class was initialized with the caller's buffer; re-point it
    // at the buffer of the copy this object owns.
    UCharCharacterIterator::text = text.getBuffer();
}
// Constructs an iterator over a private copy of textStr, passing textPos
// through to the base-class constructor.
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
                                                 int32_t textPos)
    : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos),
      text(textStr)
{
    // The base class was initialized with the caller's buffer; re-point it
    // at the buffer of the copy this object owns.
    UCharCharacterIterator::text = text.getBuffer();
}
// Constructs an iterator over a private copy of textStr, passing textBegin,
// textEnd and textPos through to the base-class constructor.
StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr,
                                                 int32_t textBegin,
                                                 int32_t textEnd,
                                                 int32_t textPos)
    : UCharCharacterIterator(textStr.getBuffer(), textStr.length(),
                             textBegin, textEnd, textPos),
      text(textStr)
{
    // The base class was initialized with the caller's buffer; re-point it
    // at the buffer of the copy this object owns.
    UCharCharacterIterator::text = text.getBuffer();
}
// Copy constructor: copies the base-class state and the string of `that`.
StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that)
    : UCharCharacterIterator(that), text(that.text)
{
    // The copied base still refers to that's buffer; re-point it at the
    // buffer of our own string copy.
    UCharCharacterIterator::text = text.getBuffer();
}
// Destructor: nothing to do explicitly; members clean up after themselves.
StringCharacterIterator::~StringCharacterIterator()
{
}
// Assignment: copies the base-class state and the string from `that`, then
// makes the base class refer to this object's own string buffer.
StringCharacterIterator&
StringCharacterIterator::operator=(const StringCharacterIterator& that)
{
    UCharCharacterIterator::operator=(that);
    text = that.text;

    // The base assignment copied that's buffer pointer; replace it with ours.
    UCharCharacterIterator::text = text.getBuffer();

    return *this;
}
// Equality: TRUE for the same object, or for another iterator with the same
// dynamic class ID whose string, pos, begin and end all compare equal.
UBool
StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const
{
    if (&that == this) {
        return TRUE;
    }

    // UCharCharacterIterator::operator==() is deliberately not used: it
    // compares array pointers, whereas here the UnicodeString contents
    // are compared.
    if (that.getDynamicClassID() != getDynamicClassID()) {
        return FALSE;
    }

    StringCharacterIterator& other = (StringCharacterIterator&)that;

    if (!(text == other.text)) {
        return FALSE;
    }
    if (pos != other.pos) {
        return FALSE;
    }
    if (begin != other.begin) {
        return FALSE;
    }
    return end == other.end;
}
// Returns a newly allocated copy of this iterator (via the copy constructor).
CharacterIterator*
StringCharacterIterator::clone() const
{
    StringCharacterIterator* copy = new StringCharacterIterator(*this);
    return copy;
}
void
StringCharacterIterator::setText(const UnicodeString& newText) {
text = newText;
UCharCharacterIterator::setText(text.getBuffer(), text.length());
}
// Copies the iterated string into result.
void
StringCharacterIterator::getText(UnicodeString& result)
{
    result = text;
}
U_NAMESPACE_END

981
source/common/serv.cpp Normal file
View file

@ -0,0 +1,981 @@
/**
*******************************************************************************
* Copyright (C) 2001-2008, International Business Machines Corporation. *
* All Rights Reserved. *
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "serv.h"
#include "umutex.h"
#undef SERVICE_REFCOUNT
// in case we use the refcount stuff
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */
// Constructs a key that stores a copy of id.
ICUServiceKey::ICUServiceKey(const UnicodeString& id)
    : _id(id)
{
}
// Destructor: no explicit cleanup required.
ICUServiceKey::~ICUServiceKey() {
}
// Returns the ID this key was constructed with.
const UnicodeString&
ICUServiceKey::getID() const {
    return _id;
}
// Appends the stored ID to result and returns the value of that append call.
UnicodeString&
ICUServiceKey::canonicalID(UnicodeString& result) const {
    return result.append(_id);
}
// In this base implementation the current ID is simply the canonical ID.
UnicodeString&
ICUServiceKey::currentID(UnicodeString& result) const {
    return canonicalID(result);
}
// Appends, in order: whatever prefix() contributes, the PREFIX_DELIMITER,
// and whatever currentID() contributes.
UnicodeString&
ICUServiceKey::currentDescriptor(UnicodeString& result) const {
    prefix(result);
    result.append(PREFIX_DELIMITER);
    UnicodeString& descriptor = currentID(result);
    return descriptor;
}
// The base key has no fallback: always reports FALSE and changes nothing.
UBool
ICUServiceKey::fallback() {
    return FALSE;
}
// With no fallback chain, the only match is exact equality with the stored ID.
UBool
ICUServiceKey::isFallbackOf(const UnicodeString& id) const {
    return id == _id;
}
// The base key has no prefix: result is returned untouched.
UnicodeString&
ICUServiceKey::prefix(UnicodeString& result) const {
    return result;
}
// Truncates result at the first PREFIX_DELIMITER, leaving only the text
// before it. If there is no delimiter, result is truncated at index 0.
UnicodeString&
ICUServiceKey::parsePrefix(UnicodeString& result)
{
    const int32_t delimiterPos = result.indexOf(PREFIX_DELIMITER);
    const int32_t cutAt = (delimiterPos < 0) ? 0 : delimiterPos;
    result.remove(cutAt);
    return result;
}
// Removes everything up to and including the first PREFIX_DELIMITER.
// If there is no delimiter, result is left unchanged.
UnicodeString&
ICUServiceKey::parseSuffix(UnicodeString& result)
{
    const int32_t delimiterPos = result.indexOf(PREFIX_DELIMITER);
    if (delimiterPos < 0) {
        return result;
    }
    result.remove(0, delimiterPos + 1);
    return result;
}
#ifdef SERVICE_DEBUG
// Debug aid: appends the class name (via debugClass) followed by " id: "
// and the stored ID.
UnicodeString&
ICUServiceKey::debug(UnicodeString& result) const {
    debugClass(result);
    result.append(" id: ");
    result.append(_id);
    return result;
}
// Debug aid: appends this class's name to result.
UnicodeString&
ICUServiceKey::debugClass(UnicodeString& result) const {
    return result.append("ICUServiceKey");
}
#endif
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey)
/*
******************************************************************
*/
// Stores the adopted instance pointer, a copy of id, and the visibility flag.
SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible)
    : _instance(instanceToAdopt),
      _id(id),
      _visible(visible) {
}
// Destructor: deletes the adopted service instance.
SimpleFactory::~SimpleFactory() {
    delete _instance;
}
// Returns service->cloneInstance(_instance) when status is a success code and
// the key's current ID equals this factory's ID; otherwise returns NULL.
UObject*
SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString currentID;
    if (_id == key.currentID(currentID)) {
        return service->cloneInstance(_instance);
    }
    return NULL;
}
// A visible factory maps its ID to itself in result; an invisible one
// removes any mapping for its ID from result.
void
SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
{
    if (!_visible) {
        result.remove(_id);
        return;
    }
    result.put(_id, (void*)this, status); // cast away const
}
// Sets result to this factory's ID when the factory is visible and id equals
// that ID; otherwise sets result to bogus. The locale is not consulted.
UnicodeString&
SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const
{
    const UBool matches = _visible && _id == id;
    if (!matches) {
        result.setToBogus();
        return result;
    }
    result = _id;
    return result;
}
#ifdef SERVICE_DEBUG
// Debug aid: appends the class name, the factory ID and a one-letter
// visibility marker ("T" or "F").
UnicodeString&
SimpleFactory::debug(UnicodeString& toAppendTo) const {
    debugClass(toAppendTo);
    toAppendTo.append(" id: ");
    toAppendTo.append(_id);
    toAppendTo.append(", visible: ");
    if (_visible) {
        toAppendTo.append("T");
    } else {
        toAppendTo.append("F");
    }
    return toAppendTo;
}
// Debug aid: appends this class's name to toAppendTo.
UnicodeString&
SimpleFactory::debugClass(UnicodeString& toAppendTo) const {
    return toAppendTo.append("SimpleFactory");
}
#endif
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory)
/*
******************************************************************
*/
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener)
/*
******************************************************************
*/
// Service-cache record. It keeps the descriptor the service object was
// actually created under, so that descriptor can be returned even when a
// later lookup succeeds through a different id. Entries are reference
// counted and delete themselves when the count reaches zero.
class CacheEntry : public UMemory {
private:
    int32_t refcount; // starts at 1 for the pointer held by the creator

public:
    UnicodeString actualDescriptor;
    UObject* service;

    /**
     * Creates an entry holding one reference. The entry takes ownership of
     * _service (it is deleted by the destructor).
     */
    CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service)
        : refcount(1),
          actualDescriptor(_actualDescriptor),
          service(_service)
    {
    }

    /**
     * Deletes the owned service object.
     */
    ~CacheEntry() {
        delete service;
    }

    /**
     * Adds a reference. Call this only when storing an additional pointer
     * to this entry; whoever manages that pointer must release it with
     * unref(). Returns this.
     */
    CacheEntry* ref() {
        refcount += 1;
        return this;
    }

    /**
     * Drops a reference. Call this only when discarding a pointer to this
     * entry. When the count reaches zero the entry deletes itself and NULL
     * is returned; otherwise this is returned.
     */
    CacheEntry* unref() {
        refcount -= 1;
        if (refcount != 0) {
            return this;
        }
        delete this;
        return NULL;
    }

    /**
     * Returns TRUE when more than one reference to this entry is held.
     */
    UBool isShared() const {
        return refcount > 1;
    }
};
// UObjectDeleter for serviceCache
U_CDECL_BEGIN
// Value deleter for the service cache: releases one reference on the entry.
static void U_CALLCONV
cacheDeleter(void* obj)
{
    U_NAMESPACE_USE
    CacheEntry* entry = (CacheEntry*)obj;
    entry->unref();
}
/**
 * Element deleter that treats obj as a UObject* and deletes it.
 */
static void U_CALLCONV
deleteUObject(void *obj)
{
    U_NAMESPACE_USE
    UObject* object = (UObject*) obj;
    delete object;
}
U_CDECL_END
/*
******************************************************************
*/
// Display-name cache: a hashtable together with the locale it was built for.
class DNCache : public UMemory {
public:
    Hashtable cache;
    const Locale locale;

    DNCache(const Locale& _locale)
        : cache(), locale(_locale)
    {
        // No key deleter is installed here; the call is left disabled:
        // cache.setKeyDeleter(uhash_deleteUnicodeString);
    }
};
/*
******************************************************************
*/
// Factory function. Returns NULL without touching status if status already
// indicates failure. Otherwise allocates a pair; if the allocation fails or
// either string is bogus, sets U_MEMORY_ALLOCATION_ERROR and returns NULL.
StringPair*
StringPair::create(const UnicodeString& displayName,
                   const UnicodeString& id,
                   UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    StringPair* pair = new StringPair(displayName, id);
    if (pair != NULL && !pair->isBogus()) {
        return pair;
    }

    status = U_MEMORY_ALLOCATION_ERROR;
    delete pair;
    return NULL;
}
// TRUE if either of the two strings is bogus.
UBool
StringPair::isBogus() const
{
    if (displayName.isBogus()) {
        return TRUE;
    }
    return id.isBogus();
}
// Stores copies of both strings.
StringPair::StringPair(const UnicodeString& _displayName,
                       const UnicodeString& _id)
    : displayName(_displayName), id(_id) {
}
U_CDECL_BEGIN
// Element deleter that treats obj as a StringPair* and deletes it.
static void U_CALLCONV
userv_deleteStringPair(void *obj)
{
    U_NAMESPACE_USE
    StringPair* pair = (StringPair*) obj;
    delete pair;
}
U_CDECL_END
/*
******************************************************************
*/
// Creates an unnamed service with no factories and no caches, and
// initializes its mutex.
ICUService::ICUService()
    : name(),
      lock(0),
      timestamp(0),
      factories(NULL),
      serviceCache(NULL),
      idCache(NULL),
      dnCache(NULL)
{
    umtx_init(&lock);
}
// Creates a service named newName with no factories and no caches, and
// initializes its mutex.
ICUService::ICUService(const UnicodeString& newName)
    : name(newName),
      lock(0),
      timestamp(0),
      factories(NULL),
      serviceCache(NULL),
      idCache(NULL),
      dnCache(NULL)
{
    umtx_init(&lock);
}
// Destructor: under the lock, drops all caches and the factory list; then
// destroys the mutex once the guard has released it.
ICUService::~ICUService() {
    {
        // Scoped so the guard unlocks before umtx_destroy() below.
        Mutex guard(&lock);

        clearCaches();

        delete factories;
        factories = NULL;
    }

    umtx_destroy(&lock);
}
// Convenience overload: looks up descriptor without reporting the actual
// descriptor that matched.
UObject*
ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const {
    UnicodeString* noActualReturn = NULL;
    return get(descriptor, noActualReturn, status);
}
// Builds a key from descriptor via createKey(), resolves it with getKey(),
// and deletes the key. Returns NULL if no key could be created.
UObject*
ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const
{
    ICUServiceKey* key = createKey(&descriptor, status);
    if (!key) {
        return NULL;
    }

    UObject* service = getKey(*key, actualReturn, status);
    delete key;
    return service;
}
// Convenience overload: resolves key without reporting the actual
// descriptor that matched.
UObject*
ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const {
    UnicodeString* noActualReturn = NULL;
    return getKey(key, noActualReturn, status);
}
// Customization point: subclasses of ICUService can override this overload
// to adjust the result object before it is returned. All other public get
// functions should funnel through here. This implementation forwards to the
// factory-aware overload with no factory.
UObject*
ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
    const ICUServiceFactory* noFactory = NULL;
    return getKey(key, actualReturn, noFactory, status);
}
// Scoped lock that can be told not to lock. This allows reentrant calls on
// systems whose mutexes are not reentrant: the caller knows when it is
// re-entering (without needing to know the thread) and passes
// reentering=TRUE, in which case neither lock nor unlock is performed.
class XMutex : public UMemory {
public:
    inline XMutex(UMTX *mutex, UBool reentering)
        : fMutex(mutex), fActive(!reentering)
    {
        if (fActive) {
            umtx_lock(fMutex);
        }
    }

    inline ~XMutex() {
        if (fActive) {
            umtx_unlock(fMutex);
        }
    }

private:
    UMTX *fMutex;   // the mutex being guarded
    UBool fActive;  // TRUE when this guard actually took the lock
};
// Minimal scope guard: deletes the UVector stored in _obj (if any) when the
// guard goes out of scope.
struct UVectorDeleter {
    UVector* _obj;

    UVectorDeleter()
        : _obj(NULL)
    {
    }

    ~UVectorDeleter() {
        delete _obj;
    }
};
// called only by factories, treat as private
//
// Resolves key to a service object.
//
// The key's current descriptor is looked up in the service cache; on a miss
// each registered factory (starting after `factory`, if one is given) is
// asked to create the object. If nothing matches, key.fallback() is tried
// and the process repeats. If no descriptor resolves, handleDefault() is
// returned.
//
// @param key          the key to resolve; may be modified by fallback().
// @param actualReturn if not NULL, receives the descriptor the result was
//                     actually created under (without a leading '/').
// @param factory      NULL for an outside call. Non-NULL when a factory calls
//                     back from within create(): the lock is then assumed to
//                     be held already, only factories after `factory` are
//                     consulted, and the result is not cached.
// @param status       error code; U_ILLEGAL_ARGUMENT_ERROR if `factory` is
//                     not registered, U_MEMORY_ALLOCATION_ERROR on
//                     allocation failure.
// @return a clone (via cloneInstance) of the resolved service object, or
//         NULL on error.
UObject*
ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (isDefault()) {
        return handleDefault(key, actualReturn, status);
    }

    ICUService* ncthis = (ICUService*)this; // cast away semantic const

    CacheEntry* result = NULL;
    {
        // The factory list can't be modified until we're done,
        // otherwise we might update the cache with an invalid result.
        // The cache has to stay in synch with the factory list.
        // ICU doesn't have monitors so we can't use rw locks, so
        // we single-thread everything using this service, for now.

        // if factory is not null, we're calling from within the mutex,
        // and since some unix machines don't have reentrant mutexes we
        // need to make sure not to try to lock it again.
        XMutex mutex(&ncthis->lock, factory != NULL);

        if (serviceCache == NULL) {
            ncthis->serviceCache = new Hashtable(status);
            if (ncthis->serviceCache == NULL) {
                // Report the allocation failure instead of returning NULL
                // with a success status.
                status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }
            if (U_FAILURE(status)) {
                delete serviceCache;
                // Do not leave a dangling pointer for the next call.
                ncthis->serviceCache = NULL;
                return NULL;
            }
            serviceCache->setValueDeleter(cacheDeleter);
        }

        UnicodeString currentDescriptor;
        UVectorDeleter cacheDescriptorList;
        UBool putInCache = FALSE;

        int32_t startIndex = 0;
        // isDefault() normally guarantees a factory list, but guard anyway:
        // with limit == 0 no element of factories is ever accessed.
        int32_t limit = factories != NULL ? factories->size() : 0;
        UBool cacheResult = TRUE;

        if (factory != NULL) {
            for (int32_t i = 0; i < limit; ++i) {
                if (factory == (const ICUServiceFactory*)factories->elementAt(i)) {
                    startIndex = i + 1;
                    break;
                }
            }
            if (startIndex == 0) {
                // throw new InternalError("Factory " + factory + "not registered with service: " + this);
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return NULL;
            }
            cacheResult = FALSE;
        }

        do {
            currentDescriptor.remove();
            key.currentDescriptor(currentDescriptor);
            result = (CacheEntry*)serviceCache->get(currentDescriptor);
            if (result != NULL) {
                break;
            }

            // first test of cache failed, so we'll have to update
            // the cache if we eventually succeed-- that is, if we're
            // going to update the cache at all.
            putInCache = TRUE;

            int32_t index = startIndex;
            while (index < limit) {
                ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++);
                UObject* service = f->create(key, this, status);
                if (U_FAILURE(status)) {
                    delete service;
                    return NULL;
                }
                if (service != NULL) {
                    result = new CacheEntry(currentDescriptor, service);
                    if (result == NULL) {
                        delete service;
                        status = U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }

                    goto outerEnd;
                }
            }

            // prepare to load the cache with all additional ids that
            // will resolve to result, assuming we'll succeed.  We
            // don't want to keep querying on an id that's going to
            // fallback to the one that succeeded, we want to hit the
            // cache the first time next goaround.
            if (cacheDescriptorList._obj == NULL) {
                cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status);
                if (cacheDescriptorList._obj == NULL && U_SUCCESS(status)) {
                    // Allocation failed; the vector must not be dereferenced.
                    status = U_MEMORY_ALLOCATION_ERROR;
                }
                if (U_FAILURE(status)) {
                    return NULL;
                }
            }
            UnicodeString* idToCache = new UnicodeString(currentDescriptor);
            if (idToCache == NULL || idToCache->isBogus()) {
                delete idToCache; // no-op for NULL; frees a bogus string
                status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }

            cacheDescriptorList._obj->addElement(idToCache, status);
            if (U_FAILURE(status)) {
                // The element was not stored, so it is still ours to free
                // (same convention as getVisibleIDs and registerFactory).
                delete idToCache;
                return NULL;
            }
        } while (key.fallback());
outerEnd:

        if (result != NULL) {
            if (putInCache && cacheResult) {
                // NOTE(review): the two failure paths below delete `result`
                // after handing it to Hashtable::put(); whether put() has
                // already disposed of the value on failure is not visible
                // here -- confirm against the Hashtable/uhash contract.
                serviceCache->put(result->actualDescriptor, result, status);
                if (U_FAILURE(status)) {
                    delete result;
                    return NULL;
                }

                if (cacheDescriptorList._obj != NULL) {
                    for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
                        UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
                        serviceCache->put(*desc, result, status);
                        if (U_FAILURE(status)) {
                            delete result;
                            return NULL;
                        }

                        result->ref();
                        cacheDescriptorList._obj->removeElementAt(i);
                    }
                }
            }

            if (actualReturn != NULL) {
                // strip null prefix
                if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/)
                    actualReturn->remove();
                    actualReturn->append(result->actualDescriptor,
                        1,
                        result->actualDescriptor.length() - 1);
                } else {
                    *actualReturn = result->actualDescriptor;
                }

                if (actualReturn->isBogus()) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    // Only free the entry under the same ownership rule the
                    // success path uses below. In every other case the entry
                    // is held by the service cache and deleting it here would
                    // leave the cache with a dangling pointer.
                    if (putInCache && !cacheResult) {
                        delete result;
                    }
                    return NULL;
                }
            }

            UObject* service = cloneInstance(result->service);
            if (putInCache && !cacheResult) {
                delete result;
            }
            return service;
        }
    }

    return handleDefault(key, actualReturn, status);
}
// Default handling when no factory resolves a key: this base implementation
// ignores all of its arguments and returns NULL.
UObject*
ICUService::handleDefault(const ICUServiceKey& /* key */,
                          UnicodeString* /* actualIDReturn */,
                          UErrorCode& /* status */) const {
    return NULL;
}
// Convenience overload: collects the visible IDs with no match filter.
UVector&
ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const
{
    const UnicodeString* noMatchID = NULL;
    return getVisibleIDs(result, noMatchID, status);
}
// Fills result with newly allocated copies of the visible IDs. When matchID
// yields a key (via createKey), only IDs for which that key's isFallbackOf()
// returns true are included. result is emptied first, and emptied again if
// status indicates failure at the end.
UVector&
ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const
{
    result.removeAllElements();

    if (U_FAILURE(status)) {
        return result;
    }

    ICUService * ncthis = (ICUService*)this; // cast away semantic const
    {
        Mutex mutex(&ncthis->lock);
        const Hashtable* map = getVisibleIDMap(status);
        if (map != NULL) {
            ICUServiceKey* filterKey = createKey(matchID, status);

            int32_t pos = -1;
            const UHashElement* elem = NULL;
            while ((elem = map->nextElement(pos)) != NULL) {
                const UnicodeString* id = (const UnicodeString*)elem->key.pointer;

                // Skip IDs rejected by the filter key, if there is one.
                if (filterKey != NULL && !filterKey->isFallbackOf(*id)) {
                    continue;
                }

                UnicodeString* copy = new UnicodeString(*id);
                if (copy == NULL || copy->isBogus()) {
                    delete copy;
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }

                result.addElement(copy, status);
                if (U_FAILURE(status)) {
                    delete copy;
                    break;
                }
            }

            delete filterKey;
        }
    }

    if (U_FAILURE(status)) {
        result.removeAllElements();
    }
    return result;
}
// Returns the map from visible ID to factory, building and caching it on
// first use by letting every registered factory (last-registered position
// first in index order, i.e. from the end of the list to the front) update
// the table.
//
// Must only be called while the service lock is already held.
//
// @param status error code; set to U_MEMORY_ALLOCATION_ERROR if the table
//               cannot be allocated.
// @return the cached map, or NULL if status indicates failure.
const Hashtable*
ICUService::getVisibleIDMap(UErrorCode& status) const {
    if (U_FAILURE(status)) return NULL;

    // must only be called when lock is already held

    ICUService* ncthis = (ICUService*)this; // cast away semantic const
    if (idCache == NULL) {
        ncthis->idCache = new Hashtable(status);
        if (idCache == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            // Only populate a table that was constructed successfully.
            if (U_SUCCESS(status) && factories != NULL) {
                for (int32_t pos = factories->size(); --pos >= 0;) {
                    ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos);
                    f->updateVisibleIDs(*idCache, status);
                }
            }
            // Discard the table on any failure -- including a failed
            // construction when there are no factories -- so that a broken
            // table is never cached or handed to callers.
            if (U_FAILURE(status)) {
                delete idCache;
                ncthis->idCache = NULL;
            }
        }
    }

    return idCache;
}
// Convenience overload: display name of id in the default locale.
UnicodeString&
ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const
{
    const Locale& defaultLocale = Locale::getDefault();
    return getDisplayName(id, result, defaultLocale);
}
// Writes the display name of id for the given locale into result.
//
// The factory registered for id in the visible-ID map supplies the name. If
// id itself is not in the map, a key is created from id and each of its
// fallbacks is tried in turn. If no factory is found, result is set to bogus.
//
// @param id     the ID to look up.
// @param result receives the display name, or is set to bogus.
// @param locale the locale passed to the factory's getDisplayName().
// @return result.
UnicodeString&
ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const
{
    {
        ICUService* ncthis = (ICUService*)this; // cast away semantic const
        UErrorCode status = U_ZERO_ERROR;
        Mutex mutex(&ncthis->lock);
        const Hashtable* map = getVisibleIDMap(status);
        if (map != NULL) {
            ICUServiceFactory* f = (ICUServiceFactory*)map->get(id);
            if (f != NULL) {
                f->getDisplayName(id, locale, result);
                return result;
            }

            // fallback
            status = U_ZERO_ERROR; // reuse the outer status instead of shadowing it
            ICUServiceKey* fallbackKey = createKey(&id, status);
            // createKey() returns NULL when allocation fails, so the key
            // must be checked before it is dereferenced.
            while (fallbackKey != NULL && fallbackKey->fallback()) {
                UnicodeString us;
                fallbackKey->currentID(us);
                f = (ICUServiceFactory*)map->get(us);
                if (f != NULL) {
                    f->getDisplayName(id, locale, result);
                    delete fallbackKey;
                    return result;
                }
            }
            delete fallbackKey;
        }
    }
    result.setToBogus();
    return result;
}
// Convenience overload: default locale, no match filter.
UVector&
ICUService::getDisplayNames(UVector& result, UErrorCode& status) const
{
    const UnicodeString* noMatchID = NULL;
    return getDisplayNames(result, Locale::getDefault(), noMatchID, status);
}
// Convenience overload: given locale, no match filter.
UVector&
ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const
{
    const UnicodeString* noMatchID = NULL;
    return getDisplayNames(result, locale, noMatchID, status);
}
// Fills result with one StringPair per visible ID, built from the
// display-name cache for the given locale.
//
// The cache is (re)built when it is missing or was built for a different
// locale. When matchID yields a key (via createKey), only IDs accepted by
// that key's isFallbackOf() are included. result is emptied first and its
// element deleter is set to delete StringPair objects; it is emptied again
// on failure.
//
// @param result  receives the StringPair elements.
// @param locale  the locale for the display names.
// @param matchID optional filter ID, may be NULL.
// @param status  error code; U_MEMORY_ALLOCATION_ERROR on allocation failure
//                or when a factory reports a bogus display name.
// @return result.
UVector&
ICUService::getDisplayNames(UVector& result,
                            const Locale& locale,
                            const UnicodeString* matchID,
                            UErrorCode& status) const
{
    result.removeAllElements();
    result.setDeleter(userv_deleteStringPair);

    // With a failing status there is nothing to do; in particular the
    // display-name cache may not exist and must not be dereferenced.
    if (U_FAILURE(status)) {
        return result;
    }

    ICUService* ncthis = (ICUService*)this; // cast away semantic const

    // The lock is held for the whole function so the cache cannot be cleared
    // by another thread while it is being iterated below (getVisibleIDs
    // likewise iterates under the lock).
    Mutex mutex(&ncthis->lock);

    if (dnCache != NULL && dnCache->locale != locale) {
        delete dnCache;
        ncthis->dnCache = NULL;
    }

    if (dnCache == NULL) {
        const Hashtable* m = getVisibleIDMap(status);
        if (m != NULL) {
            ncthis->dnCache = new DNCache(locale);
            if (dnCache == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return result;
            }

            int32_t pos = -1;
            const UHashElement* entry = NULL;
            while ((entry = m->nextElement(pos)) != NULL) {
                const UnicodeString* id = (const UnicodeString*)entry->key.pointer;
                ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer;
                UnicodeString dname;
                f->getDisplayName(*id, locale, dname);
                if (dname.isBogus()) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                } else {
                    dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap
                    if (U_SUCCESS(status)) {
                        continue;
                    }
                }
                delete dnCache;
                ncthis->dnCache = NULL;
                return result;
            }
        }
    }

    // No visible-ID map was available, so no cache could be built.
    if (dnCache == NULL) {
        return result;
    }

    ICUServiceKey* matchKey = createKey(matchID, status);
    /* To ensure that all elements in the hashtable are iterated, set pos to -1.
     * nextElement(pos) will skip the position at pos and begin the iteration
     * at the next position, which in this case will be 0.
     */
    int32_t pos = -1;
    const UHashElement *entry = NULL;
    while ((entry = dnCache->cache.nextElement(pos)) != NULL) {
        const UnicodeString* id = (const UnicodeString*)entry->value.pointer;
        if (matchKey != NULL && !matchKey->isFallbackOf(*id)) {
            continue;
        }
        const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
        // NOTE(review): StringPair::create() declares its parameters as
        // (displayName, id) but is called with (*id, *dn). The argument order
        // is kept exactly as before because callers may rely on it -- confirm.
        StringPair* sp = StringPair::create(*id, *dn, status);
        if (U_SUCCESS(status)) {
            result.addElement(sp, status);
            if (U_FAILURE(status)) {
                // The element was not stored, so it is still ours to free
                // (same convention as getVisibleIDs and registerFactory).
                delete sp;
            }
        }
        if (U_FAILURE(status)) {
            result.removeAllElements();
            break;
        }
    }
    delete matchKey;

    return result;
}
// Convenience overload: registers objToAdopt under id as a visible instance.
URegistryKey
ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status)
{
    const UBool visible = TRUE;
    return registerInstance(objToAdopt, id, visible, status);
}
// Wraps objToAdopt in a simple factory keyed by the canonical form of id and
// registers that factory. If no key or no factory can be created, objToAdopt
// is deleted and NULL is returned.
URegistryKey
ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
{
    ICUServiceKey* key = createKey(&id, status);
    if (key == NULL) {
        delete objToAdopt;
        return NULL;
    }

    UnicodeString canonicalID;
    key->canonicalID(canonicalID);
    delete key;

    ICUServiceFactory* factory = createSimpleFactory(objToAdopt, canonicalID, visible, status);
    if (factory == NULL) {
        delete objToAdopt;
        return NULL;
    }

    return registerFactory(factory, status);
}
// Creates a SimpleFactory for objToAdopt. Returns NULL if status already
// indicates failure; sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL if
// objToAdopt is NULL or id is bogus.
ICUServiceFactory*
ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    if (objToAdopt == NULL || id.isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    return new SimpleFactory(objToAdopt, id, visible);
}
// Registers factoryToAdopt at the front of the factory list, so it is
// consulted before previously registered factories.
//
// On success all caches are cleared and listeners are notified. If the
// factory list cannot be created or the insertion fails, factoryToAdopt is
// deleted and NULL is returned.
//
// @param factoryToAdopt the factory to register.
// @param status         error code; U_MEMORY_ALLOCATION_ERROR if the factory
//                       list cannot be allocated.
// @return the factory pointer as a registry key, or NULL on registration
//         failure.
URegistryKey
ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status)
{
    if (U_SUCCESS(status) && factoryToAdopt != NULL) {
        Mutex mutex(&lock);

        if (factories == NULL) {
            factories = new UVector(deleteUObject, NULL, status);
            if (factories == NULL && U_SUCCESS(status)) {
                // Allocation failed; the list must not be dereferenced.
                status = U_MEMORY_ALLOCATION_ERROR;
            }
            if (U_FAILURE(status)) {
                delete factories;
                // Do not leave a dangling pointer for later calls.
                factories = NULL;
                // Dispose of the adopted factory, exactly as the failed-insert
                // path below does, since NULL is returned to the caller.
                delete factoryToAdopt;
                return NULL;
            }
        }
        factories->insertElementAt(factoryToAdopt, 0, status);
        if (U_SUCCESS(status)) {
            clearCaches();
        } else {
            delete factoryToAdopt;
            factoryToAdopt = NULL;
        }
    }

    // Notify outside the lock.
    if (factoryToAdopt != NULL) {
        notifyChanged();
    }

    return (URegistryKey)factoryToAdopt;
}
// Removes the factory identified by rkey from the factory list. On success
// the caches are cleared and listeners are notified. If the factory is not
// in the list, status is set to U_ILLEGAL_ARGUMENT_ERROR and the factory is
// deleted. Returns TRUE only when the factory was removed.
UBool
ICUService::unregister(URegistryKey rkey, UErrorCode& status)
{
    ICUServiceFactory *factory = (ICUServiceFactory*)rkey;
    UBool removed = FALSE;

    if (factory != NULL && factories != NULL) {
        Mutex mutex(&lock);

        if (!factories->removeElement(factory)) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            delete factory;
        } else {
            clearCaches();
            removed = TRUE;
        }
    }

    // Notify outside the lock.
    if (removed) {
        notifyChanged();
    }
    return removed;
}
// Under the lock, re-initializes the factory list and clears all caches;
// then notifies listeners after the lock has been released.
void
ICUService::reset() {
    {
        Mutex guard(&lock);
        reInitializeFactories();
        clearCaches();
    }

    notifyChanged();
}
// Empties the factory list, if one exists.
void
ICUService::reInitializeFactories() {
    if (factories == NULL) {
        return;
    }
    factories->removeAllElements();
}
// The service is in its default state when no factories are registered.
UBool
ICUService::isDefault() const {
    return 0 == countFactories();
}
// Creates a new ICUServiceKey for *id. Returns NULL when status indicates
// failure or id is NULL.
ICUServiceKey*
ICUService::createKey(const UnicodeString* id, UErrorCode& status) const {
    if (U_FAILURE(status) || id == NULL) {
        return NULL;
    }
    return new ICUServiceKey(*id);
}
// Bumps the timestamp and discards the display-name, visible-ID and service
// caches. Callers synchronize before use.
void
ICUService::clearCaches() {
    ++timestamp;

    delete dnCache;
    dnCache = NULL;

    delete idCache;
    idCache = NULL;

    delete serviceCache;
    serviceCache = NULL;
}
// Discards only the service cache. Callers synchronize before use.
void
ICUService::clearServiceCache() {
    delete serviceCache;
    serviceCache = NULL;
}
// Accepts a listener only when its dynamic class ID is exactly
// ServiceListener's static class ID.
UBool
ICUService::acceptsListener(const EventListener& l) const {
    return ServiceListener::getStaticClassID() == l.getDynamicClassID();
}
// Treats l as a ServiceListener and tells it that this service changed.
void
ICUService::notifyListener(EventListener& l) const {
    ServiceListener& listener = (ServiceListener&)l;
    listener.serviceChanged(*this);
}
// Appends this service's name to result and returns the value of that
// append call.
UnicodeString&
ICUService::getName(UnicodeString& result) const {
    return result.append(name);
}
// Number of registered factories; 0 when no factory list exists.
int32_t
ICUService::countFactories() const {
    if (factories == NULL) {
        return 0;
    }
    return factories->size();
}
// Returns the current timestamp counter (incremented by clearCaches()).
int32_t
ICUService::getTimestamp() const {
    return timestamp;
}
U_NAMESPACE_END
/* UCONFIG_NO_SERVICE */
#endif

996
source/common/serv.h Normal file
View file

@ -0,0 +1,996 @@
/**
*******************************************************************************
* Copyright (C) 2001-2007, International Business Machines Corporation. *
* All Rights Reserved. *
*******************************************************************************
*/
#ifndef ICUSERV_H
#define ICUSERV_H
#include "unicode/utypes.h"
#if UCONFIG_NO_SERVICE
U_NAMESPACE_BEGIN
/*
* Allow the declaration of APIs with pointers to ICUService
* even when service is removed from the build.
*/
class ICUService;
U_NAMESPACE_END
#else
#include "unicode/unistr.h"
#include "unicode/locid.h"
#include "unicode/umisc.h"
#include "hash.h"
#include "uvector.h"
#include "servnotf.h"
class ICUServiceTest;
U_NAMESPACE_BEGIN
class ICUServiceKey;
class ICUServiceFactory;
class SimpleFactory;
class ServiceListener;
class ICUService;
class DNCache;
/*******************************************************************
* ICUServiceKey
*/
/**
* <p>ICUServiceKeys are used to communicate with factories to
* generate an instance of the service. ICUServiceKeys define how
* ids are canonicalized, provide both a current id and a current
* descriptor to use in querying the cache and factories, and
* determine the fallback strategy.</p>
*
* <p>ICUServiceKeys provide both a currentDescriptor and a currentID.
* The descriptor contains an optional prefix, followed by '/'
* and the currentID. Factories that handle complex keys,
* for example number format factories that generate multiple
* kinds of formatters for the same locale, use the descriptor
* to provide a fully unique identifier for the service object,
* while using the currentID (in this case, the locale string),
* as the visible IDs that can be localized.</p>
*
* <p>The default implementation of ICUServiceKey has no fallbacks and
* has no custom descriptors.</p>
*/
class U_COMMON_API ICUServiceKey : public UObject {
private:
// The ID this key was constructed from; never modified afterwards.
const UnicodeString _id;
protected:
// Character that separates the prefix from the ID in a descriptor ('/').
static const UChar PREFIX_DELIMITER;
public:
/**
* <p>Construct a key from an id.</p>
*
* @param id the ID from which to construct the key.
*/
ICUServiceKey(const UnicodeString& id);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ICUServiceKey();
/**
* <p>Return the original ID used to construct this key.</p>
*
* @return the ID used to construct this key.
*/
virtual const UnicodeString& getID() const;
/**
* <p>Return the canonical version of the original ID. This implementation
* appends the original ID to result. Result is returned as a convenience.</p>
*
* @param result the output parameter to which the id will be appended.
* @return the modified result.
*/
virtual UnicodeString& canonicalID(UnicodeString& result) const;
/**
* <p>Return the (canonical) current ID. This implementation appends
* the canonical ID to result. Result is returned as a convenience.</p>
*
* @param result the output parameter to which the current id will be appended.
* @return the modified result.
*/
virtual UnicodeString& currentID(UnicodeString& result) const;
/**
* <p>Return the current descriptor. This implementation appends
* the current descriptor (the prefix, the prefix delimiter, then the
* current ID) to result. Result is returned as a convenience.</p>
*
* <p>The current descriptor is used to fully
* identify an instance of the service in the cache. A
* factory may handle all descriptors for an ID, or just a
* particular descriptor. The factory can either parse the
* descriptor or use custom API on the key in order to
* instantiate the service.</p>
*
* @param result the output parameter to which the current descriptor will be appended.
* @return the modified result.
*/
virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
/**
* <p>If the key has a fallback, modify the key and return true,
* otherwise return false. The current ID will change if there
* is a fallback. No currentIDs should be repeated, and fallback
* must eventually return false. This implementation has no fallbacks
* and always returns false.</p>
*
* @return TRUE if the ICUServiceKey changed to a valid fallback value.
*/
virtual UBool fallback();
/**
* <p>Return TRUE if a key created from id matches, or would eventually
* fallback to match, the canonical ID of this ICUServiceKey. This
* implementation has no fallbacks, so it returns TRUE only when id
* equals the ID this key was constructed from.</p>
*
* @param id the id to test.
* @return TRUE if this ICUServiceKey's canonical ID is a fallback of id.
*/
virtual UBool isFallbackOf(const UnicodeString& id) const;
/**
* <p>Return the prefix. This implementation leaves result unchanged.
* Result is returned as a convenience.</p>
*
* @param result the output parameter to which the prefix will be appended.
* @return the modified result.
*/
virtual UnicodeString& prefix(UnicodeString& result) const;
/**
* <p>A utility to parse the prefix out of a descriptor string. Only
* the (undelimited) prefix, if any, remains in result. Result is returned as a
* convenience.</p>
*
* @param result an input/output parameter that on entry is a descriptor, and
* on exit is the prefix of that descriptor.
* @return the modified result.
*/
static UnicodeString& parsePrefix(UnicodeString& result);
/**
* <p>A utility to parse the suffix out of a descriptor string. Only
* the (undelimited) suffix, if any, remains in result. Result is returned as a
* convenience.</p>
*
* @param result an input/output parameter that on entry is a descriptor, and
* on exit is the suffix of that descriptor.
* @return the modified result.
*/
static UnicodeString& parseSuffix(UnicodeString& result);
public:
/**
* UObject RTTI boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
/**
* UObject RTTI boilerplate.
*/
virtual UClassID getDynamicClassID() const;
#ifdef SERVICE_DEBUG
public:
/**
* Debugging aid, only available when SERVICE_DEBUG is defined:
* appends a description of this key to result and returns result.
*/
virtual UnicodeString& debug(UnicodeString& result) const;
/**
* Debugging aid, only available when SERVICE_DEBUG is defined:
* appends the name of this class to result and returns result.
*/
virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif
};
/*******************************************************************
* ICUServiceFactory
*/
/**
* <p>An implementing ICUServiceFactory generates the service objects maintained by the
* service. A factory generates a service object from a key,
* updates id->factory mappings, and returns the display name for
* a supported id.</p>
*/
class U_COMMON_API ICUServiceFactory : public UObject {
public:
/**
* <p>Create a service object from the key, if this factory
* supports the key. Otherwise, return NULL.</p>
*
* <p>If the factory supports the key, then it can call
* the service's getKey(ICUServiceKey&, UnicodeString*,
* const ICUServiceFactory*, UErrorCode&) method
* passing itself as the factory to get the object that
* the service would have created prior to the factory's
* registration with the service. This can change the
* key, so any information required from the key should
* be extracted before making such a callback.</p>
*
* @param key the service key.
* @param service the service with which this factory is registered.
* @param status the error code status.
* @return the service object, or NULL if the factory does not support the key.
*/
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0;
/**
* <p>Update result to reflect the IDs (not descriptors) that this
* factory publicly handles. Result contains mappings from ID to
* factory. On entry it will contain all (visible) mappings from
* previously-registered factories.</p>
*
* <p>This function, together with getDisplayName, is used to
* support ICUService::getDisplayNames. The factory determines
* which IDs (of those it supports) it will make visible, and of
* those, which it will provide localized display names for. In
* most cases it will register mappings from all IDs it supports
* to itself.</p>
*
* @param result the mapping table to update.
* @param status the error code status.
*/
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0;
/**
* <p>Return, in result, the display name of the id in the provided locale.
* This is an id, not a descriptor. If the id is
* not visible, sets result to bogus. If the
* incoming result is bogus, it remains bogus. Result is returned as a
* convenience. Results are not defined if id is not one supported by this
* factory.</p>
*
* @param id a visible id supported by this factory.
* @param locale the locale for which to generate the corresponding localized display name.
* @param result output parameter to hold the display name.
* @return result.
*/
virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0;
};
/*
******************************************************************
*/
/**
* <p>A default implementation of ICUServiceFactory. This provides default
* implementations for subclasses, and implements a singleton
* factory that matches a single ID and returns a single
* (possibly deferred-initialized) instance. This implements
* updateVisibleIDs to add a mapping from its ID to itself
* if visible is true, or to remove any existing mapping
* for its ID if visible is false. No localization of display
* names is performed.</p>
*/
class U_COMMON_API SimpleFactory : public ICUServiceFactory {
protected:
/** The service instance adopted by the constructor. */
UObject* _instance;
/** The single ID that this factory maps to its instance. */
const UnicodeString _id;
/** Whether the ID is reported as visible (see updateVisibleIDs). */
const UBool _visible;
public:
/**
* <p>Construct a SimpleFactory that maps a single ID to a single
* service instance. If visible is TRUE, the ID will be visible.
* The instance must not be NULL. The SimpleFactory will adopt
* the instance, which must not be changed subsequent to this call.</p>
*
* @param instanceToAdopt the service instance to adopt; must not be NULL.
* @param id the ID to assign to this service instance.
* @param visible if TRUE, the ID will be visible. Defaults to TRUE.
*/
SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = TRUE);
/**
* <p>Destructor.</p>
*/
virtual ~SimpleFactory();
/**
* <p>This implementation returns a clone of the service instance if the factory's ID is equal to
* the key's currentID. Service and prefix are ignored.</p>
*
* @param key the service key.
* @param service the service with which this factory is registered.
* @param status the error code status.
* @return the service object, or NULL if the factory does not support the key.
*/
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
/**
* <p>This implementation adds a mapping from ID -> this to result if visible is TRUE,
* otherwise it removes ID from result.</p>
*
* @param result the mapping table to update.
* @param status the error code status.
*/
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
/**
* <p>This implementation returns the factory ID if it equals id and visible is TRUE,
* otherwise it returns the empty string. (This implementation provides
* no localized id information.)</p>
*
* @param id a visible id supported by this factory.
* @param locale the locale for which to generate the corresponding localized display name.
* @param result output parameter to hold the display name.
* @return result.
*/
virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
public:
/**
* UObject RTTI boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
/**
* UObject RTTI boilerplate.
*/
virtual UClassID getDynamicClassID() const;
/* Debugging helpers; only declared when SERVICE_DEBUG is defined. */
#ifdef SERVICE_DEBUG
public:
/** Appends a debug description of this factory to toAppendTo and returns it. */
virtual UnicodeString& debug(UnicodeString& toAppendTo) const;
/** Appends a debug class name to toAppendTo and returns it. */
virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const;
#endif
};
/*
******************************************************************
*/
/**
* <p>ServiceListener is the listener that ICUService provides by default.
* ICUService will notify this listener when factories are added to
* or removed from the service. Subclasses can provide
* different listener interfaces that extend EventListener, and modify
* acceptsListener and notifyListener as appropriate.</p>
*/
class U_COMMON_API ServiceListener : public EventListener {
public:
/**
* <p>This method is called when the service changes. At the time of the
* call this listener is registered with the service. It must
* not modify the notifier in the context of this call.</p>
*
* @param service the service that changed.
*/
virtual void serviceChanged(const ICUService& service) const = 0;
public:
/**
* UObject RTTI boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
/**
* UObject RTTI boilerplate.
*/
virtual UClassID getDynamicClassID() const;
};
/*
******************************************************************
*/
/**
* <p>A StringPair holds a displayName/ID pair. ICUService uses it
* as the array elements returned by getDisplayNames.</p>
*/
class U_COMMON_API StringPair : public UMemory {
public:
/**
* <p>The display name of the pair.</p>
*/
const UnicodeString displayName;
/**
* <p>The ID of the pair.</p>
*/
const UnicodeString id;
/**
* <p>Creates a string pair from a displayName and an ID.</p>
*
* @param displayName the displayName.
* @param id the ID.
* @param status the error code status.
* @return a StringPair if the creation was successful, otherwise NULL.
*/
static StringPair* create(const UnicodeString& displayName,
const UnicodeString& id,
UErrorCode& status);
/**
* <p>Return TRUE if either string of the pair is bogus.</p>
* @return TRUE if either string of the pair is bogus.
*/
UBool isBogus() const;
private:
/**
* <p>Private constructor; instances are obtained through create().</p>
*
* @param displayName the displayName.
* @param id the ID.
*/
StringPair(const UnicodeString& displayName, const UnicodeString& id);
};
/*******************************************************************
* ICUService
*/
/**
* <p>A Service provides access to service objects that implement a
* particular service, e.g. transliterators. Users provide a String
* id (for example, a locale string) to the service, and get back an
* object for that id. Service objects can be any kind of object. A
* new service object is returned for each query. The caller is
* responsible for deleting it.</p>
*
* <p>Services 'canonicalize' the query ID and use the canonical ID to
* query for the service. The service also defines a mechanism to
* 'fallback' the ID multiple times. Clients can optionally request
* the actual ID that was matched by a query when they use an ID to
* retrieve a service object.</p>
*
* <p>Service objects are instantiated by ICUServiceFactory objects
* registered with the service. The service queries each
* ICUServiceFactory in turn, from most recently registered to
* earliest registered, until one returns a service object. If none
* responds with a service object, a fallback ID is generated, and the
* process repeats until a service object is returned or until the ID
* has no further fallbacks.</p>
*
* <p>In ICU 2.4, UObject (the base class of service instances) does
* not define a polymorphic clone function. ICUService uses clones to
* manage ownership. Thus, for now, ICUService defines an abstract
* method, cloneInstance, that clients must implement to create clones
* of the service instances. This may change in future releases of
* ICU.</p>
*
* <p>ICUServiceFactories can be dynamically registered and
* unregistered with the service. When registered, an
* ICUServiceFactory is installed at the head of the factory list, and
* so gets 'first crack' at any keys or fallback keys. When
* unregistered, it is removed from the service and can no longer be
* located through it. Service objects generated by this factory and
* held by the client are unaffected.</p>
*
* <p>If a service has variants (e.g., the different variants of
* BreakIterator) an ICUServiceFactory can use the prefix of the
* ICUServiceKey to determine the variant of a service to generate.
* If it does not support all variants, it can request
* previously-registered factories to handle the ones it does not
* support.</p>
*
* <p>ICUService uses ICUServiceKeys to query factories and perform
* fallback. The ICUServiceKey defines the canonical form of the ID,
* and implements the fallback strategy. Custom ICUServiceKeys can be
* defined that parse complex IDs into components that
* ICUServiceFactories can more easily use. The ICUServiceKey can
* cache the results of this parsing to save repeated effort.
* ICUService provides convenience APIs that take UnicodeStrings and
* generate default ICUServiceKeys for use in querying.</p>
*
* <p>ICUService provides API to get the list of IDs publicly
* supported by the service (although queries aren't restricted to
* this list). This list contains only 'simple' IDs, and not fully
* unique IDs. ICUServiceFactories are associated with each simple ID
* and the responsible factory can also return a human-readable
* localized version of the simple ID, for use in user interfaces.
* ICUService can also provide an array of all the localized
* visible IDs and their corresponding internal IDs.</p>
*
* <p>ICUService implements ICUNotifier, so that clients can register
* to receive notification when factories are added or removed from
* the service. ICUService provides a default EventListener
* subinterface, ServiceListener, which can be registered with the
* service. When the service changes, the ServiceListener's
* serviceChanged method is called with the service as the
* argument.</p>
*
* <p>The ICUService API is both rich and generic, and it is expected
* that most implementations will statically 'wrap' ICUService to
* present a more appropriate API-- for example, to declare the type
* of the objects returned from get, to limit the factories that can
* be registered with the service, or to define their own listener
* interface with a custom callback method. They might also customize
* ICUService by overriding it, for example, to customize the
* ICUServiceKey and fallback strategy. ICULocaleService is a
* subclass of ICUService that uses Locale names as IDs and uses
* ICUServiceKeys that implement the standard resource bundle fallback
* strategy. Most clients will wish to subclass it instead of
* ICUService.</p>
*/
class U_COMMON_API ICUService : public ICUNotifier {
protected:
/**
* Name useful for debugging.
*/
const UnicodeString name;
private:
/**
* Single lock used by this service.
*/
UMTX lock;
/**
* Timestamp so iterators can be fail-fast.
*/
uint32_t timestamp;
/**
* All the factories registered with this service.
*/
UVector* factories;
/**
* The service cache.
*/
Hashtable* serviceCache;
/**
* The ID cache.
*/
Hashtable* idCache;
/**
* The name cache.
*/
DNCache* dnCache;
/************************************************************************
* Public API
*/
public:
/**
* <p>Construct a new ICUService.</p>
*/
ICUService();
/**
* <p>Construct with a name (useful for debugging).</p>
*
* @param name a name to use in debugging.
*/
ICUService(const UnicodeString& name);
/**
* <p>Destructor.</p>
*/
virtual ~ICUService();
/**
* <p>Return the name of this service. This will be the empty string if none was assigned.
* Returns result as a convenience.</p>
*
* @param result an output parameter to contain the name of this service.
* @return the name of this service.
*/
UnicodeString& getName(UnicodeString& result) const;
/**
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
* createKey to create a key for the provided descriptor.</p>
*
* @param descriptor the descriptor.
* @param status the error code status.
* @return the service instance, or NULL.
*/
UObject* get(const UnicodeString& descriptor, UErrorCode& status) const;
/**
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). This uses
* createKey to create a key from the provided descriptor.</p>
*
* @param descriptor the descriptor.
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
* @param status the error code status.
* @return the service instance, or NULL.
*/
UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const;
/**
* <p>Convenience override for getKey(ICUServiceKey&, UnicodeString*, UErrorCode&)
* for callers that do not need the matched descriptor.</p>
*
* @param key the key.
* @param status the error code status.
* @return the service instance, or NULL.
*/
UObject* getKey(ICUServiceKey& key, UErrorCode& status) const;
/**
* <p>Given a key, return a service object, and, if actualReturn
* is not NULL, store the descriptor with which it was found in
* the string actualReturn points to. If no service object matches
* this key, returns NULL and leaves actualReturn unchanged.</p>
*
* <p>This queries the cache using the key's descriptor, and if no
* object in the cache matches, tries the key on each
* registered factory, in order. If none generates a service
* object for the key, repeats the process with each fallback of
* the key, until either a factory returns a service object, or the key
* has no fallback. If no object is found, the result of handleDefault
* is returned.</p>
*
* <p>Subclasses can override this method to further customize the
* result before returning it.</p>
*
* @param key the key.
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
* @param status the error code status.
* @return the service instance, or NULL.
*/
virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
/**
* <p>This version of getKey is only called by ICUServiceFactories within the scope
* of a previous getKey call, to determine what previously-registered factories would
* have returned. For details, see getKey(ICUServiceKey&, UnicodeString*, UErrorCode&). Subclasses
* should not call it directly, but call through one of the other get functions.</p>
*
* @param key the key.
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
* @param factory the factory making the recursive call.
* @param status the error code status.
* @return the service instance, or NULL.
*/
UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const;
/**
* <p>Convenience override for getVisibleIDs(UVector&, const UnicodeString*, UErrorCode&)
* that passes NULL as the matchID, thus returning all visible IDs.</p>
*
* @param result a vector to hold the returned IDs.
* @param status the error code status.
* @return the result vector.
*/
UVector& getVisibleIDs(UVector& result, UErrorCode& status) const;
/**
* <p>Return a snapshot of the visible IDs for this service. This
* list will not change as ICUServiceFactories are added or removed, but the
* supported IDs will, so there is no guarantee that all and only
* the IDs in the returned list will be visible and supported by the
* service in subsequent calls.</p>
*
* <p>The IDs are returned as pointers to UnicodeStrings. The
* caller owns the IDs. Previous contents of result are discarded before
* new elements, if any, are added.</p>
*
* <p>matchID is passed to createKey to create a key. If the key
* is not NULL, its isFallbackOf method is used to filter out IDs
* that don't match the key or have it as a fallback.</p>
*
* @param result a vector to hold the returned IDs.
* @param matchID an ID used to filter the result, or NULL if all IDs are desired.
* @param status the error code status.
* @return the result vector.
*/
UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const;
/**
* <p>Convenience override for getDisplayName(const UnicodeString&, UnicodeString&, const Locale&) that
* uses the current default locale.</p>
*
* @param id the ID for which to retrieve the localized displayName.
* @param result an output parameter to hold the display name.
* @return the modified result.
*/
UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const;
/**
* <p>Given a visible ID, return the display name in the requested locale.
* If there is no directly supported ID corresponding to this ID, result is
* set to bogus.</p>
*
* @param id the ID for which to retrieve the localized displayName.
* @param result an output parameter to hold the display name.
* @param locale the locale in which to localize the ID.
* @return the modified result.
*/
UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const;
/**
* <p>Convenience override of
* getDisplayNames(UVector&, const Locale&, const UnicodeString*, UErrorCode&) that
* uses the current default Locale as the locale and NULL for
* the matchID.</p>
*
* @param result a vector to hold the returned displayName/id StringPairs.
* @param status the error code status.
* @return the modified result vector.
*/
UVector& getDisplayNames(UVector& result, UErrorCode& status) const;
/**
* <p>Convenience override of
* getDisplayNames(UVector&, const Locale&, const UnicodeString*, UErrorCode&) that
* uses NULL for the matchID.</p>
*
* @param result a vector to hold the returned displayName/id StringPairs.
* @param locale the locale in which to localize the ID.
* @param status the error code status.
* @return the modified result vector.
*/
UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const;
/**
* <p>Return a snapshot of the mapping from display names to visible
* IDs for this service. This set will not change as factories
* are added or removed, but the supported IDs will, so there is
* no guarantee that all and only the IDs in the returned map will
* be visible and supported by the service in subsequent calls,
* nor is there any guarantee that the current display names match
* those in the result.</p>
*
* <p>The names are returned as pointers to StringPairs, which
* contain both the displayName and the corresponding ID. The
* caller owns the StringPairs. Previous contents of result are
* discarded before new elements, if any, are added.</p>
*
* <p>matchID is passed to createKey to create a key. If the key
* is not NULL, its isFallbackOf method is used to filter out IDs
* that don't match the key or have it as a fallback.</p>
*
* @param result a vector to hold the returned displayName/id StringPairs.
* @param locale the locale in which to localize the ID.
* @param matchID an ID used to filter the result, or NULL if all IDs are desired.
* @param status the error code status.
* @return the result vector.
*/
UVector& getDisplayNames(UVector& result,
const Locale& locale,
const UnicodeString* matchID,
UErrorCode& status) const;
/**
* <p>A convenience override of
* registerInstance(UObject*, const UnicodeString&, UBool, UErrorCode&)
* that defaults visible to TRUE.</p>
*
* @param objToAdopt the object to register and adopt.
* @param id the ID to assign to this object.
* @param status the error code status.
* @return a registry key that can be passed to unregister to unregister
* (and discard) this instance.
*/
URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status);
/**
* <p>Register a service instance with the provided ID. The ID will be
* canonicalized. The canonicalized ID will be returned by
* getVisibleIDs if visible is TRUE. The service instance will be adopted and
* must not be modified subsequent to this call.</p>
*
* <p>This issues a serviceChanged notification to registered listeners.</p>
*
* <p>This implementation wraps the object using
* createSimpleFactory, and calls registerFactory.</p>
*
* @param objToAdopt the object to register and adopt.
* @param id the ID to assign to this object.
* @param visible TRUE if getVisibleIDs is to return this ID.
* @param status the error code status.
* @return a registry key that can be passed to unregister() to unregister
* (and discard) this instance.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
/**
* <p>Register an ICUServiceFactory. Returns a registry key that
* can be used to unregister the factory. The factory
* must not be modified subsequent to this call. The service owns
* all registered factories. In case of an error, the factory is
* deleted.</p>
*
* <p>This issues a serviceChanged notification to registered listeners.</p>
*
* <p>The default implementation accepts all factories.</p>
*
* @param factoryToAdopt the factory to register and adopt.
* @param status the error code status.
* @return a registry key that can be passed to unregister to unregister
* (and discard) this factory.
*/
virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status);
/**
* <p>Unregister a factory using a registry key returned by
* registerInstance or registerFactory. After a successful call,
* the factory will be removed from the service factory list and
* deleted, and the key becomes invalid.</p>
*
* <p>This issues a serviceChanged notification to registered
* listeners.</p>
*
* @param rkey the registry key.
* @param status the error code status.
* @return TRUE if the call successfully unregistered the factory.
*/
virtual UBool unregister(URegistryKey rkey, UErrorCode& status);
/**
* <p>Reset the service to the default factories. The factory
* lock is acquired and then reInitializeFactories is called.</p>
*
* <p>This issues a serviceChanged notification to registered listeners.</p>
*/
virtual void reset(void);
/**
* <p>Return TRUE if the service is in its default state.</p>
*
* <p>The default implementation returns TRUE if there are no
* factories registered.</p>
*
* @return TRUE if the service is in its default state.
*/
virtual UBool isDefault(void) const;
/**
* <p>Create a key from an ID. If ID is NULL, returns NULL.</p>
*
* <p>The default implementation creates an ICUServiceKey instance.
* Subclasses can override to define more useful keys appropriate
* to the factories they accept.</p>
*
* @param id a pointer to the ID for which to create a default ICUServiceKey.
* @param status the error code status.
* @return the ICUServiceKey corresponding to ID, or NULL.
*/
virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
/**
* <p>Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define
* clone, so we need an instance-aware method that knows how to do this.
* This is public so factories can call it, but should really be protected.</p>
*
* @param instance the service instance to clone.
* @return a clone of the passed-in instance, or NULL if cloning was unsuccessful.
*/
virtual UObject* cloneInstance(UObject* instance) const = 0;
/************************************************************************
* Subclassing API
*/
protected:
/**
* <p>Create a factory that wraps a single service object. Called by registerInstance.</p>
*
* <p>The default implementation returns an instance of SimpleFactory.</p>
*
* @param instanceToAdopt the service instance to adopt.
* @param id the ID to assign to this service instance.
* @param visible if TRUE, the ID will be visible.
* @param status the error code status.
* @return an instance of ICUServiceFactory that maps this instance to the provided ID.
*/
virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status);
/**
* <p>Reinitialize the factory list to its default state. After this call, isDefault()
* must return TRUE.</p>
*
* <p>This issues a serviceChanged notification to registered listeners.</p>
*
* <p>The default implementation clears the factory list.
* Subclasses can override to provide other default initialization
* of the factory list. Subclasses must not call this method
* directly, since it must only be called while holding write
* access to the factory list.</p>
*/
virtual void reInitializeFactories(void);
/**
* <p>Default handler for this service if no factory in the factory list
* handled the key passed to getKey.</p>
*
* <p>The default implementation returns NULL.</p>
*
* @param key the key.
* @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL.
* @param status the error code status.
* @return the service instance, or NULL.
*/
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const;
/**
* <p>Clear caches maintained by this service.</p>
*
* <p>Subclasses can override if they implement additional caches
* that need to be cleared when the service changes. Subclasses
* should generally not call this method directly, as it must only
* be called while synchronized on the factory lock.</p>
*/
virtual void clearCaches(void);
/**
* <p>Return true if the listener is accepted.</p>
*
* <p>The default implementation accepts the listener if it is
* a ServiceListener. Subclasses can override this to accept
* different listeners.</p>
*
* @param l the listener to test.
* @return TRUE if the service accepts the listener.
*/
virtual UBool acceptsListener(const EventListener& l) const;
/**
* <p>Notify the listener of a service change.</p>
*
* <p>The default implementation assumes a ServiceListener.
* If acceptsListener has been overridden to accept different
* listeners, this should be overridden as well.</p>
*
* @param l the listener to notify.
*/
virtual void notifyListener(EventListener& l) const;
/************************************************************************
* Utilities for subclasses.
*/
/**
* <p>Clear only the service cache.</p>
*
* <p>This can be called by subclasses when a change affects the service
* cache but not the ID caches, e.g., when the default locale changes
* the resolution of IDs also changes, requiring the cache to be
* flushed, but not the visible IDs themselves.</p>
*/
void clearServiceCache(void);
/**
* <p>Return a map from visible IDs to factories.
* This must only be called when the mutex is held.</p>
*
* @param status the error code status.
* @return a Hashtable containing mappings from visible
* IDs to factories.
*/
const Hashtable* getVisibleIDMap(UErrorCode& status) const;
/**
* <p>Allow subclasses to read the time stamp.</p>
*
* <p>NOTE(review): this is declared to return int32_t although the
* timestamp member is declared uint32_t; confirm the conversion is
* intended.</p>
*
* @return the timestamp.
*/
int32_t getTimestamp(void) const;
/**
* <p>Return the number of registered factories.</p>
*
* @return the number of factories registered at the time of the call.
*/
int32_t countFactories(void) const;
private:
friend class ::ICUServiceTest; // give tests access to countFactories.
};
U_NAMESPACE_END
/* UCONFIG_NO_SERVICE */
#endif
/* ICUSERV_H */
#endif

187
source/common/servlk.cpp Normal file
View file

@ -0,0 +1,187 @@
/**
*******************************************************************************
* Copyright (C) 2001-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "unicode/resbund.h"
#include "uresimp.h"
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
U_NAMESPACE_BEGIN
/**
 * Convenience overload: creates a key that is not restricted to a particular
 * kind by forwarding to the four-argument overload with KIND_ANY.
 *
 * @param primaryID the requested ID; forwarded unchanged.
 * @param canonicalFallbackID the fallback ID; forwarded unchanged.
 * @param status the error code status.
 * @return whatever the four-argument overload returns.
 */
LocaleKey*
LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
                                       const UnicodeString* canonicalFallbackID,
                                       UErrorCode& status)
{
    LocaleKey* key =
        LocaleKey::createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY, status);
    return key;
}
/**
 * Creates a LocaleKey for primaryID, canonicalizing the ID with
 * LocaleUtility::canonicalLocaleString first.
 *
 * @param primaryID the requested ID; if NULL, no key is created.
 * @param canonicalFallbackID the fallback ID, passed through to the
 *        LocaleKey constructor (may be NULL).
 * @param kind the kind to store in the key.
 * @param status the error code status. If it already indicates failure on
 *        entry, no key is created. Set to U_MEMORY_ALLOCATION_ERROR if the
 *        key could not be allocated.
 * @return the new key, owned by the caller, or NULL.
 */
LocaleKey*
LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID,
                                       const UnicodeString* canonicalFallbackID,
                                       int32_t kind,
                                       UErrorCode& status)
{
    if (primaryID == NULL || U_FAILURE(status)) {
        return NULL;
    }
    UnicodeString canonicalPrimaryID;
    LocaleUtility::canonicalLocaleString(primaryID, canonicalPrimaryID);

    LocaleKey* key = new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind);
    if (key == NULL) {
        // Report allocation failure through status so callers can tell it
        // apart from the "primaryID was NULL" case, which also returns NULL.
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return key;
}
/**
 * Constructs a key whose current ID starts out as the canonical primary ID.
 * The fallback ID is recorded only when the primary ID is non-empty and a
 * fallback was supplied that differs from it; otherwise it is left bogus.
 *
 * @param primaryID the ID handed to the ICUServiceKey base class.
 * @param canonicalPrimaryID the canonical form of the primary ID.
 * @param canonicalFallbackID the fallback ID, or NULL for none.
 * @param kind the kind stored in this key.
 */
LocaleKey::LocaleKey(const UnicodeString& primaryID,
                     const UnicodeString& canonicalPrimaryID,
                     const UnicodeString* canonicalFallbackID,
                     int32_t kind)
    : ICUServiceKey(primaryID)
    , _kind(kind)
    , _primaryID(canonicalPrimaryID)
    , _fallbackID()
    , _currentID()
{
    const UBool hasUsableFallback =
        _primaryID.length() != 0 &&
        canonicalFallbackID != NULL &&
        _primaryID != *canonicalFallbackID;

    if (hasUsableFallback) {
        _fallbackID = *canonicalFallbackID;
    } else {
        _fallbackID.setToBogus();
    }
    _currentID = _primaryID;
}
/** Destructor; there is nothing to release explicitly. */
LocaleKey::~LocaleKey()
{
}
/**
 * Appends the decimal form of the kind to result, unless the kind is
 * KIND_ANY, in which case result is left untouched.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::prefix(UnicodeString& result) const {
    if (_kind == KIND_ANY) {
        return result;
    }
    UChar digits[64];
    uprv_itou(digits, 64, _kind, 10, 0);
    result.append(UnicodeString(digits));
    return result;
}
/** @return the kind stored in this key. */
int32_t LocaleKey::kind() const
{
    return _kind;
}
/**
 * Appends the canonical primary ID to result.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::canonicalID(UnicodeString& result) const {
    UnicodeString& appended = result.append(_primaryID);
    return appended;
}
/**
 * Appends the current ID to result; does nothing once the current ID has
 * become bogus.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::currentID(UnicodeString& result) const {
    if (_currentID.isBogus()) {
        return result;
    }
    result.append(_currentID);
    return result;
}
/**
 * Appends the current descriptor (prefix, PREFIX_DELIMITER, current ID) to
 * result. If the current ID is bogus, result is set to bogus instead.
 *
 * @param result the string to update.
 * @return result.
 */
UnicodeString&
LocaleKey::currentDescriptor(UnicodeString& result) const {
    if (_currentID.isBogus()) {
        result.setToBogus();
        return result;
    }
    UnicodeString& withPrefix = prefix(result);
    withPrefix.append(PREFIX_DELIMITER).append(_currentID);
    return result;
}
/**
 * Initializes result from the canonical primary ID via
 * LocaleUtility::initLocaleFromName.
 *
 * @param result the locale to initialize.
 * @return the reference returned by LocaleUtility::initLocaleFromName.
 */
Locale&
LocaleKey::canonicalLocale(Locale& result) const {
    Locale& initialized = LocaleUtility::initLocaleFromName(_primaryID, result);
    return initialized;
}
/**
 * Initializes result from the current ID via
 * LocaleUtility::initLocaleFromName.
 *
 * @param result the locale to initialize.
 * @return the reference returned by LocaleUtility::initLocaleFromName.
 */
Locale&
LocaleKey::currentLocale(Locale& result) const {
    Locale& initialized = LocaleUtility::initLocaleFromName(_currentID, result);
    return initialized;
}
/**
 * Advances the current ID to its next fallback. The steps, tried in order:
 *   1. cut the current ID at its last underscore;
 *   2. switch to the stored fallback ID (which is then marked bogus so it
 *      is used only once);
 *   3. truncate a non-empty current ID to the empty string;
 *   4. otherwise mark the current ID bogus and report that no fallback
 *      remains.
 *
 * @return TRUE if a new current ID was produced, FALSE if fallback is
 *         exhausted.
 */
UBool
LocaleKey::fallback() {
    if (_currentID.isBogus()) {
        return FALSE;
    }

    const int32_t lastUnderscore = _currentID.lastIndexOf(UNDERSCORE_CHAR);
    if (lastUnderscore != -1) {
        // Truncate current or fallback, whichever we're pointing to.
        _currentID.remove(lastUnderscore);
        return TRUE;
    }

    if (!_fallbackID.isBogus()) {
        _currentID = _fallbackID;
        _fallbackID.setToBogus();
        return TRUE;
    }

    if (_currentID.length() > 0) {
        // Completely truncate.
        _currentID.remove(0);
        return TRUE;
    }

    _currentID.setToBogus();
    return FALSE;
}
/**
 * Tests whether this key's primary ID is a fallback of id. After
 * parseSuffix is applied to a copy of id, the copy must start with the
 * primary ID and either end there or continue with an underscore.
 *
 * @param id the ID to test.
 * @return TRUE if the primary ID is such a prefix of id.
 */
UBool
LocaleKey::isFallbackOf(const UnicodeString& id) const {
    UnicodeString candidate(id);
    parseSuffix(candidate);

    if (candidate.indexOf(_primaryID) != 0) {
        return FALSE;
    }
    return candidate.length() == _primaryID.length() ||
           candidate.charAt(_primaryID.length()) == UNDERSCORE_CHAR;
}
#ifdef SERVICE_DEBUG
/**
 * Appends a debug description of this key (base-class description, kind,
 * primary ID, fallback ID and current ID) to result.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::debug(UnicodeString& result) const
{
    ICUServiceKey::debug(result);
    result.append(" kind: ");
    // Format the kind as a decimal number. Passing the int32_t straight to
    // append() selects the code point overload, which appends the character
    // whose code point equals _kind instead of its digits.
    UChar kindDigits[64];
    uint32_t kindMagnitude = (uint32_t)_kind;
    if (_kind < 0) {
        result.append((UChar)0x002d); // '-'
        kindMagnitude = 0u - kindMagnitude;
    }
    uprv_itou(kindDigits, 64, kindMagnitude, 10, 0);
    result.append(UnicodeString(kindDigits));
    result.append(" primaryID: ");
    result.append(_primaryID);
    result.append(" fallbackID: ");
    result.append(_fallbackID);
    result.append(" currentID: ");
    result.append(_currentID);
    return result;
}
/**
 * Appends this class's debug name to result.
 *
 * @param result the string to append to.
 * @return result.
 */
UnicodeString&
LocaleKey::debugClass(UnicodeString& result) const
{
    UnicodeString& appended = result.append("LocaleKey ");
    return appended;
}
#endif
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey)
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

151
source/common/servlkf.cpp Normal file
View file

@ -0,0 +1,151 @@
/**
*******************************************************************************
* Copyright (C) 2001-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "unicode/resbund.h"
#include "uresimp.h"
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
U_NAMESPACE_BEGIN
/**
 * Constructs a factory with the given coverage and an empty name.
 *
 * @param coverage the coverage value to store.
 */
LocaleKeyFactory::LocaleKeyFactory(int32_t coverage)
    : _name(),
      _coverage(coverage)
{
    // Nothing else to initialize.
}
/**
 * Constructs a factory with the given coverage and name.
 *
 * @param coverage the coverage value to store.
 * @param name the name to store.
 */
LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name)
    : _name(name),
      _coverage(coverage)
{
    // Nothing else to initialize.
}
/** Destructor; there is nothing to release explicitly. */
LocaleKeyFactory::~LocaleKeyFactory()
{
}
/**
 * Creates a service object for key if handlesKey accepts it. The key is
 * treated as a LocaleKey; its kind and current locale are passed on to
 * handleCreate.
 *
 * @param key the service key; cast to LocaleKey once handlesKey accepts it.
 * @param service the service passed on to handleCreate.
 * @param status the error code status.
 * @return the result of handleCreate, or NULL if the key is not handled.
 */
UObject*
LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const {
    if (!handlesKey(key, status)) {
        return NULL;
    }

    const LocaleKey& localeKey = (const LocaleKey&)key;
    const int32_t requestedKind = localeKey.kind();
    Locale currentLoc;
    localeKey.currentLocale(currentLoc);

    return handleCreate(currentLoc, requestedKind, service, status);
}
/**
 * Reports whether the key's current ID is present in the table returned by
 * getSupportedIDs.
 *
 * @param key the key whose current ID is looked up.
 * @param status the error code status, passed to getSupportedIDs.
 * @return TRUE if the table exists and holds a non-NULL value for the ID.
 */
UBool
LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const {
    const Hashtable* supportedIDs = getSupportedIDs(status);
    if (!supportedIDs) {
        return FALSE;
    }

    UnicodeString currentId;
    key.currentID(currentId);
    return supportedIDs->get(currentId) != NULL;
}
/**
 * Updates result with every ID in the table returned by getSupportedIDs.
 * When the low bit of the coverage is clear the IDs are visible and each is
 * mapped to this factory; otherwise each ID is removed from result.
 *
 * @param result the mapping table to update.
 * @param status the error code status. Iteration stops at the first
 *        failure reported while inserting into result.
 */
void
LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const {
    const Hashtable* supported = getSupportedIDs(status);
    if (supported == NULL) {
        return;
    }

    const UBool visible = (_coverage & 0x1) == 0;
    const UHashElement* elem = NULL;
    // Hash iteration must start at -1: nextElement resumes at pos + 1, so
    // starting at 0 would silently skip an entry stored in slot 0.
    int32_t pos = -1;
    while ((elem = supported->nextElement(pos)) != NULL) {
        const UnicodeString& id = *((const UnicodeString*)elem->key.pointer);
        if (!visible) {
            result.remove(id);
        } else {
            // 'this' is a dummy non-NULL marker used for set semantics.
            result.put(id, (void*)this, status);
            if (U_FAILURE(status)) {
                break;
            }
        }
    }
}
/**
 * Produces the display name of id in the given locale when the low bit of
 * the coverage is clear; otherwise sets result to bogus.
 *
 * The id is not checked against the supported IDs: if this is called on
 * us, we assume we support some fallback of the id.
 *
 * @param id the ID to turn into a locale.
 * @param locale the locale in which to localize the display name.
 * @param result output parameter to hold the display name.
 * @return result when it was set to bogus, otherwise the reference
 *         returned by Locale::getDisplayName.
 */
UnicodeString&
LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const {
    if ((_coverage & 0x1) != 0) {
        result.setToBogus();
        return result;
    }

    Locale idLocale;
    LocaleUtility::initLocaleFromName(id, idLocale);
    return idLocale.getDisplayName(locale, result);
}
/**
 * Base implementation: ignores all of its arguments and creates nothing.
 *
 * @return NULL, always.
 */
UObject*
LocaleKeyFactory::handleCreate(const Locale& /* loc */,
                               int32_t /* kind */,
                               const ICUService* /* service */,
                               UErrorCode& /* status */) const
{
    UObject* const nothing = NULL;
    return nothing;
}
//UBool
//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const {
// const Hashtable* ids = getSupportedIDs(status);
// return ids && ids->get(id);
//}
/**
 * Base implementation: ignores status and reports no table of supported
 * IDs.
 *
 * @return NULL, always.
 */
const Hashtable*
LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const
{
    const Hashtable* const none = NULL;
    return none;
}
#ifdef SERVICE_DEBUG
UnicodeString&
LocaleKeyFactory::debug(UnicodeString& result) const
{
    // Class name first, then the factory's name and coverage.
    // NOTE(review): append(_coverage) is called with an int32_t, which looks
    // like it selects a code-point overload rather than printing a decimal
    // number -- confirm that this is the intended debug output.
    debugClass(result);
    return result.append(", name: ")
                 .append(_name)
                 .append(", coverage: ")
                 .append(_coverage);
}
UnicodeString&
LocaleKeyFactory::debugClass(UnicodeString& result) const
{
    // Appends this class's name to result for debug output.
    result.append("LocaleKeyFactory");
    return result;
}
#endif
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory)
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

550
source/common/servloc.h Normal file
View file

@ -0,0 +1,550 @@
/**
*******************************************************************************
* Copyright (C) 2001-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#ifndef ICULSERV_H
#define ICULSERV_H
#include "unicode/utypes.h"
#if UCONFIG_NO_SERVICE
U_NAMESPACE_BEGIN
/*
* Allow the declaration of APIs with pointers to ICULocaleService
* even when service is removed from the build.
*/
class ICULocaleService;
U_NAMESPACE_END
#else
#include "unicode/unistr.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
#include "hash.h"
#include "uvector.h"
#include "serv.h"
#include "locutil.h"
U_NAMESPACE_BEGIN
class ICULocaleService;
class LocaleKey;
class LocaleKeyFactory;
class SimpleLocaleKeyFactory;
class ServiceListener;
/*
******************************************************************
*/
/**
* <p>A subclass of ICUServiceKey that implements a locale fallback mechanism.
* The first locale to search for is the locale provided by the
* client, and the fallback locale to search for is the current
* default locale. If a prefix is present, the currentDescriptor
* includes it before the locale proper, separated by "/". This
* is the default key instantiated by ICULocaleService.</p>
*
* <p>Canonicalization adjusts the locale string so that the
* section before the first underscore is in lower case, and the rest
* is in upper case, with no trailing underscores.</p>
*/
class U_COMMON_API LocaleKey : public ICUServiceKey {
private:
int32_t _kind;
UnicodeString _primaryID;
UnicodeString _fallbackID;
UnicodeString _currentID;
public:
enum {
/** Kind value meaning "no particular kind"; see prefix(). */
KIND_ANY = -1
};
/**
* Create a LocaleKey with canonical primary and fallback IDs.
* This overload takes no kind argument.
*/
static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
const UnicodeString* canonicalFallbackID,
UErrorCode& status);
/**
* Create a LocaleKey with canonical primary and fallback IDs
* and an explicit kind.
*/
static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID,
const UnicodeString* canonicalFallbackID,
int32_t kind,
UErrorCode& status);
protected:
/**
* PrimaryID is the user's requested locale string,
* canonicalPrimaryID is this string in canonical form,
* fallbackID is the current default locale's string in
* canonical form.
*/
LocaleKey(const UnicodeString& primaryID,
const UnicodeString& canonicalPrimaryID,
const UnicodeString* canonicalFallbackID,
int32_t kind);
public:
/**
* Append the prefix associated with the kind, or nothing if the kind is KIND_ANY.
*/
virtual UnicodeString& prefix(UnicodeString& result) const;
/**
* Return the kind code associated with this key.
*/
virtual int32_t kind() const;
/**
* Return the canonicalID.
*/
virtual UnicodeString& canonicalID(UnicodeString& result) const;
/**
* Return the currentID.
*/
virtual UnicodeString& currentID(UnicodeString& result) const;
/**
* Return the (canonical) current descriptor, or null if no current id.
*/
virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
/**
* Convenience method to return the locale corresponding to the (canonical) original ID.
*/
virtual Locale& canonicalLocale(Locale& result) const;
/**
* Convenience method to return the locale corresponding to the (canonical) current ID.
*/
virtual Locale& currentLocale(Locale& result) const;
/**
* <p>If the key has a fallback, modify the key and return true,
* otherwise return false.</p>
*
* <p>First falls back through the primary ID, then through
* the fallbackID. The final fallback is the empty string,
* unless the primary id was the empty string, in which case
* there is no fallback.</p>
*/
virtual UBool fallback();
/**
* Return true if a key created from id matches, or would eventually
* fallback to match, the canonical ID of this key.
*/
virtual UBool isFallbackOf(const UnicodeString& id) const;
public:
/**
* UObject boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const;
/**
* Destructor.
*/
virtual ~LocaleKey();
#ifdef SERVICE_DEBUG
public:
virtual UnicodeString& debug(UnicodeString& result) const;
virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif
};
/*
******************************************************************
*/
/**
* A subclass of ICUServiceFactory that uses LocaleKeys, and is able to
* 'cover' more specific locales with more general locales that it
* supports.
*
* <p>Coverage may be either of the values VISIBLE or INVISIBLE.
*
* <p>'Visible' indicates that the specific locale(s) supported by
* the factory are added to the visible-ID table by updateVisibleIDs,
* 'Invisible' indicates that they are removed from it.
*
* <p>Localization of visible ids is handled
* by the handling factory, regardless of kind.
*/
class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory {
protected:
const UnicodeString _name;
const int32_t _coverage;
public:
enum {
/**
* Coverage value indicating that the factory makes
* its locales visible, and does not cover more specific
* locales.
*/
VISIBLE = 0,
/**
* Coverage value indicating that the factory does not make
* its locales visible, and does not cover more specific
* locales.
*/
INVISIBLE = 1
};
/**
* Destructor.
*/
virtual ~LocaleKeyFactory();
protected:
/**
* Constructor used by subclasses.
*/
LocaleKeyFactory(int32_t coverage);
/**
* Constructor used by subclasses.
*/
LocaleKeyFactory(int32_t coverage, const UnicodeString& name);
public:
/**
* Implement superclass abstract method. If handlesKey accepts the key,
* this passes the key's current locale and kind off to handleCreate
* (which subclasses must implement); otherwise it returns NULL.
* The key is treated as a LocaleKey.
*/
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
protected:
/**
* Return true if the currentID of the key is present in the table
* returned by getSupportedIDs. Returns false when that table is NULL.
*/
virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const;
public:
/**
* Override of superclass method. This adjusts the result based
* on the coverage rule for this factory.
*/
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
/**
* Return a localized name for the locale represented by id.
* The result is set to bogus when the factory's coverage is INVISIBLE.
*/
virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
protected:
/**
* Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement
* this instead of create. The default returns NULL.
*/
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
/**
* Return true if this id is one the factory supports (visible or
* otherwise). (Currently disabled; the declaration is commented out.)
*/
// virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
/**
* Return the set of ids that this factory supports (visible or
* otherwise). This can be called often and might need to be
* cached if it is expensive to create. The default returns NULL.
*/
virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
public:
/**
* UObject boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const;
#ifdef SERVICE_DEBUG
public:
virtual UnicodeString& debug(UnicodeString& result) const;
virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif
};
/*
******************************************************************
*/
/**
* A LocaleKeyFactory that just returns a single object for a kind/locale.
*/
class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory {
private:
UObject* _obj;
UnicodeString _id;
const int32_t _kind;
public:
/**
* Construct a factory for the given locale ID string. The factory
* adopts objToAdopt and deletes it in its destructor.
*/
SimpleLocaleKeyFactory(UObject* objToAdopt,
const UnicodeString& locale,
int32_t kind,
int32_t coverage);
/**
* Construct a factory for the given Locale; the ID is derived from the
* locale via LocaleUtility::initNameFromLocale. The factory adopts
* objToAdopt and deletes it in its destructor.
*/
SimpleLocaleKeyFactory(UObject* objToAdopt,
const Locale& locale,
int32_t kind,
int32_t coverage);
/**
* Destructor.
*/
virtual ~SimpleLocaleKeyFactory();
/**
* Override of superclass method. If kind/locale match, returns the result
* of service->cloneInstance() on the adopted object; otherwise returns NULL.
* The service pointer is dereferenced on a match, so it must not be NULL.
*/
virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
/**
* Override of superclass method. This adjusts the result based
* on the coverage rule for this factory.
*/
virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
protected:
/**
* Return true if this id is equal to the locale name.
* (Currently disabled; the declaration is commented out.)
*/
//virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const;
public:
/**
* UObject boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const;
#ifdef SERVICE_DEBUG
public:
virtual UnicodeString& debug(UnicodeString& result) const;
virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif
};
/*
******************************************************************
*/
/**
* A LocaleKeyFactory that creates a service based on the ICU locale data.
* This is a base class for most ICU factories. Subclasses instantiate it
* with a constructor that takes a bundle name, which determines the supported
* IDs. Subclasses then override handleCreate to create the actual service
* object. The default implementation returns a resource bundle.
*/
class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory
{
protected:
UnicodeString _bundleName;
public:
/**
* Convenience constructor that uses the main ICU bundle name
* (an empty bundle name). The factory's coverage is VISIBLE.
*/
ICUResourceBundleFactory();
/**
* A service factory based on ICU resource data in resources with
* the given name. This should be a 'path' that can be passed to
* ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL.
* The empty string is equivalent to U_ICUDATA.
* The factory's coverage is VISIBLE.
*/
ICUResourceBundleFactory(const UnicodeString& bundleName);
/**
* Destructor
*/
virtual ~ICUResourceBundleFactory();
protected:
/**
* Return the supported IDs. This is the set of locale names that
* LocaleUtility::getAvailableLocaleNames reports for the bundle name,
* or NULL if status already indicates a failure.
*/
virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
/**
* Create the service. The default implementation returns the resource bundle
* for the locale, ignoring kind, and service.
*/
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
public:
/**
* UObject boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const;
#ifdef SERVICE_DEBUG
public:
virtual UnicodeString& debug(UnicodeString& result) const;
virtual UnicodeString& debugClass(UnicodeString& result) const;
#endif
};
/*
******************************************************************
*/
/**
* An ICUService whose keys are LocaleKeys. It adds Locale-based
* convenience overloads of get and registerInstance, and tracks the
* current default locale as the fallback locale for the keys it creates.
*/
class U_COMMON_API ICULocaleService : public ICUService
{
private:
Locale fallbackLocale;
UnicodeString fallbackLocaleName;
UMTX llock;
public:
/**
* Construct an ICULocaleService.
*/
ICULocaleService();
/**
* Construct an ICULocaleService with a name (useful for debugging).
*/
ICULocaleService(const UnicodeString& name);
/**
* Destructor.
*/
virtual ~ICULocaleService();
#if 0
// redeclare because of overload resolution rules?
// no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char*
// need some compiler flag to remove warnings
UObject* get(const UnicodeString& descriptor, UErrorCode& status) const {
return ICUService::get(descriptor, status);
}
UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const {
return ICUService::get(descriptor, actualReturn, status);
}
#endif
/**
* Convenience override for callers using locales. This calls
* get(locale, kind, actualReturn, status) with KIND_ANY for kind and
* NULL for actualReturn.
*/
UObject* get(const Locale& locale, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* get(locale, kind, actualReturn, status) with a NULL actualReturn.
*/
UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* get(locale, kind, actualReturn, status) with KIND_ANY for kind.
*/
UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This uses
* createKey(locale name, kind) to create a key, calls getKey, and then
* if actualReturn is not NULL and a result was found, stores the actual
* ID reported by getKey (stripping any prefix) into *actualReturn as a Locale.
*/
UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
/**
* Convenience override for callers using locales. This calls
* registerInstance(objToAdopt, locale, kind, coverage, status)
* passing KIND_ANY for the kind, and VISIBLE for the coverage.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
/**
* Convenience function for callers using locales. This calls
* registerInstance(objToAdopt, locale, kind, coverage, status)
* passing VISIBLE for the coverage.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
/**
* Convenience function for callers using locales. This instantiates
* a SimpleLocaleKeyFactory, and registers the factory.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
/**
* (Stop compiler from complaining about hidden overrides.)
* Since both UnicodeString and Locale have constructors that take const char*, adding a public
* method that takes UnicodeString causes ambiguity at call sites that use const char*.
* We really need a flag that is understood by all compilers that will suppress the warning about
* hidden overrides.
*
* The implementation converts the ID to a Locale and registers the object
* with KIND_ANY, using VISIBLE or INVISIBLE coverage according to visible.
*/
virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status);
/**
* Convenience method for callers using locales. This returns the standard
* service ID enumeration.
*/
virtual StringEnumeration* getAvailableLocales(void) const;
protected:
/**
* Return the name of the current fallback locale. If it has changed since this was
* last accessed, the service cache is cleared.
*/
const UnicodeString& validateFallbackLocale() const;
/**
* Override superclass createKey method.
*/
virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
/**
* Additional createKey that takes a kind.
*/
virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
friend class ServiceEnumeration;
};
U_NAMESPACE_END
/* UCONFIG_NO_SERVICE */
#endif
/* ICULSERV_H */
#endif

297
source/common/servls.cpp Normal file
View file

@ -0,0 +1,297 @@
/**
*******************************************************************************
* Copyright (C) 2001-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "unicode/resbund.h"
#include "uresimp.h"
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
U_NAMESPACE_BEGIN
// Constructs an unnamed service. Captures Locale::getDefault() as the
// fallback locale and initializes the llock mutex.
// NOTE(review): fallbackLocaleName is not in the initializer list, so it
// starts out empty; validateFallbackLocale() only fills it in when the
// default locale differs from fallbackLocale. Confirm the initially empty
// name is intended.
ICULocaleService::ICULocaleService()
: fallbackLocale(Locale::getDefault())
, llock(0)
{
umtx_init(&llock);
}
// Constructs a service whose ICUService base is given the name dname.
// Captures Locale::getDefault() as the fallback locale and initializes the
// llock mutex.
// NOTE(review): fallbackLocaleName is not in the initializer list, so it
// starts out empty; validateFallbackLocale() only fills it in when the
// default locale differs from fallbackLocale. Confirm the initially empty
// name is intended.
ICULocaleService::ICULocaleService(const UnicodeString& dname)
: ICUService(dname)
, fallbackLocale(Locale::getDefault())
, llock(0)
{
umtx_init(&llock);
}
// Destroys the llock mutex that the constructors initialized.
ICULocaleService::~ICULocaleService()
{
umtx_destroy(&llock);
}
// Convenience overload: any kind, actual locale not reported.
// Delegates to the four-argument get().
UObject*
ICULocaleService::get(const Locale& locale, UErrorCode& status) const
{
return get(locale, LocaleKey::KIND_ANY, NULL, status);
}
// Convenience overload: explicit kind, actual locale not reported.
// Delegates to the four-argument get().
UObject*
ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const
{
return get(locale, kind, NULL, status);
}
// Convenience overload: any kind, actual locale reported via actualReturn.
// Delegates to the four-argument get().
UObject*
ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const
{
return get(locale, LocaleKey::KIND_ANY, actualReturn, status);
}
UObject*
ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const
{
    // Looks up the service object for locale/kind. When actualReturn is
    // non-NULL and a result is found, *actualReturn receives the locale
    // parsed from the actual ID reported by getKey (prefix stripped).
    if (U_FAILURE(status)) {
        return NULL;
    }

    // The locale name is used as the key ID; a bogus string is reported as
    // an allocation failure.
    UnicodeString locName(locale.getName(), -1, US_INV);
    if (locName.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    ICUServiceKey* key = createKey(&locName, kind, status);
    if (key == NULL) {
        return NULL;
    }

    UObject* found;
    if (actualReturn != NULL) {
        UnicodeString actualID;
        found = getKey(*key, &actualID, status);
        if (found != NULL) {
            key->parseSuffix(actualID);
            LocaleUtility::initLocaleFromName(actualID, *actualReturn);
        }
    } else {
        found = getKey(*key, status);
    }

    // The key was created here and is owned by this function.
    delete key;
    return found;
}
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale,
                                   UBool visible, UErrorCode& status)
{
    // Translate the visibility flag into a coverage value, turn the ID into
    // a Locale, and hand off to the full Locale-based overload with KIND_ANY.
    const int32_t coverage = visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE;
    Locale parsed;
    LocaleUtility::initLocaleFromName(locale, parsed);
    return registerInstance(objToAdopt, parsed, LocaleKey::KIND_ANY, coverage, status);
}
// Convenience overload: registers objToAdopt for locale with KIND_ANY and
// VISIBLE coverage by delegating to the five-argument registerInstance().
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status)
{
return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
}
// Convenience overload: registers objToAdopt for locale and kind with
// VISIBLE coverage by delegating to the five-argument registerInstance().
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status)
{
return registerInstance(objToAdopt, locale, kind, LocaleKeyFactory::VISIBLE, status);
}
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status)
{
    // Wrap the adopted object in a SimpleLocaleKeyFactory and register that.
    ICUServiceFactory* factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
    if (factory == NULL) {
        // No factory took ownership, so the adopted object is deleted here.
        delete objToAdopt;
        return NULL;
    }
    return registerFactory(factory, status);
}
#if 0
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status)
{
return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status);
}
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status)
{
return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY,
visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE,
status);
}
URegistryKey
ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status)
{
ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage);
if (factory != NULL) {
return registerFactory(factory, status);
}
delete objToAdopt;
return NULL;
}
#endif
class ServiceEnumeration : public StringEnumeration {
private:
const ICULocaleService* _service;
int32_t _timestamp;
UVector _ids;
int32_t _pos;
private:
ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
: _service(service)
, _timestamp(service->getTimestamp())
, _ids(uhash_deleteUnicodeString, NULL, status)
, _pos(0)
{
_service->getVisibleIDs(_ids, status);
}
ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
: _service(other._service)
, _timestamp(other._timestamp)
, _ids(uhash_deleteUnicodeString, NULL, status)
, _pos(0)
{
if(U_SUCCESS(status)) {
int32_t i, length;
length = other._ids.size();
for(i = 0; i < length; ++i) {
_ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
}
if(U_SUCCESS(status)) {
_pos = other._pos;
}
}
}
public:
static ServiceEnumeration* create(const ICULocaleService* service) {
UErrorCode status = U_ZERO_ERROR;
ServiceEnumeration* result = new ServiceEnumeration(service, status);
if (U_SUCCESS(status)) {
return result;
}
delete result;
return NULL;
}
virtual ~ServiceEnumeration() {}
virtual StringEnumeration *clone() const {
UErrorCode status = U_ZERO_ERROR;
ServiceEnumeration *cl = new ServiceEnumeration(*this, status);
if(U_FAILURE(status)) {
delete cl;
cl = NULL;
}
return cl;
}
UBool upToDate(UErrorCode& status) const {
if (U_SUCCESS(status)) {
if (_timestamp == _service->getTimestamp()) {
return TRUE;
}
status = U_ENUM_OUT_OF_SYNC_ERROR;
}
return FALSE;
}
virtual int32_t count(UErrorCode& status) const {
return upToDate(status) ? _ids.size() : 0;
}
virtual const UnicodeString* snext(UErrorCode& status) {
if (upToDate(status) && (_pos < _ids.size())) {
return (const UnicodeString*)_ids[_pos++];
}
return NULL;
}
virtual void reset(UErrorCode& status) {
if (status == U_ENUM_OUT_OF_SYNC_ERROR) {
status = U_ZERO_ERROR;
}
if (U_SUCCESS(status)) {
_timestamp = _service->getTimestamp();
_pos = 0;
_service->getVisibleIDs(_ids, status);
}
}
public:
static UClassID U_EXPORT2 getStaticClassID(void);
virtual UClassID getDynamicClassID(void) const;
};
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration)
// Returns a new ServiceEnumeration over this service, or NULL if
// ServiceEnumeration::create() fails.
StringEnumeration*
ICULocaleService::getAvailableLocales(void) const
{
return ServiceEnumeration::create(this);
}
const UnicodeString&
ICULocaleService::validateFallbackLocale() const
{
    // This method is const, but it refreshes cached state, so the updates go
    // through a non-const alias of this object.
    const Locale& currentDefault = Locale::getDefault();
    ICULocaleService* self = const_cast<ICULocaleService*>(this);
    {
        // The lock covers only the compare-and-update; it is released at the
        // end of this scope, before the reference is returned.
        Mutex guard(&self->llock);
        if (currentDefault != fallbackLocale) {
            // The default locale changed: record it, recompute its name, and
            // clear the service cache.
            self->fallbackLocale = currentDefault;
            LocaleUtility::initNameFromLocale(currentDefault, self->fallbackLocaleName);
            self->clearServiceCache();
        }
    }
    // NOTE(review): fallbackLocaleName is only written in the branch above;
    // until the default locale differs from fallbackLocale it keeps whatever
    // value it was constructed with -- confirm.
    return fallbackLocaleName;
}
// Creates a LocaleKey for id whose fallback ID is the name returned by
// validateFallbackLocale(). This overload passes no kind.
ICUServiceKey*
ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const
{
return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), status);
}
// Creates a LocaleKey for id and kind whose fallback ID is the name
// returned by validateFallbackLocale().
ICUServiceKey*
ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const
{
return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), kind, status);
}
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

118
source/common/servnotf.cpp Normal file
View file

@ -0,0 +1,118 @@
/**
*******************************************************************************
* Copyright (C) 2001-2006, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "servnotf.h"
#ifdef NOTIFIER_DEBUG
#include <stdio.h>
#endif
U_NAMESPACE_BEGIN
// Out-of-line definition of the virtual destructor; it has nothing to release.
EventListener::~EventListener() {}
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener)
// Starts with no listener vector (it is created on the first successful
// addListener) and initializes the notifyLock mutex.
ICUNotifier::ICUNotifier(void)
: notifyLock(0), listeners(NULL)
{
umtx_init(&notifyLock);
}
// Deletes the listener vector while holding notifyLock, then destroys the
// mutex. The inner braces end lmx's lifetime before umtx_destroy runs.
ICUNotifier::~ICUNotifier(void) {
{
Mutex lmx(&notifyLock);
delete listeners;
listeners = NULL;
}
umtx_destroy(&notifyLock);
}
void
ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
{
if (U_SUCCESS(status)) {
if (l == NULL) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (acceptsListener(*l)) {
Mutex lmx(&notifyLock);
if (listeners == NULL) {
listeners = new UVector(5, status);
} else {
for (int i = 0, e = listeners->size(); i < e; ++i) {
const EventListener* el = (const EventListener*)(listeners->elementAt(i));
if (l == el) {
return;
}
}
}
listeners->addElement((void*)l, status); // cast away const
}
#ifdef NOTIFIER_DEBUG
else {
fprintf(stderr, "Listener invalid for this notifier.");
exit(1);
}
#endif
}
}
void
ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
{
if (U_SUCCESS(status)) {
if (l == NULL) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
{
Mutex lmx(&notifyLock);
if (listeners != NULL) {
// identity equality check
for (int i = 0, e = listeners->size(); i < e; ++i) {
const EventListener* el = (const EventListener*)listeners->elementAt(i);
if (l == el) {
listeners->removeElementAt(i);
if (listeners->size() == 0) {
delete listeners;
listeners = NULL;
}
return;
}
}
}
}
}
}
void
ICUNotifier::notifyChanged(void)
{
if (listeners != NULL) {
Mutex lmx(&notifyLock);
if (listeners != NULL) {
for (int i = 0, e = listeners->size(); i < e; ++i) {
EventListener* el = (EventListener*)listeners->elementAt(i);
notifyListener(*el);
}
}
}
}
U_NAMESPACE_END
/* UCONFIG_NO_SERVICE */
#endif

124
source/common/servnotf.h Normal file
View file

@ -0,0 +1,124 @@
/**
*******************************************************************************
* Copyright (C) 2001-2004, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef ICUNOTIF_H
#define ICUNOTIF_H
#include "unicode/utypes.h"
#if UCONFIG_NO_SERVICE
U_NAMESPACE_BEGIN
/*
* Allow the declaration of APIs with pointers to ICUNotifier
* even when service is removed from the build.
*/
class ICUNotifier;
U_NAMESPACE_END
#else
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "mutex.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
/**
* Base class for the listeners that an ICUNotifier registers and notifies.
* It declares no notification methods of its own; it supplies only a
* virtual destructor, the UObject class-ID boilerplate, and (under
* SERVICE_DEBUG) debug output helpers.
*/
class U_COMMON_API EventListener : public UObject {
public:
virtual ~EventListener();
public:
/**
* UObject boilerplate.
*/
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const;
public:
#ifdef SERVICE_DEBUG
virtual UnicodeString& debug(UnicodeString& result) const {
return debugClass(result);
}
// NOTE(review): this appends "Key" rather than the class name
// "EventListener"; it looks like a copy-paste from a key class -- confirm.
virtual UnicodeString& debugClass(UnicodeString& result) const {
return result.append("Key");
}
#endif
};
/**
* <p>Abstract implementation of a notification facility. Clients add
* EventListeners with addListener and remove them with removeListener.
* Notifiers call notifyChanged when they wish to notify listeners.
* notifyChanged calls notifyListener on each registered listener
* in the thread of the caller (see the note on notifyChanged).</p>
*
* <p>Subclasses override acceptsListener and notifyListener
* to add type-safe notification. AcceptsListener should return
* true if the listener is of the appropriate type; ICUNotifier
* itself will ensure the listener is non-null and that the
* identical listener is not already registered with the Notifier.
* NotifyListener should cast the listener to the appropriate
* type and call the appropriate method on the listener.</p>
*/
class U_COMMON_API ICUNotifier : public UMemory {
private: UMTX notifyLock;
private: UVector* listeners;
public:
ICUNotifier(void);
virtual ~ICUNotifier(void);
/**
* Add a listener to be notified when notifyChanged is called.
* The listener must not be null. AcceptsListener must return
* true for the listener. Attempts to register the identical
* listener more than once will be silently ignored.
*/
virtual void addListener(const EventListener* l, UErrorCode& status);
/**
* Stop notifying this listener. The listener must
* not be null. Attempts to remove a listener that is
* not registered will be silently ignored.
*/
virtual void removeListener(const EventListener* l, UErrorCode& status);
/**
* ICU doesn't spawn its own threads. All listeners are notified in
* the thread of the caller. Misbehaved listeners can therefore
* indefinitely block the calling thread. Callers should beware of
* deadlock situations.
*/
virtual void notifyChanged(void);
protected:
/**
* Subclasses implement this to return TRUE if the listener is
* of the appropriate type.
*/
virtual UBool acceptsListener(const EventListener& l) const = 0;
/**
* Subclasses implement this to notify the listener.
*/
virtual void notifyListener(EventListener& l) const = 0;
};
U_NAMESPACE_END
/* UCONFIG_NO_SERVICE */
#endif
/* ICUNOTIF_H */
#endif

94
source/common/servrbf.cpp Normal file
View file

@ -0,0 +1,94 @@
/**
*******************************************************************************
* Copyright (C) 2001-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "unicode/resbund.h"
#include "uresimp.h"
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
U_NAMESPACE_BEGIN
// Constructs a VISIBLE factory with an empty bundle name.
ICUResourceBundleFactory::ICUResourceBundleFactory()
: LocaleKeyFactory(VISIBLE)
, _bundleName()
{
}
// Constructs a VISIBLE factory that keeps its own copy of bundleName.
ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName)
: LocaleKeyFactory(VISIBLE)
, _bundleName(bundleName)
{
}
// Nothing to release beyond what the members' own destructors handle.
ICUResourceBundleFactory::~ICUResourceBundleFactory() {}
const Hashtable*
ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const
{
    // The supported IDs are the available locale names for this bundle;
    // nothing is looked up when status already indicates a failure.
    return U_FAILURE(status) ? NULL : LocaleUtility::getAvailableLocaleNames(_bundleName);
}
UObject*
ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    // _bundleName is a package name and should only contain invariant
    // characters, so it can be extracted into a plain char buffer.
    // The buffer limits the name to 19 characters plus the terminator.
    // ??? is it always true that the max length of the bundle name is 19?
    char pkg[20];
    const int32_t capacity = (int32_t)sizeof(pkg);
    const int32_t needed = _bundleName.extract(0, INT32_MAX, pkg, capacity, US_INV);
    if (needed >= capacity) {
        // The name did not fit; no bundle is created and status is left as is.
        return NULL;
    }

    return new ResourceBundle(pkg, loc, status);
}
#ifdef SERVICE_DEBUG
UnicodeString&
ICUResourceBundleFactory::debug(UnicodeString& result) const
{
    // Base-class description first, then this factory's bundle name.
    LocaleKeyFactory::debug(result);
    result.append(", bundle: ");
    result.append(_bundleName);
    return result;
}
UnicodeString&
ICUResourceBundleFactory::debugClass(UnicodeString& result) const
{
    // Appends this class's name to result for debug output.
    result.append("ICUResourceBundleFactory");
    return result;
}
#endif
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory)
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

122
source/common/servslkf.cpp Normal file
View file

@ -0,0 +1,122 @@
/**
*******************************************************************************
* Copyright (C) 2001-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE
#include "unicode/resbund.h"
#include "uresimp.h"
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
// Constructs a factory that serves objToAdopt for the given locale ID
// string and kind. The factory takes ownership of objToAdopt (it is
// deleted in the destructor).
SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
const UnicodeString& locale,
int32_t kind,
int32_t coverage)
: LocaleKeyFactory(coverage)
, _obj(objToAdopt)
, _id(locale)
, _kind(kind)
{
}
// Constructs a factory that serves objToAdopt for the given Locale and
// kind. The ID starts empty and is then filled in from the locale by
// LocaleUtility::initNameFromLocale. The factory takes ownership of
// objToAdopt (it is deleted in the destructor).
SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt,
const Locale& locale,
int32_t kind,
int32_t coverage)
: LocaleKeyFactory(coverage)
, _obj(objToAdopt)
, _id()
, _kind(kind)
{
LocaleUtility::initNameFromLocale(locale, _id);
}
// Deletes the object adopted at construction.
SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory()
{
delete _obj;
_obj = NULL;
}
UObject*
SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    // The key is treated as a LocaleKey without a runtime type check.
    const LocaleKey& localeKey = (const LocaleKey&)key;

    // A factory registered for a specific kind serves only that kind;
    // KIND_ANY serves every kind.
    if (_kind != LocaleKey::KIND_ANY && _kind != localeKey.kind()) {
        return NULL;
    }

    // Only an exact match of the key's current ID is served.
    UnicodeString requestedID;
    localeKey.currentID(requestedID);
    if (_id != requestedID) {
        return NULL;
    }

    // The caller receives whatever the service's cloneInstance() returns
    // for the adopted object.
    return service->cloneInstance(_obj);
}
//UBool
//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const
//{
// return id == _id;
//}
void
SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return;
    }

    // Bit 0 of the coverage value clear means VISIBLE: publish the ID.
    // Otherwise the ID is removed from the table.
    const UBool visible = (_coverage & 0x1) == 0;
    if (visible) {
        // The stored value is only a non-NULL marker.
        result.put(_id, (void*)this, status);
    } else {
        result.remove(_id);
    }
}
#ifdef SERVICE_DEBUG
/**
 * Appends a debug description of this factory to result: the base class
 * description followed by ", id: " + _id and ", kind: " + _kind.
 * Compiled only when SERVICE_DEBUG is defined.
 *
 * @param result string that receives the description.
 * @return result.
 */
UnicodeString&
SimpleLocaleKeyFactory::debug(UnicodeString& result) const
{
LocaleKeyFactory::debug(result);
result.append(", id: ");
result.append(_id);
result.append(", kind: ");
// NOTE(review): _kind is an integer; if this call resolves to
// UnicodeString::append(UChar32) it appends the code point with that
// value rather than its decimal digits -- confirm the intended output.
result.append(_kind);
return result;
}
/**
 * Appends this class's name, "SimpleLocaleKeyFactory", to result.
 * Compiled only when SERVICE_DEBUG is defined.
 *
 * @param result string that receives the class name.
 * @return result.
 */
UnicodeString&
SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const
{
return result.append("SimpleLocaleKeyFactory");
}
#endif
// Expands the project's RTTI implementation macro for this class. The macro
// is defined elsewhere; presumably it supplies the class-ID accessors --
// confirm in unicode/uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory)
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

129
source/common/sprpimpl.h Normal file
View file

@ -0,0 +1,129 @@
/*
*******************************************************************************
*
* Copyright (C) 2003-2006, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: sprpimpl.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb1
* created by: Ram Viswanadha
*/
#ifndef SPRPIMPL_H
#define SPRPIMPL_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/ustring.h"
#include "unicode/parseerr.h"
#include "unicode/usprep.h"
#include "unicode/udata.h"
#include "utrie.h"
#include "udataswp.h"
#include "ubidi_props.h"
/* Data type string for StringPrep profile data (the ".spp" files mentioned
 * in the usprep_swap() documentation in this header). */
#define _SPREP_DATA_TYPE "spp"
/*
 * Classification values used by the StringPrep implementation.
 * The values are consecutive from 0; USPREP_TYPE_LIMIT is one past the
 * last real type. The names presumably mirror the RFC 3454 categories
 * (unassigned, mapped, prohibited, deleted) -- confirm against usprep.c.
 */
enum UStringPrepType{
USPREP_UNASSIGNED = 0x0000 ,
USPREP_MAP = 0x0001 ,
USPREP_PROHIBITED = 0x0002 ,
USPREP_DELETE = 0x0003 ,
USPREP_TYPE_LIMIT = 0x0004
};
typedef enum UStringPrepType UStringPrepType;
/*
 * Printable names for the UStringPrepType values, listed in the same order
 * as the enum constants (index 0 = "UNASSIGNED" ... index 4 = "TYPE_LIMIT").
 * Only compiled into translation units that define USPREP_TYPE_NAMES_ARRAY
 * before including this header.
 */
#ifdef USPREP_TYPE_NAMES_ARRAY
static const char* usprepTypeNames[] ={
"UNASSIGNED" ,
"MAP" ,
"PROHIBITED" ,
"DELETE",
"TYPE_LIMIT"
};
#endif
/*
 * Option flags occupying distinct bits (0x1 and 0x2).
 * Presumably these are the bits stored in indexes[_SPREP_OPTIONS] and
 * reflected in UStringPrepProfile.doNFKC / checkBiDi -- confirm in usprep.c.
 */
enum{
_SPREP_NORMALIZATION_ON = 0x0001,
_SPREP_CHECK_BIDI_ON = 0x0002
};
/*
 * Numeric limits used when interpreting StringPrep data values.
 * NOTE(review): the exact role of each constant is not visible in this
 * header; presumably they partition the 16-bit trie result values --
 * confirm against usprep.c and the data builder.
 */
enum{
_SPREP_TYPE_THRESHOLD = 0xFFF0,
_SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/
_SPREP_MAX_INDEX_TOP_LENGTH = 0x0003
};
/*
 * indexes[] value names.
 * Each constant is a slot number in the indexes[] array; _SPREP_INDEX_TOP
 * is the array length used by UStringPrepProfile.indexes. Slots 8..15 have
 * no names in this header.
 */
enum {
_SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in StringPrep trie */
_SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */
_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */
_SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */
_SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */
_SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5, /* The starting index of 3 UChars mapping index in the mapping data array */
_SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6, /* The starting index of 4 UChars mapping index in the mapping data array */
_SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */
_SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */
};
/*
 * A (name, path) pair of C strings.
 * Presumably the key under which loaded profiles are cached -- confirm in
 * usprep.c. String ownership is not visible from this header.
 */
typedef struct UStringPrepKey UStringPrepKey;
struct UStringPrepKey{
char* name;
char* path;
};
/*
 * In-memory state of one StringPrep profile.
 * NOTE(review): field roles below marked "presumably" are inferred from
 * names and neighbouring declarations; confirm in usprep.c.
 */
struct UStringPrepProfile{
int32_t indexes[_SPREP_INDEX_TOP]; /* presumably addressed with the _SPREP_* index constants */
UTrie sprepTrie;
const uint16_t* mappingData; /* presumably the mapping data array described by the index constants */
UDataMemory* sprepData;
const UBiDiProps *bdp; /* used only if checkBiDi is set */
int32_t refCount;
UBool isDataLoaded;
UBool doNFKC; /* presumably mirrors _SPREP_NORMALIZATION_ON */
UBool checkBiDi; /* presumably mirrors _SPREP_CHECK_BIDI_ON */
};
/**
* Helper function for populating the UParseError struct
*
* @param rules      presumably the text in which the error was found -- confirm
* @param pos        presumably the offset of the error within rules -- confirm
* @param rulesLen   presumably the length of rules -- confirm
* @param parseError the UParseError struct to populate
* @internal
*/
U_CAPI void U_EXPORT2
uprv_syntaxError(const UChar* rules,
int32_t pos,
int32_t rulesLen,
UParseError* parseError);
/**
* Swap StringPrep .spp profile data. See udataswp.h.
*
* The parameter list (swapper, input data, length, output buffer, error
* code) and int32_t result presumably follow the swap-function convention
* documented in udataswp.h -- confirm there.
* @internal
*/
U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

View file

@ -0,0 +1,54 @@
// Copyright (C) 2009, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2004 and onwards Google Inc.
//
// Author: wilsonh@google.com (Wilson Hsieh)
//
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
// Wraps a NUL-terminated C string without copying it.
// A NULL pointer yields a piece with a NULL ptr_ and length 0.
StringPiece::StringPiece(const char* str)
    : ptr_(str), length_(0) {
  if (str != NULL) {
    length_ = static_cast<int32_t>(uprv_strlen(str));
  }
}
// Creates the suffix of x that starts at pos.
// pos is pinned: negative values become 0, values past the end become
// x.length_ (giving an empty piece).
StringPiece::StringPiece(const StringPiece& x, int32_t pos) {
  const int32_t offset = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
  ptr_ = x.ptr_ + offset;
  length_ = x.length_ - offset;
}
// Creates the sub-piece of x that starts at pos and spans len bytes.
// pos is pinned to [0, x.length_]; len is then pinned to
// [0, x.length_ - pos] so the result never extends past the end of x.
StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) {
  const int32_t offset = (pos < 0) ? 0 : ((pos > x.length_) ? x.length_ : pos);
  const int32_t available = x.length_ - offset;
  const int32_t count = (len < 0) ? 0 : ((len > available) ? available : len);
  ptr_ = x.ptr_ + offset;
  length_ = count;
}
/* Out-of-class definition of the static const member StringPiece::npos.
 *
 * Microsoft Visual Studio <= 8.0 complains about redefinition of this
 * static const class variable. However, the C++ standard states that this
 * definition is correct. Perhaps there is a bug in the Microsoft compiler.
 * This is not an issue on any other compilers (that we know of) including
 * Visual Studio 9.0.
 * Cygwin with MSVC 9.0 also complains here about redefinition.
 *
 * The guard below therefore emits the definition only for non-Microsoft
 * compilers and for _MSC_VER >= 1500, and never when CYGWINMSVC is defined.
 */
#if (!defined(_MSC_VER) || (_MSC_VER >= 1500)) && !defined(CYGWINMSVC)
const int32_t StringPiece::npos;
#endif
U_NAMESPACE_END

1408
source/common/triedict.cpp Normal file

File diff suppressed because it is too large Load diff

346
source/common/triedict.h Normal file
View file

@ -0,0 +1,346 @@
/**
*******************************************************************************
* Copyright (C) 2006, International Business Machines Corporation and others. *
* All Rights Reserved. *
*******************************************************************************
*/
#ifndef TRIEDICT_H
#define TRIEDICT_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
struct UEnumeration;
struct UDataSwapper;
struct UDataMemory;
/**
* <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
*
* <p>Declared before U_NAMESPACE_BEGIN, i.e. outside the namespace block
* that holds the dictionary classes in this header.</p>
*
* @param ds Pointer to UDataSwapper containing global data about the
* transformation and function pointers for handling primitive
* types.
* @param inData Pointer to the input data to be transformed or examined.
* @param length Length of the data, counting bytes. May be -1 for preflighting.
* If length>=0, then transform the data.
* If length==-1, then only determine the length of the data.
* The length cannot be determined from the data itself for all
* types of data (e.g., not for simple arrays of integers).
* @param outData Pointer to the output data buffer.
* If length>=0 (transformation), then the output buffer must
* have a capacity of at least length.
* If length==-1, then outData will not be used and can be NULL.
* @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
* fulfill U_SUCCESS on input.
* @return The actual length of the data.
*
* @see UDataSwapper
*/
U_CAPI int32_t U_EXPORT2
triedict_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
U_NAMESPACE_BEGIN
class StringEnumeration;
struct CompactTrieHeader;
/*******************************************************************
* TrieWordDictionary
*/
/**
* <p>TrieWordDictionary is an abstract class that represents a word
* dictionary based on a trie. The base protocol is read-only.
* Subclasses may allow writing.</p>
*
* <p>Both matches() and openWords() are pure virtual.</p>
*/
class U_COMMON_API TrieWordDictionary : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
TrieWordDictionary();
/**
* <p>Virtual destructor.</p>
*/
virtual ~TrieWordDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* <p>There is no separate start-position parameter; presumably matching
* begins at the UText's current iteration position (confirm against the
* implementations).</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit ) const = 0;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
};
/*******************************************************************
* MutableTrieDictionary
*/
/**
* <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
* added.</p>
*/
struct TernaryNode; // Forward declaration
class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
private:
/**
* The root node of the trie
* @internal
*/
TernaryNode *fTrie;
/**
* A UText for internal use
* @internal
*/
UText *fIter;
friend class CompactTrieDictionary; // For fast conversion
public:
/**
* <p>Constructor.</p>
*
* @param median A UChar around which to balance the trie. Ideally, it should
* begin at least one word that is near the median of the set in the dictionary
* @param status A status code recording the success of the call.
*/
MutableTrieDictionary( UChar median, UErrorCode &status );
/**
* <p>Virtual destructor.</p>
*/
virtual ~MutableTrieDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit ) const;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const;
/**
* <p>Add one word to the dictionary.</p>
*
* @param word A UChar buffer containing the word.
* @param length The length of the word.
* @param status The resultant status
*/
virtual void addWord( const UChar *word,
int32_t length,
UErrorCode &status);
// The addWords() declaration below is disabled with #if 0.
#if 0
/**
* <p>Add all strings from a UEnumeration to the dictionary.</p>
*
* @param words A UEnumeration that will return the desired words.
* @param status The resultant status
*/
virtual void addWords( UEnumeration *words, UErrorCode &status );
#endif
protected:
/**
* <p>Search the dictionary for matches.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param maxLength The maximum number of code units to match.
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @param parent The parent of the current node
* @param pMatched The returned parent node matched the input
* @return The number of characters in text that were matched.
*/
virtual int32_t search( UText *text,
int32_t maxLength,
int32_t *lengths,
int &count,
int limit,
TernaryNode *&parent,
UBool &pMatched ) const;
private:
/**
* <p>Private constructor. The root node is not allocated.</p>
*
* @param status A status code recording the success of the call.
*/
MutableTrieDictionary( UErrorCode &status );
};
/*******************************************************************
* CompactTrieDictionary
*/
/**
* <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
* to save space.</p>
*/
class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
private:
/**
* The root node of the trie
*/
const CompactTrieHeader *fData;
/**
* A UBool indicating whether or not we own the fData.
*/
UBool fOwnData;
/**
* A UDataMemory handle. Presumably the object adopted by the UDataMemory
* constructor below (confirm in triedict.cpp).
*/
UDataMemory *fUData;
public:
/**
* <p>Construct a dictionary from a UDataMemory.</p>
*
* @param dataObj A pointer to a UDataMemory, which is adopted
* @param status A status code giving the result of the constructor
*/
CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
/**
* <p>Construct a dictionary from raw saved data.</p>
*
* @param dataObj A pointer to the raw data, which is still owned by the caller
* @param status A status code giving the result of the constructor
*/
CompactTrieDictionary(const void *dataObj, UErrorCode &status);
/**
* <p>Construct a dictionary from a MutableTrieDictionary.</p>
*
* @param dict The dictionary to use as input.
* @param status A status code recording the success of the call.
*/
CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
/**
* <p>Virtual destructor.</p>
*/
virtual ~CompactTrieDictionary();
/**
* <p>Find dictionary words that match the text.</p>
*
* @param text A UText representing the text. The
* iterator is left after the longest prefix match in the dictionary.
* @param rangeEnd The maximum number of code units to match. (This
* parameter is named maxLength in the TrieWordDictionary declaration of
* this virtual function.)
* @param lengths An array that is filled with the lengths of words that matched.
* @param count Filled with the number of elements output in lengths.
* @param limit The size of the lengths array; this limits the number of words output.
* @return The number of characters in text that were matched.
*/
virtual int32_t matches( UText *text,
int32_t rangeEnd,
int32_t *lengths,
int &count,
int limit ) const;
/**
* <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A StringEnumeration that will iterate through the whole dictionary.
* The caller is responsible for closing it. The order is unspecified.
*/
virtual StringEnumeration *openWords( UErrorCode &status ) const;
/**
* <p>Return the size of the compact data.</p>
*
* @return The size of the dictionary's compact data.
*/
virtual uint32_t dataSize() const;
/**
* <p>Return a void * pointer to the compact data, platform-endian.</p>
*
* @return The data for the compact dictionary, suitable for passing to the
* constructor.
*/
virtual const void *data() const;
/**
* <p>Return a MutableTrieDictionary clone of this dictionary.</p>
*
* @param status A status code recording the success of the call.
* @return A MutableTrieDictionary with the same data as this dictionary
*/
virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
private:
/**
* <p>Convert a MutableTrieDictionary into a compact data blob.</p>
*
* @param dict The dictionary to convert.
* @param status A status code recording the success of the call.
* @return A single data blob starting with a CompactTrieHeader.
*/
static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
UErrorCode &status );
};
U_NAMESPACE_END
/* TRIEDICT_H */
#endif

236
source/common/uarrsort.c Normal file
View file

@ -0,0 +1,236 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uarrsort.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003aug04
* created by: Markus W. Scherer
*
* Internal function for sorting arrays.
*/
#include "unicode/utypes.h"
#include "cmemory.h"
#include "uarrsort.h"
/*
 * Tuning constants for the sort implementations in this file.
 * MIN_QSORT: arrays shorter than this are sorted by insertion sort in
 *     uprv_sortArray(), and subQuickSort() switches to insertion sort
 *     once start+MIN_QSORT>=limit.
 * STACK_ITEM_SIZE: largest item size (in bytes) for which the temporary
 *     item buffers live on the stack instead of being heap-allocated.
 */
enum {
MIN_QSORT=9, /* from Knuth */
STACK_ITEM_SIZE=200
};
/* UComparator convenience implementations ---------------------------------- */
/*
 * Compares two uint16_t values given by pointer.
 * Both values are widened to int32_t first, so the difference cannot
 * overflow. Returns <0, 0 or >0 (the numeric difference left-right).
 * The context argument is not used.
 */
U_CAPI int32_t U_EXPORT2
uprv_uint16Comparator(const void *context, const void *left, const void *right) {
    const uint16_t *pLeft=(const uint16_t *)left;
    const uint16_t *pRight=(const uint16_t *)right;
    int32_t leftValue=(int32_t)*pLeft;
    int32_t rightValue=(int32_t)*pRight;

    return leftValue-rightValue;
}
/*
 * Compares two int32_t values given by pointer.
 * Returns -1 if *left<*right, 0 if they are equal, 1 if *left>*right.
 *
 * The values are compared directly rather than subtracted: (l-r) overflows
 * int32_t when the operands are far apart (for example INT32_MIN and
 * INT32_MAX), which is undefined behavior and in practice yields a result
 * with the wrong sign.
 * The context argument is not used.
 */
U_CAPI int32_t U_EXPORT2
uprv_int32Comparator(const void *context, const void *left, const void *right) {
    int32_t l=*(const int32_t *)left, r=*(const int32_t *)right;

    (void)context;
    if(l<r) {
        return -1;
    } else if(l==r) {
        return 0;
    } else /* l>r */ {
        return 1;
    }
}
/*
 * Compares two uint32_t values given by pointer.
 * Returns -1, 0 or 1 for less-than, equal, greater-than.
 * The result is built from the two relational tests because the
 * difference of two uint32_t values does not fit into int32_t.
 * The context argument is not used.
 */
U_CAPI int32_t U_EXPORT2
uprv_uint32Comparator(const void *context, const void *left, const void *right) {
    const uint32_t leftValue=*(const uint32_t *)left;
    const uint32_t rightValue=*(const uint32_t *)right;

    /* (greater) - (less) is 1, 0 or -1 */
    return (int32_t)(leftValue>rightValue)-(int32_t)(leftValue<rightValue);
}
/* Straight insertion sort from Knuth vol. III, pg. 81 ---------------------- */
/*
 * Sorts the items array[start..limit[ in place by straight insertion.
 *
 * array    - base of the item storage, addressed as array+index*itemSize
 * start    - index of the first item to sort (inclusive)
 * limit    - index after the last item to sort (exclusive)
 * itemSize - size of one item in bytes
 * cmp      - comparison function, called as cmp(context, a, b)
 * context  - passed through to cmp unchanged
 * pv       - caller-supplied scratch space holding one item (itemSize bytes)
 *
 * An item is only moved in front of items that compare strictly greater,
 * so items that compare equal keep their relative order.
 */
static void
doInsertionSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
UComparator *cmp, const void *context, void *pv) {
int32_t i, j;
for(j=start+1; j<limit; ++j) {
/* v=array[j] */
uprv_memcpy(pv, array+j*itemSize, itemSize);
/* shift greater items in [start..j[ up by one slot until v's place is found */
for(i=j; i>start; --i) {
if(/* v>=array[i-1] */ cmp(context, pv, array+(i-1)*itemSize)>=0) {
break;
}
/* array[i]=array[i-1]; */
uprv_memcpy(array+i*itemSize, array+(i-1)*itemSize, itemSize);
}
/* i==j means nothing was shifted and v is already in place */
if(i!=j) {
/* array[i]=v; */
uprv_memcpy(array+i*itemSize, pv, itemSize);
}
}
}
/*
 * Insertion-sorts the whole array [0..length[.
 * Provides the one-item scratch buffer that doInsertionSort() needs:
 * a stack buffer for items up to STACK_ITEM_SIZE bytes, otherwise a heap
 * block that is released before returning. If the heap allocation fails,
 * *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR and the array is left
 * untouched.
 */
static void
insertionSort(char *array, int32_t length, int32_t itemSize,
              UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
    UAlignedMemory stackItem[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1];
    void *item=stackItem;

    /* large items do not fit the stack buffer: take one from the heap */
    if(itemSize>STACK_ITEM_SIZE) {
        item=uprv_malloc(itemSize);
        if(item==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    doInsertionSort(array, 0, length, itemSize, cmp, context, item);

    if(item!=stackItem) {
        uprv_free(item);
    }
}
/* QuickSort ---------------------------------------------------------------- */
/*
* This implementation is semi-recursive:
* It recurses for the smaller sub-array to shorten the recursion depth,
* and loops for the larger sub-array.
*
* Loosely after QuickSort algorithms in
* Niklaus Wirth
* Algorithmen und Datenstrukturen mit Modula-2
* B.G. Teubner Stuttgart
* 4. Auflage 1986
* ISBN 3-519-02260-5
*/
/*
 * Sorts the items array[start..limit[ in place.
 *
 * array    - base of the item storage, addressed as array+index*itemSize
 * start    - index of the first item (inclusive)
 * limit    - index after the last item (exclusive)
 * itemSize - size of one item in bytes
 * cmp      - comparison function, called as cmp(context, a, b)
 * context  - passed through to cmp unchanged
 * px       - scratch space for one item; holds the pivot, and is reused as
 *            the scratch item when falling back to doInsertionSort()
 * pw       - scratch space for one item; used for swapping
 *
 * Each pass partitions the range around the middle item, recurses into the
 * smaller partition and continues the outer loop with the larger one.
 */
static void
subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize,
UComparator *cmp, const void *context,
void *px, void *pw) {
int32_t left, right;
/* start and left are inclusive, limit and right are exclusive */
do {
/* short ranges are finished with insertion sort */
if((start+MIN_QSORT)>=limit) {
doInsertionSort(array, start, limit, itemSize, cmp, context, px);
break;
}
left=start;
right=limit;
/* x=array[middle] */
uprv_memcpy(px, array+((start+limit)/2)*itemSize, itemSize);
/* partition: move left up and right down until they meet */
do {
while(/* array[left]<x */
cmp(context, array+left*itemSize, px)<0
) {
++left;
}
while(/* x<array[right-1] */
cmp(context, px, array+(right-1)*itemSize)<0
) {
--right;
}
/* swap array[left] and array[right-1] via w; ++left; --right */
if(left<right) {
--right;
/* after the decrement left==right means a single item: no swap needed */
if(left<right) {
uprv_memcpy(pw, array+left*itemSize, itemSize);
uprv_memcpy(array+left*itemSize, array+right*itemSize, itemSize);
uprv_memcpy(array+right*itemSize, pw, itemSize);
}
++left;
}
} while(left<right);
/* sort sub-arrays */
/* recurse into the smaller side, loop on the larger side */
if((right-start)<(limit-left)) {
/* sort [start..right[ */
if(start<(right-1)) {
subQuickSort(array, start, right, itemSize, cmp, context, px, pw);
}
/* sort [left..limit[ */
start=left;
} else {
/* sort [left..limit[ */
if(left<(limit-1)) {
subQuickSort(array, left, limit, itemSize, cmp, context, px, pw);
}
/* sort [start..right[ */
limit=right;
}
/* continue while the remaining range has at least two items */
} while(start<(limit-1));
}
/*
 * Quick-sorts the whole array [0..length[.
 * Provides the two one-item scratch buffers that subQuickSort() needs
 * (pivot and swap item), laid out back to back: a stack buffer when items
 * are at most STACK_ITEM_SIZE bytes, otherwise one heap block of
 * 2*itemSize bytes that is released before returning. If the heap
 * allocation fails, *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR and
 * the array is left untouched.
 */
static void
quickSort(char *array, int32_t length, int32_t itemSize,
          UComparator *cmp, const void *context, UErrorCode *pErrorCode) {
    UAlignedMemory stackItems[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1];
    void *items=stackItems;
    void *pivotItem;
    void *swapItem;

    /* large items do not fit the stack buffer: take both from the heap */
    if(itemSize>STACK_ITEM_SIZE) {
        items=uprv_malloc(2*itemSize);
        if(items==NULL) {
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    pivotItem=items;
    swapItem=(char *)items+itemSize;
    subQuickSort(array, 0, length, itemSize, cmp, context, pivotItem, swapItem);

    if(items!=stackItems) {
        uprv_free(items);
    }
}
/* uprv_sortArray() API ----------------------------------------------------- */
/*
* Check arguments, select an appropriate implementation,
* cast the array to char * so that array+i*itemSize works.
*/
/*
 * Sorts length items of itemSize bytes each, starting at array, using cmp.
 *
 * Does nothing if pErrorCode is NULL or already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR for a negative length, a non-positive
 * itemSize, a NULL cmp, or a NULL array with a positive length.
 * Arrays of zero or one item are returned unchanged.
 * Insertion sort is used when sortStable is set or the array is shorter
 * than MIN_QSORT; otherwise quick sort is used.
 */
U_CAPI void U_EXPORT2
uprv_sortArray(void *array, int32_t length, int32_t itemSize,
               UComparator *cmp, const void *context,
               UBool sortStable, UErrorCode *pErrorCode) {
    char *items;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }
    if(length<0 || itemSize<=0 || cmp==NULL || (array==NULL && length>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(length<=1) {
        /* nothing to sort */
        return;
    }

    items=(char *)array;
    if(sortStable || length<MIN_QSORT) {
        insertionSort(items, length, itemSize, cmp, context, pErrorCode);
        /* could add heapSort or similar for stable sorting of longer arrays */
        return;
    }
    quickSort(items, length, itemSize, cmp, context, pErrorCode);
}

Some files were not shown because too many files have changed in this diff Show more