commit 5a8dc8f3b3f3e923f6cc54543b84f6953bdac05a
Author: Markus Scherer
Date: Mon May 23 21:13:20 2011 +0000
ICU-8581 ICU4J 4.8 maintenance branch copied from trunk at r30135
X-SVN-Rev: 30137
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000000..774b8534b56
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,454 @@
+* text=auto !eol
+
+*.c text !eol
+*.cc text !eol
+*.classpath text !eol
+*.cpp text !eol
+*.css text !eol
+*.dsp text !eol
+*.dsw text !eol
+*.filters text !eol
+*.h text !eol
+*.htm text !eol
+*.html text !eol
+*.in text !eol
+*.java text !eol
+*.launch text !eol
+*.mak text !eol
+*.md text !eol
+*.MF text !eol
+*.mk text !eol
+*.pl text !eol
+*.pm text !eol
+*.project text !eol
+*.properties text !eol
+*.py text !eol
+*.rc text !eol
+*.sh text eol=lf
+*.sln text !eol
+*.stub text !eol
+*.txt text !eol
+*.ucm text !eol
+*.vcproj text !eol
+*.vcxproj text !eol
+*.xml text !eol
+*.xsl text !eol
+*.xslt text !eol
+Makefile text !eol
+configure text !eol
+LICENSE text !eol
+README text !eol
+
+*.bin -text
+*.brk -text
+*.cnv -text
+*.icu -text
+*.res -text
+*.nrm -text
+*.spp -text
+*.tri2 -text
+
+/build.properties -text
+demos/.settings/org.eclipse.core.resources.prefs -text
+demos/manifest.stub -text
+eclipse-build/features.template/com.ibm.icu.base/.project -text
+eclipse-build/features.template/com.ibm.icu.base/build.properties -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/.classpath -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/.project -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.core.prefs -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.ui.prefs -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/META-INF/MANIFEST.MF -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/build.properties -text
+eclipse-build/plugins.template/com.ibm.icu.base.tests/plugin.properties -text
+eclipse-build/plugins.template/com.ibm.icu.base/.classpath -text
+eclipse-build/plugins.template/com.ibm.icu.base/.project -text
+eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.core.prefs -text
+eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.ui.prefs -text
+eclipse-build/plugins.template/com.ibm.icu.base/META-INF/MANIFEST.MF -text
+eclipse-build/plugins.template/com.ibm.icu.base/build.properties -text
+eclipse-build/plugins.template/com.ibm.icu.base/plugin.properties -text
+eclipse-build/plugins.template/com.ibm.icu.tests/META-INF/MANIFEST.MF -text
+eclipse-build/plugins.template/com.ibm.icu.tests/plugin.properties -text
+eclipse-build/plugins.template/com.ibm.icu/META-INF/MANIFEST.MF -text
+main/classes/charset/.classpath -text
+main/classes/charset/.project -text
+main/classes/charset/.settings/org.eclipse.core.resources.prefs -text
+main/classes/charset/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/charset/manifest.stub -text
+main/classes/collate/.classpath -text
+main/classes/collate/.project -text
+main/classes/collate/.settings/org.eclipse.core.resources.prefs -text
+main/classes/collate/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/collate/.settings/org.eclipse.jdt.ui.prefs -text
+main/classes/collate/collate-build.launch -text
+main/classes/core/.classpath -text
+main/classes/core/.project -text
+main/classes/core/.settings/org.eclipse.core.resources.prefs -text
+main/classes/core/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/core/manifest.stub -text
+main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch -text
+main/classes/currdata/.settings/org.eclipse.core.resources.prefs -text
+main/classes/currdata/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/currdata/.settings/org.eclipse.jdt.ui.prefs -text
+main/classes/currdata/currdata-build.launch -text
+main/classes/langdata/.externalToolBuilders/copy-data-langdata.launch -text
+main/classes/langdata/.settings/org.eclipse.core.resources.prefs -text
+main/classes/langdata/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/langdata/.settings/org.eclipse.jdt.ui.prefs -text
+main/classes/langdata/langdata-build.launch -text
+main/classes/localespi/.classpath -text
+main/classes/localespi/.project -text
+main/classes/localespi/.settings/org.eclipse.core.resources.prefs -text
+main/classes/localespi/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/localespi/manifest.stub -text
+main/classes/localespi/src/META-INF/services/java.text.spi.BreakIteratorProvider -text
+main/classes/localespi/src/META-INF/services/java.text.spi.CollatorProvider -text
+main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatProvider -text
+main/classes/localespi/src/META-INF/services/java.text.spi.DateFormatSymbolsProvider -text
+main/classes/localespi/src/META-INF/services/java.text.spi.DecimalFormatSymbolsProvider -text
+main/classes/localespi/src/META-INF/services/java.text.spi.NumberFormatProvider -text
+main/classes/localespi/src/META-INF/services/java.util.spi.CurrencyNameProvider -text
+main/classes/localespi/src/META-INF/services/java.util.spi.LocaleNameProvider -text
+main/classes/localespi/src/META-INF/services/java.util.spi.TimeZoneNameProvider -text
+main/classes/localespi/src/com/ibm/icu/impl/javaspi/ICULocaleServiceProviderConfig.properties -text
+main/classes/regiondata/.externalToolBuilders/copy-data-regiondata.launch -text
+main/classes/regiondata/.settings/org.eclipse.core.resources.prefs -text
+main/classes/regiondata/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/regiondata/.settings/org.eclipse.jdt.ui.prefs -text
+main/classes/regiondata/regiondata-build.launch -text
+main/classes/translit/.externalToolBuilders/copy-data-translit.launch -text
+main/classes/translit/.settings/org.eclipse.core.resources.prefs -text
+main/classes/translit/.settings/org.eclipse.jdt.core.prefs -text
+main/classes/translit/.settings/org.eclipse.jdt.ui.prefs -text
+main/classes/translit/translit-build.launch -text
+main/shared/.project -text
+main/shared/.settings/org.eclipse.core.resources.prefs -text
+main/shared/data/icudata.jar -text
+main/shared/data/testdata.jar -text
+main/tests/charset/.classpath -text
+main/tests/charset/.project -text
+main/tests/charset/.settings/org.eclipse.core.resources.prefs -text
+main/tests/charset/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/charset/manifest.stub -text
+main/tests/collate/.classpath -text
+main/tests/collate/.project -text
+main/tests/collate/.settings/org.eclipse.core.resources.prefs -text
+main/tests/collate/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/collate/.settings/org.eclipse.jdt.ui.prefs -text
+main/tests/collate/collate-tests-build.launch -text
+main/tests/core/.classpath -text
+main/tests/core/.project -text
+main/tests/core/.settings/org.eclipse.core.resources.prefs -text
+main/tests/core/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/core/manifest.stub -text
+main/tests/core/src/com/ibm/icu/dev/data/rbbi/english.dict -text
+main/tests/core/src/com/ibm/icu/dev/data/resources/testmessages.properties -text
+main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.MathContext.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ArabicShapingException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.MessageFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.NumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.SimpleDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.StringPrepParseException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.BuddhistCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Calendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ChineseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.CopticCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Currency.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.EthiopicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.GregorianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.HebrewCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.IslamicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.JapaneseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.SimpleTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.TimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ULocale.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.DateNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.InvalidFormatException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.OlsonTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.RelativeDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.BigDecimal.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.math.MathContext.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ArabicShapingException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.MessageFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.NumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.PluralRules.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.SimpleDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.text.StringPrepParseException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.AnnualTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.BuddhistCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Calendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ChineseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.CopticCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.Currency.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.DateTimeRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.EthiopicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.GregorianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.HebrewCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IndianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.InitialTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.IslamicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.JapaneseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.RuleBasedTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.SimpleTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TaiwanCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.TimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.ULocale.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.8.1/com.ibm.icu.util.VTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.DateNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.InvalidFormatException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.JavaTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.OlsonTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.RelativeDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.BigDecimal.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.math.MathContext.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ArabicShapingException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DateIntervalInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.MessageFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.NumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.PluralRules.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.SimpleDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.StringPrepParseException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.text.TimeUnitFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.AnnualTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.BuddhistCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Calendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ChineseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.CopticCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.Currency.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateInterval.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.DateTimeRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.EthiopicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.GregorianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.HebrewCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IndianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.InitialTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.IslamicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.JapaneseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.RuleBasedTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.SimpleTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TaiwanCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.TimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.ULocale.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.0/com.ibm.icu.util.VTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.DateNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.IllegalIcuArgumentException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.InvalidFormatException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.JavaTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.OlsonTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.RelativeDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.impl.locale.LocaleSyntaxException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.BigDecimal.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.math.MathContext.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ArabicShapingException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.CurrencyPluralInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DateIntervalInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.MessageFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.NumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.PluralRules.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.SimpleDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.StringPrepParseException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.text.TimeUnitFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.AnnualTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.BuddhistCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Calendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ChineseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.CopticCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.Currency.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateInterval.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.DateTimeRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.EthiopicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.GregorianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.HebrewCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IllformedLocaleException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IndianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.InitialTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.IslamicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.JapaneseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.RuleBasedTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.SimpleTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TaiwanCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.TimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.ULocale.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.2.1/com.ibm.icu.util.VTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.DateNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.IllegalIcuArgumentException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.InvalidFormatException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.JavaTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.OlsonTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.RelativeDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.duration.BasicDurationFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.impl.locale.LocaleSyntaxException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.math.BigDecimal.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.math.MathContext.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.ArabicShapingException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.ChineseDateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.ChineseDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.CurrencyPluralInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateIntervalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateIntervalInfo$PatternInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DateIntervalInfo.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DecimalFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.MessageFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.MessageFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.NumberFormat$Field.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.NumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.PluralFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.PluralRules.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.SelectFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.SimpleDateFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.StringPrepParseException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.text.TimeUnitFormat.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.AnnualTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.BuddhistCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.Calendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.ChineseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.CopticCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.Currency.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.DateInterval.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.DateTimeRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.EthiopicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.GregorianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.HebrewCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.IllformedLocaleException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.IndianCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.InitialTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.IslamicCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.JapaneseCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.RuleBasedTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.SimpleTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.TaiwanCalendar.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.TimeArrayTimeZoneRule.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.TimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.ULocale.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_4.4/com.ibm.icu.util.VTimeZone.dat -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.16.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges1.32.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.16.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges2.32.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.16.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRanges3.32.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.16.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesEmpty.32.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.16.tri2 -text
+main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.setRangesSingleValue.32.tri2 -text
+main/tests/framework/.classpath -text
+main/tests/framework/.project -text
+main/tests/framework/.settings/org.eclipse.core.resources.prefs -text
+main/tests/framework/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/framework/manifest.stub -text
+main/tests/localespi/.classpath -text
+main/tests/localespi/.project -text
+main/tests/localespi/.settings/org.eclipse.core.resources.prefs -text
+main/tests/localespi/manifest.stub -text
+main/tests/packaging/.settings/org.eclipse.core.resources.prefs -text
+main/tests/packaging/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/packaging/.settings/org.eclipse.jdt.ui.prefs -text
+main/tests/packaging/packaging-tests-build.launch -text
+main/tests/translit/.externalToolBuilders/copy-translit-test-data.launch -text
+main/tests/translit/.settings/org.eclipse.core.resources.prefs -text
+main/tests/translit/.settings/org.eclipse.jdt.core.prefs -text
+main/tests/translit/.settings/org.eclipse.jdt.ui.prefs -text
+main/tests/translit/translit-tests-build.launch -text
+/manifest.stub -text
+tools/build/.settings/org.eclipse.core.resources.prefs -text
+tools/build/icu4j28.api.gz -text
+tools/build/icu4j30.api.gz -text
+tools/build/icu4j32.api.gz -text
+tools/build/icu4j34.api.gz -text
+tools/build/icu4j341.api.gz -text
+tools/build/icu4j342.api.gz -text
+tools/build/icu4j343.api.gz -text
+tools/build/icu4j36.api.gz -text
+tools/build/icu4j38.api.gz -text
+tools/build/icu4j381.api.gz -text
+tools/build/icu4j400.api.gz -text
+tools/build/icu4j401.api.gz -text
+tools/build/icu4j42.api.gz -text
+tools/build/icu4j421.api.gz -text
+tools/build/icu4j44.api.gz -text
+tools/build/icu4j44.api2.gz -text
+tools/build/icu4j46.api2.gz -text
+tools/build/manifest.stub -text
+tools/misc/.settings/org.eclipse.core.resources.prefs -text
+tools/misc/manifest.stub -text
+
+# The following file types are stored in Git-LFS.
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..110959588d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,22 @@
+/*.jar
+/.project
+demos/out
+/doc
+main/classes/charset/out
+main/classes/collate/out
+main/classes/core/out
+main/classes/currdata/out
+main/classes/langdata/out
+main/classes/localespi/out
+main/classes/regiondata/out
+main/classes/translit/out
+main/tests/charset/out
+main/tests/collate/out
+main/tests/core/out
+main/tests/framework/out
+main/tests/localespi/out
+main/tests/packaging/out
+main/tests/translit/out
+/out
+tools/build/out
+tools/misc/out
diff --git a/APIChangeReport.html b/APIChangeReport.html
new file mode 100644
index 00000000000..c122f855ff1
--- /dev/null
+++ b/APIChangeReport.html
@@ -0,0 +1,208 @@
+
+
+
+
+ICU4J API Comparison: ICU4J 4.6 with ICU4J 4.8
+
+
+
+ICU4J API Comparison: ICU4J 4.6 with ICU4J 4.8
+
+
+Removed from ICU4J 4.6
+
+Package com.ibm.icu.lang
+
+UCharacter
+
+(stable) public static int getPropertyEnum (java.lang.String)
+(stable) public static int getPropertyValueEnum (int, java.lang.String)
+
+
+
+Package com.ibm.icu.text
+
+SpoofChecker
+
+(draft) public boolean check (java.lang.String)
+(draft) public boolean check (java.lang.String, SpoofChecker.CheckResult)
+
+UnicodeSet
+
+(stable) public final UnicodeSet add (java.lang.String)
+(stable) public final UnicodeSet addAll (java.lang.String)
+
+
+
+Package com.ibm.icu.util
+
+Currency
+
+(obsolete) public final ULocale getLocale (ULocale.Type)
+
+
+
+
+
+Deprecated or Obsoleted in ICU4J 4.8
+(no API obsoleted)
+
+
+Changed in ICU4J 4.8 (old, new)
+
+Package com.ibm.icu.text
+
+ (stable) public abstract class Collator extends java.lang.Object implements java.util.Comparator, java.lang.Cloneable
+(stable) public abstract class Collator extends java.lang.Object implements java.util.Comparator, com.ibm.icu.util.Freezable
+MessageFormat
+
+ (stable) public static java.lang.String format (java.lang.String, java.lang.Object[])
+(stable) public static java.lang.String format (java.lang.String, java.lang.Object...)
+
+
+
+
+
+Promoted to stable in ICU4J 4.8
+
+Package com.ibm.icu.text
+
+DateFormat
+
+(stable) public static final int MILLISECOND_FIELD
+
+
+
+
+
+Added in ICU4J 4.8
+
+Package com.ibm.icu.charset
+
+CharsetDecoderICU
+
+(draft) public final float maxBytesPerChar ()
+
+CharsetEncoderICU
+
+(draft) public final float maxCharsPerByte ()
+
+CharsetICU
+
+(draft) public boolean isFixedWidth ()
+
+
+
+Package com.ibm.icu.lang
+
+UCharacter
+
+(stable) public static int getPropertyEnum (java.lang.CharSequence)
+(stable) public static int getPropertyValueEnum (int, java.lang.CharSequence)
+
+UScript
+
+(stable) public static final int AFAKA
+(stable) public static final int JURCHEN
+(stable) public static final int MRO
+(stable) public static final int NUSHU
+(stable) public static final int SHARADA
+(stable) public static final int SORA_SOMPENG
+(stable) public static final int TAKRI
+(stable) public static final int TANGUT
+(stable) public static final int WOLEAI
+
+
+
+Package com.ibm.icu.text
+
+(draft) public static interface Collator.ReorderCodes
+(draft) public final class MessagePattern
+(draft) public static final class MessagePattern.ApostropheMode
+(draft) public static final class MessagePattern.ArgType
+(draft) public static final class MessagePattern.Part
+(draft) public static final class MessagePattern.Part.Type
+Bidi
+
+(draft) public void setContext (java.lang.String, java.lang.String)
+
+Collator
+
+(draft) public Collator cloneAsThawed ()
+(draft) public Collator freeze ()
+(draft) public static int[] getEquivalentReorderCodes (int)
+(draft) public int[] getReorderCodes ()
+(draft) public boolean isFrozen ()
+(draft) public void setReorderCodes (int...)
+
+MessageFormat
+
+(draft) public void applyPattern (java.lang.String, MessagePattern.ApostropheMode)
+(draft) public MessagePattern.ApostropheMode getApostropheMode ()
+(draft) public java.util.Set getArgumentNames ()
+(draft) public java.text.Format getFormatByArgumentName (java.lang.String)
+
+PluralRules
+
+(draft) public static final double NO_UNIQUE_VALUE
+(draft) public java.util.Collection getAllKeywordValues (java.lang.String)
+(draft) public java.util.Collection getSamples (java.lang.String)
+(draft) public double getUniqueKeywordValue (java.lang.String)
+
+RuleBasedBreakIterator
+
+(draft) public static void compileRules (java.lang.String, java.io.OutputStream)
+(draft) public static RuleBasedBreakIterator getInstanceFromCompiledRules (java.io.InputStream)
+
+RuleBasedCollator
+
+(draft) public RuleBasedCollator cloneAsThawed ()
+(draft) public Collator freeze ()
+(draft) public static int[] getEquivalentReorderCodes (int)
+(draft) public int[] getReorderCodes ()
+(draft) public boolean isFrozen ()
+(draft) public void setReorderCodes (int...)
+
+SpoofChecker
+
+(draft) public boolean failsChecks (java.lang.String)
+(draft) public boolean failsChecks (java.lang.String, SpoofChecker.CheckResult)
+
+UnicodeSet
+
+(draft) public static final UnicodeSet ALL_CODE_POINTS
+(draft) public static final UnicodeSet EMPTY
+(stable) public final UnicodeSet add (java.lang.CharSequence)
+(stable) public final UnicodeSet addAll (java.lang.CharSequence)
+
+
+
+Package com.ibm.icu.util
+
+(draft) public final class BytesTrie
+(draft) public static final class BytesTrie.Entry
+(draft) public static final class BytesTrie.Iterator
+(draft) public static final class BytesTrie.Result
+(draft) public static final class BytesTrie.State
+(draft) public final class BytesTrieBuilder
+(draft) public final class CharsTrie
+(draft) public static final class CharsTrie.Entry
+(draft) public static final class CharsTrie.Iterator
+(draft) public static final class CharsTrie.State
+(draft) public final class CharsTrieBuilder
+(draft) public class Output
+(draft) public abstract class StringTrieBuilder
+(draft) public static final class StringTrieBuilder.Option
+(draft) public static final class TimeZone.SystemTimeZoneType
+TimeZone
+
+(draft) public static final java.lang.String UNKNOWN_ZONE_ID
+(draft) public static java.util.Set getAvailableIDs (TimeZone.SystemTimeZoneType, java.lang.String, java.lang.Integer)
+(draft) public static java.lang.String getRegion (java.lang.String)
+
+
+
+
+Contents generated by ReportAPI tool on Fri May 13 12:38:20 EDT 2011. Copyright (C) 2011, International Business Machines Corporation, All Rights Reserved.
+
+
diff --git a/build.properties b/build.properties
new file mode 100644
index 00000000000..002cdbd686f
--- /dev/null
+++ b/build.properties
@@ -0,0 +1,8 @@
+#*******************************************************************************
+#* Copyright (C) 2009-2011, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+api.report.version = 48
+api.report.prev.version = 46
+release.file.ver = 4_8
+
diff --git a/build.xml b/build.xml
new file mode 100644
index 00000000000..e1a919f8bb8
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,1631 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/demos/.classpath b/demos/.classpath
new file mode 100644
index 00000000000..c13960c4471
--- /dev/null
+++ b/demos/.classpath
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/demos/.project b/demos/.project
new file mode 100644
index 00000000000..69e0c1ba71f
--- /dev/null
+++ b/demos/.project
@@ -0,0 +1,20 @@
+
+
+ icu4j-demos
+
+
+ icu4j-charset
+ icu4j-core
+ icu4j-shared
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/demos/.settings/org.eclipse.core.resources.prefs b/demos/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 00000000000..c62da786ca8
--- /dev/null
+++ b/demos/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+#Fri Nov 05 14:18:21 EDT 2010
+eclipse.preferences.version=1
+encoding/=UTF-8
diff --git a/demos/.settings/org.eclipse.jdt.core.prefs b/demos/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..1c6961a58bd
--- /dev/null
+++ b/demos/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,330 @@
+#Fri Aug 28 16:05:27 EDT 2009
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=ignore
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=ignore
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_assignment=0
+org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
+org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
+org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
+org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_after_package=1
+org.eclipse.jdt.core.formatter.blank_lines_before_field=0
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
+org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
+org.eclipse.jdt.core.formatter.blank_lines_before_method=1
+org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0
+org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
+org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
+org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_header=false
+org.eclipse.jdt.core.formatter.comment.format_html=true
+org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
+org.eclipse.jdt.core.formatter.comment.format_line_comments=true
+org.eclipse.jdt.core.formatter.comment.format_source_code=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
+org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.line_length=120
+org.eclipse.jdt.core.formatter.compact_else_if=true
+org.eclipse.jdt.core.formatter.continuation_indentation=2
+org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
+org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_empty_lines=false
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false
+org.eclipse.jdt.core.formatter.indentation.size=4
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
+org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.lineSplit=120
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
+org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
+org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
+org.eclipse.jdt.core.formatter.tabulation.char=space
+org.eclipse.jdt.core.formatter.tabulation.size=4
+org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
+org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
diff --git a/demos/.settings/org.eclipse.jdt.ui.prefs b/demos/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..abf9d8707a9
--- /dev/null
+++ b/demos/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,6 @@
+#Tue Jun 09 16:57:19 EDT 2009
+eclipse.preferences.version=1
+formatter_profile=_ICU4J Standard
+formatter_settings_version=11
+org.eclipse.jdt.ui.javadoc=false
+org.eclipse.jdt.ui.text.custom_code_templates=/**\r\n * @return the ${bare_field_name}\r\n */ /**\r\n * @param ${param} the ${bare_field_name} to set\r\n */ /**\r\n * ${tags}\r\n */ /*\r\n *******************************************************************************\r\n * Copyright (C) ${year}, International Business Machines Corporation and *\r\n * others. All Rights Reserved. *\r\n *******************************************************************************\r\n */ /**\r\n * @author ${user}\r\n *\r\n * ${tags}\r\n */ /**\r\n * \r\n */ /**\r\n * ${tags}\r\n */ /* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */ /**\r\n * ${tags}\r\n * ${see_to_target}\r\n */ ${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration} \r\n \r\n \r\n \r\n // ${todo} Auto-generated catch block\r\n${exception_var}.printStackTrace(); // ${todo} Auto-generated method stub\r\n${body_statement} ${body_statement}\r\n// ${todo} Auto-generated constructor stub return ${field}; ${field} \= ${param};
diff --git a/demos/build.properties b/demos/build.properties
new file mode 100644
index 00000000000..f374881842f
--- /dev/null
+++ b/demos/build.properties
@@ -0,0 +1,5 @@
+#*******************************************************************************
+#* Copyright (C) 2009, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+shared.dir = ../main/shared
diff --git a/demos/build.xml b/demos/build.xml
new file mode 100644
index 00000000000..a47a7c4de4a
--- /dev/null
+++ b/demos/build.xml
@@ -0,0 +1,31 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demos/demos-build.launch b/demos/demos-build.launch
new file mode 100644
index 00000000000..cead29ec957
--- /dev/null
+++ b/demos/demos-build.launch
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/demos/manifest.stub b/demos/manifest.stub
new file mode 100644
index 00000000000..5180077ab25
--- /dev/null
+++ b/demos/manifest.stub
@@ -0,0 +1,13 @@
+Manifest-Version: 1.0
+Main-Class: com.ibm.icu.dev.demo.Launcher
+Class-Path: icu4j.jar
+
+Name: com/ibm/icu/dev/demo
+Specification-Title: ICU for Java Demo
+Specification-Version: @SPECVERSION@
+Specification-Vendor: ICU
+Implementation-Title: ICU for Java Demo
+Implementation-Version: @IMPLVERSION@
+Implementation-Vendor: IBM Corporation
+Implementation-Vendor-Id: com.ibm
+Copyright-Info: @COPYRIGHT@
diff --git a/demos/src/com/ibm/icu/dev/demo/Launcher.java b/demos/src/com/ibm/icu/dev/demo/Launcher.java
new file mode 100644
index 00000000000..2d8804453ee
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/Launcher.java
@@ -0,0 +1,192 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo;
+
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Color;
+import java.awt.Frame;
+import java.awt.GridLayout;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.dev.demo.impl.DemoUtility;
+import com.ibm.icu.util.VersionInfo;
+
+
+/**
+ * @author srl
+ * Application to provide a panel of demos to launch
+ */
+public class Launcher extends DemoApplet {
+ private static final long serialVersionUID = -8054963875776183877L;
+
+ /**
+ * base package of all demos
+ */
+ public static final String demoBase = "com.ibm.icu.dev.demo";
+ /**
+ * list of classes, relative to the demoBase. all must have a static void main(String[])
+ */
+ public static final String demoList[] = {
+ "calendar.CalendarApp",
+ "charsetdet.DetectingViewer",
+ "holiday.HolidayCalendarDemo",
+// "number.CurrencyDemo", -- console
+// "rbbi.DBBIDemo",
+// "rbbi.RBBIDemo",
+// "rbbi.TextBoundDemo",
+ "rbnf.RbnfDemo",
+// "timescale.PivotDemo", -- console
+ "translit.Demo",
+ };
+
+ public class LauncherFrame extends Frame implements ActionListener {
+ private static final long serialVersionUID = -8054963875776183878L;
+
+ public Button buttonList[] = new Button[demoList.length]; // one button for each demo
+ public Label statusLabel;
+ private DemoApplet applet;
+
+ LauncherFrame(DemoApplet applet) {
+ init();
+ this.applet = applet;
+ }
+
+ public void init() {
+ // close down when close is clicked.
+ // TODO: this should be factored..
+ addWindowListener(
+ new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ setVisible(false);
+ dispose();
+
+ if (applet != null) {
+ applet.demoClosed();
+ } else System.exit(0);
+ }
+ } );
+
+ setBackground(DemoUtility.bgColor);
+ setLayout(new BorderLayout());
+
+ Panel topPanel = new Panel();
+ topPanel.setLayout(new GridLayout(5,3));
+
+ for(int i=0;i 0 && locales[i].getLanguage().equals(locales[i-1].getLanguage()) ||
+ i < locales.length - 1 &&
+ locales[i].getLanguage().equals(locales[i+1].getLanguage()))
+ {
+ localeMenu.addItem( locales[i].getDisplayName() );
+ } else {
+ localeMenu.addItem( locales[i].getDisplayLanguage());
+ }
+
+ thisMatch = DemoUtility.compareLocales(locales[i], defaultLocale);
+
+ if (thisMatch >= bestMatch) {
+ bestMatch = thisMatch;
+ selectMe = i;
+ }
+ }
+
+ localeMenu.setBackground(DemoUtility.choiceColor);
+ localeMenu.select(selectMe);
+
+ Label localeLabel =new Label("Display Locale");
+ localeLabel.setFont(DemoUtility.labelFont);
+
+ localePanel.add(localeLabel);
+ localePanel.add(localeMenu);
+ DemoUtility.fixGrid(localePanel,2);
+
+ localeMenu.addItemListener( new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ Locale loc = locales[localeMenu.getSelectedIndex()];
+ System.out.println("Change locale to " + loc.getDisplayName());
+
+ for (int i = 0; i < calendars.length; i++) {
+ calendars[i].setLocale(loc);
+ }
+ millisFormat();
+ }
+ } );
+ }
+ add(rollAddPanel);
+ add(DemoUtility.createSpacer());
+ add(localePanel);
+ add(DemoUtility.createSpacer());
+
+// COPYRIGHT
+ Panel copyrightPanel = new Panel();
+ addWithFont (copyrightPanel,new Label(DemoUtility.copyright1, Label.LEFT),
+ DemoUtility.creditFont);
+ DemoUtility.fixGrid(copyrightPanel,1);
+ add(copyrightPanel);
+ }
+
+ /**
+ * This function is called when users change the pattern text.
+ */
+ public void setFormatFromPattern() {
+ String timePattern = patternText.getText();
+
+ for (int i = 0; i < calendars.length; i++) {
+ calendars[i].applyPattern(timePattern);
+ }
+
+ millisFormat();
+ }
+
+ /**
+  * Re-parses the text of the calendar field at the given index and, when
+  * parsing succeeds, adopts the parsed instant as the current time and
+  * refreshes every calendar field while keeping the edited field's text
+  * selection.
+  *
+  * Empty input is reported through errorText and otherwise ignored. When
+  * the text cannot be parsed, every other field is set to "ERROR", the
+  * problem is reported through errorText, and the current time is left
+  * unchanged.
+  *
+  * @param index position in the calendars array of the field that changed
+  */
+ public void textChanged(int index) {
+     String rightString = calendars[index].text.getText();
+
+     if (rightString.length() == 0)
+     {
+         errorText("Error: no input to parse!");
+         return;
+     }
+
+     ParsePosition status = new ParsePosition(0);
+     Date date = null;
+     String problem = null;
+
+     try {
+         date = calendars[index].format.parse(rightString, status);
+     }
+     catch (Exception e) {
+         problem = "Exception: " + e.getClass().toString() + " parsing: "+rightString;
+     }
+
+     // parse(String, ParsePosition) reports failure by returning null, not by
+     // throwing. Check for that explicitly instead of letting a
+     // NullPointerException from date.getTime() stand in for the real error.
+     if (date == null && problem == null) {
+         problem = "Error: could not parse \"" + rightString
+                 + "\" (error index " + status.getErrorIndex() + ")";
+     }
+
+     if (problem != null) {
+         for (int i = 0; i < calendars.length; i++) {
+             if (i != index) {
+                 calendars[i].text.setText("ERROR");
+             }
+         }
+         errorText(problem);
+         return;
+     }
+
+     time = date.getTime();
+
+     // Remember the selection so that rewriting the field text in
+     // millisFormat() does not discard it.
+     int start = calendars[index].text.getSelectionStart();
+     int end = calendars[index].text.getSelectionEnd();
+
+     millisFormat();
+
+     calendars[index].text.select(start,end);
+ }
+
+ /**
+  * Formats the current time value with each calendar record's formatter
+  * and writes the result into that record's text field. If formatting
+  * throws, the field shows "ERROR" and the failure is reported through
+  * errorText.
+  */
+ public void millisFormat() {
+     for (int idx = 0; idx < calendars.length; idx++) {
+         try {
+             String formatted = calendars[idx].format.format(new Date(time));
+             calendars[idx].text.setText(formatted);
+         }
+         catch (Exception e) {
+             calendars[idx].text.setText("ERROR");
+             String report = "Exception: " + e.getClass().toString() + " formatting "
+                     + calendars[idx].name + " " + time;
+             errorText(report);
+         }
+     }
+ }
+
+
+ /**
+ * Called when the user changes the pattern text. This is a thin wrapper
+ * that delegates to setFormatFromPattern().
+ */
+ public void patternTextChanged() {
+ setFormatFromPattern();
+ }
+
+ /**
+  * Applies the default time zone to every calendar record's formatter and
+  * then reformats the current time in all fields.
+  *
+  * The original comment describes this as the handler for selecting a new
+  * representative city; the implementation takes no selection into account
+  * and always uses TimeZone.getDefault().
+  */
+ public void cityChanged() {
+     final TimeZone defaultZone = TimeZone.getDefault();
+
+     for (int idx = 0, count = calendars.length; idx < count; idx++) {
+         calendars[idx].format.setTimeZone(defaultZone);
+     }
+
+     millisFormat();
+ }
+
+ /**
+  * Moves the field chosen in the date menu by one step on the first
+  * calendar record whose roll/add checkbox is selected, then adopts that
+  * calendar's resulting time as the current time and refreshes all fields.
+  *
+  * When the "add" option is selected the field is changed with
+  * Calendar.add (+1 or -1); otherwise it is changed with Calendar.roll.
+  *
+  * @param isUp true to step the field forward, false to step it backward
+  */
+ public void dateFieldChanged(boolean isUp) {
+     final int selectedField = kRollAddFields[dateMenu.getSelectedIndex()].field;
+
+     for (int idx = 0; idx < calendars.length; idx++)
+     {
+         if (!calendars[idx].rollAdd.getState()) {
+             continue;
+         }
+
+         Calendar target = calendars[idx].calendar;
+         target.setTime(new Date(time));
+
+         if (getAdd.getState()) {
+             int delta = isUp ? 1 : -1;
+             target.add(selectedField, delta);
+         } else {
+             target.roll(selectedField, isUp);
+         }
+
+         time = target.getTime().getTime();
+         millisFormat();
+
+         // Only the first selected calendar is adjusted.
+         return;
+     }
+ }
+
+ /**
+  * Writes a diagnostic message to standard output.
+  *
+  * @param s the message to print
+  */
+ public void errorText(String s)
+ {
+     System.out.println(s);
+ }
+
+ /**
+  * Handles action events from the up and down buttons: "up" steps the
+  * selected date field forward and "down" steps it backward. Events from
+  * any other source are only logged.
+  *
+  * @param evt the action event to handle
+  */
+ public void actionPerformed(ActionEvent evt)
+ {
+     // *** Button events are handled here.
+     Object obj = evt.getSource();
+     System.out.println("action " + obj);
+     if (obj instanceof Button) {
+         // dateFieldChanged(isUp) adds +1 when isUp is true, so the up
+         // button must pass true and the down button false.
+         if (obj == up) {
+             dateFieldChanged(true);
+         } else if (obj == down) {
+             dateFieldChanged(false);
+         }
+     }
+ }
+
+ /**
+ * Handles the event. Returns true if the event is handled and should not
+ * be passed to the parent of this component. The default event handler
+ * calls some helper methods to make life easier on the programmer.
+ */
+ protected void processKeyEvent(KeyEvent evt)
+ {
+ System.out.println("key " + evt);
+ if (evt.getID() == KeyEvent.KEY_RELEASED) {
+ if (evt.getSource() == patternText) {
+ patternTextChanged();
+ }
+ else {
+ for (int i = 0; i < calendars.length; i++) {
+ if (evt.getSource() == calendars[i].text) {
+ textChanged(i);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Reacts to window events. Every event is logged to standard output; on
+  * a WINDOW_CLOSING event whose source is this window, the window is
+  * hidden and disposed, after which the owning applet is notified through
+  * demoClosed(), or the JVM exits when there is no applet.
+  *
+  * @param evt the window event to handle
+  */
+ protected void processWindowEvent(WindowEvent evt)
+ {
+     System.out.println("window " + evt);
+     if (evt.getID() == WindowEvent.WINDOW_CLOSING &&
+             evt.getSource() == this) {
+         // setVisible(false) is the supported replacement for the
+         // deprecated hide().
+         this.setVisible(false);
+         this.dispose();
+
+         if (applet != null) {
+             applet.demoClosed();
+         } else {
+             System.exit(0);
+         }
+     }
+ }
+
+ /*
+ protected void processEvent(AWTEvent evt)
+ {
+ if (evt.getID() == AWTEvent. Event.ACTION_EVENT && evt.target == up) {
+ dateFieldChanged(true);
+ return true;
+ }
+ else if (evt.id == Event.ACTION_EVENT && evt.target == down) {
+ dateFieldChanged(false);
+ return true;
+ }
+ }
+ */
+
+ // Column count passed to the JTextField created for each CalendarRec.
+ private static final int FIELD_COLUMNS = 35;
+
+
+ /**
+  * Groups one calendar with its name, the formatter used to display it,
+  * the text field that shows it, and the checkbox that selects it for
+  * roll/add operations.
+  */
+ class CalendarRec {
+     Calendar calendar;
+     DateFormat format;
+     String name;
+     JTextField text;
+     Checkbox rollAdd;
+
+     /**
+      * Creates the record with a fresh checkbox, an empty text field of
+      * FIELD_COLUMNS columns, and a FULL-style date formatter for the
+      * given calendar in the default locale.
+      *
+      * @param nameStr name stored for this calendar
+      * @param cal calendar the formatter is created for
+      */
+     public CalendarRec(String nameStr, Calendar cal)
+     {
+         this.name = nameStr;
+         this.calendar = cal;
+         this.rollAdd = new Checkbox();
+
+         this.text = new JTextField("", FIELD_COLUMNS);
+         this.text.setFont(DemoUtility.editFont);
+
+         // A default pattern (DEFAULT_FORMAT) was once applied here; that
+         // call is disabled, so the formatter keeps its FULL-style pattern.
+         this.format = DateFormat.getDateInstance(cal, DateFormat.FULL, Locale.getDefault());
+     }
+
+     /**
+      * Replaces the formatter with a FULL-style one for the given locale
+      * and re-applies the pattern of the previous formatter. The carried
+      * pattern is the empty string when the previous formatter was not a
+      * SimpleDateFormat.
+      *
+      * @param loc locale for the new formatter
+      */
+     public void setLocale(Locale loc) {
+         final String previousPattern = toPattern();
+
+         format = DateFormat.getDateInstance(calendar, DateFormat.FULL, loc);
+         applyPattern(previousPattern);
+     }
+
+     /**
+      * Applies the pattern to the formatter when it is a SimpleDateFormat;
+      * otherwise does nothing. A branch for java.text.SimpleDateFormat was
+      * sketched in the original code but is disabled.
+      *
+      * @param pattern date pattern to apply
+      */
+     public void applyPattern(String pattern) {
+         if (!(format instanceof SimpleDateFormat)) {
+             return;
+         }
+         ((SimpleDateFormat) format).applyPattern(pattern);
+     }
+
+     /**
+      * Returns the formatter's pattern when it is a SimpleDateFormat, or
+      * the empty string otherwise.
+      */
+     private String toPattern() {
+         return (format instanceof SimpleDateFormat)
+                 ? ((SimpleDateFormat) format).toPattern()
+                 : "";
+     }
+ }
+
+ // One CalendarRec per calendar handled by this frame: Gregorian, Hebrew,
+ // two Islamic variants (civil flag true and false), Buddhist and Japanese.
+ // The Chinese calendar entry is commented out.
+ private final CalendarRec[] calendars = {
+ new CalendarRec("Gregorian", new GregorianCalendar()),
+ new CalendarRec("Hebrew", new HebrewCalendar()),
+ new CalendarRec("Islamic (civil)", makeIslamic(true)),
+ new CalendarRec("Islamic (true)", makeIslamic(false)),
+ new CalendarRec("Buddhist", new BuddhistCalendar()),
+ new CalendarRec("Japanese", new JapaneseCalendar()),
+// new CalendarRec("Chinese", new ChineseCalendar()),
+ };
+
+ /**
+  * Creates an IslamicCalendar and sets its civil flag to the given value.
+  *
+  * @param civil value passed to IslamicCalendar.setCivil
+  * @return the configured calendar
+  */
+ private static final Calendar makeIslamic(boolean civil) {
+     IslamicCalendar islamic = new IslamicCalendar();
+     islamic.setCivil(civil);
+     return islamic;
+ }
+}
+
+/**
+ * Simple holder that pairs an int field value with a name.
+ */
+class RollAddField {
+    int field;
+    String name;
+
+    /**
+     * @param field the field value to store
+     * @param name the name to store alongside it
+     */
+    RollAddField(int field, String name) {
+        this.name = name;
+        this.field = field;
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java
new file mode 100644
index 00000000000..fd76ec7d17d
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarFrame.java
@@ -0,0 +1,442 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.calendar;
+
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Choice;
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.Container;
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Frame;
+import java.awt.Graphics;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.Rectangle;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.dev.demo.impl.DemoUtility;
+import com.ibm.icu.text.DateFormat;
+import com.ibm.icu.util.BuddhistCalendar;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.GregorianCalendar;
+import com.ibm.icu.util.HebrewCalendar;
+import com.ibm.icu.util.IslamicCalendar;
+import com.ibm.icu.util.JapaneseCalendar;
+import com.ibm.icu.util.SimpleTimeZone;
+
+/**
+ * A Frame is a top-level window with a title. The default layout for a frame
+ * is BorderLayout. The CalendarFrame class defines the window layout of
+ * CalendarDemo.
+ */
+class CalendarFrame extends Frame
+{
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = -4289697663503820619L;
+
+ private static final boolean DEBUG = false;
+
+ private DemoApplet applet;
+
+ /**
+  * Constructs a new CalendarFrame that is initially invisible and builds
+  * its user interface.
+  *
+  * @param myApplet the applet to notify when the window is closed; when it
+  *                 is null, closing the window exits the JVM instead
+  */
+ public CalendarFrame(DemoApplet myApplet)
+ {
+     super("Calendar Demo");
+     this.applet = myApplet;
+     init();
+
+     // Closing the window shuts down either the applet or the application.
+     WindowAdapter closeHandler = new WindowAdapter() {
+         public void windowClosing(WindowEvent e) {
+             setVisible(false);
+             dispose();
+
+             if (applet == null) {
+                 System.exit(0);
+             } else {
+                 applet.demoClosed();
+             }
+         }
+     };
+     addWindowListener(closeHandler);
+ }
+
+ private Choice displayMenu;
+ private Locale[] locales = DemoUtility.getG7Locales();
+
+ private Calendar calendars[] = new Calendar[2];
+ private Choice calMenu[] = new Choice[2];
+ private ColoredLabel monthLabel[] = new ColoredLabel[2];
+ private DateFormat monthFormat[] = new DateFormat[2];
+
+ private Button prevYear;
+ private Button prevMonth;
+ private Button gotoToday;
+ private Button nextMonth;
+ private Button nextYear;
+ private CalendarPanel calendarPanel;
+
+ /**
+  * Adds a component to a container laid out by the given GridBagLayout.
+  * The width and horizontal weight are written into the shared constraints
+  * object first, so {@code c} keeps those values after the call.
+  *
+  * @param container the container receiving the component
+  * @param component the component to add
+  * @param g         the layout the constraints are registered with
+  * @param c         the constraints object, modified in place
+  * @param gridwidth the value stored in {@code c.gridwidth}
+  * @param weightx   the value stored in {@code c.weightx}
+  */
+ private static void add(Container container, Component component,
+                         GridBagLayout g, GridBagConstraints c,
+                         int gridwidth, int weightx)
+ {
+     c.weightx = weightx;
+     c.gridwidth = gridwidth;
+     g.setConstraints(component, c);
+     container.add(component);
+ }
+
+ /**
+ * Builds the frame's user interface: the two calendar menus with their
+ * month labels, the year/month navigation buttons, the display-language
+ * menu, the copyright line and the central CalendarPanel.
+ * This method is invoked from the constructor.
+ */
+ public void init() {
+ setBackground(DemoUtility.bgColor);
+ setLayout(new BorderLayout(10,10));
+
+ Panel topPanel = new Panel();
+ GridBagLayout g = new GridBagLayout();
+ topPanel.setLayout(g);
+ GridBagConstraints c = new GridBagConstraints();
+ c.fill = GridBagConstraints.HORIZONTAL;
+
+ // Build the two menus for selecting which calendar is displayed,
+ // plus the month/year label for each calendar
+ for (int i = 0; i < 2; i++) {
+ calMenu[i] = new Choice();
+ for (int j = 0; j < CALENDARS.length; j++) {
+ calMenu[i].addItem(CALENDARS[j].name);
+ }
+ calMenu[i].setBackground(DemoUtility.choiceColor);
+ // Slot i starts out showing the i-th entry of CALENDARS
+ calMenu[i].select(i);
+ calMenu[i].addItemListener(new CalMenuListener());
+
+ // Label for the current month name
+ monthLabel[i] = new ColoredLabel("", COLORS[i]);
+ monthLabel[i].setFont(DemoUtility.titleFont);
+
+ // And the default calendar to use for this slot
+ calendars[i] = CALENDARS[i].calendar;
+
+ add(topPanel, calMenu[i], g, c, 5, 0);
+ add(topPanel, monthLabel[i], g, c, GridBagConstraints.REMAINDER, 1);
+ }
+
+ // Now add the next/previous year/month buttons:
+ prevYear = new Button("<<");
+ prevYear.addActionListener(new AddAction(Calendar.YEAR, -1));
+
+ prevMonth = new Button("<");
+ prevMonth.addActionListener(new AddAction(Calendar.MONTH, -1));
+
+ gotoToday = new Button("Today");
+ gotoToday.addActionListener( new ActionListener()
+ {
+ public void actionPerformed(ActionEvent e) {
+ calendarPanel.setDate( new Date() );
+ updateMonthName();
+ }
+ } );
+
+ nextMonth = new Button(">");
+ nextMonth.addActionListener(new AddAction(Calendar.MONTH, 1));
+
+ nextYear = new Button(">>");
+ nextYear.addActionListener(new AddAction(Calendar.YEAR, 1));
+
+ c.fill = GridBagConstraints.NONE;
+ add(topPanel, prevYear, g, c, 1, 0);
+ add(topPanel, prevMonth, g, c, 1, 0);
+ add(topPanel, gotoToday, g, c, 1, 0);
+ add(topPanel, nextMonth, g, c, 1, 0);
+ add(topPanel, nextYear, g, c, 1, 0);
+
+ // Now add the menu for selecting the display language
+ Panel displayPanel = new Panel();
+ {
+ displayMenu = new Choice();
+ Locale defaultLocale = Locale.getDefault();
+ int bestMatch = -1, thisMatch = -1;
+ int selectMe = 0;
+
+ for (int i = 0; i < locales.length; i++) {
+ // Show the full display name when a neighbouring entry has the same
+ // language, otherwise the language name alone
+ if (i > 0 &&
+ locales[i].getLanguage().equals(locales[i-1].getLanguage()) ||
+ i < locales.length - 1 &&
+ locales[i].getLanguage().equals(locales[i+1].getLanguage()))
+ {
+ displayMenu.addItem( locales[i].getDisplayName() );
+ } else {
+ displayMenu.addItem( locales[i].getDisplayLanguage());
+ }
+
+ thisMatch = DemoUtility.compareLocales(locales[i], defaultLocale);
+
+ // ">=" means that among equally good matches the last one is selected
+ if (thisMatch >= bestMatch) {
+ bestMatch = thisMatch;
+ selectMe = i;
+ }
+ }
+
+ displayMenu.setBackground(DemoUtility.choiceColor);
+ displayMenu.select(selectMe);
+
+ displayMenu.addItemListener( new ItemListener()
+ {
+ public void itemStateChanged(ItemEvent e) {
+ Locale loc = locales[displayMenu.getSelectedIndex()];
+ calendarPanel.setLocale( loc );
+ // Drop both cached formatters so updateMonthName() rebuilds them
+ monthFormat[0] = monthFormat[1] = null;
+ updateMonthName();
+ repaint();
+ }
+ } );
+
+ Label l1 = new Label("Display Language:", Label.RIGHT);
+ l1.setFont(DemoUtility.labelFont);
+
+ displayPanel.setLayout(new FlowLayout());
+ displayPanel.add(l1);
+ displayPanel.add(displayMenu);
+
+ }
+ c.fill = GridBagConstraints.NONE;
+ c.anchor = GridBagConstraints.EAST;
+
+ add(topPanel, displayPanel, g, c, GridBagConstraints.REMAINDER, 0);
+
+ // The title, buttons, etc. go in a panel at the top of the window
+ add("North", topPanel);
+
+ // The copyright notice goes at the bottom of the window
+ Label copyright = new Label(DemoUtility.copyright1, Label.LEFT);
+ copyright.setFont(DemoUtility.creditFont);
+ add("South", copyright);
+
+ // Now create the big calendar panel and stick it in the middle
+ calendarPanel = new CalendarPanel( locales[displayMenu.getSelectedIndex()] );
+ add("Center", calendarPanel);
+
+ // Hand both calendar slots and their colors to the panel
+ for (int i = 0; i < 2; i++) {
+ calendarPanel.setCalendar(i, calendars[i]);
+ calendarPanel.setColor(i, COLORS[i]);
+ }
+
+ updateMonthName();
+ }
+
+
+ /**
+  * Refreshes the month/year label of both calendar slots. A slot whose
+  * cached formatter is null gets a new one, created for that slot's
+  * calendar and the currently selected display locale.
+  */
+ private void updateMonthName()
+ {
+     for (int i = 0; i < 2; i++) {
+         if (monthFormat[i] == null) { // TODO: optimize
+             try {
+                 DateFormat f = DateFormat.getDateTimeInstance(
+                         calendars[i], DateFormat.MEDIUM, -1,
+                         locales[displayMenu.getSelectedIndex()]);
+                 if (f instanceof com.ibm.icu.text.SimpleDateFormat) {
+                     com.ibm.icu.text.SimpleDateFormat f1 = (com.ibm.icu.text.SimpleDateFormat) f;
+                     f1.applyPattern("MMMM, yyyy G");
+                     f1.setTimeZone(new SimpleTimeZone(0, "UTC"));
+                 }
+                 monthFormat[i] = f;
+             } catch (ClassCastException e) {
+                 //hey {lw} - there's something wrong in this routine that causes exceptions.
+                 System.out.println(e);
+             }
+         }
+
+         // If the formatter could not be created, keep the current label text
+         // instead of dereferencing null.
+         if (monthFormat[i] != null) {
+             monthLabel[i].setText( monthFormat[i].format( calendarPanel.firstOfMonth() ));
+         }
+     }
+ }
+
+ /**
+  * Listens to the two calendar-selection menus. When a menu changes, the
+  * matching entry of calendars[] is switched to the chosen calendar. If
+  * another slot already shows that calendar, that slot is given the
+  * calendar this slot is giving up, so the two slots never show the same one.
+  */
+ private class CalMenuListener implements ItemListener
+ {
+     public void itemStateChanged(ItemEvent e)
+     {
+         // Work out which of the menus fired the event.
+         int slot = -1;
+         for (int i = 0; i < calMenu.length; i++) {
+             if (e.getItemSelectable() == calMenu[i]) {
+                 slot = i;
+                 break;
+             }
+         }
+         if (slot < 0) {
+             return;
+         }
+
+         Calendar chosen = CALENDARS[ calMenu[slot].getSelectedIndex() ].calendar;
+         if (chosen == calendars[slot]) {
+             return;     // nothing changed
+         }
+
+         // Hand this slot's current calendar to any other slot that already
+         // shows the chosen one, and update that slot's menu to match.
+         for (int other = 0; other < calendars.length; other++) {
+             if (other == slot || calendars[other] != chosen) {
+                 continue;
+             }
+             calendars[other] = calendars[slot];
+             calendarPanel.setCalendar(other, calendars[other]);
+             monthFormat[other] = null;
+
+             for (int k = 0; k < CALENDARS.length; k++) {
+                 if (calendars[other] == CALENDARS[k].calendar) {
+                     calMenu[other].select(k);
+                     break;
+                 }
+             }
+         }
+
+         // Finally switch this slot to the calendar the user picked.
+         calendars[slot] = chosen;
+         calendarPanel.setCalendar(slot, chosen);
+         monthFormat[slot] = null;
+
+         updateMonthName();
+     }
+ }
+
+ /**
+  * Action listener behind the next/previous year/month buttons: adds a
+  * fixed amount to one calendar field of the calendar panel and then
+  * refreshes the month labels.
+  */
+ private class AddAction implements ActionListener {
+     private int field;
+     private int amount;
+
+     AddAction(int field, int amount) {
+         this.amount = amount;
+         this.field = field;
+     }
+
+     public void actionPerformed(ActionEvent e) {
+         calendarPanel.add(field, amount);
+         updateMonthName();
+     }
+ }
+
+ /**
+  * ColoredLabel is similar to java.awt.Label, with two differences:
+  *
+  * - Its text color can be chosen.
+  *
+  * - It paints its text itself with drawString instead of relying on a
+  *   host-specific "Peer" object like AWT does. On 1.2, using drawString
+  *   gives us Bidi reordering for free.
+  */
+ private static class ColoredLabel extends Component {
+     /**
+      * For serialization
+      */
+     private static final long serialVersionUID = 5004484960341875722L;
+
+     String text;
+     Color color = Color.black;
+     Font font = DemoUtility.labelFont;
+
+     /** Creates a label with the default (black) text color. */
+     public ColoredLabel(String label) {
+         this(label, Color.black);
+     }
+
+     /** Creates a label that draws its text in the given color. */
+     public ColoredLabel(String label, Color c) {
+         text = label;
+         color = c;
+     }
+
+     /** Replaces the text and requests a repaint. */
+     public void setText(String label) {
+         text = label;
+         repaint();
+     }
+
+     /** Replaces the font used for drawing and requests a repaint. */
+     public void setFont(Font f) {
+         font = f;
+         repaint();
+     }
+
+     /** Draws the text, inset from the left by the width of a no-break space. */
+     public void paint(Graphics g) {
+         FontMetrics fm = g.getFontMetrics(font);
+         Rectangle bounds = getBounds();
+
+         int left = fm.stringWidth("\u00a0");
+         int baseline = bounds.height/2 + fm.getHeight()
+                        - fm.getAscent() + fm.getLeading()/2;
+
+         g.setColor(color);
+         g.setFont(font);
+         g.drawString(text, left, baseline);
+     }
+
+     /** The preferred size is the same as the minimum size. */
+     public Dimension getPreferredSize() {
+         return getMinimumSize();
+     }
+
+     /** Text width plus a no-break space on each side; font height plus twice the leading. */
+     public Dimension getMinimumSize() {
+         FontMetrics fm = getFontMetrics(font);
+
+         int width = fm.stringWidth(text) + 2*fm.stringWidth("\u00a0");
+         int height = fm.getHeight() + fm.getLeading()*2;
+         return new Dimension(width, height);
+     }
+ }
+
+ /**
+  * Prints the given message to standard output, but only when the DEBUG
+  * flag of this class is set.
+  *
+  * @param s the message to print
+  */
+ public void errorText(String s)
+ {
+     if (!DEBUG) {
+         return;
+     }
+     System.out.println(s);
+ }
+
+ /**
+  * Pairs a calendar instance with the name shown for it in the menus.
+  */
+ class CalendarRec {
+     Calendar calendar;
+     String name;
+
+     public CalendarRec(String nameStr, Calendar cal)
+     {
+         this.name = nameStr;
+         this.calendar = cal;
+     }
+ }
+
+ // The calendar systems offered in the two menus. The order matters:
+ // init() gives slot i the i-th entry as its initial calendar.
+ private final CalendarRec[] CALENDARS = {
+ new CalendarRec("Gregorian Calendar", new GregorianCalendar()),
+ new CalendarRec("Hebrew Calendar", new HebrewCalendar()),
+ new CalendarRec("Islamic Calendar", makeIslamic(false)),
+ new CalendarRec("Islamic Civil Calendar ", makeIslamic(true)),
+ new CalendarRec("Buddhist Calendar", new BuddhistCalendar()),
+ new CalendarRec("Japanese Calendar", new JapaneseCalendar()),
+ };
+
+ /**
+  * Creates an IslamicCalendar and forwards {@code civil} to its
+  * setCivil method.
+  *
+  * @param civil the value passed to setCivil
+  * @return the configured calendar
+  */
+ private static final Calendar makeIslamic(boolean civil) {
+     IslamicCalendar islamic = new IslamicCalendar();
+     islamic.setCivil(civil);
+     return islamic;
+ }
+
+ static final Color[] COLORS = { Color.blue, Color.black };
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java
new file mode 100644
index 00000000000..8ea94d3f9fa
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/calendar/CalendarPanel.java
@@ -0,0 +1,365 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.calendar;
+
+import java.awt.Canvas;
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.awt.Point;
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.dev.demo.impl.DemoUtility;
+import com.ibm.icu.text.DateFormatSymbols;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.SimpleTimeZone;
+
+class CalendarPanel extends Canvas {
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 625400018027387141L;
+
+ /**
+  * Creates a panel that displays its text in the given locale.
+  *
+  * @param locale the initial display locale
+  */
+ public CalendarPanel( Locale locale ) {
+     this.setLocale(locale);
+ }
+
+ /**
+  * Changes the display locale. When the locale really changes, the date
+  * symbols of every installed calendar are rebuilt, the drawing direction
+  * is re-derived from the language ("iw" and "ar" are drawn right-to-left)
+  * and a repaint is requested. Setting the current locale again is a no-op.
+  *
+  * @param locale the new display locale
+  */
+ public void setLocale(Locale locale) {
+     if (fDisplayLocale != null && fDisplayLocale.equals(locale)) {
+         return;
+     }
+
+     fDisplayLocale = locale;
+     dirty = true;
+
+     for (int slot = 0; slot < fCalendar.length; slot++) {
+         if (fCalendar[slot] == null) {
+             continue;
+         }
+         fSymbols[slot] = new DateFormatSymbols(fCalendar[slot], fDisplayLocale);
+     }
+
+     String lang = locale.getLanguage();
+     leftToRight = !lang.equals("iw") && !lang.equals("ar");
+
+     repaint();
+ }
+
+ /**
+  * Stores the given date as the month to display, marks the cached layout
+  * values as stale and requests a repaint.
+  *
+  * @param date a date inside the month to show
+  */
+ public void setDate(Date date) {
+     this.fStartOfMonth = date;
+     this.dirty = true;
+     repaint();
+ }
+
+ /**
+  * Moves the displayed month by adding {@code delta} to the given calendar
+  * field. The arithmetic is done with the calendar in slot 0.
+  *
+  * @param field a Calendar field constant
+  * @param delta the amount to add; may be negative
+  */
+ public void add(int field, int delta)
+ {
+     synchronized(fCalendar) {
+         Calendar primary = fCalendar[0];
+         primary.setTime(fStartOfMonth);
+         primary.add(field, delta);
+         fStartOfMonth = primary.getTime();
+     }
+     dirty = true;
+     repaint();
+ }
+
+ /**
+  * Sets the color used for the given calendar slot and requests a repaint.
+  *
+  * @param index the calendar slot
+  * @param c     the color for that slot
+  */
+ public void setColor(int index, Color c) {
+     this.fColor[index] = c;
+     repaint();
+ }
+
+ /**
+  * Installs a calendar in the given slot. The new calendar is set to the
+  * time of the calendar it replaces, or to the current time when the slot
+  * was empty. The slot's date symbols are rebuilt for the display locale.
+  *
+  * @param index the calendar slot
+  * @param c     the calendar to install; its time is modified
+  */
+ public void setCalendar(int index, Calendar c) {
+     Date date;
+     if (fCalendar[index] == null) {
+         date = new Date();
+     } else {
+         date = fCalendar[index].getTime();
+     }
+
+     fCalendar[index] = c;
+     fCalendar[index].setTime(date);
+     fSymbols[index] = new DateFormatSymbols(c, fDisplayLocale);
+
+     dirty = true;
+     repaint();
+ }
+
+ /**
+  * Returns the calendar installed in the given slot, or null if the slot
+  * is empty.
+  */
+ public Calendar getCalendar(int index) {
+     return this.fCalendar[index];
+ }
+
+ /**
+  * Returns the locale last accepted by setLocale.
+  */
+ public Locale getDisplayLocale() {
+     return this.fDisplayLocale;
+ }
+
+ /**
+  * Returns the stored start-of-month date. The value is only normalized to
+  * the first day of its month when calculate() runs; right after setDate
+  * or add it is whatever date those methods stored.
+  */
+ public Date firstOfMonth() {
+     return this.fStartOfMonth;
+ }
+
+ /**
+  * Computes the first day of the month containing the given date, using
+  * the calendar in slot 0. That calendar is cleared and left set to the
+  * returned date.
+  *
+  * @param dateInMonth any date inside the month of interest
+  * @return the date with era, year and month kept and day-of-month 1
+  */
+ private Date startOfMonth(Date dateInMonth)
+ {
+     synchronized(fCalendar) {
+         Calendar primary = fCalendar[0];
+         primary.setTime(dateInMonth);
+
+         // Remember the fields that identify the month...
+         int era = primary.get(Calendar.ERA);
+         int year = primary.get(Calendar.YEAR);
+         int month = primary.get(Calendar.MONTH);
+
+         // ...then rebuild the date from just those fields plus day 1.
+         primary.clear();
+         primary.set(Calendar.ERA, era);
+         primary.set(Calendar.YEAR, year);
+         primary.set(Calendar.MONTH, month);
+         primary.set(Calendar.DATE, 1);
+
+         return primary.getTime();
+     }
+ }
+
+ /**
+  * Recomputes the cached layout values used by paint(): the week geometry
+  * (minDay, daysInWeek, firstDayOfWeek), the weekday of the first day of the
+  * month, the number of days in the month and the number of week rows.
+  * Also normalizes fStartOfMonth to the first day of its month and clears
+  * the dirty flag. All calculations use the calendar in slot 0.
+  */
+ private void calculate()
+ {
+     //
+     // As a workaround for JDK 1.1.3 and below, where Calendars and time
+     // zones are a bit goofy, always set my calendar's time zone to UTC.
+     // You would think I would want to do this in the "set" function above,
+     // but if I do that, the program hangs when this class is loaded,
+     // perhaps due to some sort of static initialization ordering problem.
+     // So I do it here instead.
+     //
+     fCalendar[0].setTimeZone(new SimpleTimeZone(0, "UTC"));
+
+     Calendar c = (Calendar)fCalendar[0].clone();    // Temporary copy
+
+     fStartOfMonth = startOfMonth(fStartOfMonth);
+
+     // Stash away a few useful constants for this calendar and display
+     minDay = c.getMinimum(Calendar.DAY_OF_WEEK);
+     daysInWeek = c.getMaximum(Calendar.DAY_OF_WEEK) - minDay + 1;
+
+     firstDayOfWeek = Calendar.getInstance(fDisplayLocale).getFirstDayOfWeek();
+
+     // Find the day of week of the first day in this month
+     c.setTime(fStartOfMonth);
+     firstDayInMonth = c.get(Calendar.DAY_OF_WEEK);
+     int firstWeek = c.get(Calendar.WEEK_OF_MONTH);
+
+     // Now find the # of days in the month: rolling the day-of-month back
+     // by one from day 1 wraps around to the last day.
+     c.roll(Calendar.DATE, false);
+     daysInMonth = c.get(Calendar.DATE);
+
+     // Finally, find the end of the month, i.e. one second before the
+     // start of the next one
+     c.roll(Calendar.DATE, true);
+     c.add(Calendar.MONTH, 1);
+     c.getTime();        // JDK 1.1.2 bug workaround
+     c.add(Calendar.SECOND, -1);
+     int lastWeek = c.get(Calendar.WEEK_OF_MONTH);
+
+     // Calculate the number of full or partial weeks in this month.
+     numWeeks = lastWeek - firstWeek + 1;
+
+     dirty = false;
+ }
+
+ static final int XINSET = 4;
+ static final int YINSET = 2;
+
+ /*
+  * Converts a 1-based day number within the month into 0-based grid
+  * coordinates, written into pos (x = column, y = row). Relies on the
+  * values cached by calculate().
+  */
+ private void dateToCell(int date, Point pos)
+ {
+     int index = date + firstDayInMonth - firstDayOfWeek - minDay;
+
+     // The month starts on a weekday that comes before the first displayed
+     // weekday, so shift by one whole week.
+     if (firstDayInMonth < firstDayOfWeek) {
+         index += daysInWeek;
+     }
+
+     pos.y = index / daysInWeek;
+     pos.x = index % daysInWeek;
+ }
+ //private Point dateToCell(int date) {
+ // Point p = new Point(0,0);
+ // dateToCell(date, p);
+ // return p;
+ //}
+
+ /**
+ * Paints the month grid: the weekday names across the top, a white
+ * background behind the days of the displayed month, the grid lines, and
+ * in every cell the day number of calendar slots 0 and 1 (plus the month
+ * name on the first day of a month and in the very first cell).
+ * Recomputes the cached layout values first when they are stale.
+ */
+ public void paint(Graphics g) {
+
+ if (dirty) {
+ calculate();
+ }
+
+ Point cellPos = new Point(0,0); // Temporary variable
+ Dimension d = this.getSize();
+
+ g.setColor(Color.lightGray);
+ g.fillRect(0,0,d.width,d.height);
+
+ // Draw the day names at the top
+ g.setColor(Color.black);
+ g.setFont(DemoUtility.labelFont);
+ FontMetrics fm = g.getFontMetrics();
+ int labelHeight = fm.getHeight() + YINSET * 2;
+
+ int v = fm.getAscent() + YINSET;
+ for (int i = 0; i < daysInWeek; i++) {
+ int dayNum = (i + minDay + firstDayOfWeek - 2) % daysInWeek + 1;
+ String dayName = fSymbols[0].getWeekdays()[dayNum];
+
+
+ // Center the name horizontally in its column; columns run from the
+ // right edge when the display is right-to-left
+ double h;
+ if (leftToRight) {
+ h = d.width*(i + 0.5) / daysInWeek;
+ } else {
+ h = d.width*(daysInWeek - i - 0.5) / daysInWeek;
+ }
+ h -= fm.stringWidth(dayName) / 2;
+
+ g.drawString(dayName, (int)h, v);
+ }
+
+ // NOTE(review): the division below is done on ints before widening to
+ // double, unlike cellWidth on the next line - confirm this is intended
+ double cellHeight = (d.height - labelHeight - 1) / numWeeks;
+ double cellWidth = (double)(d.width - 1) / daysInWeek;
+
+ // Draw a white background in the part of the calendar
+ // that displays this month.
+ // First figure out how much of the first week should be shaded.
+ {
+ g.setColor(Color.white);
+ dateToCell(1, cellPos);
+ int width = (int)(cellPos.x*cellWidth); // Width of unshaded area
+
+ if (leftToRight) {
+ g.fillRect((int)(width), labelHeight ,
+ d.width - width, (int)cellHeight);
+ } else {
+ g.fillRect(0, labelHeight ,
+ d.width - width, (int)cellHeight);
+ }
+
+ // All of the intermediate weeks get shaded completely
+ g.fillRect(0, (int)(labelHeight + cellHeight),
+ d.width, (int)(cellHeight * (numWeeks - 2)));
+
+ // Now figure out the last week.
+ dateToCell(daysInMonth, cellPos);
+ width = (int)((cellPos.x+1)*cellWidth); // Width of shaded area
+
+ if (leftToRight) {
+ g.fillRect(0, (int)(labelHeight + (numWeeks-1) * cellHeight),
+ width, (int)cellHeight);
+ } else {
+ g.fillRect(d.width - width, (int)(labelHeight + (numWeeks-1) * cellHeight),
+ width, (int)cellHeight);
+ }
+
+ }
+ // Draw the X/Y grid lines
+ g.setColor(Color.black);
+ for (int i = 0; i <= numWeeks; i++) {
+ int y = (int)(labelHeight + i * cellHeight);
+ g.drawLine(0, y, d.width - 1, y);
+ }
+ for (int i = 0; i <= daysInWeek; i++) {
+ int x = (int)(i * cellWidth);
+ g.drawLine(x, labelHeight, x, d.height - 1);
+ }
+
+ // Now loop through all of the days in the month, figure out where
+ // they go in the grid, and draw the day # for each one
+
+ // Figure out the date of the first cell in the calendar display
+ int cell = (1 + firstDayInMonth - firstDayOfWeek - minDay);
+ if (firstDayInMonth < firstDayOfWeek) {
+ cell += daysInWeek;
+ }
+
+ // c walks through the displayed dates one day per cell, starting
+ // "cell" days before the first of the month
+ Calendar c = (Calendar)fCalendar[0].clone();
+ c.setTime(fStartOfMonth);
+ c.add(Calendar.DATE, -cell);
+
+ StringBuffer buffer = new StringBuffer();
+
+ for (int row = 0; row < numWeeks; row++) {
+ for (int col = 0; col < daysInWeek; col++) {
+
+ g.setFont(DemoUtility.numberFont);
+ g.setColor(Color.black);
+ fm = g.getFontMetrics();
+
+ int cellx;
+ if (leftToRight) {
+ cellx = (int)((col) * cellWidth);
+ } else {
+ cellx = (int)((daysInWeek - col - 1) * cellWidth);
+ }
+
+ int celly = (int)(row * cellHeight + labelHeight);
+
+ // Only calendar slots 0 and 1 are drawn; slot i goes on text line i
+ // of the cell. Note that this sets the time of fCalendar[i].
+ for (int i = 0; i < 2; i++) {
+ fCalendar[i].setTime(c.getTime());
+
+ int date = fCalendar[i].get(Calendar.DATE);
+ buffer.setLength(0);
+ buffer.append(date);
+ String dayNum = buffer.toString();
+
+ int x;
+
+ // Day numbers hug the trailing edge of the cell
+ if (leftToRight) {
+ x = cellx + (int)cellWidth - XINSET - fm.stringWidth(dayNum);
+ } else {
+ x = cellx + XINSET;
+ }
+ int y = celly + + fm.getAscent() + YINSET + i * fm.getHeight();
+
+ if (fColor[i] != null) {
+ g.setColor(fColor[i]);
+ }
+ g.drawString(dayNum, x, y);
+
+ // Month names hug the leading edge, on the same text line
+ if (date == 1 || row == 0 && col == 0) {
+ g.setFont(DemoUtility.numberFont);
+ String month = fSymbols[i].getMonths()[
+ fCalendar[i].get(Calendar.MONTH)];
+
+ if (leftToRight) {
+ x = cellx + XINSET;
+ } else {
+ x = cellx + (int)cellWidth - XINSET - fm.stringWidth(month);
+ }
+ g.drawString(month, x, y);
+ }
+ }
+
+ c.add(Calendar.DATE, 1);
+ }
+ }
+ }
+
+ // Important state variables
+ private Calendar[] fCalendar = new Calendar[4];
+ private Color[] fColor = new Color[4];
+
+ private Locale fDisplayLocale;
+ private DateFormatSymbols[] fSymbols = new DateFormatSymbols[4];
+
+ private Date fStartOfMonth = new Date(); // 00:00:00 on first day of month
+
+ // Cached calculations to make drawing faster.
+ private transient int minDay; // Minimum legal day #
+ private transient int daysInWeek; // # of days in a week
+ private transient int firstDayOfWeek; // First day to display in week
+ private transient int numWeeks; // # full or partial weeks in month
+ private transient int daysInMonth; // # days in this month
+ private transient int firstDayInMonth; // Day of week of first day in month
+ private transient boolean leftToRight;
+
+ private transient boolean dirty = true;
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/calendar/package.html b/demos/src/com/ibm/icu/dev/demo/calendar/package.html
new file mode 100644
index 00000000000..c1bb1050957
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/calendar/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+Calendar demo applications including date/time arithmetic.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java b/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java
new file mode 100644
index 00000000000..284d16d26b5
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/charsetdet/DetectingViewer.java
@@ -0,0 +1,421 @@
+/*
+ **************************************************************************
+ * Copyright (C) 2005-2010, International Business Machines Corporation *
+ * and others. All Rights Reserved. *
+ **************************************************************************
+ *
+ */
+
+package com.ibm.icu.dev.demo.charsetdet;
+
+import java.awt.Font;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.KeyEvent;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.security.AccessControlException;
+
+import javax.swing.JFileChooser;
+import javax.swing.JFrame;
+import javax.swing.JMenu;
+import javax.swing.JMenuBar;
+import javax.swing.JMenuItem;
+import javax.swing.JOptionPane;
+import javax.swing.JScrollPane;
+import javax.swing.JTextPane;
+import javax.swing.KeyStroke;
+
+import com.ibm.icu.charset.CharsetICU;
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
+/**
+ * This simple application demonstrates how to use the CharsetDetector API. It
+ * opens a file or web page, detects the encoding, and then displays it using that
+ * encoding.
+ */
+public class DetectingViewer extends JFrame implements ActionListener
+{
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = -2307065724464747775L;
+ private JTextPane text;
+ private JFileChooser fileChooser;
+
+ /**
+  * Creates the viewer window, builds its menus and text area, and shows it.
+  * When the file chooser cannot be created because of an access control
+  * restriction, the viewer continues without file browsing.
+  *
+  * @throws java.awt.HeadlessException
+  */
+ public DetectingViewer()
+ {
+     super();
+     DemoApplet.demoFrameOpened();
+
+     // The chooser must be settled before makeMenus() runs, because the
+     // "Open File..." item is disabled when no chooser is available.
+     try {
+         fileChooser = new JFileChooser();
+     } catch (AccessControlException ace) {
+         System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
+         fileChooser = null;
+     }
+
+//        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+     setSize(800, 800);
+     setJMenuBar(makeMenus());
+
+     text = new JTextPane();
+     text.setContentType("text/plain");
+     text.setText("");
+     text.setSize(800, 800);
+     text.setFont(new Font("Arial Unicode MS", Font.PLAIN, 24));
+
+     getContentPane().add(new JScrollPane(text));
+     setVisible(true);
+
+     // Closing the window goes through the same path as the Quit menu item.
+     WindowAdapter closeHandler = new WindowAdapter() {
+         public void windowClosing(WindowEvent e) {
+             doQuit();
+         }
+     };
+     addWindowListener(closeHandler);
+ }
+
+ /**
+  * Dispatches a menu action to its handler, based on the action command
+  * string. Unknown commands are ignored.
+  */
+ public void actionPerformed(ActionEvent event)
+ {
+     final String cmd = event.getActionCommand();
+
+     if (cmd.equals("New...")) {
+         doNew();
+         return;
+     }
+     if (cmd.equals("Open File...")) {
+         doOpenFile();
+         return;
+     }
+     if (cmd.equals("Open URL...")) {
+         doOpenURL();
+         return;
+     }
+     if (cmd.equals("Quit")) {
+         doQuit();
+     }
+ }
+
+ /**
+  * Application entry point: opens one viewer window. The arguments are
+  * not used.
+  */
+ public static void main(String[] args)
+ {
+     // The constructor shows the window; no reference needs to be kept.
+     new DetectingViewer();
+ }
+
+ /**
+  * Shows a modal error dialog owned by this window.
+  *
+  * @param title the dialog title
+  * @param msg   the message to display
+  */
+ private void errorDialog(String title, String msg)
+ {
+     final int kind = JOptionPane.ERROR_MESSAGE;
+     JOptionPane.showMessageDialog(this, msg, title, kind);
+ }
+
+ /**
+  * Opens a file for buffered reading.
+  *
+  * @param file the file to open
+  * @return the buffered stream, or null after an error dialog has been
+  *         shown because the file could not be opened
+  */
+ private BufferedInputStream openFile(File file)
+ {
+     final FileInputStream raw;
+
+     try {
+         raw = new FileInputStream(file);
+     } catch (Exception e) {
+         errorDialog("Error Opening File", e.getMessage());
+         return null;
+     }
+
+     return new BufferedInputStream(raw);
+ }
+
+// private void openFile(String directory, String filename)
+// {
+// openFile(new File(directory, filename));
+// }
+
+
+ /**
+  * Opens a URL for buffered reading.
+  *
+  * @param url the URL text to open
+  * @return the buffered stream, or null after an error dialog has been
+  *         shown because the URL was malformed or could not be opened
+  */
+ private BufferedInputStream openURL(String url)
+ {
+     final InputStream raw;
+
+     try {
+         raw = new URL(url).openStream();
+     } catch (Exception e) {
+         errorDialog("Error Opening URL", e.getMessage());
+         return null;
+     }
+
+     return new BufferedInputStream(raw);
+ }
+
+ /**
+  * Builds the display text for a match: its name followed by its language
+  * in parentheses.
+  */
+ private String encodingName(CharsetMatch match)
+ {
+     String name = match.getName();
+     String language = match.getLanguage();
+     return name + " (" + language + ")";
+ }
+
+ /**
+  * Replaces the contents of the second menu of the menu bar with one item
+  * per match, labelled with the encoding name and the match confidence.
+  *
+  * @param matches the matches to list, in display order
+  */
+ private void setMatchMenu(CharsetMatch[] matches)
+ {
+     final JMenu encodings = getJMenuBar().getMenu(1);
+     encodings.removeAll();
+
+     for (int i = 0; i < matches.length; i++) {
+         String label = encodingName(matches[i]) + " " + matches[i].getConfidence();
+         encodings.add(new JMenuItem(label));
+     }
+ }
+
+ private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
+ private byte[] styleTag = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
+ private static int BUFFER_SIZE = 100000;
+
+ /**
+  * Tests whether the bytes of {@code tag} appear in {@code buffer}
+  * starting at {@code offset}.
+  *
+  * @param buffer the data to inspect
+  * @param offset index of the first byte to compare
+  * @param length number of valid bytes in {@code buffer}
+  * @param tag    the tag name bytes to look for
+  * @return true only when the whole tag matched before the valid data ended
+  */
+ private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
+ {
+     final int available = length - offset;
+     int matched = 0;
+
+     while (matched < tag.length && matched < available) {
+         if (buffer[offset + matched] != tag[matched]) {
+             return false;
+         }
+         matched += 1;
+     }
+
+     // Running out of data before the end of the tag is not a match.
+     return matched == tag.length;
+ }
+
+ /**
+  * Tests whether {@code buffer} holds a '/' at {@code offset} immediately
+  * followed by the bytes of {@code tag}, i.e. the inside of a closing tag.
+  *
+  * @param buffer the data to inspect
+  * @param offset index of the byte expected to be '/'
+  * @param length number of valid bytes in {@code buffer}
+  * @param tag    the tag name bytes to look for
+  * @return true when the slash and the whole tag matched within the valid data
+  */
+ private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
+ {
+     // Check the bound before reading: when '<' is the very last byte of a
+     // completely filled buffer, offset is one past the end of the array.
+     if (offset >= length || buffer[offset] != (byte) '/') {
+         return false;
+     }
+
+     return openTag(buffer, offset + 1, length, tag);
+ }
+
+ /**
+  * Reads up to BUFFER_SIZE bytes from the stream and returns them with
+  * markup removed: everything between '<' and '>' is dropped, and so is
+  * everything between an opening and a closing script or style tag.
+  * The stream is marked before reading, so a caller can reset() it.
+  *
+  * @param in the stream to read; it is marked but not reset or closed here
+  * @return the filtered bytes, or null if reading failed
+  */
+ private byte[] filter(InputStream in)
+ {
+     final byte[] data = new byte[BUFFER_SIZE];
+     int filled = 0;
+
+     in.mark(BUFFER_SIZE);
+
+     try {
+         int room = BUFFER_SIZE;
+         while (room > 0) {
+             int count = in.read(data, filled, room);
+             if (count <= 0) {
+                 break;
+             }
+             filled += count;
+             room -= count;
+         }
+     } catch (Exception e) {
+         // TODO: error handling?
+         return null;
+     }
+
+     boolean insideTag = false;      // between '<' and '>'
+     boolean suppressing = false;    // inside a script or style element
+     int kept = 0;
+
+     // Kept bytes are compacted to the front of the same array; kept never
+     // overtakes pos, so no unread input is overwritten.
+     for (int pos = 0; pos < filled; pos++) {
+         final byte current = data[pos];
+
+         if (current == (byte) '>') {
+             insideTag = false;
+             continue;
+         }
+
+         if (current != (byte) '<') {
+             if (!insideTag && !suppressing) {
+                 data[kept++] = current;
+             }
+             continue;
+         }
+
+         insideTag = true;
+         final int next = pos + 1;
+
+         if (openTag(data, next, filled, scriptTag) ||
+             openTag(data, next, filled, styleTag)) {
+             suppressing = true;
+         } else if (closedTag(data, next, filled, scriptTag) ||
+                    closedTag(data, next, filled, styleTag)) {
+             suppressing = false;
+         }
+     }
+
+     byte[] filtered = new byte[kept];
+
+     System.arraycopy(data, 0, filtered, 0, kept);
+     return filtered;
+ }
+
+ /**
+  * Runs charset detection on a byte array.
+  *
+  * @param bytes the data to analyze
+  * @return the array returned by the detector's detectAll()
+  */
+ private CharsetMatch[] detect(byte[] bytes)
+ {
+     final CharsetDetector detector = new CharsetDetector();
+     detector.setText(bytes);
+
+     CharsetMatch[] found = detector.detectAll();
+     return found;
+ }
+
+ /**
+  * Runs charset detection on a buffered stream.
+  *
+  * @param inputStream the stream to analyze
+  * @return the array returned by the detector's detectAll(), or null if
+  *         detection threw an exception
+  */
+ private CharsetMatch[] detect(BufferedInputStream inputStream)
+ {
+     final CharsetDetector detector = new CharsetDetector();
+     CharsetMatch[] found;
+
+     try {
+         detector.setText(inputStream);
+         found = detector.detectAll();
+     } catch (Exception e) {
+         // TODO: error message?
+         found = null;
+     }
+
+     return found;
+ }
+
+ /**
+  * Decodes the stream with the encoding of the first match and displays
+  * the text. The window title and the match menu are updated as well.
+  * The stream is reset() first, so the caller must have left a valid mark.
+  * Problems are reported in an error dialog rather than thrown.
+  *
+  * @param inputStream the data to display; reset before reading
+  * @param matches     detection results; the first entry decides the encoding
+  * @param title       the text shown in the window title before the encoding
+  */
+ private void show(InputStream inputStream, CharsetMatch[] matches, String title)
+ {
+     if (matches == null || matches.length == 0) {
+         errorDialog("Match Error", "No matches!");
+         return;
+     }
+
+     try {
+         StringBuffer sb = new StringBuffer();
+         String encoding = matches[0].getName();
+
+         inputStream.reset();
+
+         if (encoding.startsWith("UTF-32")) {
+             // UTF-32 is decoded by hand through the ICU charset, four bytes
+             // per code unit. Bytes of an incomplete unit at the end of a
+             // read are carried over to the front of the buffer.
+             byte[] bytes = new byte[1024];
+             int pending = 0;    // carried-over bytes at the front of the buffer
+             int bytesRead;
+             Charset utf32 = CharsetICU.forNameICU(encoding);
+
+             // Read only into the free part of the buffer so the carried-over
+             // bytes are kept and the array bound is never exceeded.
+             while ((bytesRead = inputStream.read(bytes, pending, bytes.length - pending)) >= 0) {
+                 int available = pending + bytesRead;
+                 int chBytes = available - (available % 4);
+
+                 // Decode only the complete units that were actually read.
+                 sb.append(utf32.decode(ByteBuffer.wrap(bytes, 0, chBytes)).toString());
+
+                 pending = available - chBytes;
+                 System.arraycopy(bytes, chBytes, bytes, 0, pending);
+             }
+         } else {
+             char[] buffer = new char[1024];
+             int charsRead;
+             InputStreamReader isr = new InputStreamReader(inputStream, encoding);
+
+             while ((charsRead = isr.read(buffer, 0, 1024)) >= 0) {
+                 sb.append(buffer, 0, charsRead);
+             }
+
+             isr.close();
+         }
+
+         this.setTitle(title + " - " + encodingName(matches[0]));
+
+         setMatchMenu(matches);
+         text.setText(sb.toString());
+     } catch (IOException e) {
+         errorDialog("IO Error", e.getMessage());
+     } catch (Exception e) {
+         errorDialog("Internal Error", e.getMessage());
+     }
+ }
+
+ /**
+  * Handler for the "New..." command. Not implemented yet; intended to
+  * open a new window.
+  */
+ private void doNew()
+ {
+     // open a new window...
+ }
+
+ /**
+  * Handler for "Open File...": lets the user pick a file, detects its
+  * encoding and displays it. Does nothing when no file chooser is
+  * available or the user cancels the dialog.
+  */
+ private void doOpenFile()
+ {
+     // The chooser is null when it could not be created (see constructor).
+     if (fileChooser == null) {
+         return;
+     }
+
+     int retVal = fileChooser.showOpenDialog(this);
+
+     if (retVal == JFileChooser.APPROVE_OPTION) {
+         File file = fileChooser.getSelectedFile();
+         BufferedInputStream inputStream = openFile(file);
+
+         if (inputStream != null) {
+             try {
+                 CharsetMatch[] matches = detect(inputStream);
+
+                 show(inputStream, matches, file.getName());
+             } finally {
+                 // Release the file handle on every path; closing a stream
+                 // that show() already closed is harmless.
+                 try {
+                     inputStream.close();
+                 } catch (IOException ignored) {
+                     // nothing useful can be done if close fails
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Handler for "Open URL...": asks for a URL, strips markup from the
+  * beginning of the content, detects the encoding from the filtered bytes
+  * and displays the unfiltered content. Does nothing when the dialog is
+  * cancelled or left empty.
+  */
+ private void doOpenURL()
+ {
+     String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
+             null, null, null);
+
+     if (url != null && url.length() > 0) {
+         BufferedInputStream inputStream = openURL(url);
+
+         if (inputStream != null) {
+             try {
+                 byte[] filtered = filter(inputStream);
+
+                 // filter() returns null when reading failed; report that
+                 // instead of handing null to the detector.
+                 if (filtered == null) {
+                     errorDialog("IO Error", "Could not read from " + url);
+                     return;
+                 }
+
+                 CharsetMatch[] matches = detect(filtered);
+
+                 show(inputStream, matches, url);
+             } finally {
+                 // Release the connection on every path; closing a stream
+                 // that show() already closed is harmless.
+                 try {
+                     inputStream.close();
+                 } catch (IOException ignored) {
+                     // nothing useful can be done if close fails
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Handler for "Quit" and for closing the window: reports the closed
+  * frame to DemoApplet, then hides and disposes this window.
+  */
+ private void doQuit()
+ {
+     DemoApplet.demoFrameClosed();
+     setVisible(false);
+     dispose();
+ }
+
+ /**
+  * Builds the menu bar: a "File" menu with Open File, Open URL and Quit
+  * items (each with a Ctrl accelerator and this frame as listener), and an
+  * initially empty "Detected Encodings" menu. The Open File item is
+  * disabled when no file chooser is available.
+  *
+  * @return the new menu bar
+  */
+ private JMenuBar makeMenus()
+ {
+     final JMenu fileMenu = new JMenu("File");
+
+     JMenuItem openFileItem = new JMenuItem("Open File...");
+     openFileItem.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
+     openFileItem.addActionListener(this);
+     fileMenu.add(openFileItem);
+     if (fileChooser == null) {
+         openFileItem.setEnabled(false);     // no file chooser.
+     }
+
+     JMenuItem openUrlItem = new JMenuItem("Open URL...");
+     openUrlItem.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
+     openUrlItem.addActionListener(this);
+     fileMenu.add(openUrlItem);
+
+     JMenuItem quitItem = new JMenuItem("Quit");
+     quitItem.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
+     quitItem.addActionListener(this);
+     fileMenu.add(quitItem);
+
+     final JMenuBar bar = new JMenuBar();
+     bar.add(fileMenu);
+
+     // Index 1 in the bar: setMatchMenu() looks this menu up by position.
+     bar.add(new JMenu("Detected Encodings"));
+
+     return bar;
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java
new file mode 100644
index 00000000000..cd81ef1fa74
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayBorderPanel.java
@@ -0,0 +1,552 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.holiday;
+
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.awt.Insets;
+import java.awt.Panel;
+
+/**
+ * Various graphical borders. The border itself is a Panel so that it can
+ * contain other Components (i.e. it borders something). You use the
+ * HolidayBorderPanel like any other Panel: you set the layout that you prefer and
+ * add Components to it. Beware that a null layout does not obey the insets
+ * of the panel so if you use null layouts, adjust your measurements to
+ * handle the border by calling getInsets().
+ *
+ * @author Andy Clark, Taligent Inc.
+ * @version 1.0
+ */
+public class HolidayBorderPanel extends Panel {
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 4669213306492461159L;
+ // Constants
+
+ /** Solid border. */
+ public final static int SOLID = 0;
+ /** A raised border. */
+ public final static int RAISED = 1;
+ /** A lowered border. */
+ public final static int LOWERED = 2;
+ /** An etched in border. */
+ public final static int IN = 3;
+ /** An etched out border. */
+ public final static int OUT = 4;
+
+ /** Left alignment. */
+ public final static int LEFT = 0;
+ /** Center alignment. */
+ public final static int CENTER = 1;
+ /** Right alignment. */
+ public final static int RIGHT = 2;
+
+ /** Default style (IN). */
+ public final static int DEFAULT_STYLE = IN;
+ /** Default thickness (10). */
+ public final static int DEFAULT_THICKNESS = 10;
+ /** Default thickness for solid borders (4). */
+ public final static int DEFAULT_SOLID_THICKNESS = 4;
+ /** Default thickness for raised borders (2). */
+ public final static int DEFAULT_RAISED_THICKNESS = 2;
+ /** Default thickness for lowered borders (2). */
+ public final static int DEFAULT_LOWERED_THICKNESS = 2;
+ /** Default thickness for etched-in borders (10). */
+ public final static int DEFAULT_IN_THICKNESS = 10;
+ /** Default thickness for etched-out borders (10). */
+ public final static int DEFAULT_OUT_THICKNESS = 10;
+ /** Default gap between border and contained component (5). */
+ public final static int DEFAULT_GAP = 5;
+ /** Default color (black). Applies to SOLID and etched borders. */
+ public final static Color DEFAULT_COLOR = Color.black;
+
+ /** Default font (TimesRoman,PLAIN,14). Only applies to etched borders. */
+ public final static Font DEFAULT_FONT = new Font("TimesRoman", Font.PLAIN, 14);
+ /** Default alignment (LEFT). Only applies to etched borders. */
+ public final static int DEFAULT_ALIGNMENT = LEFT;
+
+ // Data
+ private int style; // border style: SOLID, RAISED, LOWERED, IN or OUT
+ private int thickness; // border thickness used by paint() and getInsets()
+ private int gap; // extra inset between the border and the contents
+ private Color color; // color of SOLID borders and of the caption text
+
+ private Font font; // caption font (etched borders only)
+ private String text; // caption text, may be null (etched borders only)
+ private int alignment; // caption alignment: LEFT, CENTER or RIGHT
+
+    /**
+     * Creates a panel with the default border: style {@link #DEFAULT_STYLE},
+     * thickness {@link #DEFAULT_THICKNESS}, gap {@link #DEFAULT_GAP}, color
+     * {@link #DEFAULT_COLOR} and no caption text.
+     */
+    public HolidayBorderPanel() {
+        // border geometry and color
+        this.style = DEFAULT_STYLE;
+        this.thickness = DEFAULT_THICKNESS;
+        this.gap = DEFAULT_GAP;
+        this.color = DEFAULT_COLOR;
+        // caption settings (only drawn by etched borders)
+        this.font = DEFAULT_FONT;
+        this.alignment = DEFAULT_ALIGNMENT;
+        this.text = null;
+    }
+
+    /**
+     * Creates an etched-in (IN) border panel that shows a caption.
+     *
+     * @param text the caption text
+     */
+    public HolidayBorderPanel(String text) {
+        this();
+        // IN is already the default style; it is set again here explicitly
+        this.style = IN;
+        this.text = text;
+    }
+
+    /**
+     * Creates a SOLID border panel.
+     *
+     * @param color     the border color
+     * @param thickness the border thickness; stored as given, without validation
+     */
+    public HolidayBorderPanel(Color color, int thickness) {
+        this();
+
+        this.thickness = thickness;
+        this.color = color;
+        this.style = SOLID;
+    }
+
+    /**
+     * Creates a border of the given style, using that style's default
+     * thickness (DEFAULT_THICKNESS when the style is not recognized).
+     *
+     * @param style the border style: SOLID, RAISED, LOWERED, IN or OUT
+     */
+    public HolidayBorderPanel(int style) {
+        this();
+        this.style = style;
+        // pick the thickness that goes with the requested style
+        if (style == SOLID) {
+            thickness = DEFAULT_SOLID_THICKNESS;
+        } else if (style == RAISED) {
+            thickness = DEFAULT_RAISED_THICKNESS;
+        } else if (style == LOWERED) {
+            thickness = DEFAULT_LOWERED_THICKNESS;
+        } else if (style == IN) {
+            thickness = DEFAULT_IN_THICKNESS;
+        } else {
+            thickness = (style == OUT) ? DEFAULT_OUT_THICKNESS : DEFAULT_THICKNESS;
+        }
+    }
+
+    /**
+     * Creates a border with an explicit style and thickness.
+     *
+     * @param style     the border style: SOLID, RAISED, LOWERED, IN or OUT
+     * @param thickness the border thickness; stored as given, without validation
+     */
+    public HolidayBorderPanel(int style, int thickness) {
+        this();
+
+        this.thickness = thickness;
+        this.style = style;
+    }
+
+    /**
+     * Returns the insets of this panel: thickness plus gap on every side, with
+     * the top inset enlarged when an etched border's caption font is taller
+     * than the border. The caption is ignored while no graphics context is
+     * available (getGraphics() returns null).
+     *
+     * @return a new Insets object for the current border settings
+     */
+    public Insets getInsets() {
+        int adjustment = 0;
+
+        // only etched borders with a caption need extra room at the top
+        if ((style == IN || style == OUT) && text != null && text.length() > 0) {
+            Graphics g = getGraphics(); // null when there is no graphics context yet
+            if (g != null) {
+                try {
+                    int height = g.getFontMetrics(font).getHeight();
+                    adjustment = Math.max(0, height - thickness);
+                } finally {
+                    g.dispose(); // release the context obtained from getGraphics()
+                }
+            }
+        }
+
+        int dist = thickness + gap;
+        return new Insets(dist + adjustment, dist, dist, dist);
+    }
+
+    /**
+     * Changes the border style, then lays the panel out again and repaints it.
+     *
+     * @param style the new style: SOLID, RAISED, LOWERED, IN or OUT
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setStyle(int style) {
+        this.style = style;
+
+        // getInsets() depends on the style, hence the re-layout
+        doLayout();
+        repaint();
+        return this;
+    }
+
+    /**
+     * Returns the current border style.
+     * @return the style value currently stored
+     */
+    public int getStyle() {
+        return this.style;
+    }
+
+    /**
+     * Sets the border thickness; non-positive values are silently ignored.
+     *
+     * @param thickness the new thickness; must be greater than zero to apply
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setThickness(int thickness) {
+        if (thickness <= 0) {
+            return this;
+        }
+        this.thickness = thickness;
+        doLayout();
+        repaint();
+        return this;
+    }
+
+    /**
+     * Returns the current border thickness.
+     * @return the thickness value currently stored
+     */
+    public int getThickness() {
+        return this.thickness;
+    }
+
+    /**
+     * Sets the gap between border and contents; negative values are ignored.
+     *
+     * @param gap the new gap, in pixels (zero or more to apply)
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setGap(int gap) {
+        if (gap < 0) {
+            return this;
+        }
+        this.gap = gap;
+        doLayout();
+        repaint();
+        return this;
+    }
+
+    /**
+     * Returns the gap between the border and the contained Component.
+     * @return the gap value currently stored
+     */
+    public int getGap() {
+        return this.gap;
+    }
+
+    /**
+     * Sets the color used for SOLID borders and for the caption of etched
+     * borders. A repaint is requested only for those styles.
+     *
+     * @param color the new color
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setColor(Color color) {
+        this.color = color;
+        if (style == SOLID || style == IN || style == OUT) {
+            repaint();
+        }
+        return this;
+    }
+
+    /**
+     * Returns the color used for SOLID borders and for the caption text of
+     * etched borders.
+     * @return the color currently stored
+     */
+    public Color getColor() {
+        return this.color;
+    }
+
+    /**
+     * Sets the caption font (etched borders only); a null font is ignored.
+     * @param font the new caption font
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setTextFont(Font font) {
+        if (font == null) {
+            return this;
+        }
+        this.font = font;
+        if (style == IN || style == OUT) {
+            doLayout();
+            repaint();
+        }
+        return this;
+    }
+
+    /**
+     * Returns the caption font. Only applies to etched borders.
+     * @return the font currently stored
+     */
+    public Font getTextFont() {
+        return this.font;
+    }
+
+    /**
+     * Sets the caption text. Only etched borders show it, so only they are
+     * laid out again and repainted.
+     * @param text the new caption text
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setText(String text) {
+        this.text = text;
+        boolean etched = (style == IN || style == OUT);
+        if (etched) {
+            doLayout();
+            repaint();
+        }
+        return this;
+    }
+
+    /**
+     * Returns the caption text. Only applies to etched borders.
+     * @return the text currently stored, possibly null
+     */
+    public String getText() {
+        return this.text;
+    }
+
+    /**
+     * Sets the caption alignment. Only etched borders show a caption, so only
+     * they are laid out again and repainted.
+     * @param alignment the new alignment: LEFT, CENTER or RIGHT
+     * @return this panel, so that calls can be chained
+     */
+    public HolidayBorderPanel setAlignment(int alignment) {
+        this.alignment = alignment;
+        boolean etched = (style == IN || style == OUT);
+        if (etched) {
+            doLayout();
+            repaint();
+        }
+        return this;
+    }
+
+    /**
+     * Returns the caption alignment.
+     * @return the alignment value currently stored
+     */
+    public int getAlignment() {
+        return this.alignment;
+    }
+
+    /**
+     * Paints the border in the current style: a beveled frame for RAISED and
+     * LOWERED, an etched rectangle with optional caption for IN and OUT, and
+     * nested rectangles in the border color for SOLID (also used for unknown
+     * styles). For etched borders the font and clip of g are restored.
+     *
+     * @param g The graphics context.
+     */
+    public void paint(Graphics g) {
+        // get current dimensions
+        Dimension size = getSize();
+        int width = size.width;
+        int height = size.height;
+
+        // highlight and shadow colors derived from the background
+        Color light = getBackground().brighter().brighter().brighter();
+        Color dark = getBackground().darker().darker().darker();
+
+        // Draw border
+        switch (style) {
+        case RAISED: // 3D Border (in or out)
+        case LOWERED:
+            Color topleft;
+            Color bottomright;
+
+            // RAISED puts the light color on the top/left edges, LOWERED the dark one
+            if (style == RAISED) {
+                topleft = light;
+                bottomright = dark;
+            } else {
+                topleft = dark;
+                bottomright = light;
+            }
+
+            // top and left edges
+            g.setColor(topleft);
+            for (int i = 0; i < thickness; i++) {
+                g.drawLine(i, i, width - i - 2, i);
+                g.drawLine(i, i + 1, i, height - i - 1);
+            }
+
+            // bottom and right edges
+            g.setColor(bottomright);
+            for (int i = 0; i < thickness; i++) {
+                g.drawLine(i + 1, height - i - 1, width - i - 1, height - i - 1);
+                g.drawLine(width - i - 1, i, width - i - 1, height - i - 2);
+            }
+            break;
+
+        case IN: // Etched Border (in or out)
+        case OUT:
+            // set font and get info
+            Font oldfont = g.getFont();
+            g.setFont(font);
+            FontMetrics fm = g.getFontMetrics();
+            int ascent = fm.getAscent();
+            boolean hasText = text != null && text.length() > 0;
+
+            // The adjustment is there so that we always draw the
+            // light rectangle first. Otherwise, your eye picks up
+            // the discrepancy where the light rect. passes over
+            // the darker rect.
+            int adjust1 = (style == IN) ? 1 : 0;
+            int adjust2 = (style == IN) ? 0 : 1;
+
+            // with a caption taller than the border, move the top edge down by half the excess
+            int adjustment = 0;
+            if (hasText && ascent > thickness) {
+                adjustment = (ascent - thickness) / 2;
+            }
+
+            int x = thickness / 2;
+            int y = thickness / 2 + adjustment;
+            int w = width - thickness - 1;
+            int h = height - thickness - 1 - adjustment;
+
+            // draw rectangles
+            g.setColor(light);
+            g.drawRect(x + adjust1, y + adjust1, w, h);
+            g.setColor(dark);
+            g.drawRect(x + adjust2, y + adjust2, w, h);
+
+            // draw text, if applicable
+            if (hasText) {
+                // calculate drawing area
+                int fontheight = fm.getHeight();
+                int strwidth = fm.stringWidth(text);
+
+                int textwidth = width - 2 * (thickness + 5);
+                if (strwidth > textwidth) {
+                    strwidth = textwidth;
+                }
+
+                // calculate offset for alignment
+                int offset;
+                switch (alignment) {
+                case CENTER:
+                    offset = (width - strwidth) / 2;
+                    break;
+                case RIGHT:
+                    offset = width - strwidth - thickness - 5;
+                    break;
+                case LEFT:
+                default: // assume left alignment if invalid
+                    offset = thickness + 5;
+                    break;
+                }
+
+                // clear drawing area and set clipping region; clipRect() can only
+                // shrink the clip, so remember the old one to restore it below
+                java.awt.Shape oldClip = g.getClip();
+                g.clearRect(offset - 5, 0, strwidth + 10, fontheight);
+                g.clipRect(offset, 0, strwidth, fontheight);
+
+                // draw text
+                g.setColor(color);
+                g.drawString(text, offset, ascent);
+
+                // restore old clipping area
+                g.setClip(oldClip);
+            }
+
+            g.setFont(oldfont);
+            break;
+
+        case SOLID:
+        default: // assume SOLID
+            g.setColor(color);
+            for (int i = 0; i < thickness; i++) {
+                g.drawRect(i, i, width - 2 * i - 1, height - 2 * i - 1);
+            }
+        }
+    }
+
+    /**
+     * Returns the settings of this HolidayBorderPanel instance as a string:
+     * style, thickness, gap, color, font, caption text and alignment, comma
+     * separated and wrapped in "HolidayBorderPanel[...]".
+     *
+     * @return the textual description of the border settings
+     */
+    public String toString() {
+        // symbolic name of the style
+        String styleName;
+        switch (style) {
+        case SOLID:
+            styleName = "SOLID";
+            break;
+        case RAISED:
+            styleName = "RAISED";
+            break;
+        case LOWERED:
+            styleName = "LOWERED";
+            break;
+        case IN:
+            styleName = "IN";
+            break;
+        case OUT:
+            styleName = "OUT";
+            break;
+        default:
+            styleName = "unknown";
+        }
+        // symbolic name of the caption alignment
+        String alignmentName;
+        switch (alignment) {
+        case LEFT:
+            alignmentName = "LEFT";
+            break;
+        case CENTER:
+            alignmentName = "CENTER";
+            break;
+        case RIGHT:
+            alignmentName = "RIGHT";
+            break;
+        default:
+            alignmentName = "unknown";
+        }
+
+        StringBuffer str = new StringBuffer("HolidayBorderPanel[");
+        str.append("style=").append(styleName).append(",");
+        str.append("thickness=").append(thickness).append(",");
+        str.append("gap=").append(gap).append(",");
+        str.append(color).append(",").append(font).append(","); // no labels for these two
+        str.append("text=").append(text).append(",");
+        str.append("alignment=").append(alignmentName).append("]");
+        return str.toString();
+    }
+
+ }
+
diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java
new file mode 100644
index 00000000000..5899b78cc01
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/holiday/HolidayCalendarDemo.java
@@ -0,0 +1,744 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.holiday;
+
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Canvas;
+import java.awt.Choice;
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.Container;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Frame;
+import java.awt.Graphics;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.Point;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.WindowEvent;
+import java.text.DateFormatSymbols;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Vector;
+
+import com.ibm.icu.dev.demo.impl.DemoApplet;
+import com.ibm.icu.dev.demo.impl.DemoTextBox;
+import com.ibm.icu.dev.demo.impl.DemoUtility;
+import com.ibm.icu.text.SimpleDateFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.Holiday;
+import com.ibm.icu.util.SimpleTimeZone;
+
+/**
+ * HolidayCalendarDemo demonstrates how Calendar and Holiday work together: it shows a month grid annotated with a locale's holidays.
+ */
+public class HolidayCalendarDemo extends DemoApplet
+{
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 4546085430817359372L;
+
+    /**
+     * Entry point for running the demo as an application: creates a
+     * HolidayCalendarDemo and calls showDemo() on it.
+     * @param argv command-line arguments (unused)
+     */
+    public static void main(String[] argv) {
+        new HolidayCalendarDemo().showDemo();
+    }
+
+    /** Creates the CalendarFrame that presents this demo for the given applet. */
+    public Frame createDemoFrame(DemoApplet applet) {
+        return new CalendarFrame(applet);
+    }
+
+ /**
+ * A Frame is a top-level window with a title. The default layout for a frame
+ * is BorderLayout. The CalendarFrame class defines the window layout of
+ * CalendarDemo.
+ */
+ private static class CalendarFrame extends Frame implements ActionListener,
+ ItemListener
+ {
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = -7023296782393042761L;
+
+ private static final boolean DEBUG = false;
+
+ //private Locale curLocale = Locale.US; // unused
+
+ private DemoApplet applet;
+
+ private static final Locale[] calendars = {
+ //new Locale("de","AT"),
+ Locale.CANADA,
+ Locale.CANADA_FRENCH,
+ Locale.FRANCE,
+ Locale.GERMANY,
+ new Locale("iw","IL"),
+ new Locale("el","GR"),
+ //new Locale("es","MX"),
+ Locale.UK,
+ Locale.US,
+ };
+ private static final Locale[] displays = {
+ Locale.CANADA,
+ Locale.UK,
+ Locale.US,
+ Locale.FRANCE,
+ Locale.CANADA_FRENCH,
+ //new Locale("de","AT"),
+ Locale.GERMAN,
+ new Locale("el","GR"),
+ //new Locale("iw","IL"),
+ new Locale("es","MX"),
+ };
+
+        /**
+         * Constructs a new, initially invisible CalendarFrame and builds its GUI.
+         * @param applet the owning applet, or null (see processWindowEvent)
+         */
+        public CalendarFrame(DemoApplet applet) {
+            super("Calendar Demo");
+            this.applet = applet;
+            init();
+            start();
+            enableEvents(java.awt.AWTEvent.WINDOW_EVENT_MASK); // takes an event mask, not an event id
+        }
+
+        /**
+         * Initializes the frame: selects the locales offered in the display
+         * menu and builds the GUI. The constructor calls this method; it is
+         * not invoked by an applet container.
+         */
+        public void init() {
+            // Only a fixed set of display locales is offered for demo purposes.
+            // To offer all the locales supported, switch to calling
+            // Calendar.getAvailableLocales().
+            this.locales = displays;
+
+            this.buildGUI();
+        }
+
+        // ---- package private ----
+        /** Adds foo to container, first applying font to it unless font is null. */
+        void addWithFont(Container container, Component foo, Font font) {
+            if (font != null) {
+                foo.setFont(font);
+            }
+            container.add(foo);
+        }
+
+        /**
+         * Start hook, named after the applet life-cycle method. The constructor
+         * calls it right after init(); this frame has nothing to start, so the
+         * body is empty.
+         */
+        public void start() {
+            // intentionally empty
+        }
+
+ private Choice localeMenu;
+ private Choice displayMenu;
+ private Locale[] locales;
+
+ private Label monthLabel;
+ private Button prevYear;
+ private Button prevMonth;
+ private Button gotoToday;
+ private Button nextMonth;
+ private Button nextYear;
+ private CalendarPanel calendarPanel;
+
+ private static final Locale kFirstLocale = Locale.US;
+
+        /** Adds component to container after registering constraints c for it with layout g. */
+        private static void add(Container container, Component component,
+                                GridBagLayout g, GridBagConstraints c) {
+            g.setConstraints(component, c);
+            container.add(component);
+        }
+
+        /**
+         * Builds the window contents: the month label with its navigation buttons
+         * and the two locale menus at the top, the copyright notice at the bottom
+         * and the calendar panel in the middle; then shows the current month name.
+         */
+        public void buildGUI()
+        {
+            setBackground(DemoUtility.bgColor);
+            setLayout(new BorderLayout(10,10));
+
+            // Label for the current month name, in 1.5 times the title font size
+            monthLabel = new Label("", Label.LEFT);
+            monthLabel.setFont(new Font(DemoUtility.titleFont.getName(),
+                                        DemoUtility.titleFont.getStyle(),
+                                        (DemoUtility.titleFont.getSize() * 3)/2));
+
+            // Make the locale popup menus. An entry also names the language when
+            // a neighbouring entry is for the same country.
+            localeMenu = new Choice();
+            localeMenu.addItemListener(this);
+            int selectMe = 0;
+
+            for (int i = 0; i < calendars.length; i++) {
+                if (i > 0 &&
+                        calendars[i].getCountry().equals(calendars[i-1].getCountry()) ||
+                    i < calendars.length - 1 &&
+                        calendars[i].getCountry().equals(calendars[i+1].getCountry()))
+                {
+                    localeMenu.add(calendars[i].getDisplayCountry() + " (" +
+                                   calendars[i].getDisplayLanguage() + ")");
+                } else {
+                    localeMenu.add( calendars[i].getDisplayCountry() );
+                }
+
+                if (calendars[i].equals(kFirstLocale)) {
+                    selectMe = i;
+                }
+            }
+
+            localeMenu.setBackground(DemoUtility.choiceColor);
+            localeMenu.select(selectMe);
+
+            // Display menu: full locale name when a neighbour shares the language
+            displayMenu = new Choice();
+            displayMenu.addItemListener(this);
+            selectMe = 0;
+            for (int i = 0; i < locales.length; i++) {
+                if (i > 0 &&
+                        locales[i].getLanguage().equals(locales[i-1].getLanguage()) ||
+                    i < locales.length - 1 &&
+                        locales[i].getLanguage().equals(locales[i+1].getLanguage()))
+                {
+                    displayMenu.add( locales[i].getDisplayName() );
+                } else {
+                    displayMenu.add( locales[i].getDisplayLanguage());
+                }
+
+                if (locales[i].equals(kFirstLocale)) {
+                    selectMe = i;
+                }
+            }
+
+            displayMenu.setBackground(DemoUtility.choiceColor);
+            displayMenu.select(selectMe);
+
+            // Make all the next/previous/today buttons
+            prevYear = new Button("<<");
+            prevYear.addActionListener(this);
+            prevMonth = new Button("<");
+            prevMonth.addActionListener(this);
+            gotoToday = new Button("Today");
+            gotoToday.addActionListener(this);
+            nextMonth = new Button(">");
+            nextMonth.addActionListener(this);
+            nextYear = new Button(">>");
+            nextYear.addActionListener(this);
+
+            // The month name and the control buttons are bunched together
+            Panel monthPanel = new Panel();
+            {
+                GridBagLayout g = new GridBagLayout();
+                GridBagConstraints c = new GridBagConstraints();
+                monthPanel.setLayout(g);
+
+                c.weightx = 1;
+                c.weighty = 1;
+
+                // the month label takes a whole row of its own
+                c.fill = GridBagConstraints.HORIZONTAL;
+                c.gridwidth = GridBagConstraints.REMAINDER;
+                add(monthPanel, monthLabel, g, c);
+
+                c.gridwidth = 1;
+                add(monthPanel, prevYear, g, c);
+                add(monthPanel, prevMonth, g, c);
+                add(monthPanel, gotoToday, g, c);
+                add(monthPanel, nextMonth, g, c);
+                c.gridwidth = GridBagConstraints.REMAINDER;
+                add(monthPanel, nextYear, g, c);
+            }
+
+            // Stick the menu and buttons in a little "control panel"
+            Panel menuPanel = new Panel();
+            {
+                GridBagLayout g = new GridBagLayout();
+                GridBagConstraints c = new GridBagConstraints();
+                menuPanel.setLayout(g);
+
+                c.weightx = 1;
+                c.weighty = 1;
+
+                c.fill = GridBagConstraints.HORIZONTAL;
+
+                c.gridwidth = GridBagConstraints.RELATIVE;
+                Label l1 = new Label("Holidays");
+                l1.setFont(DemoUtility.labelFont);
+                add(menuPanel, l1, g, c);
+
+                c.gridwidth = GridBagConstraints.REMAINDER;
+                add(menuPanel, localeMenu, g, c);
+
+                c.gridwidth = GridBagConstraints.RELATIVE;
+                Label l2 = new Label("Display:");
+                l2.setFont(DemoUtility.labelFont);
+                add(menuPanel, l2, g, c);
+
+                c.gridwidth = GridBagConstraints.REMAINDER;
+                add(menuPanel, displayMenu, g, c);
+            }
+
+            // The month controls and the menus go in a panel at the top of the window
+            Panel topPanel = new Panel();
+            {
+                topPanel.setLayout(new BorderLayout());
+                topPanel.add(monthPanel, BorderLayout.CENTER);
+                topPanel.add(menuPanel, BorderLayout.EAST);
+            }
+            add(topPanel, BorderLayout.NORTH);
+
+            // The copyright notice goes at the bottom of the window
+            Label copyright = new Label(DemoUtility.copyright1, Label.LEFT);
+            copyright.setFont(DemoUtility.creditFont);
+            add(copyright, BorderLayout.SOUTH);
+
+            // Now create the big calendar panel and stick it in the middle
+            calendarPanel = new CalendarPanel( kFirstLocale );
+            add(calendarPanel, BorderLayout.CENTER);
+
+            updateMonthName();
+        }
+
+        /** Shows the current month and year, formatted for the display locale, in monthLabel. */
+        private void updateMonthName() {
+            // "yyyy", not "yyyyy": five pattern letters zero-pad the year to five digits
+            SimpleDateFormat f = new SimpleDateFormat("MMMM yyyy", calendarPanel.getDisplayLocale());
+            f.setCalendar(calendarPanel.getCalendar());
+            f.setTimeZone(new SimpleTimeZone(0, "UTC")); // JDK 1.1.2 workaround
+            monthLabel.setText( f.format( calendarPanel.firstOfMonth() ));
+        }
+
+        /**
+         * Handles clicks on the navigation buttons: moves the calendar panel one
+         * month or one year backwards or forwards, or jumps to today's date, and
+         * then refreshes the month label.
+         *
+         * Events whose source is not a Button are ignored.
+         *
+         * @param e the action event
+         */
+        public void actionPerformed(ActionEvent e) {
+            Object source = e.getSource();
+
+            if (!(source instanceof Button)) {
+                return;
+            }
+
+            // *** Button events are handled here.
+            if (source == nextMonth) {
+                calendarPanel.add(Calendar.MONTH, +1);
+            } else if (source == prevMonth) {
+                calendarPanel.add(Calendar.MONTH, -1);
+            } else if (source == prevYear) {
+                calendarPanel.add(Calendar.YEAR, -1);
+            } else if (source == nextYear) {
+                calendarPanel.add(Calendar.YEAR, +1);
+            } else if (source == gotoToday) {
+                calendarPanel.set( new Date() );
+            }
+
+            // the label is refreshed for every Button source, even one that
+            // matched none of the cases above
+            updateMonthName();
+        }
+
+        /** Switches the calendar or display locale when the matching menu changes. */
+        public void itemStateChanged(ItemEvent e) {
+            Object source = e.getSource();
+
+            if (source == localeMenu) {
+                calendarPanel.setCalendarLocale(calendars[localeMenu.getSelectedIndex()]);
+            } else if (source == displayMenu) {
+                calendarPanel.setDisplayLocale(locales[displayMenu.getSelectedIndex()]);
+            } else {
+                return;
+            }
+            updateMonthName();
+        }
+
+        /**
+         * Prints the message to standard output, but only when DEBUG is true.
+         * @param s the message to print
+         */
+        public void errorText(String s) {
+            if (!DEBUG) {
+                return;
+            }
+            System.out.println(s);
+        }
+
+        /** On WINDOW_CLOSING hides and disposes the frame, then notifies the applet or exits the JVM. */
+        protected void processWindowEvent(WindowEvent e) {
+            errorText("event " + e); // debug trace only; no longer printed unconditionally
+            super.processWindowEvent(e); // let registered WindowListeners see the event
+            if (e.getID() == WindowEvent.WINDOW_CLOSING) {
+                setVisible(false); // replaces the deprecated hide()
+                dispose();
+                if (applet != null) {
+                    applet.demoClosed();
+                } else {
+                    System.exit(0);
+                }
+            }
+        }
+ }
+
+
+ private static class CalendarPanel extends Canvas {
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 1521099412250120821L;
+
+        public CalendarPanel( Locale locale ) { // one locale for both holidays and display
+            this.set(locale, locale, new Date());
+        }
+
+        public void setCalendarLocale(Locale locale) { // display locale and calendar time are kept
+            this.set(locale, this.fDisplayLocale, this.fCalendar.getTime());
+        }
+
+        public void setDisplayLocale(Locale locale) { // calendar locale and calendar time are kept
+            this.set(this.fCalendarLocale, locale, this.fCalendar.getTime());
+        }
+
+        public void set(Date date) { // both locales are kept
+            this.set(this.fCalendarLocale, this.fDisplayLocale, date);
+        }
+
+        /**
+         * Applies locales and date; per-locale objects are rebuilt only on a locale change.
+         */
+        public void set(Locale loc, Locale display, Date date) {
+            if (fCalendarLocale == null || !loc.equals(fCalendarLocale)) {
+                fCalendarLocale = loc;
+                fCalendar = Calendar.getInstance(loc);
+                fAllHolidays = Holiday.getHolidays(loc);
+            }
+            if (fDisplayLocale == null || !display.equals(fDisplayLocale)) {
+                fDisplayLocale = display;
+                fSymbols = new DateFormatSymbols(display);
+            }
+            fStartOfMonth = date;
+            dirty = true;
+            repaint();
+        }
+
+        /** Moves the displayed date by delta units of the given Calendar field and repaints. */
+        public void add(int field, int delta) {
+            synchronized (fCalendar) {
+                fCalendar.setTime(fStartOfMonth);
+                fCalendar.add(field, delta);
+                fStartOfMonth = fCalendar.getTime();
+            }
+            dirty = true;
+            repaint();
+        }
+
+        public Calendar getCalendar() { // the panel's own calendar object, not a copy
+            return this.fCalendar;
+        }
+
+        public Locale getCalendarLocale() { // locale whose calendar and holidays are shown
+            return this.fCalendarLocale;
+        }
+
+        public Locale getDisplayLocale() { // locale used for the text that is drawn
+            return this.fDisplayLocale;
+        }
+
+
+        public Date firstOfMonth() { // stored date; calculate() normalizes it to the month start
+            return this.fStartOfMonth;
+        }
+
+        /**
+         * Returns day 1 (all other fields cleared) of dateInMonth's month, using fCalendar.
+         */
+        private Date startOfMonth(Date dateInMonth) {
+            synchronized (fCalendar) {
+                fCalendar.setTime(dateInMonth); // TODO: synchronization
+                final int era = fCalendar.get(Calendar.ERA);
+                final int year = fCalendar.get(Calendar.YEAR);
+                final int month = fCalendar.get(Calendar.MONTH);
+                // rebuild the date from era, year and month only, on day 1
+                fCalendar.clear();
+                fCalendar.set(Calendar.ERA, era);
+                fCalendar.set(Calendar.YEAR, year);
+                fCalendar.set(Calendar.MONTH, month);
+                fCalendar.set(Calendar.DATE, 1);
+                return fCalendar.getTime();
+            }
+        }
+
+ /**
+ * Recomputes the cached layout values (week geometry, days in the month,
+ * number of week rows) and the list of holidays in the current month,
+ * then clears the dirty flag.
+ */
+ private void calculate()
+ {
+ // As a workaround for JDK 1.1.3 and below, where Calendars and time
+ // zones are a bit goofy, always set my calendar's time zone to UTC.
+ // You would think I would want to do this in the "set" function above,
+ // but if I do that, the program hangs when this class is loaded,
+ // perhaps due to some sort of static initialization ordering problem.
+ // So I do it here instead.
+ fCalendar.setTimeZone(new SimpleTimeZone(0, "UTC"));
+
+ Calendar c = (Calendar)fCalendar.clone(); // Temporary copy
+
+ fStartOfMonth = startOfMonth(fStartOfMonth);
+
+ // Stash away a few useful constants for this calendar and display
+ minDay = c.getMinimum(Calendar.DAY_OF_WEEK);
+ daysInWeek = c.getMaximum(Calendar.DAY_OF_WEEK) - minDay + 1;
+
+ firstDayOfWeek = Calendar.getInstance(fDisplayLocale).getFirstDayOfWeek();
+
+ // fStartOfMonth now holds the start of this month (normalized above).
+ // Find the day of week of the first day in this month
+ c.setTime(fStartOfMonth);
+ firstDayInMonth = c.get(Calendar.DAY_OF_WEEK);
+
+ // Now find the # of days in the month (rolling back from day 1 wraps to the last day)
+ c.roll(Calendar.DATE, false);
+ daysInMonth = c.get(Calendar.DATE);
+
+ // Finally, find the end of the month: one second before the start of the next one
+ c.roll(Calendar.DATE, true);
+ c.add(Calendar.MONTH, 1);
+ c.getTime(); // JDK 1.1.2 bug workaround
+ c.add(Calendar.SECOND, -1);
+ Date endOfMonth = c.getTime();
+
+ // Calculate the number of full or partial weeks in this month.
+ // To do this I can just reuse the code that calculates which
+ // calendar cell contains a given date.
+ numWeeks = dateToCell(daysInMonth).y - dateToCell(1).y + 1;
+
+ // Remember which holidays fall on which days in this month,
+ // to save the trouble of having to do it later
+ fHolidays.setSize(0);
+
+ for (int h = 0; h < fAllHolidays.length; h++)
+ {
+ Date d = fStartOfMonth;
+ while ( (d = fAllHolidays[h].firstBetween(d, endOfMonth) ) != null)
+ {
+ c.setTime(d);
+ fHolidays.addElement( new HolidayInfo(c.get(Calendar.DATE),
+ fAllHolidays[h],
+ fAllHolidays[h].getDisplayName(fDisplayLocale) ));
+
+ d.setTime( d.getTime() + 1000 ); // "d++": step past this occurrence (mutates the Date returned by firstBetween)
+ }
+ }
+ dirty = false;
+ }
+
+ static final int INSET = 2;
+
+        /**
+         * Converts a 1-based day of the month into 0-based calendar grid
+         * coordinates, stored in pos: x is the column, y is the week row.
+         * @param date the day number within the month (1-based)
+         */
+        private void dateToCell(int date, Point pos) {
+            int offset = firstDayInMonth - firstDayOfWeek - minDay;
+            if (firstDayInMonth < firstDayOfWeek) {
+                offset += daysInWeek; // month starts before the first displayed weekday
+            }
+            int cell = date + offset;
+            pos.x = cell % daysInWeek;
+            pos.y = cell / daysInWeek;
+        }
+        private Point dateToCell(int date) { // overload returning the cell in a new Point
+            Point cell = new Point(0, 0);
+            dateToCell(date, cell);
+            return cell;
+        }
+
+        /**
+         * Paints the month: weekday names across the top, a white background
+         * behind the cells that belong to the month, the grid lines, the day
+         * numbers and, in red, the names of the holidays falling on each day.
+         * Cached layout data is recalculated first when it is out of date.
+         *
+         * @param g the graphics context to paint on
+         */
+        public void paint(Graphics g) {
+            if (dirty) {
+                calculate();
+            }
+
+            Point cellPos = new Point(0,0); // Temporary variable
+            Dimension d = getSize();
+
+            g.setColor(DemoUtility.bgColor);
+            g.fillRect(0,0,d.width,d.height);
+
+            // Draw the day names at the top
+            g.setColor(Color.black);
+            g.setFont(DemoUtility.labelFont);
+            FontMetrics fm = g.getFontMetrics();
+            int labelHeight = fm.getHeight() + INSET * 2;
+
+            int v = fm.getAscent() + INSET;
+            for (int i = 0; i < daysInWeek; i++) {
+                int dayNum = (i + minDay + firstDayOfWeek - 2) % daysInWeek + 1;
+                String dayName = fSymbols.getWeekdays()[dayNum];
+
+                int h = (int) (d.width * (i + 0.5)) / daysInWeek;
+                h -= fm.stringWidth(dayName) / 2;
+
+                g.drawString(dayName, h, v);
+            }
+
+            // Both cell dimensions are fractional; dividing as int would truncate
+            // the row height and leave an unused strip below the last week.
+            double cellHeight = (double)(d.height - labelHeight - 1) / numWeeks;
+            double cellWidth = (double)(d.width - 1) / daysInWeek;
+
+            // Draw a white background in the part of the calendar
+            // that displays this month.
+            // First figure out how much of the first week should be shaded.
+            {
+                g.setColor(Color.white);
+                dateToCell(1, cellPos);
+                int width = (int)(cellPos.x*cellWidth); // Width of unshaded area
+
+                g.fillRect(width, labelHeight,
+                           d.width - width, (int)cellHeight);
+
+                // All of the intermediate weeks get shaded completely
+                g.fillRect(0, (int)(labelHeight + cellHeight),
+                           d.width, (int)(cellHeight * (numWeeks - 2)));
+
+                // Now figure out the last week.
+                dateToCell(daysInMonth, cellPos);
+                width = (int)((cellPos.x+1)*cellWidth); // Width of shaded area
+
+                g.fillRect(0, (int)(labelHeight + (numWeeks-1) * cellHeight),
+                           width, (int)(cellHeight));
+            }
+            // Draw the X/Y grid lines
+            g.setColor(Color.black);
+            for (int i = 0; i <= numWeeks; i++) {
+                int y = (int)(labelHeight + i * cellHeight);
+                g.drawLine(0, y, d.width - 1, y);
+            }
+            for (int i = 0; i <= daysInWeek; i++) {
+                int x = (int)(i * cellWidth);
+                g.drawLine(x, labelHeight, x, d.height - 1);
+            }
+
+            // Now loop through all of the days in the month, figure out where
+            // they go in the grid, and draw the day # for each one
+            Font numberFont = new Font("Helvetica",Font.PLAIN,12);
+
+            for (int i = 1, h = 0; i <= daysInMonth; i++) {
+                g.setFont(numberFont);
+                g.setColor(Color.black);
+                fm = g.getFontMetrics();
+
+                dateToCell(i, cellPos);
+                int x = (int)((cellPos.x + 1) * cellWidth);
+                int y = (int)(cellPos.y * cellHeight + labelHeight);
+
+                String dayNum = String.valueOf(i);
+
+                x = x - INSET - fm.stringWidth(dayNum);
+                y = y + fm.getAscent() + INSET;
+
+                g.drawString(dayNum, x, y);
+
+                // See if any of the holidays land on this day....
+                HolidayInfo info = null;
+
+                // Coordinates of lower-left corner of cell.
+                x = (int)((cellPos.x) * cellWidth);
+                y = (int)((cellPos.y+1) * cellHeight) + labelHeight;
+
+                while (h < fHolidays.size() &&
+                       (info = (HolidayInfo)fHolidays.elementAt(h)).date <= i)
+                {
+                    if (info.date == i) {
+                        // Draw the holiday here.
+                        g.setFont(numberFont);
+                        g.setColor(Color.red);
+
+                        DemoTextBox box = new DemoTextBox(g, info.name, (int)(cellWidth - INSET));
+                        box.draw(g, x + INSET, y - INSET - box.getHeight());
+
+                        y -= (box.getHeight() + INSET);
+                    }
+                    h++;
+                }
+            }
+        }
+
+ // Important state variables
+ private Locale fCalendarLocale; // Whose calendar
+ private Calendar fCalendar; // Calendar for calculations
+
+ private Locale fDisplayLocale; // How to display it
+ private DateFormatSymbols fSymbols; // Symbols for drawing
+
+ private Date fStartOfMonth; // 00:00:00 on first day of month
+
+ // Cached calculations to make drawing faster.
+ private transient int minDay; // Minimum legal day #
+ private transient int daysInWeek; // # of days in a week
+ private transient int firstDayOfWeek; // First day to display in week
+ private transient int numWeeks; // # full or partial weeks in month
+ private transient int daysInMonth; // # days in this month
+ private transient int firstDayInMonth; // Day of week of first day in month
+
+ private transient Holiday[] fAllHolidays;
+ private transient Vector fHolidays = new Vector(5,5);
+
+ private transient boolean dirty = true;
+ }
+
+    /** Plain holder for one holiday occurrence: day of month, holiday and display name. */
+    private static class HolidayInfo {
+        public int date;        // day of the month the holiday falls on
+        public Holiday holiday; // the holiday itself
+        public String name;     // name to draw for the holiday
+        public HolidayInfo(int date, Holiday holiday, String name) {
+            this.name = name;
+            this.holiday = holiday;
+            this.date = date;
+        }
+    }
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/holiday/package.html b/demos/src/com/ibm/icu/dev/demo/holiday/package.html
new file mode 100644
index 00000000000..d05e2f55ccf
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/holiday/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+Holiday demo application.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java b/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java
new file mode 100644
index 00000000000..d4089367562
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/AppletFrame.java
@@ -0,0 +1,149 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+import java.applet.Applet;
+import java.applet.AppletContext;
+import java.applet.AppletStub;
+import java.applet.AudioClip;
+import java.awt.Frame;
+import java.awt.Image;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Enumeration;
+import java.util.Iterator;
+
+/**
+ * A Frame that runs an Applet within itself, making it possible
+ * for an applet to run as an application. Usage:
+ *
+ *
+ * public class MyApplet extends Applet {
+ * public static void main(String args[]) {
+ * MyApplet applet = new MyApplet();
+ * new AppletFrame("My Applet Running As An App", applet, 640, 480);
+ * }
+ * ...
+ * }
+ *
+ *
+ * @author Alan Liu
+ */
+public class AppletFrame extends Frame implements AppletStub, AppletContext {
+
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 818828281190757725L;
+    Applet applet;   // the hosted applet; this frame acts as both its stub and its context
+
+    /**
+     * Construct a Frame running the given Applet with the default size
+     * of 640 by 480.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     */
+    public AppletFrame(String name, Applet applet) {
+        this(name, applet, 640, 480);
+    }
+
+    /**
+     * Construct a Frame running the given Applet with the given size.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     * @param width width of the Frame
+     * @param height height of the Frame
+     */
+    public AppletFrame(String name, Applet applet, int width, int height) {
+        super(name);
+        this.applet = applet;
+        applet.setStub(this);
+
+        setSize(width, height);
+        add("Center", applet);
+        setVisible(true);   // replaces the deprecated Frame.show(); the frame is shown before init()
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                AppletFrame.this.applet.stop();
+                dispose();
+                System.exit(0);
+            }
+        });
+
+        applet.init();
+        applet.start();
+    }
+
+    // AppletStub API
+    public void appletResize(int width, int height) {
+        setSize(width, height);
+    }
+
+    public AppletContext getAppletContext() {
+        return this;
+    }
+
+    public URL getCodeBase() {
+        return null;   // no code base is available when running as an application
+    }
+
+    public URL getDocumentBase() {
+        return null;   // no enclosing document either
+    }
+
+    public String getParameter(String name) {
+        return "PARAMETER";   // every parameter name yields this same placeholder value
+    }
+
+    public boolean isActive() {
+        return true;
+    }
+
+
+    // AppletContext API
+    public Applet getApplet(String name) {
+        return applet;   // the single hosted applet, whatever name is asked for
+    }
+
+    public Enumeration getApplets() {
+        return java.util.Collections.enumeration(java.util.Collections.singletonList(applet)); // was null
+    }
+
+    public AudioClip getAudioClip(URL url) {
+        return null;   // audio clips are not supported by this minimal context
+    }
+
+    public Image getImage(URL url) {
+        return null;   // images are not supported by this minimal context
+    }
+
+    public void showDocument(URL url) {}
+    public void showDocument(URL url, String target) {}
+
+    public void showStatus(String status) {
+        System.out.println(status);
+    }
+
+    public void setStream(String key, InputStream stream) throws IOException {
+    }
+
+    public InputStream getStream(String key) {
+        return null;   // setStream() stores nothing, so no key ever has a stream
+    }
+
+    public Iterator getStreamKeys() {
+        return java.util.Collections.EMPTY_LIST.iterator();   // no streams are kept; was null
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java
new file mode 100644
index 00000000000..339e6a76160
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoApplet.java
@@ -0,0 +1,80 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.dev.demo.impl;
+
+import java.awt.Button;
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Frame;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+public abstract class DemoApplet extends java.applet.Applet {
+    private static final long serialVersionUID = -8983602961925702071L;
+    private Button demoButton;
+    private Frame demoFrame;                  // frame currently open for this applet, or null
+    private static int demoFrameCount = 0;    // open demo frames across all DemoApplet instances
+
+    protected abstract Frame createDemoFrame(DemoApplet applet);
+    protected Dimension getDefaultFrameSize(DemoApplet applet, Frame f) {
+        return new Dimension(700, 550);
+    }
+
+    //Create a button that will display the demo
+    public void init()
+    {
+        setBackground(Color.white);
+        demoButton = new Button("Demo");
+        demoButton.setBackground(Color.yellow);
+        add( demoButton );
+
+        demoButton.addActionListener( new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                if (e.getID() == ActionEvent.ACTION_PERFORMED) {
+                    demoButton.setLabel("loading");
+
+                    if (demoFrame == null) {
+                        // showDemo() creates the frame itself; also creating one here leaked a second frame.
+                        showDemo();
+                    }
+
+                    demoButton.setLabel("Demo");
+                }
+            }
+        } );
+    }
+
+    public void showDemo()   // creates, sizes and shows a new demo frame, then counts it as open
+    {
+        demoFrame = createDemoFrame(this);
+        demoFrame.doLayout();
+        Dimension d = getDefaultFrameSize(this, demoFrame);
+        demoFrame.setSize(d.width, d.height);
+        demoFrame.setVisible(true);   // replaces the deprecated Frame.show()
+        demoFrameOpened();
+    }
+
+    public void demoClosed()   // forgets the frame so the button can open a new one
+    {
+        demoFrame = null;
+        demoFrameClosed();
+    }
+
+    public static void demoFrameOpened() {
+        demoFrameCount++;
+        System.err.println("DemoFrameOpened, now at:"+demoFrameCount);
+    }
+    public static void demoFrameClosed() {
+        if (--demoFrameCount == 0) {   // last open frame closed: terminate the JVM
+            System.err.println("DemoFrameClosed, now at:"+demoFrameCount + " - quitting");
+            System.exit(0);
+        }
+        System.err.println("DemoFrameClosed, now at:"+demoFrameCount);
+    }
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java
new file mode 100644
index 00000000000..a3d83499421
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoTextBox.java
@@ -0,0 +1,96 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+
+
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.text.BreakIterator;
+
+public class DemoTextBox {
+    /** Word-wraps {@code text} to {@code width} pixels using the current font metrics of {@code g}. */
+    public DemoTextBox(Graphics g, String text, int width)
+    {
+        this.text = text;
+        this.chars = text.toCharArray();
+        this.width = width;
+        // Only the metrics are kept; the Graphics itself is not retained.
+        this.metrics = g.getFontMetrics();
+
+        breakText();
+    }
+
+    /** Returns the pixel height of the wrapped text: one font line height per wrapped line. */
+    public int getHeight() {
+        return (nbreaks + 1) * metrics.getHeight();
+    }
+
+    /** Draws the wrapped lines on {@code g}; (x, y) is the top-left corner of the first line. */
+    public void draw(Graphics g, int x, int y)
+    {
+        int index = 0;
+
+        y += metrics.getAscent();
+
+        for (int i = 0; i < nbreaks; i++)
+        {
+            g.drawChars(chars, index, breakPos[i] - index, x, y);
+            index = breakPos[i];
+            y += metrics.getHeight();
+        }
+
+        g.drawChars(chars, index, chars.length - index, x, y);
+    }
+
+    /** Fills breakPos/nbreaks with the character offsets at which each following line starts. */
+    private void breakText()
+    {
+        if (metrics.charsWidth(chars, 0, chars.length) > width)
+        {
+            BreakIterator iter = BreakIterator.getWordInstance();
+            iter.setText(text);
+
+            int start = iter.first();
+            int end = start;
+            int pos;
+
+            while ( (pos = iter.next()) != BreakIterator.DONE )
+            {
+                int w = metrics.charsWidth(chars, start, pos - start);
+                if (w > width)
+                {
+                    // We've gone past the maximum width, so break the line at the
+                    // last boundary that fit, or, if there is none, let this word
+                    // overflow the margin (yuck) and break right after it.
+                    int breakAt = (end > start) ? end : pos;
+                    if (nbreaks == breakPos.length) {
+                        // Grow the table instead of overrunning a fixed-size array.
+                        int[] grown = new int[nbreaks * 2];
+                        System.arraycopy(breakPos, 0, grown, 0, nbreaks);
+                        breakPos = grown;
+                    }
+                    breakPos[nbreaks++] = breakAt;
+                    start = breakAt;
+                    end = pos;
+                } else {
+                    // the current position still fits on the line; it's the best
+                    // tentative break position we have so far.
+                    end = pos;
+                }
+            }
+        }
+    }
+
+    private String text;
+    private char[] chars;
+    private FontMetrics metrics;
+    private int width;
+
+    // Break table: starts with room for 10 entries and is doubled on demand by breakText().
+    private int[] breakPos = new int[10];
+    private int nbreaks = 0;
+}
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java b/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java
new file mode 100644
index 00000000000..c838a13ea2b
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DemoUtility.java
@@ -0,0 +1,136 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1997-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.Container;
+import java.awt.Font;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Insets;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.TextComponent;
+import java.util.Locale;
+
+public class DemoUtility
+{
+    public static final Font titleFont = new Font("TimesRoman",Font.BOLD,18);
+    public static final Font labelFont = new Font("TimesRoman",Font.BOLD,14);
+    public static final Font choiceFont = new Font("Helvetica",Font.BOLD,12);
+    public static final Font editFont = new Font("Helvetica",Font.PLAIN,14);
+    public static final Font creditFont = new Font("Helvetica",Font.PLAIN,10);
+    public static final Font numberFont = new Font("sansserif", Font.PLAIN, 14);
+
+    public static final Color bgColor = Color.lightGray;
+    public static final Color choiceColor = Color.white;
+
+    public static final String copyright1 =
+        "Copyright (C) IBM Corp and others. 1997 - 2002 All Rights Reserved";
+
+    /**
+     * Provides easy way to use basic functions of GridBagLayout, without
+     * the complications. After building a panel, and inserting all the
+     * subcomponents, call this to lay it out in the desired number of columns.
+     */
+    public static void fixGrid(Container cont, int columns) {
+        GridBagLayout gridbag = new GridBagLayout();
+        cont.setLayout(gridbag);
+
+        GridBagConstraints c = new GridBagConstraints();
+        c.fill = GridBagConstraints.VERTICAL;
+        c.weightx = 1.0;
+        c.insets = new Insets(2,2,2,2);
+
+        Component[] components = cont.getComponents();
+        for (int i = 0; i < components.length; ++i) {
+            // Every columns-th component closes its row; all others take one cell.
+            c.gridwidth = 1; // default
+            if ((i%columns) == columns - 1)
+                c.gridwidth = GridBagConstraints.REMAINDER; // last in grid
+            if (components[i] instanceof Label) {   // anchor changes only for Labels; others reuse the last value
+                switch (((Label)components[i]).getAlignment()) {
+                case Label.CENTER: c.anchor = GridBagConstraints.CENTER; break;
+                case Label.LEFT: c.anchor = GridBagConstraints.WEST; break;
+                case Label.RIGHT: c.anchor = GridBagConstraints.EAST; break;
+                }
+            }
+            gridbag.setConstraints(components[i], c);
+        }
+
+    }
+
+    /**
+     * Provides easy way to change the spacing around an object in a GridBagLayout.
+     * Call AFTER fixGrid, passing in the container, the component, and the
+     * new insets. The container's layout must be a GridBagLayout (it is cast).
+     */
+    public static void setInsets(Container cont, Component comp, Insets insets) {
+        GridBagLayout gbl = (GridBagLayout)cont.getLayout();
+        GridBagConstraints g = gbl.getConstraints(comp);
+        g.insets = insets;
+        gbl.setConstraints(comp,g);
+    }
+
+    public static Panel createSpacer() {   // a layout-less panel sized 1000 x 1
+        Panel spacer = new Panel();
+        spacer.setLayout(null);
+        spacer.setSize(1000, 1);
+        return spacer;
+    }
+
+    // to avoid goofy updates and misplaced cursors
+    public static void setText(TextComponent area, String newText) {
+        String current = area.getText();
+        if (current.equals(newText)) return;   // unchanged: do not touch the component
+        area.setText(newText);
+    }
+
+    /**
+     * Compares two locales. Returns -1 if the languages differ; otherwise 0,
+     * plus 1 if the countries also match, plus 1 more if the variants match too
+     * (so a result of 2 means language, country and variant are all equal).
+     */
+
+    public static int compareLocales(Locale l1, Locale l2)
+    {
+        int result = -1;
+
+        if (l1.getLanguage().equals(l2.getLanguage())) {
+            result += 1;
+
+            if (l1.getCountry().equals(l2.getCountry())) {
+                result += 1;
+
+                if (l1.getVariant().equals(l2.getVariant())) {
+                    result += 1;
+                }
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Get the G7 locale list for demos; a fresh copy of the array is returned on each call.
+     */
+    public static Locale[] getG7Locales() {
+        return (Locale[]) localeList.clone();   // callers can no longer modify the shared table
+    }
+    private static final Locale[] localeList = {
+        new Locale("DA", "DK", ""),
+        new Locale("EN", "US", ""),
+        new Locale("EN", "GB", ""),
+        new Locale("EN", "CA", ""),
+        new Locale("FR", "FR", ""),
+        new Locale("FR", "CA", ""),
+        new Locale("DE", "DE", ""),
+        new Locale("IT", "IT", ""),
+        //new Locale("JA", "JP", ""),
+    };
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java b/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java
new file mode 100644
index 00000000000..e6147be986e
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/DumbTextComponent.java
@@ -0,0 +1,827 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+import java.awt.AWTEventMulticaster;
+import java.awt.Canvas;
+import java.awt.Color;
+import java.awt.Cursor;
+import java.awt.Dimension;
+import java.awt.Font;
+import java.awt.FontMetrics;
+import java.awt.Graphics;
+import java.awt.Image;
+import java.awt.Point;
+import java.awt.datatransfer.Clipboard;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.StringSelection;
+import java.awt.datatransfer.Transferable;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.FocusEvent;
+import java.awt.event.FocusListener;
+import java.awt.event.InputEvent;
+import java.awt.event.KeyEvent;
+import java.awt.event.KeyListener;
+import java.awt.event.MouseEvent;
+import java.awt.event.MouseListener;
+import java.awt.event.MouseMotionListener;
+import java.awt.event.TextEvent;
+import java.awt.event.TextListener;
+import java.text.BreakIterator;
+
+// LIU: Changed from final to non-final
+public class DumbTextComponent extends Canvas
+ implements KeyListener, MouseListener, MouseMotionListener, FocusListener
+{
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 8265547730738652151L;
+
+// private transient static final String copyright =
+// "Copyright \u00A9 1998, Mark Davis. All Rights Reserved.";
+ private transient static boolean DEBUG = false; // turns on the System.out tracing in the event handlers
+
+ private String contents = ""; // the complete text shown and edited by this component
+ private Selection selection = new Selection(); // current selection (anchor/caret)
+ private int activeStart = -1; // "key start" offset drawn as a magenta bar; -1 means none
+ private boolean editable = true; // when false, editing keys are ignored or beep
+
+ private transient Selection tempSelection = new Selection(); // scratch object reused to build a new selection
+ private transient boolean focus; // maintained by focusGained/focusLost
+ private transient BreakIterator lineBreaker = BreakIterator.getLineInstance();
+ private transient BreakIterator wordBreaker = BreakIterator.getWordInstance();
+ private transient BreakIterator charBreaker = BreakIterator.getCharacterInstance();
+ private transient int lineAscent; // the three line metrics are refreshed from fm in paint2()
+ private transient int lineHeight;
+ private transient int lineLeading;
+ private transient int lastHeight = 10; // size recorded by the last paint2(); feeds the get*Size() methods
+ private transient int lastWidth = 50;
+ private static final int MAX_LINES = 200; // LIU: Use symbolic name
+ private transient int[] lineStarts = new int[MAX_LINES]; // LIU; line i spans lineStarts[i]..lineStarts[i+1]
+ private transient int lineCount = 1;
+
+ private transient boolean valid = false; // false when cacheImage must be re-rendered
+ private transient FontMetrics fm; // set in paint2(); null until the first paint
+ private transient boolean redoLines = true; // true when lineStarts[] must be recomputed
+ private transient boolean doubleClick = false; // set in mousePressed from the click count
+ private transient TextListener textListener;
+ private transient ActionListener selectionListener; // notified with "Selection Changed" events
+ private transient Image cacheImage; // off-screen buffer copied to the screen by paint()
+ private transient Dimension mySize; // component size captured at the start of paint()
+ private transient int xInset = 5;
+ private transient int yInset = 5;
+ private transient Point startPoint = new Point(); // reusable points filled in by drawSelection()
+ private transient Point endPoint = new Point();
+ private transient Point caretPoint = new Point();
+ private transient Point activePoint = new Point();
+
+ //private transient static String clipBoard;
+
+ private static final char CR = '\015'; // LIU; octal 015 is the carriage return, U+000D
+
+ // ============================================
+
+ public DumbTextComponent() {
+ // The component registers itself as the listener for every input source it handles.
+ addMouseListener(this);
+ addMouseMotionListener(this);
+ addKeyListener(this);
+ addFocusListener(this);
+ setCursor(Cursor.getPredefinedCursor(Cursor.TEXT_CURSOR));
+ }
+
+// ================ Events ====================
+
+ // public boolean isFocusTraversable() { return true; }
+
+ public void addActionListener(ActionListener l) { // l will receive the "Selection Changed" action events
+ selectionListener = AWTEventMulticaster.add(selectionListener, l);
+ }
+
+ public void removeActionListener(ActionListener l) { // undoes addActionListener(l)
+ selectionListener = AWTEventMulticaster.remove(selectionListener, l);
+ }
+
+ public void addTextListener(TextListener l) { // l is told about TEXT_VALUE_CHANGED events fired by setText2()
+ textListener = AWTEventMulticaster.add(textListener, l);
+ }
+
+ public void removeTextListener(TextListener l) { // undoes addTextListener(l)
+ textListener = AWTEventMulticaster.remove(textListener, l);
+ }
+
+ private transient boolean pressed;
+
+ public void mousePressed(MouseEvent e) {
+     if (DEBUG) System.out.println("mousePressed");
+     if (pressed) {   // a press while one is already in progress only moves the caret end
+         select(e,false);
+         return;
+     }
+     doubleClick = e.getClickCount() > 1;
+     requestFocus();
+     select(e, true);   // first press: may also reset the anchor
+     pressed = true;
+ }
+
+ public void mouseDragged(MouseEvent e) { // dragging extends the selection to the pointer position
+ if (DEBUG) System.out.println("mouseDragged");
+ select(e, false);
+ }
+
+ public void mouseReleased(MouseEvent e) { // ends the press started in mousePressed
+ if (DEBUG) System.out.println("mouseReleased");
+ pressed = false;
+ }
+
+ public void mouseEntered(MouseEvent e) { // intentionally a no-op; the old behavior is kept below, disabled
+ //if (pressed) select(e, false);
+ }
+
+ public void mouseExited(MouseEvent e){ // intentionally a no-op; the old behavior is kept below, disabled
+ //if (pressed) select(e, false);
+ }
+
+ public void mouseClicked(MouseEvent e) {} // unused MouseListener callback
+ public void mouseMoved(MouseEvent e) {} // unused MouseMotionListener callback
+
+
+ public void focusGained(FocusEvent e) { // focus changes the border and selection colors, so redraw
+ if (DEBUG) System.out.println("focusGained");
+ focus = true;
+ valid = false; // force the cached image to be re-rendered
+ repaint(16);
+ }
+ public void focusLost(FocusEvent e) { // mirror image of focusGained
+ if (DEBUG) System.out.println("focusLost");
+ focus = false;
+ valid = false; // force the cached image to be re-rendered
+ repaint(16);
+ }
+
+ public void select(MouseEvent e, boolean first) {
+     setKeyStart(-1);
+     // Translate the pointer position into a caret offset inside tempSelection.
+     point2Offset(e.getPoint(), tempSelection);
+     // An initial press without Shift collapses the selection onto the caret.
+     if (first && (e.getModifiers() & InputEvent.SHIFT_MASK) == 0) {
+         tempSelection.anchor = tempSelection.caret;
+     }
+     // After a double click the selection is expanded via the word BreakIterator.
+     if (doubleClick) {
+         tempSelection.expand(wordBreaker);
+     }
+     select(tempSelection);
+ }
+
+ public void keyPressed(KeyEvent e) { // handles shortcuts, cursor keys and Delete by key code
+ int code = e.getKeyCode();
+ if (DEBUG) System.out.println("keyPressed "
+ + hex((char)code) + ", " + hex((char)e.getModifiers()));
+ int start = selection.getStart();
+ int end = selection.getEnd();
+ boolean shift = (e.getModifiers() & InputEvent.SHIFT_MASK) != 0;
+ boolean ctrl = (e.getModifiers() & InputEvent.CTRL_MASK) != 0;
+
+ switch (code) {
+ case KeyEvent.VK_Q: // Ctrl+Q (editable only): clears the key start and calls fixHex(), defined elsewhere in this class
+ if (!ctrl || !editable) break;
+ setKeyStart(-1);
+ fixHex();
+ break;
+ case KeyEvent.VK_V: // Ctrl+V: paste, or beep when read-only
+ if (!ctrl) break;
+ if (!editable) {
+ this.getToolkit().beep();
+ } else {
+ paste();
+ }
+ break;
+ case KeyEvent.VK_C: // Ctrl+C: copy (allowed even when read-only)
+ if (!ctrl) break;
+ copy();
+ break;
+ case KeyEvent.VK_X: // Ctrl+X: cut = copy, then replace the selection with nothing
+ if (!ctrl) break;
+ if (!editable) {
+ this.getToolkit().beep();
+ } else {
+ copy();
+ insertText("");
+ }
+ break;
+ case KeyEvent.VK_A: // Ctrl+A: select everything; the oversized offset is pinned in select(Selection)
+ if (!ctrl) break;
+ setKeyStart(-1);
+ select(Integer.MAX_VALUE, 0, false);
+ break;
+ case KeyEvent.VK_RIGHT: // next character boundary, or word boundary with Ctrl; Shift is passed on to nextBound
+ setKeyStart(-1);
+ tempSelection.set(selection);
+ tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, +1, shift);
+ select(tempSelection);
+ break;
+ case KeyEvent.VK_LEFT: // same as VK_RIGHT in the opposite direction
+ setKeyStart(-1);
+ tempSelection.set(selection);
+ tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, -1, shift);
+ select(tempSelection);
+ break;
+ case KeyEvent.VK_UP: // LIU: Add support for up arrow
+ setKeyStart(-1);
+ tempSelection.set(selection);
+ tempSelection.caret = lineDelta(tempSelection.caret, -1);
+ if (!shift) { // without Shift the selection collapses to the new caret
+ tempSelection.anchor = tempSelection.caret;
+ }
+ select(tempSelection);
+ break;
+ case KeyEvent.VK_DOWN: // LIU: Add support for down arrow
+ setKeyStart(-1);
+ tempSelection.set(selection);
+ tempSelection.caret = lineDelta(tempSelection.caret, +1);
+ if (!shift) { // without Shift the selection collapses to the new caret
+ tempSelection.anchor = tempSelection.caret;
+ }
+ select(tempSelection);
+ break;
+ case KeyEvent.VK_DELETE: // LIU: Add delete key support
+ if (!editable) break;
+ setKeyStart(-1);
+ if (contents.length() == 0) break;
+ start = selection.getStart();
+ end = selection.getEnd();
+ if (start == end) { // no selection: delete the single character after the caret
+ ++end;
+ if (end > contents.length()) { // caret is already at the end of the text
+ getToolkit().beep();
+ return;
+ }
+ }
+ replaceRange("", start, end);
+ break;
+ }
+ }
+
+ void copy() {
+     Clipboard target = this.getToolkit().getSystemClipboard();
+     String picked = contents.substring(selection.getStart(), selection.getEnd());
+     StringSelection payload = new StringSelection(picked);   // doubles as the clipboard owner
+     target.setContents(payload, payload);
+ }
+
+ void paste () { // replaces the selection with the clipboard's string content; beeps when that is not possible
+ Clipboard cb = this.getToolkit().getSystemClipboard();
+ Transferable t = cb.getContents(this);
+ if (t == null) { // nothing on the clipboard
+ this.getToolkit().beep();
+ return;
+ }
+ try {
+ String temp = (String) t.getTransferData(DataFlavor.stringFlavor);
+ insertText(temp);
+ } catch (Exception e) { // any failure here (e.g. no string flavor available) is reported only by a beep
+ this.getToolkit().beep();
+ }
+ }
+
+ /**
+  * LIU: Given an offset into contents, moves up or down by lines,
+  * according to lineStarts[].
+  * @param off the offset into contents
+  * @param delta how many lines to move up (< 0) or down (> 0)
+  * @return the new offset into contents; never before the start of the target line
+  */
+ private int lineDelta(int off, int delta) {
+     int line = findLine(off, false);
+     int posInLine = off - lineStarts[line];
+     // Keep the same character position within the target line where possible.
+     line += delta;
+     if (line < 0) {
+         line = posInLine = 0;           // moving above the first line: go to the very start
+     } else if (line >= lineCount) {
+         return contents.length();       // moving below the last line: go to the very end
+     }
+     off = lineStarts[line] + posInLine;
+     if (off >= lineStarts[line+1]) {
+         off = Math.max(lineStarts[line], lineStarts[line+1] - 1);   // was -1 for empty text
+     }
+     return off;
+ }
+
+ public void keyReleased(KeyEvent e) { // only traces the event when DEBUG is set
+ int code = e.getKeyCode();
+ if (DEBUG) System.out.println("keyReleased "
+ + hex((char)code) + ", " + hex((char)e.getModifiers()));
+ }
+
+ public void keyTyped(KeyEvent e) { // handles typed characters; Ctrl combinations are left to keyPressed
+ char ch = e.getKeyChar();
+ if (DEBUG) System.out.println("keyTyped "
+ + hex((char)ch) + ", " + hex((char)e.getModifiers()));
+ if ((e.getModifiers() & InputEvent.CTRL_MASK) != 0) return;
+ int start, end;
+ switch (ch) {
+ case KeyEvent.CHAR_UNDEFINED: // no character is associated with this event
+ break;
+ case KeyEvent.VK_BACK_SPACE: // backspace character: delete the selection, or the character before the caret
+ //setKeyStart(-1);
+ if (!editable) break;
+ if (contents.length() == 0) break;
+ start = selection.getStart();
+ end = selection.getEnd();
+ if (start == end) {
+ --start;
+ if (start < 0) {
+ getToolkit().beep(); // LIU: Add audio feedback of NOP
+ return;
+ }
+ }
+ replaceRange("", start, end);
+ break;
+ case KeyEvent.VK_DELETE: // delete character: same logic as the VK_DELETE case in keyPressed
+ //setKeyStart(-1);
+ if (!editable) break;
+ if (contents.length() == 0) break;
+ start = selection.getStart();
+ end = selection.getEnd();
+ if (start == end) {
+ ++end;
+ if (end > contents.length()) {
+ getToolkit().beep(); // LIU: Add audio feedback of NOP
+ return;
+ }
+ }
+ replaceRange("", start, end);
+ break;
+ default: // any other character is inserted, unless the component is read-only
+ if (!editable) break;
+ // LIU: Dispatch to subclass API
+ handleKeyTyped(e);
+ break;
+ }
+ }
+
+ // LIU: Subclass API for handling of key typing; the default inserts the typed character
+ protected void handleKeyTyped(KeyEvent e) {
+ insertText(String.valueOf(e.getKeyChar()));
+ }
+
+ protected void setKeyStart(int keyStart) {
+     // Repaint only when the marker actually changes (callers pass -1 to clear it).
+     if (activeStart == keyStart) return;
+     activeStart = keyStart;
+     repaint(10);
+ }
+
+ protected void validateKeyStart() {
+     int selStart = selection.getStart();
+     if (activeStart <= selStart) return;   // marker is not past the selection start
+     activeStart = selStart;                // otherwise pull it back to the selection start
+     repaint(10);
+ }
+
+ protected int getKeyStart() { // current key-start offset, or -1 when none is set
+ return activeStart;
+ }
+
+// ===================== Control ======================
+
+ public synchronized void setEditable(boolean b) { // false makes the editing keys no-ops or beeps
+ editable = b;
+ }
+
+ public boolean isEditable() { // NOTE(review): unsynchronized, unlike setEditable
+ return editable;
+ }
+
+ public void select(Selection newSelection) {
+ newSelection.pin(contents);
+ if (!selection.equals(newSelection)) {
+ selection.set(newSelection);
+ if (selectionListener != null) {
+ selectionListener.actionPerformed(
+ new ActionEvent(this, ActionEvent.ACTION_PERFORMED,
+ "Selection Changed", 0));
+ }
+ repaint(10);
+ valid = false;
+ }
+ }
+
+ public void select(int start, int end) { // same as select(start, end, false)
+ select(start, end, false);
+ }
+
+ public void select(int start, int end, boolean clickAfter) { // builds the selection in tempSelection, then applies it
+ tempSelection.set(start, end, clickAfter);
+ select(tempSelection);
+ }
+
+ public int getSelectionStart() { // delegates to Selection.getStart()
+ return selection.getStart();
+ }
+
+ public int getSelectionEnd() { // delegates to Selection.getEnd()
+ return selection.getEnd();
+ }
+
+ public void setBounds(int x, int y, int w, int h) { // a new size means the line wrapping must be redone
+ super.setBounds(x,y,w,h);
+ redoLines = true;
+ }
+
+ public Dimension getPreferredSize() { // size recorded by the last paint2(); 50 x 10 before any paint
+ return new Dimension(lastWidth,lastHeight);
+ }
+
+ public Dimension getMaximumSize() { // same value as getPreferredSize()
+ return new Dimension(lastWidth,lastHeight);
+ }
+
+ public Dimension getMinimumSize() { // NOTE(review): lastHeight is used for the width as well - confirm this is intended
+ return new Dimension(lastHeight,lastHeight);
+ }
+
+ public void setText(String text) { // replaces the text, then re-pins the current selection against it
+ setText2(text);
+ select(tempSelection.set(selection).pin(contents));
+ }
+
+ public void setText2(String text) {
+     contents = text;
+     charBreaker.setText(text);   // all three break iterators follow the new text
+     wordBreaker.setText(text);
+     lineBreaker.setText(text);
+     redoLines = true;            // line starts are recomputed on the next paint
+     if (textListener != null) {
+         textListener.textValueChanged(new TextEvent(this, TextEvent.TEXT_VALUE_CHANGED));
+     }
+     repaint(16);
+ }
+
+ public void insertText(String text) { // replaces the current selection with text
+ if (activeStart == -1) activeStart = selection.getStart(); // remember where this run of input began
+ replaceRange(text, selection.getStart(), selection.getEnd());
+ }
+
+ public void replaceRange(String s, int start, int end) { // replaces contents[start, end) with s
+ setText2(contents.substring(0,start) + s
+ + contents.substring(end));
+ select(tempSelection.set(selection).
+ fixAfterReplace(start, end, s.length())); // lets the Selection adjust itself to the edit
+ validateKeyStart();
+ }
+
+ public String getText() { // the complete current text
+ return contents;
+ }
+
+ public void setFont(Font font) { // a new font changes the metrics, so the line wrapping is redone
+ super.setFont(font);
+ redoLines = true;
+ repaint(16);
+ }
+
+ // ================== Graphics ======================
+
+ public void update(Graphics g) { // paints directly; paint() redraws the whole area from the cached image
+ if (DEBUG) System.out.println("update");
+ paint(g);
+ }
+
+ public void paint(Graphics g) {
+ mySize = getSize();
+ if (cacheImage == null
+ || cacheImage.getHeight(this) != mySize.height
+ || cacheImage.getWidth(this) != mySize.width) {
+ cacheImage = createImage(mySize.width, mySize.height);
+ valid = false;
+ }
+ if (!valid || redoLines) {
+ if (DEBUG) System.out.println("painting");
+ paint2(cacheImage.getGraphics());
+ valid = true;
+ }
+ //getToolkit().sync();
+ if (DEBUG) System.out.println("copying");
+ g.drawImage(cacheImage,
+ 0, 0, mySize.width, mySize.height,
+ 0, 0, mySize.width, mySize.height,
+ this);
+ }
+
+ public void paint2(Graphics g) { // renders border, text lines and selection into g (the off-screen buffer)
+ g.clearRect(0, 0, mySize.width, mySize.height);
+ if (DEBUG) System.out.println("print");
+ if (focus) g.setColor(Color.black); // border: black with focus, gray without
+ else g.setColor(Color.gray);
+ g.drawRect(0,0,mySize.width-1,mySize.height-1);
+ g.setClip(1,1,
+ mySize.width-2,mySize.height-2); // keep all further drawing inside the border
+ g.setColor(Color.black);
+ g.setFont(getFont());
+ fm = g.getFontMetrics(); // cached for the offset/point conversions used elsewhere
+ lineAscent = fm.getAscent();
+ lineLeading = fm.getLeading();
+ lineHeight = lineAscent + fm.getDescent() + lineLeading;
+ int y = yInset + lineAscent; // baseline of the first line
+ String lastSubstring = "";
+ if (redoLines) fixLineStarts(mySize.width-xInset-xInset); // re-wrap to the width between the insets
+ for (int i = 0; i < lineCount; y += lineHeight, ++i) {
+ // LIU: Don't display terminating ^M characters
+ int lim = lineStarts[i+1];
+ if (lim > 0 && contents.length() > 0 &&
+ contents.charAt(lim-1) == CR) --lim;
+ lastSubstring = contents.substring(lineStarts[i],lim);
+ g.drawString(lastSubstring, xInset, y);
+ }
+ drawSelection(g, lastSubstring);
+ lastHeight = y + yInset - lineHeight + yInset; // recorded for the get*Size() methods
+ lastWidth = mySize.width-xInset-xInset;
+ }
+
+ void paintRect(Graphics g, int x, int y, int w, int h) {
+     if (!focus) {
+         g.drawRect(x, y, w-1, h-1);   // unfocused: outline only
+         return;
+     }
+     g.fillRect(x, y, w, h);           // focused: solid block
+ }
+
+ public void drawSelection(Graphics g, String lastSubstring) { // lastSubstring is not used in this method
+ g.setXORMode(Color.black); // everything below is drawn in XOR mode over the text
+ if (activeStart != -1) { // magenta bar one pixel left of the key-start position
+ offset2Point(activeStart, false, activePoint);
+ g.setColor(Color.magenta);
+ int line = activePoint.x - 1;
+ g.fillRect(line, activePoint.y, 1, lineHeight);
+ }
+ if (selection.isCaret()) { // caret only: just locate it
+ offset2Point(selection.caret, selection.clickAfter, caretPoint);
+ } else { // a range: blue with focus, yellow without
+ if (focus) g.setColor(Color.blue);
+ else g.setColor(Color.yellow);
+ offset2Point(selection.getStart(), true, startPoint);
+ offset2Point(selection.getEnd(), false, endPoint);
+ if (selection.getStart() == selection.caret)
+ caretPoint.setLocation(startPoint);
+ else caretPoint.setLocation(endPoint);
+ if (startPoint.y == endPoint.y) { // selection within one line: a single rectangle, at least 1 pixel wide
+ paintRect(g, startPoint.x, startPoint.y,
+ Math.max(1,endPoint.x-startPoint.x), lineHeight);
+ } else { // several lines: rest of the first line, full lines in between, start of the last line
+ paintRect(g, startPoint.x, startPoint.y,
+ (mySize.width-xInset)-startPoint.x, lineHeight);
+ if (startPoint.y + lineHeight < endPoint.y)
+ paintRect(g, xInset, startPoint.y + lineHeight,
+ (mySize.width-xInset)-xInset, endPoint.y - startPoint.y - lineHeight);
+ paintRect(g, xInset, endPoint.y, endPoint.x-xInset, lineHeight);
+ }
+ }
+ if (focus || selection.isCaret()) { // caret: green with focus, red without
+ if (focus) g.setColor(Color.green);
+ else g.setColor(Color.red);
+ int line = caretPoint.x - (selection.clickAfter ? 0 : 1);
+ g.fillRect(line, caretPoint.y, 1, lineHeight);
+ int w = lineHeight/12 + 1; // length of the short ticks at the caret's top and bottom
+ int braces = line - (selection.clickAfter ? -1 : w); // ticks go right of the bar for clickAfter, left otherwise
+ g.fillRect(braces, caretPoint.y, w, 1);
+ g.fillRect(braces, caretPoint.y + lineHeight - 1, w, 1);
+ }
+ }
+
+ public Point offset2Point(int off, boolean start, Point p) { // fills p with the pixel position of text offset off and returns it
+ int line = findLine(off, start);
+ int width = 0;
+ try {
+ width = fm.stringWidth(
+ contents.substring(lineStarts[line], off));
+ } catch (Exception e) { // on any failure the exception is printed and width stays 0
+ System.out.println(e);
+ }
+ p.x = width + xInset;
+ if (p.x > mySize.width - xInset) // never to the right of the right inset
+ p.x = mySize.width - xInset;
+ p.y = lineHeight * line + yInset; // top edge of the line
+ return p;
+ }
+
+ private int findLine(int off, boolean start) { // maps a text offset to an index into lineStarts[]
+ // if it is start, then go to the next line!
+ if (start) ++off;
+ for (int i = 1; i < lineCount; ++i) {
+ // LIU: This was <= ; changed to < to make caret after
+ // final CR in line appear at START of next line.
+ if (off < lineStarts[i]) return i-1;
+ }
+ // LIU: Check for special case; after CR at end of the last line
+ if (off == lineStarts[lineCount] &&
+ off > 0 && contents.length() > 0 && contents.charAt(off-1) == CR) {
+ return lineCount; // one past the last line index
+ }
+ return lineCount-1; // otherwise the offset belongs to the last line
+ }
+
+ // offsets on any line will go from start,true to end,false
+ // excluding start,false and end,true
+ public Selection point2Offset(Point p, Selection o) { // sets o.caret/o.clickAfter for pixel position p and returns o
+ if (p.y < yInset) { // above the text: start of the text
+ o.caret = 0;
+ o.clickAfter = true;
+ return o;
+ }
+ int line = (p.y - yInset)/lineHeight; // lineHeight is set by paint2()
+ if (line >= lineCount) { // below the last line: end of the text
+ o.caret = contents.length();
+ o.clickAfter = false;
+ return o;
+ }
+ int target = p.x - xInset;
+ if (target <= 0) { // left of the text: start of this line
+ o.caret = lineStarts[line];
+ o.clickAfter = true;
+ return o;
+ }
+ int lowGuess = lineStarts[line];
+ int lowWidth = 0;
+ int highGuess = lineStarts[line+1];
+ int highWidth = fm.stringWidth(contents.substring(lineStarts[line],highGuess));
+ if (target >= highWidth) { // right of the text: end of this line
+ o.caret = lineStarts[line+1];
+ o.clickAfter = false;
+ return o;
+ }
+ while (lowGuess < highGuess - 1) { // bisect the offsets of this line by measured prefix width
+ int guess = (lowGuess + highGuess)/2;
+ int width = fm.stringWidth(contents.substring(lineStarts[line],guess));
+ if (width <= target) {
+ lowGuess = guess;
+ lowWidth = width;
+ if (width == target) break;
+ } else {
+ highGuess = guess;
+ highWidth = width;
+ }
+ }
+ // at end, either lowWidth < target < width(low+1), or lowWidth = target
+ int highBound = charBreaker.following(lowGuess);
+ int lowBound = charBreaker.previous();
+ // we are now at character boundaries
+ if (lowBound != lowGuess)
+ lowWidth = fm.stringWidth(contents.substring(lineStarts[line],lowBound));
+ if (highBound != highGuess)
+ highWidth = fm.stringWidth(contents.substring(lineStarts[line],highBound));
+ // we now have the right widths
+ if (target - lowWidth < highWidth - target) { // pick whichever boundary is nearer to the pointer
+ o.caret = lowBound;
+ o.clickAfter = true;
+ } else {
+ o.caret = highBound;
+ o.clickAfter = false;
+ }
+ // we now have the closest!
+ return o;
+ }
+
+    /**
+     * Recomputes lineStarts and lineCount by repeatedly asking nextLine
+     * where each display line ends.
+     *
+     * @param width width handed to nextLine for every line
+     * @throws RuntimeException if nextLine fails to advance
+     */
+    private void fixLineStarts(int width) {
+        lineCount = 1;
+        lineStarts[0] = 0;
+        if (contents.length() == 0) {
+            // empty text: a single empty line, nothing else to compute
+            lineStarts[1] = 0;
+            return;
+        }
+        int start = 0;
+        // LIU: Add check for MAX_LINES
+        while (start < contents.length() && lineCount < MAX_LINES) {
+            int end = nextLine(fm, start, width);
+            lineStarts[lineCount++] = end;
+            if (end == start) { // LIU: Assertion
+                throw new RuntimeException("nextLine broken");
+            }
+            start = end;
+        }
+        // lineCount was used as the next free slot; step back to the real count
+        --lineCount;
+        redoLines = false;
+    }
+
+    /**
+     * Finds where the display line that begins at start ends.
+     * The line runs up to and including the first line separator
+     * (U+000A..U+000D, U+2028, U+2029); a CR immediately followed by LF is
+     * consumed as one separator. If that stretch is wider than width it is
+     * shortened to the longest piece that fits, trying line breaks first,
+     * then word breaks, then character breaks.
+     *
+     * @param fMtr  metrics used to measure text
+     * @param start offset of the first character of the line
+     * @param width available width
+     * @return offset just past the end of the line
+     */
+    // LIU: Enhanced to wrap long lines. Bug with return of start fixed.
+    public int nextLine(FontMetrics fMtr, int start, int width) {
+        final int textLength = contents.length();
+        int len = textLength;
+        for (int i = start; i < textLength; ++i) {
+            // check for line separator
+            char ch = contents.charAt(i);
+            if (ch >= 0x000A && ch <= 0x000D || ch == 0x2028 || ch == 0x2029) {
+                len = i + 1;
+                // crlf: the bound must be the length of the whole text. The
+                // old test compared against len, which had just been set to
+                // i + 1, so the LF was never taken along with the CR.
+                if (ch == 0x000D && len < textLength
+                        && contents.charAt(len) == 0x000A) {
+                    ++len; // grab extra char
+                }
+                break;
+            }
+        }
+        String subject = contents.substring(start, len);
+        if (visibleWidth(fMtr, subject) <= width) {
+            return len;
+        }
+
+        // LIU: Remainder of this method rewritten to accommodate lines
+        // longer than the component width by first trying to break
+        // into lines; then words; finally chars.
+        int n = findFittingBreak(fMtr, subject, width, lineBreaker);
+        if (n == 0) {
+            n = findFittingBreak(fMtr, subject, width, wordBreaker);
+        }
+        if (n == 0) {
+            n = findFittingBreak(fMtr, subject, width, charBreaker);
+        }
+        // nothing fits at all: fall back to the unwrapped line end
+        return n > 0 ? start + n : len;
+    }
+
+    /**
+     * LIU: Finds the longest prefix of a string that fits a given width and
+     * ends on a boundary reported by a BreakIterator. If even the first
+     * subunit is too long, returns 0.
+     * @param fMtr metrics to use
+     * @param line the string to be fitted into width
+     * @param width line.substring(0, result) must be <= width
+     * @param breaker the BreakIterator that will be used to find subunits
+     * @return maximum characters, at boundaries returned by breaker,
+     * that fit into width, or zero on failure
+     */
+    private int findFittingBreak(FontMetrics fMtr, String line, int width,
+                                 BreakIterator breaker) {
+        breaker.setText(line);
+        int fitted = breaker.first();
+        for (int candidate = breaker.next();
+                candidate != BreakIterator.DONE;
+                candidate = breaker.next()) {
+            if (visibleWidth(fMtr, line.substring(0, candidate)) > width) {
+                break;
+            }
+            fitted = candidate;
+        }
+        return fitted;
+    }
+
+    /**
+     * Measures a string while ignoring trailing spaces and line separators
+     * (U+000A..U+000D, U+2028, U+2029).
+     *
+     * @param fMtr metrics used to measure the text
+     * @param s    string to measure
+     * @return width of s without its trailing blanks, or 0 if nothing is left
+     */
+    public int visibleWidth(FontMetrics fMtr, String s) {
+        int end = s.length();
+        while (end > 0) {
+            char ch = s.charAt(end - 1);
+            boolean invisible = ch == ' '
+                    || (ch >= 0x000A && ch <= 0x000D)
+                    || ch == 0x2028
+                    || ch == 0x2029;
+            if (!invisible) {
+                return fMtr.stringWidth(s.substring(0, end));
+            }
+            --end;
+        }
+        return 0;
+    }
+
+// =============== Utility ====================
+
+ /**
+ * Toggles the text just before the end of the selection between hex
+ * notation and the character it denotes.
+ * Up to eight characters before selection.getEnd() are read as hex digits.
+ * Depending on the value found, the digits are replaced by a character
+ * (or surrogate pair); with fewer than four digits the last character
+ * (or surrogate pair) is replaced by its hex form instead.
+ */
+ private void fixHex() {
+ if (selection.getEnd() == 0) return;
+ int store = 0;
+ int places = 1;
+ int count = 0;
+ int min = Math.min(8,selection.getEnd());
+ // walk backwards from the selection end, least significant digit first
+ for (int i = 0; i < min; ++i) {
+ char ch = contents.charAt(selection.getEnd()-1-i);
+ int value = Character.getNumericValue(ch);
+ // stop at the first character that is not a hex digit
+ if (value < 0 || value > 15) break;
+ store += places * value;
+ ++count;
+ places *= 16;
+ }
+ String add = "";
+ int bottom = store & 0xFFFF;
+ // The literals 0xD8000000 and 0xDC000000 are negative ints; a store
+ // built from eight digits wraps the same way, so the signed comparison
+ // still selects an upper half in D800..DBFF.
+ if (store >= 0xD8000000 && store < 0xDC000000
+ && bottom >= 0xDC00 && bottom < 0xE000) { // surrogates
+ add = "" + (char)(store >> 16) + (char)bottom;
+ } else if (store > 0xFFFF && store <= 0x10FFFF) {
+ // supplementary code point: encode as a surrogate pair
+ store -= 0x10000;
+ add = "" + (char)(((store >> 10) & 0x3FF) + 0xD800)
+ + (char)((store & 0x3FF) + 0xDC00);
+
+ } else if (count >= 4) {
+ // use only the last four digits as one UTF-16 code unit
+ count = 4;
+ add = ""+(char)(store & 0xFFFF);
+ } else {
+ // fewer than four hex digits: go the other way, character to hex
+ count = 1;
+ char ch = contents.charAt(selection.getEnd()-1);
+ add = hex(ch);
+ // a trail surrogate preceded by a lead surrogate is converted as a pair
+ if (ch >= 0xDC00 && ch <= 0xDFFF && selection.getEnd() > 1) {
+ ch = contents.charAt(selection.getEnd()-2);
+ if (ch >= 0xD800 && ch <= 0xDBFF) {
+ count = 2;
+ add = hex(ch) + add;
+ }
+ }
+ }
+ // swap the count characters before the selection end for the result
+ replaceRange(add, selection.getEnd()-count, selection.getEnd());
+ }
+
+    /**
+     * Formats a UTF-16 code unit as four uppercase hexadecimal digits.
+     *
+     * @param ch code unit to format
+     * @return zero-padded, four-digit uppercase hex string
+     */
+    public static String hex(char ch) {
+        String digits = Integer.toString(ch, 16).toUpperCase();
+        StringBuilder padded = new StringBuilder(4);
+        for (int i = digits.length(); i < 4; ++i) {
+            padded.append('0');
+        }
+        return padded.append(digits).toString();
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/Selection.java b/demos/src/com/ibm/icu/dev/demo/impl/Selection.java
new file mode 100644
index 00000000000..c07b7704489
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/Selection.java
@@ -0,0 +1,161 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.impl;
+import java.text.BreakIterator;
+
+/**
+ * A mutable text selection made of two offsets, anchor and caret, plus a
+ * clickAfter flag that travels with the caret. The selected range is
+ * [getStart(), getEnd()), whichever way round anchor and caret are.
+ */
+public final class Selection {
+
+    /** One end of the selection; nextBound moves it only when not extending. */
+    public int anchor;
+    /** The other end of the selection; the one nextBound advances. */
+    public int caret;
+    /** Flag stored alongside the caret by hit-testing and pinning code. */
+    public boolean clickAfter;
+
+    /** @return the smaller of anchor and caret */
+    public int getStart() {
+        return anchor < caret ? anchor : caret;
+    }
+
+    /** @return the larger of anchor and caret */
+    public int getEnd() {
+        return anchor > caret ? anchor : caret;
+    }
+
+    /** @return true if anchor and caret coincide (empty selection) */
+    public boolean isCaret() {
+        return anchor == caret;
+    }
+
+    /**
+     * Copies all three fields from another selection.
+     * @param other selection to copy from
+     * @return this
+     */
+    public Selection set(Selection other) {
+        anchor = other.anchor;
+        caret = other.caret;
+        clickAfter = other.clickAfter;
+        return this;
+    }
+
+    /**
+     * Sets all three fields.
+     * @return this
+     */
+    public Selection set(int anchor, int caret, boolean clickAfter) {
+        this.anchor = anchor;
+        this.caret = caret;
+        this.clickAfter = clickAfter;
+        return this;
+    }
+
+    /**
+     * Two selections are equal when anchor, caret and clickAfter all match.
+     * Returns false, instead of throwing, for null and for objects of any
+     * other type.
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        if (!(other instanceof Selection)) {
+            return false;
+        }
+        Selection that = (Selection) other;
+        return anchor == that.anchor
+            && caret == that.caret
+            && clickAfter == that.clickAfter;
+    }
+
+    /** Hash code consistent with equals. */
+    @Override
+    public int hashCode() {
+        int result = anchor;
+        result = 31 * result + caret;
+        result = 31 * result + (clickAfter ? 1 : 0);
+        return result;
+    }
+
+    /**
+     * @param other selection to compare with
+     * @return true if this selection starts before the other one ends
+     */
+    public boolean isLessThan(Selection other) {
+        return getStart() < other.getEnd();
+    }
+
+    /**
+     * Clamps anchor and caret into [0, text.length()]. When the caret has
+     * to be moved, clickAfter is set to true at the end of the text and to
+     * false at the beginning.
+     * @param text text the selection refers to
+     * @return this
+     */
+    public Selection pin(String text) {
+        if (anchor > text.length()) {
+            anchor = text.length();
+        } else if (anchor < 0) {
+            anchor = 0;
+        }
+        if (caret > text.length()) {
+            caret = text.length();
+            clickAfter = true;
+        } else if (caret < 0) {
+            caret = 0;
+            clickAfter = false;
+        }
+        return this;
+    }
+
+    /**
+     * Exchanges all three fields with another selection.
+     * @param after selection to swap with; it is modified as well
+     * @return this
+     */
+    public Selection swap(Selection after) {
+        int temp = anchor;
+        anchor = after.anchor;
+        after.anchor = temp;
+        temp = caret;
+        caret = after.caret;
+        after.caret = temp;
+        boolean b = clickAfter;
+        clickAfter = after.clickAfter;
+        after.clickAfter = b;
+        return this;
+    }
+
+    /**
+     * Adjusts the offsets after the range [start, end) of the text has been
+     * replaced by len characters. Offsets before start are untouched;
+     * offsets inside the replaced range move to the end of the new text;
+     * offsets behind it are shifted by the change in length.
+     * @return this
+     */
+    public Selection fixAfterReplace(int start, int end, int len) {
+        if (anchor >= start) {
+            if (anchor < end) anchor = end;
+            anchor = start + len + anchor - end;
+        }
+        if (caret >= start) {
+            if (caret < end) caret = end;
+            caret = start + len + caret - end;
+        }
+        return this;
+    }
+
+    // Mac & Windows considerably different
+    // Mac: end++. If start!=end, start=end
+    //  SHIFT: move end right
+    //  CTL: no different
+    // Windows:
+    //  UNSHIFTED: if start!=end, start = end, else start=end=end+1;
+    //       anchor = tip = start
+    //  SHIFT: tip++
+    //  CTL: if start!=end, start = end = nextbound(end-1),
+    //   else start=end=nextbound(end)
+    //       anchor = tip = start
+    //  CTL/SHIFT: tip = nextbound(tip)
+
+    /**
+     * Moves the caret to the next boundary in the given direction.
+     * @param breaker   supplies the boundaries
+     * @param direction positive to move forward, otherwise backward
+     * @param extend    if false the anchor follows the caret, collapsing
+     *                  the selection
+     * @return this
+     */
+    public Selection nextBound(BreakIterator breaker,
+      int direction, boolean extend) {
+        if (!extend && anchor != caret) caret -= direction;
+        caret = next(caret, breaker, direction, true);
+        if (!extend) anchor = caret;
+        clickAfter = false;
+        return this;
+    }
+
+    /**
+     * Expands start and end outwards to boundaries of the breaker, if they
+     * are not already on one.
+     * @param breaker supplies the boundaries
+     */
+    public void expand(BreakIterator breaker) {
+        if (anchor <= caret) {
+            anchor = next(anchor, breaker, -1, false);
+            caret = next(caret, breaker, 1, false);
+        } else {
+            anchor = next(anchor, breaker, 1, false);
+            caret = next(caret, breaker, -1, false);
+        }
+    }
+
+    /**
+     * Finds the neighbouring boundary of a position.
+     * @param position  offset to start from
+     * @param breaker   supplies the boundaries
+     * @param direction positive to search forward, otherwise backward
+     * @param different false - move to next boundary, unless on one;
+     *                  true - move to next boundary, even if on one
+     * @return the boundary found; if the breaker rejects the offset the
+     *         position is handed back as adjusted so far
+     */
+    public static int next(int position, BreakIterator breaker,
+      int direction, boolean different) {
+        if (!different) position -= direction;
+        try {
+            if (direction > 0) {
+                position = breaker.following(position);
+            } else {
+                breaker.following(position-1);
+                position = breaker.previous();
+            }
+        } catch (Exception ignored) {
+            // Deliberate best effort: following() throws for offsets outside
+            // the text (for instance position-1 == -1); keep the position.
+        }
+        return position;
+    }
+}
+
diff --git a/demos/src/com/ibm/icu/dev/demo/impl/package.html b/demos/src/com/ibm/icu/dev/demo/impl/package.html
new file mode 100644
index 00000000000..a7e8d35a2ac
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/impl/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+Shared utilities for demo applications and Applets.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/messagepattern/MessagePatternDemo.java b/demos/src/com/ibm/icu/dev/demo/messagepattern/MessagePatternDemo.java
new file mode 100644
index 00000000000..4b8e75efb55
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/messagepattern/MessagePatternDemo.java
@@ -0,0 +1,123 @@
+/*
+*******************************************************************************
+* Copyright (C) 2010-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* created on: 2010aug21
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.dev.demo.messagepattern;
+
+import com.ibm.icu.text.MessagePattern;
+
+/**
+ * Demo code for MessagePattern class.
+ * Pretty-prints the list of MessagePattern Parts and uses the MiniMessageFormatter
+ * with a few patterns.
+ * @author Markus Scherer
+ * @since 2010-aug-21
+ */
+public final class MessagePatternDemo {
+ private static final String manySpaces=" ";
+
+ private static final void printParts(MessagePattern msg) {
+ String autoQA=msg.autoQuoteApostropheDeep();
+ if(!autoQA.equals(msg.getPatternString())) {
+ System.out.println("autoQA: "+autoQA);
+ }
+ String indent="";
+ StringBuilder explanation=new StringBuilder();
+ MessagePattern.Part prevPart=null;
+ int count=msg.countParts();
+ for(int i=0; i0) {
+ explanation.append("=\"").append(msg.getSubstring(part)).append('"');
+ }
+ if(type.hasNumericValue()) {
+ explanation.append('=').append(msg.getNumericValue(part));
+ }
+ System.out.format("%2d: %s%s%s\n", i, indent, partString, explanation);
+ if(type==MessagePattern.Part.Type.MSG_LIMIT) {
+ int nestingLevel=part.getValue();
+ if(nestingLevel>1) {
+ indent=manySpaces.substring(0, (nestingLevel-1)*2); // outdent
+ } else {
+ indent="";
+ }
+ }
+ prevPart=part;
+ }
+ }
+
+ private static final MessagePattern print(String s) {
+ System.out.println("message: "+s);
+ try {
+ MessagePattern msg=new MessagePattern(s);
+ printParts(msg);
+ return msg;
+ } catch(Exception e) {
+ System.out.println("Exception: "+e.getMessage());
+ return null;
+ }
+ }
+
+ private static final void printFormat(String s, Object... args) {
+ MessagePattern msg=print(s);
+ if(msg!=null) {
+ System.out.println(new MiniMessageFormatter(msg).format(new StringBuilder(), args));
+ }
+ }
+
+ private static final void printFormatWithNamedArgs(String s, Object... args) {
+ MessagePattern msg=print(s);
+ if(msg!=null) {
+ System.out.println(new MiniMessageFormatter(msg).format(
+ new StringBuilder(), MiniMessageFormatter.mapFromNameValuePairs(args)));
+ }
+ }
+
+ public static void main(String[] argv) {
+ print("Hello!");
+ print("Hel'lo!");
+ print("Hel'{o");
+ print("Hel'{'o");
+ // double apostrophe inside quoted literal text still encodes a single apostrophe
+ printFormat("a'{bc''de'f");
+ print("a'{bc''de'f{0,number,g'hi''jk'l#}");
+ print("abc{0}def");
+ print("abc{ arg }def");
+ print("abc{1}def{arg}ghi");
+ print("abc{2, number}ghi{3, select, xx {xxx} other {ooo}} xyz");
+ print("abc{gender,select,"+
+ "other{His name is {person,XML,{$PERSON} }.}}xyz");
+ print("abc{num_people, plural, offset:17 few{fff} other {oooo}}xyz");
+ print("abc{ num , plural , offset: 2 =1 {1} =-1 {-1} =3.14 {3.14} other {oo} }xyz");
+ print("I don't {a,plural,other{w'{'on't #'#'}} and "+
+ "{b,select,other{shan't'}'}} '{'''know'''}' and "+
+ "{c,choice,0#can't'|'}"+
+ "{z,number,#'#'###.00'}'}.");
+ print("a_{0,choice,-∞ #-inf| 5≤ five | 99 # ninety'|'nine }_z");
+ print("a_{0,plural,other{num=#'#'=#'#'={1,number,##}!}}_z");
+ print("}}}{0}}"); // yes, unmatched '}' are ok in ICU MessageFormat
+ printFormat("Hello {0}!", "Alice");
+ String msg="++{0, select, female{{1} calls you her friend}"+
+ "other{{1} calls you '{their}' friend}"+
+ "male{{1} calls you his friend}}--";
+ printFormat(msg, "female", "Alice");
+ printFormat(msg, "male", "Bob");
+ printFormat(msg, "unknown", "sushifan3");
+ msg="_'__{gender, select, female{Her n'ame is {person_name}.}"+
+ "other{His n'ame is {person_name}.}}__'_";
+ printFormatWithNamedArgs(msg, "gender", "female", "person_name", "Alice");
+ printFormatWithNamedArgs(msg, "gender", "male", "person_name", "Bob");
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/messagepattern/MiniMessageFormatter.java b/demos/src/com/ibm/icu/dev/demo/messagepattern/MiniMessageFormatter.java
new file mode 100644
index 00000000000..2c4bb41c4d6
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/messagepattern/MiniMessageFormatter.java
@@ -0,0 +1,186 @@
+/*
+*******************************************************************************
+* Copyright (C) 2010-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* created on: 2010aug21
+* created by: Markus W. Scherer
+*/
+
+package com.ibm.icu.dev.demo.messagepattern;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.ibm.icu.text.MessagePattern;
+import com.ibm.icu.text.MessagePattern.ArgType;
+import com.ibm.icu.text.MessagePattern.Part;
+import com.ibm.icu.util.Freezable;
+
+/**
+ * Mini message formatter for a small subset of the ICU MessageFormat syntax.
+ * Supports only string substitution and select formatting.
+ * @author Markus Scherer
+ * @since 2010-aug-21
+ */
+public final class MiniMessageFormatter implements Freezable {
+ /** Creates a formatter around a new, empty MessagePattern. */
+ public MiniMessageFormatter() {
+ this.msg=new MessagePattern();
+ }
+
+ /**
+ * Creates a formatter for an already parsed pattern.
+ * @param msg parsed pattern; a clone of it is stored, not the object itself
+ */
+ public MiniMessageFormatter(MessagePattern msg) {
+ this.msg=(MessagePattern)msg.clone();
+ }
+
+ /**
+ * Creates a formatter for a pattern string.
+ * @param msg pattern string handed to the MessagePattern constructor
+ */
+ public MiniMessageFormatter(String msg) {
+ this.msg=new MessagePattern(msg);
+ }
+
+ /**
+ * Parses a new pattern string into the stored MessagePattern.
+ * @param msg pattern string to parse
+ * @return this
+ */
+ public MiniMessageFormatter applyPattern(String msg) {
+ this.msg.parse(msg);
+ return this;
+ }
+
+ /** @return the pattern string as reported by the stored MessagePattern */
+ public String getPatternString() {
+ return msg.getPatternString();
+ }
+
+ /** @return whether the stored MessagePattern reports named arguments */
+ public boolean hasNamedArguments() {
+ return msg.hasNamedArguments();
+ }
+
+ /** @return whether the stored MessagePattern reports numbered arguments */
+ public boolean hasNumberedArguments() {
+ return msg.hasNumberedArguments();
+ }
+
+ /**
+ * Formats the parsed message with positional arguments.
+ * Supports only string substitution (e.g., {3}) and select format.
+ * @param dest gets the formatted message appended
+ * @param args positional arguments
+ * @return dest
+ * @throws IllegalArgumentException if the message uses named arguments
+ */
+ public Appendable format(Appendable dest, Object... args) {
+ if(msg.hasNamedArguments()) {
+ throw new IllegalArgumentException(
+ "Formatting message with named arguments using positional argument values.");
+ }
+ format(0, dest, args, null);
+ return dest;
+ }
+
+ /**
+ * Convenience method: parses msg and formats it with positional arguments.
+ * @param msg pattern string
+ * @param args positional arguments
+ * @return the formatted message
+ */
+ public static final String format(String msg, Object... args) {
+ // the builder starts at twice the pattern length as an initial capacity
+ return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), args).toString();
+ }
+
+ /**
+ * Formats the parsed message with named arguments.
+ * NOTE(review): the raw Map type looks like it lost its type arguments -- confirm.
+ * @param dest gets the formatted message appended
+ * @param argsMap argument values, looked up by argument name
+ * @return dest
+ * @throws IllegalArgumentException if the message uses numbered arguments
+ */
+ public Appendable format(Appendable dest, Map argsMap) {
+ if(msg.hasNumberedArguments()) {
+ throw new IllegalArgumentException(
+ "Formatting message with numbered arguments using named argument values.");
+ }
+ format(0, dest, null, argsMap);
+ return dest;
+ }
+
+ /**
+ * Convenience method: parses msg and formats it with named arguments.
+ * @param msg pattern string
+ * @param argsMap argument values, looked up by argument name
+ * @return the formatted message
+ */
+ public static final String format(String msg, Map argsMap) {
+ return new MiniMessageFormatter(msg).format(new StringBuilder(2*msg.length()), argsMap).toString();
+ }
+
+ /**
+ * Formats the (sub-)message whose first part has index msgStart and
+ * appends the result to dest. Calls itself for the selected sub-message
+ * of a select argument.
+ * @param msgStart index of the part that opens the message
+ * @param dest gets the formatted text appended
+ * @param args positional arguments; when null, argsMap is used instead
+ * @param argsMap named arguments, consulted only when args is null
+ * @return index of the MSG_LIMIT part that closes the message
+ * @throws IndexOutOfBoundsException if an argument value is missing
+ * @throws UnsupportedOperationException for argument types other than
+ * NONE and SELECT
+ * @throws RuntimeException wrapping an IOException thrown by dest
+ */
+ private int format(int msgStart, Appendable dest, Object[] args, Map argsMap) {
+ try {
+ String msgString=msg.getPatternString();
+ // pattern index up to which the text has already been handled
+ int prevIndex=msg.getPart(msgStart).getLimit();
+ for(int i=msgStart+1;; ++i) {
+ Part part=msg.getPart(i);
+ Part.Type type=part.getType();
+ int index=part.getIndex();
+ // copy the literal text between the previous part and this one
+ dest.append(msgString, prevIndex, index);
+ if(type==Part.Type.MSG_LIMIT) {
+ return i;
+ }
+ if(type==Part.Type.SKIP_SYNTAX || type==Part.Type.INSERT_CHAR) {
+ // resume copying after this part
+ prevIndex=part.getLimit();
+ continue;
+ }
+ assert type==Part.Type.ARG_START : "Unexpected Part "+part+" in parsed message.";
+ int argLimit=msg.getLimitPartIndex(i);
+ ArgType argType=part.getArgType();
+ // the part after ARG_START carries the argument number or name
+ part=msg.getPart(++i);
+ Object arg;
+ if(args!=null) {
+ try {
+ arg=args[part.getValue()]; // args[ARG_NUMBER]
+ } catch(IndexOutOfBoundsException e) {
+ // rethrown with a message that names the missing index
+ throw new IndexOutOfBoundsException(
+ "No argument at index "+part.getValue());
+ }
+ } else {
+ arg=argsMap.get(msg.getSubstring(part)); // args[ARG_NAME]
+ if(arg==null) {
+ throw new IndexOutOfBoundsException(
+ "No argument for name "+msg.getSubstring(part));
+ }
+ }
+ String argValue=arg.toString();
+ ++i;
+ if(argType==ArgType.NONE) {
+ // simple substitution
+ dest.append(argValue);
+ } else if(argType==ArgType.SELECT) {
+ // Similar to SelectFormat.findSubMessage().
+ // part index of the chosen sub-message; 0 while none is chosen
+ int subMsgStart=0;
+ for(;; ++i) { // (ARG_SELECTOR, message) pairs until ARG_LIMIT
+ // after the i++ below, i is the index of the message that follows the selector
+ part=msg.getPart(i++);
+ if(part.getType()==Part.Type.ARG_LIMIT) {
+ assert subMsgStart!=0; // The parser made sure this is the case.
+ break;
+ // else: part is an ARG_SELECTOR followed by a message
+ } else if(msg.partSubstringMatches(part, argValue)) {
+ // keyword matches
+ subMsgStart=i;
+ break;
+ } else if(subMsgStart==0 && msg.partSubstringMatches(part, "other")) {
+ // remember the first "other" message as the fallback
+ subMsgStart=i;
+ }
+ // jump to the end of the message that was not chosen
+ i=msg.getLimitPartIndex(i);
+ }
+ format(subMsgStart, dest, args, argsMap);
+ } else {
+ throw new UnsupportedOperationException("Unsupported argument type "+argType);
+ }
+ // continue behind the whole argument
+ prevIndex=msg.getPart(argLimit).getLimit();
+ i=argLimit;
+ }
+ } catch(IOException e) { // Appendable throws IOException
+ throw new RuntimeException(e); // We do not want a throws clause.
+ }
+ }
+
+ /**
+ * Presents an array of (String, Object) pairs as a Map.
+ * Only for temporary use for formatting with named arguments.
+ */
+ public static Map mapFromNameValuePairs(Object[] args) {
+ HashMap argsMap = new HashMap();
+ for(int i=0; i");
+ button.addActionListener( new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ theNumber /= 10;
+ redisplay();
+ }
+ } );
+ panel2.add(button);
+ button = new Button("-100");
+ button.addActionListener( new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ roll(-100);
+ }
+ } );
+ panel2.add(button);
+ button = new Button("-10");
+ button.addActionListener( new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ roll(-10);
+ }
+ } );
+ panel2.add(button);
+ button = new Button("-1");
+ button.addActionListener( new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ roll(-1);
+ }
+ } );
+ panel2.add(button);
+ panel.add(panel2, "East");
+ leftPanel.add(panel, "North");
+ leftPanel.add(textField, "Center");
+
+ Panel rightPanel = new Panel();
+ rightPanel.setLayout(new BorderLayout());
+ formatterMenu = new Choice();
+ for (int i = 0; i < RbnfSampleRuleSets.sampleRuleSetNames.length; i++)
+ formatterMenu.addItem(RbnfSampleRuleSets.sampleRuleSetNames[i]);
+ formatterMenu.addItem("Custom");
+ formatterMenu.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ Choice source = (Choice)(e.getSource());
+ int item = source.getSelectedIndex();
+ Locale locale = RbnfSampleRuleSets.sampleRuleSetLocales[item];
+
+ commentaryField.setText(RbnfSampleRuleSets.
+ sampleRuleSetCommentary[item]);
+
+ if (locale != null && (locale.getLanguage().equals("iw")
+ || locale.getLanguage().equals("ru") || locale.getLanguage().equals("ja")
+ || locale.getLanguage().equals("el")
+ || locale.getLanguage().equals("zh"))) {
+ textField.togglePanes(false);
+ rulesField.togglePanes(false);
+ }
+ else {
+ textField.togglePanes(true);
+ rulesField.togglePanes(true);
+ }
+
+ makeNewSpelloutFormatter();
+ redisplay();
+ }
+ } );
+
+ ruleSetMenu = new Choice();
+ populateRuleSetMenu();
+
+ ruleSetMenu.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ ruleSetName = ruleSetMenu.getSelectedItem();
+ redisplay();
+ }
+ } );
+
+ Panel menuPanel = new Panel();
+ menuPanel.setLayout(new GridLayout(1, 2));
+ menuPanel.add(formatterMenu);
+ menuPanel.add(ruleSetMenu);
+ rightPanel.add(menuPanel, "North");
+
+ rulesField.setText(RbnfSampleRuleSets.sampleRuleSets[formatterMenu.getSelectedIndex()]);
+ rightPanel.add(rulesField, "Center");
+
+ mainPanel.add(leftPanel);
+ mainPanel.add(rightPanel);
+
+ window.add(mainPanel, "Center");
+ window.add(commentaryField, "South");
+
+ window.doLayout();
+ window.show();
+ final DemoApplet theApplet = applet;
+ window.addWindowListener(
+ new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ setVisible(false);
+ window.dispose();
+
+ if (theApplet != null) {
+ theApplet.demoClosed();
+ } else System.exit(0);
+ }
+ } );
+ return window;
+ }
+
+ /**
+ * Adds delta to the number on display and refreshes both fields.
+ * @param delta amount to add; may be negative
+ */
+ void roll(int delta) {
+ theNumber += delta;
+ redisplay();
+ }
+
+ /**
+ * Writes the current number into the number field, formatted by
+ * numberFormatter, and into the text field, formatted by
+ * spelloutFormatter with the selected rule set.
+ */
+ void redisplay() {
+ numberField.setText(numberFormatter.format(theNumber));
+ textField.setText(spelloutFormatter.format(theNumber, ruleSetName));
+ }
+
+    /**
+     * Rebuilds spelloutFormatter for the entry selected in formatterMenu,
+     * shows the matching rules in rulesField, and repopulates the rule set
+     * menu. The "Custom" entry uses customRuleSet; every other entry uses
+     * the sample rule set and locale at the selected index, with the
+     * default locale standing in when the sample has none.
+     */
+    void makeNewSpelloutFormatter() {
+        int item = formatterMenu.getSelectedIndex();
+        boolean custom = formatterMenu.getSelectedItem().equals("Custom");
+
+        if (!custom) {
+            String rules = RbnfSampleRuleSets.sampleRuleSets[item];
+            rulesField.setText(rules);
+
+            Locale locale = RbnfSampleRuleSets.sampleRuleSetLocales[item];
+            spelloutFormatter = new RuleBasedNumberFormat(
+                    rules, locale != null ? locale : Locale.getDefault());
+        } else {
+            rulesField.setText(customRuleSet);
+            spelloutFormatter = new RuleBasedNumberFormat(customRuleSet);
+        }
+        spelloutFormatter.setLenientParseMode(lenientParse);
+        populateRuleSetMenu();
+    }
+
+    /**
+     * Refills ruleSetMenu with the rule set names of the current formatter
+     * and records the selected name in ruleSetName. While the menu does not
+     * exist yet, the first rule set name is recorded instead.
+     */
+    void populateRuleSetMenu() {
+        String[] names = spelloutFormatter.getRuleSetNames();
+
+        if (ruleSetMenu == null) {
+            ruleSetName = names[0];
+            return;
+        }
+
+        ruleSetMenu.removeAll();
+        for (String name : names) {
+            ruleSetMenu.addItem(name);
+        }
+        ruleSetName = ruleSetMenu.getSelectedItem();
+    }
+
+// private Frame demoWindow = null;
+
+ private TextComponent numberField;
+ private DemoTextFieldHolder textField;
+ private DemoTextFieldHolder rulesField;
+ private TextComponent commentaryField;
+ private Checkbox lenientParseButton;
+
+ private boolean numberFieldHasFocus = true;
+
+ private RuleBasedNumberFormat spelloutFormatter;
+ private DecimalFormat numberFormatter;
+ private ParsePosition parsePosition;
+
+ private boolean lenientParse = true;
+
+ private double theNumber = 0;
+// private boolean canEdit = true;
+
+ private Choice formatterMenu;
+ private Choice ruleSetMenu;
+ private String ruleSetName;
+
+ private String customRuleSet = "NO RULES!";
+}
+
+/**
+ * Minimal read-only text display that wraps its text at line-break
+ * boundaries and at newline characters and draws it line by line.
+ */
+class DemoTextField extends Component {
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = -7947090021239472658L;
+    public DemoTextField() {
+    }
+
+    /**
+     * Replaces the displayed text and requests a repaint.
+     * @param text new text
+     */
+    public void setText(String text) {
+        this.text = text;
+        this.repaint();
+    }
+
+    /** @return the text last passed to setText, or null if none was set */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Draws the text wrapped to the current component width.
+     * Returns without drawing when there is no text or the component has no
+     * width; with a width of zero no break position ever fits, and the
+     * wrapping loop below would never advance.
+     * @param g graphics context to draw into
+     */
+    public void paint(Graphics g) {
+        String txt = getText();
+        int width = getSize().width;
+        totalHeight = 0;
+        if (txt == null || txt.length() == 0 || width <= 0) {
+            return;
+        }
+
+        // install the font first so that the metrics describe the font
+        // the text is actually drawn with
+        Font font = getFont();
+        g.setFont(font);
+        FontMetrics fm = g.getFontMetrics();
+        BreakIterator bi = BreakIterator.getLineInstance();
+        bi.setText(txt);
+        int lineHeight = fm.getHeight();
+        int penY = fm.getAscent();
+        int lineStart = 0;
+        int tempLineEnd = bi.first();
+        int lineEnd = 0;
+
+        while (lineStart < txt.length()) {
+            // a newline forces the line to end no later than here
+            int maxLineEnd = txt.indexOf('\n', lineStart);
+            if (maxLineEnd == -1) {
+                maxLineEnd = Integer.MAX_VALUE;
+            }
+            // take break positions for as long as the line still fits
+            while (tempLineEnd != BreakIterator.DONE && fm.stringWidth(txt.substring(
+                            lineStart, tempLineEnd)) < width) {
+                lineEnd = tempLineEnd;
+                tempLineEnd = bi.next();
+            }
+            // nothing fitted: take the next break position regardless,
+            // or the rest of the text when the iterator is exhausted
+            if (lineStart >= lineEnd) {
+                if (tempLineEnd == BreakIterator.DONE) {
+                    lineEnd = txt.length();
+                } else {
+                    lineEnd = tempLineEnd;
+                }
+            }
+            if (lineEnd > maxLineEnd) {
+                lineEnd = maxLineEnd;
+            }
+            g.drawString(txt.substring(lineStart, lineEnd), 0, penY);
+            penY += lineHeight;
+            totalHeight += lineHeight;
+            lineStart = lineEnd;
+            // skip the newline that ended this line
+            if (lineStart < txt.length() && txt.charAt(lineStart) == '\n') {
+                ++lineStart;
+            }
+        }
+    }
+
+/*
+    public Dimension getPreferredSize() {
+        Dimension size = getParent().getSize();
+        return new Dimension(size.width, totalHeight);
+    }
+*/
+
+    private String text;
+    private int totalHeight;
+}
+
+/**
+ * Panel with two cards showing the same text: a TextArea, and a
+ * DemoTextField inside a ScrollPane. Focus, key and selection calls are
+ * passed on to the TextArea only.
+ */
+class DemoTextFieldHolder extends Panel {
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 7514498764062569858L;
+
+    private TextArea tf1 = null;
+    private DemoTextField tf2 = null;
+    private ScrollPane sp = null;
+    private boolean showingRealTextField = true;
+
+    /** Builds both cards; the TextArea card is added first. */
+    public DemoTextFieldHolder() {
+        tf1 = new TextArea("", 0, 0, TextArea.SCROLLBARS_VERTICAL_ONLY);
+        tf2 = new DemoTextField();
+        sp = new ScrollPane();
+
+        setLayout(new CardLayout());
+
+        sp.add(tf2, "TextField1");
+        sp.setVisible(false);
+        add(tf1, "TestField2");
+        add(sp, "ScrollPane");
+    }
+
+    /** Registers the listener on the TextArea. */
+    public void addFocusListener(FocusListener l) {
+        tf1.addFocusListener(l);
+    }
+
+    /** Registers the listener on the TextArea. */
+    public void addKeyListener(KeyListener l) {
+        tf1.addKeyListener(l);
+    }
+
+    /** Sets the same text on both cards. */
+    public void setText(String text) {
+        tf1.setText(text);
+        tf2.setText(text);
+    }
+
+    /** @return the text held by the TextArea */
+    public String getText() {
+        return tf1.getText();
+    }
+
+    /** Selects the given range in the TextArea. */
+    public void select(int start, int end) {
+        tf1.select(start, end);
+    }
+
+    /** Selects the whole text of the TextArea. */
+    public void selectAll() {
+        tf1.selectAll();
+    }
+
+    /**
+     * Flips to the other card when the requested state differs from the
+     * recorded one.
+     * @param canShowRealTextField state to record as the one now showing
+     */
+    public void togglePanes(boolean canShowRealTextField) {
+        if (canShowRealTextField == showingRealTextField) {
+            return;
+        }
+        CardLayout cards = (CardLayout) getLayout();
+        cards.next(this);
+        showingRealTextField = canShowRealTextField;
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
new file mode 100644
index 00000000000..c03ed71a186
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
@@ -0,0 +1,1941 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.rbnf;
+
+import java.util.Locale;
+
+/**
+ * A collection of example rule sets for use with RuleBasedNumberFormat.
+ * These examples are intended to serve both as demonstrations of what can
+ * be done with this framework, and as starting points for designing new
+ * rule sets.
+ *
+ * For those that claim to represent number-spellout rules for languages
+ * other than U.S. English, we make no claims of either accuracy or
+ * completeness. In fact, we know them to be incomplete, and suspect
+ * most have mistakes in them. If you see something that you know is wrong,
+ * please tell us!
+ *
+ * @author Richard Gillam
+ */
+public class RbnfSampleRuleSets {
+ /**
+ * Puts a copyright in the .class file
+ */
+// private static final String copyrightNotice
+// = "Copyright \u00a91997-1998 IBM Corp. All rights reserved.";
+
+ //========================================================================
+ // Spellout rules for various languages
+ //
+ // The following RuleBasedNumberFormat descriptions show the rules for
+ // spelling out numeric values in various languages. As mentioned
+ // before, we cannot vouch for the accuracy or completeness of this
+ // data, although we believe it's pretty close. Basically, this
+ // represents one day's worth of Web-surfing. If you can supply the
+ // missing information in any of these rule sets, or if you find errors,
+ // or if you can supply spellout rules for languages that aren't shown
+ // here, we want to hear from you!
+ //========================================================================
+
+ /**
+ * Spellout rules for U.S. English. This demonstration version of the
+ * U.S. English spellout rules has four variants: 1) %simplified is a
+ * set of rules showing the simple method of spelling out numbers in
+ * English: 289 is formatted as "two hundred eighty-nine". 2) %alt-teens
+ * is the same as %simplified, except that values between 1,000 and 9,999
+ * whose hundreds place isn't zero are formatted in hundreds. For example,
+ * 1,983 is formatted as "nineteen hundred eighty-three," and 2,183 is
+ * formatted as "twenty-one hundred eighty-three," but 2,083 is still
+ * formatted as "two thousand eighty-three." 3) %ordinal formats the
+ * values as ordinal numbers in English (e.g., 289 is "two hundred eighty-
+ * ninth"). 4) %default uses a more complicated algorithm to format
+ * numbers in a more natural way: 289 is formatted as "two hundred AND
+ * eighty-nine" and commas are inserted between the thousands groups for
+ * values above 100,000.
+ */
+ public static final String usEnglish =
+ // This rule set shows the normal simple formatting rules for English
+ "%simplified:\n"
+ // negative number rule. This rule is used to format negative
+ // numbers. The result of formatting the number's absolute
+ // value is placed where the >> is.
+ + " -x: minus >>;\n"
+ // fraction rule. This rule is used for formatting numbers
+ // with fractional parts. The result of formatting the
+ // number's integral part is substituted for the <<, and
+ // the result of formatting the number's fractional part
+ // (one digit at a time, e.g., 0.123 is "zero point one two
+ // three") replaces the >>.
+ + " x.x: << point >>;\n"
+ // the rules for the values from 0 to 19 are simply the
+ // words for those numbers
+ + " zero; one; two; three; four; five; six; seven; eight; nine;\n"
+ + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+ + " seventeen; eighteen; nineteen;\n"
+ // beginning at 20, we use the >> to mark the position where
+ // the result of formatting the number's ones digit goes. Thus,
+ // we only need a new rule at every multiple of 10. Text in
+ // brackets is omitted if the value being formatted is an
+ // even multiple of 10.
+ + " 20: twenty[->>];\n"
+ + " 30: thirty[->>];\n"
+ + " 40: forty[->>];\n"
+ + " 50: fifty[->>];\n"
+ + " 60: sixty[->>];\n"
+ + " 70: seventy[->>];\n"
+ + " 80: eighty[->>];\n"
+ + " 90: ninety[->>];\n"
+ // beginning at 100, we can use << to mark the position where
+ // the result of formatting the multiple of 100 is to be
+ // inserted. Notice also that the meaning of >> has shifted:
+ // here, it refers to both the ones place and the tens place.
+ // The meanings of the << and >> tokens depend on the base value
+ // of the rule. A rule's divisor is (usually) the highest
+ // power of 10 that is less than or equal to the rule's base
+ // value. The value being formatted is divided by the rule's
+ // divisor, and the integral quotient is used to get the text
+ // for <<, while the remainder is used to produce the text
+ // for >>. Again, text in brackets is omitted if the value
+ // being formatted is an even multiple of the rule's divisor
+ // (in this case, an even multiple of 100)
+ + " 100: << hundred[ >>];\n"
+ // The rules for the higher numbers work the same way as the
+ // rule for 100: Again, the << and >> tokens depend on the
+ // rule's divisor, which for all these rules is also the rule's
+ // base value. To group by thousand, we simply don't have any
+ // rules between 1,000 and 1,000,000.
+ + " 1000: << thousand[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000: << billion[ >>];\n"
+ + " 1,000,000,000,000: << trillion[ >>];\n"
+ // overflow rule. This rule specifies that values of a
+ // quadrillion or more are shown in numerals rather than words.
+ // The == token means to format (with new rules) the value
+ // being formatted by this rule and place the result where
+ // the == is. The #,##0 inside the == signs is a
+ // DecimalFormat pattern. It specifies that the value should
+ // be formatted with a DecimalFormat object, and that it
+ // should be formatted with no decimal places, at least one
+ // digit, and a thousands separator.
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+
+ // This rule set formats numbers between 1,000 and 9,999 somewhat
+ // differently: If the hundreds digit is not zero, the first two
+ // digits are treated as a number of hundreds. For example, 2,197
+ // would come out as "twenty-one hundred ninety-seven."
+ + "%alt-teens:\n"
+ // just use %simplified to format values below 1,000
+ + " =%simplified=;\n"
+ // values between 1,000 and 9,999 are delegated to %%alt-hundreds
+ // for formatting. The > after "1000" decreases the exponent
+ // of the rule's radix by one, causing the rule's divisor
+ // to be 100 instead of 1,000. This causes the first TWO
+ // digits of the number, instead of just the first digit,
+ // to be sent to %%alt-hundreds
+ + " 1000>: <%%alt-hundreds<[ >>];\n"
+ // for values of 10,000 and more, we again just use %simplified
+ + " 10,000: =%simplified=;\n"
+ // This rule set uses some obscure voodoo of the description language
+ // to format the first two digits of a value in the thousands.
+ // The rule at 10 formats the first two digits as a multiple of 1,000
+ // and the rule at 11 formats the first two digits as a multiple of
+ // 100. This works because of something known as the "rollback rule":
+ // if the rule applicable to the value being formatted has two
+ // substitutions, the value being formatted is an even multiple of
+ // the rule's divisor, and the rule's base value ISN'T an even multiple
+ // of the rule's divisor, then the rule that precedes this one in the
+ // list is used instead. (The [] notation is implemented internally
+ // using this notation: a rule containing [] is split into two rules,
+ // and the right one is chosen using the rollback rule.) In this case,
+ // it means that if the first two digits are an even multiple of 10,
+ // they're formatted with the 10 rule (containing "thousand"), and if
+ // they're not, they're formatted with the 11 rule (containing
+ // "hundred"). %%empty is a hack to cause the rollback rule to be
+ // invoked: it makes the 11 rule have two substitutions, even though
+ // the second substitution (calling %%empty) doesn't actually do
+ // anything.
+ + "%%alt-hundreds:\n"
+ + " 0: SHOULD NEVER GET HERE!;\n"
+ + " 10: <%simplified< thousand;\n"
+ + " 11: =%simplified= hundred>%%empty>;\n"
+ + "%%empty:\n"
+ + " 0:;"
+
+ // this rule set is the same as %simplified, except that it formats
+ // the value as an ordinal number: 234 is formatted as "two hundred
+ // thirty-fourth". Notice the calls to %simplified: we have to
+ // call %simplified to avoid getting "second hundred thirty-fourth."
+ + "%ordinal:\n"
+ + " zeroth; first; second; third; fourth; fifth; sixth; seventh;\n"
+ + " eighth; ninth;\n"
+ + " tenth; eleventh; twelfth; thirteenth; fourteenth;\n"
+ + " fifteenth; sixteenth; seventeenth; eighteenth;\n"
+ + " nineteenth;\n"
+ + " twentieth; twenty->>;\n"
+ + " 30: thirtieth; thirty->>;\n"
+ + " 40: fortieth; forty->>;\n"
+ + " 50: fiftieth; fifty->>;\n"
+ + " 60: sixtieth; sixty->>;\n"
+ + " 70: seventieth; seventy->>;\n"
+ + " 80: eightieth; eighty->>;\n"
+ + " 90: ninetieth; ninety->>;\n"
+ + " 100: <%simplified< hundredth; <%simplified< hundred >>;\n"
+ + " 1000: <%simplified< thousandth; <%simplified< thousand >>;\n"
+ + " 1,000,000: <%simplified< millionth; <%simplified< million >>;\n"
+ + " 1,000,000,000: <%simplified< billionth;\n"
+ + " <%simplified< billion >>;\n"
+ + " 1,000,000,000,000: <%simplified< trillionth;\n"
+ + " <%simplified< trillion >>;\n"
+ + " 1,000,000,000,000,000: =#,##0=;"
+
+ // %default is a more elaborate form of %simplified; It is basically
+ // the same, except that it introduces "and" before the ones digit
+ // when appropriate (basically, between the tens and ones digits) and
+ // separates the thousands groups with commas in values over 100,000.
+ + "%default:\n"
+ // negative-number and fraction rules. These are the same
+ // as those for %simplified, but have to be stated here too
+ // because this is an entry point
+ + " -x: minus >>;\n"
+ + " x.x: << point >>;\n"
+ // just use %simplified for values below 100
+ + " =%simplified=;\n"
+ // for values from 100 to 99,999 use %%and to decide whether or
+ // not to interpose the "and"
+ + " 100: << hundred[ >%%and>];\n"
+ + " 1000: << thousand[ >%%and>];\n"
+ // for values of 100,000 and up, use %%commas to interpose the
+ // commas in the right places (and also to interpose the "and")
+ + " 100,000>>: << thousand[>%%commas>];\n"
+ + " 1,000,000: << million[>%%commas>];\n"
+ + " 1,000,000,000: << billion[>%%commas>];\n"
+ + " 1,000,000,000,000: << trillion[>%%commas>];\n"
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ // if the value passed to this rule set is 100 or more, don't
+ // add the "and"; if it's less than 100, add "and" before the last
+ // digits
+ + "%%and:\n"
+ + " and =%default=;\n"
+ + " 100: =%default=;\n"
+ // this rule set is used to place the commas
+ + "%%commas:\n"
+ // for values below 100, add "and" (the apostrophe at the
+ // beginning is ignored, but causes the space that follows it
+ // to be significant: this is necessary because the rules
+ // calling %%commas don't put a space before it)
+ + " ' and =%default=;\n"
+ // put a comma after the thousands (or whatever preceded the
+ // hundreds)
+ + " 100: , =%default=;\n"
+ // put a comma after the millions (or whatever precedes the
+ // thousands)
+ + " 1000: , <%default< thousand, >%default>;\n"
+ // and so on...
+ + " 1,000,000: , =%default=;"
+ // %%lenient-parse isn't really a set of number formatting rules;
+ // it's a set of collation rules. Lenient-parse mode uses a Collator
+ // object to compare fragments of the text being parsed to the text
+ // in the rules, allowing more leeway in the matching text. This set
+ // of rules tells the formatter to ignore commas when parsing (it
+ // already ignores spaces, which is why we refer to the space; it also
+ // ignores hyphens, making "twenty one" and "twenty-one" parse
+ // identically)
+ + "%%lenient-parse:\n"
+ + " & ' ' , ',' ;\n";
+
+ /**
+ * Spellout rules for U.K. English. U.K. English has one significant
+ * difference from U.S. English: the names for values of 1,000,000,000
+ * and higher. In American English, each successive "-illion" is 1,000
+ * times greater than the preceding one: 1,000,000,000 is "one billion"
+ * and 1,000,000,000,000 is "one trillion." In British English, each
+ * successive "-illion" is one million times greater than the one before:
+ * "one billion" is 1,000,000,000,000 (or what Americans would call a
+ * "trillion"), and "one trillion" is 1,000,000,000,000,000,000.
+ * 1,000,000,000 in British English is "one thousand million." (This
+ * value is sometimes called a "milliard," but this word seems to have
+ * fallen into disuse.)
+ */
+ public static final String ukEnglish =
+ "%simplified:\n"
+ + " -x: minus >>;\n"
+ + " x.x: << point >>;\n"
+ + " zero; one; two; three; four; five; six; seven; eight; nine;\n"
+ + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+ + " seventeen; eighteen; nineteen;\n"
+ + " 20: twenty[->>];\n"
+ + " 30: thirty[->>];\n"
+ + " 40: forty[->>];\n"
+ + " 50: fifty[->>];\n"
+ + " 60: sixty[->>];\n"
+ + " 70: seventy[->>];\n"
+ + " 80: eighty[->>];\n"
+ + " 90: ninety[->>];\n"
+ + " 100: << hundred[ >>];\n"
+ + " 1000: << thousand[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000,000: << billion[ >>];\n" // a British "billion" is 10^12, so there is no separate rule at 10^9
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ + "%alt-teens:\n"
+ + " =%simplified=;\n"
+ + " 1000>: <%%alt-hundreds<[ >>];\n"
+ + " 10,000: =%simplified=;\n"
+ + " 1,000,000: << million[ >%simplified>];\n" // unlike the usEnglish %alt-teens, this set has its own million/billion rules
+ + " 1,000,000,000,000: << billion[ >%simplified>];\n"
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ + "%%alt-hundreds:\n"
+ + " 0: SHOULD NEVER GET HERE!;\n"
+ + " 10: <%simplified< thousand;\n"
+ + " 11: =%simplified= hundred>%%empty>;\n"
+ + "%%empty:\n"
+ + " 0:;"
+ + "%ordinal:\n"
+ + " zeroth; first; second; third; fourth; fifth; sixth; seventh;\n"
+ + " eighth; ninth;\n"
+ + " tenth; eleventh; twelfth; thirteenth; fourteenth;\n"
+ + " fifteenth; sixteenth; seventeenth; eighteenth;\n"
+ + " nineteenth;\n"
+ + " twentieth; twenty->>;\n"
+ + " 30: thirtieth; thirty->>;\n"
+ + " 40: fortieth; forty->>;\n"
+ + " 50: fiftieth; fifty->>;\n"
+ + " 60: sixtieth; sixty->>;\n"
+ + " 70: seventieth; seventy->>;\n"
+ + " 80: eightieth; eighty->>;\n"
+ + " 90: ninetieth; ninety->>;\n"
+ + " 100: <%simplified< hundredth; <%simplified< hundred >>;\n"
+ + " 1000: <%simplified< thousandth; <%simplified< thousand >>;\n"
+ + " 1,000,000: <%simplified< millionth; <%simplified< million >>;\n"
+ + " 1,000,000,000,000: <%simplified< billionth;\n"
+ + " <%simplified< billion >>;\n"
+ + " 1,000,000,000,000,000: =#,##0=;"
+ + "%default:\n"
+ + " -x: minus >>;\n"
+ + " x.x: << point >>;\n"
+ + " =%simplified=;\n"
+ + " 100: << hundred[ >%%and>];\n"
+ + " 1000: << thousand[ >%%and>];\n"
+ + " 100,000>>: << thousand[>%%commas>];\n"
+ + " 1,000,000: << million[>%%commas>];\n"
+ + " 1,000,000,000,000: << billion[>%%commas>];\n" // 10^12 again: the British "billion"
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ + "%%and:\n"
+ + " and =%default=;\n"
+ + " 100: =%default=;\n"
+ + "%%commas:\n"
+ + " ' and =%default=;\n"
+ + " 100: , =%default=;\n"
+ + " 1000: , <%default< thousand, >%default>;\n"
+ + " 1,000,000: , =%default=;"
+ + "%%lenient-parse:\n"
+ + " & ' ' , ',' ;\n";
+ // Could someone please correct me if I'm wrong about "milliard" falling
+ // into disuse, or have missed any other details of how large numbers
+ // are rendered. Also, could someone please provide me with information
+ // on which other English-speaking countries use which system? Right now,
+ // I'm assuming that the U.S. system is used in Canada and that all the
+ // other English-speaking countries follow the British system. Can
+ // someone out there confirm this?
+
+ /**
+ * Spellout rules for Spanish. The Spanish rules are quite similar to
+ * the English rules, but there are some important differences:
+ * First, we have to provide separate rules for most of the twenties
+ * because the ones digit frequently picks up an accent mark that it
+ * doesn't have when standing alone. Second, each multiple of 100 has
+ * to be specified separately because the multiplier on 100 very often
+ * changes form in the contraction: 500 is "quinientos," not
+ * "cincocientos." In addition, the word for 100 is "cien" when
+ * standing alone, but changes to "ciento" when followed by more digits.
+ * There are also some other differences.
+ */
+ public static final String spanish =
+ // negative-number and fraction rules
+ "-x: menos >>;\n"
+ + "x.x: << punto >>;\n"
+ // words for values from 0 to 19
+ + "cero; uno; dos; tres; cuatro; cinco; seis; siete; ocho; nueve;\n"
+ + "diez; once; doce; trece; catorce; quince; diecis\u00e9is;\n"
+ + " diecisiete; dieciocho; diecinueve;\n"
+ // words for values from 20 to 29 (necessary because the ones digit
+ // often picks up an accent mark it doesn't have when standing alone)
+ + "veinte; veintiuno; veintid\u00f3s; veintitr\u00e9s; veinticuatro;\n"
+ + " veinticinco; veintis\u00e9is; veintisiete; veintiocho;\n"
+ + " veintinueve;\n"
+ // words for multiples of 10 (notice that the tens digit is separated
+ // from the ones digit by the word "y".)
+ + "30: treinta[ y >>];\n"
+ + "40: cuarenta[ y >>];\n"
+ + "50: cincuenta[ y >>];\n"
+ + "60: sesenta[ y >>];\n"
+ + "70: setenta[ y >>];\n"
+ + "80: ochenta[ y >>];\n"
+ + "90: noventa[ y >>];\n"
+ // 100 by itself is "cien," but 100 followed by something is "ciento"
+ + "100: cien;\n"
+ + "101: ciento >>;\n"
+ // words for multiples of 100 (must be stated because they're
+ // rarely simple concatenations)
+ + "200: doscientos[ >>];\n"
+ + "300: trescientos[ >>];\n"
+ + "400: cuatrocientos[ >>];\n"
+ + "500: quinientos[ >>];\n"
+ + "600: seiscientos[ >>];\n"
+ + "700: setecientos[ >>];\n"
+ + "800: ochocientos[ >>];\n"
+ + "900: novecientos[ >>];\n"
+ // for 1,000, the multiplier on "mil" is omitted: 2,000 is "dos mil,"
+ // but 1,000 is just "mil."
+ + "1000: mil[ >>];\n"
+ + "2000: << mil[ >>];\n"
+ // 1,000,000 is "un millon," not "uno millon"
+ + "1,000,000: un mill\u00f3n[ >>];\n"
+ + "2,000,000: << mill\u00f3n[ >>];\n"
+ // overflow rule
+ + "1,000,000,000: =#,##0= (incomplete data);";
+ // The Spanish rules are incomplete. I'm missing information on negative
+ // numbers and numbers with fractional parts. I also don't have
+ // information on numbers higher than the millions
+
+ /**
+ * Spellout rules for French. French adds some interesting quirks of its
+ * own: 1) The word "et" is interposed between the tens and ones digits,
+ * but only if the ones digit is 1: 20 is "vingt," and 22 is "vingt-deux,"
+ * but 21 is "vingt-et-un." 2) There are no words for 70, 80, or 90.
+ * "quatre-vingts" ("four twenties") is used for 80, and values proceed
+ * by score from 60 to 99 (e.g., 73 is "soixante-treize" ["sixty-thirteen"]).
+ * 3) Numbers from 1,100 to 1,199 are rendered as hundreds rather than
+ * thousands: 1,100 is "onze cents" ("eleven hundred"), rather than
+ * "mille cent" ("one thousand one hundred")
+ */
+ public static final String french =
+ // the main rule set
+ "%main:\n"
+ // negative-number and fraction rules
+ + " -x: moins >>;\n"
+ + " x.x: << virgule >>;\n"
+ // words for numbers from 0 to 19
+ + " z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
+ + " dix; onze; douze; treize; quatorze; quinze; seize;\n"
+ + " dix-sept; dix-huit; dix-neuf;\n"
+ // words for the multiples of 10: %%alt-ones inserts "et"
+ // when needed
+ + " 20: vingt[->%%alt-ones>];\n"
+ + " 30: trente[->%%alt-ones>];\n"
+ + " 40: quarante[->%%alt-ones>];\n"
+ + " 50: cinquante[->%%alt-ones>];\n"
+ // rule for 60. The /20 causes this rule's multiplier to be
+ // 20 rather than 10, allowing us to recurse for all values
+ // from 60 to 79...
+ + " 60/20: soixante[->%%alt-ones>];\n"
+ // ...except for 71, which must be special-cased
+ + " 71: soixante et onze;\n"
+ // at 72, we have to repeat the rule for 60 to get us to 79
+ + " 72/20: soixante->%%alt-ones>;\n"
+ // at 80, we state a new rule with the phrase for 80. Since
+ // it changes form when there's a ones digit, we need a second
+ // rule at 81. This rule also includes "/20," allowing it to
+ // be used correctly for all values up to 99
+ + " 80: quatre-vingts; 81/20: quatre-vingt->>;\n"
+ // "cent" becomes plural when preceded by a multiplier, and
+ // the multiplier is omitted from the singular form
+ + " 100: cent[ >>];\n"
+ + " 200: << cents[ >>];\n"
+ + " 1000: mille[ >>];\n"
+ // values from 1,100 to 1,199 are rendered as "onze cents..."
+ // instead of "mille cent..." The > after "1000" decreases
+ // the rule's exponent, causing its multiplier to be 100 instead
+ // of 1,000. This prevents us from getting "onze cents cent
+ // vingt-deux" ("eleven hundred one hundred twenty-two").
+ + " 1100>: onze cents[ >>];\n"
+ // at 1,200, we go back to formatting in thousands, so we
+ // repeat the rule for 1,000
+ + " 1200: mille >>;\n"
+ // at 2,000, the multiplier is added
+ + " 2000: << mille[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000: << milliarde[ >>];\n" // NOTE(review): the usual French spelling is "milliard" - confirm before changing
+ + " 1,000,000,000,000: << billion[ >>];\n"
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ // %%alt-ones is used to insert "et" when the ones digit is 1
+ + "%%alt-ones:\n"
+ + " ; et-un; =%main=;";
+
+ /**
+ * Spellout rules for Swiss French. Swiss French differs from French French
+ * in that it does have words for 70, 80, and 90. This rule set shows them,
+ * and is simpler as a result.
+ */
+ public static final String swissFrench =
+ "%main:\n"
+ + " -x: moins >>;\n"
+ + " x.x: << virgule >>;\n"
+ + " z\u00e9ro; un; deux; trois; quatre; cinq; six; sept; huit; neuf;\n"
+ + " dix; onze; douze; treize; quatorze; quinze; seize;\n"
+ + " dix-sept; dix-huit; dix-neuf;\n"
+ + " 20: vingt[->%%alt-ones>];\n"
+ + " 30: trente[->%%alt-ones>];\n"
+ + " 40: quarante[->%%alt-ones>];\n"
+ + " 50: cinquante[->%%alt-ones>];\n"
+ + " 60: soixante[->%%alt-ones>];\n" // plain rule at 60: no "/20" modifier is needed in this variant
+ // notice new words for 70, 80, and 90
+ + " 70: septante[->%%alt-ones>];\n"
+ + " 80: octante[->%%alt-ones>];\n"
+ + " 90: nonante[->%%alt-ones>];\n"
+ + " 100: cent[ >>];\n"
+ + " 200: << cents[ >>];\n"
+ + " 1000: mille[ >>];\n"
+ + " 1100>: onze cents[ >>];\n" // same 1,100-1,199 special case as in the "french" rule set
+ + " 1200: mille >>;\n"
+ + " 2000: << mille[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000: << milliarde[ >>];\n" // NOTE(review): the usual French spelling is "milliard" - confirm before changing
+ + " 1,000,000,000,000: << billion[ >>];\n"
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ + "%%alt-ones:\n"
+ + " ; et-un; =%main=;";
+ // I'm not 100% sure about Swiss French. Is
+ // this correct? Is "onze cents" commonly used for 1,100 in both France
+ // and Switzerland? Can someone fill me in on the rules for the other
+ // French-speaking countries? I've heard conflicting opinions on which
+ // version is used in Canada, and I understand there's an alternate set
+ // of words for 70, 80, and 90 that is used somewhere, but I don't know
+ // what those words are or where they're used.
+
+ /**
+ * Spellout rules for German. German also adds some interesting
+ * characteristics. For values below 1,000,000, numbers are customarily
+ * written out as a single word. And the ones digit PRECEDES the tens
+ * digit (e.g., 23 is "dreiundzwanzig," not "zwanzigunddrei").
+ */
+ public static final String german =
+ // 1 is "eins" when by itself, but turns into "ein" in most
+ // combinations
+ "%alt-ones:\n"
+ + " null; eins; =%%main=;\n"
+ + "%%main:\n"
+ // words for numbers from 0 to 12. Notice that the values
+ // from 13 to 19 can be derived algorithmically, unlike in most
+ // other languages
+ + " null; ein; zwei; drei; vier; f\u00fcnf; sechs; sieben; acht; neun;\n"
+ + " zehn; elf; zw\u00f6lf; >>zehn;\n"
+ // rules for the multiples of 10. Notice that the ones digit
+ // goes on the front
+ + " 20: [>>und]zwanzig;\n"
+ + " 30: [>>und]drei\u00dfig;\n"
+ + " 40: [>>und]vierzig;\n"
+ + " 50: [>>und]f\u00fcnfzig;\n"
+ + " 60: [>>und]sechzig;\n"
+ + " 70: [>>und]siebzig;\n"
+ + " 80: [>>und]achtzig;\n"
+ + " 90: [>>und]neunzig;\n"
+ + " 100: hundert[>%alt-ones>];\n" // 100 and 1000 omit the multiplier
+ + " 200: <<hundert[>%alt-ones>];\n" // from 200 up the multiplier (<<) is written in front, with no space
+ + " 1000: tausend[>%alt-ones>];\n"
+ + " 2000: <<tausend[>%alt-ones>];\n"
+ + " 1,000,000: eine Million[ >%alt-ones>];\n"
+ + " 2,000,000: << Millionen[ >%alt-ones>];\n"
+ + " 1,000,000,000: eine Milliarde[ >%alt-ones>];\n"
+ + " 2,000,000,000: << Milliarden[ >%alt-ones>];\n"
+ + " 1,000,000,000,000: eine Billion[ >%alt-ones>];\n"
+ + " 2,000,000,000,000: << Billionen[ >%alt-ones>];\n"
+ + " 1,000,000,000,000,000: =#,##0=;";
+ // again, I'm not 100% sure of these rules. I think both "hundert" and
+ // "einhundert" are correct for 100, but I'm not sure which is preferable
+ // in situations where this framework is likely to be used. Also, is it
+ // really true that numbers are run together into compound words all the
+ // time? And again, I'm missing information on negative numbers and
+ // decimals.
+
+ /**
+ * Spellout rules for Italian. Like German, most Italian numbers are
+ * written as single words. What makes these rules complicated is the rule
+ * that says that when a word ending in a vowel and a word beginning with
+ * a vowel are combined into a compound, the vowel is dropped from the
+ * end of the first word: 180 is "centottanta," not "centoottanta."
+ * The complexity of this rule set is to produce this behavior.
+ */
+ public static final String italian =
+ // main rule set. Follows the patterns of the preceding rule sets,
+ // except that the final vowel is omitted from words ending in
+ // vowels when they are followed by another word; instead, we have
+ // separate rule sets that are identical to this one, except that
+ // all the words that don't begin with a vowel have a vowel tacked
+ // onto them at the front. A word ending in a vowel calls a
+ // substitution that will supply that vowel, unless that vowel is to
+ // be elided.
+ "%main:\n"
+ + " -x: meno >>;\n"
+ + " x.x: << virgola >>;\n"
+ + " zero; uno; due; tre; quattro; cinque; sei; sette; otto;\n"
+ + " nove;\n"
+ + " dieci; undici; dodici; tredici; quattordici; quindici; sedici;\n"
+ + " diciasette; diciotto; diciannove;\n"
+ + " 20: venti; vent>%%with-i>;\n"
+ + " 30: trenta; trent>%%with-i>;\n"
+ + " 40: quaranta; quarant>%%with-a>;\n"
+ + " 50: cinquanta; cinquant>%%with-a>;\n"
+ + " 60: sessanta; sessant>%%with-a>;\n"
+ + " 70: settanta; settant>%%with-a>;\n"
+ + " 80: ottanta; ottant>%%with-a>;\n"
+ + " 90: novanta; novant>%%with-a>;\n"
+ + " 100: cento; cent[>%%with-o>];\n"
+ + " 200: <<cento; <<cent[>%%with-o>];\n" // same pair of forms as the 100 rule, with the multiplier (<<) in front
+ + " 1000: mille; mill[>%%with-i>];\n"
+ + " 2000: <<mila; <<mil[>%%with-a>];\n"
+ + " 100,000>>: <<mila; <<mil[>>];\n" // NOTE(review): wording reconstructed from the 2000 rule - confirm against upstream ICU
+ + " 1,000,000: =#,##0= (incomplete data);\n"
+ + "%%with-a:\n"
+ + " azero; uno; adue; atre; aquattro; acinque; asei; asette; otto;\n"
+ + " anove;\n"
+ + " adieci; undici; adodici; atredici; aquattordici; aquindici; asedici;\n"
+ + " adiciasette; adiciotto; adiciannove;\n"
+ + " 20: aventi; avent>%%with-i>;\n"
+ + " 30: atrenta; atrent>%%with-i>;\n"
+ + " 40: aquaranta; aquarant>%%with-a>;\n"
+ + " 50: acinquanta; acinquant>%%with-a>;\n"
+ + " 60: asessanta; asessant>%%with-a>;\n"
+ + " 70: asettanta; asettant>%%with-a>;\n"
+ + " 80: ottanta; ottant>%%with-a>;\n"
+ + " 90: anovanta; anovant>%%with-a>;\n"
+ + " 100: acento; acent[>%%with-o>];\n"
+ + " 200: <%%with-a<cento; <%%with-a<cent[>%%with-o>];\n" // the multiplier itself is formatted by %%with-a so it carries the leading vowel
+ + " 1000: amille; amill[>%%with-i>];\n"
+ + " 2000: <%%with-a<mila; <%%with-a<mil[>%%with-a>];\n"
+ + " 100,000: =%main=;\n"
+ + "%%with-i:\n"
+ + " izero; uno; idue; itre; iquattro; icinque; isei; isette; otto;\n"
+ + " inove;\n"
+ + " idieci; undici; idodici; itredici; iquattordici; iquindici; isedici;\n"
+ + " idiciasette; idiciotto; idiciannove;\n"
+ + " 20: iventi; ivent>%%with-i>;\n"
+ + " 30: itrenta; itrent>%%with-i>;\n"
+ + " 40: iquaranta; iquarant>%%with-a>;\n"
+ + " 50: icinquanta; icinquant>%%with-a>;\n"
+ + " 60: isessanta; isessant>%%with-a>;\n"
+ + " 70: isettanta; isettant>%%with-a>;\n"
+ + " 80: ottanta; ottant>%%with-a>;\n"
+ + " 90: inovanta; inovant>%%with-a>;\n"
+ + " 100: icento; icent[>%%with-o>];\n"
+ + " 200: <%%with-i<cento; <%%with-i<cent[>%%with-o>];\n"
+ + " 1000: imille; imill[>%%with-i>];\n"
+ + " 2000: <%%with-i<mila; <%%with-i<mil[>%%with-a>];\n"
+ + " 100,000: =%main=;\n"
+ + "%%with-o:\n"
+ + " ozero; uno; odue; otre; oquattro; ocinque; osei; osette; otto;\n"
+ + " onove;\n"
+ + " odieci; undici; ododici; otredici; oquattordici; oquindici; osedici;\n"
+ + " odiciasette; odiciotto; odiciannove;\n"
+ + " 20: oventi; ovent>%%with-i>;\n"
+ + " 30: otrenta; otrent>%%with-i>;\n"
+ + " 40: oquaranta; oquarant>%%with-a>;\n"
+ + " 50: ocinquanta; ocinquant>%%with-a>;\n"
+ + " 60: osessanta; osessant>%%with-a>;\n"
+ + " 70: osettanta; osettant>%%with-a>;\n"
+ + " 80: ottanta; ottant>%%with-a>;\n"
+ + " 90: onovanta; onovant>%%with-a>;\n"
+ + " 100: ocento; ocent[>%%with-o>];\n"
+ + " 200: <%%with-o<cento; <%%with-o<cent[>%%with-o>];\n"
+ + " 1000: omille; omill[>%%with-i>];\n"
+ + " 2000: <%%with-o<mila; <%%with-o<mil[>%%with-a>];\n"
+ + " 100,000: =%main=;\n";
+ // Can someone confirm that I did the vowel-eliding thing right? I'm
+ // not 100% sure I'm doing it in all the right places, or completely
+ // correctly. Also, I don't have information for negatives and decimals,
+ // and I lack words for values from 1,000,000 on up.
+
+ /**
+ * Spellout rules for Swedish.
+ */
+ public static final String swedish =
+ "noll; ett; tv\u00e5; tre; fyra; fem; sex; sjo; \u00e5tta; nio;\n" // NOTE(review): "sjo" for 7 may be a typo for "sju" (cf. "sjutton") - confirm
+ + "tio; elva; tolv; tretton; fjorton; femton; sexton; sjutton; arton; nitton;\n"
+ + "20: tjugo[>>];\n"
+ + "30: trettio[>>];\n"
+ + "40: fyrtio[>>];\n"
+ + "50: femtio[>>];\n"
+ + "60: sextio[>>];\n"
+ + "70: sjuttio[>>];\n"
+ + "80: \u00e5ttio[>>];\n"
+ + "90: nittio[>>];\n"
+ + "100: hundra[>>];\n" // 100 omits the multiplier
+ + "200: <<hundra[>>];\n" // from 200 up the multiplier (<<) is written in front, with no space
+ + "1000: tusen[ >>];\n"
+ + "2000: << tusen[ >>];\n"
+ + "1,000,000: en miljon[ >>];\n"
+ + "2,000,000: << miljon[ >>];\n"
+ + "1,000,000,000: en miljard[ >>];\n"
+ + "2,000,000,000: << miljard[ >>];\n"
+ + "1,000,000,000,000: en biljon[ >>];\n"
+ + "2,000,000,000,000: << biljon[ >>];\n"
+ + "1,000,000,000,000,000: =#,##0=";
+ // can someone supply me with information on negatives and decimals?
+
+ /**
+ * Spellout rules for Dutch. Notice that in Dutch, as in German,
+ * the ones digit precedes the tens digit.
+ */
+ public static final String dutch =
+ " -x: min >>;\n"
+ + "x.x: << komma >>;\n"
+ + "(zero?); een; twee; drie; vier; vijf; zes; zeven; acht; negen;\n" // NOTE(review): "(zero?)" looks like a placeholder for the word for 0 - TODO supply
+ + "tien; elf; twaalf; dertien; veertien; vijftien; zestien;\n"
+ + "zeventien; achtien; negentien;\n"
+ + "20: [>> en ]twintig;\n" // ones digit first, then "en", then the tens word
+ + "30: [>> en ]dertig;\n"
+ + "40: [>> en ]veertig;\n"
+ + "50: [>> en ]vijftig;\n"
+ + "60: [>> en ]zestig;\n"
+ + "70: [>> en ]zeventig;\n"
+ + "80: [>> en ]tachtig;\n"
+ + "90: [>> en ]negentig;\n"
+ + "100: << honderd[ >>];\n"
+ + "1000: << duizend[ >>];\n"
+ + "1,000,000: << miljoen[ >>];\n"
+ + "1,000,000,000: << biljoen[ >>];\n"
+ + "1,000,000,000,000: =#,##0="; // values of 10^12 and up are shown in digits
+
+ /**
+ * Spellout rules for Japanese. In Japanese, there really isn't any
+ * distinction between a number written out in digits and a number
+ * written out in words: the ideographic characters are both digits
+ * and words. This rule set provides two variants: %traditional
+ * uses the traditional CJK numerals (which are also used in China
+ * and Korea). %financial uses alternate ideographs for many numbers
+ * that are harder to alter than the traditional numerals (one could
+ * fairly easily change a one to
+ * a three just by adding two strokes, for example). This is also done in
+ * the other countries using Chinese ideographs, but different ideographs
+ * are used in those places.
+ */
+ public static final String japanese =
+ "%financial:\n"
+ + " \u96f6; \u58f1; \u5f10; \u53c2; \u56db; \u4f0d; \u516d; \u4e03; \u516b; \u4e5d;\n"
+ + " \u62fe[>>];\n" // 10 through 19: no multiplier in front of the character for ten
+ + " 20: <<\u62fe[>>];\n"
+ + " 100: <<\u767e[>>];\n"
+ + " 1000: <<\u5343[>>];\n"
+ + " 10,000: <<\u4e07[>>];\n" // above 1000 the rules step by factors of 10,000: 10^4, 10^8, 10^12
+ + " 100,000,000: <<\u5104[>>];\n"
+ + " 1,000,000,000,000: <<\u5146[>>];\n"
+ + " 10,000,000,000,000,000: =#,##0=;\n" // values of 10^16 and up are shown in digits
+ + "%traditional:\n"
+ + " \u96f6; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
+ + " \u5341[>>];\n"
+ + " 20: <<\u5341[>>];\n"
+ + " 100: <<\u767e[>>];\n"
+ + " 1000: <<\u5343[>>];\n"
+ + " 10,000: <<\u4e07[>>];\n"
+ + " 100,000,000: <<\u5104[>>];\n"
+ + " 1,000,000,000,000: <<\u5146[>>];\n"
+ + " 10,000,000,000,000,000: =#,##0=;";
+ // Can someone supply me with the right fraud-proof ideographs for
+ // Simplified and Traditional Chinese, and for Korean? Can someone
+ // supply me with information on negatives and decimals?
+
+ /**
+ * Spellout rules for Greek. Again in Greek we have to supply the words
+ * for the multiples of 100 because they can't be derived algorithmically.
+ * Also, the tens digit changes form when followed by a ones digit: an
+ * accent mark disappears from the tens digit and moves to the ones digit.
+ * Therefore, instead of using the [] notation, we actually have to use
+ * two separate rules for each multiple of 10 to show the two forms of
+ * the word.
+ */
+ public static final String greek =
+ // words for 0 through 9
+ "\u03bc\u03b7\u03b4\u03ad\u03bd; \u03ad\u03bd\u03b1; \u03b4\u03cd\u03bf; \u03c4\u03c1\u03af\u03b1; "
+ + "\u03c4\u03ad\u03c3\u03c3\u03b5\u03c1\u03b1; \u03c0\u03ad\u03bd\u03c4\u03b5; "
+ + "\u03ad\u03be\u03b9; \u03b5\u03c0\u03c4\u03ac; \u03bf\u03ba\u03c4\u03ce; "
+ + "\u03b5\u03bd\u03bd\u03ad\u03b1;\n"
+ // 10, 11 and 12 have their own words; 13 and up use the unaccented
+ // tens word followed by the ones digit
+ + "10: \u03b4\u03ad\u03ba\u03b1; "
+ + "\u03ad\u03bd\u03b4\u03b5\u03ba\u03b1; \u03b4\u03ce\u03b4\u03b5\u03ba\u03b1; "
+ + "\u03b4\u03b5\u03ba\u03b1>>;\n"
+ // each multiple of 10 has two rules: the accented stand-alone form,
+ // then the unaccented form used when a ones digit follows
+ + "20: \u03b5\u03af\u03ba\u03bf\u03c3\u03b9; \u03b5\u03b9\u03ba\u03bf\u03c3\u03b9>>;\n"
+ + "30: \u03c4\u03c1\u03b9\u03ac\u03bd\u03c4\u03b1; \u03c4\u03c1\u03b9\u03b1\u03bd\u03c4\u03b1>>;\n"
+ + "40: \u03c3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1; \u03c3\u03b1\u03c1\u03b1\u03bd\u03c4\u03b1>>;\n"
+ + "50: \u03c0\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; \u03c0\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
+ + "60: \u03b5\u03be\u03ae\u03bd\u03c4\u03b1; \u03b5\u03be\u03b7\u03bd\u03c4\u03b1>>;\n"
+ + "70: \u03b5\u03b2\u03b4\u03bf\u03bc\u03ae\u03bd\u03c4\u03b1; "
+ + "\u03b5\u03b2\u03b4\u03bf\u03bc\u03b7\u03bd\u03c4\u03b1>>;\n"
+ + "80: \u03bf\u03b3\u03b4\u03cc\u03bd\u03c4\u03b1; \u03bf\u03b3\u03b4\u03bf\u03bd\u03c4\u03b1>>;\n"
+ + "90: \u03b5\u03bd\u03bd\u03b5\u03bd\u03ae\u03bd\u03c4\u03b1; "
+ + "\u03b5\u03bd\u03bd\u03b5\u03bd\u03b7\u03bd\u03c4\u03b1>>;\n"
+ // the multiples of 100 are spelled out individually
+ + "100: \u03b5\u03ba\u03b1\u03c4\u03cc[\u03bd >>];\n"
+ + "200: \u03b4\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "300: \u03c4\u03c1\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "400: \u03c4\u03b5\u03c4\u03c1\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "500: \u03c0\u03b5\u03bd\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "600: \u03b5\u03be\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "700: \u03b5\u03c0\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "800: \u03bf\u03ba\u03c4\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ + "900: \u03b5\u03bd\u03bd\u03b9\u03b1\u03ba\u03cc\u03c3\u03b9\u03b1[ >>];\n"
+ // NOTE(review): the thousands and millions rules reuse the singular
+ // word for every multiple -- plural forms probably still need to be
+ // supplied; confirm with a Greek speaker
+ + "1000: \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
+ + "2000: << \u03c7\u03af\u03bb\u03b9\u03b1[ >>];\n"
+ + "1,000,000: << \u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03cd\u03c1\u03b9\u03bf[ >>];\n"
+ + "1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03cd\u03c1\u03b9\u03bf[ >>];\n"
+ // overflow rule: larger values are shown in numerals
+ + "1,000,000,000,000: =#,##0=";
+ // Can someone supply me with information on negatives and decimals?
+
+ /**
+ * Spellout rules for Russian.
+ */
+ public static final String russian =
+ // words for 0 through 9
+ "\u043d\u043e\u043b\u044c; \u043e\u0434\u0438\u043d; \u0434\u0432\u0430; \u0442\u0440\u0438; "
+ + "\u0447\u0435\u0442\u044b\u0440\u0435; \u043f\u044f\u0442\u044c; \u0448\u0435\u0441\u0442\u044c; "
+ + "\u0441\u0435\u043c\u044c; \u0432\u043e\u0441\u0435\u043c\u044c; \u0434\u0435\u0432\u044f\u0442\u044c;\n"
+ // words for 10 through 19
+ + "10: \u0434\u0435\u0441\u044f\u0442\u044c; "
+ + "\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+ + "\u0434\u0432\u0435\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+ + "15: \u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442\u044c; "
+ + "\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442\u044c;\n"
+ // multiples of 10, each optionally followed by the ones digit
+ + "20: \u0434\u0432\u0430\u0434\u0446\u0430\u0442\u044c[ >>];\n"
+ + "30: \u0442\u0440\u0438\u0434\u0446\u0430\u0442\u044c[ >>];\n"
+ + "40: \u0441\u043e\u0440\u043e\u043a[ >>];\n"
+ + "50: \u043f\u044f\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+ + "60: \u0448\u0435\u0441\u0442\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+ + "70: \u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+ + "80: \u0432\u043e\u0441\u0435\u043c\u044c\u0434\u0435\u0441\u044f\u0442[ >>];\n"
+ + "90: \u0434\u0435\u0432\u044f\u043d\u043e\u0441\u0442\u043e[ >>];\n"
+ // NOTE(review): hundreds, thousands and millions reuse a single word
+ // form for every multiple -- the inflected forms probably still need
+ // to be supplied; confirm with a Russian speaker
+ + "100: \u0441\u0442\u043e[ >>];\n"
+ + "200: << \u0441\u0442\u043e[ >>];\n"
+ + "1000: \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
+ + "2000: << \u0442\u044b\u0441\u044f\u0447\u0430[ >>];\n"
+ + "1,000,000: \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
+ + "2,000,000: << \u043c\u0438\u043b\u043b\u0438\u043e\u043d[ >>];\n"
+ // overflow rule: larger values are shown in numerals
+ + "1,000,000,000: =#,##0=;";
+ // Can someone supply me with information on negatives and decimals?
+ // How about words for billions and trillions?
+
+ /**
+ * Spellout rules for Hebrew. Hebrew actually has inflected forms for
+ * most of the lower-order numbers. The masculine forms are shown
+ * here.
+ */
+ public static final String hebrew =
+ // NOTE(review): several of the words below look mis-transcribed
+ // (similar-looking letters appear to be swapped) -- verify every
+ // spelling against a Hebrew reference before relying on this data
+ "zero (incomplete data); \u05d0\u05d4\u05d3; \u05e9\u05d2\u05d9\u05d9\u05dd; \u05e9\u05dc\u05d5\u05e9\u05d4;\n"
+ + "4: \u05d0\u05d3\u05d1\u05e6\u05d4; \u05d7\u05d2\u05d5\u05d9\u05e9\u05d4; \u05e9\u05e9\u05d4;\n"
+ + "7: \u05e9\u05d1\u05e6\u05d4; \u05e9\u05de\u05d5\u05d2\u05d4; \u05ea\u05e9\u05e6\u05d4;\n"
+ // multiples of 10, each optionally followed by the ones digit
+ + "10: \u05e6\u05e9\u05d3\u05d4[ >>];\n"
+ + "20: \u05e6\u05e9\u05d3\u05d9\u05dd[ >>];\n"
+ + "30: \u05e9\u05dc\u05d5\u05e9\u05d9\u05dd[ >>];\n"
+ + "40: \u05d0\u05d3\u05d1\u05e6\u05d9\u05dd[ >>];\n"
+ + "50: \u05d7\u05de\u05d9\u05e9\u05d9\u05dd[ >>];\n"
+ + "60: \u05e9\u05e9\u05d9\u05dd[ >>];\n"
+ + "70: \u05e9\u05d1\u05e6\u05d9\u05dd[ >>];\n"
+ + "80: \u05e9\u05de\u05d5\u05d2\u05d9\u05dd[ >>];\n"
+ + "90: \u05ea\u05e9\u05e6\u05d9\u05dd[ >>];\n"
+ // hundreds and thousands
+ + "100: \u05de\u05d0\u05d4[ >>];\n"
+ + "200: << \u05de\u05d0\u05d4[ >>];\n"
+ + "1000: \u05d0\u05dc\u05e3[ >>];\n"
+ + "2000: << \u05d0\u05dc\u05e3[ >>];\n"
+ // values from 1,000,000 up are shown in numerals, flagged as incomplete
+ + "1,000,000: =#,##0= (incomplete data);";
+ // This data is woefully incomplete. Can someone fill me in on the
+ // various inflected forms of the numbers, which seem to be necessary
+ // to do Hebrew correctly? Can someone supply me with data for values
+ // from 1,000,000 on up? What about the word for zero? What about
+ // information on negatives and decimals?
+
+ //========================================================================
+ // Simple examples
+ //========================================================================
+
+ /**
+ * This rule set adds an English ordinal abbreviation to the end of a
+ * number. For example, 2 is formatted as "2nd". Parsing doesn't work with
+ * this rule set. To parse, use DecimalFormat on the numeral.
+ */
+ public static final String ordinal =
+ // this rule set formats the numeral and calls %%abbrev to
+ // supply the abbreviation
+ "%main:\n"
+ + " =#,##0==%%abbrev=;\n"
+ // this rule set supplies the abbreviation
+ + "%%abbrev:\n"
+ // the abbreviations for 0 through 4. The last rule stays in effect
+ // up to the next rule, so everything from 4 to 19 ends in "th"
+ + " th; st; nd; rd; th;\n"
+ // at 20, we begin repeating the cycle every 10 (13 is "13th",
+ // but 23 and 33 are "23rd" and "33rd"). We do this by
+ // ignoring all but the ones digit in selecting the abbreviation
+ + " 20: >>;\n"
+ // at 100, we repeat the whole cycle by considering only the
+ // tens and ones digits in picking an abbreviation
+ + " 100: >>;\n";
+
+ /**
+ * This is a simple message-formatting example. Normally one would
+ * use ChoiceFormat and MessageFormat to do something this simple,
+ * but this shows it could be done with RuleBasedNumberFormat too.
+ * A message-formatting example that might work better with
+ * RuleBasedNumberFormat appears later.
+ */
+ public static final String message1 =
+ // this rule surrounds whatever the other rules produce with the
+ // rest of the sentence
+ "x.0: The search found <<.;\n"
+ // use words for values below 10 (and change to "file" for 1)
+ + "no files; one file; two files; three files; four files; five files;\n"
+ + " six files; seven files; eight files; nine files;\n"
+ // use numerals for values of 10 and above
+ + "=#,##0= files;";
+
+ //========================================================================
+ // Fraction handling
+ //
+ // The next few examples show how RuleBasedNumberFormat can be used for
+ // more flexible handling of fractions
+ //========================================================================
+
+ /**
+ * This example formats a number in one of the two styles often used
+ * on checks. %dollars-and-hundredths formats cents as hundredths of
+ * a dollar (23.40 comes out as "twenty-three and 40/100 dollars").
+ * %dollars-and-cents formats in dollars and cents (23.40 comes out as
+ * "twenty-three dollars and forty cents")
+ */
+ public static final String dollarsAndCents =
+ // this rule set formats numbers as dollars and cents
+ "%dollars-and-cents:\n"
+ // if the value is 1 or more, put "xx dollars and yy cents".
+ // the "and y cents" part is suppressed if the value is an
+ // even number of dollars
+ + " x.0: << [and >%%cents>];\n"
+ // if the value is between 0 and 1, put "xx cents"
+ + " 0.x: >%%cents>;\n"
+ // these three rules take care of the singular and plural
+ // forms of "dollar" and use %%main to format the number
+ + " 0: zero dollars; one dollar; =%%main= dollars;\n"
+ // these are the regular U.S. English number spellout rules
+ + "%%main:\n"
+ + " zero; one; two; three; four; five; six; seven; eight; nine;\n"
+ + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+ + " seventeen; eighteen; nineteen;\n"
+ + " 20: twenty[->>];\n"
+ + " 30: thirty[->>];\n"
+ + " 40: forty[->>];\n"
+ + " 50: fifty[->>];\n"
+ + " 60: sixty[->>];\n"
+ + " 70: seventy[->>];\n"
+ + " 80: eighty[->>];\n"
+ + " 90: ninety[->>];\n"
+ + " 100: << hundred[ >>];\n"
+ + " 1000: << thousand[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000: << billion[ >>];\n"
+ + " 1,000,000,000,000: << trillion[ >>];\n"
+ // overflow rule: larger values are shown in numerals
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ // this rule takes care of the fractional part of the value. It
+ // multiplies the fractional part of the number being formatted by
+ // 100, formats it with %%main, and then adds the word "cent" or
+ // "cents" to the end. (The text in brackets is omitted if the
+ // numerator of the fraction is 1.)
+ + "%%cents:\n"
+ + " 100: <%%main< cent[s];\n"
+
+ // this rule set formats numbers as dollars and hundredths of dollars
+ + "%dollars-and-hundredths:\n"
+ // this rule takes care of the general shell of the output
+ // string. We always show the cents, even when there aren't
+ // any. Because of this, the word is always "dollars"--
+ // we don't have to worry about the singular form. We use
+ // %%main to format the number of dollars and %%hundredths to
+ // format the number of cents
+ + " x.0: <%%main< and >%%hundredths>/100 dollars;\n"
+ // this rule set formats the cents for %dollars-and-hundredths.
+ // It multiplies the fractional part of the number by 100 and formats
+ // the result using a DecimalFormat ("00" tells the DecimalFormat to
+ // always use two digits, even for numbers under 10)
+ + "%%hundredths:\n"
+ + " 100: <00<;\n";
+
+ /**
+ * This rule set shows the fractional part of the number as a fraction
+ * with a power of 10 as the denominator. Some languages don't spell
+ * out the fractional part of a number as "point one two three," but
+ * always render it as a fraction. If we still want to treat the fractional
+ * part of the number as a decimal, then the fraction's denominator
+ * is always a power of 10. This example does that: 23.125 is formatted
+ * as "twenty-three and one hundred twenty-five thousandths" (as opposed
+ * to "twenty-three point one two five" or "twenty-three and one eighth").
+ */
+ public static final String decimalAsFraction =
+ // the regular U.S. English spellout rules, with one difference
+ "%main:\n"
+ + " -x: minus >>;\n"
+ // the difference. This rule uses %%frac to show the fractional
+ // part of the number. Text in brackets is omitted when the
+ // value is between 0 and 1 (causing 0.3 to come out as "three
+ // tenths" instead of "zero and three tenths").
+ + " x.x: [<< and ]>%%frac>;\n"
+ + " zero; one; two; three; four; five; six; seven; eight; nine;\n"
+ + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n"
+ + " seventeen; eighteen; nineteen;\n"
+ // no explicit base value here: this rule directly follows the one
+ // for nineteen, so it is the rule for 20
+ + " twenty[->>];\n"
+ + " 30: thirty[->>];\n"
+ + " 40: forty[->>];\n"
+ + " 50: fifty[->>];\n"
+ + " 60: sixty[->>];\n"
+ + " 70: seventy[->>];\n"
+ + " 80: eighty[->>];\n"
+ + " 90: ninety[->>];\n"
+ + " 100: << hundred[ >>];\n"
+ + " 1000: << thousand[ >>];\n"
+ + " 1,000,000: << million[ >>];\n"
+ + " 1,000,000,000: << billion[ >>];\n"
+ + " 1,000,000,000,000: << trillion[ >>];\n"
+ // overflow rule: larger values are shown in numerals
+ + " 1,000,000,000,000,000: =#,##0=;\n"
+ // the rule set that formats the fractional part of the number.
+ // The rule that is used is the one that, when its base value is
+ // multiplied by the fractional part of the number being formatted,
+ // produces the result closest to zero. Thus, the base values are
+ // prospective denominators of the fraction. The << marks the place
+ // where the numerator of the fraction (the result of multiplying the
+ // fractional part of the number by the rule's base value) is
+ // placed. Text in brackets is omitted when the numerator is 1, giving
+ // us the singular and plural forms of the words.
+ // [In languages where the singular and plural are completely different
+ // words, the rule can just be stated twice: the second time with
+ // the plural form.]
+ + "%%frac:\n"
+ + " 10: << tenth[s];\n"
+ + " 100: << hundredth[s];\n"
+ + " 1000: << thousandth[s];\n"
+ + " 10,000: << ten-thousandth[s];\n"
+ + " 100,000: << hundred-thousandth[s];\n"
+ + " 1,000,000: << millionth[s];";
+
+ /**
+ * Number with closest fraction. This example formats a value using
+ * numerals, but shows the fractional part as a ratio (fraction) rather
+ * than a decimal. The fraction always has a denominator between 2 and 10.
+ */
+ public static final String closestFraction =
+ "%main:\n"
+ // this rule formats the number if it's 1 or more. It formats
+ // the integral part using a DecimalFormat ("#,##0" puts
+ // thousands separators in the right places) and the fractional
+ // part using %%frac. If there is no fractional part, it
+ // just shows the integral part.
+ + " x.0: <#,##0<[ >%%frac>];\n"
+ // this rule formats the number if it's between 0 and 1. It
+ // shows only the fractional part (0.5 shows up as "1/2," not
+ // "0 1/2")
+ + " 0.x: >%%frac>;\n"
+ // the fraction rule set. This works the same way as the one in the
+ // preceding example: We multiply the fractional part of the number
+ // being formatted by each rule's base value and use the rule that
+ // produces the result closest to 0 (or the first rule that produces 0).
+ // Since we only provide rules for the numbers from 2 to 10, we know
+ // we'll get a fraction with a denominator between 2 and 10.
+ // "<0<" causes the numerator of the fraction to be formatted
+ // using numerals; the literal "/n" after it supplies the denominator
+ + "%%frac:\n"
+ + " 2: 1/2;\n"
+ + " 3: <0</3;\n"
+ + " 4: <0</4;\n"
+ + " 5: <0</5;\n"
+ + " 6: <0</6;\n"
+ + " 7: <0</7;\n"
+ + " 8: <0</8;\n"
+ + " 9: <0</9;\n"
+ + " 10: <0</10;\n";
+
+ /**
+ * American stock-price formatting. Non-integral stock prices are still
+ * generally shown in eighths or sixteenths of dollars instead of dollars
+ * and cents. This example formats stock prices in this way if possible,
+ * and in dollars and cents if not.
+ */
+ public static final String stock =
+ "%main:\n"
+ // this rule formats the integral part of the number in numerals
+ // and (if necessary) the fractional part using %%frac1
+ + " x.0: <#,##0<[>%%frac1>];\n"
+ // this rule is used for values between 0 and 1 and omits the
+ // integral part
+ + " 0.x: >%%frac2>;\n"
+ // this rule set is used to format the fractional part of the number when
+ // there's an integral part before it (again, we try all denominators
+ // and use the "best" one)
+ + "%%frac1:\n"
+ // for even multiples of 1/4, format the fraction using the
+ // typographer's fractions
+ + " 4: <%%quarters<;\n"
+ // format the value as a number of eighths, sixteenths, or
+ // thirty-seconds, whichever produces the most accurate value.
+ // The apostrophe at the front of these rules is ignored, but
+ // it makes the space that follows it significant. This puts a
+ // space between the value's integral and fractional parts so
+ // you can read it. "<0<" formats the numerator in numerals and
+ // the literal "/n" after it supplies the denominator
+ + " 8: ' <0</8;\n"
+ + " 16: ' <0</16;\n"
+ + " 32: ' <0</32;\n"
+ // if we can't reasonably format the number in powers of 2,
+ // then show it as dollars and cents
+ + " 100: .<00<;\n"
+ // this rule set is used when the fractional part of the value stands
+ // alone
+ + "%%frac2:\n"
+ + " 4: <%%quarters<;\n"
+ // for fractions that we can't show using typographer's fractions,
+ // we don't have to put a space before the fraction
+ + " 8: <0</8;\n"
+ + " 16: <0</16;\n"
+ + " 32: <0</32;\n"
+ // but dollars and cents look better with a leading 0
+ + " 100: 0.<00<;\n"
+ // this rule set formats 1/4, 1/2, and 3/4 using typographer's fractions
+ + "%%quarters:\n"
+ + " ; \u00bc; \u00bd; \u00be;\n"
+ // these are the lenient-parse rules. These allow the user to type
+ // "1/4," "1/2," and "3/4" instead of their typographical counterparts
+ // and still have them be understood by the formatter
+ + "%%lenient-parse:\n"
+ + " & '1/4' , \u00bc\n"
+ + " & '1/2' , \u00bd\n"
+ + " & '3/4' , \u00be\n;";
+
+ //========================================================================
+ // Changing dimensions
+ //
+ // The next few examples demonstrate using a RuleBasedNumberFormat to
+ // change the units a value is denominated in depending on its magnitude
+ //========================================================================
+
+ /**
+ * This example shows large numbers the way they often appear in newspapers:
+ * 1,200,000 is formatted as "1.2 million".
+ */
+ public static final String abbEnglish =
+ // values below one million are shown as plain numerals
+ "=#,##0=;\n"
+ // this is fairly self-explanatory, but note that the << substitution
+ // can show the fractional part of the substitution value if the user
+ // wants it
+ + "1,000,000: <##0.###< million;\n"
+ + "1,000,000,000: <##0.###< billion;\n"
+ + "1,000,000,000,000: <##0.###< trillion;\n";
+
+ /**
+ * This example takes a number of meters and formats it in whatever unit
+ * will produce a number with from one to three digits before the decimal
+ * point. For example, 230,000 is formatted as "230 km".
+ */
+ public static final String units =
+ "%main:\n"
+ // for values between 0 and 1, delegate to %%small
+ + " 0.x: >%%small>;\n"
+ // otherwise, show between 3 and 6 significant digits of the value
+ // along with the most appropriate unit
+ + " 0: =##0.###= m;\n"
+ + " 1,000: <##0.###< km;\n"
+ + " 1,000,000: <##0.###< Mm;\n"
+ + " 1,000,000,000: <##0.###< Gm;\n"
+ + " 1,000,000,000,000: <#,##0.###< Tm;\n"
+ // %%small formats the number when it's less than 1. It multiplies the
+ // value by one trillion (the rule's base value), and then uses
+ // %%small2 to actually do the formatting.
+ + "%%small:\n"
+ + " 1,000,000,000,000: <%%small2<;\n"
+ // this rule set actually formats small values. %%small passes this
+ // rule set a number of picometers, and it takes care of scaling up as
+ // appropriate in exactly the same way %main does (we can't normally
+ // handle fractional values this way: here, we're concerned about
+ // magnitude; most of the time, we're concerned about precision)
+ + "%%small2:\n"
+ + " 0: =##0= pm;\n"
+ + " 1,000: <##0.###< nm;\n"
+ + " 1,000,000: <##0.###< \u00b5m;\n"
+ + " 1,000,000,000: <##0.###< mm;\n";
+
+ /**
+ * A more complicated message-formatting example. Here, in addition to
+ * handling the singular and plural versions of the word, the value is
+ * denominated in bytes, kilobytes, or megabytes depending on its magnitude.
+ * Also notice that it correctly treats a kilobyte as 1,024 bytes (not 1,000),
+ * and a megabyte as 1,024 kilobytes (not 1,000).
+ */
+ public static final String message2 =
+ // this rule supplies the shell of the sentence
+ "x.0: There << free space on the disk.;\n"
+ // handle singular and plural forms of "byte" (and format 0 as
+ // "There is no free space...")
+ + "0: is no;\n"
+ + "is one byte of;\n"
+ + "are =0= bytes of;\n"
+ // for values above 1,024, format the number in K (since "K" is usually
+ // pronounced "K" regardless of whether it's singular or plural, we
+ // don't worry about the plural form). The "/1024" here causes us to
+ // treat a K as 1,024 bytes rather than 1,000 bytes.
+ + "1024/1024: is <0<K of;\n"
+ // for values above 1,048,576, format the number in Mb, with separate
+ // rules for the singular (exactly 1 Mb) and the plural. The "/1024"
+ // keeps a megabyte at 1,024 kilobytes.
+ // NOTE(review): the two Mb rules were reconstructed after the original
+ // text was lost -- confirm the wording against the upstream file
+ + "1,048,576/1024: is 1 Mb of;\n"
+ + "2,097,152/1024: are <0< Mb of;";
+
+ /**
+ * This example formats a number in dozens and gross. The "/12" after
+ * a rule's base value makes that rule divide by twelve instead of ten.
+ * NOTE(review): the head of this rule set (declaration, the words for
+ * zero through eleven, and the dozens rule) was reconstructed after the
+ * original text was lost -- confirm against the upstream file.
+ */
+ public static final String dozens =
+ // words for the values below one dozen
+ "zero; one; two; three; four; five; six;\n"
+ + "seven; eight; nine; ten; eleven;\n"
+ // format values from 12 up in dozens
+ + "12/12: << dozen[ and >>];\n"
+ // format values over 144 in gross
+ + "144/12: << gross[, >>];\n"
+ // format values over 1,000 in thousands
+ + "1000: << thousand[, >>];\n"
+ // overflow rule. Format values over 10,000 in numerals
+ + "10,000: =#,##0=;\n";
+
+ //========================================================================
+ // Major and minor units
+ //
+ // These examples show how a single value can be divided up into major
+ // and minor units that don't relate to each other by a factor of 10.
+ //========================================================================
+
+ /**
+ * This example formats a number of seconds in sexagesimal notation
+ * (i.e., hours, minutes, and seconds). %with-words formats it with
+ * words (3740 is "1 hour, 2 minutes, 20 seconds") and %in-numerals
+ * formats it entirely in numerals (3740 is "1:02:20").
+ */
+ public static final String durationInSeconds =
+ // main rule set for formatting with words
+ "%with-words:\n"
+ // take care of singular and plural forms of "second"
+ + " 0 seconds; 1 second; =0= seconds;\n"
+ // use %%min to format values greater than 60 seconds
+ + " 60/60: <%%min<[, >>];\n"
+ // use %%hr to format values greater than 3,600 seconds
+ // (the ">>>" below causes us to see the number of minutes
+ // when there are zero minutes)
+ + " 3600/60: <%%hr<[, >>>];\n"
+ // this rule set takes care of the singular and plural forms
+ // of "minute"
+ + "%%min:\n"
+ + " 0 minutes; 1 minute; =0= minutes;\n"
+ // this rule set takes care of the singular and plural forms
+ // of "hour"
+ + "%%hr:\n"
+ + " 0 hours; 1 hour; =0= hours;\n"
+
+ // main rule set for formatting in numerals
+ + "%in-numerals:\n"
+ // values below 60 seconds are shown with "sec."
+ + " =0= sec.;\n"
+ // higher values are shown with colons: %%min-sec is used for
+ // values below 3,600 seconds...
+ + " 60: =%%min-sec=;\n"
+ // ...and %%hr-min-sec is used for values of 3,600 seconds
+ // and above
+ + " 3600: =%%hr-min-sec=;\n"
+ // this rule set causes values of less than 10 minutes to show without
+ // a leading zero (minutes use the "0" format; seconds always use "00")
+ + "%%min-sec:\n"
+ + " 0: :=00=;\n"
+ + " 60/60: <0<>>;\n"
+ // this rule set is used for values of 3,600 or more. Minutes are always
+ // shown, and always shown with two digits
+ + "%%hr-min-sec:\n"
+ + " 0: :=00=;\n"
+ + " 60/60: <00<>>;\n"
+ + " 3600/60: <#,##0<:>>>;\n"
+ // the lenient-parse rules allow several different characters to be used
+ // as delimiters between hours, minutes, and seconds
+ + "%%lenient-parse:\n"
+ + " & : = . = ' ' = -;\n";
+
+ /**
+ * This example formats a number of hours in sexagesimal notation (i.e.,
+ * hours, minutes, and seconds). %with-words formats the value using
+ * words for the units, and %in-numerals formats the value using only
+ * numerals.
+ */
+ public static final String durationInHours =
+ // main entry point for formatting with words
+ "%with-words:\n"
+ // this rule omits minutes and seconds when the value is
+ // an even number of hours
+ + " x.0: <<[, >%%min-sec>];\n"
+ // these rules take care of the singular and plural forms
+ // of hours
+ + " 0 hours; 1 hour; =#,##0= hours;\n"
+ // this rule set takes the fractional part of the number and multiplies
+ // it by 3,600 (turning it into a number of seconds). Then it delegates
+ // to %%min-sec-implementation to format the resulting value
+ + "%%min-sec:\n"
+ + " 3600: =%%min-sec-implementation=;\n"
+ // this rule set formats the seconds as either seconds or minutes and
+ // seconds, and takes care of the singular and plural forms of
+ // "minute" and "second"
+ + "%%min-sec-implementation:\n"
+ + " 0 seconds; 1 second; =0= seconds;\n"
+ + " 60/60: 1 minute[, >>];\n"
+ + " 120/60: <0< minutes[, >>];\n"
+
+ // main entry point for formatting in numerals
+ + "%in-numerals:\n"
+ // show minutes even for even numbers of hours
+ + " x.0: <#,##0<:00;\n"
+ // delegate to %%min-sec2 to format minutes and seconds
+ + " x.x: <#,##0<:>%%min-sec2>;\n"
+ // this rule set formats minutes when there is an even number of
+ // minutes, and delegates to %%min-sec2-implementation when there
+ // are seconds
+ + "%%min-sec2:\n"
+ + " 60: <00<;\n"
+ + " 3600: <%%min-sec2-implementation<;\n"
+ // these two rule sets are used to format the minutes and seconds
+ + "%%min-sec2-implementation:\n"
+ // if there are fewer than 60 seconds, show the minutes anyway
+ + " 0: 00:=00=;\n"
+ // if there are minutes, format them too, and always use 2 digits
+ // for both minutes and seconds
+ + " 60: =%%min-sec3=;\n"
+ + "%%min-sec3:\n"
+ + " 0: :=00=;\n"
+ + " 60/60: <00<>>;\n"
+ // the lenient-parse rules allow the user to use any of several
+ // characters as delimiters between hours, minutes, and seconds
+ + "%%lenient-parse:\n"
+ + " & : = . = ' ' = -;\n";
+
+ /**
+ * This rule set formats a number of pounds as pounds, shillings, and
+ * pence in the old English system of currency.
+ */
+ public static final String poundsShillingsAndPence =
+ // for values of 1 or more, format the integral part with a pound
+ // sign in front, and show shillings and pence if necessary
+ "%main:\n"
+ + " x.0: \u00a3<#,##0<[ >%%shillings-and-pence>];\n"
+ // for values between 0 and 1, omit the number of pounds
+ + " 0.x: >%%pence-alone>;\n"
+ // this rule set is used to show shillings and pence. It multiplies
+ // the fractional part of the number by 240 (the number of pence in a
+ // pound) and uses %%shillings-and-pence-implementation to format
+ // the result
+ + "%%shillings-and-pence:\n"
+ + " 240: <%%shillings-and-pence-implementation<;\n"
+ // this rule set is used to show shillings and pence when there are
+ // no pounds. It also multiplies the value by 240, and then it uses
+ // %%pence-alone-implementation to format the result.
+ + "%%pence-alone:\n"
+ + " 240: <%%pence-alone-implementation<;\n"
+ // this rule set formats a number of pence when we know we also
+ // have pounds. We always show shillings (with a 0 if necessary),
+ // but only show pence if the value isn't an even number of shillings.
+ // In the 12/12 rule, "<0<" is the number of shillings, "/" is the
+ // shilling bar, and the bracketed ">0>" is the leftover pence
+ + "%%shillings-and-pence-implementation:\n"
+ + " 0/; 0/=0=;\n"
+ + " 12/12: <0</[>0>];\n"
+ // this rule set formats a number of pence when we know there are
+ // no pounds. Values less than a shilling are shown with "d." (the
+ // abbreviation for pence), and values greater than a shilling are
+ // shown with a shilling bar (and without pence when the value is
+ // an even number of shillings)
+ + "%%pence-alone-implementation:\n"
+ + " =0= d.;\n"
+ + " 12/12: <0</[>0>];\n";
+
+ //========================================================================
+ // Alternate numeration systems
+ //
+ // These examples show how RuleBasedNumberFormat can be used to format
+ // numbers using non-positional numeration systems.
+ //========================================================================
+
+ /**
+ * Arabic digits. This example formats numbers in Arabic numerals.
+ * Normally, you'd do this with DecimalFormat, but this shows that
+ * RuleBasedNumberFormat can handle it too.
+ */
+ public static final String arabicNumerals =
+ // the ten digits
+ "0; 1; 2; 3; 4; 5; 6; 7; 8; 9;\n"
+ // two-digit and three-digit values: leading digit(s), then the rest
+ + "10: <<>>;\n"
+ + "100: <<>>>;\n"
+ // from 1,000 up, a comma separates each group of three digits
+ + "1000: <<,>>>;\n"
+ + "1,000,000: <<,>>>;\n"
+ + "1,000,000,000: <<,>>>;\n"
+ + "1,000,000,000,000: <<,>>>;\n"
+ // overflow rule: hand larger values to a DecimalFormat pattern
+ + "1,000,000,000,000,000: =#,##0=;\n"
+ // negative numbers and numbers with a fractional part
+ + "-x: ->>;\n"
+ + "x.x: <<.>>;";
+
+ /**
+ * Words for digits. Follows the same pattern as the Arabic-numerals
+ * example above, but uses words for the various digits (e.g., 123 comes
+ * out as "one two three").
+ */
+ public static final String wordsForDigits =
+ // negative numbers and numbers with a fractional part
+ "-x: minus >>;\n"
+ + "x.x: << point >>;\n"
+ // the words for the ten digits
+ + "zero; one; two; three; four; five; six;\n"
+ + " seven; eight; nine;\n"
+ // two-digit and three-digit values: digit words separated by spaces
+ + "10: << >>;\n"
+ + "100: << >>>;\n"
+ // from 1,000 up, a comma and a space separate each group of three digits
+ + "1000: <<, >>>;\n"
+ + "1,000,000: <<, >>>;\n"
+ + "1,000,000,000: <<, >>>;\n"
+ + "1,000,000,000,000: <<, >>>;\n"
+ // overflow rule: larger values are shown in numerals
+ + "1,000,000,000,000,000: =#,##0=;\n";
+
+ /**
+ * This example formats numbers using Chinese characters in the Arabic
+ * place-value method. This was used historically in China for a while.
+ */
+ public static final String chinesePlaceValue =
+ // the characters used as the ten digits
+ "\u3007; \u4e00; \u4e8c; \u4e09; \u56db; \u4e94; \u516d; \u4e03; \u516b; \u4e5d;\n"
+ // same pattern as the Arabic-numerals example, but with no
+ // separator between groups of digits
+ + "10: <<>>;\n"
+ + "100: <<>>>;\n"
+ + "1000: <<>>>;\n"
+ + "1,000,000: <<>>>;\n"
+ + "1,000,000,000: <<>>>;\n"
+ + "1,000,000,000,000: <<>>>;\n"
+ // overflow rule: larger values are shown in numerals
+ + "1,000,000,000,000,000: =#,##0=;\n";
+
+ /**
+ * Roman numerals. This example has two variants: %modern shows how large
+ * numbers are usually handled today; %historical uses the older symbols for
+ * thousands.
+ */
+ public static final String romanNumerals =
+ "%historical:\n"
+ // values below 1,000 are the same as in %modern
+ + " =%modern=;\n"
+ // in early Roman numerals, 1,000 was shown with a circle
+ // bisected by a vertical line. Additional thousands were
+ // shown by adding more concentric circles, and fives were
+ // shown by cutting the symbol for next-higher power of 10
+ // in half (the letter D for 500 evolved from this).
+ // We could go beyond 40,000, but Unicode doesn't encode
+ // the symbols for higher numbers.
+ + " 1000: \u2180[>>]; 2000: \u2180\u2180[>>]; 3000: \u2180\u2180\u2180[>>]; 4000: \u2180\u2181[>>];\n"
+ + " 5000: \u2181[>>]; 6000: \u2181\u2180[>>]; 7000: \u2181\u2180\u2180[>>];\n"
+ + " 8000: \u2181\u2180\u2180\u2180[>>]; 9000: \u2180\u2182[>>];\n"
+ + " 10,000: \u2182[>>]; 20,000: \u2182\u2182[>>]; 30,000: \u2182\u2182\u2182[>>];\n"
+ // overflow rule: values of 40,000 and up are shown in numerals
+ + " 40,000: =#,##0=;\n"
+ + "%modern:\n"
+ // zero has no Roman numeral, so the first rule is empty
+ + " ; I; II; III; IV; V; VI; VII; VIII; IX;\n"
+ + " 10: X[>>]; 20: XX[>>]; 30: XXX[>>]; 40: XL[>>]; 50: L[>>];\n"
+ + " 60: LX[>>]; 70: LXX[>>]; 80: LXXX[>>]; 90: XC[>>];\n"
+ + " 100: C[>>]; 200: CC[>>]; 300: CCC[>>]; 400: CD[>>]; 500: D[>>];\n"
+ + " 600: DC[>>]; 700: DCC[>>]; 800: DCCC[>>]; 900: CM[>>];\n"
+ // in modern Roman numerals, high numbers are generally shown
+ // by placing a bar over the letters for the lower numbers:
+ // the bar multiplied a letter's value by 1,000
+ + " 1000: M[>>]; 2000: MM[>>]; 3000: MMM[>>]; 4000: MV\u0306[>>];\n"
+ + " 5000: V\u0306[>>]; 6000: V\u0306M[>>]; 7000: V\u0306MM[>>];\n"
+ + " 8000: V\u0306MMM[>>]; 9000: MX\u0306[>>];\n"
+ + " 10,000: X\u0306[>>]; 20,000: X\u0306X\u0306[>>]; 30,000: X\u0306X\u0306X\u0306[>>];\n"
+ + " 40,000: X\u0306L\u0306[>>]; 50,000: L\u0306[>>]; 60,000: L\u0306X\u0306[>>];\n"
+ + " 70,000: L\u0306X\u0306X\u0306[>>]; 80,000: L\u0306X\u0306X\u0306X\u0306[>>];\n"
+ + " 90,000: X\u0306C\u0306[>>];\n"
+ // 300,000 needs three barred C's (two would repeat the 200,000 form)
+ + " 100,000: C\u0306[>>]; 200,000: C\u0306C\u0306[>>]; 300,000: C\u0306C\u0306C\u0306[>>];\n"
+ + " 400,000: C\u0306D\u0306[>>]; 500,000: D\u0306[>>]; 600,000: D\u0306C\u0306[>>];\n"
+ + " 700,000: D\u0306C\u0306C\u0306[>>]; 800,000: D\u0306C\u0306C\u0306C\u0306[>>];\n"
+ // overflow rule: values of 900,000 and up are shown in numerals
+ + " 900,000: =#,##0=;\n";
+
+ /**
+  * Hebrew alphabetic numerals. Before adoption of Arabic numerals, Hebrew speakers
+  * used the letters of their alphabet as numerals. The first nine letters of
+  * the alphabet represented the values from 1 to 9, the second nine letters the
+  * multiples of 10, and the remaining letters the multiples of 100. Since they
+  * ran out of letters at 400, the remaining multiples of 100 were represented
+  * using combinations of the existing letters for the hundreds. Numbers were
+  * distinguished from words in a number of different ways: the way shown here
+  * uses a single mark after a number consisting of one letter, and a double
+  * mark between the last two letters of a number consisting of two or more
+  * letters. Two dots over a letter multiplied its value by 1,000. Also, since
+  * the letter for 10 is the first letter of God's name and the letters for 5 and 6
+  * are letters in God's name, which wasn't supposed to be written or spoken, 15 and
+  * 16 were usually written as 9 + 6 and 9 + 7 instead of 10 + 5 and 10 + 6.
+  */
+ public static final String hebrewAlphabetic =
+     // letters for the ones
+     "%%ones:\n"
+     + " (no zero); \u05d0; \u05d1; \u05d2; \u05d3; \u05d4; \u05d5; \u05d6; \u05d7; \u05d8;\n"
+     // letters for the tens
+     + "%%tens:\n"
+     + " ; \u05d9; \u05db; \u05dc; \u05de; \u05e0; \u05e1; \u05e2; \u05e4; \u05e6;\n"
+     // letters for the first four hundreds
+     + "%%hundreds:\n"
+     + " ; \u05e7; \u05e8; \u05e9; \u05ea;\n"
+     // this rule set is used to write the combination of the tens and ones digits
+     // when we know that no other digits precede them: they put the numeral marks
+     // in the right place and properly handle 15 and 16 (I'm using the mathematical
+     // prime characters for the numeral marks because my Unicode font doesn't
+     // include the real Hebrew characters, which look just like the prime marks)
+     + "%%tens-and-ones:\n"
+     // for values less than 10, just use %%ones and put the numeral mark
+     // afterward
+     + " =%%ones=\u2032;\n"
+     // put the numeral mark at the end for 10, but in the middle for
+     // 11 through 14
+     + " 10: <%%tens<\u2032; <%%tens<\u2033>%%ones>;\n"
+     // special-case 15 and 16
+     + " 15: \u05d8\u2033\u05d5; 16: \u05d8\u2033\u05d6;\n"
+     // go back to the normal method at 17
+     + " 17: <%%tens<\u2033>%%ones>;\n"
+     // repeat the rules for 10 and 11 to cover the values from 20 to 99
+     + " 20: <%%tens<\u2032; <%%tens<\u2033>%%ones>;\n"
+     // this rule set is used to format numbers below 1,000. It relies on
+     // %%tens-and-ones to format the tens and ones places, and adds logic
+     // to handle the high hundreds and the numeral marks when there is no
+     // tens digit. Notice how the rules are paired: all of these pairs of
+     // rules take advantage of the rollback rule: if the value (between 100
+     // and 499) is an even multiple of 100, the rule for 100 is used; otherwise,
+     // the rule for 101 (the following rule) is used. The first rule in each
+     // pair (the one for the even multiple) places the numeral mark in a different
+     // spot than the second rule in each pair (which knows there are more digits
+     // and relies on the rule supplying them to also supply the numeral mark).
+     // The call to %%null in line 10 is there simply to invoke the rollback
+     // rule.
+     + "%%low-order:\n"
+     // this rule is only called when there are other characters before.
+     // It places the numeral mark before the last digit
+     + " \u2033=%%ones=;\n"
+     // the rule for 10 places the numeral mark before the 10 character
+     // (because we know it's the last character); the rule for 11 relies
+     // on %%tens-and-ones to place the numeral mark
+     + " 10: \u2033<%%tens<; =%%tens-and-ones=>%%null>;\n"
+     // the rule for 100 places the numeral mark before the 100 character
+     // (we know it's the last character); the rule for 101 recurses to
+     // fill in the remaining digits and the numeral mark
+     + " 100: <%%hundreds<\u2032; <%%hundreds<>>;\n"
+     // special-case the hundreds from 500 to 900 because they consist of
+     // more than one character
+     + " 500: \u05ea\u2033\u05e7; \u05ea\u05e7>>;\n"
+     + " 600: \u05ea\u2033\u05e8; \u05ea\u05e8>>;\n"
+     + " 700: \u05ea\u2033\u05e9; \u05ea\u05e9>>;\n"
+     + " 800: \u05ea\u2033\u05ea; \u05ea\u05ea>>;\n"
+     + " 900: \u05ea\u05ea\u2033\u05e7; \u05ea\u05ea\u05e7>>;\n"
+     // this rule set is used to format values of 1,000 or more. Here, we don't
+     // worry about the numeral mark, and we add two dots (the Unicode combining
+     // diaeresis character) to every letter
+     + "%%high-order:\n"
+     // put the ones digit, followed by the diaeresis
+     + " =%%ones=\u0308;\n"
+     // the tens can be handled with recursion
+     + " 10: <%%tens<\u0308[>>];\n"
+     // still have to special-case 15 and 16: both letters of each pair carry
+     // the diaeresis, exactly as in the rule for 15
+     + " 15: \u05d8\u0308\u05d5\u0308; 16: \u05d8\u0308\u05d6\u0308;\n"
+     // back to the regular rules at 17
+     + " 17: <%%tens<\u0308[>>];\n"
+     // the hundreds with the dots added (and without worrying about
+     // placing the numeral mark)
+     + " 100: <%%hundreds<\u0308[>>];\n"
+     + " 500: \u05ea\u0308\u05e7\u0308[>>];\n"
+     + " 600: \u05ea\u0308\u05e8\u0308[>>];\n"
+     + " 700: \u05ea\u0308\u05e9\u0308[>>];\n"
+     + " 800: \u05ea\u0308\u05ea\u0308[>>];\n"
+     + " 900: \u05ea\u0308\u05ea\u0308\u05e7\u0308[>>];\n"
+     // this rule set doesn't do anything; it's used by some other rules to
+     // invoke the rollback rule
+     + " %%null:\n"
+     + " ;\n"
+     // the main rule set.
+     + "%main:\n"
+     // for values below 10, just output the letter and the numeral mark
+     + " =%%ones=\u2032;\n"
+     // for values from 10 to 99, use %%tens-and-ones to do the formatting
+     + " 10: =%%tens-and-ones=;\n"
+     // for values from 100 to 999, use %%low-order to do the formatting
+     + " 100: =%%low-order=;\n"
+     // for values of 1,000 and over, use %%high-order to do the formatting
+     + " 1000: <%%high-order<[>%%low-order>];\n";
+
+ /**
+  * Greek alphabetic numerals. The Greeks, before adopting the Arabic numerals,
+  * also used the letters of their alphabet as numerals. There are three now-
+  * obsolete Greek letters that are used as numerals; many fonts don't have them.
+  * Large numbers were handled many different ways; the way shown here divides
+  * large numbers into groups of four letters (factors of 10,000), and separates
+  * the groups with the capital letter mu (for myriad). Capital letters are used
+  * for values below 10,000; small letters for higher numbers (to make the capital
+  * mu stand out).
+  */
+ public static final String greekAlphabetic =
+     // this rule set is used for formatting numbers below 10,000. It uses
+     // capital letters.
+     "%%low-order:\n"
+     + " (no zero); \u0391; \u0392; \u0393; \u0394; \u0395; \u03dc; \u0396; \u0397; \u0398;\n"
+     + " 10: \u0399[>>]; 20: \u039a[>>]; 30: \u039b[>>]; 40: \u039c[>>]; 50: \u039d[>>];\n"
+     + " 60: \u039e[>>]; 70: \u039f[>>]; 80: \u03a0[>>]; 90: \u03de[>>];\n"
+     + " 100: \u03a1[>>]; 200: \u03a3[>>]; 300: \u03a4[>>]; 400: \u03a5[>>];\n"
+     + " 500: \u03a6[>>]; 600: \u03a7[>>]; 700: \u03a8[>>]; 800: \u03a9[>>];\n"
+     + " 900: \u03e0[>>];\n"
+     // the thousands are represented by the same numbers as the ones, but
+     // with a comma-like mark added to their left shoulder
+     + " 1000: \u0391\u0313[>>]; 2000: \u0392\u0313[>>]; 3000: \u0393\u0313[>>];\n"
+     + " 4000: \u0394\u0313[>>]; 5000: \u0395\u0313[>>]; 6000: \u03dc\u0313[>>];\n"
+     + " 7000: \u0396\u0313[>>]; 8000: \u0397\u0313[>>]; 9000: \u0398\u0313[>>];\n"
+     // this rule set is the same as above, but uses lowercase letters. It is used
+     // for formatting the groups in numbers above 10,000. The obsolete letters
+     // for 6, 90 and 900 are written with the same code points as in %%low-order.
+     + "%%high-order:\n"
+     + " (no zero); \u03b1; \u03b2; \u03b3; \u03b4; \u03b5; \u03dc; \u03b6; \u03b7; \u03b8;\n"
+     + " 10: \u03b9[>>]; 20: \u03ba[>>]; 30: \u03bb[>>]; 40: \u03bc[>>]; 50: \u03bd[>>];\n"
+     + " 60: \u03be[>>]; 70: \u03bf[>>]; 80: \u03c0[>>]; 90: \u03de[>>];\n"
+     + " 100: \u03c1[>>]; 200: \u03c3[>>]; 300: \u03c4[>>]; 400: \u03c5[>>];\n"
+     + " 500: \u03c6[>>]; 600: \u03c7[>>]; 700: \u03c8[>>]; 800: \u03c9[>>];\n"
+     // 900 is sampi (U+03E0); small pi (U+03C0) already stands for 80
+     + " 900: \u03e0[>>];\n"
+     + " 1000: \u03b1\u0313[>>]; 2000: \u03b2\u0313[>>]; 3000: \u03b3\u0313[>>];\n"
+     + " 4000: \u03b4\u0313[>>]; 5000: \u03b5\u0313[>>]; 6000: \u03dc\u0313[>>];\n"
+     + " 7000: \u03b6\u0313[>>]; 8000: \u03b7\u0313[>>]; 9000: \u03b8\u0313[>>];\n"
+     // the main rule set
+     + "%main:\n"
+     // for values below 10,000, just use %%low-order
+     + " =%%low-order=;\n"
+     // for values above 10,000, split into two groups of four digits
+     // and format each with %%high-order (putting an M in between)
+     + " 10,000: <%%high-order<\u039c>%%high-order>;\n"
+     // for values above 100,000,000, add another group onto the front
+     // and another M
+     + " 100,000,000: <%%high-order<\u039c>>\n";
+
+ /**
+  * All of the sample rule-set descriptions, collected into one array for the
+  * demo program. sampleRuleSetNames lists display names in this same order.
+  */
+ public static final String[] sampleRuleSets = {
+     // spellout rules for natural languages
+     usEnglish, ukEnglish, spanish, french, swissFrench, german, italian,
+     swedish, dutch, japanese, greek, russian, hebrew,
+     // abbreviations, messages and fractions
+     ordinal, message1, dollarsAndCents, decimalAsFraction, closestFraction, stock,
+     // changing units, other bases, major/minor units
+     abbEnglish, units, message2, dozens,
+     durationInSeconds, durationInHours, poundsShillingsAndPence,
+     // numeration systems
+     arabicNumerals, wordsForDigits, chinesePlaceValue,
+     romanNumerals, hebrewAlphabetic, greekAlphabetic
+ };
+
+ /**
+  * The displayable names for all the sample rule sets, in the same order as
+  * the sampleRuleSets array.
+  */
+ public static final String[] sampleRuleSetNames = {
+     "English (US)",
+     "English (UK)",
+     "Spanish",
+     "French (France)",
+     "French (Switzerland)",
+     "German",
+     "Italian",
+     "Swedish",
+     "Dutch",
+     "Japanese",
+     "Greek",
+     "Russian",
+     "Hebrew",
+     "English ordinal abbreviations",
+     "Simple message formatting",
+     "Dollars and cents",
+     "Decimals as fractions",
+     "Closest fraction",
+     "Stock prices",
+     "Abbreviated US English",
+     "Changing dimensions",
+     "Complex message formatting",
+     "Dozens",
+     "Duration (value in seconds)",
+     "Duration (value in hours)",
+     "Pounds, shillings, and pence",
+     "Arabic numerals",
+     "Words for digits",
+     "Chinese place-value notation",
+     "Roman numerals",
+     "Hebrew alphabetic numerals",
+     "Greek alphabetic numerals"
+ };
+
+ /**
+  * The base locale for each of the sample rule sets. The locale is used to
+  * determine DecimalFormat behavior, lenient-parse behavior, and text-display
+  * selection (we have a hack in here to allow display of non-Latin scripts).
+  * Null means the locale setting is irrelevant and the default can be used.
+  * The array holds one more entry than there are named sample rule sets.
+  */
+ public static final Locale[] sampleRuleSetLocales = {
+     // spellout rules for natural languages
+     Locale.US,
+     Locale.UK,
+     new Locale("es", "", ""),
+     Locale.FRANCE,
+     new Locale("fr", "CH", ""),
+     Locale.GERMAN,
+     Locale.ITALIAN,
+     new Locale("sv", "", ""),
+     new Locale("nl", "", ""),
+     Locale.JAPANESE,
+     new Locale("el", "", ""),
+     new Locale("ru", "", ""),
+     new Locale("iw", "", ""),
+     // abbreviations, messages and fractions
+     Locale.ENGLISH,
+     Locale.ENGLISH,
+     Locale.US,
+     Locale.ENGLISH,
+     null,
+     null,
+     // changing units, other bases, major/minor units
+     Locale.ENGLISH,
+     null,
+     Locale.ENGLISH,
+     Locale.ENGLISH,
+     null,
+     null,
+     Locale.UK,
+     // numeration systems
+     null,
+     Locale.ENGLISH,
+     new Locale("zh", "", ""),
+     null,
+     new Locale("iw", "", ""),
+     new Locale("el", "", ""),
+     // trailing extra entry
+     null
+ };
+
+ /**
+  * Explanatory text for each of the sample rule sets, one entry per rule set,
+  * followed by a final entry describing a custom (user-defined) rule set.
+  */
+ public static final String[] sampleRuleSetCommentary = {
+     "This demonstration version of the "
+         + "U.S. English spellout rules has four variants: 1) %simplified is a "
+         + "set of rules showing the simple method of spelling out numbers in "
+         + "English: 289 is formatted as \"two hundred eighty-nine\". 2) %alt-teens "
+         + "is the same as %simplified, except that values between 1,000 and 9,999 "
+         + "whose hundreds place isn't zero are formatted in hundreds. For example, "
+         + "1,983 is formatted as \"nineteen hundred eighty-three,\" and 2,183 is "
+         + "formatted as \"twenty-one hundred eighty-three,\" but 2,083 is still "
+         + "formatted as \"two thousand eighty-three.\" 3) %ordinal formats the "
+         + "values as ordinal numbers in English (e.g., 289 is \"two hundred eighty-"
+         + "ninth\"). 4) %default uses a more complicated algorithm to format "
+         + "numbers in a more natural way: 289 is formatted as \"two hundred AND "
+         + "eighty-nine\" and commas are inserted between the thousands groups for "
+         + "values above 100,000.",
+
+     "U.K. English has one significant "
+         + "difference from U.S. English: the names for values of 1,000,000,000 "
+         + "and higher. In American English, each successive \"-illion\" is 1,000 "
+         + "times greater than the preceding one: 1,000,000,000 is \"one billion\" "
+         + "and 1,000,000,000,000 is \"one trillion.\" In British English, each "
+         + "successive \"-illion\" is one million times greater than the one before: "
+         + "\"one billion\" is 1,000,000,000,000 (or what Americans would call a "
+         + "\"trillion\"), and \"one trillion\" is 1,000,000,000,000,000,000. "
+         + "1,000,000,000 in British English is \"one thousand million.\" (This "
+         + "value is sometimes called a \"milliard,\" but this word seems to have "
+         + "fallen into disuse.)",
+
+     "The Spanish rules are quite similar to "
+         + "the English rules, but there are some important differences: "
+         + "First, we have to provide separate rules for most of the twenties "
+         + "because the ones digit frequently picks up an accent mark that it "
+         + "doesn't have when standing alone. Second, each multiple of 100 has "
+         + "to be specified separately because the multiplier on 100 very often "
+         + "changes form in the contraction: 500 is \"quinientos,\" not "
+         + "\"cincocientos.\" In addition, the word for 100 is \"cien\" when "
+         + "standing alone, but changes to \"ciento\" when followed by more digits. "
+         + "There are also some other differences.",
+
+     "French adds some interesting quirks of its "
+         + "own: 1) The word \"et\" is interposed between the tens and ones digits, "
+         + "but only if the ones digit is 1: 20 is \"vingt,\" and 22 is \"vingt-deux,\" "
+         + "but 21 is \"vingt-et-un.\" 2) There are no words for 70, 80, or 90. "
+         + "\"quatre-vingts\" (\"four twenties\") is used for 80, and values proceed "
+         + "by score from 60 to 99 (e.g., 73 is \"soixante-treize\" [\"sixty-thirteen\"]). "
+         + "Numbers from 1,100 to 1,199 are rendered as hundreds rather than "
+         + "thousands: 1,100 is \"onze cents\" (\"eleven hundred\"), rather than "
+         + "\"mille cent\" (\"one thousand one hundred\").",
+
+     "Swiss French differs from French French "
+         + "in that it does have words for 70, 80, and 90. This rule set shows them, "
+         + "and is simpler as a result.",
+
+     "German also adds some interesting "
+         + "characteristics. For values below 1,000,000, numbers are customarily "
+         + "written out as a single word. And the ones digit PRECEDES the tens "
+         + "digit (e.g., 23 is \"dreiundzwanzig,\" not \"zwanzigunddrei\").",
+
+     "Like German, most Italian numbers are "
+         + "written as single words. What makes these rules complicated is the rule "
+         + "that says that when a word ending in a vowel and a word beginning with "
+         + "a vowel are combined into a compound, the vowel is dropped from the "
+         + "end of the first word: 180 is \"centottanta,\" not \"centoottanta.\" "
+         + "The complexity of this rule set is to produce this behavior.",
+
+     "Spellout rules for Swedish.",
+
+     // the trailing space after "German," keeps the two halves from running together
+     "Spellout rules for Dutch. Notice that in Dutch, as in German, "
+         + "the ones digit precedes the tens digit.",
+
+     "In Japanese, there really isn't any "
+         + "distinction between a number written out in digits and a number "
+         + "written out in words: the ideographic characters are both digits "
+         + "and words. This rule set provides two variants: %traditional "
+         + "uses the traditional CJK numerals (which are also used in China "
+         + "and Korea). %financial uses alternate ideographs for many numbers "
+         + "that are harder to alter than the traditional numerals (one could "
+         + "fairly easily change a one to "
+         + "a three just by adding two strokes, for example). This is also done in "
+         + "the other countries using Chinese ideographs, but different ideographs "
+         + "are used in those places.",
+
+     "Again in Greek we have to supply the words "
+         + "for the multiples of 100 because they can't be derived algorithmically. "
+         + "Also, the tens digit changes form when followed by a ones digit: an "
+         + "accent mark disappears from the tens digit and moves to the ones digit. "
+         + "Therefore, instead of using the [] notation, we actually have to use "
+         + "two separate rules for each multiple of 10 to show the two forms of "
+         + "the word.",
+
+     "Spellout rules for Russian.",
+
+     "Spellout rules for Hebrew. Hebrew actually has inflected forms for "
+         + "most of the lower-order numbers. The masculine forms are shown "
+         + "here.",
+
+     "This rule set adds an English ordinal abbreviation to the end of a "
+         + "number. For example, 2 is formatted as \"2nd\". Parsing doesn't work with "
+         + "this rule set. To parse, use DecimalFormat on the numeral.",
+
+     "This is a simple message-formatting example. Normally one would "
+         + "use ChoiceFormat and MessageFormat to do something this simple, "
+         + "but this shows it could be done with RuleBasedNumberFormat too. "
+         + "A message-formatting example that might work better with "
+         + "RuleBasedNumberFormat appears later.",
+
+     "The next few examples demonstrate fraction handling. "
+         + "This example formats a number in one of the two styles often used "
+         + "on checks. %dollars-and-hundredths formats cents as hundredths of "
+         + "a dollar (23.40 comes out as \"twenty-three and 40/100 dollars\"). "
+         + "%dollars-and-cents formats in dollars and cents (23.40 comes out as "
+         + "\"twenty-three dollars and forty cents\").",
+
+     "This rule set shows the fractional part of the number as a fraction "
+         + "with a power of 10 as the denominator. Some languages don't spell "
+         + "out the fractional part of a number as \"point one two three,\" but "
+         + "always render it as a fraction. If we still want to treat the fractional "
+         + "part of the number as a decimal, then the fraction's denominator "
+         + "is always a power of 10. This example does that: 23.125 is formatted "
+         + "as \"twenty-three and one hundred twenty-five thousandths\" (as opposed "
+         + "to \"twenty-three point one two five\" or \"twenty-three and one eighth\").",
+
+     "Number with closest fraction. This example formats a value using "
+         + "numerals, but shows the fractional part as a ratio (fraction) rather "
+         + "than a decimal. The fraction always has a denominator between 2 and 10.",
+
+     "American stock-price formatting. Non-integral stock prices are still "
+         + "generally shown in eighths or sixteenths of dollars instead of dollars "
+         + "and cents. This example formats stock prices in this way if possible, "
+         + "and in dollars and cents if not.",
+
+     "The next few examples demonstrate using a RuleBasedNumberFormat to "
+         + "change the units a value is denominated in depending on its magnitude. "
+         + "The example shows large numbers the way they often appear in newspapers: "
+         + "1,200,000 is formatted as \"1.2 million\".",
+
+     "This example takes a number of meters and formats it in whatever unit "
+         + "will produce a number with from one to three digits before the decimal "
+         + "point. For example, 230,000 is formatted as \"230 km\".",
+
+     "A more complicated message-formatting example. Here, in addition to "
+         + "handling the singular and plural versions of the word, the value is "
+         + "denominated in bytes, kilobytes, or megabytes depending on its magnitude. "
+         + "Also notice that it correctly treats a kilobyte as 1,024 bytes (not 1,000), "
+         + "and a megabyte as 1,024 kilobytes (not 1,000).",
+
+     "This example formats a number in dozens and gross. This is intended to "
+         + "demonstrate how this rule set can be used to format numbers in systems "
+         + "other than base 10. The \"/12\" after the rules' base values controls this. "
+         + "Also notice that the base doesn't have to be consistent throughout the "
+         + "whole rule set: we go back to base 10 for values over 1,000.",
+
+     "The next few examples show how a single value can be divided up into major "
+         + "and minor units that don't relate to each other by a factor of 10. "
+         + "This example formats a number of seconds in sexagesimal notation "
+         + "(i.e., hours, minutes, and seconds). %with-words formats it with "
+         + "words (3740 is \"1 hour, 2 minutes, 20 seconds\") and %in-numerals "
+         + "formats it entirely in numerals (3740 is \"1:02:20\").",
+
+     "This example formats a number of hours in sexagesimal notation (i.e., "
+         + "hours, minutes, and seconds). %with-words formats the value using "
+         + "words for the units, and %in-numerals formats the value using only "
+         + "numerals.",
+
+     "This rule set formats a number of pounds as pounds, shillings, and "
+         + "pence in the old English system of currency.",
+
+     "These examples show how RuleBasedNumberFormat can be used to format "
+         + "numbers using non-positional numeration systems. "
+         + "This example formats numbers in Arabic numerals. "
+         + "Normally, you'd do this with DecimalFormat, but this shows that "
+         + "RuleBasedNumberFormat can handle it too.",
+
+     "This example follows the same pattern as the Arabic-numerals "
+         + "example, but uses words for the various digits (e.g., 123 comes "
+         + "out as \"one two three\").",
+
+     "This example formats numbers using Chinese characters in the Arabic "
+         + "place-value method. This was used historically in China for a while.",
+
+     "Roman numerals. This example has two variants: %modern shows how large "
+         + "numbers are usually handled today; %historical uses the older symbols for "
+         + "thousands. Not all of the characters are displayable with most fonts.",
+
+     "Hebrew alphabetic numerals. Before adoption of Arabic numerals, Hebrew speakers "
+         + "used the letter of their alphabet as numerals. The first nine letters of "
+         + "the alphabet represented the values from 1 to 9, the second nine letters the "
+         + "multiples of 10, and the remaining letters the multiples of 100. Since they "
+         + "ran out of letters at 400, the remaining multiples of 100 were represented "
+         + "using combinations of the existing letters for the hundreds. Numbers were "
+         + "distinguished from words in a number of different ways: the way shown here "
+         + "uses a single mark after a number consisting of one letter, and a double "
+         + "mark between the last two letters of a number consisting of two or more "
+         + "letters. Two dots over a letter multiplied its value by 1,000. Also, since "
+         + "the letter for 10 is the first letter of God's name and the letters for 5 and 6 "
+         + "are letters in God's name, which wasn't supposed to be written or spoken, 15 and "
+         + "16 were usually written as 9 + 6 and 9 + 7 instead of 10 + 5 and 10 + 6.",
+
+     "Greek alphabetic numerals. The Greeks, before adopting the Arabic numerals, "
+         + "also used the letters of their alphabet as numerals. There are three now-"
+         + "obsolete Greek letters that are used as numerals; many fonts don't have them. "
+         + "Large numbers were handled many different ways; the way shown here divides "
+         + "large numbers into groups of four letters (factors of 10,000), and separates "
+         + "the groups with the capital letter mu (for myriad). Capital letters are used "
+         + "for values below 10,000; small letters for higher numbers (to make the capital "
+         + "mu stand out).",
+
+     "This is a custom (user-defined) rule set."
+ };
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/rbnf/package.html b/demos/src/com/ibm/icu/dev/demo/rbnf/package.html
new file mode 100644
index 00000000000..8a0507f1ff3
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/rbnf/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+RuleBasedNumberFormat demo application.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java b/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java
new file mode 100644
index 00000000000..72d83048a99
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/timescale/PivotDemo.java
@@ -0,0 +1,78 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ */
+
+package com.ibm.icu.dev.demo.timescale;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.MessageFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.SimpleTimeZone;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.UniversalTimeScale;
+
+/**
+ * This class demonstrates how to use UniversalTimeScale
to
+ * convert from one local time scale to another.
+ *
+ * @see UniversalTimeScale
+ */
+public class PivotDemo {
+
+ /**
+ * The default constructor.
+ */
+ public PivotDemo()
+ {
+ }
+
+ /**
+ * The main()
method uses UniversalTimeScale
to
+ * convert from the Java and Unix time scales to the ICU time scale. It uses
+ * a Calendar
object to display the ICU time values.
+ *
+ * @param args the command line arguments.
+ */
+ public static void main(String[] args)
+ {
+ TimeZone utc = new SimpleTimeZone(0, "UTC");
+ Calendar cal = Calendar.getInstance(utc, Locale.ENGLISH);
+ MessageFormat fmt = new MessageFormat("{1} = {0, date, full} {0, time, full}");
+ Object arguments[] = {cal, null};
+
+ arguments[0] = cal;
+
+ System.out.println("\nJava test:");
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = " 000000000000000";
+ System.out.println(fmt.format(arguments));
+
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(-62164684800000L, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = "-62164684800000L";
+ System.out.println(fmt.format(arguments));
+
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(-62135769600000L, UniversalTimeScale.JAVA_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = "-62135769600000L";
+ System.out.println(fmt.format(arguments));
+
+ System.out.println("\nUnix test:");
+
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0x80000000, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = "0x80000000";
+ System.out.println(fmt.format(arguments));
+
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = "0x00000000";
+ System.out.println(fmt.format(arguments));
+
+ cal.setTimeInMillis(UniversalTimeScale.toLong(UniversalTimeScale.from(0x7FFFFFFF, UniversalTimeScale.UNIX_TIME), UniversalTimeScale.ICU4C_TIME));
+ arguments[1] = "0x7FFFFFFF";
+ System.out.println(fmt.format(arguments));
+
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java b/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java
new file mode 100644
index 00000000000..3f458d8199a
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/AnyTransliterator.java
@@ -0,0 +1,308 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeFilter;
+
+public class AnyTransliterator extends Transliterator {
+
+ /** When true, tracing output is printed to System.out. */
+ static final boolean DEBUG = false;
+
+ /**
+  * Transliterator applied to text before it is printed in DEBUG tracing;
+  * its ID restricts it to characters outside U+0020..U+007E.
+  */
+ private static Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
+
+ /** Name of the target; combined with each run's name to build a transliterator ID. */
+ private String targetName;
+ /** Splits the text being transliterated into runs. */
+ private RunIterator it;
+ /** Scratch Position describing the run currently being processed. */
+ private Position run;
+
+
+ /**
+  * Creates a transliterator with the ID "Any-" + targetName.
+  *
+  * @param targetName name of the target, used in the ID and in run lookups
+  * @param filter the filter handed to the Transliterator superclass
+  * @param it the iterator used to split the text into runs
+  */
+ public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it) {
+     super("Any-" + targetName, filter);
+     this.it = it;
+     this.targetName = targetName;
+     this.run = new Position();
+ }
+
+ /**
+  * Creates a transliterator that splits the text with a new ScriptRunIterator.
+  *
+  * @param targetName name of the target, used in the ID and in run lookups
+  * @param filter the filter handed to the Transliterator superclass
+  */
+ public AnyTransliterator(String targetName, UnicodeFilter filter) {
+     this(targetName, filter, new ScriptRunIterator());
+ }
+
+
+ protected void handleTransliterate(Replaceable text,
+ Position offsets, boolean isIncremental) {
+ if (DEBUG) {
+ System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
+ + ", " + toString(offsets));
+ }
+ it.reset(text, offsets);
+
+ while (it.next(run)) {
+ if (targetName.equalsIgnoreCase(it.getName())) {
+ if (DEBUG) System.out.println("Skipping identical: " + targetName);
+ run.start = run.limit; // show we processed
+ continue; // skip if same
+ }
+
+ Transliterator t;
+ String id = it.getName() + '-' + targetName;
+ try {
+ t = Transliterator.getInstance(id);
+ } catch (IllegalArgumentException ex) {
+ if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
+ id = it.getName() + "-Latin; Latin-" + targetName;
+ try {
+ t = Transliterator.getInstance(id);
+ } catch (IllegalArgumentException ex2) {
+ if (DEBUG) System.out.println("Couldn't find: " + id);
+ continue;
+ }
+ }
+ // TODO catch error later!!
+
+ if (DEBUG) {
+ System.out.println(t.getID());
+ System.out.println("input: " + hex.transliterate(text.toString())
+ + ", " + toString(run));
+ }
+
+ if (isIncremental && it.atEnd()) {
+ t.transliterate(text, run);
+ } else {
+ t.finishTransliteration(text, run);
+ }
+ // adjust the offsets in line with the changes
+ it.adjust(run.limit);
+
+ if (DEBUG) {
+ System.out.println("output: " + hex.transliterate(text.toString())
+ + ", " + toString(run));
+ }
+ }
+
+ // show how far we got!
+ it.getExpanse(offsets);
+ if (run.start == run.limit) offsets.start = offsets.limit;
+ else offsets.start = run.start;
+ if (DEBUG) {
+ System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
+ System.out.println();
+ }
+ }
+
+ // should be method on Position
+ public static String toString(Position offsets) {
+ return "[cs: " + offsets.contextStart
+ + ", s: " + offsets.start
+ + ", l: " + offsets.limit
+ + ", cl: " + offsets.contextLimit
+ + "]";
+ }
+
+ /**
+  * Splits a region of text into consecutive runs, each identified by a name.
+  */
+ public interface RunIterator {
+     /** Starts iteration over the given text, limited to the given expanse. */
+     void reset(Replaceable text, Position expanse);
+
+     /** Stores the iterator's expanse in the given Position (implementation-defined). */
+     void getExpanse(Position run);
+
+     /** Restarts iteration at the beginning of the current expanse. */
+     void reset();
+
+     /**
+      * Advances to the next run and stores it in the given Position.
+      *
+      * @return false if there are no more runs
+      */
+     boolean next(Position run);
+
+     /** Stores the current run in the given Position (implementation-defined). */
+     void getCurrent(Position run);
+
+     /** Returns the name of the current run; callers use it to build a transliterator ID. */
+     String getName();
+
+     /** Tells the iterator the limit of the current run after the text was changed. */
+     void adjust(int newCurrentLimit);
+
+     /** Returns whether the current run is the last one -- presumably; verify against implementations. */
+     boolean atEnd();
+ }
+
+ /**
+ * Returns a series of ranges corresponding to scripts. They will be of the form:
+ *  ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
+ *  |           |          - first run
+ *           |           |  - second run
+ * That is, the runs will overlap. The reason for this is so that a transliterator can
+ * consider common characters both before and after the scripts.
+ * The only time that contextStart != start is for the first run
+ * (the context is the start context of the entire expanse)
+ * The only time that contextLimit != limit is for the last run
+ * (the context is the end context of the entire expanse)
+ */
+ public static class ScriptRunIterator implements RunIterator {
+ // the text being iterated over; assigned by reset(Replaceable, Position)
+ private Replaceable text;
+ // private copy of the region handed to reset(Replaceable, Position)
+ private Position expanse = new Position();
+ // the run most recently produced by next()
+ private Position current = new Position();
+ // script of the current run; UScript.INVALID_CODE right after reset()
+ private int script;
+ // true until reset() is called, and again once next() has reached expanse.limit
+ private boolean done = true;
+
+
+ /**
+  * Starts iteration over a new text and region. The region is copied field
+  * by field into this iterator's own Position, so the caller's Position is
+  * not retained.
+  *
+  * @param repText the text to iterate over
+  * @param expansePos the region of the text to split into runs
+  */
+ public void reset(Replaceable repText, Position expansePos) {
+     expanse.contextStart = expansePos.contextStart;
+     expanse.start = expansePos.start;
+     expanse.limit = expansePos.limit;
+     expanse.contextLimit = expansePos.contextLimit;
+     text = repText;
+     reset();
+ }
+
+ /**
+  * Restarts iteration: clears the done flag and the script, and makes the
+  * current run an empty range positioned at the start of the expanse.
+  */
+ public void reset() {
+     final int origin = expanse.start;
+
+     // empty first range, sitting at the beginning of the expanse
+     current.contextStart = expanse.contextStart;
+     current.start = origin;
+     current.limit = origin;
+     current.contextLimit = origin;
+
+     script = UScript.INVALID_CODE;
+     done = false;
+ }
+
+ /**
+ * Computes the next run and copies it into the given Position.
+ * The run starts where the previous one ended and extends forward over
+ * COMMON/INHERITED characters and characters of a single script, stopping
+ * at the first character of a different script or at expanse.limit.
+ *
+ * @param run receives the bounds of the new run
+ * @return false (leaving run untouched) if iteration had already finished,
+ * true otherwise
+ */
+ public boolean next(Position run) {
+ if (done) return false;
+ if (DEBUG) {
+ System.out.println("+cs: " + current.contextStart
+ + ", s: " + current.start
+ + ", l: " + current.limit
+ + ", cl: " + current.contextLimit);
+ }
+ // reset start context run to the last end
+ current.start = current.limit;
+
+ // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
+ // NOTE(review): the loop inspects the code point AT offset i (char32At(i)),
+ // not the one before it. After the first run, i is the offset at which
+ // Phase 2 stopped on a character of a different script, so this loop looks
+ // like it breaks immediately and never backs up -- confirm whether
+ // char32At(i - 1) was intended.
+ int i, cp;
+ int limit = expanse.start;
+ for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
+ cp = text.char32At(i);
+ int scrpt = UScript.getScript(cp);
+ if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
+ }
+ current.start = i;
+ current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
+
+ // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
+ int lastScript = UScript.COMMON;
+ //int veryLastScript = UScript.COMMON;
+ limit = expanse.limit;
+ for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
+ cp = text.char32At(i);
+ int scrpt = UScript.getScript(cp);
+ // INHERITED is treated the same as COMMON
+ if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
+ if (scrpt != UScript.COMMON) {
+ // if we find a real script:
+ // if we already had a script, bail
+ // otherwise set our script
+ if (lastScript == UScript.COMMON) lastScript = scrpt;
+ else if (lastScript != scrpt) break;
+ }
+ }
+ current.limit = i;
+ current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
+ // reaching expanse.limit makes this the last run
+ done = (i == limit);
+ // stays COMMON if the run contained no character of a real script
+ script = lastScript;
+
+ if (DEBUG) {
+ System.out.println("-cs: " + current.contextStart
+ + ", s: " + current.start
+ + ", l: " + current.limit
+ + ", cl: " + current.contextLimit);
+ }
+
+ set(run, current);
+ return true;
+ }
+
+ // Field-by-field copy of current into run; ideally this would be a method on Position.
+ public static void set(Position run, Position current) {
+     run.contextLimit = current.contextLimit;
+     run.limit = current.limit;
+     run.start = current.start;
+     run.contextStart = current.contextStart;
+ }
+
+ public boolean atEnd() {
+     return expanse.limit == current.limit; // true once the current run reaches the expanse limit
+ }
+
+ public void getCurrent(Position run) {
+     ScriptRunIterator.set(run, this.current); // copies the fields, so the caller never aliases our state
+ }
+
+ public void getExpanse(Position run) {
+     ScriptRunIterator.set(run, this.expanse); // copies the overall bounds, including any adjust() shifts
+ }
+
+ public String getName() {
+     return UScript.getName(this.script); // script is INVALID_CODE until next() has produced a run
+ }
+
+ public void adjust(int newCurrentLimit) {
+     // NOTE(review): expanse is initialized at its declaration and never reassigned in
+     // this class, so the guard below looks unreachable; it is kept to preserve behavior.
+     if (null == expanse) throw new IllegalArgumentException("Must reset() before calling");
+     final int shift = newCurrentLimit - current.limit; // growth (or shrinkage) of the current run
+     expanse.contextLimit += shift;
+     expanse.limit += shift;
+     current.contextLimit += shift;
+     current.limit = newCurrentLimit;
+ }
+
+ // register Any-Script for every script.
+
+ private static Set scriptList = new HashSet();
+
+ public static void registerAnyToScript() {
+     // Builds and registers an AnyTransliterator for each script target not yet in scriptList.
+     synchronized (scriptList) {
+         for (Enumeration srcs = Transliterator.getAvailableSources(); srcs.hasMoreElements();) {
+             String source = (String) srcs.nextElement();
+             if (source.equals("Any")) continue; // to keep from looping
+             Enumeration tgts = Transliterator.getAvailableTargets(source);
+             while (tgts.hasMoreElements()) {
+                 String target = (String) tgts.nextElement();
+                 // Skip targets that are not scripts (or locales), and ones already handled.
+                 if (UScript.getCode(target) == null) continue;
+                 if (!scriptList.add(target)) continue;
+                 Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
+                 if (variantSet.size() >= 2) {
+                     // Several variants: register one transliterator per variant.
+                     for (Iterator vIt = variantSet.iterator(); vIt.hasNext();) {
+                         String variant = (String) vIt.next();
+                         String id = target;
+                         if (variant.length() > 0) id = target + "/" + variant;
+                         AnyTransliterator at = new AnyTransliterator(id, null);
+                         DummyFactory.add(at.getID(), at);
+                     }
+                 } else {
+                     // Fewer than two variants: register the plain target only.
+                     AnyTransliterator at = new AnyTransliterator(target, null);
+                     DummyFactory.add(at.getID(), at);
+                 }
+             }
+         }
+     }
+ }
+
+ static class DummyFactory implements Transliterator.Factory {
+     static DummyFactory singleton = new DummyFactory();
+     static HashMap m = new HashMap(); // registered ID -> Transliterator instance
+     // Since Transliterators are immutable, the same instance is stored and returned uncloned.
+     static void add(String ID, Transliterator t) {
+         m.put(ID, t);
+         String rules = t.toRules(true);
+         System.out.println("Registering: " + ID + ", " + rules);
+         Transliterator.registerFactory(ID, singleton);
+     }
+     public Transliterator getInstance(String ID) {
+         return (Transliterator) m.get(ID); // null when the ID was never added
+     }
+ }
+
+ // Small utility: drains an Enumeration into the given Set and returns that same Set.
+ static Set add(Set s, Enumeration enumeration) {
+     for (; enumeration.hasMoreElements();) {
+         s.add(enumeration.nextElement());
+     }
+     return s;
+ }
+
+
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java b/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java
new file mode 100644
index 00000000000..b2b477ab42e
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/CaseIterator.java
@@ -0,0 +1,560 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.dev.demo.translit;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Incrementally returns the set of all strings that case-fold to the same value.
+ */
+public class CaseIterator {
+
+ // testing stuff
+ static Transliterator toName = Transliterator.getInstance("[:^ascii:] Any-Name");
+ static Transliterator toHex = Transliterator.getInstance("[:^ascii:] Any-Hex");
+ static Transliterator toHex2 = Transliterator.getInstance("[[^\u0021-\u007F]-[,]] Any-Hex");
+
+ // global tables (could be precompiled)
+ private static Map fromCaseFold = new HashMap();
+ private static Map toCaseFold = new HashMap();
+ private static int maxLength = 0;
+
+ // This exception list is generated on the console by turning on the GENERATE flag,
+ // which MUST be false for normal operation.
+ // Once the list is generated, it is pasted in here.
+ // A bit of a kludge, but this bootstrapping is the easiest way
+ // to get around certain complications in the data.
+
+ private static final boolean GENERATE = false;
+
+ private static final boolean DUMP = false;
+
+ private static String[][] exceptionList = {
+ // a\N{MODIFIER LETTER RIGHT HALF RING}
+ {"a\u02BE","A\u02BE","a\u02BE",},
+ // ff
+ {"ff","FF","Ff","fF","ff",},
+ // ffi
+ {"ffi","FFI","FFi","FfI","Ffi","F\uFB01","fFI","fFi","ffI","ffi","f\uFB01","\uFB00I","\uFB00i",},
+ // ffl
+ {"ffl","FFL","FFl","FfL","Ffl","F\uFB02","fFL","fFl","ffL","ffl","f\uFB02","\uFB00L","\uFB00l",},
+ // fi
+ {"fi","FI","Fi","fI","fi",},
+ // fl
+ {"fl","FL","Fl","fL","fl",},
+ // h\N{COMBINING MACRON BELOW}
+ {"h\u0331","H\u0331","h\u0331",},
+ // i\N{COMBINING DOT ABOVE}
+ {"i\u0307","I\u0307","i\u0307",},
+ // j\N{COMBINING CARON}
+ {"j\u030C","J\u030C","j\u030C",},
+ // ss
+ {"ss","SS","Ss","S\u017F","sS","ss","s\u017F","\u017FS","\u017Fs","\u017F\u017F",},
+ // st
+ {"st","ST","St","sT","st","\u017FT","\u017Ft",},
+ // t\N{COMBINING DIAERESIS}
+ {"t\u0308","T\u0308","t\u0308",},
+ // w\N{COMBINING RING ABOVE}
+ {"w\u030A","W\u030A","w\u030A",},
+ // y\N{COMBINING RING ABOVE}
+ {"y\u030A","Y\u030A","y\u030A",},
+ // \N{MODIFIER LETTER APOSTROPHE}n
+ {"\u02BCn","\u02BCN","\u02BCn",},
+ // \N{GREEK SMALL LETTER ALPHA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+ {"\u03AC\u03B9","\u0386\u0345","\u0386\u0399","\u0386\u03B9","\u0386\u1FBE","\u03AC\u0345","\u03AC\u0399","\u03AC\u03B9","\u03AC\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+ {"\u03AE\u03B9","\u0389\u0345","\u0389\u0399","\u0389\u03B9","\u0389\u1FBE","\u03AE\u0345","\u03AE\u0399","\u03AE\u03B9","\u03AE\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03B1\u0342","\u0391\u0342","\u03B1\u0342",},
+ // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u03B1\u0342\u03B9","\u0391\u0342\u0345","\u0391\u0342\u0399","\u0391\u0342\u03B9","\u0391\u0342\u1FBE",
+ "\u03B1\u0342\u0345","\u03B1\u0342\u0399","\u03B1\u0342\u03B9","\u03B1\u0342\u1FBE","\u1FB6\u0345",
+ "\u1FB6\u0399","\u1FB6\u03B9","\u1FB6\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER IOTA}
+ {"\u03B1\u03B9","\u0391\u0345","\u0391\u0399","\u0391\u03B9","\u0391\u1FBE","\u03B1\u0345","\u03B1\u0399","\u03B1\u03B9","\u03B1\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03B7\u0342","\u0397\u0342","\u03B7\u0342",},
+ // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u03B7\u0342\u03B9","\u0397\u0342\u0345","\u0397\u0342\u0399","\u0397\u0342\u03B9","\u0397\u0342\u1FBE",
+ "\u03B7\u0342\u0345","\u03B7\u0342\u0399","\u03B7\u0342\u03B9","\u03B7\u0342\u1FBE","\u1FC6\u0345","\u1FC6\u0399",
+ "\u1FC6\u03B9","\u1FC6\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA}\N{GREEK SMALL LETTER IOTA}
+ {"\u03B7\u03B9","\u0397\u0345","\u0397\u0399","\u0397\u03B9","\u0397\u1FBE","\u03B7\u0345","\u03B7\u0399","\u03B7\u03B9","\u03B7\u1FBE",},
+ // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
+ {"\u03B9\u0308\u0300","\u0345\u0308\u0300","\u0399\u0308\u0300","\u03B9\u0308\u0300","\u1FBE\u0308\u0300",},
+ // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
+ {"\u03B9\u0308\u0301","\u0345\u0308\u0301","\u0399\u0308\u0301","\u03B9\u0308\u0301","\u1FBE\u0308\u0301",},
+ // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03B9\u0308\u0342","\u0345\u0308\u0342","\u0399\u0308\u0342","\u03B9\u0308\u0342","\u1FBE\u0308\u0342",},
+ // \N{GREEK SMALL LETTER IOTA}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03B9\u0342","\u0345\u0342","\u0399\u0342","\u03B9\u0342","\u1FBE\u0342",},
+ // \N{GREEK SMALL LETTER RHO}\N{COMBINING COMMA ABOVE}
+ {"\u03C1\u0313","\u03A1\u0313","\u03C1\u0313","\u03F1\u0313",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
+ {"\u03C5\u0308\u0300","\u03A5\u0308\u0300","\u03C5\u0308\u0300",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
+ {"\u03C5\u0308\u0301","\u03A5\u0308\u0301","\u03C5\u0308\u0301",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03C5\u0308\u0342","\u03A5\u0308\u0342","\u03C5\u0308\u0342",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}
+ {"\u03C5\u0313","\u03A5\u0313","\u03C5\u0313",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GRAVE ACCENT}
+ {"\u03C5\u0313\u0300","\u03A5\u0313\u0300","\u03C5\u0313\u0300","\u1F50\u0300",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING ACUTE ACCENT}
+ {"\u03C5\u0313\u0301","\u03A5\u0313\u0301","\u03C5\u0313\u0301","\u1F50\u0301",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03C5\u0313\u0342","\u03A5\u0313\u0342","\u03C5\u0313\u0342","\u1F50\u0342",},
+ // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03C5\u0342","\u03A5\u0342","\u03C5\u0342",},
+ // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}
+ {"\u03C9\u0342","\u03A9\u0342","\u03C9\u0342","\u2126\u0342",},
+ // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u03C9\u0342\u03B9","\u03A9\u0342\u0345","\u03A9\u0342\u0399","\u03A9\u0342\u03B9","\u03A9\u0342\u1FBE","\u03C9\u0342\u0345","\u03C9\u0342\u0399","\u03C9\u0342\u03B9","\u03C9\u0342\u1FBE","\u1FF6\u0345",
+ "\u1FF6\u0399","\u1FF6\u03B9","\u1FF6\u1FBE","\u2126\u0342\u0345","\u2126\u0342\u0399","\u2126\u0342\u03B9","\u2126\u0342\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA}\N{GREEK SMALL LETTER IOTA}
+ {"\u03C9\u03B9","\u03A9\u0345","\u03A9\u0399","\u03A9\u03B9","\u03A9\u1FBE","\u03C9\u0345","\u03C9\u0399","\u03C9\u03B9","\u03C9\u1FBE","\u2126\u0345","\u2126\u0399","\u2126\u03B9","\u2126\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
+ {"\u03CE\u03B9","\u038F\u0345","\u038F\u0399","\u038F\u03B9","\u038F\u1FBE","\u03CE\u0345","\u03CE\u0399","\u03CE\u03B9","\u03CE\u1FBE",},
+ // \N{ARMENIAN SMALL LETTER ECH}\N{ARMENIAN SMALL LETTER YIWN}
+ {"\u0565\u0582","\u0535\u0552","\u0535\u0582","\u0565\u0552","\u0565\u0582",},
+ // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER ECH}
+ {"\u0574\u0565","\u0544\u0535","\u0544\u0565","\u0574\u0535","\u0574\u0565",},
+ // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER INI}
+ {"\u0574\u056B","\u0544\u053B","\u0544\u056B","\u0574\u053B","\u0574\u056B",},
+ // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER XEH}
+ {"\u0574\u056D","\u0544\u053D","\u0544\u056D","\u0574\u053D","\u0574\u056D",},
+ // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER NOW}
+ {"\u0574\u0576","\u0544\u0546","\u0544\u0576","\u0574\u0546","\u0574\u0576",},
+ // \N{ARMENIAN SMALL LETTER VEW}\N{ARMENIAN SMALL LETTER NOW}
+ {"\u057E\u0576","\u054E\u0546","\u054E\u0576","\u057E\u0546","\u057E\u0576",},
+ // \N{GREEK SMALL LETTER ALPHA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F00\u03B9","\u1F00\u0345","\u1F00\u0399","\u1F00\u03B9","\u1F00\u1FBE","\u1F08\u0345","\u1F08\u0399","\u1F08\u03B9","\u1F08\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F01\u03B9","\u1F01\u0345","\u1F01\u0399","\u1F01\u03B9","\u1F01\u1FBE","\u1F09\u0345","\u1F09\u0399","\u1F09\u03B9","\u1F09\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F02\u03B9","\u1F02\u0345","\u1F02\u0399","\u1F02\u03B9","\u1F02\u1FBE","\u1F0A\u0345","\u1F0A\u0399","\u1F0A\u03B9","\u1F0A\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F03\u03B9","\u1F03\u0345","\u1F03\u0399","\u1F03\u03B9","\u1F03\u1FBE","\u1F0B\u0345","\u1F0B\u0399","\u1F0B\u03B9","\u1F0B\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F04\u03B9","\u1F04\u0345","\u1F04\u0399","\u1F04\u03B9","\u1F04\u1FBE","\u1F0C\u0345","\u1F0C\u0399","\u1F0C\u03B9","\u1F0C\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F05\u03B9","\u1F05\u0345","\u1F05\u0399","\u1F05\u03B9","\u1F05\u1FBE","\u1F0D\u0345","\u1F0D\u0399","\u1F0D\u03B9","\u1F0D\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F06\u03B9","\u1F06\u0345","\u1F06\u0399","\u1F06\u03B9","\u1F06\u1FBE","\u1F0E\u0345","\u1F0E\u0399","\u1F0E\u03B9","\u1F0E\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F07\u03B9","\u1F07\u0345","\u1F07\u0399","\u1F07\u03B9","\u1F07\u1FBE","\u1F0F\u0345","\u1F0F\u0399","\u1F0F\u03B9","\u1F0F\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F20\u03B9","\u1F20\u0345","\u1F20\u0399","\u1F20\u03B9","\u1F20\u1FBE","\u1F28\u0345","\u1F28\u0399","\u1F28\u03B9","\u1F28\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F21\u03B9","\u1F21\u0345","\u1F21\u0399","\u1F21\u03B9","\u1F21\u1FBE","\u1F29\u0345","\u1F29\u0399","\u1F29\u03B9","\u1F29\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F22\u03B9","\u1F22\u0345","\u1F22\u0399","\u1F22\u03B9","\u1F22\u1FBE","\u1F2A\u0345","\u1F2A\u0399","\u1F2A\u03B9","\u1F2A\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F23\u03B9","\u1F23\u0345","\u1F23\u0399","\u1F23\u03B9","\u1F23\u1FBE","\u1F2B\u0345","\u1F2B\u0399","\u1F2B\u03B9","\u1F2B\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F24\u03B9","\u1F24\u0345","\u1F24\u0399","\u1F24\u03B9","\u1F24\u1FBE","\u1F2C\u0345","\u1F2C\u0399","\u1F2C\u03B9","\u1F2C\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F25\u03B9","\u1F25\u0345","\u1F25\u0399","\u1F25\u03B9","\u1F25\u1FBE","\u1F2D\u0345","\u1F2D\u0399","\u1F2D\u03B9","\u1F2D\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F26\u03B9","\u1F26\u0345","\u1F26\u0399","\u1F26\u03B9","\u1F26\u1FBE","\u1F2E\u0345","\u1F2E\u0399","\u1F2E\u03B9","\u1F2E\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F27\u03B9","\u1F27\u0345","\u1F27\u0399","\u1F27\u03B9","\u1F27\u1FBE","\u1F2F\u0345","\u1F2F\u0399","\u1F2F\u03B9","\u1F2F\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F60\u03B9","\u1F60\u0345","\u1F60\u0399","\u1F60\u03B9","\u1F60\u1FBE","\u1F68\u0345","\u1F68\u0399","\u1F68\u03B9","\u1F68\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F61\u03B9","\u1F61\u0345","\u1F61\u0399","\u1F61\u03B9","\u1F61\u1FBE","\u1F69\u0345","\u1F69\u0399","\u1F69\u03B9","\u1F69\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F62\u03B9","\u1F62\u0345","\u1F62\u0399","\u1F62\u03B9","\u1F62\u1FBE","\u1F6A\u0345","\u1F6A\u0399","\u1F6A\u03B9","\u1F6A\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F63\u03B9","\u1F63\u0345","\u1F63\u0399","\u1F63\u03B9","\u1F63\u1FBE","\u1F6B\u0345","\u1F6B\u0399","\u1F6B\u03B9","\u1F6B\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F64\u03B9","\u1F64\u0345","\u1F64\u0399","\u1F64\u03B9","\u1F64\u1FBE","\u1F6C\u0345","\u1F6C\u0399","\u1F6C\u03B9","\u1F6C\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F65\u03B9","\u1F65\u0345","\u1F65\u0399","\u1F65\u03B9","\u1F65\u1FBE","\u1F6D\u0345","\u1F6D\u0399","\u1F6D\u03B9","\u1F6D\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F66\u03B9","\u1F66\u0345","\u1F66\u0399","\u1F66\u03B9","\u1F66\u1FBE","\u1F6E\u0345","\u1F6E\u0399","\u1F6E\u03B9","\u1F6E\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F67\u03B9","\u1F67\u0345","\u1F67\u0399","\u1F67\u03B9","\u1F67\u1FBE","\u1F6F\u0345","\u1F6F\u0399","\u1F6F\u03B9","\u1F6F\u1FBE",},
+ // \N{GREEK SMALL LETTER ALPHA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F70\u03B9","\u1F70\u0345","\u1F70\u0399","\u1F70\u03B9","\u1F70\u1FBE","\u1FBA\u0345","\u1FBA\u0399","\u1FBA\u03B9","\u1FBA\u1FBE",},
+ // \N{GREEK SMALL LETTER ETA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F74\u03B9","\u1F74\u0345","\u1F74\u0399","\u1F74\u03B9","\u1F74\u1FBE","\u1FCA\u0345","\u1FCA\u0399","\u1FCA\u03B9","\u1FCA\u1FBE",},
+ // \N{GREEK SMALL LETTER OMEGA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
+ {"\u1F7C\u03B9","\u1F7C\u0345","\u1F7C\u0399","\u1F7C\u03B9","\u1F7C\u1FBE","\u1FFA\u0345","\u1FFA\u0399","\u1FFA\u03B9","\u1FFA\u1FBE",},
+ };
+
+ // this initializes the data used to generate the case-equivalents
+
+ static {
+     // Builds the toCaseFold / fromCaseFold lookup tables once, when the class is loaded.
+     // Step 1: seed fromCaseFold with the pasted-in exception sets, keyed by their first entry.
+
+     if (!GENERATE) {
+         for (int i = 0; i < exceptionList.length; ++i) {
+             String[] exception = exceptionList[i];
+             Set s = new HashSet();
+             // copy the array into the set, element by element
+             for (int j = 0; j < exception.length; ++j) {
+                 s.add(exception[j]);
+             }
+             fromCaseFold.put(exception[0], s);
+         }
+     }
+
+     // Step 2: walk through all the characters, and at every case fold result,
+     // put a set of all the characters that map to that result
+
+     boolean defaultmapping = true; // false for turkish
+     for (int i = 0; i <= 0x10FFFF; ++i) {
+         int cat = UCharacter.getType(i); // a UCharacterCategory code, not a java.lang.Character one
+         if (cat == com.ibm.icu.lang.UCharacterCategory.UNASSIGNED
+                 || cat == com.ibm.icu.lang.UCharacterCategory.PRIVATE_USE) continue;
+         String cp = UTF16.valueOf(i);
+         String mapped = UCharacter.foldCase(cp, defaultmapping);
+         if (mapped.equals(cp)) continue;
+
+         if (maxLength < mapped.length()) maxLength = mapped.length();
+
+         // at this point, have different case folding
+
+         Set s = (Set) fromCaseFold.get(mapped);
+         if (s == null) {
+             s = new HashSet();
+             s.add(mapped); // add the case fold result itself
+             fromCaseFold.put(mapped, s);
+         }
+         s.add(cp);
+         toCaseFold.put(cp, mapped);
+         toCaseFold.put(mapped, mapped); // add mapping to self
+     }
+
+     // Step 3 (debugging only): emit the final data
+
+     if (DUMP) {
+         System.out.println("maxLength = " + maxLength);
+
+         System.out.println("\nfromCaseFold:");
+         Iterator it = fromCaseFold.keySet().iterator();
+         while (it.hasNext()) {
+             Object key = it.next();
+             System.out.print(" " + toHex2.transliterate((String)key) + ": ");
+             Set s = (Set) fromCaseFold.get(key);
+             Iterator it2 = s.iterator();
+             boolean first = true;
+             while (it2.hasNext()) {
+                 if (first) {
+                     first = false;
+                 } else {
+                     System.out.print(", ");
+                 }
+                 System.out.print(toHex2.transliterate((String)it2.next()));
+             }
+             System.out.println("");
+         }
+
+         System.out.println("\ntoCaseFold:");
+         it = toCaseFold.keySet().iterator();
+         while (it.hasNext()) {
+             String key = (String) it.next();
+             String value = (String) toCaseFold.get(key);
+             System.out.println(" " + toHex2.transliterate(key) + ": " + toHex2.transliterate(value));
+         }
+     }
+
+     // Step 4: now convert all those sets into linear arrays
+     // We can't do this in place in Java, so make a temporary target array
+
+     // Note: This could be transformed into a single array, with offsets into it.
+     // Might be best choice in C.
+
+
+     Map fromCaseFold2 = new HashMap();
+     Iterator it = fromCaseFold.keySet().iterator();
+     while (it.hasNext()) {
+         Object key = it.next();
+         Set s = (Set) fromCaseFold.get(key);
+         String[] temp = new String[s.size()];
+         s.toArray(temp);
+         fromCaseFold2.put(key, temp);
+     }
+     fromCaseFold = fromCaseFold2;
+
+     // We have processed everything, so the iterator will now work
+     // The following is normally OFF.
+     // It is here to generate (under the GENERATE flag) the static exception list.
+     // It must be at the very end of initialization, so that the iterator is functional.
+     // (easiest to do it that way)
+
+     if (GENERATE) {
+
+         // first get small set of items that have multiple characters
+
+         Set multichars = new TreeSet();
+         it = fromCaseFold.keySet().iterator();
+         while (it.hasNext()) {
+             String key = (String) it.next();
+             if (UTF16.countCodePoint(key) < 2) continue;
+             multichars.add(key);
+         }
+
+         // now we will go through each of them.
+
+         CaseIterator ci = new CaseIterator();
+         it = multichars.iterator();
+
+         while (it.hasNext()) {
+             String key = (String) it.next();
+
+             // here is a nasty complication. Take the 'ffi' ligature. We
+             // can't just close it, since we would miss the combination
+             // that includes the 'fi' ligature,
+             // so first do a pass through, and add substring combinations.
+             // We call this a 'partial closure'.
+
+             Set partialClosure = new TreeSet();
+             partialClosure.add(key);
+
+             if (UTF16.countCodePoint(key) > 2) {
+                 Iterator multiIt2 = multichars.iterator();
+                 while (multiIt2.hasNext()) {
+                     String otherKey = (String) multiIt2.next();
+                     if (otherKey.length() >= key.length()) continue;
+                     int pos = -1;
+                     while (true) {
+                         // The following is not completely general
+                         // but works for the actual cased stuff,
+                         // and should work for future characters, since we won't have
+                         // more ligatures & other oddities.
+                         pos = key.indexOf(otherKey, pos+1);
+                         if (pos < 0) break;
+                         int endPos = pos + otherKey.length();
+                         // we know we have a proper substring,
+                         // so get the combinations
+                         String[] choices = (String[]) fromCaseFold.get(otherKey);
+                         for (int ii = 0; ii < choices.length; ++ii) {
+                             String patchwork = key.substring(0, pos)
+                                 + choices[ii]
+                                 + key.substring(endPos);
+                             partialClosure.add(patchwork);
+                         }
+                     }
+                 }
+             }
+
+             // now, for each thing in the partial closure, get its
+             // case closure and add it to the final result.
+
+             Set closure = new TreeSet(); // this will be the real closure
+             Iterator partialIt = partialClosure.iterator();
+             while (partialIt.hasNext()) {
+                 String key2 = (String) partialIt.next();
+                 ci.reset(key2);
+                 for (String temp = ci.next(); temp != null; temp = ci.next()) {
+                     closure.add(temp);
+                 }
+                 // form closure
+                 /*String[] choices = (String[]) fromCaseFold.get(key2);
+                 for (int i = 0; i < choices.length; ++i) {
+                 ci.reset(choices[i]);
+                 String temp;
+                 while (null != (temp = ci.next())) {
+                 closure.add(temp);
+                 }
+                 }
+                 */
+             }
+
+             // print it out, so that it can be cut and pasted back into this document.
+
+             Iterator it2 = closure.iterator();
+             System.out.println("\t// " + toName.transliterate(key));
+             System.out.print("\t{\"" + toHex.transliterate(key) + "\",");
+             while (it2.hasNext()) {
+                 String item = (String)it2.next();
+                 System.out.print("\"" + toHex.transliterate(item) + "\",");
+             }
+             System.out.println("},");
+         }
+     }
+ }
+
+ // ============ PRIVATE CLASS DATA ============
+
+ // pieces that we will put together
+ // is not changed during iteration
+ private int count = 0;
+ private String[][] variants;
+
+ // state information, changes during iteration
+ private boolean done = false;
+ private int[] counts;
+
+ // internal buffer for efficiency
+ private StringBuffer nextBuffer = new StringBuffer();
+
+ // ========================
+
+ /**
+ * Reset to different source. Once reset, the iteration starts from the beginning.
+ * @param source The string to get case variants for
+ */
+ public void reset(String source) {
+     final int sourceLength = source.length();
+
+     // One slot per UTF-16 code unit is an upper bound on the number of pieces;
+     // the slight over-allocation is harmless.
+     variants = new String[sourceLength][];
+     counts = new int[sourceLength];
+     count = 0;
+
+     // Cut the source into consecutive pieces; each piece contributes the array of
+     // strings that are case-equivalent to it.
+     // TODO: could optimize this later to coalesce all single string pieces
+     int pos = 0;
+     while (pos < sourceLength) {
+         String piece = null;
+         String folded = null;
+
+         if (GENERATE) {
+             // bootstrap mode: look at exactly one code point
+             piece = UTF16.valueOf(source, pos);
+             folded = (String) toCaseFold.get(piece);
+         } else {
+             // normal mode: try the *longest* candidate first, then shrink
+             for (int end = Math.min(pos + maxLength, sourceLength); end > pos; --end) {
+                 piece = source.substring(pos, end);
+                 folded = (String) toCaseFold.get(piece);
+                 if (folded != null) break;
+             }
+         }
+
+         String[] choices;
+         if (folded != null) {
+             choices = (String[]) fromCaseFold.get(folded);
+         } else {
+             // nothing matched: fall back to one code point whose only variant is itself
+             piece = UTF16.valueOf(source, pos);
+             choices = new String[] {piece};
+         }
+         variants[count++] = choices;
+         pos += piece.length();
+     }
+     reset();
+ }
+
+ /**
+ * Restart the iteration from the beginning, but with same source
+ */
+ public void reset() {
+     for (int slot = count - 1; slot >= 0; --slot) {
+         counts[slot] = 0; // rewind this piece to its first variant
+     }
+     done = false;
+ }
+
+ /**
+ * Iterates through the case variants.
+ * @return next case variant. Each variant will case-fold to the same value as the source will.
+ * When the iteration is done, null is returned.
+ */
+ public String next() {
+     if (done) {
+         return null;
+     }
+
+     // TODO Optimize so we keep the piece before and after the current position
+     // so we don't have so much concatenation
+
+     // Build the current variant by concatenating the selected choice of every piece.
+     nextBuffer.setLength(0);
+     for (int piece = 0; piece < count; ++piece) {
+         nextBuffer.append(variants[piece][counts[piece]]);
+     }
+
+     // Advance the choice indices like an odometer, rightmost piece first: a position
+     // that runs past its last choice wraps to 0 and carries into the position on its left.
+     int pos = count - 1;
+     while (pos >= 0) {
+         if (++counts[pos] < variants[pos].length) {
+             break;
+         }
+         counts[pos] = 0;
+         --pos;
+     }
+
+     // A carry out of the leftmost position means every combination has been produced;
+     // the string built above is the last one.
+     done = (pos < 0);
+
+     return nextBuffer.toString();
+ }
+
+
+ /**
+ * Temporary test, just to see how the stuff works.
+ */
+ static public void main(String[] args) {
+     CaseIterator ci = new CaseIterator();
+     String[] testCases = {"fiss", "h\u03a3"};
+
+     // Part 1: list every case variant of a few sample strings.
+     for (int t = 0; t < testCases.length; ++t) {
+         System.out.println();
+         System.out.println("Testing: " + toName.transliterate(testCases[t]));
+         System.out.println();
+         ci.reset(testCases[t]);
+         int total = 0;
+         String variant;
+         while ((variant = ci.next()) != null) {
+             System.out.println(toName.transliterate(variant));
+             ++total;
+         }
+         System.out.println("Total: " + total);
+     }
+
+     // Part 2: collect all caseless characters -- code points whose case closure
+     // consists of nothing but the code point itself.
+     UnicodeSet caseless = new UnicodeSet();
+     for (int c = 0; c <= 0x10FFFF; ++c) {
+         String cp = UTF16.valueOf(c);
+         ci.reset(cp);
+         // Only need to know whether there is exactly one variant, so stop after two.
+         String fold = null;
+         int seen = 0;
+         for (String temp = ci.next(); temp != null; temp = ci.next()) {
+             fold = temp;
+             if (++seen > 1) break;
+         }
+         if (seen == 1 && fold.equals(cp)) {
+             caseless.add(c);
+         }
+     }
+     System.out.println("caseless = " + caseless.toPattern(true));
+
+     // Part 3: compare against the [:^lc:] property set, in both directions.
+     UnicodeSet not_lc = new UnicodeSet("[:^lc:]");
+     UnicodeSet a = new UnicodeSet();
+
+     a.set(not_lc);
+     a.removeAll(caseless);
+     System.out.println("[:^lc:] - caseless = " + a.toPattern(true));
+
+     a.set(caseless);
+     a.removeAll(not_lc);
+     System.out.println("caseless - [:^lc:] = " + a.toPattern(true));
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Demo.java b/demos/src/com/ibm/icu/dev/demo/translit/Demo.java
new file mode 100644
index 00000000000..87882f9027b
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Demo.java
@@ -0,0 +1,1417 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+
+import java.awt.Button;
+import java.awt.CheckboxMenuItem;
+import java.awt.FileDialog;
+import java.awt.Font;
+import java.awt.Frame;
+import java.awt.GraphicsEnvironment;
+import java.awt.Label;
+import java.awt.Menu;
+import java.awt.MenuBar;
+import java.awt.MenuItem;
+import java.awt.MenuShortcut;
+import java.awt.TextField;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.KeyEvent;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.text.CharacterIterator;
+import java.util.Comparator;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.Differ;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.CanonicalIterator;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+
+/**
+ * A frame that allows the user to experiment with keyboard
+ * transliteration. This class has a main() method so it can be run
+ * as an application. The frame contains an editable text component
+ * and uses keyboard transliteration to process keyboard events.
+ *
+ * Copyright (c) IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ */
+public class Demo extends Frame {
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = 1L;
+ static final boolean DEBUG = false;
+ static final String START_TEXT = "(cut,\u03BA\u03C5\u03C4,\u05D0,\u30AF\u30C8,\u4E80,\u091A\u0941\u0924\u094D)";
+
+ Transliterator translit = null;
+ String fontName = "Arial Unicode MS";
+ int fontSize = 18;
+
+
+
+ /*
+ boolean compound = false;
+ Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND];
+ static final int MAX_COMPOUND = 128;
+ int compoundCount = 0;
+ */
+
+ TransliteratingTextComponent text = null;
+
+ Menu translitMenu;
+ CheckboxMenuItem translitItem;
+ CheckboxMenuItem noTranslitItem;
+
+ static final String NO_TRANSLITERATOR = "None";
+
+ //private static final String COPYRIGHT =
+ // "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+  * Stand-alone entry point: shows a 600x200 demo frame and reports its
+  * opening and closing to DemoApplet.
+  *
+  * @param args ignored
+  */
+ public static void main(String[] args) {
+     Frame demoFrame = new Demo(600, 200);
+     WindowAdapter closeNotifier = new WindowAdapter() {
+         public void windowClosing(WindowEvent event) {
+             // DemoApplet is told about the close; a direct System.exit(0) stays disabled.
+             com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameClosed();
+         }
+     };
+     demoFrame.addWindowListener(closeNotifier);
+     demoFrame.setVisible(true);
+     com.ibm.icu.dev.demo.impl.DemoApplet.demoFrameOpened();
+ }
+
+ /**
+  * Builds the demo window: menus first, then the transliterating text
+  * component pre-filled with START_TEXT, and finally selects the
+  * "Latin-Greek" transliterator.
+  *
+  * @param width  width applied to both the frame and the text component
+  * @param height height applied to both the frame and the text component
+  */
+ public Demo(int width, int height) {
+     super("Transliteration Demo");
+
+     initMenus();
+
+     WindowAdapter closeHandler = new WindowAdapter() {
+         public void windowClosing(WindowEvent event) {
+             handleClose();
+         }
+     };
+     addWindowListener(closeHandler);
+
+     text = new TransliteratingTextComponent();
+     text.setFont(new Font(fontName, Font.PLAIN, fontSize));
+     text.setSize(width, height);
+     text.setVisible(true);
+     text.setText(START_TEXT);
+     add(text);
+
+     setSize(width, height);
+     // Passing null as the id asks for a registered transliterator by name.
+     setTransliterator("Latin-Greek", null);
+ }
+
+ private void initMenus() {
+ MenuBar mbar;
+ Menu menu;
+ MenuItem mitem;
+ //CheckboxMenuItem citem;
+
+ setMenuBar(mbar = new MenuBar());
+ mbar.add(menu = new Menu("File"));
+ menu.add(mitem = new MenuItem("Quit"));
+ mitem.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ handleClose();
+ }
+ });
+/*
+ final ItemListener setTransliteratorListener = new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+ if (e.getStateChange() == ItemEvent.DESELECTED) {
+ // Don't let the current transliterator be deselected.
+ // Just reselect it.
+ item.setState(true);
+ } else if (compound) {
+ // Adding an item to a compound transliterator
+ handleAddToCompound(item.getLabel());
+ } else if (item != translitItem) {
+ // Deselect previous choice. Don't need to call
+ // setState(true) on new choice.
+ translitItem.setState(false);
+ translitItem = item;
+ handleSetTransliterator(item.getLabel());
+ }
+ }
+ };
+*/
+ /*
+ translitMenu.add(translitItem = noTranslitItem =
+ new CheckboxMenuItem(NO_TRANSLITERATOR, true));
+ noTranslitItem.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ // Can't uncheck None -- any action here sets None to true
+ setNoTransliterator();
+ }
+ });
+
+ translitMenu.addSeparator();
+ */
+
+/*
+ translitMenu.add(citem = new CheckboxMenuItem("Compound"));
+ citem.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+ if (e.getStateChange() == ItemEvent.DESELECTED) {
+ // If compound gets deselected, then select NONE
+ setNoTransliterator();
+ } else if (!compound) {
+ // Switching from non-compound to compound
+ translitItem.setState(false);
+ translitItem = item;
+ translit = null;
+ compound = true;
+ compoundCount = 0;
+ for (int i=0; i &Hex($1) &Name($1);\r\n"
+ + "&Hex-Any($1) < ('\\' [uU] [a-fA-F0-9]*);\r\n"
+ + "&Name-Any($1) < ('{' [^\\}]* '}');"
+ );
+ button = new Button("Set");
+ button.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ String compound = "";
+ try {
+ compound = rulesDialog.getArea().getText();
+ String id = ruleId.getText();
+ setTransliterator(compound, id);
+ } catch (RuntimeException ex) {
+ rulesDialog.getArea().setText(compound + "\n#" + ex.getMessage());
+ }
+ }
+ });
+ rulesDialog.getBottom().add(button);
+ ruleId = new TextField("test1", 20);
+ Label temp = new Label(" Name:");
+ rulesDialog.getBottom().add(temp);
+ rulesDialog.getBottom().add(ruleId);
+
+
+ translitMenu.add(mitem = new MenuItem("From Rules...",
+ new MenuShortcut(KeyEvent.VK_R)));
+ mitem.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ rulesDialog.show();
+ }
+ });
+
+
+ translitMenu.add(mitem = new MenuItem("From File...",
+ new MenuShortcut(KeyEvent.VK_F)));
+ mitem.addActionListener(new FileListener(this, RULE_FILE));
+
+ translitMenu.add(mitem = new MenuItem("Test File..."));
+ mitem.addActionListener(new FileListener(this, TEST_FILE));
+
+ // Flesh out the menu with the installed transliterators
+
+ translitMenu.addSeparator();
+
+ Iterator sources = add(new TreeSet(), Transliterator.getAvailableSources()).iterator();
+ while(sources.hasNext()) {
+ String source = (String) sources.next();
+ Iterator targets = add(new TreeSet(), Transliterator.getAvailableTargets(source)).iterator();
+ Menu targetMenu = new Menu(source);
+ while(targets.hasNext()) {
+ String target = (String) targets.next();
+ Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
+ if (variantSet.size() < 2) {
+ mitem = new MenuItem(target);
+ mitem.addActionListener(new TransliterationListener(source + "-" + target));
+ targetMenu.add(mitem);
+ } else {
+ Iterator variants = variantSet.iterator();
+ Menu variantMenu = new Menu(target);
+ while(variants.hasNext()) {
+ String variant = (String) variants.next();
+ String menuName = variant.length() == 0 ? "" : variant;
+ //System.out.println("<" + source + "-" + target + "/" + variant + ">, <" + menuName + ">");
+ mitem = new MenuItem(menuName);
+ mitem.addActionListener(new TransliterationListener(source + "-" + target + "/" + variant));
+ variantMenu.add(mitem);
+ }
+ targetMenu.add(variantMenu);
+ }
+ }
+ translitMenu.add(targetMenu);
+ }
+
+
+ }
+
+ static final int RULE_FILE = 0, TEST_FILE = 1;
+ //
+ /**
+  * Menu action that asks the user for a file and then either loads it as
+  * transliteration rules (RULE_FILE) or feeds it to genTestFile (TEST_FILE).
+  */
+ static class FileListener implements ActionListener {
+     Demo frame;
+     int choice;
+
+     /**
+      * @param frame  demo window whose current transliterator is read and replaced
+      * @param choice RULE_FILE or TEST_FILE
+      */
+     FileListener(Demo frame, int choice) {
+         this.frame = frame;
+         this.choice = choice;
+     }
+
+     /**
+      * Shows a file dialog (pre-filled with "Test_" + current ID + ".txt") and
+      * processes the chosen file. Any exception is printed and otherwise ignored.
+      */
+     public void actionPerformed(ActionEvent e) {
+         String id = frame.translit.getID();
+         int slashPos = id.indexOf('/');
+         String variant = "";
+         if (slashPos >= 0) {
+             // "Source-Target/Variant": keep the variant as a "_Variant" file-name suffix
+             variant = "_" + id.substring(slashPos+1);
+             id = id.substring(0, slashPos);
+         }
+
+         FileDialog fileDialog = new FileDialog(frame, "Input File");
+         fileDialog.setFile("Test_" + id + ".txt");
+         fileDialog.show();
+         String fileName = fileDialog.getFile();
+         String fileDirectory = fileDialog.getDirectory();
+         if (fileName != null) { // null means no file was chosen
+             try {
+                 File f = new File(fileDirectory, fileName);
+                 if (choice == RULE_FILE) {
+                     String rules = readRules(f);
+                     frame.setTransliterator(rules, idFromFileName(fileName));
+                 } else if (choice == TEST_FILE) {
+                     genTestFile(f, frame.translit, variant);
+                 }
+             } catch (Exception e2) {
+                 e2.printStackTrace();
+                 System.out.println("Problem opening/reading: " + fileDirectory + ", " + fileName);
+             }
+         }
+         fileDialog.dispose();
+     }
+
+     /**
+      * Reads a UTF-8 rule file into one string. Every line is preceded by '\n'
+      * and a leading byte-order mark on a line is dropped. The underlying
+      * stream is closed even when reading fails.
+      */
+     private static String readRules(File f) throws java.io.IOException {
+         StringBuffer buffer = new StringBuffer();
+         FileInputStream fis = new FileInputStream(f);
+         try {
+             InputStreamReader isr = new InputStreamReader(fis, "UTF8");
+             BufferedReader br = new BufferedReader(isr, 32*1024);
+             while (true) {
+                 String line = br.readLine();
+                 if (line == null) break;
+                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); // strip BOM
+                 buffer.append('\n');
+                 buffer.append(line);
+             }
+         } finally {
+             // Closing the stream releases the file handle on both the normal and the error path.
+             fis.close();
+         }
+         return buffer.toString();
+     }
+
+     /**
+      * Turns a rule file name into a transliterator ID: an optional
+      * "Transliterator_" prefix is removed, the first '_' becomes '-', the
+      * second '_' becomes '/', and everything from the last '.' on is cut off.
+      */
+     private static String idFromFileName(String fileName) {
+         if (fileName.startsWith("Transliterator_")) {
+             fileName = fileName.substring("Transliterator_".length());
+         }
+         String id;
+         int pos = fileName.indexOf('_');
+         if (pos < 0) {
+             id = fileName;
+         } else {
+             id = fileName.substring(0, pos) + "-";
+             int pos2 = fileName.indexOf('_', pos+1);
+             if (pos2 < 0) {
+                 id += fileName.substring(pos+1);
+             } else {
+                 id += fileName.substring(pos+1, pos2) + "/" + fileName.substring(pos2 + 1);
+             }
+         }
+         pos = id.lastIndexOf('.');
+         if (pos >= 0) id = id.substring(0, pos);
+         return id;
+     }
+ }
+
+
+ boolean transliterateTyping = true;
+ Transliterator fromHex = Transliterator.getInstance("Hex-Any");
+ InfoDialog helpDialog;
+ InfoDialog hexDialog;
+ InfoDialog compoundDialog;
+ InfoDialog rulesDialog;
+ TextField ruleId;
+ MenuItem convertSelectionItem = null;
+ MenuItem swapSelectionItem = null;
+ MenuItem convertTypingItem = null;
+ Menu historyMenu;
+ Map historyMap = new HashMap();
+ Set historySet = new TreeSet(new Comparator() {
+ public int compare(Object a, Object b) {
+ MenuItem aa = (MenuItem)a;
+ MenuItem bb = (MenuItem)b;
+ return aa.getLabel().compareTo(bb.getLabel());
+ }
+ });
+
+ // ADD Factory since otherwise getInverse blows out
+ /**
+  * Transliterator.Factory backed by a static ID-to-instance table, so that
+  * transliterators created in this demo can be looked up by ID.
+  */
+ static class DummyFactory implements Transliterator.Factory {
+     static DummyFactory singleton = new DummyFactory();
+     static HashMap m = new HashMap();
+
+     /**
+      * Stores t under ID, logs its rules, and registers the shared factory
+      * for that ID. Instances are stored and returned as-is, without cloning.
+      */
+     static void add(String ID, Transliterator t) {
+         m.put(ID, t);
+         String rules = t.toRules(true);
+         System.out.println("Registering: " + ID + ", " + rules);
+         Transliterator.registerFactory(ID, singleton);
+     }
+
+     /** Returns the instance stored for ID, or null when none was added. */
+     public Transliterator getInstance(String ID) {
+         Object stored = m.get(ID);
+         return (Transliterator) stored;
+     }
+ }
+
+ /**
+  * Debug helper: advances a break iterator until it reports DONE, printing
+  * each boundary offset on its own line, then prints the text with an "&"
+  * appended after every segment.
+  *
+  * @param num        number shown in the "Test" label of the summary line
+  * @param testSource text the boundaries refer to
+  * @param brkItr     iterator to advance; segments are measured from offset 0
+  */
+ static void printBreaks(int num, String testSource, BreakIterator brkItr) {
+     StringBuffer marked = new StringBuffer();
+     int segmentStart = 0;
+     for (int boundary = brkItr.next(); boundary != BreakIterator.DONE; boundary = brkItr.next()) {
+         marked.append(testSource.substring(segmentStart, boundary)).append("&");
+         segmentStart = boundary;
+         System.out.println(boundary);
+     }
+     System.out.println("Test" + num + ": " + marked);
+ }
+
+ /**
+  * Debug helper: prints every character from the iterator's current position
+  * to the end, each followed by its index in parentheses.
+  *
+  * The walk starts with current() rather than next(), so the character the
+  * iterator is positioned on is included; starting with next() would silently
+  * drop it (index 0 for a fresh iterator).
+  *
+  * @param num        number shown in the "Test" label of the output line
+  * @param testSource not used by this method
+  * @param ci         iterator to walk; it is left at its end
+  */
+ static void printIteration(int num, String testSource, CharacterIterator ci) {
+     StringBuffer result = new StringBuffer();
+     for (char ch = ci.current(); ch != CharacterIterator.DONE; ch = ci.next()) {
+         result.append(ch).append("(").append(ci.getIndex()).append(")");
+     }
+     System.out.println("Test" + num + ": " + result);
+ }
+
+ /**
+  * Debug helper: prints the source set of four Thai-related transliterators,
+  * their union, and what remains of the union after [:latin:] and [:thai:]
+  * are removed.
+  */
+ static void printSources() {
+     String[] ids = {"Latin-ThaiLogical", "ThaiLogical-Latin", "Thai-ThaiLogical", "ThaiLogical-Thai"};
+     UnicodeSet union = new UnicodeSet();
+     for (int k = 0; k < ids.length; ++k) {
+         UnicodeSet sourceSet = Transliterator.getInstance(ids[k]).getSourceSet();
+         System.out.println(ids[k] + ": " + sourceSet.toPattern(true));
+         union.addAll(sourceSet);
+     }
+     System.out.println("All: " + union.toPattern(true));
+     UnicodeSet expected = new UnicodeSet("[[:latin:][:thai:]]");
+     // removeAll modifies the union in place; it is not used afterwards.
+     UnicodeSet leftover = union.removeAll(expected);
+     System.out.println("missing from [:latin:][:thai:]: " + leftover.toPattern(true));
+ }
+
+ // 200E;LEFT-TO-RIGHT MARK;Cf;0;L;;;;;N;;;;;
+
+ static Transliterator title = Transliterator.getInstance("title");
+ static String hexAndNameRules = " ([:c:]) > \\u200E &hex/unicode($1) ' ( ) ' &name($1) \\u200E ' ';"
+ + "([:mark:]) > \\u200E &hex/unicode($1) ' ( ' \\u200E \u25CC $1 \\u200E ' ) ' &name($1) \\u200E ' ';"
+ + "(.) > \\u200E &hex/unicode($1) ' ( ' \\u200E $1 \\u200E ' ) ' &name($1) ' ' \\u200E;";
+
+ static Transliterator hexAndName = Transliterator.createFromRules("any-hexAndName",
+ hexAndNameRules, Transliterator.FORWARD);
+
+
+
+ //static Transliterator upper = Transliterator.getInstance("upper");
+
+ static final byte NONE = 0, TITLEWORD = 1, TITLELINE = 2;
+
+ /**
+  * Transliterates a UTF-8 test file sentence by sentence and writes the
+  * results to an HTML file named after the source file (extension replaced by
+  * variant + ".html"). Unless the transliterator ID starts with "Han-" or
+  * "ja-", a second "..._Sets.html" file listing source/target sets is written.
+  *
+  * Input lines are trimmed; empty lines and lines starting with '#' are
+  * skipped. "@TITLECASE@" switches to word title-casing, "@UPPERFILTER@" is
+  * accepted and ignored, and "@SET pattern" emits one row per set element.
+  *
+  * Exceptions are printed and swallowed; both the reader and the writer are
+  * closed on every path.
+  *
+  * NOTE(review): several string literals in this method look like markup that
+  * was lost in extraction (including literals broken across two lines); they
+  * are kept exactly as found.
+  *
+  * @param sourceFile test file to read
+  * @param translit   forward transliterator; its inverse is used for round-tripping
+  * @param variant    suffix appended to the output file name (may be empty)
+  */
+ static void genTestFile(File sourceFile, Transliterator translit, String variant) {
+     BufferedReader in = null;
+     PrintWriter out = null;
+     try {
+
+         System.out.println("Reading: " + sourceFile.getCanonicalPath());
+         in = new BufferedReader(
+             new InputStreamReader(
+                 new FileInputStream(sourceFile), "UTF-8"));
+         String targetFile = sourceFile.getCanonicalPath();
+         int dotPos = targetFile.lastIndexOf('.');
+         if (dotPos >= 0) targetFile = targetFile.substring(0,dotPos);
+         targetFile += variant;
+
+         File outFile = new File(targetFile + ".html");
+         System.out.println("Writing: " + outFile.getCanonicalPath());
+
+         out = new PrintWriter(
+             new BufferedWriter(
+                 new OutputStreamWriter(
+                     new FileOutputStream(outFile), "UTF-8")));
+
+         String direction = "";
+         String id = translit.getID();
+         if (id.indexOf("Arabic") >= 0 || id.indexOf("Hebrew") >= 0) {
+             direction = " direction: rtl;";
+         }
+         boolean testRoundTrip = true;
+         boolean generateSets = true;
+         if (id.startsWith("Han-") || id.startsWith("ja-")) {
+             testRoundTrip = false;
+             generateSets = false;
+         }
+         out.println(" ");
+         out.println("");
+         out.println("" + id + " Transliteration Check ");
+         out.println("See Test_Instructions.html for details.
");
+         out.println("");
+
+         //out.println("Thai Latin Thai ");
+
+         Transliterator tl = translit;
+         Transliterator lt = tl.getInverse();
+
+         Transliterator ltFilter = tl.getInverse();
+         ltFilter.setFilter(new UnicodeSet("[:^Lu:]"));
+         Transliterator tlFilter = lt.getInverse();
+         tlFilter.setFilter(new UnicodeSet("[:^Lu:]"));
+
+         //Transliterator.getInstance("[:^Lu:]" + lt.getID());
+
+         BreakIterator sentenceBreak = BreakIterator.getSentenceInstance();
+
+         byte titleSetting = TITLELINE;
+         //boolean upperfilter = false;
+         boolean first = true;
+         while (true) {
+             String line = in.readLine();
+             if (line == null) break;
+             line = line.trim();
+             if (line.length() == 0) continue;
+             if (line.charAt(0) == '\uFEFF') line = line.substring(1); // remove BOM
+             // trim() does not strip the BOM, so a line may be empty only now
+             if (line.length() == 0) continue;
+
+             if (line.charAt(0) == '#') continue; // comments
+
+             if (line.equals("@TITLECASE@")) {
+                 titleSetting = TITLEWORD;
+                 out.println("Names ");
+                 continue;
+             } else if (line.equals("@UPPERFILTER@")) {
+                 //upperfilter = true;
+                 continue;
+             } else if (line.startsWith("@SET")) {
+                 UnicodeSet s = new UnicodeSet(line.substring(4).trim());
+                 out.println("Characters ");
+                 UnicodeSetIterator it = new UnicodeSetIterator(s);
+                 while (it.next()) {
+                     addSentenceToTable(out, it.codepoint != UnicodeSetIterator.IS_STRING
+                         ? UTF16.valueOf(it.codepoint)
+                         : it.string,
+                         NONE, true, testRoundTrip, first, tl, lt);
+                 }
+                 continue;
+             }
+
+             sentenceBreak.setText(line);
+             int start = 0;
+             while (true) {
+                 int end = sentenceBreak.next();
+                 if (end == BreakIterator.DONE) break;
+                 String coreSentence = line.substring(start, end);
+                 //System.out.println("Core: " + hex.transliterate(coreSentence));
+                 // Advance to the next sentence; without this every later
+                 // sentence would start at offset 0 and repeat the earlier ones.
+                 start = end;
+
+                 int oldPos = 0;
+                 while (oldPos < coreSentence.length()) {
+                     // hack, because sentence doesn't seem to be working right
+                     int pos = coreSentence.indexOf(". ", oldPos);
+                     if (pos < 0) pos = coreSentence.length(); else pos = pos+2;
+                     int pos2 = coreSentence.indexOf('\u3002', oldPos);
+                     if (pos2 < 0) pos2 = coreSentence.length(); else pos2 = pos2 + 1;
+                     if (pos > pos2) pos = pos2;
+                     String sentence = coreSentence.substring(oldPos, pos).trim();
+                     //System.out.println("Sentence: " + hex.transliterate(coreSentence));
+                     oldPos = pos;
+
+                     addSentenceToTable(out, sentence,
+                         titleSetting, false, testRoundTrip, first, tl, lt);
+
+                     first = false;
+                 }
+             }
+         }
+         out.println("
");
+         out.close();
+
+         // Now write the source/target sets
+         if (generateSets) {
+             outFile = new File(targetFile + "_Sets.html");
+             System.out.println("Writing: " + outFile.getCanonicalPath());
+
+             out = new PrintWriter(
+                 new BufferedWriter(
+                     new OutputStreamWriter(
+                         new FileOutputStream(outFile), "UTF-8")));
+             out.println(" ");
+             out.println("");
+             out.println("" + id + " Transliteration Sets ");
+             out.println("");
+
+             int dashPos = id.indexOf('-');
+             int slashPos = id.indexOf('/');
+             if (slashPos < 0) slashPos = id.length();
+             // The supersets stay null when the ID part cannot be used to build a set.
+             UnicodeSet sourceSuper = null;
+             try {
+                 String temp = id.substring(0,dashPos);
+                 if (temp.equals("ja")) sourceSuper = new UnicodeSet("[[:Han:][:hiragana:][:katakana:]]");
+                 else sourceSuper = new UnicodeSet("[[:" + temp + ":][:Mn:][:Me:]]");
+             } catch (Exception e) {}
+
+             UnicodeSet targetSuper = null;
+             try {
+                 targetSuper = new UnicodeSet("[[:" + id.substring(dashPos+1, slashPos) + ":][:Mn:][:Me:]]");
+             } catch (Exception e) {}
+
+             int nfdStyle = CLOSE_CASE | CLOSE_FLATTEN | CLOSE_CANONICAL;
+             int nfkdStyle = nfdStyle | CLOSE_COMPATIBILITY;
+             out.println("");
+             out.println("None
");
+             showSets(out, translit, lt, null, null, 0);
+             out.println("NFD
");
+             showSets(out, translit, lt, sourceSuper, targetSuper, nfdStyle);
+             out.println("NFKD
");
+             showSets(out, translit, lt, sourceSuper, targetSuper, nfkdStyle);
+             out.println(" ");
+             out.close();
+         }
+         System.out.println("Done Writing");
+     } catch (Exception e) {
+         e.printStackTrace();
+     } finally {
+         // Release both files on every path; closing an already closed PrintWriter is harmless.
+         if (out != null) out.close();
+         if (in != null) {
+             try {
+                 in.close();
+             } catch (Exception ignored) {
+                 // nothing useful can be done if closing the input fails
+             }
+         }
+     }
+ }
+
+ /**
+  * Writes the output for one sentence: the original, its transliteration, and
+  * (when the round trip is tested and fails) the reverse transliteration with
+  * differences marked. With addName set, a second line built with hexAndName
+  * is written as well.
+  *
+  * NOTE(review): the string literals in the println calls below appear to have
+  * lost their markup in extraction, and two of the calls no longer parse (the
+  * "first" parameter presumably took part in the missing text) -- restore them
+  * from the upstream source before compiling.
+  *
+  * @param out           destination writer
+  * @param sentence      text to transliterate; an empty string is ignored
+  * @param titleSetting  NONE, TITLEWORD or TITLELINE; selects how the result is title-cased for display
+  * @param addName       whether to add the hexAndName line
+  * @param testRoundTrip whether a failed round trip is reported
+  * @param first         not referenced by the code that is still intact
+  * @param tl            forward transliterator
+  * @param lt            reverse transliterator
+  */
+ static void addSentenceToTable(PrintWriter out, String sentence,
+ byte titleSetting, boolean addName, boolean testRoundTrip, boolean first,
+ Transliterator tl, Transliterator lt) {
+ if (sentence.length() == 0) return; // skip empty lines
+
+ String originalShow = sentence;
+ String latin;
+ // ASCII letters are swapped for placeholders (saveAscii) before transliterating
+ latin = tl.transliterate(saveAscii.transliterate(sentence));
+
+ String latinShow = latin;
+ if (titleSetting == TITLEWORD) {
+ latinShow = title.transliterate(latin);
+ } else if (titleSetting == TITLELINE) {
+ latinShow = titlecaseFirstWord(latinShow);
+ }
+ // map the placeholders back to ASCII for display
+ latinShow = restoreAscii.transliterate(latinShow);
+
+ String reverse;
+ reverse = restoreAscii.transliterate(lt.transliterate(latin));
+
+ // comparisons below are done on NFKD forms
+ String NFKDSentence = Normalizer.normalize(sentence, Normalizer.NFKD);
+ String NFKDLatin = Normalizer.normalize(latin, Normalizer.NFKD);
+ String NFKDReverse = Normalizer.normalize(reverse, Normalizer.NFKD);
+
+ if (latinShow.length() == 0) {
+ latinShow = "empty ";
+ } else if (NFKDSentence.equals(NFKDLatin)) {
+ // the transliteration left the text unchanged (up to NFKD)
+ latinShow = "" + latinShow + " ";
+ }
+ String reverseShow = reverse;
+
+ if (testRoundTrip && !NFKDReverse.equals(NFKDSentence)) {
+ // i ends up as the length of the common prefix; only the commented-out code below uses it
+ int minLen = reverse.length();
+ if (minLen > sentence.length()) minLen = sentence.length();
+ int i;
+ for (i = 0; i < minLen; ++i) {
+ if (reverse.charAt(i) != sentence.charAt(i)) break;
+ }
+ //originalShow = sentence.substring(0,i) + "" + sentence.substring(i) + " ";
+ reverseShow = reverseShow.length() == 0
+ ? "empty "
+ //: reverse.substring(0,i) + "" + reverse.substring(i) + " ";
+ : showDifference(sentence, reverse);
+ out.println("" : ">") + originalShow
+ + " " + latinShow
+ + " " + reverseShow
+ + " ");
+ } else {
+ out.println("" : ">") + originalShow
+ + " " + latinShow
+ + " ");
+ }
+ if (addName) {
+ // extra line produced by the hexAndName transliterator
+ latinShow = hexAndName.transliterate(latin);
+ if (latinShow.length() == 0) latinShow = "empty ";
+ originalShow = hexAndName.transliterate(sentence);
+ if (originalShow.length() == 0) originalShow = "empty ";
+
+ out.println("" + originalShow
+ + " " + latinShow
+ + " ");
+ }
+ out.println(" ");
+
+ }
+
+ /**
+  * Feeds two strings one char at a time into a Differ and returns a string
+  * that contains, for every mismatch the Differ reports, the differing pieces
+  * of as and bs wrapped in "..." markers. Matching stretches add nothing.
+  *
+  * @param as first string
+  * @param bs second string
+  * @return the concatenated difference report (empty when nothing differs)
+  */
+ static String showDifference(String as, String bs) {
+     Differ differ = new Differ(300, 3);
+     StringBuffer report = new StringBuffer();
+     int longest = Math.max(as.length(), bs.length());
+     // One extra pass (pos == longest) lets the Differ flush its final state.
+     for (int pos = 0; pos <= longest; ++pos) {
+         if (pos < as.length()) differ.addA(as.substring(pos, pos+1));
+         if (pos < bs.length()) differ.addB(bs.substring(pos, pos+1));
+         differ.checkMatch(pos == longest);
+
+         int countA = differ.getACount();
+         int countB = differ.getBCount();
+         if (countA == 0 && countB == 0) {
+             continue;
+         }
+         report.append("...");
+         if (countA != 0) {
+             report.append("");
+             for (int k = 0; k < countA; ++k) {
+                 report.append(differ.getA(k));
+             }
+             report.append(" ");
+         }
+         if (countB != 0) {
+             report.append("");
+             for (int k = 0; k < countB; ++k) {
+                 report.append(differ.getB(k));
+             }
+             report.append(" ");
+         }
+         report.append("...");
+     }
+     return report.toString();
+ }
+
+ /**
+  * Prints four set descriptions: the source and target sets of the forward
+  * transliterator and the matching target and source sets of the inverse,
+  * each closed with closeUnicodeSet and rendered with toPattern.
+  *
+  * @param out         destination writer
+  * @param translit    forward transliterator
+  * @param inverse     reverse transliterator
+  * @param sourceSuper superset used for the two source-side lines, or null
+  * @param targetSuper superset used for the two target-side lines, or null
+  * @param options     CLOSE_* flags handed to closeUnicodeSet
+  */
+ static void showSets(PrintWriter out, Transliterator translit, Transliterator inverse,
+     UnicodeSet sourceSuper, UnicodeSet targetSuper, int options) {
+     String forwardSource = toPattern(closeUnicodeSet(translit.getSourceSet(), options), sourceSuper);
+     out.println("Source Set:" + forwardSource + " ");
+
+     String reverseTarget = toPattern(closeUnicodeSet(inverse.getTargetSet(), options), sourceSuper);
+     out.println("Reverse Target Set:" + reverseTarget + " ");
+
+     String forwardTarget = toPattern(closeUnicodeSet(translit.getTargetSet(), options), targetSuper);
+     out.println("Target Set:" + forwardTarget + " ");
+
+     String reverseSource = toPattern(closeUnicodeSet(inverse.getSourceSet(), options), targetSuper);
+     out.println("Reverse Source Set:" + reverseSource + " ");
+ }
+
+ static final int CLOSE_CASE = 1, CLOSE_FLATTEN = 2, CLOSE_CANONICAL = 4, CLOSE_COMPATIBILITY = 8;
+
+ /**
+  * Expands a set in place according to the CLOSE_* flags and returns it.
+  * The steps run in a fixed order: case, canonical, flatten, compatibility;
+  * each step sees the additions made by the earlier ones.
+  *
+  * @param source  set to expand; it is modified directly, not copied
+  * @param options bitwise OR of CLOSE_CASE, CLOSE_FLATTEN, CLOSE_CANONICAL and
+  *                CLOSE_COMPATIBILITY; 0 returns source untouched
+  * @return the same object that was passed as source
+  */
+ static UnicodeSet closeUnicodeSet(UnicodeSet source, int options) {
+ if (options == 0) return source;
+
+ UnicodeSetIterator it = new UnicodeSetIterator(source);
+ UnicodeSet additions = new UnicodeSet(); // to avoid messing up iterator
+ UnicodeSet removals = new UnicodeSet(); // to avoid messing up iterator
+ String base;
+ int cp;
+
+ // Add all case equivalents
+ // (only for single code points whose type is upper-, lower- or titlecase letter)
+ if ((options & CLOSE_CASE) != 0) {
+ while (it.next()) {
+ cp = it.codepoint;
+ if (cp == UnicodeSetIterator.IS_STRING) continue;
+ int type = UCharacter.getType(cp);
+ if (type == Character.UPPERCASE_LETTER || type == Character.LOWERCASE_LETTER || type == Character.TITLECASE_LETTER) {
+ additions.add(UCharacter.toLowerCase(UTF16.valueOf(cp)));
+ additions.add(UCharacter.toUpperCase(UTF16.valueOf(cp)));
+ }
+ }
+ source.addAll(additions);
+ }
+
+ // Add the canonical closure of all strings and characters in source
+ if ((options & CLOSE_CANONICAL) != 0) {
+ // restart the iterator -- presumably this picks up what was added above; confirm
+ it.reset();
+ additions.clear();
+ CanonicalIterator ci = new CanonicalIterator(".");
+ while (it.next()) {
+ if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string;
+ else base = UTF16.valueOf(it.codepoint);
+ ci.setSource(base);
+ // collect every form the CanonicalIterator yields, except base itself
+ while (true) {
+ String trial = ci.next();
+ if (trial == null) break;
+ if (trial.equals(base)) continue;
+ additions.add(trial);
+ }
+ }
+ source.addAll(additions);
+ }
+
+ // flatten strings
+ // (each string element is replaced by the result of addAll(string) on the additions)
+ if ((options & CLOSE_FLATTEN) != 0) {
+ it.reset();
+ additions.clear();
+ while (it.next()) {
+ if (it.codepoint != UnicodeSetIterator.IS_STRING) continue;
+ additions.addAll(it.string);
+ removals.add(it.string);
+ //System.out.println("flattening '" + hex.transliterate(it.string) + "'");
+ }
+ source.addAll(additions);
+ source.removeAll(removals);
+ }
+
+ // Now add decompositions of characters in source
+ if ((options & CLOSE_COMPATIBILITY) != 0) {
+ it.reset(source);
+ additions.clear();
+ while (it.next()) {
+ if (it.codepoint == UnicodeSetIterator.IS_STRING) base = it.string;
+ else base = UTF16.valueOf(it.codepoint);
+ // elements already in NFKD form contribute nothing
+ if (Normalizer.isNormalized(base, Normalizer.NFKD,0)) continue;
+ String decomp = Normalizer.normalize(base, Normalizer.NFKD);
+ additions.add(decomp);
+ }
+ source.addAll(additions);
+
+ // Now add any other character that decomposes to a character in source
+ // NOTE(review): the bound is exclusive, so U+10FFFF itself is never examined
+ for (cp = 0; cp < 0x10FFFF; ++cp) {
+ if (!UCharacter.isDefined(cp)) continue;
+ if (Normalizer.isNormalized(cp, Normalizer.NFKD,0)) continue;
+ if (source.contains(cp)) continue;
+
+ String decomp = Normalizer.normalize(cp, Normalizer.NFKD);
+ if (source.containsAll(decomp)) {
+ // System.out.println("Adding: " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));
+ source.add(cp);
+ }
+ }
+ }
+
+ return source;
+ }
+
+ /**
+  * Renders a set as a pattern. Without a superset this is just the set's own
+  * pattern. With one, the superset's members are first removed from source
+  * (modifying it) and the result is "[" + superset pattern + " " + remainder
+  * pattern + "]".
+  *
+  * @param source   set to render; modified when superset is not null
+  * @param superset set to factor out, or null
+  * @return the pattern string
+  */
+ static String toPattern(UnicodeSet source, UnicodeSet superset) {
+     if (superset == null) {
+         return source.toPattern(true);
+     }
+     source.removeAll(superset);
+     String supersetPattern = superset.toPattern(true);
+     String remainderPattern = source.toPattern(true);
+     return "[" + supersetPattern + " " + remainderPattern + "]";
+ }
+
+ static BreakIterator bi = BreakIterator.getWordInstance();
+
+ /**
+  * Title-cases the first word of a line that contains a letter, but only if
+  * that word's first letter is lowercase; otherwise the line is returned
+  * unchanged.
+  *
+  * Word segments come from the shared word BreakIterator "bi". A segment is
+  * skipped when getFirstLetterType reports Character.UNASSIGNED for it
+  * (presumably meaning "no letter in this range" -- its definition is not
+  * visible here).
+  *
+  * @param line text to adjust
+  * @return the line, with at most one word title-cased
+  */
+ static String titlecaseFirstWord(String line) {
+     // search for first word with letters. If the first letter is lower, then titlecase it.
+     bi.setText(line);
+     int start = 0;
+     while (true) {
+         int end = bi.next();
+         if (end == BreakIterator.DONE) break;
+         int firstLetterType = getFirstLetterType(line, start, end);
+         if (firstLetterType != Character.UNASSIGNED) {
+             if (firstLetterType != Character.LOWERCASE_LETTER) break;
+             line = line.substring(0, start)
+                 + UCharacter.toTitleCase(line.substring(start, end), bi)
+                 + line.substring(end);
+             break;
+         }
+         // Move on to the next segment; assigning the other way round would
+         // leave start at 0 and make every check span the line's beginning.
+         start = end;
+     }
+     return line;
+ }
+
+ static final int LETTER_MASK =
+ (1< XXX # " + UCharacter.getName(it.codepoint));
+ main.add(it.codepoint);
+ }
+
+ if (others.size() != 0) {
+ out.println("Decomposed characters found above: ");
+ others.removeAll(main);
+ it.reset(others);
+ while (it.next()) {
+ out.println(" " + UTF16.valueOf(it.codepoint) + " <> XXX # " + UCharacter.getName(it.codepoint));
+ }
+ }
+
+ out.close();
+ System.out.println("Done Writing");
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ static Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
+ static final String saveRules =
+ "A <> \uEA41; B <> \uEA42; C <> \uEA43; D <> \uEA44; E <> \uEA45; F <> \uEA46; G <> \uEA47; H <> \uEA48; I <> \uEA49; "
+ + "J <> \uEA4A; K <> \uEA4B; L <> \uEA4C; M <> \uEA4D; N <> \uEA4E; O <> \uEA4F; P <> \uEA50; Q <> \uEA51; R <> \uEA52; "
+ + "S <> \uEA53; T <> \uEA54; U <> \uEA55; V <> \uEA56; W <> \uEA57; X <> \uEA58; Y <> \uEA59; Z <> \uEA5A; "
+ + "a <> \uEA61; b <> \uEA62; c <> \uEA63; d <> \uEA64; e <> \uEA65; f <> \uEA66; g <> \uEA67; h <> \uEA68; i <> \uEA69; "
+ + "j <> \uEA6A; k <> \uEA6B; l <> \uEA6C; m <> \uEA6D; n <> \uEA6E; o <> \uEA6F; p <> \uEA70; q <> \uEA71; r <> \uEA72; "
+ + "s <> \uEA73; t <> \uEA74; u <> \uEA75; v <> \uEA76; w <> \uEA77; x <> \uEA78; y <> \uEA79; z <> \uEA7A;";
+
+ static Transliterator saveAscii = Transliterator.createFromRules("ascii-saved", saveRules, Transliterator.FORWARD);
+ static Transliterator restoreAscii = Transliterator.createFromRules("ascii-saved", saveRules, Transliterator.REVERSE);
+
+ // Disabled scratch code: everything sits inside "if (false)", so nothing in
+ // this initializer runs when the class is loaded.
+ static {
+
+ if (false) {
+
+ // print rule text pairing each char from 'A' to 'z' with the hex form of 0xEA00 + char
+ for (char i = 'A'; i <= 'z'; ++i) {
+ System.out.print(i + " <> " + hex.transliterate(String.valueOf((char)(0xEA00 + i))) + "; ");
+ }
+
+ // complement a set twice and print the resulting pattern
+ UnicodeSet x = new UnicodeSet("[[:^ccc=0:]&[:^ccc=230:]]");
+ x = x.complement();
+ x = x.complement();
+ System.out.println("Test: " + x.toPattern(true));
+
+ // try a rule on "u" + U+0308 with other combining marks in between or after
+ Transliterator y = Transliterator.createFromRules("xxx", "$notAbove = [[:^ccc=0:]&[:^ccc=230:]]; u ($notAbove*) \u0308 > XXX | $1; ", Transliterator.FORWARD);
+
+ String[] testList = {"u\u0308", "u\u0316\u0308", "u\u0308\u0316", "u\u0301\u0308", "u\u0308\u0301"};
+ for (int i = 0; i < testList.length; ++i) {
+ String yy = y.transliterate(testList[i]);
+ System.out.println(hex.transliterate(testList[i]) + " => " + hex.transliterate(yy));
+ }
+
+ //printNames(new UnicodeSet("[\u0600-\u06FF]"), "Arabic-Latin.txt");
+
+
+ /*
+ BreakTransliterator.register();
+
+ BreakTransliterator testTrans = new BreakTransliterator("Any-XXX", null, null, "$");
+ String testSource = "The Quick: Brown fox--jumped.";
+ BreakIterator bi = testTrans.getBreakIterator();
+ bi.setText(new StringCharacterIterator(testSource));
+ printBreaks(0, testSource, bi);
+ //bi.setText(UCharacterIterator.getInstance(testSource));
+ //printBreaks(1, testSource, bi);
+
+ printIteration(2, testSource, new StringCharacterIterator(testSource));
+ //printIteration(3, testSource, UCharacterIterator.getInstance(testSource));
+
+
+
+ String test = testTrans.transliterate(testSource);
+ System.out.println("Test3: " + test);
+ DummyFactory.add(testTrans.getID(), testTrans);
+ */
+
+ // AnyTransliterator.ScriptRunIterator.registerAnyToScript();
+
+ // build, exercise and register AnyTransliterators for three target scripts
+ AnyTransliterator at = new AnyTransliterator("Greek", null);
+ at.transliterate("(cat,\u03b1,\u0915)");
+ DummyFactory.add(at.getID(), at);
+
+ at = new AnyTransliterator("Devanagari", null);
+ at.transliterate("(cat,\u03b1,\u0915)");
+ DummyFactory.add(at.getID(), at);
+
+ at = new AnyTransliterator("Latin", null);
+ at.transliterate("(cat,\u03b1,\u0915)");
+ DummyFactory.add(at.getID(), at);
+
+ // rule-based transliterators, each paired with Any-Null as the reverse direction
+ DummyFactory.add("Any-gif", Transliterator.createFromRules("gif", "'\\'u(..)(..) > ' ';", Transliterator.FORWARD));
+ DummyFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
+
+ DummyFactory.add("Any-RemoveCurly", Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ;", Transliterator.FORWARD));
+ DummyFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
+
+ // rules that call another transliterator through the &name(...) syntax
+ System.out.println("Trying &hex");
+ Transliterator t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
+ System.out.println("Registering");
+ DummyFactory.add("Any-hex2", t);
+
+ System.out.println("Trying &gif");
+ t = Transliterator.createFromRules("gif2", "(.) > &any-gif($1);", Transliterator.FORWARD);
+ System.out.println("Registering");
+ DummyFactory.add("Any-gif2", t);
+ }
+ }
+
+
+ /**
+  * Switches the demo to another transliterator and updates the UI.
+  *
+  * With id == null, name is the ID of a registered transliterator. Otherwise
+  * name holds rule text and id the ID to give it: a forward and a reverse
+  * transliterator are built from the rules and both are registered through
+  * DummyFactory. An id without '-' becomes "Any-" + id, with id + "-Any" as
+  * the reverse; otherwise source and target are swapped and a "/variant"
+  * suffix is kept.
+  *
+  * Afterwards the text component, the "convert selection" label and the
+  * history menu are updated, and the swap item is enabled only when an
+  * inverse can be obtained.
+  *
+  * @param name transliterator ID, or rule text when id is not null
+  * @param id   ID for rule-built transliterators, or null
+  */
+ void setTransliterator(String name, String id) {
+     if (DEBUG) System.out.println("Got: " + name);
+     if (id != null) {
+         String forwardId = id;
+         String reverseId;
+         int dash = id.indexOf('-');
+         if (dash < 0) {
+             reverseId = id + "-Any";
+             forwardId = "Any-" + id;
+         } else {
+             String sourcePart = id.substring(0, dash);
+             int slash = id.indexOf("/", dash);
+             reverseId = (slash < 0)
+                 ? id.substring(dash+1) + "-" + sourcePart
+                 : id.substring(dash+1, slash) + "-" + sourcePart + id.substring(slash);
+         }
+
+         translit = Transliterator.createFromRules(forwardId, name, Transliterator.FORWARD);
+         if (DEBUG) {
+             System.out.println("***Forward Rules");
+             System.out.println(translit.toRules(true));
+             System.out.println("***Source Set");
+             System.out.println(translit.getSourceSet().toPattern(true));
+         }
+         System.out.println("***Target Set");
+         UnicodeSet target = translit.getTargetSet();
+         System.out.println(target.toPattern(true));
+         UnicodeSet asciiRemainder = new UnicodeSet("[a-z]").removeAll(target);
+         System.out.println("***ASCII - Target Set");
+         System.out.println(asciiRemainder.toPattern(true));
+
+         DummyFactory.add(forwardId, translit);
+
+         Transliterator backward = Transliterator.createFromRules(reverseId, name, Transliterator.REVERSE);
+         if (DEBUG) {
+             System.out.println("***Backward Rules");
+             System.out.println(backward.toRules(true));
+         }
+         DummyFactory.add(reverseId, backward);
+
+         // The inverse is looked up even when DEBUG is off.
+         Transliterator rev = translit.getInverse();
+         if (DEBUG) {
+             System.out.println("***Inverse Rules");
+             System.out.println(rev.toRules(true));
+         }
+     } else {
+         translit = Transliterator.getInstance(name);
+     }
+
+     text.flush();
+     text.setTransliterator(translit);
+     convertSelectionItem.setLabel(Transliterator.getDisplayName(translit.getID()));
+
+     addHistory(translit);
+
+     // A missing inverse is not an error here; it only disables the swap item.
+     Transliterator inv;
+     try {
+         inv = translit.getInverse();
+     } catch (Exception ex) {
+         inv = null;
+     }
+     boolean hasInverse = (inv != null);
+     if (hasInverse) {
+         addHistory(inv);
+     }
+     swapSelectionItem.setEnabled(hasInverse);
+
+     String summary = "Set transliterator: " + translit.getID();
+     if (hasInverse) {
+         summary += " and " + inv.getID();
+     }
+     System.out.println(summary);
+ }
+
+ /**
+  * Adds a menu item for the given transliterator to the history menu,
+  * unless historyMap already holds an item for its ID. After a new item
+  * is recorded, the history menu is emptied and refilled from historySet
+  * in that set's iteration order.
+  *
+  * @param trans the transliterator to record
+  */
+ void addHistory(Transliterator trans) {
+     String id = trans.getID();
+     MenuItem known = (MenuItem) historyMap.get(id);
+     if (known != null) {
+         return;   // already listed
+     }
+
+     MenuItem entry = new MenuItem(Transliterator.getDisplayName(id));
+     entry.addActionListener(new TransliterationListener(id));
+     historyMap.put(id, entry);
+     historySet.add(entry);
+
+     // Rebuild the whole menu from the set.
+     historyMenu.removeAll();
+     for (Iterator items = historySet.iterator(); items.hasNext(); ) {
+         historyMenu.add((MenuItem) items.next());
+     }
+ }
+
+ class TransliterationListener implements ActionListener, ItemListener {
+ String name;
+ public TransliterationListener(String name) {
+ this.name = name;
+ }
+ public void actionPerformed(ActionEvent e) {
+ setTransliterator(name, null);
+ }
+ public void itemStateChanged(ItemEvent e) {
+ if (e.getStateChange() == ItemEvent.SELECTED) {
+ setTransliterator(name, null);
+ } else {
+ setTransliterator("Any-Null", null);
+ }
+ }
+ }
+
+ class FontActionListener implements ActionListener {
+ String name;
+ public FontActionListener(String name) {
+ this.name = name;
+ }
+ public void actionPerformed(ActionEvent e) {
+ if (DEBUG) System.out.println("Font: " + name);
+ fontName = name;
+ text.setFont(new Font(fontName, Font.PLAIN, fontSize));
+ }
+ }
+
+ class SizeActionListener implements ActionListener {
+ int size;
+ public SizeActionListener(int size) {
+ this.size = size;
+ }
+ public void actionPerformed(ActionEvent e) {
+ if (DEBUG) System.out.println("Size: " + size);
+ fontSize = size;
+ text.setFont(new Font(fontName, Font.PLAIN, fontSize));
+ }
+ }
+
+ /**
+  * Drains an enumeration into a set.
+  *
+  * @param s           the set that receives the elements
+  * @param enumeration the source of elements; it is consumed
+  * @return the same set instance that was passed in
+  */
+ Set add(Set s, Enumeration enumeration) {
+     for (; enumeration.hasMoreElements(); ) {
+         Object element = enumeration.nextElement();
+         s.add(element);
+     }
+     return s;
+ }
+
+ /**
+ * Get a sorted list of the system transliterators.
+ */
+ /*
+ private static Vector getSystemTransliteratorNames() {
+ Vector v = new Vector();
+ for (Enumeration e=Transliterator.getAvailableIDs();
+ e.hasMoreElements(); ) {
+ v.addElement(e.nextElement());
+ }
+ // Insertion sort, O(n^2) acceptable for small n
+ for (int i=0; i<(v.size()-1); ++i) {
+ String a = (String) v.elementAt(i);
+ for (int j=i+1; j<v.size(); ++j) {
+ String b = (String) v.elementAt(j);
+ if (a.compareTo(b) > 0) {
+ v.setElementAt(b, i);
+ v.setElementAt(a, j);
+ a = b;
+ }
+ }
+ }
+ return v;
+ }
+ */
+
+/*
+ private void setNoTransliterator() {
+ translitItem = noTranslitItem;
+ noTranslitItem.setState(true);
+ handleSetTransliterator(noTranslitItem.getLabel());
+ compound = false;
+ for (int i=0; i.
+ */
+ /*
+ private static Transliterator decodeTranslitItem(String name) {
+ return (name.equals(NO_TRANSLITERATOR))
+ ? null : Transliterator.getInstance(name);
+ }
+ */
+
+ /**
+  * Transliterates the currently selected text in one pass and replaces
+  * the selection with the result, which is then left selected.
+  * Does nothing when no transliterator is given.
+  *
+  * @param trans the transliterator to apply, or null
+  */
+ private void handleBatchTransliterate(Transliterator trans) {
+     if (trans == null) {
+         return;
+     }
+
+     int selStart = text.getSelectionStart();
+     int selEnd = text.getSelectionEnd();
+     String selected = text.getText().substring(selStart, selEnd);
+     ReplaceableString buffer = new ReplaceableString(selected);
+
+     // The "before" half of the debug line has to be captured prior to
+     // the in-place transliteration below.
+     StringBuffer log = null;
+     if (DEBUG) {
+         log = new StringBuffer();
+         log.append('"' + buffer.toString() + "\" (start " + selStart +
+                    ", end " + selEnd + ") -> \"");
+     }
+
+     trans.transliterate(buffer);
+     String result = buffer.toString();
+
+     if (DEBUG) {
+         log.append(result + "\"");
+         System.out.println("Batch " + trans.getID() + ": " + log.toString());
+     }
+
+     text.replaceRange(result, selStart, selEnd);
+     int newEnd = selStart + result.length();
+     text.select(selStart, newEnd);
+ }
+
+ /**
+  * Closes the demo: disposes the help dialog first, then this frame.
+  */
+ private void handleClose() {
+     helpDialog.dispose();
+     this.dispose();
+ }
+
+ /*
+ class InfoDialog extends Dialog {
+ protected Button button;
+ protected TextArea area;
+ protected Dialog me;
+ protected Panel bottom;
+
+ public TextArea getArea() {
+ return area;
+ }
+
+ public Panel getBottom() {
+ return bottom;
+ }
+
+ InfoDialog(Frame parent, String title, String label, String message) {
+ super(parent, title, false);
+ me = this;
+ this.setLayout(new BorderLayout());
+ if (label.length() != 0) {
+ this.add("North", new Label(label));
+ }
+
+ area = new TextArea(message, 8, 80, TextArea.SCROLLBARS_VERTICAL_ONLY);
+ this.add("Center", area);
+
+ button = new Button("Hide");
+ button.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ me.hide();
+ }
+ });
+ bottom = new Panel();
+ bottom.setLayout(new FlowLayout(FlowLayout.CENTER, 0, 0));
+ bottom.add(button);
+ this.add("South", bottom);
+ this.pack();
+ addWindowListener(new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ me.hide();
+ }
+ });
+ }
+ }
+ */
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java b/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java
new file mode 100644
index 00000000000..99820e0611d
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/DemoApplet.java
@@ -0,0 +1,73 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+import java.applet.Applet;
+import java.awt.Button;
+import java.awt.Dimension;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+
+import com.ibm.icu.dev.demo.impl.AppletFrame;
+
+/**
+ * A minimal applet consisting of a single button. Pressing the button
+ * opens (or brings to the front) the transliteration Demo frame. The
+ * applet is meant to be embedded in a web page, and can also be started
+ * stand-alone through main().
+ *
+ * <p>Copyright (c) IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ */
+public class DemoApplet extends Applet {
+
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = 8214879807740061678L;
+
+    /** The demo window, or null while none is open. */
+    Demo frame = null;
+
+    /** Runs the applet stand-alone inside a 640x480 AppletFrame. */
+    public static void main(String[] args) {
+        new AppletFrame("Transliteration Demo", new DemoApplet(), 640, 480);
+    }
+
+    /**
+     * Creates the launch button and sizes the applet to the button's
+     * preferred size plus a 10 pixel allowance in each dimension.
+     */
+    public void init() {
+        Button launcher = new Button("Transliteration Demo");
+        launcher.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                showDemoFrame();
+            }
+        });
+        add(launcher);
+
+        Dimension wanted = launcher.getPreferredSize();
+        wanted.width += 10;
+        wanted.height += 10;
+        resize(wanted);
+    }
+
+    /**
+     * Creates the demo frame on first use, then shows it and raises it.
+     * The frame reference is dropped when the window is being closed, so
+     * the next button press creates a fresh frame.
+     */
+    private void showDemoFrame() {
+        if (frame == null) {
+            frame = new Demo(600, 200);
+            frame.addWindowListener(new WindowAdapter() {
+                public void windowClosing(WindowEvent we) {
+                    frame = null;
+                }
+            });
+        }
+        frame.setVisible(true);
+        frame.toFront();
+    }
+
+    /** Disposes the demo frame, if one is open, when the applet stops. */
+    public void stop() {
+        if (frame == null) {
+            return;
+        }
+        frame.dispose();
+        frame = null;
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java b/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java
new file mode 100644
index 00000000000..4ea16e3b9a4
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/InfoDialog.java
@@ -0,0 +1,66 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Dialog;
+import java.awt.FlowLayout;
+import java.awt.Frame;
+import java.awt.Label;
+import java.awt.Panel;
+import java.awt.TextArea;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+/**
+ * A non-modal dialog made of an optional heading label, a vertically
+ * scrolling text area and a "Hide" button. Pressing the button or closing
+ * the window hides the dialog; it is not disposed.
+ */
+public class InfoDialog extends Dialog {
+    /**
+     * For serialization
+     */
+    private static final long serialVersionUID = -3086665546137919018L;
+    /** The "Hide" button in the bottom panel. */
+    protected Button button;
+    /** The text area holding the message. */
+    protected TextArea area;
+    /** Reference to this dialog for use by the anonymous listeners. */
+    protected Dialog me;
+    /** The panel at the bottom that contains the button. */
+    protected Panel bottom;
+
+    /** @return the text area that displays the message */
+    public TextArea getArea() {
+        return area;
+    }
+
+    /** @return the bottom panel, e.g. for adding further buttons */
+    public Panel getBottom() {
+        return bottom;
+    }
+
+    /**
+     * Builds the dialog and packs it; the dialog is not shown.
+     *
+     * @param parent  the owning frame
+     * @param title   the dialog title
+     * @param label   heading shown above the text area; no heading is
+     *                added when this is null or empty
+     * @param message initial content of the text area
+     */
+    InfoDialog(Frame parent, String title, String label, String message) {
+        super(parent, title, false);
+        me = this;
+        this.setLayout(new BorderLayout());
+        // A null label is treated like an empty one instead of throwing.
+        if (label != null && label.length() != 0) {
+            this.add(new Label(label), BorderLayout.NORTH);
+        }
+
+        area = new TextArea(message, 8, 80, TextArea.SCROLLBARS_VERTICAL_ONLY);
+        this.add(area, BorderLayout.CENTER);
+
+        button = new Button("Hide");
+        button.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                // setVisible(false) replaces the deprecated hide()
+                me.setVisible(false);
+            }
+        });
+        bottom = new Panel();
+        bottom.setLayout(new FlowLayout(FlowLayout.CENTER, 0, 0));
+        bottom.add(button);
+        this.add(bottom, BorderLayout.SOUTH);
+        this.pack();
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                me.setVisible(false);
+            }
+        });
+    }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt
new file mode 100644
index 00000000000..146c659ac3c
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Arabic-Latin.txt
@@ -0,0 +1,24 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+@UPPERFILTER@
+ما هي الشفرة الموحدة "يونِكود" ؟
+
+أساسًا، تتعامل الحواسيب فقط مع الأرقام، وتقوم بتخزين الأحرف والمحارف الأخرى بعد أن تُعطي رقما معينا لكل واحد منها. وقبل اختراع "يونِكود"، كان هناك مئات الأنظمة للتشفير وتخصيص هذه الأرقام للمحارف، ولم يوجد نظام تشفير واحد يحتوي على جميع المحارف الضرورية. وعلى سبيل المثال، فإن الاتحاد الأوروبي لوحده، احتوى العديد من الشفرات المختلفة ليغطي جميع اللغات المستخدمة في الاتحاد. وحتى لو اعتبرنا لغة واحدة، كاللغة الإنجليزية، فإن جدول شفرة واحد لم يكف لاستيعاب جميع الأحرف وعلامات الترقيم والرموز الفنية والعلمية الشائعة الاستعمال.
+
+
+
+وتجدر الملاحظة أن أنظمة التشفير المختلفة تتعارض مع بعضها البعض. وبعبارة أخرى، يمكن أن يستخدِم جدولي شفرة نفس الرقم لتمثيل محرفين مختلفين، أو رقمين مختلفين لتمثيل نفس المحرف. ولو أخذنا أي جهاز حاسوب، وبخاصة جهاز النادل (server)، فيجب أن تكون لديه القدرة على التعامل مع عدد كبير من الشفرات المختلفة، ويتم تصميمه على هذا الأساس. ومع ذلك، فعندما تمر البيانات عبر أنظمة مختلفة، توجد هناك خطورة لضياع أو تحريف بعض هذه البيانات.
+
+
+
+"يونِكود" تغير هذا كليا !
+
+تخصص الشفرة الموحدة "يونِكود" رقما وحيدا لكل محرف في جميع اللغات العالمية، وذلك بغض النظر عن نوع الحاسوب أو البرامج المستخدمة. وقد تم تبني مواصفة "يونِكود" من قبل قادة الصانعين لأنظمة الحواسيب في العالم، مثل شركات آي.بي.إم. (IBM)، أبل (APPLE)، هِيْولِت باكرد (Hewlett-Packard) ، مايكروسوفت (Microsoft)، أوراكِل (Oracle) ، صن (Sun) وغيرها. كما أن المواصفات والمقاييس الحديثة (مثل لغة البرمجة "جافا" "JAVA" ولغة "إكس إم إل" "XML" التي تستخدم لبرمجة الانترنيت) تتطلب استخدام "يونِكود". علاوة على ذلك ، فإن "يونِكود" هي الطريقة الرسمية لتطبيق المقياس العالمي إيزو ١٠٦٤٦ (ISO 10646) .
+
+
+
+إن بزوغ مواصفة "يونِكود" وتوفُّر الأنظمة التي تستخدمه وتدعمه، يعتبر من أهم الاختراعات الحديثة في عولمة البرمجيات لجميع اللغات في العالم. وإن استخدام "يونِكود" في عالم الانترنيت سيؤدي إلى توفير كبير مقارنة مع استخدام المجموعات التقليدية للمحارف المشفرة. كما أن استخدام "يونِكود" سيُمكِّن المبرمج من كتابة البرنامج مرة واحدة، واستخدامه على أي نوع من الأجهزة أو الأنظمة، ولأي لغة أو دولة في العالم أينما كانت، دون الحاجة لإعادة البرمجة أو إجراء أي تعديل. وأخيرا، فإن استخدام "يونِكود" سيمكن البيانات من الانتقال عبر الأنظمة والأجهزة المختلفة دون أي خطورة لتحريفها، مهما تعددت الشركات الصانعة للأنظمة واللغات، والدول التي تمر من خلالها هذه البيانات.
+
+@SET [[[:Arabic:] & [\u0600-\u06FF]] [\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9]]
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt
new file mode 100644
index 00000000000..63800742530
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Greek-Latin.txt
@@ -0,0 +1,73 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+
+Τι είναι το Unicode?
+
+Η κωδικοσελίδα Unicode προτείνει έναν και μοναδικό αριθμό για κάθε χαρακτήρα, ανεξάρτητα από το λειτουργικό σύστημα, ανεξάρτητα από το λογισμικό, ανεξάρτητα από την γλώσσα.
+
+Οι ηλεκτρονικοί υπολογιστές, σε τελική ανάλυση, χειρίζονται απλώς αριθμούς. Αποθηκεύουν γράμματα και άλλους χαρακτήρες αντιστοιχώντας στο καθένα τους από έναν αριθμό (ονομάζουμε μία τέτοια αντιστοιχία κωδικοσελίδα). Πριν την εφεύρεση του Unicode, υπήρχαν εκατοντάδες διαφορετικές κωδικοσελίδες. Λόγω περιορισμών μεγέθους όμως, σε καμία κωδικοσελίδα δεν χωρούσαν αρκετοί χαρακτήρες: λόγου χάριν, η Ευρωπαϊκή Ένωση χρειαζόταν πλήθος διαφορετικών κωδικοσελίδων για να καλύψει όλες τις γλώσσες των χωρών-μελών της. Ακόμα και για μία και μόνη γλώσσα, όπως π.χ. τα Αγγλικά, μία κωδικοσελίδα δεν επαρκούσε για να καλύψει όλα τα γράμματα, σημεία στίξης και τεχνικά σύμβολα ευρείας χρήσης.
+
+Εκτός αυτού, οι κωδικοσελίδες αυτές διαφωνούσαν μεταξύ τους. Έτσι, δύο κωδικοσελίδες μπορούσαν κάλλιστα να χρησιμοποιούν τον ίδιο αριθμό για δύο διαφορετικούς χαρακτήρες, ή να χρησιμοποιούν διαφορετικούς αριθμούς για τον ίδιο χαρακτήρα. Κάθε υπολογιστής (και ιδίως εάν ήταν διακομιστής) έπρεπε να υποστηρίζει πλήθος διαφορετικών κωδικοσελίδων· ταυτόχρονα κάθε φορά που δεδομένα μεταφέρονταν μεταξύ διαφορετικών κωδικοσελίδων ή λειτουργικών συστημάτων, τα δεδομένα αυτά κινδύνευαν να αλλοιωθούν.
+
+Το Unicode αλλάζει αυτή την κατάσταση!
+Το Unicode προτείνει έναν μοναδικό αριθμό για κάθε χαρακτήρα, ανεξάρτητα από το λειτουργικό σύστημα, ανεξάρτητα από το λογισμικό, ανεξάρτητα από την γλώσσα. Την κωδικοσελίδα Unicode έχουν ασπασθεί κορυφαίοι παράγοντες του χώρου των λογισμικών όπως οι: Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys και πολλοί άλλοι. Το Unicode απαιτούν πολλές σύγχρονες τυποποιήσεις όπως οι: XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML, κ.λπ., και είναι η επίσημη μέθοδος εφαρμογής της τυποποίησης ISO/IEC 10646. Υποστηρίζεται από πολλά λειτουργικά συστήματα, όλους τους σύχρονους περιηγητές Διαδικτύου, και πολλά άλλα προϊόντα. Η εμφάνιση της κωδικοσελίδας Unicode, και η διαθεσιμότητα εργαλείων που να την υποστηρίζουν είναι από τις σημαντικότερες εξελίξεις της πρόσφατης τεχνολογίας λογισμικών.
+
+Η ενσωμάτωση του Unicode σε εφαρμογές πελάτη-διακομιστή ή "multi-tiered" προσφέρει σημαντικές οικονομίες σε σχέση με τις ως τώρα υπάρχουσες κωδικοσελίδες. Χάρις στο Unicode ένα και μόνο προϊόν ή μία και μόνη τοποθεσία Διαδικτύου μπορεί να επικοινωνεί με διάφορα λειτουργικά συστήματα, σε διάφορες γλώσσες και χώρες, χωρίς την ανάγκη επαναπρογραμματισμού. Γίνεται έτσι δυνατή η μεταφορά δεδομένων ανάμεσα σε πλήθος διαφορετικών συστημάτων δίχως κίνδυνο αλλοίωσης.
+
+Σχετικά με το Κονσόρτιουμ Unicode
+Tο Κονσόρτιουμ Unicode είναι ένας κοινωφελής οργανισμός· ιδρύθηκε για να αναπτύξει, να επεκτείνει και να μεταδώσει την χρήση της κωδικοσελίδας Unicode που καθορίζει την αναπαράσταση κειμένου σε σύγχρονα λογισμικά προϊόντα και τυποποιήσεις. Μεγάλος αριθμός εταιρειών και οργανισμών της διεθνούς βιομηχανίας υπολογιστών και λογισμικών είναι μέλη του Κονσόρτιουμ Unicode. Το Κονσόρτιουμ χρηματοδοτείται μόνο από τις συνδρομές των μελών του. Μέλος του κονσόρτιουμ Unicode μπορεί να γίνει οιοσδήποτε (οργανισμός, εταιρεία ή ιδιώτης, οπουδήποτε στον κόσμο) που να υποστηρίζει την κωδικοσελίδα Unicode και να επιθυμεί να συνδράμει στην επέκταση και στην εφαρμογή της.
+
+Για περαιτέρω πληροφορίες, βλέπε τις εξής ιστοσελίδες: Γλωσσάρι, Δείγματα προϊόντων συμβατών με το Unicode, Τεχνική Εισαγωγή και Χρήσιμες πηγές πληροφοριών.
+
+(ANCIENT)
+
+ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ
+πλάγχθη, ἐπεὶ Τροίης ἱερὸν πτολίεθρον ἔπερσεν:
+πολλῶν δ’ ἀνθρώπων ἴδεν ἄστεα καὶ νόον ἔγνω,
+πολλὰ δ’ ὅ γ’ ἐν πόντῳ πάθεν ἄλγεα ὃν κατὰ θυμόν,
+ἀρνύμενος ἥν τε ψυχὴν καὶ νόστον ἑταίρων.
+ἀλλ’ οὐδ’ ὣς ἑτάρους ἐρρύσατο, ἱέμενός περ:
+αὐτῶν γὰρ σφετέρῃσιν ἀτασθαλίῃσιν ὄλοντο,
+νήπιοι, οἳ κατὰ βοῦς Ὑπερίονος Ἠελίοιο
+ἤσθιον: αὐτὰρ ὁ τοῖσιν ἀφείλετο νόστιμον ἦμαρ.
+τῶν ἁμόθεν γε, θεά, θύγατερ Διός, εἰπὲ καὶ ἡμῖν.
+*
+ἔνθ’ ἄλλοι μὲν πάντες, ὅσοι φύγον αἰπὺν ὄλεθρον,
+οἴκοι ἔσαν, πόλεμόν τε πεφευγότες ἠδὲ θάλασσαν:
+τὸν δ’ οἶον νόστου κεχρημένον ἠδὲ γυναικὸς
+νύμφη πότνι’ ἔρυκε Καλυψὼ δῖα θεάων
+ἐν σπέσσι γλαφυροῖσι, λιλαιομένη πόσιν εἶναι.
+ἀλλ’ ὅτε δὴ ἔτος ἦλθε περιπλομένων ἐνιαυτῶν,
+τῷ οἱ ἐπεκλώσαντο θεοὶ οἶκόνδε νέεσθαι
+εἰς Ἰθάκην, οὐδ’ ἔνθα πεφυγμένος ἦεν ἀέθλων
+καὶ μετὰ οἷσι φίλοισι. θεοὶ δ’ ἐλέαιρον ἅπαντες
+νόσφι Ποσειδάωνος: ὁ δ’ ἀσπερχὲς μενέαινεν
+ἀντιθέῳ Ὀδυσῆι πάρος ἣν γαῖαν ἱκέσθαι.
+*
+ἀλλ’ ὁ μὲν Αἰθίοπας μετεκίαθε τηλόθ’ ἐόντας,
+Αἰθίοπας τοὶ διχθὰ δεδαίαται, ἔσχατοι ἀνδρῶν,
+οἱ μὲν δυσομένου Ὑπερίονος οἱ δ’ ἀνιόντος,
+ἀντιόων ταύρων τε καὶ ἀρνειῶν ἑκατόμβης.
+ἔνθ’ ὅ γ’ ἐτέρπετο δαιτὶ παρήμενος: οἱ δὲ δὴ ἄλλοι
+Ζηνὸς ἐνὶ μεγάροισιν Ὀλυμπίου ἁθρόοι ἦσαν.
+τοῖσι δὲ μύθων ἦρχε πατὴρ ἀνδρῶν τε θεῶν τε:
+μνήσατο γὰρ κατὰ θυμὸν ἀμύμονος Αἰγίσθοιο,
+τόν ῥ’ Ἀγαμεμνονίδης τηλεκλυτὸς ἔκταν’ Ὀρέστης:
+τοῦ ὅ γ’ ἐπιμνησθεὶς ἔπε’ ἀθανάτοισι μετηύδα:
+*
+“ὢ πόποι, οἷον δή νυ θεοὺς βροτοὶ αἰτιόωνται:
+ἐξ ἡμέων γάρ φασι κάκ’ ἔμμεναι, οἱ δὲ καὶ αὐτοὶ
+σφῇσιν ἀτασθαλίῃσιν ὑπὲρ μόρον ἄλγε’ ἔχουσιν,
+ὡς καὶ νῦν Αἴγισθος ὑπὲρ μόρον Ἀτρεί̈δαο
+γῆμ’ ἄλοχον μνηστήν, τὸν δ’ ἔκτανε νοστήσαντα,
+εἰδὼς αἰπὺν ὄλεθρον, ἐπεὶ πρό οἱ εἴπομεν ἡμεῖς,
+Ἑρμείαν πέμψαντες, ἐύσκοπον ἀργεϊφόντην,
+μήτ’ αὐτὸν κτείνειν μήτε μνάασθαι ἄκοιτιν:
+ἐκ γὰρ Ὀρέσταο τίσις ἔσσεται Ἀτρεί̈δαο,
+ὁππότ’ ἂν ἡβήσῃ τε καὶ ἧς ἱμείρεται αἴης.
+ὣς ἔφαθ’ Ἑρμείας, ἀλλ’ οὐ φρένας Αἰγίσθοιο
+πεῖθ’ ἀγαθὰ φρονέων: νῦν δ’ ἁθρόα πάντ’ ἀπέτισεν.”
+
+@SET [[[:Greek:]&[\u0370-\u03E1 \u03F0-\u03FF]] [\:-;?\u00B7\u037E\u0387]]
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt
new file mode 100644
index 00000000000..8603663c655
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Han-Latin.txt
@@ -0,0 +1,26 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+@UPPERFILTER@
+什么是Unicode(统一码)?
+Unicode给每个字符提供了一个唯一的数字,
+不论是什么平台,
+不论是什么程序,
+不论是什么语言。
+
+基本上,计算机只是处理数字。它们指定一个数字,来储存字母或其他字符。在创造Unicode之前,有数百种指定这些数字的编码系统。没有一个编码可以包含足够的字符:例如,单单欧州共同体就需要好几种不同的编码来包括所有的语言。即使是单一种语言,例如英语,也没有哪一个编码可以适用于所有的字母,标点符号,和常用的技术符号。
+
+这些编码系统也会互相冲突。也就是说,两种编码可能使用相同的数字代表两个不同的字符,或使用不同的数字代表相同的字符。任何一台特定的计算机(特别是服务器)都需要支持许多不同的编码,但是,不论什么时候数据通过不同的编码或平台之间,那些数据总会有损坏的危险。
+
+Unicode正在改变所有这一切!
+Unicode给每个字符提供了一个唯一的数字,不论是什么平台,不论是什么程序,不论什么语言。Unicode标准已经被这些工业界的领导们所采用,例如:Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys和其它许多公司。最新的标准都需要Unicode,例如XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML等等,并且,Unicode是实现ISO/IEC 10646的正规方式。许多操作系统,所有最新的浏览器和许多其他产品都支持它。Unicode标准的出现和支持它工具的存在,是近来全球软件技术最重要的发展趋势。
+
+将Unicode与客户服务器或多层应用程序和网站结合,比使用传统字符集节省费用。Unicode使单一软件产品或单一网站能够贯穿多个平台,语言和国家,而不需要重建。它可将数据传输到许多不同的系统,而无损坏。
+
+关于Unicode学术学会
+Unicode学术学会是一个非盈利的组织,是为发展,扩展和推广使用Unicode标准而建立的,Unicode学术学会设立了现代软件产品和标准文本的表示法。学术学会的会员代表了广泛领域的计算机和资讯工业的公司和组织。学术学会只由会员提供资金。Unicode学术学会的会员资格开放给世界上任何支持Unicode标准和希望协助其扩展和执行的组织及个人。
+
+欲知更多信息,请参阅术语词汇表,Unicode产品样本,技术简介和参考资料。
+
+Chinese translation by 黎國珍, Xerox
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt
new file mode 100644
index 00000000000..9d26745a874
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Hebrew-Latin.txt
@@ -0,0 +1,26 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+@UPPERFILTER@
+מה זה יוניקוד (Unicode)?
+יוניקוד מקצה מספר ייחודי לכל תו,
+לא משנה על איזו פלטפורמה,
+לא משנה באיזו תוכנית,
+ולא משנה באיזו שפה.
+
+באופן בסיסי, מחשבים עוסקים רק במספרים. הם מאחסנים אותיות ותווים אחרים על-ידי הקצאת מספר לכל אחד מהם. בטרם הומצא היוניקוד, היו מאות מערכות קידוד שונות להקצאת המספרים הללו. אף לא אחת מהן יכלה להכיל כמות תווים מספקת. לדוגמא: רק לאיחוד האירופאי נדרשים כמה סוגי קידודים שונים על מנת לכסות את כל השפות המדוברות בו. יתירה מזאת אף לשפה בודדת, כמו אנגלית למשל, לא היה די במערכת קידוד אחת בעבור כל האותיות, סימני הפיסוק והסמלים הטכניים שבשימוש שוטף.
+
+מערכות קידוד אלו אף סותרות זו את זו. כלומר, שני קידודים יכולים להשתמש באותו מספר לשני תוים נבדלים, או להשתמש במספרים שונים לאותו תו. על כל מחשב (ובמיוחד שרתים) לתמוך במספר רב של מערכות קידוד שונות; אולם כל אימת שנתונים עוברים בין מערכות קידוד או פלטפורמות שונות קיים הסיכון שייפגמו.
+
+יוניקוד משנה את כל זה!
+יוניקוד מקצה מספר ייחודי לכל תו, ללא תלות בפלטפורמה, בתוכנית, או בשפה. תקן היוניקוד אומץ על-ידי המובילים בתעשייה כמו Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys ורבים אחרים. יוניקוד נדרש על-ידי תקנים מודרניים כמו XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML וכדומה, ומהווה למעשה את היישום הרשמי של תקן ISO/IEC 10646. הוא נתמך על ידי מערכות הפעלה רבות, כל הדפדפנים החדישים, ומוצרים רבים אחרים. הופעת תקן היוניקוד וזמינות הכלים התומכים בו נמנות עם המגמות הכלל-עולמיות החשובות ביותר, אשר מסתמנות לאחרונה בטכנולוגיית התוכנה.
+
+שילוב יוניקוד ביישומי שרת-לקוח או ביישומים רבי-שכבות ובאתרי אינטרנט מאפשר חיסכון ניכר בעלויות לעומת השימוש בסדרות התווים המסורתיות. הודות ליוניקוד, מוצר תוכנה אחד או אתר יחיד ברשת יכול להרחיב את יעדיו למגוון פלטפורמות, ארצות ושפות ללא צורך בשינויים מרחיקים. יוניקוד מאפשר מעבר נתונים דרך מערכות רבות ושונות מבלי שייפגמו.
+
+פרטים אודות הקונסורציום של יוניקוד (Unicode Consortium)
+הקונסורציום של יוניקוד הוא ארגון ללא מטרת רווח שנוסד כדי לפתח, להרחיב ולקדם את השימוש בתקן יוניקוד, אשר מגדיר את ייצוג הטקסט במוצרי תוכנה ותקנים מודרניים. חברים בקונסורציום מגוון רחב של תאגידים וארגונים בתעשיית המחשבים ועיבוד המידע. הקונסורציום ממומן על-ידי דמי-חבר בלבד. החברות בקונסורציום יוניקוד פתוחה לארגונים ולאנשים פרטיים, בכל רחבי העולם, אשר תומכים בתקן יוניקוד ומעוניינים לסייע בהתפתחותו והטמעתו.
+
+למידע נוסף, ראה מילון מונחים, רשימה חלקית של מוצרים מותאמים ליוניקוד, מבוא טכני ו- חומרי עזר [קישורים באנגלית].
+
+@SET [[:Hebrew:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138]]
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html b/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html
new file mode 100644
index 00000000000..026394a11e4
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Instructions.html
@@ -0,0 +1,154 @@
+
+
+
+
+
+
+New Transliteration Test Files
+
+
+
+
+New Transliteration Test Files
+The Test_*.html files show the transliteration of characters for given
+languages. The sample for each language consists of "What Is Unicode"
+in Thai, followed by other available text. The text is broken apart into
+sentences for ease of viewing (note: we know of some problems with the sentence
+rules for Japanese and Chinese). The left column is the original, and the right
+is the romanization. The program also converts back to the original script. If
+there is a discrepancy between the source and the reverse transformation, that
+is indicated by making the background red
+from that point on.
+
+ Note: If you have some more text that you would like added to the
+ sample, just let me know. I am particularly interested in name lists, since
+ they are the typical source.
+
+Standards
+The goal is to follow a given standard, such as ISO* or UNGEGN wherever
+possible. We also need to round-trip, so in some cases, that means adding some
+additional accent marks to disambiguate characters. And often the source
+standards are missing some characters, such as characters with combining Hamzas
+in Arabic. Remember that the goal for these is transliteration (unambiguously
+representing all the letters in the original), not transcription (representing
+the best pronunciation).
+
+ Thai : ISO 11940 < http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
+ > plus a few items:
+
+ Accents may be added to the Latin for disambiguation.
+ In the next release, we'd like to do the UNGEGN version < http://www.eki.ee/wgrs/rom1_th.pdf
+ > which is probably more useful (and readable), and follows more
+ closely the Thai standard.
+ Spaces are provided at word-breaks, using the Thai BreakIterator.
+ An inherent vowel (ọ) is added, as in UNGEGN. The dot is for
+ disambiguation.
+
+ Note: if the inherent vowel positions cannot be algorithmically
+ determined, let me know and I will remove them.
+
+
+
+
+ Arabic : Generally follows
+ UNGEGN < http://www.eki.ee/wgrs/rom1_ar.pdf
+ >
+
+ Accents may be added to the Latin for disambiguation.
+ Occasionally deviates in the direction of ISO 233 < http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
+ >
+
+ with underdot instead of cedilla for letters like SAD, since those
+ are explicitly in Unicode for transliteration of Arabic
+ adding extra non-Arabic-language letters, like PEH. Note: not all
+ extended Arabic characters are handled yet.
+
+
+ Does not do assimilation of "al", nor hyphenation of
+ it.
+
+ While it could be done, we need to determine whether a prefix
+ "al" could occur other than as the definite article (since
+ no space is used).
+
+
+ This is transliteration. For transcription one would want an
+ engine that added points appropriately to the Hebrew.
+
+
+ Hebrew : Generally
+ follows UNGEGN < http://www.eki.ee/wgrs/rom1_he.pdf
+ >, with some exceptions:
+
+ Accents may be added to the Latin for disambiguation.
+ Combinations of dagesh, shin/sin dot that would produce different
+ letters are not yet called out.
+ Note that the final forms are not preserved. Thus, when going from
+ Latin to Hebrew, a character is given final form depending on its
+ position.
+
+ E.g. מםמם => mmmm =>
+ מממם
+
+
+ This is transliteration. For transcription one would want an
+ engine that added points appropriately to the Hebrew.
+ See also < http://homepage.mac.com/sirbinks/pdf/Hebrew.r1.pdf
+ > for the ISO version. The Chicago Manual of Style has a clear table
+ of mappings for the vowel marks.
+
+
+ Han : Uses the CEDICT
+ data plus Unicode Unihan kMandarin values for pinyin. Doesn't
+ roundtrip!
+
+ Note: the Chinese pronunciation of Han characters varies by
+ context and grammar, though nowhere near as much as Japanese.
+
+ Ideally we'd have an underlying engine for this. In 2.4 we will
+ have a plug-in interface so that people could add one, such as the
+ IBM engine.
+ The data from CEDICT and Unihan don't list the most frequent
+ choice first, so we will be updating that.
+
+
+
+
+ Greek/UNGEGN : Uses a
+ modern Greek transliteration, based on the UNGEGN rules at < http://www.eki.ee/wgrs/rom1_el.pdf
+ >. This version will not roundtrip ancient Greek.
+ Greek : Uses a classic Greek
+ transliteration. This version will not roundtrip modern Greek.
+
+Notes
+
+ For readability, the files have a few other things besides just the
+ transliteration:
+
+ The first word of each sentence is titlecased, as are names (where we
+ have a name-list, such as in Thai).
+ The Latin in the original is mapped to the private-use zone before
+ conversion, and then again after conversion. This does have the downside
+ that any rules (such as in Han) that need to know the context (e.g. for
+ inserting spaces or capitalization) will gum up a little bit. This is
+ just an artifact of the test display.
+
+
+ I don't think that ISO 11940 is a particularly good way to romanize, but
+ it is at least complete and a standard. So what I am interested in just for
+ now is whether the samples in the file follow it (with the above
+ exceptions).
+ Some of the files also have a set of characters at the end, one character
+ per row, with a following row listing the hex and name.
+ The source rules for all of these are at the following URL. So if you want
+ to know the details of how the characters are handled, that is the place to
+ look.
+
+
+
+
+
+
+
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt b/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt
new file mode 100644
index 00000000000..631f191529a
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/Test_Thai-Latin.txt
@@ -0,0 +1,69 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+@UPPERFILTER@
+Unicode คืออะไร?
+Unicode กำหนดหมายเลขเฉพาะสำหรับทุกอักขระ
+โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด
+ไม่ขึ้นกับว่าจะเป็นโปรแกรมใด
+และไม่ว่าจะเป็นภาษาใด
+
+โดยพื้นฐานแล้ว, คอมพิวเตอร์จะเกี่ยวข้องกับเรื่องของตัวเลข. คอมพิวเตอร์จัดเก็บตัวอักษรและอักขระอื่นๆ โดยการกำหนดหมายเลขให้สำหรับแต่ละตัว. ก่อนหน้าที่๊ Unicode จะถูกสร้างขึ้น, ได้มีระบบ encoding อยู่หลายร้อยระบบสำหรับการกำหนดหมายเลขเหล่านี้. ไม่มี encoding ใดที่มีจำนวนตัวอักขระมากเพียงพอ: ยกตัวอย่างเช่น, เฉพาะในกลุ่มสหภาพยุโรปเพียงแห่งเดียว ก็ต้องการหลาย encoding ในการครอบคลุมทุกภาษาในกลุ่ม. หรือแม้แต่ในภาษาเดี่ยว เช่น ภาษาอังกฤษ ก็ไม่มี encoding ใดที่เพียงพอสำหรับทุกตัวอักษร, เครื่องหมายวรรคตอน และสัญลักษณ์ทางเทคนิคที่ใช้กันอยู่ทั่วไป.
+
+ระบบ encoding เหล่านี้ยังขัดแย้งซึ่งกันและกัน. นั่นก็คือ, ในสอง encoding สามารถใช้หมายเลขเดียวกันสำหรับตัวอักขระสองตัวที่แตกต่างกัน,หรือใช้หมายเลขต่างกันสำหรับอักขระตัวเดียวกัน. ในระบบคอมพิวเตอร์ (โดยเฉพาะเซิร์ฟเวอร์) ต้องมีการสนับสนุนหลาย encoding; และเมื่อข้อมูลที่ผ่านไปมาระหว่างการเข้ารหัสหรือแพล็ตฟอร์มที่ต่างกัน, ข้อมูลนั้นจะเสี่ยงต่อการผิดพลาดเสียหาย.
+
+Unicode จะเปลี่ยนแปลงสิ่งเหล่านั้นทั้งหมด!
+Unicode กำหนดหมายเลขเฉพาะสำหรับแต่ละอักขระ, โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด, ไม่ขึ้นกับว่าจะเป็นโปรแกรมใดและไม่ว่าจะเป็นภาษาใด. มาตรฐาน Unicode ได้ถูกนำไปใช้โดยผู้นำในอุตสาหกรรม เช่น Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys และอื่นๆ อีกมาก. Unicode เป็นสิ่งที่จำเป็นสำหรับมาตรฐานใหม่ๆ เช่น XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML ฯลฯ., และเป็นแนวทางอย่างเป็นทางการในการทำ ISO/IEC 10646. Unicode ได้รับการสนับสนุนในระบบปฏิบัติการจำนวนมาก, บราวเซอร์ใหม่ๆ ทกตัว, และผลิตภัณฑ์อื่นๆ อีกมาก. การเกิดขึ้นของ Unicode Standard และทูลส์ต่างๆ ที่มีในการสนับสนุน Unicode, เป็นหนึ่งในแนวโน้มทางเทคโนโลยีซอฟต์แวร์ระดับโลกที่มีความสำคัญที่สุด.
+
+การรวม Unicode เข้าไปในระบบไคลเอ็นต์-เซิร์ฟเวอร์ หรือแอ็พพลิเคชันแบบ multi-tiered และเว็บไซต์ จะทำให้เกิดการประหยัดค่าใช้จ่ายมากกว่าการใช้ชุดอักขระแบบเดิม. Unicode ทำให้ผลิตภัณฑ์ซอฟต์แวร์หนึ่งเดียว หรือเว็บไซต์แห่งเดียว รองรับได้หลายแพล็ตฟอร์ม, หลายภาษาและหลายประเทศโดยไม่ต้องทำการรื้อปรับระบบ. Unicode ยังทำให้ข้อมูลสามารถเคลื่อนย้ายไปมาในหลายๆ ระบบโดยไม่เกิดความผิดพลาดเสียหาย.
+
+เกี่ยวกับ Unicode Consortium
+Unicode Consortium เป็นองค์กรไม่แสวงหากำไรที่ก่อตั้งขึ้นเพื่อพัฒนา, ขยายและส่งเสริมการใช้ Unicode Standard, ซึ่งกำหนดรูปแบบการแทนค่าของข้อความในผลิตภัณฑ์ซอฟต์แวร์และมาตรฐานใหม่ๆ. สมาชิกของสมาคมเป็นตัวแทนจากบริษัทและองค์กรในอุตสาหกรรมคอมพิวเตอร์และการประมวลผลสารสนเทศ. สมาคมได้รับการสนับสนุนทางการเงินผ่านทางค่าธรรมเนียมของการเป็นสมาชิกเท่านั้น. สมาชิกภาพของ Unicode Consortium เปิดกว้างสำหรับองค์กรหรือบุคคลใดๆ ในโลกที่ต้องการสนับสนุน Unicode Standard และช่วยเหลือการขยายตัวและการนำ Unicode ไปใช้งาน.
+
+สำหรับข้อมูลเพิ่มเติม, ให้ดูที่ Glossary, Sample Unicode-Enabled Products, Technical Introduction และ Useful Resources.
+
+เป็นมนุษย์สุดประเสริฐเลิศคุณค่า
+กว่าบรรดาฝูงสัตว์เดรัจฉาน
+จงฝ่าฟันพัฒนาวิชาการ
+อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
+ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า
+หัดอภัยเหมือนกีฬาอัชฌาสัย
+ปฏิบัติประพฤติกฎกำหนดใจ
+พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ
+
+แหล่งที่มา : สมาคมคอมพิวเตอร์แห่งประเทศไทย
+
+ฅนฃวด kho khuat and kho khon
+@TITLECASE@
+ก๊กเฮง แซ่แต้
+กชกร ศราทธทัต
+กติกา อังคสุภณ
+กนก ธรรมประทีป
+กนก วงศ์ทองศรี
+กนกกร ช้างเย็นฉ่ำ
+กนกฉัตร์ ถาวรนันท์
+กนกนวล โปษยะนันทน์
+กนกพร คมคาย
+กนกพร ตีรเลิศพานิช
+กนกพร พันทร
+กนกพร ศรีบัณฑิต
+กนกพร อติวรรณาพัฒน์
+กนกพรรณ ศรีวนาภิรมย์
+กนกรัตน์ เกียรติยิ่งอังศุลี
+กนกรัตน์ สุธรรมพิทักษ์
+กนกวรรณ คงคาประเสริฐ
+กนกวรรณ แซ่เตียว
+กนกวรรณ บุญประเสริฐ
+กนกวรรณ รักทรัพย์
+กนกวรรณ สัจจพงษ์
+กนกวรรณ อุ้ยวงศ์ไพศาล
+กนกศักดิ์ ยิ่งยง
+กนกแก้ว กรสมิต
+กนิษฐา ทนุถนอมราษฎร์
+กนิษฐา หวังวิบูลย์กิจ
+กมล กาญจนโรจน์
+กมล คัมภีร์
+กมล เจตน์มงคลรัตน์
+กมล ชูตระกูลธรรม
+@SET [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B]
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java b/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java
new file mode 100644
index 00000000000..597721ca3ec
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/TransliteratingTextComponent.java
@@ -0,0 +1,257 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+
+import java.awt.event.KeyEvent;
+
+import com.ibm.icu.dev.demo.impl.DumbTextComponent;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.Transliterator;
+
+/**
+ * A subclass of {@link DumbTextComponent} that passes key events through
+ * a {@link com.ibm.icu.text.Transliterator}.
+ *
+ * @author Alan Liu
+ */
+public class TransliteratingTextComponent extends DumbTextComponent {
+
+ /**
+ * For serialization
+ */
+ private static final long serialVersionUID = -8672128213174154047L;
+
+ private static boolean DEBUG = false;
+
+ private Transliterator translit = null;
+
+ // NOTE: DISABLE THE START AND CURSOR UNTIL WE CAN GET IT TO WORK AT ALL
+
+ // Index into getText() where the start of transliteration is.
+ // As we commit text during transliteration, we advance
+ // this.
+ //private int start = 0;
+
+ // Index into getText() where the cursor is; cursor >= start
+ //private int cursor = 0;
+
+// private static final String COPYRIGHT =
+// "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+  * Creates a text component with no transliterator installed. Until
+  * {@link #setTransliterator} is called with a non-null value, typed
+  * keys are passed to the superclass unchanged.
+  */
+ public TransliteratingTextComponent() {
+     // Selection-change tracking (an ActionListener that reset the
+     // transliteration start) is disabled together with the start/cursor
+     // bookkeeping, so nothing beyond superclass construction happens here.
+     super();
+ }
+
+ /**
+  * {@link DumbTextComponent} API. Framework method that is called
+  * when a <code>KeyEvent</code> is received. If a
+  * <code>Transliterator</code> is set, the typed character is run
+  * through it and the transliterated text is inserted into the buffer.
+  * Otherwise the key start is reset to -1 and the event is handled by
+  * the superclass.
+  *
+  * @param e the key-typed event
+  */
+ protected void handleKeyTyped(KeyEvent e) {
+     char typed = e.getKeyChar();
+
+     if (translit != null) {
+         transliterate(typed, false);
+     } else {
+         setKeyStart(-1);
+         super.handleKeyTyped(e);
+     }
+ }
+
+ /**
+  * Finishes any pending transliteration. When a transliterator is set,
+  * runs {@link #transliterate} in flush mode; otherwise does nothing.
+  */
+ public void flush() {
+     if (translit == null) {
+         return;
+     }
+     // In flush mode the character argument is only echoed in debug output.
+     transliterate('\uFFFF', true);
+ }
+
+
+ /**
+  * Runs the text in front of the insertion point through the current
+  * transliterator and writes the result back into the component.
+  *
+  * The separate start/cursor bookkeeping this class once kept is
+  * disabled; the position where transliteration resumes is stored via
+  * <code>setKeyStart()</code> and read back via <code>getKeyStart()</code>.
+  *
+  * @param ch the typed character handed to the transliterator; when
+  *           <code>flush</code> is true it is only echoed in debug output
+  * @param flush if true, finish transliterating the text up to the
+  *           selection end instead of processing a new character
+  */
+ protected void transliterate(char ch, boolean flush) {
+     // Text to process: everything before the selection start, or
+     // everything before the selection end when flushing.
+     int limit;
+     if (flush) {
+         limit = getSelectionEnd();
+     } else {
+         limit = getSelectionStart();
+     }
+     String sourceText = getText().substring(0, limit);
+     ReplaceableString text = new ReplaceableString(sourceText);
+
+     // The whole buffer is context; transliteration resumes at the saved
+     // key start, or at the selection start when none is saved (-1).
+     Transliterator.Position pos = new Transliterator.Position();
+     pos.contextLimit = text.length();
+     pos.contextStart = 0;
+     int keyStart = getKeyStart();
+     pos.start = (keyStart == -1) ? getSelectionStart() : keyStart;
+     pos.limit = text.length();
+
+     if (DEBUG) {
+         System.out.println("Transliterator: " + translit.getID());
+         System.out.println("From:\t" + '"' + text.toString() + '"'
+                 + "; {cs: " + pos.contextStart
+                 + ", s: " + pos.start
+                 + ", l: " + pos.limit
+                 + ", cl: " + pos.contextLimit
+                 + "}" + "; '" + ch + "'"
+                 + " " + getKeyStart()
+                 );
+     }
+
+     if (!flush) {
+         translit.transliterate(text, pos, ch);
+     } else {
+         translit.finishTransliteration(text, pos);
+     }
+
+     if (DEBUG) {
+         System.out.println("To:\t" + '"' + text.toString() + '"'
+                 + "; {cs: " + pos.contextStart
+                 + ", s: " + pos.start
+                 + ", l: " + pos.limit
+                 + ", cl: " + pos.contextLimit
+                 + "}"
+                 );
+         System.out.println();
+     }
+
+     // The replaced range always runs to the selection end, so when typing
+     // (presumably) a non-empty selection is overwritten by the result.
+     replaceRange(text.toString(), 0, getSelectionEnd());
+
+     // Remember where the transliterator stopped for the next keystroke.
+     setKeyStart(pos.start);
+ }
+
+ /**
+  * Set the {@link com.ibm.icu.text.Transliterator} to use to process
+  * incoming <code>KeyEvent</code>s. Passing <code>null</code> turns
+  * transliteration off, so typed keys go to the superclass unchanged.
+  * @param t the {@link com.ibm.icu.text.Transliterator} to use
+  */
+ public void setTransliterator(Transliterator t) {
+     // The transliteration start is not reset when the transliterator
+     // changes; that bookkeeping is currently disabled.
+     this.translit = t;
+ }
+
+ /**
+  * Returns the transliterator currently applied to typed keys.
+  * @return the current transliterator, or <code>null</code> if none is set
+  */
+ public Transliterator getTransliterator() {
+     return this.translit;
+ }
+
+ /**
+ * Reset the start point at which transliteration begins. This
+ * needs to be done when the user moves the cursor or when the
+ * current {@link com.ibm.icu.text.Transliterator} is changed.
+ */
+ /*
+ private void resetTransliterationStart() {
+ start = getSelectionStart();
+ cursor = start;
+ }
+ */
+
+ /**
+ * Escape non-ASCII characters as Unicode.
+ * JUST FOR DEBUGGING OUTPUT.
+ */
+ public static final String escape(String s) {
+ StringBuffer buf = new StringBuffer();
+ for (int i=0; i= ' ' && c <= 0x007F) {
+ if (c == '\\') {
+ buf.append("\\\\"); // That is, "\\"
+ } else {
+ buf.append(c);
+ }
+ } else {
+ buf.append("\\u");
+ if (c < 0x1000) {
+ buf.append('0');
+ if (c < 0x100) {
+ buf.append('0');
+ if (c < 0x10) {
+ buf.append('0');
+ }
+ }
+ }
+ buf.append(Integer.toHexString(c));
+ }
+ }
+ return buf.toString();
+ }
+}
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java b/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java
new file mode 100644
index 00000000000..d0865c3bb96
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/TransliterationChart.java
@@ -0,0 +1,294 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.demo.translit;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+
+public class TransliterationChart {
+ /**
+  * Generates <code>transChart.html</code>, a chart of equivalent
+  * characters across the scripts listed in <code>indicScripts</code>.
+  *
+  * Each character of every non-Latin source script is transliterated to
+  * every other script and back. Pairs that round-trip between two
+  * non-Latin scripts are merged into equivalence classes, one table row
+  * per class. Characters whose NFC and NFD forms differ, and round trips
+  * through Latin that fail, are listed after the table.
+  *
+  * @param args ignored
+  * @throws IOException if the output file cannot be opened
+  */
+ public static void main(String[] args) throws IOException {
+     System.out.println("Start");
+     UnicodeSet lengthMarks = new UnicodeSet("[\u09D7\u0B56-\u0B57\u0BD7\u0C56\u0CD5-\u0CD6\u0D57\u0C55\u0CD5]");
+     int[] indicScripts = {
+         UScript.LATIN,
+         UScript.DEVANAGARI,
+         UScript.BENGALI,
+         UScript.GURMUKHI,
+         UScript.GUJARATI,
+         UScript.ORIYA,
+         UScript.TAMIL,
+         UScript.TELUGU,
+         UScript.KANNADA,
+         UScript.MALAYALAM,
+     };
+     String[] names = new String[indicScripts.length];
+     UnicodeSet[] sets = new UnicodeSet[indicScripts.length];
+     Transliterator[] fallbacks = new Transliterator[indicScripts.length];
+     for (int i = 0; i < indicScripts.length; ++i) {
+         names[i] = UScript.getName(indicScripts[i]);
+         sets[i] = new UnicodeSet("[[:" + names[i] + ":]&[[:L:][:M:]]&[:age=3.1:]]");
+         fallbacks[i] = Transliterator.getInstance("any-" + names[i]);
+     }
+     EquivClass eq = new EquivClass(new ReverseComparator());
+     PrintWriter pw = openPrintWriter("transChart.html");
+     try {
+         pw.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+         pw.println("<title>Indic Transliteration Chart</title>"
+                 + "<style>td.miss { background-color: #CCCCFF }</style></head><body>");
+
+         Transliterator anyToLatin = Transliterator.getInstance("any-latin");
+
+         String testString = "\u0946\u093E";
+
+         UnicodeSet failNorm = new UnicodeSet();
+         Set latinFail = new TreeSet();
+
+         for (int i = 0; i < indicScripts.length; ++i) {
+             if (indicScripts[i] == UScript.LATIN) continue;
+             String source = names[i];
+             System.out.println(source);
+             UnicodeSet sourceChars = sets[i];
+
+             for (int j = 0; j < indicScripts.length; ++j) {
+                 if (i == j) continue;
+                 String target = names[j];
+                 Transliterator forward = Transliterator.getInstance(source + '-' + target);
+                 Transliterator backward = forward.getInverse();
+                 UnicodeSetIterator it = new UnicodeSetIterator(sourceChars);
+                 while (it.next()) {
+                     if (lengthMarks.contains(it.codepoint)) continue;
+                     String s = Normalizer.normalize(it.codepoint,Normalizer.NFC,0);
+                     // Remember characters whose NFC and NFD forms differ.
+                     if (!s.equals(Normalizer.normalize(s,Normalizer.NFD,0))) {
+                         failNorm.add(it.codepoint);
+                     }
+                     String t = fix(forward.transliterate(s));
+                     if (t.equals(testString)) {
+                         System.out.println("debug");
+                     }
+
+                     String r = fix(backward.transliterate(t));
+                     if (Normalizer.compare(s,r,0) == 0) {
+                         // Only non-Latin targets contribute to the equivalence classes.
+                         if (indicScripts[j] != UScript.LATIN) eq.add(s,t);
+                     } else {
+                         if (indicScripts[j] == UScript.LATIN) {
+                             latinFail.add(s + " - " + t + " - " + r);
+                         }
+                     }
+                 }
+             }
+         }
+
+         // collect equivalents: header row with one abbreviated script name per column
+         pw.println("<table border='1' cellspacing='0'><tr>");
+         for (int i = 0; i < indicScripts.length; ++i) {
+             pw.print("<th>" + names[i].substring(0,3) + "</th>");
+         }
+         pw.println("</tr>");
+
+         Iterator rit = eq.getSetIterator(new MyComparator());
+         while (rit.hasNext()) {
+             Set equivs = (Set)rit.next();
+             pw.print("<tr>");
+             Iterator sit = equivs.iterator();
+             String source = (String)sit.next();
+             // First column: Latin rendering of the first member of the class.
+             String item = anyToLatin.transliterate(source);
+             if (item.equals("") || source.equals(item)) item = "&nbsp;";
+             pw.print("<td>" + item + "</td>");
+             for (int i = 1; i < indicScripts.length; ++i) {
+                 // Pick the first member of the class written entirely in script i.
+                 sit = equivs.iterator();
+                 item = "";
+                 while (sit.hasNext()) {
+                     String trial = (String)sit.next();
+                     if (!sets[i].containsAll(trial)) continue;
+                     item = trial;
+                     break;
+                 }
+                 String classString = "";
+                 if (item.equals("")) {
+                     // No member in this script: mark the cell and try the any-to-script fallback.
+                     classString = " class='miss'";
+                     String temp = fallbacks[i].transliterate(source);
+                     if (!temp.equals("") && !temp.equals(source)) item = temp;
+                 }
+                 String backup = item.equals("") ? "&nbsp;" : item;
+                 pw.print("<td" + classString + " title='" + getName(item, "; ") + "'>"
+                         + backup + "<br><tt>" + Utility.hex(item) + "</tt></td>");
+             }
+             pw.println("</tr>");
+         }
+         pw.println("</table>");
+
+         pw.println("<h2>Failed Normalization</h2>");
+
+         UnicodeSetIterator it = new UnicodeSetIterator(failNorm);
+         UnicodeSet pieces = new UnicodeSet();
+         while (it.next()) {
+             String s = UTF16.valueOf(it.codepoint);
+             String d = Normalizer.normalize(s,Normalizer.NFD,0);
+             String secondName = UCharacter.getName(d.charAt(1));
+             pw.println("Norm:" + s + ", " + Utility.hex(s) + " " + UCharacter.getName(it.codepoint)
+                     + "; " + d + ", " + Utility.hex(d) + ", ");
+             pw.println(secondName + "<br>");
+             if (secondName.indexOf("LENGTH") >= 0) pieces.add(d.charAt(1));
+         }
+         pw.println(pieces);
+
+         pw.println("<h2>Failed Round-Trip</h2>");
+         Iterator cit = latinFail.iterator();
+         while (cit.hasNext()) {
+             pw.println(cit.next() + "<br>");
+         }
+
+         pw.println("</body></html>");
+     } finally {
+         // Close the output file even if chart generation fails part-way.
+         pw.close();
+     }
+     System.out.println("Done");
+ }
+
+ public static String fix(String s) {
+ if (s.equals("\u0946\u093E")) return "\u094A";
+ if (s.equals("\u0C46\u0C3E")) return "\u0C4A";
+ if (s.equals("\u0CC6\u0CBE")) return "\u0CCA";
+
+ if (s.equals("\u0947\u093E")) return "\u094B";
+ if (s.equals("\u0A47\u0A3E")) return "\u0A4B";
+ if (s.equals("\u0AC7\u0ABE")) return "\u0ACB";
+ if (s.equals("\u0C47\u0C3E")) return "\u0C4B";
+ if (s.equals("\u0CC7\u0CBE")) return "\u0CCB";
+
+ //return Normalizer.normalize(s,Normalizer.NFD,0);
+ return s;
+ }
+
+ /**
+  * Opens a file for text output, announcing its absolute path on
+  * standard output first. The text is written through a 4 KB buffer
+  * using the "UTF8" charset.
+  *
+  * @param fileName path of the file to create
+  * @return a writer for the file; the caller is responsible for closing it
+  * @throws IOException if the file cannot be opened
+  */
+ public static PrintWriter openPrintWriter(String fileName) throws IOException {
+     File target = new File(fileName);
+     System.out.println("Creating file: " + target.getAbsoluteFile());
+
+     FileOutputStream bytes = new FileOutputStream(fileName);
+     OutputStreamWriter chars = new OutputStreamWriter(bytes, "UTF8");
+     BufferedWriter buffered = new BufferedWriter(chars, 4*1024);
+     return new PrintWriter(buffered);
+ }
+
+
+ public static String getName(String s, String separator) {
+ int cp;
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
+ cp = UTF16.charAt(s,i);
+ if (i != 0) sb.append(separator);
+ sb.append(UCharacter.getName(cp));
+ }
+ return sb.toString();
+ }
+
+ /**
+  * Orders two <code>TreeSet</code>s of strings by comparing their
+  * elements pairwise in iteration order. The first unequal pair decides;
+  * if one set is a prefix of the other, the shorter set sorts first.
+  */
+ static class MyComparator implements Comparator {
+     public int compare(Object o1, Object o2) {
+         Iterator left = ((TreeSet) o1).iterator();
+         Iterator right = ((TreeSet) o2).iterator();
+         for (;;) {
+             boolean moreLeft = left.hasNext();
+             if (!moreLeft || !right.hasNext()) {
+                 // At least one side is exhausted: the longer set is greater.
+                 if (moreLeft) {
+                     return 1;
+                 }
+                 return right.hasNext() ? -1 : 0;
+             }
+             String x = (String) left.next();
+             String y = (String) right.next();
+             int order = x.compareTo(y);
+             if (order != 0) {
+                 return order;
+             }
+         }
+     }
+
+ }
+ /**
+  * Orders objects by their <code>toString()</code> values. A string whose
+  * first char is below U+0900 sorts before one whose first char is above
+  * U+0900; otherwise natural <code>String</code> ordering applies. Empty
+  * strings are compared with natural ordering (so they sort first)
+  * instead of failing on <code>charAt(0)</code>.
+  */
+ static class ReverseComparator implements Comparator {
+     public int compare(Object o1, Object o2) {
+         String a = o1.toString();
+         String b = o2.toString();
+         // An empty string has no first char to inspect.
+         if (a.length() == 0 || b.length() == 0) {
+             return a.compareTo(b);
+         }
+         char a1 = a.charAt(0);
+         char b1 = b.charAt(0);
+         if (a1 < 0x900 && b1 > 0x900) return -1;
+         if (a1 > 0x900 && b1 < 0x900) return +1;
+         return a.compareTo(b);
+     }
+ }
+
+ /**
+  * Groups items into equivalence classes. Each call to
+  * {@link #add(Object, Object)} declares two items equivalent; items
+  * linked directly or indirectly end up in the same set. Every set is a
+  * <code>TreeSet</code> ordered by the comparator given at construction.
+  */
+ static class EquivClass {
+     EquivClass(Comparator c) {
+         comparator = c;
+     }
+     // Maps every item seen so far to the (shared) set that contains it.
+     private HashMap itemToSet = new HashMap();
+     private Comparator comparator;
+
+     /**
+      * Records that <code>a</code> and <code>b</code> are equivalent,
+      * creating, extending or merging sets as needed.
+      */
+     void add(Object a, Object b) {
+         Set sa = (Set)itemToSet.get(a);
+         Set sb = (Set)itemToSet.get(b);
+         if (sa == null && sb == null) { // new set!
+             Set s = new TreeSet(comparator);
+             s.add(a);
+             s.add(b);
+             itemToSet.put(a, s);
+             itemToSet.put(b, s);
+         } else if (sa == null) {
+             sb.add(a);
+             // Register the new member so later lookups find its set.
+             itemToSet.put(a, sb);
+         } else if (sb == null) {
+             sa.add(b);
+             itemToSet.put(b, sa);
+         } else if (sa != sb) { // merge sets, dumping sb
+             sa.addAll(sb);
+             Iterator it = sb.iterator();
+             while (it.hasNext()) {
+                 itemToSet.put(it.next(), sa);
+             }
+         }
+     }
+
+     /** Iterates over the distinct sets, sorted by the supplied comparator. */
+     private class MyIterator implements Iterator {
+         private Iterator it;
+         MyIterator (Comparator comp) {
+             // Each set is stored once per member; the TreeSet keeps one
+             // entry per group of sets that comp treats as equal.
+             TreeSet values = new TreeSet(comp);
+             values.addAll(itemToSet.values());
+             it = values.iterator();
+         }
+
+         public boolean hasNext() {
+             return it.hasNext();
+         }
+         public Object next() {
+             return it.next();
+         }
+         public void remove() {
+             throw new UnsupportedOperationException("can't remove");
+         }
+     }
+
+     /**
+      * Returns a read-only iterator over the equivalence sets.
+      *
+      * @param comp comparator used to order the sets
+      * @return an iterator whose elements are <code>Set</code>s
+      */
+     public Iterator getSetIterator (Comparator comp) {
+         return new MyIterator(comp);
+     }
+
+ }
+}
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/demo.bat b/demos/src/com/ibm/icu/dev/demo/translit/demo.bat
new file mode 100755
index 00000000000..dd9c205fbaa
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/demo.bat
@@ -0,0 +1,13 @@
+REM /*
+REM *******************************************************************************
+REM * Copyright (C) 1996-2004, International Business Machines Corporation and *
+REM * others. All Rights Reserved. *
+REM *******************************************************************************
+REM */
+REM For best results, run the demo as an applet inside of Netscape
+REM with Bitstream Cyberbit installed.
+
+REM setup your JDK 1.1.x path and classpath here:
+REM (JDK11 is presumably a user-provided script that sets up the JDK environment)
+call JDK11
+REM Put translit.jar from the parent directory at the front of the class path.
+set CLASSPATH=../translit.jar;%CLASSPATH%
+REM Start the Demo class with javaw.
+javaw Demo
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/demo.html b/demos/src/com/ibm/icu/dev/demo/translit/demo.html
new file mode 100644
index 00000000000..2a7ee5bceec
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/demo.html
@@ -0,0 +1,34 @@
+
+
+
+Transliteration Demo
+
+
+
+
+
+
+If you don't see a button above, then your browser is failing to
+locate the necessary Java class files.
+
+
+
+One way to make this work is to copy this HTML file to
+<code>icu4j/src</code>, and make sure the Java files in the directories
+under <code>icu4j/src/com</code> are built. Then open this HTML file
+using a browser or appletviewer.
+
+
+
+For best results, run this demo as an applet within Netscape with
+Bitstream Cyberbit installed.
+
+
+
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/package.html b/demos/src/com/ibm/icu/dev/demo/translit/package.html
new file mode 100644
index 00000000000..8355d1f03f7
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/package.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+Transliterator demo application.
+
+
\ No newline at end of file
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt
new file mode 100644
index 00000000000..8f7c21d3a62
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Han_Pinyin.txt
@@ -0,0 +1,20365 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2001, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+# Generated: Fri Jan 26 15:14:48 2001
+# Tool: ..\..\tools\translit\hanpinyin.pl
+#--------------------------------------------------------------------
+
+# Han-Pinyin
+
+# Mechanically derived from \desk\Unihan.txt (ftp.unicode.org), version:
+#
+# Name: Unihan database
+# Unicode version: 3.0b1
+# Table version: 1.1
+# Date: 2 July 1999
+#
+# Copyright (c) 1996-1999 Unicode, Inc. All Rights reserved.
+
+丁 > dīng; #4E01
+丂 > kăo; #4E02
+七 > qī; #4E03
+丄 > shàng; #4E04
+丅 > xià; #4E05
+万 > mò; #4E07
+丈 > zhàng; #4E08
+三 > sān; #4E09
+上 > shàng; #4E0A
+下 > xià; #4E0B
+丌 > jī; #4E0C
+不 > bù; #4E0D
+与 > yŭ; #4E0E
+丏 > miăn; #4E0F
+丐 > gài; #4E10
+丑 > chŏu; #4E11
+丒 > chŏu; #4E12
+专 > zhuān; #4E13
+且 > qiĕ; #4E14
+丕 > pī; #4E15
+世 > shì; #4E16
+丗 > shì; #4E17
+丘 > qīu; #4E18
+丙 > bĭng; #4E19
+业 > yè; #4E1A
+丛 > cóng; #4E1B
+东 > dōng; #4E1C
+丝 > sī; #4E1D
+丞 > chéng; #4E1E
+丟 > dīu; #4E1F
+丠 > qīu; #4E20
+両 > liăng; #4E21
+丢 > dīu; #4E22
+丣 > yŏu; #4E23
+两 > liăng; #4E24
+严 > yán; #4E25
+並 > bìng; #4E26
+丧 > sāng; #4E27
+丨 > gŭn; #4E28
+丩 > jīu; #4E29
+个 > gè; #4E2A
+丫 > yā; #4E2B
+丬 > qiáng; #4E2C
+中 > zhōng; #4E2D
+丮 > jĭ; #4E2E
+丯 > jiè; #4E2F
+丰 > fēng; #4E30
+丱 > guàn; #4E31
+串 > chuàn; #4E32
+丳 > chăn; #4E33
+临 > lín; #4E34
+丵 > zhŭo; #4E35
+丶 > zhŭ; #4E36
+丸 > wán; #4E38
+丹 > dān; #4E39
+为 > wèi; #4E3A
+主 > zhŭ; #4E3B
+丼 > jĭng; #4E3C
+丽 > lì; #4E3D
+举 > jŭ; #4E3E
+丿 > piĕ; #4E3F
+乀 > fú; #4E40
+乁 > yí; #4E41
+乂 > yì; #4E42
+乃 > năi; #4E43
+久 > jĭu; #4E45
+乆 > jĭu; #4E46
+乇 > zhé; #4E47
+么 > yāo; #4E48
+义 > yì; #4E49
+之 > zhī; #4E4B
+乌 > wū; #4E4C
+乍 > zhà; #4E4D
+乎 > hū; #4E4E
+乏 > fá; #4E4F
+乐 > lè; #4E50
+乑 > zhòng; #4E51
+乒 > pīng; #4E52
+乓 > pang; #4E53
+乔 > qiáo; #4E54
+乕 > hŭ; #4E55
+乖 > guāi; #4E56
+乗 > chéng; #4E57
+乘 > chéng; #4E58
+乙 > yĭ; #4E59
+乚 > yĭn; #4E5A
+乜 > miē; #4E5C
+九 > jĭu; #4E5D
+乞 > qĭ; #4E5E
+也 > yĕ; #4E5F
+习 > xí; #4E60
+乡 > xiāng; #4E61
+乢 > gài; #4E62
+乣 > dīu; #4E63
+书 > shū; #4E66
+乨 > shĭ; #4E68
+乩 > jī; #4E69
+乪 > nāng; #4E6A
+乫 > jiā; #4E6B
+乭 > shí; #4E6D
+买 > măi; #4E70
+乱 > luàn; #4E71
+乳 > rŭ; #4E73
+乴 > xué; #4E74
+乵 > yăn; #4E75
+乶 > fŭ; #4E76
+乷 > shā; #4E77
+乸 > nă; #4E78
+乹 > gān; #4E79
+乾 > gān; #4E7E
+乿 > chì; #4E7F
+亀 > gūi; #4E80
+亁 > gān; #4E81
+亂 > luàn; #4E82
+亃 > lín; #4E83
+亄 > yì; #4E84
+亅 > jué; #4E85
+了 > liăo; #4E86
+予 > yú; #4E88
+争 > zhēng; #4E89
+亊 > shì; #4E8A
+事 > shì; #4E8B
+二 > èr; #4E8C
+亍 > chù; #4E8D
+于 > yú; #4E8E
+亏 > yú; #4E8F
+亐 > yú; #4E90
+云 > yún; #4E91
+互 > hù; #4E92
+亓 > qí; #4E93
+五 > wŭ; #4E94
+井 > jĭng; #4E95
+亖 > sì; #4E96
+亗 > sùi; #4E97
+亘 > gèn; #4E98
+亙 > gèn; #4E99
+亚 > yà; #4E9A
+些 > xiē; #4E9B
+亜 > yà; #4E9C
+亝 > qí; #4E9D
+亞 > yà; #4E9E
+亟 > jí; #4E9F
+亠 > tóu; #4EA0
+亡 > wáng; #4EA1
+亢 > kàng; #4EA2
+亣 > tà; #4EA3
+交 > jiāo; #4EA4
+亥 > hài; #4EA5
+亦 > yì; #4EA6
+产 > chăn; #4EA7
+亨 > hēng; #4EA8
+亩 > mŭ; #4EA9
+享 > xiăng; #4EAB
+京 > jīng; #4EAC
+亭 > tíng; #4EAD
+亮 > liàng; #4EAE
+亯 > xiăng; #4EAF
+亰 > jīng; #4EB0
+亱 > yè; #4EB1
+亲 > qīn; #4EB2
+亳 > bó; #4EB3
+亴 > yòu; #4EB4
+亵 > xiè; #4EB5
+亶 > dăn; #4EB6
+亷 > lián; #4EB7
+亸 > dŭo; #4EB8
+亹 > wĕi; #4EB9
+人 > rén; #4EBA
+亻 > rén; #4EBB
+亼 > jí; #4EBC
+亾 > wáng; #4EBE
+亿 > yì; #4EBF
+什 > shí; #4EC0
+仁 > rén; #4EC1
+仂 > lè; #4EC2
+仃 > dīng; #4EC3
+仄 > zè; #4EC4
+仅 > jĭn; #4EC5
+仆 > pū; #4EC6
+仇 > chóu; #4EC7
+仈 > bā; #4EC8
+仉 > zhăng; #4EC9
+今 > jīn; #4ECA
+介 > jiè; #4ECB
+仌 > bīng; #4ECC
+仍 > réng; #4ECD
+从 > cóng; #4ECE
+仏 > fó; #4ECF
+仐 > săn; #4ED0
+仑 > lún; #4ED1
+仓 > cāng; #4ED3
+仔 > zĭ; #4ED4
+仕 > shì; #4ED5
+他 > tā; #4ED6
+仗 > zhàng; #4ED7
+付 > fù; #4ED8
+仙 > xiān; #4ED9
+仚 > xiān; #4EDA
+仛 > tūo; #4EDB
+仜 > hóng; #4EDC
+仝 > tóng; #4EDD
+仞 > rèn; #4EDE
+仟 > qiān; #4EDF
+仠 > gán; #4EE0
+仡 > yì; #4EE1
+仢 > dí; #4EE2
+代 > dài; #4EE3
+令 > lìng; #4EE4
+以 > yĭ; #4EE5
+仦 > chào; #4EE6
+仧 > cháng; #4EE7
+仨 > sā; #4EE8
+仪 > yí; #4EEA
+仫 > mù; #4EEB
+们 > men; #4EEC
+仭 > rèn; #4EED
+仮 > jiă; #4EEE
+仯 > chào; #4EEF
+仰 > yăng; #4EF0
+仱 > qián; #4EF1
+仲 > zhòng; #4EF2
+仳 > pĭ; #4EF3
+仴 > wàn; #4EF4
+仵 > wŭ; #4EF5
+件 > jiàn; #4EF6
+价 > jiè; #4EF7
+仸 > yăo; #4EF8
+仹 > fēng; #4EF9
+仺 > cāng; #4EFA
+任 > rèn; #4EFB
+仼 > wáng; #4EFC
+份 > fèn; #4EFD
+仾 > dī; #4EFE
+仿 > făng; #4EFF
+伀 > zhōng; #4F00
+企 > qĭ; #4F01
+伂 > pèi; #4F02
+伃 > yú; #4F03
+伄 > diào; #4F04
+伅 > dùn; #4F05
+伆 > wèn; #4F06
+伇 > yì; #4F07
+伈 > xĭn; #4F08
+伉 > kàng; #4F09
+伊 > yī; #4F0A
+伋 > jí; #4F0B
+伌 > ài; #4F0C
+伍 > wŭ; #4F0D
+伎 > jì; #4F0E
+伏 > fú; #4F0F
+伐 > fá; #4F10
+休 > xīu; #4F11
+伒 > jìn; #4F12
+伓 > bēi; #4F13
+伔 > dăn; #4F14
+伕 > fū; #4F15
+伖 > tăng; #4F16
+众 > zhòng; #4F17
+优 > yōu; #4F18
+伙 > hŭo; #4F19
+会 > hùi; #4F1A
+伛 > yŭ; #4F1B
+伜 > cùi; #4F1C
+伝 > chuán; #4F1D
+伞 > săn; #4F1E
+伟 > wĕi; #4F1F
+传 > chuán; #4F20
+伡 > chē; #4F21
+伢 > yá; #4F22
+伣 > xiàn; #4F23
+伤 > shāng; #4F24
+伥 > chāng; #4F25
+伦 > lún; #4F26
+伧 > cāng; #4F27
+伨 > xùn; #4F28
+伩 > xìn; #4F29
+伪 > wĕi; #4F2A
+伫 > zhù; #4F2B
+伭 > xuán; #4F2D
+伮 > nú; #4F2E
+伯 > bó; #4F2F
+估 > gū; #4F30
+伱 > nĭ; #4F31
+伲 > nĭ; #4F32
+伳 > xiè; #4F33
+伴 > bàn; #4F34
+伵 > xù; #4F35
+伶 > líng; #4F36
+伷 > zhòu; #4F37
+伸 > shēn; #4F38
+伹 > qū; #4F39
+伺 > sì; #4F3A
+伻 > bēng; #4F3B
+似 > sì; #4F3C
+伽 > jiā; #4F3D
+伾 > pī; #4F3E
+伿 > yì; #4F3F
+佀 > sì; #4F40
+佁 > ăi; #4F41
+佂 > zhēng; #4F42
+佃 > diàn; #4F43
+佄 > hán; #4F44
+佅 > mài; #4F45
+但 > dàn; #4F46
+佇 > zhù; #4F47
+佈 > bù; #4F48
+佉 > qū; #4F49
+佊 > bĭ; #4F4A
+佋 > shào; #4F4B
+佌 > cĭ; #4F4C
+位 > wèi; #4F4D
+低 > dī; #4F4E
+住 > zhù; #4F4F
+佐 > zŭo; #4F50
+佑 > yòu; #4F51
+佒 > yāng; #4F52
+体 > tĭ; #4F53
+佔 > zhàn; #4F54
+何 > hé; #4F55
+佖 > bì; #4F56
+佗 > tūo; #4F57
+佘 > shé; #4F58
+余 > yú; #4F59
+佚 > yì; #4F5A
+佛 > fó; #4F5B
+作 > zùo; #4F5C
+佝 > kòu; #4F5D
+佞 > nìng; #4F5E
+佟 > tóng; #4F5F
+你 > nĭ; #4F60
+佡 > xuān; #4F61
+佢 > qú; #4F62
+佣 > yòng; #4F63
+佤 > wă; #4F64
+佥 > qiān; #4F65
+佧 > kă; #4F67
+佩 > pèi; #4F69
+佪 > huái; #4F6A
+佫 > hè; #4F6B
+佬 > lăo; #4F6C
+佭 > xiáng; #4F6D
+佮 > gé; #4F6E
+佯 > yáng; #4F6F
+佰 > băi; #4F70
+佱 > fă; #4F71
+佲 > míng; #4F72
+佳 > jia; #4F73
+佴 > èr; #4F74
+併 > bìng; #4F75
+佶 > jí; #4F76
+佷 > hĕn; #4F77
+佸 > húo; #4F78
+佹 > gŭi; #4F79
+佺 > quán; #4F7A
+佻 > tiāo; #4F7B
+佼 > jiăo; #4F7C
+佽 > cì; #4F7D
+佾 > yì; #4F7E
+使 > shĭ; #4F7F
+侀 > xíng; #4F80
+侁 > shēn; #4F81
+侂 > tūo; #4F82
+侃 > kăn; #4F83
+侄 > zhí; #4F84
+侅 > gāi; #4F85
+來 > lái; #4F86
+侇 > yí; #4F87
+侈 > chĭ; #4F88
+侉 > kuā; #4F89
+侊 > guāng; #4F8A
+例 > lì; #4F8B
+侌 > yīn; #4F8C
+侍 > shì; #4F8D
+侎 > mĭ; #4F8E
+侏 > zhū; #4F8F
+侐 > xù; #4F90
+侑 > yòu; #4F91
+侒 > ān; #4F92
+侓 > lù; #4F93
+侔 > móu; #4F94
+侕 > ér; #4F95
+侖 > lún; #4F96
+侗 > tóng; #4F97
+侘 > chà; #4F98
+侙 > chì; #4F99
+侚 > xùn; #4F9A
+供 > gōng; #4F9B
+侜 > zhōu; #4F9C
+依 > yī; #4F9D
+侞 > rŭ; #4F9E
+侟 > jiàn; #4F9F
+侠 > xiá; #4FA0
+価 > jià; #4FA1
+侢 > zài; #4FA2
+侣 > lǚ; #4FA3
+侥 > jiăo; #4FA5
+侦 > zhēn; #4FA6
+侧 > cè; #4FA7
+侨 > qiáo; #4FA8
+侩 > kuài; #4FA9
+侪 > chái; #4FAA
+侫 > nìng; #4FAB
+侬 > nóng; #4FAC
+侭 > jĭn; #4FAD
+侮 > wŭ; #4FAE
+侯 > hóu; #4FAF
+侰 > jĭong; #4FB0
+侱 > chĕng; #4FB1
+侲 > zhèn; #4FB2
+侳 > zùo; #4FB3
+侴 > chŏu; #4FB4
+侵 > qīn; #4FB5
+侶 > lǚ; #4FB6
+侷 > jú; #4FB7
+侸 > shù; #4FB8
+侹 > tĭng; #4FB9
+侺 > shèn; #4FBA
+侻 > tūo; #4FBB
+侼 > bó; #4FBC
+侽 > nán; #4FBD
+侾 > hāo; #4FBE
+便 > biàn; #4FBF
+俀 > tŭi; #4FC0
+俁 > yŭ; #4FC1
+係 > xì; #4FC2
+促 > cù; #4FC3
+俄 > é; #4FC4
+俅 > qíu; #4FC5
+俆 > xú; #4FC6
+俇 > kuăng; #4FC7
+俈 > kù; #4FC8
+俉 > wù; #4FC9
+俊 > jùn; #4FCA
+俋 > yì; #4FCB
+俌 > fŭ; #4FCC
+俍 > láng; #4FCD
+俎 > zŭ; #4FCE
+俏 > qiào; #4FCF
+俐 > lì; #4FD0
+俑 > yŏng; #4FD1
+俒 > hùn; #4FD2
+俓 > jìng; #4FD3
+俔 > xiàn; #4FD4
+俕 > sàn; #4FD5
+俖 > păi; #4FD6
+俗 > sú; #4FD7
+俘 > fú; #4FD8
+俙 > xī; #4FD9
+俚 > lĭ; #4FDA
+俛 > fŭ; #4FDB
+俜 > pīng; #4FDC
+保 > băo; #4FDD
+俞 > yú; #4FDE
+俟 > sì; #4FDF
+俠 > xiá; #4FE0
+信 > xìn; #4FE1
+俢 > xīu; #4FE2
+俣 > yŭ; #4FE3
+俤 > tì; #4FE4
+俥 > chē; #4FE5
+俦 > chóu; #4FE6
+俨 > yăn; #4FE8
+俩 > liă; #4FE9
+俪 > lì; #4FEA
+俫 > lái; #4FEB
+俭 > jiăn; #4FED
+修 > xīu; #4FEE
+俯 > fŭ; #4FEF
+俰 > hè; #4FF0
+俱 > jù; #4FF1
+俲 > xiào; #4FF2
+俳 > pái; #4FF3
+俴 > jiàn; #4FF4
+俵 > biào; #4FF5
+俶 > chù; #4FF6
+俷 > fèi; #4FF7
+俸 > fèng; #4FF8
+俹 > yà; #4FF9
+俺 > ăn; #4FFA
+俻 > bèi; #4FFB
+俼 > yù; #4FFC
+俽 > xīn; #4FFD
+俾 > bĭ; #4FFE
+俿 > jiàn; #4FFF
+倀 > chāng; #5000
+倁 > chí; #5001
+倂 > bìng; #5002
+倃 > zán; #5003
+倄 > yáo; #5004
+倅 > cùi; #5005
+倆 > liă; #5006
+倇 > wăn; #5007
+倈 > lái; #5008
+倉 > cāng; #5009
+倊 > zòng; #500A
+個 > gè; #500B
+倌 > guān; #500C
+倍 > bèi; #500D
+倎 > tiān; #500E
+倏 > shū; #500F
+倐 > shū; #5010
+們 > men; #5011
+倒 > dăo; #5012
+倓 > tán; #5013
+倔 > jué; #5014
+倕 > chúi; #5015
+倖 > xìng; #5016
+倗 > péng; #5017
+倘 > tăng; #5018
+候 > hòu; #5019
+倚 > yĭ; #501A
+倛 > qī; #501B
+倜 > tì; #501C
+倝 > gàn; #501D
+倞 > jìng; #501E
+借 > jiè; #501F
+倠 > sūi; #5020
+倡 > chàng; #5021
+倢 > jié; #5022
+倣 > făng; #5023
+値 > zhí; #5024
+倥 > kōng; #5025
+倦 > juàn; #5026
+倧 > zōng; #5027
+倨 > jù; #5028
+倩 > qiàn; #5029
+倪 > ní; #502A
+倫 > lún; #502B
+倬 > zhūo; #502C
+倭 > wēi; #502D
+倮 > lŭo; #502E
+倯 > sōng; #502F
+倰 > léng; #5030
+倱 > hùn; #5031
+倲 > dōng; #5032
+倳 > zì; #5033
+倴 > bèn; #5034
+倵 > wŭ; #5035
+倶 > jù; #5036
+倷 > nài; #5037
+倸 > căi; #5038
+倹 > jiăn; #5039
+债 > zhài; #503A
+倻 > yē; #503B
+值 > zhí; #503C
+倽 > shà; #503D
+倾 > qīng; #503E
+偀 > yīng; #5040
+偁 > chēng; #5041
+偂 > jiān; #5042
+偃 > yăn; #5043
+偄 > nuàn; #5044
+偅 > zhòng; #5045
+偆 > chŭn; #5046
+假 > jiă; #5047
+偈 > jié; #5048
+偉 > wĕi; #5049
+偊 > yŭ; #504A
+偋 > bĭng; #504B
+偌 > rùo; #504C
+偍 > tí; #504D
+偎 > wēi; #504E
+偏 > piān; #504F
+偐 > yàn; #5050
+偑 > fēng; #5051
+偒 > tăng; #5052
+偓 > wò; #5053
+偔 > è; #5054
+偕 > xié; #5055
+偖 > chĕ; #5056
+偗 > shĕng; #5057
+偘 > kăn; #5058
+偙 > dì; #5059
+做 > zùo; #505A
+偛 > chā; #505B
+停 > tíng; #505C
+偝 > bèi; #505D
+偞 > yè; #505E
+偟 > huáng; #505F
+偠 > yăo; #5060
+偡 > zhàn; #5061
+偢 > chŏu; #5062
+偣 > yān; #5063
+偤 > yŏu; #5064
+健 > jiàn; #5065
+偦 > xū; #5066
+偧 > zhā; #5067
+偨 > cī; #5068
+偩 > fù; #5069
+偪 > bī; #506A
+偫 > zhì; #506B
+偬 > zŏng; #506C
+偭 > miăn; #506D
+偮 > jí; #506E
+偯 > yĭ; #506F
+偰 > xiè; #5070
+偱 > xún; #5071
+偲 > sī; #5072
+偳 > duān; #5073
+側 > cè; #5074
+偵 > zhēn; #5075
+偶 > ŏu; #5076
+偷 > tōu; #5077
+偸 > tōu; #5078
+偹 > bèi; #5079
+偺 > zá; #507A
+偻 > lǚ; #507B
+偼 > jié; #507C
+偽 > wĕi; #507D
+偾 > fèn; #507E
+偿 > cháng; #507F
+傀 > gūi; #5080
+傁 > sŏu; #5081
+傂 > zhì; #5082
+傃 > sù; #5083
+傄 > xiā; #5084
+傅 > fù; #5085
+傆 > yuàn; #5086
+傇 > rŏng; #5087
+傈 > lì; #5088
+傉 > rù; #5089
+傊 > yŭn; #508A
+傋 > gòu; #508B
+傌 > mà; #508C
+傍 > bàng; #508D
+傎 > diān; #508E
+傏 > táng; #508F
+傐 > hào; #5090
+傑 > jié; #5091
+傒 > xī; #5092
+傓 > shàn; #5093
+傔 > qiàn; #5094
+傕 > jué; #5095
+傖 > cāng; #5096
+傗 > chù; #5097
+傘 > săn; #5098
+備 > bèi; #5099
+傚 > xiào; #509A
+傛 > yŏng; #509B
+傜 > yáo; #509C
+傝 > tàn; #509D
+傞 > sūo; #509E
+傟 > yăng; #509F
+傠 > fā; #50A0
+傡 > bìng; #50A1
+傢 > jiā; #50A2
+傣 > dăi; #50A3
+傤 > zài; #50A4
+傥 > tăng; #50A5
+傧 > bìn; #50A7
+储 > chŭ; #50A8
+傩 > núo; #50A9
+傪 > cān; #50AA
+傫 > lĕi; #50AB
+催 > cūi; #50AC
+傭 > yōng; #50AD
+傮 > zāo; #50AE
+傯 > zŏng; #50AF
+傰 > péng; #50B0
+傱 > sŏng; #50B1
+傲 > ào; #50B2
+傳 > chuán; #50B3
+傴 > yŭ; #50B4
+債 > zhài; #50B5
+傶 > còu; #50B6
+傷 > shāng; #50B7
+傸 > qiăng; #50B8
+傹 > jìng; #50B9
+傺 > chì; #50BA
+傻 > shă; #50BB
+傼 > hàn; #50BC
+傽 > zhāng; #50BD
+傾 > qīng; #50BE
+傿 > yàn; #50BF
+僀 > dì; #50C0
+僁 > xī; #50C1
+僂 > lǚ; #50C2
+僃 > bèi; #50C3
+僄 > piào; #50C4
+僅 > jĭn; #50C5
+僆 > lián; #50C6
+僇 > lù; #50C7
+僈 > màn; #50C8
+僉 > qiān; #50C9
+僊 > xiān; #50CA
+僋 > tàn; #50CB
+僌 > yíng; #50CC
+働 > dòng; #50CD
+僎 > zhuàn; #50CE
+像 > xiàng; #50CF
+僐 > shàn; #50D0
+僑 > qiáo; #50D1
+僒 > jĭong; #50D2
+僓 > tŭi; #50D3
+僔 > zŭn; #50D4
+僕 > pú; #50D5
+僖 > xī; #50D6
+僗 > láo; #50D7
+僘 > chăng; #50D8
+僙 > guāng; #50D9
+僚 > liáo; #50DA
+僛 > qī; #50DB
+僜 > dèng; #50DC
+僝 > chán; #50DD
+僞 > wĕi; #50DE
+僟 > jī; #50DF
+僠 > fān; #50E0
+僡 > hùi; #50E1
+僢 > chuăn; #50E2
+僣 > jiàn; #50E3
+僤 > dàn; #50E4
+僥 > jiăo; #50E5
+僦 > jìu; #50E6
+僧 > sēng; #50E7
+僨 > fèn; #50E8
+僩 > xiàn; #50E9
+僪 > jué; #50EA
+僫 > è; #50EB
+僬 > jiāo; #50EC
+僭 > jiàn; #50ED
+僮 > tóng; #50EE
+僯 > lĭn; #50EF
+僰 > bó; #50F0
+僱 > gù; #50F1
+僳 > sù; #50F3
+僴 > xiàn; #50F4
+僵 > jiāng; #50F5
+僶 > mĭn; #50F6
+僷 > yè; #50F7
+僸 > jìn; #50F8
+價 > jià; #50F9
+僺 > qiào; #50FA
+僻 > pì; #50FB
+僼 > fēng; #50FC
+僽 > zhòu; #50FD
+僾 > ài; #50FE
+僿 > sài; #50FF
+儀 > yí; #5100
+儁 > jùn; #5101
+儂 > nóng; #5102
+儃 > chán; #5103
+億 > yì; #5104
+儅 > dāng; #5105
+儆 > jĭng; #5106
+儇 > xuān; #5107
+儈 > kuài; #5108
+儉 > jiăn; #5109
+儊 > chù; #510A
+儋 > dān; #510B
+儌 > jiăo; #510C
+儍 > shă; #510D
+儎 > zài; #510E
+儐 > bìn; #5110
+儑 > àn; #5111
+儒 > rú; #5112
+儓 > tái; #5113
+儔 > chóu; #5114
+儕 > chái; #5115
+儖 > lán; #5116
+儗 > nĭ; #5117
+儘 > jĭn; #5118
+儙 > qiàn; #5119
+儚 > méng; #511A
+儛 > wŭ; #511B
+儜 > níng; #511C
+儝 > qíong; #511D
+儞 > nĭ; #511E
+償 > cháng; #511F
+儠 > liè; #5120
+儡 > lĕi; #5121
+儢 > lǚ; #5122
+儣 > kuàng; #5123
+儤 > bào; #5124
+儥 > dú; #5125
+儦 > biāo; #5126
+儧 > zăn; #5127
+儨 > zhí; #5128
+儩 > sì; #5129
+優 > yōu; #512A
+儫 > háo; #512B
+儬 > chèn; #512C
+儭 > chèn; #512D
+儮 > lì; #512E
+儯 > téng; #512F
+儰 > wĕi; #5130
+儱 > lŏng; #5131
+儲 > chŭ; #5132
+儳 > chàn; #5133
+儴 > ráng; #5134
+儵 > shū; #5135
+儶 > hùi; #5136
+儷 > lì; #5137
+儸 > lúo; #5138
+儹 > zăn; #5139
+儺 > núo; #513A
+儻 > tăng; #513B
+儼 > yăn; #513C
+儽 > lĕi; #513D
+儾 > nàng; #513E
+儿 > ér; #513F
+兀 > wù; #5140
+允 > yŭn; #5141
+兂 > zān; #5142
+元 > yuán; #5143
+兄 > xīong; #5144
+充 > chōng; #5145
+兆 > zhào; #5146
+兇 > xīong; #5147
+先 > xiān; #5148
+光 > guāng; #5149
+兊 > dùi; #514A
+克 > kè; #514B
+兌 > dùi; #514C
+免 > miăn; #514D
+兎 > tù; #514E
+兏 > cháng; #514F
+児 > ér; #5150
+兑 > dùi; #5151
+兒 > ér; #5152
+兓 > xīn; #5153
+兔 > tù; #5154
+兕 > sì; #5155
+兖 > yăn; #5156
+兗 > yăn; #5157
+兘 > shĭ; #5158
+兙 > shí' 'kè; #5159
+党 > dăng; #515A
+兛 > qiān; #515B
+兜 > dōu; #515C
+兝 > fēn; #515D
+兞 > máo; #515E
+兟 > shēn; #515F
+兠 > dōu; #5160
+兡 > băi' 'kè; #5161
+兢 > jīng; #5162
+兣 > lĭ; #5163
+兤 > huáng; #5164
+入 > rù; #5165
+兦 > wáng; #5166
+內 > nèi; #5167
+全 > quán; #5168
+兩 > liăng; #5169
+兪 > yú; #516A
+八 > bā; #516B
+公 > gōng; #516C
+六 > lìu; #516D
+兮 > xī; #516E
+兰 > lán; #5170
+共 > gòng; #5171
+兲 > tiān; #5172
+关 > guān; #5173
+兴 > xīng; #5174
+兵 > bīng; #5175
+其 > qí; #5176
+具 > jù; #5177
+典 > diăn; #5178
+兹 > zī; #5179
+养 > yăng; #517B
+兼 > jiān; #517C
+兽 > shòu; #517D
+兾 > jì; #517E
+兿 > yì; #517F
+冀 > jì; #5180
+冁 > chăn; #5181
+冂 > jīong; #5182
+冃 > mao; #5183
+冄 > răn; #5184
+内 > nèi; #5185
+円 > yuan; #5186
+冇 > măo; #5187
+冈 > gāng; #5188
+冉 > răn; #5189
+冊 > cè; #518A
+冋 > jīong; #518B
+册 > cè; #518C
+再 > zài; #518D
+冎 > guă; #518E
+冏 > jĭong; #518F
+冐 > mào; #5190
+冑 > zhòu; #5191
+冒 > mòu; #5192
+冓 > gòu; #5193
+冔 > xŭ; #5194
+冕 > miăn; #5195
+冖 > mì; #5196
+冗 > rŏng; #5197
+冘 > yín; #5198
+写 > xiĕ; #5199
+冚 > kăn; #519A
+军 > jūn; #519B
+农 > nóng; #519C
+冝 > yí; #519D
+冞 > mí; #519E
+冟 > shì; #519F
+冠 > guān; #51A0
+冡 > méng; #51A1
+冢 > zhŏng; #51A2
+冣 > jù; #51A3
+冤 > yuān; #51A4
+冥 > míng; #51A5
+冦 > kòu; #51A6
+冨 > fù; #51A8
+冩 > xiĕ; #51A9
+冪 > mì; #51AA
+冫 > bīng; #51AB
+冬 > dōng; #51AC
+冭 > tái; #51AD
+冮 > gāng; #51AE
+冯 > féng; #51AF
+冰 > bīng; #51B0
+冱 > hù; #51B1
+冲 > chōng; #51B2
+决 > jué; #51B3
+冴 > hù; #51B4
+况 > kuàng; #51B5
+冶 > yĕ; #51B6
+冷 > lĕng; #51B7
+冸 > pàn; #51B8
+冹 > fú; #51B9
+冺 > mĭn; #51BA
+冻 > dòng; #51BB
+冼 > xiăn; #51BC
+冽 > liè; #51BD
+冾 > xiá; #51BE
+冿 > jiān; #51BF
+净 > jìng; #51C0
+凁 > shù; #51C1
+凂 > mĕi; #51C2
+凃 > tú; #51C3
+凄 > qī; #51C4
+凅 > gù; #51C5
+准 > zhŭn; #51C6
+凇 > sòng; #51C7
+凈 > jìng; #51C8
+凉 > liáng; #51C9
+凊 > qìng; #51CA
+凋 > diāo; #51CB
+凌 > líng; #51CC
+凍 > dòng; #51CD
+凎 > gàn; #51CE
+减 > jiăn; #51CF
+凐 > yīn; #51D0
+凑 > còu; #51D1
+凒 > yí; #51D2
+凓 > lì; #51D3
+凔 > cāng; #51D4
+凕 > mĭng; #51D5
+凖 > zhuĕn; #51D6
+凗 > cúi; #51D7
+凘 > sī; #51D8
+凙 > dúo; #51D9
+凚 > jìn; #51DA
+凛 > lĭn; #51DB
+凜 > lĭn; #51DC
+凝 > níng; #51DD
+凞 > xī; #51DE
+凟 > dú; #51DF
+几 > jī; #51E0
+凡 > fán; #51E1
+凢 > fán; #51E2
+凣 > fán; #51E3
+凤 > fèng; #51E4
+凥 > jū; #51E5
+処 > chŭ; #51E6
+凨 > fēng; #51E8
+凫 > fú; #51EB
+凬 > fēng; #51EC
+凭 > píng; #51ED
+凮 > fēng; #51EE
+凯 > kăi; #51EF
+凰 > huáng; #51F0
+凱 > kăi; #51F1
+凲 > gān; #51F2
+凳 > dèng; #51F3
+凴 > píng; #51F4
+凵 > qū; #51F5
+凶 > xīong; #51F6
+凷 > kuài; #51F7
+凸 > tū; #51F8
+凹 > āo; #51F9
+出 > chū; #51FA
+击 > jí; #51FB
+凼 > dàng; #51FC
+函 > hán; #51FD
+凾 > hán; #51FE
+凿 > záo; #51FF
+刀 > dāo; #5200
+刁 > diāo; #5201
+刂 > dāo; #5202
+刃 > rèn; #5203
+刄 > rèn; #5204
+刅 > chuāng; #5205
+分 > fēn; #5206
+切 > qiē; #5207
+刈 > yì; #5208
+刉 > jī; #5209
+刊 > kān; #520A
+刋 > qiàn; #520B
+刌 > cŭn; #520C
+刍 > chú; #520D
+刎 > wĕn; #520E
+刏 > jī; #520F
+刐 > dăn; #5210
+刑 > xíng; #5211
+划 > huá; #5212
+刓 > wán; #5213
+刔 > jué; #5214
+刕 > lí; #5215
+刖 > yuè; #5216
+列 > liè; #5217
+刘 > líu; #5218
+则 > zé; #5219
+刚 > gāng; #521A
+创 > chuàng; #521B
+刜 > fú; #521C
+初 > chū; #521D
+刞 > qù; #521E
+刟 > jū; #521F
+删 > shān; #5220
+刡 > mĭn; #5221
+刢 > líng; #5222
+刣 > zhōng; #5223
+判 > pàn; #5224
+別 > bié; #5225
+刦 > jié; #5226
+刧 > jié; #5227
+刨 > bào; #5228
+利 > lì; #5229
+刪 > shān; #522A
+别 > bié; #522B
+刬 > chăn; #522C
+刭 > jĭng; #522D
+刮 > guā; #522E
+刯 > gēn; #522F
+到 > dào; #5230
+刱 > chuàng; #5231
+刲 > kūi; #5232
+刳 > kū; #5233
+刴 > dùo; #5234
+刵 > èr; #5235
+制 > zhì; #5236
+刷 > shuā; #5237
+券 > quàn; #5238
+刹 > chà; #5239
+刺 > cì; #523A
+刻 > kè; #523B
+刼 > jié; #523C
+刽 > gùi; #523D
+刾 > cì; #523E
+刿 > gùi; #523F
+剀 > kăi; #5240
+剁 > dùo; #5241
+剂 > jì; #5242
+剃 > tì; #5243
+剄 > jĭng; #5244
+剅 > lóu; #5245
+剆 > gēn; #5246
+則 > zé; #5247
+剈 > yuān; #5248
+剉 > cùo; #5249
+削 > xuē; #524A
+剋 > kè; #524B
+剌 > là; #524C
+前 > qián; #524D
+剎 > chà; #524E
+剏 > chuàng; #524F
+剐 > guă; #5250
+剑 > jiàn; #5251
+剒 > cùo; #5252
+剓 > lí; #5253
+剔 > tī; #5254
+剕 > fèi; #5255
+剖 > pōu; #5256
+剗 > chăn; #5257
+剘 > qí; #5258
+剙 > chuàng; #5259
+剚 > zì; #525A
+剛 > gāng; #525B
+剜 > wān; #525C
+剝 > bō; #525D
+剞 > jī; #525E
+剟 > dūo; #525F
+剠 > qíng; #5260
+剡 > yăn; #5261
+剢 > zhúo; #5262
+剣 > jiàn; #5263
+剤 > jì; #5264
+剥 > bō; #5265
+剦 > yān; #5266
+剧 > jù; #5267
+剨 > hùo; #5268
+剩 > shèng; #5269
+剪 > jiăn; #526A
+剫 > dúo; #526B
+剬 > duān; #526C
+剭 > wū; #526D
+剮 > guă; #526E
+副 > fù; #526F
+剰 > shèng; #5270
+剱 > jiàn; #5271
+割 > gē; #5272
+剳 > zhā; #5273
+剴 > kăi; #5274
+創 > chuàng; #5275
+剶 > juān; #5276
+剷 > chăn; #5277
+剸 > tuán; #5278
+剹 > lù; #5279
+剺 > lí; #527A
+剻 > fóu; #527B
+剼 > shān; #527C
+剽 > piào; #527D
+剾 > kōu; #527E
+剿 > jiăo; #527F
+劀 > guā; #5280
+劁 > qiāo; #5281
+劂 > jué; #5282
+劃 > huà; #5283
+劄 > zhá; #5284
+劅 > zhùo; #5285
+劆 > lián; #5286
+劇 > jù; #5287
+劈 > pī; #5288
+劉 > líu; #5289
+劊 > gùi; #528A
+劋 > jiăo; #528B
+劌 > gùi; #528C
+劍 > jiàn; #528D
+劎 > jiàn; #528E
+劏 > tāng; #528F
+劐 > hūo; #5290
+劑 > jì; #5291
+劒 > jiàn; #5292
+劓 > yì; #5293
+劔 > jiàn; #5294
+劕 > zhí; #5295
+劖 > chán; #5296
+劗 > cuán; #5297
+劘 > mó; #5298
+劙 > lí; #5299
+劚 > zhú; #529A
+力 > lì; #529B
+劜 > yā; #529C
+劝 > quàn; #529D
+办 > bàn; #529E
+功 > gōng; #529F
+加 > jiā; #52A0
+务 > wù; #52A1
+劢 > mài; #52A2
+劣 > liè; #52A3
+劤 > jìn; #52A4
+劥 > kēng; #52A5
+劦 > xié; #52A6
+劧 > zhĭ; #52A7
+动 > dòng; #52A8
+助 > zhù; #52A9
+努 > nŭ; #52AA
+劫 > jié; #52AB
+劬 > qú; #52AC
+劭 > shào; #52AD
+劮 > yì; #52AE
+劯 > zhū; #52AF
+劰 > miăo; #52B0
+励 > lì; #52B1
+劲 > jìng; #52B2
+劳 > láo; #52B3
+労 > láo; #52B4
+劵 > juàn; #52B5
+劶 > kŏu; #52B6
+劷 > yáng; #52B7
+劸 > wā; #52B8
+効 > xiào; #52B9
+劺 > móu; #52BA
+劻 > kuāng; #52BB
+劼 > jié; #52BC
+劽 > liè; #52BD
+劾 > hé; #52BE
+势 > shì; #52BF
+勀 > kè; #52C0
+勁 > jìng; #52C1
+勂 > háo; #52C2
+勃 > bó; #52C3
+勄 > mĭn; #52C4
+勅 > chì; #52C5
+勆 > láng; #52C6
+勇 > yŏng; #52C7
+勈 > yŏng; #52C8
+勉 > miăn; #52C9
+勊 > kè; #52CA
+勋 > xūn; #52CB
+勌 > juàn; #52CC
+勍 > qíng; #52CD
+勎 > lù; #52CE
+勏 > pŏu; #52CF
+勐 > mĕng; #52D0
+勑 > lài; #52D1
+勒 > lè; #52D2
+勓 > kài; #52D3
+勔 > miăn; #52D4
+動 > dòng; #52D5
+勖 > xù; #52D6
+勗 > xù; #52D7
+勘 > kān; #52D8
+務 > wù; #52D9
+勚 > yì; #52DA
+勛 > xūn; #52DB
+勜 > wĕng; #52DC
+勝 > shèng; #52DD
+勞 > láo; #52DE
+募 > mù; #52DF
+勠 > lù; #52E0
+勡 > piào; #52E1
+勢 > shì; #52E2
+勣 > jī; #52E3
+勤 > qín; #52E4
+勥 > qiăng; #52E5
+勦 > jiăo; #52E6
+勧 > quàn; #52E7
+勨 > yăng; #52E8
+勩 > yì; #52E9
+勪 > jué; #52EA
+勫 > fán; #52EB
+勬 > juàn; #52EC
+勭 > tóng; #52ED
+勮 > jù; #52EE
+勯 > dān; #52EF
+勰 > xié; #52F0
+勱 > mài; #52F1
+勲 > xūn; #52F2
+勳 > xūn; #52F3
+勴 > lǜ; #52F4
+勵 > lì; #52F5
+勶 > chè; #52F6
+勷 > ráng; #52F7
+勸 > quàn; #52F8
+勹 > bāo; #52F9
+勺 > sháo; #52FA
+勻 > yún; #52FB
+勼 > jīu; #52FC
+勽 > bào; #52FD
+勾 > gōu; #52FE
+勿 > wù; #52FF
+匀 > yún; #5300
+匃 > gài; #5303
+匄 > gài; #5304
+包 > bāo; #5305
+匆 > cōng; #5306
+匈 > xīong; #5308
+匉 > pēng; #5309
+匊 > jú; #530A
+匋 > táo; #530B
+匌 > gé; #530C
+匍 > pú; #530D
+匎 > àn; #530E
+匏 > páo; #530F
+匐 > fú; #5310
+匑 > gōng; #5311
+匒 > dá; #5312
+匓 > jìu; #5313
+匔 > qīong; #5314
+匕 > bĭ; #5315
+化 > huà; #5316
+北 > bĕi; #5317
+匘 > năo; #5318
+匙 > chí; #5319
+匚 > fāng; #531A
+匛 > jìu; #531B
+匜 > yí; #531C
+匝 > zā; #531D
+匞 > jiàng; #531E
+匟 > kàng; #531F
+匠 > jiàng; #5320
+匡 > kuāng; #5321
+匢 > hū; #5322
+匣 > xiá; #5323
+匤 > qū; #5324
+匥 > biàn; #5325
+匦 > gŭi; #5326
+匧 > qiè; #5327
+匨 > zāng; #5328
+匩 > kuāng; #5329
+匪 > fĕi; #532A
+匫 > hū; #532B
+匬 > tóu; #532C
+匭 > gŭi; #532D
+匮 > gùi; #532E
+匯 > hùi; #532F
+匰 > dān; #5330
+匱 > gùi; #5331
+匲 > lián; #5332
+匳 > lián; #5333
+匴 > suăn; #5334
+匵 > dú; #5335
+匶 > jìu; #5336
+匷 > qú; #5337
+匸 > xĭ; #5338
+匹 > pĭ; #5339
+区 > qū; #533A
+医 > yì; #533B
+匼 > qià; #533C
+匽 > yăn; #533D
+匾 > biăn; #533E
+匿 > nì; #533F
+區 > qū; #5340
+十 > shí; #5341
+卂 > xìn; #5342
+千 > qiān; #5343
+卄 > niàn; #5344
+卅 > sà; #5345
+卆 > zú; #5346
+升 > shēng; #5347
+午 > wŭ; #5348
+卉 > hùi; #5349
+半 > bàn; #534A
+卋 > shì; #534B
+卌 > xì; #534C
+卍 > wàn; #534D
+华 > huá; #534E
+协 > xié; #534F
+卐 > wàn; #5350
+卑 > bēi; #5351
+卒 > zú; #5352
+卓 > zhūo; #5353
+協 > xié; #5354
+单 > dān; #5355
+卖 > mài; #5356
+南 > nán; #5357
+単 > dān; #5358
+卙 > jí; #5359
+博 > bó; #535A
+卛 > shuài; #535B
+卜 > bŭ; #535C
+卝 > kuàng; #535D
+卞 > biàn; #535E
+卟 > bŭ; #535F
+占 > zhān; #5360
+卡 > qiă; #5361
+卢 > lú; #5362
+卣 > yŏu; #5363
+卤 > lŭ; #5364
+卥 > xī; #5365
+卦 > guà; #5366
+卧 > wò; #5367
+卨 > xiè; #5368
+卩 > jié; #5369
+卪 > jié; #536A
+卫 > wèi; #536B
+卬 > áng; #536C
+卭 > qíong; #536D
+卮 > zhī; #536E
+卯 > măo; #536F
+印 > yìn; #5370
+危 > wēi; #5371
+卲 > shào; #5372
+即 > jí; #5373
+却 > què; #5374
+卵 > luăn; #5375
+卶 > shì; #5376
+卷 > juàn; #5377
+卸 > xiè; #5378
+卹 > xù; #5379
+卺 > jĭn; #537A
+卻 > què; #537B
+卼 > wù; #537C
+卽 > jí; #537D
+卾 > è; #537E
+卿 > qīng; #537F
+厀 > xī; #5380
+厂 > hàn; #5382
+厃 > zhān; #5383
+厄 > è; #5384
+厅 > tīng; #5385
+历 > lì; #5386
+厇 > zhé; #5387
+厈 > hăn; #5388
+厉 > lì; #5389
+厊 > yă; #538A
+压 > yā; #538B
+厌 > yàn; #538C
+厍 > shè; #538D
+厎 > zhĭ; #538E
+厏 > zhă; #538F
+厐 > páng; #5390
+厒 > hé; #5392
+厓 > yá; #5393
+厔 > zhì; #5394
+厕 > cè; #5395
+厖 > páng; #5396
+厗 > tí; #5397
+厘 > lí; #5398
+厙 > shè; #5399
+厚 > hòu; #539A
+厛 > tīng; #539B
+厜 > zūi; #539C
+厝 > cùo; #539D
+厞 > fèi; #539E
+原 > yuán; #539F
+厠 > cè; #53A0
+厡 > yuán; #53A1
+厢 > xiāng; #53A2
+厣 > yăn; #53A3
+厤 > lì; #53A4
+厥 > jué; #53A5
+厦 > shà; #53A6
+厧 > diān; #53A7
+厨 > chú; #53A8
+厩 > jìu; #53A9
+厪 > qín; #53AA
+厫 > áo; #53AB
+厬 > gŭi; #53AC
+厭 > yàn; #53AD
+厮 > sī; #53AE
+厯 > lì; #53AF
+厰 > chăng; #53B0
+厱 > lán; #53B1
+厲 > lì; #53B2
+厳 > yán; #53B3
+厴 > yăn; #53B4
+厵 > yuán; #53B5
+厶 > sī; #53B6
+厷 > gōng; #53B7
+厸 > lín; #53B8
+厹 > qíu; #53B9
+厺 > qù; #53BA
+去 > qù; #53BB
+厽 > lĕi; #53BD
+厾 > dū; #53BE
+县 > xiàn; #53BF
+叀 > zhuān; #53C0
+叁 > sān; #53C1
+参 > cān; #53C2
+參 > cān; #53C3
+叄 > cān; #53C4
+叅 > cān; #53C5
+叆 > ài; #53C6
+叇 > dài; #53C7
+又 > yòu; #53C8
+叉 > cha; #53C9
+及 > jí; #53CA
+友 > yŏu; #53CB
+双 > shuāng; #53CC
+反 > făn; #53CD
+収 > shōu; #53CE
+叏 > guài; #53CF
+叐 > bá; #53D0
+发 > fā; #53D1
+叒 > rùo; #53D2
+叓 > shì; #53D3
+叔 > shū; #53D4
+叕 > zhúo; #53D5
+取 > qū; #53D6
+受 > shòu; #53D7
+变 > biàn; #53D8
+叙 > xù; #53D9
+叚 > jiă; #53DA
+叛 > pàn; #53DB
+叜 > sŏu; #53DC
+叝 > gào; #53DD
+叞 > wèi; #53DE
+叟 > sŏu; #53DF
+叠 > dié; #53E0
+叡 > rùi; #53E1
+叢 > cóng; #53E2
+口 > kŏu; #53E3
+古 > gŭ; #53E4
+句 > jù; #53E5
+另 > lìng; #53E6
+叧 > guă; #53E7
+叨 > tāo; #53E8
+叩 > kòu; #53E9
+只 > zhĭ; #53EA
+叫 > jiào; #53EB
+召 > zhào; #53EC
+叭 > bā; #53ED
+叮 > dīng; #53EE
+可 > kĕ; #53EF
+台 > tái; #53F0
+叱 > chì; #53F1
+史 > shĭ; #53F2
+右 > yòu; #53F3
+叴 > qíu; #53F4
+叵 > pŏ; #53F5
+叶 > xié; #53F6
+号 > hào; #53F7
+司 > sī; #53F8
+叹 > tàn; #53F9
+叺 > chĭ; #53FA
+叻 > lè; #53FB
+叼 > diāo; #53FC
+叽 > jī; #53FD
+叿 > hōng; #53FF
+吀 > miē; #5400
+吁 > xū; #5401
+吂 > máng; #5402
+吃 > chī; #5403
+各 > gè; #5404
+吅 > xuān; #5405
+吆 > yāo; #5406
+吇 > zĭ; #5407
+合 > hé; #5408
+吉 > jí; #5409
+吊 > diào; #540A
+吋 > cùn; #540B
+同 > tóng; #540C
+名 > míng; #540D
+后 > hòu; #540E
+吏 > lì; #540F
+吐 > tŭ; #5410
+向 > xiàng; #5411
+吒 > zhà; #5412
+吓 > xià; #5413
+吔 > yĕ; #5414
+吕 > lǚ; #5415
+吖 > ā; #5416
+吗 > ma; #5417
+吘 > ŏu; #5418
+吙 > xuē; #5419
+吚 > yī; #541A
+君 > jūn; #541B
+吜 > chŏu; #541C
+吝 > lìn; #541D
+吞 > tūn; #541E
+吟 > yín; #541F
+吠 > fèi; #5420
+吡 > bĭ; #5421
+吢 > qìn; #5422
+吣 > qìn; #5423
+吤 > jiè; #5424
+吥 > bù; #5425
+否 > fŏu; #5426
+吧 > ba; #5427
+吨 > dūn; #5428
+吩 > fēn; #5429
+吪 > é; #542A
+含 > hán; #542B
+听 > tīng; #542C
+吭 > háng; #542D
+吮 > shŭn; #542E
+启 > qĭ; #542F
+吰 > hóng; #5430
+吱 > zhī; #5431
+吲 > shĕn; #5432
+吳 > wú; #5433
+吴 > wú; #5434
+吵 > chăo; #5435
+吶 > nè; #5436
+吷 > xuè; #5437
+吸 > xī; #5438
+吹 > chūi; #5439
+吺 > dōu; #543A
+吻 > wĕn; #543B
+吼 > hŏu; #543C
+吽 > óu; #543D
+吾 > wú; #543E
+吿 > gào; #543F
+呀 > yā; #5440
+呁 > jùn; #5441
+呂 > lǚ; #5442
+呃 > è; #5443
+呄 > gé; #5444
+呅 > méi; #5445
+呆 > ái; #5446
+呇 > qĭ; #5447
+呈 > chéng; #5448
+呉 > wú; #5449
+告 > gào; #544A
+呋 > fū; #544B
+呌 > jiào; #544C
+呍 > hōng; #544D
+呎 > chĭ; #544E
+呏 > shēng; #544F
+呐 > nè; #5450
+呑 > tūn; #5451
+呒 > fŭ; #5452
+呓 > yì; #5453
+呔 > dāi; #5454
+呕 > ōu; #5455
+呖 > lì; #5456
+呗 > bài; #5457
+员 > yuán; #5458
+呙 > kuāi; #5459
+呛 > qiāng; #545B
+呜 > wū; #545C
+呝 > è; #545D
+呞 > shī; #545E
+呟 > quăn; #545F
+呠 > pēn; #5460
+呡 > wĕn; #5461
+呢 > ní; #5462
+呣 > ḿ; #5463
+呤 > lĭng; #5464
+呥 > răn; #5465
+呦 > yōu; #5466
+呧 > dĭ; #5467
+周 > zhōu; #5468
+呩 > shì; #5469
+呪 > zhòu; #546A
+呫 > tiē; #546B
+呬 > xì; #546C
+呭 > yì; #546D
+呮 > qì; #546E
+呯 > píng; #546F
+呰 > zĭ; #5470
+呱 > gū; #5471
+呲 > zī; #5472
+味 > wèi; #5473
+呴 > xū; #5474
+呵 > hē; #5475
+呶 > náo; #5476
+呷 > xiā; #5477
+呸 > pēi; #5478
+呹 > yì; #5479
+呺 > xiāo; #547A
+呻 > shēn; #547B
+呼 > hū; #547C
+命 > mìng; #547D
+呾 > dá; #547E
+呿 > qū; #547F
+咀 > jŭ; #5480
+咁 > gèm; #5481
+咂 > zā; #5482
+咃 > tūo; #5483
+咄 > dūo; #5484
+咅 > pòu; #5485
+咆 > páo; #5486
+咇 > bì; #5487
+咈 > fú; #5488
+咉 > yāng; #5489
+咊 > hé; #548A
+咋 > zhà; #548B
+和 > hé; #548C
+咍 > hāi; #548D
+咎 > jìu; #548E
+咏 > yŏng; #548F
+咐 > fù; #5490
+咑 > què; #5491
+咒 > zhòu; #5492
+咓 > wă; #5493
+咔 > kă; #5494
+咕 > gū; #5495
+咖 > kā; #5496
+咗 > zŭo; #5497
+咘 > bù; #5498
+咙 > lóng; #5499
+咚 > dōng; #549A
+咛 > níng; #549B
+咝 > sī; #549D
+咞 > xiàn; #549E
+咟 > hùo; #549F
+咠 > qì; #54A0
+咡 > èr; #54A1
+咢 > è; #54A2
+咣 > guāng; #54A3
+咤 > zhà; #54A4
+咥 > xì; #54A5
+咦 > yí; #54A6
+咧 > liĕ; #54A7
+咨 > zī; #54A8
+咩 > miē; #54A9
+咪 > mī; #54AA
+咫 > zhĭ; #54AB
+咬 > yăo; #54AC
+咭 > jī; #54AD
+咮 > zhòu; #54AE
+咯 > gē; #54AF
+咰 > shuài; #54B0
+咱 > zán; #54B1
+咲 > xiào; #54B2
+咳 > ké; #54B3
+咴 > hūi; #54B4
+咵 > kuā; #54B5
+咶 > huài; #54B6
+咷 > táo; #54B7
+咸 > xián; #54B8
+咹 > è; #54B9
+咺 > xuān; #54BA
+咻 > xīu; #54BB
+咼 > wāi; #54BC
+咽 > yān; #54BD
+咾 > lăo; #54BE
+咿 > yī; #54BF
+哀 > āi; #54C0
+品 > pĭn; #54C1
+哂 > shĕn; #54C2
+哃 > tóng; #54C3
+哄 > hōng; #54C4
+哅 > xīong; #54C5
+哆 > chĭ; #54C6
+哇 > wā; #54C7
+哈 > hā; #54C8
+哉 > zāi; #54C9
+哊 > yù; #54CA
+哋 > dì; #54CB
+哌 > pài; #54CC
+响 > xiăng; #54CD
+哎 > āi; #54CE
+哏 > hĕn; #54CF
+哐 > kuāng; #54D0
+哑 > yă; #54D1
+哒 > dā; #54D2
+哓 > xiāo; #54D3
+哔 > bì; #54D4
+哕 > yuĕ; #54D5
+哗 > huā; #54D7
+哙 > kuài; #54D9
+哚 > dŭo; #54DA
+哜 > jì; #54DC
+哝 > nóng; #54DD
+哞 > mōu; #54DE
+哟 > yo; #54DF
+哠 > hào; #54E0
+員 > yuán; #54E1
+哢 > lòng; #54E2
+哣 > pŏu; #54E3
+哤 > máng; #54E4
+哥 > gē; #54E5
+哦 > é; #54E6
+哧 > chī; #54E7
+哨 > shào; #54E8
+哩 > lī; #54E9
+哪 > nă; #54EA
+哫 > zú; #54EB
+哬 > hé; #54EC
+哭 > kū; #54ED
+哮 > xiāo; #54EE
+哯 > xiàn; #54EF
+哰 > láo; #54F0
+哱 > bō; #54F1
+哲 > zhé; #54F2
+哳 > zhā; #54F3
+哴 > liàng; #54F4
+哵 > bā; #54F5
+哶 > miē; #54F6
+哷 > lè; #54F7
+哸 > sūi; #54F8
+哹 > fóu; #54F9
+哺 > bŭ; #54FA
+哻 > hàn; #54FB
+哼 > hēng; #54FC
+哽 > gĕng; #54FD
+哾 > shūo; #54FE
+哿 > gĕ; #54FF
+唀 > yŏu; #5500
+唁 > yàn; #5501
+唂 > gŭ; #5502
+唃 > gŭ; #5503
+唄 > bài; #5504
+唅 > hān; #5505
+唆 > sūo; #5506
+唇 > chún; #5507
+唈 > yì; #5508
+唉 > āi; #5509
+唊 > jiá; #550A
+唋 > tŭ; #550B
+唌 > xián; #550C
+唍 > huăn; #550D
+唎 > lī; #550E
+唏 > xī; #550F
+唐 > táng; #5510
+唑 > zùo; #5511
+唒 > qíu; #5512
+唓 > chē; #5513
+唔 > wú; #5514
+唕 > zào; #5515
+唖 > yă; #5516
+唗 > dōu; #5517
+唘 > qĭ; #5518
+唙 > dí; #5519
+唚 > qìn; #551A
+唛 > mà; #551B
+唝 > hŏng; #551D
+唞 > dŏu; #551E
+唠 > láo; #5520
+唡 > liăng; #5521
+唢 > sŭo; #5522
+唣 > zào; #5523
+唤 > huàn; #5524
+唦 > shā; #5526
+唧 > jī; #5527
+唨 > zŭo; #5528
+唩 > wō; #5529
+唪 > fĕng; #552A
+唫 > yín; #552B
+唬 > hŭ; #552C
+唭 > qī; #552D
+售 > shòu; #552E
+唯 > wéi; #552F
+唰 > shuā; #5530
+唱 > chàng; #5531
+唲 > ér; #5532
+唳 > lì; #5533
+唴 > qiàng; #5534
+唵 > ăn; #5535
+唶 > jiè; #5536
+唷 > yō; #5537
+唸 > niàn; #5538
+唹 > yū; #5539
+唺 > tiăn; #553A
+唻 > lăi; #553B
+唼 > shà; #553C
+唽 > xī; #553D
+唾 > tùo; #553E
+唿 > hū; #553F
+啀 > ái; #5540
+啁 > zhōu; #5541
+啂 > nòu; #5542
+啃 > kĕn; #5543
+啄 > zhúo; #5544
+啅 > zhúo; #5545
+商 > shāng; #5546
+啇 > dí; #5547
+啈 > hèng; #5548
+啉 > lán; #5549
+啊 > a; #554A
+啋 > xiāo; #554B
+啌 > xiāng; #554C
+啍 > tūn; #554D
+啎 > wŭ; #554E
+問 > wèn; #554F
+啐 > cùi; #5550
+啑 > shà; #5551
+啒 > hū; #5552
+啓 > qĭ; #5553
+啔 > qĭ; #5554
+啕 > táo; #5555
+啖 > dàn; #5556
+啗 > dàn; #5557
+啘 > yè; #5558
+啙 > zĭ; #5559
+啚 > bĭ; #555A
+啛 > cùi; #555B
+啜 > chùo; #555C
+啝 > hé; #555D
+啞 > yă; #555E
+啟 > qĭ; #555F
+啠 > zhé; #5560
+啡 > pēi; #5561
+啢 > liăng; #5562
+啣 > xián; #5563
+啤 > pí; #5564
+啥 > shà; #5565
+啦 > la; #5566
+啧 > zé; #5567
+啨 > qīng; #5568
+啩 > guà; #5569
+啪 > pā; #556A
+啫 > zhĕ; #556B
+啬 > sè; #556C
+啭 > zhuàn; #556D
+啮 > niè; #556E
+啯 > guo; #556F
+啰 > lūo; #5570
+啱 > yān; #5571
+啲 > dì; #5572
+啳 > quán; #5573
+啴 > tān; #5574
+啵 > bo; #5575
+啶 > dìng; #5576
+啷 > lāng; #5577
+啸 > xiào; #5578
+啺 > táng; #557A
+啻 > chì; #557B
+啼 > tí; #557C
+啽 > án; #557D
+啾 > jīu; #557E
+啿 > dàn; #557F
+喀 > kè; #5580
+喁 > yóng; #5581
+喂 > wèi; #5582
+喃 > nán; #5583
+善 > shàn; #5584
+喅 > yù; #5585
+喆 > zhé; #5586
+喇 > lă; #5587
+喈 > jiē; #5588
+喉 > hóu; #5589
+喊 > hăn; #558A
+喋 > dié; #558B
+喌 > zhōu; #558C
+喍 > chái; #558D
+喎 > wāi; #558E
+喏 > rĕ; #558F
+喐 > yù; #5590
+喑 > yīn; #5591
+喒 > zán; #5592
+喓 > yāo; #5593
+喔 > wō; #5594
+喕 > miăn; #5595
+喖 > hú; #5596
+喗 > yŭn; #5597
+喘 > chuăn; #5598
+喙 > hùi; #5599
+喚 > huàn; #559A
+喛 > huàn; #559B
+喜 > xĭ; #559C
+喝 > hē; #559D
+喞 > jī; #559E
+喟 > kùi; #559F
+喠 > zhŏng; #55A0
+喡 > wĕi; #55A1
+喢 > shà; #55A2
+喣 > xŭ; #55A3
+喤 > huáng; #55A4
+喥 > dù; #55A5
+喦 > niè; #55A6
+喧 > xuān; #55A7
+喨 > liàng; #55A8
+喩 > yù; #55A9
+喪 > sāng; #55AA
+喫 > chī; #55AB
+喬 > qiáo; #55AC
+喭 > yàn; #55AD
+單 > dān; #55AE
+喯 > pēn; #55AF
+喰 > cān; #55B0
+喱 > lí; #55B1
+喲 > yo; #55B2
+喳 > zhā; #55B3
+喴 > wēi; #55B4
+喵 > miāo; #55B5
+営 > yíng; #55B6
+喷 > pēn; #55B7
+喹 > kúi; #55B9
+喺 > xì; #55BA
+喻 > yù; #55BB
+喼 > jié; #55BC
+喽 > lou; #55BD
+喾 > kù; #55BE
+喿 > sào; #55BF
+嗀 > hùo; #55C0
+嗁 > tí; #55C1
+嗂 > yáo; #55C2
+嗃 > hè; #55C3
+嗄 > á; #55C4
+嗅 > xìu; #55C5
+嗆 > qiāng; #55C6
+嗇 > sè; #55C7
+嗈 > yōng; #55C8
+嗉 > sù; #55C9
+嗊 > hŏng; #55CA
+嗋 > xié; #55CB
+嗌 > yì; #55CC
+嗍 > sūo; #55CD
+嗎 > ma; #55CE
+嗏 > chā; #55CF
+嗐 > hài; #55D0
+嗑 > kè; #55D1
+嗒 > tà; #55D2
+嗓 > săng; #55D3
+嗔 > tián; #55D4
+嗕 > rù; #55D5
+嗖 > sōu; #55D6
+嗗 > wā; #55D7
+嗘 > jī; #55D8
+嗙 > păng; #55D9
+嗚 > wū; #55DA
+嗛 > xián; #55DB
+嗜 > shì; #55DC
+嗝 > gé; #55DD
+嗞 > zī; #55DE
+嗟 > jiē; #55DF
+嗠 > lùo; #55E0
+嗡 > wēng; #55E1
+嗢 > wà; #55E2
+嗣 > sì; #55E3
+嗤 > chī; #55E4
+嗥 > háo; #55E5
+嗦 > sūo; #55E6
+嗧 > jiā' 'lún; #55E7
+嗨 > hăi; #55E8
+嗩 > sŭo; #55E9
+嗪 > qín; #55EA
+嗫 > niè; #55EB
+嗬 > hē; #55EC
+嗮 > sài; #55EE
+嗯 > ǹg; #55EF
+嗰 > gè; #55F0
+嗱 > ná; #55F1
+嗲 > diă; #55F2
+嗳 > ài; #55F3
+嗵 > tōng; #55F5
+嗶 > bì; #55F6
+嗷 > áo; #55F7
+嗸 > áo; #55F8
+嗹 > lián; #55F9
+嗺 > cūi; #55FA
+嗻 > zhē; #55FB
+嗼 > mò; #55FC
+嗽 > sòu; #55FD
+嗾 > sŏu; #55FE
+嗿 > tăn; #55FF
+嘀 > dí; #5600
+嘁 > qī; #5601
+嘂 > jiào; #5602
+嘃 > chōng; #5603
+嘄 > jiāo; #5604
+嘅 > kăi; #5605
+嘆 > tàn; #5606
+嘇 > sān; #5607
+嘈 > cáo; #5608
+嘉 > jiā; #5609
+嘊 > ái; #560A
+嘋 > xiāo; #560B
+嘌 > piāo; #560C
+嘍 > lou; #560D
+嘎 > gā; #560E
+嘏 > gŭ; #560F
+嘐 > xiāo; #5610
+嘑 > hū; #5611
+嘒 > hùi; #5612
+嘓 > guo; #5613
+嘔 > ōu; #5614
+嘕 > xiān; #5615
+嘖 > zé; #5616
+嘗 > cháng; #5617
+嘘 > xū; #5618
+嘙 > pó; #5619
+嘚 > dé; #561A
+嘛 > ma; #561B
+嘜 > mà; #561C
+嘝 > hú; #561D
+嘞 > lei; #561E
+嘟 > dū; #561F
+嘠 > gā; #5620
+嘡 > tāng; #5621
+嘢 > yĕ; #5622
+嘣 > bēng; #5623
+嘤 > yīng; #5624
+嘦 > jiào; #5626
+嘧 > mī; #5627
+嘨 > xiào; #5628
+嘩 > huā; #5629
+嘪 > măi; #562A
+嘫 > rán; #562B
+嘬 > zūo; #562C
+嘭 > pēng; #562D
+嘮 > láo; #562E
+嘯 > xiào; #562F
+嘰 > jī; #5630
+嘱 > zhŭ; #5631
+嘲 > cháo; #5632
+嘳 > kùi; #5633
+嘴 > zŭi; #5634
+嘵 > xiāo; #5635
+嘶 > sī; #5636
+嘷 > háo; #5637
+嘸 > fŭ; #5638
+嘹 > liáo; #5639
+嘺 > qiáo; #563A
+嘻 > xī; #563B
+嘼 > xìu; #563C
+嘽 > tān; #563D
+嘾 > tán; #563E
+嘿 > mò; #563F
+噀 > xùn; #5640
+噁 > ĕ; #5641
+噂 > zŭn; #5642
+噃 > fān; #5643
+噄 > chī; #5644
+噅 > hūi; #5645
+噆 > zăn; #5646
+噇 > chuáng; #5647
+噈 > cù; #5648
+噉 > dàn; #5649
+噊 > yù; #564A
+噋 > tūn; #564B
+噌 > chēng; #564C
+噍 > jiào; #564D
+噎 > yē; #564E
+噏 > xī; #564F
+噐 > qì; #5650
+噑 > háo; #5651
+噒 > lián; #5652
+噓 > xū; #5653
+噔 > dēng; #5654
+噕 > hūi; #5655
+噖 > yín; #5656
+噗 > pū; #5657
+噘 > juē; #5658
+噙 > qín; #5659
+噚 > xún; #565A
+噛 > niè; #565B
+噜 > lū; #565C
+噝 > sī; #565D
+噞 > yăn; #565E
+噟 > yìng; #565F
+噠 > dā; #5660
+噡 > dān; #5661
+噢 > yŭ; #5662
+噣 > zhòu; #5663
+噤 > jìn; #5664
+噥 > nóng; #5665
+噦 > yuĕ; #5666
+噧 > hùi; #5667
+器 > qì; #5668
+噩 > è; #5669
+噪 > zào; #566A
+噫 > yī; #566B
+噬 > shì; #566C
+噭 > jiào; #566D
+噮 > yuān; #566E
+噯 > ài; #566F
+噰 > yōng; #5670
+噱 > jué; #5671
+噲 > kuài; #5672
+噳 > yŭ; #5673
+噴 > pēn; #5674
+噵 > dào; #5675
+噶 > gé; #5676
+噷 > xīn; #5677
+噸 > dūn; #5678
+噹 > dāng; #5679
+噻 > sai; #567B
+噼 > pī; #567C
+噽 > pĭ; #567D
+噾 > yīn; #567E
+噿 > zŭi; #567F
+嚀 > níng; #5680
+嚁 > dí; #5681
+嚂 > làn; #5682
+嚃 > tā; #5683
+嚄 > hùo; #5684
+嚅 > rú; #5685
+嚆 > hāo; #5686
+嚇 > xià; #5687
+嚈 > yà; #5688
+嚉 > dūo; #5689
+嚊 > xì; #568A
+嚋 > chóu; #568B
+嚌 > jì; #568C
+嚍 > jìn; #568D
+嚎 > háo; #568E
+嚏 > tì; #568F
+嚐 > cháng; #5690
+嚓 > cā; #5693
+嚔 > tì; #5694
+嚕 > lū; #5695
+嚖 > hùi; #5696
+嚗 > bó; #5697
+嚘 > yōu; #5698
+嚙 > niè; #5699
+嚚 > yín; #569A
+嚛 > hù; #569B
+嚜 > mò; #569C
+嚝 > huāng; #569D
+嚞 > zhé; #569E
+嚟 > lí; #569F
+嚠 > líu; #56A0
+嚢 > náng; #56A2
+嚣 > xiāo; #56A3
+嚤 > mó; #56A4
+嚥 > yàn; #56A5
+嚦 > lì; #56A6
+嚧 > lú; #56A7
+嚨 > lóng; #56A8
+嚩 > fú; #56A9
+嚪 > dàn; #56AA
+嚫 > chèn; #56AB
+嚬 > pín; #56AC
+嚭 > pĭ; #56AD
+嚮 > xiàng; #56AE
+嚯 > hùo; #56AF
+嚰 > mó; #56B0
+嚱 > xì; #56B1
+嚲 > dŭo; #56B2
+嚳 > kù; #56B3
+嚴 > yán; #56B4
+嚵 > chán; #56B5
+嚶 > yīng; #56B6
+嚷 > răng; #56B7
+嚸 > diăn; #56B8
+嚹 > lā; #56B9
+嚺 > tà; #56BA
+嚻 > xiāo; #56BB
+嚼 > jiáo; #56BC
+嚽 > chùo; #56BD
+嚾 > huān; #56BE
+嚿 > hùo; #56BF
+囀 > zhuàn; #56C0
+囁 > niè; #56C1
+囂 > xiāo; #56C2
+囃 > cà; #56C3
+囄 > lí; #56C4
+囅 > chăn; #56C5
+囆 > chài; #56C6
+囇 > lì; #56C7
+囈 > yì; #56C8
+囉 > lūo; #56C9
+囊 > náng; #56CA
+囋 > zàn; #56CB
+囌 > sū; #56CC
+囍 > xĭ; #56CD
+囏 > jiān; #56CF
+囐 > zá; #56D0
+囑 > zhŭ; #56D1
+囒 > lán; #56D2
+囓 > niè; #56D3
+囔 > nāng; #56D4
+囗 > wéi; #56D7
+囘 > húi; #56D8
+囙 > yīn; #56D9
+囚 > qíu; #56DA
+四 > sì; #56DB
+囜 > nín; #56DC
+囝 > jiăn; #56DD
+回 > húi; #56DE
+囟 > xìn; #56DF
+因 > yīn; #56E0
+囡 > nān; #56E1
+团 > tuán; #56E2
+団 > tuán; #56E3
+囤 > dùn; #56E4
+囥 > kàng; #56E5
+囦 > yuān; #56E6
+囧 > jĭong; #56E7
+囨 > piān; #56E8
+囩 > yùn; #56E9
+囪 > cōng; #56EA
+囫 > hú; #56EB
+囬 > húi; #56EC
+园 > yuán; #56ED
+囮 > yóu; #56EE
+囯 > gúo; #56EF
+困 > kùn; #56F0
+囱 > cōng; #56F1
+囲 > wéi; #56F2
+図 > tú; #56F3
+围 > wéi; #56F4
+囵 > lún; #56F5
+囶 > gúo; #56F6
+囷 > qūn; #56F7
+囸 > rì; #56F8
+囹 > líng; #56F9
+固 > gù; #56FA
+囻 > gúo; #56FB
+囼 > tāi; #56FC
+国 > gúo; #56FD
+图 > tú; #56FE
+囿 > yòu; #56FF
+圀 > gúo; #5700
+圁 > yín; #5701
+圂 > hùn; #5702
+圃 > pŭ; #5703
+圄 > yŭ; #5704
+圅 > hán; #5705
+圆 > yuán; #5706
+圇 > lún; #5707
+圈 > quān; #5708
+圉 > yŭ; #5709
+圊 > qīng; #570A
+國 > gúo; #570B
+圌 > chuán; #570C
+圍 > wéi; #570D
+圎 > yuán; #570E
+圏 > quān; #570F
+圐 > kū; #5710
+圑 > fù; #5711
+園 > yuán; #5712
+圓 > yuán; #5713
+圔 > è; #5714
+圕 > tú' 'shū' 'guăn; #5715
+圖 > tú; #5716
+圗 > tú; #5717
+團 > tuán; #5718
+圙 > lǜe; #5719
+圚 > hùi; #571A
+圛 > yì; #571B
+圜 > yuán; #571C
+圝 > luán; #571D
+圞 > luán; #571E
+土 > tŭ; #571F
+圠 > yà; #5720
+圡 > tŭ; #5721
+圢 > tīng; #5722
+圣 > shèng; #5723
+圤 > pŭ; #5724
+圥 > lù; #5725
+圧 > yā; #5727
+在 > zài; #5728
+圩 > wéi; #5729
+圪 > gē; #572A
+圫 > yù; #572B
+圬 > wū; #572C
+圭 > gūi; #572D
+圮 > pĭ; #572E
+圯 > yí; #572F
+地 > dì; #5730
+圱 > qiān; #5731
+圲 > qiān; #5732
+圳 > zhèn; #5733
+圴 > zhúo; #5734
+圵 > dàng; #5735
+圶 > qià; #5736
+圹 > kuàng; #5739
+场 > cháng; #573A
+圻 > qí; #573B
+圼 > niè; #573C
+圽 > mò; #573D
+圾 > jí; #573E
+圿 > jiá; #573F
+址 > zhĭ; #5740
+坁 > zhĭ; #5741
+坂 > băn; #5742
+坃 > xūn; #5743
+坄 > tóu; #5744
+坅 > qĭn; #5745
+坆 > fén; #5746
+均 > jūn; #5747
+坈 > kēng; #5748
+坉 > tún; #5749
+坊 > fāng; #574A
+坋 > fèn; #574B
+坌 > bèn; #574C
+坍 > tān; #574D
+坎 > kăn; #574E
+坏 > pī; #574F
+坐 > zùo; #5750
+坑 > kēng; #5751
+坒 > bì; #5752
+坓 > xíng; #5753
+坔 > dì; #5754
+坕 > jīng; #5755
+坖 > jì; #5756
+块 > kuài; #5757
+坘 > dĭ; #5758
+坙 > jīng; #5759
+坚 > jiān; #575A
+坛 > tán; #575B
+坜 > lì; #575C
+坝 > bà; #575D
+坞 > wù; #575E
+坟 > fén; #575F
+坠 > zhùi; #5760
+坡 > pō; #5761
+坢 > păn; #5762
+坣 > tāng; #5763
+坤 > kūn; #5764
+坥 > qū; #5765
+坦 > tăn; #5766
+坧 > zhī; #5767
+坨 > túo; #5768
+坩 > gān; #5769
+坪 > píng; #576A
+坫 > diàn; #576B
+坬 > guà; #576C
+坭 > ní; #576D
+坮 > tái; #576E
+坯 > pī; #576F
+坰 > jīong; #5770
+坱 > yăng; #5771
+坲 > fó; #5772
+坳 > ào; #5773
+坴 > lìu; #5774
+坵 > qīu; #5775
+坶 > mù; #5776
+坷 > kĕ; #5777
+坸 > gòu; #5778
+坹 > xuè; #5779
+坺 > bá; #577A
+坻 > chí; #577B
+坼 > chè; #577C
+坽 > líng; #577D
+坾 > zhù; #577E
+坿 > fù; #577F
+垀 > hū; #5780
+垁 > zhì; #5781
+垂 > chúi; #5782
+垃 > lā; #5783
+垄 > lŏng; #5784
+垅 > lŏng; #5785
+垆 > lú; #5786
+垇 > ào; #5787
+垉 > páo; #5789
+型 > xíng; #578B
+垌 > dòng; #578C
+垍 > jì; #578D
+垎 > kè; #578E
+垏 > lù; #578F
+垐 > cí; #5790
+垑 > chĭ; #5791
+垒 > lĕi; #5792
+垓 > gāi; #5793
+垔 > yīn; #5794
+垕 > hòu; #5795
+垖 > dūi; #5796
+垗 > zhào; #5797
+垘 > fú; #5798
+垙 > guāng; #5799
+垚 > yáo; #579A
+垛 > dŭo; #579B
+垜 > dŭo; #579C
+垝 > gŭi; #579D
+垞 > chá; #579E
+垟 > yáng; #579F
+垠 > yín; #57A0
+垡 > fá; #57A1
+垢 > gòu; #57A2
+垣 > yuán; #57A3
+垤 > dié; #57A4
+垥 > xié; #57A5
+垦 > kĕn; #57A6
+垧 > jīong; #57A7
+垨 > shŏu; #57A8
+垩 > è; #57A9
+垫 > diàn; #57AB
+垬 > hóng; #57AC
+垭 > wù; #57AD
+垮 > kuă; #57AE
+垱 > dàng; #57B1
+垲 > kăi; #57B2
+垴 > năo; #57B4
+垵 > ăn; #57B5
+垶 > xīng; #57B6
+垷 > xiàn; #57B7
+垸 > huàn; #57B8
+垹 > bāng; #57B9
+垺 > pēi; #57BA
+垻 > bà; #57BB
+垼 > yì; #57BC
+垽 > yìn; #57BD
+垾 > hàn; #57BE
+垿 > xù; #57BF
+埀 > chúi; #57C0
+埁 > cén; #57C1
+埂 > gĕng; #57C2
+埃 > āi; #57C3
+埄 > péng; #57C4
+埅 > fáng; #57C5
+埆 > què; #57C6
+埇 > yŏng; #57C7
+埈 > xùn; #57C8
+埉 > jiá; #57C9
+埊 > dì; #57CA
+埋 > mái; #57CB
+埌 > làng; #57CC
+埍 > xuàn; #57CD
+城 > chéng; #57CE
+埏 > yán; #57CF
+埐 > jīn; #57D0
+埑 > zhé; #57D1
+埒 > lèi; #57D2
+埓 > liè; #57D3
+埔 > bù; #57D4
+埕 > chéng; #57D5
+埗 > bù; #57D7
+埘 > shí; #57D8
+埙 > xūn; #57D9
+埚 > gūo; #57DA
+埛 > jīong; #57DB
+埜 > yĕ; #57DC
+埝 > niàn; #57DD
+埞 > dĭ; #57DE
+域 > yù; #57DF
+埠 > bù; #57E0
+埡 > yà; #57E1
+埢 > juăn; #57E2
+埣 > sùi; #57E3
+埤 > pí; #57E4
+埥 > chēng; #57E5
+埦 > wăn; #57E6
+埧 > jù; #57E7
+埨 > lŭn; #57E8
+埩 > zhēng; #57E9
+埪 > kōng; #57EA
+埫 > chŏng; #57EB
+埬 > dōng; #57EC
+埭 > dài; #57ED
+埮 > tàn; #57EE
+埯 > ăn; #57EF
+埰 > cài; #57F0
+埱 > shú; #57F1
+埲 > bĕng; #57F2
+埳 > kăn; #57F3
+埴 > zhí; #57F4
+埵 > dŭo; #57F5
+埶 > yì; #57F6
+執 > zhí; #57F7
+埸 > yì; #57F8
+培 > péi; #57F9
+基 > jī; #57FA
+埻 > zhŭn; #57FB
+埼 > qí; #57FC
+埽 > sào; #57FD
+埾 > jù; #57FE
+埿 > ní; #57FF
+堀 > kū; #5800
+堁 > kè; #5801
+堂 > táng; #5802
+堃 > kūn; #5803
+堄 > nì; #5804
+堅 > jiān; #5805
+堆 > dūi; #5806
+堇 > jĭn; #5807
+堈 > gāng; #5808
+堉 > yù; #5809
+堊 > è; #580A
+堋 > péng; #580B
+堌 > gù; #580C
+堍 > tù; #580D
+堎 > lèng; #580E
+堐 > yá; #5810
+堑 > qiàn; #5811
+堓 > àn; #5813
+堕 > dùo; #5815
+堖 > năo; #5816
+堗 > tū; #5817
+堘 > chéng; #5818
+堙 > yīn; #5819
+堚 > hún; #581A
+堛 > bì; #581B
+堜 > liàn; #581C
+堝 > gūo; #581D
+堞 > dié; #581E
+堟 > zhuàn; #581F
+堠 > hòu; #5820
+堡 > băo; #5821
+堢 > băo; #5822
+堣 > yú; #5823
+堤 > dī; #5824
+堥 > máo; #5825
+堦 > jiē; #5826
+堧 > ruán; #5827
+堨 > è; #5828
+堩 > gèng; #5829
+堪 > kān; #582A
+堫 > zōng; #582B
+堬 > yú; #582C
+堭 > huáng; #582D
+堮 > è; #582E
+堯 > yáo; #582F
+堰 > yàn; #5830
+報 > bào; #5831
+堲 > jí; #5832
+堳 > méi; #5833
+場 > cháng; #5834
+堵 > dŭ; #5835
+堶 > túo; #5836
+堷 > yìn; #5837
+堸 > féng; #5838
+堹 > zhòng; #5839
+堺 > jiè; #583A
+堻 > zhēn; #583B
+堼 > fēng; #583C
+堽 > gāng; #583D
+堾 > chuăn; #583E
+堿 > jiăn; #583F
+塂 > xiàng; #5842
+塃 > huāng; #5843
+塄 > léng; #5844
+塅 > duàn; #5845
+塇 > xuān; #5847
+塈 > jì; #5848
+塉 > jí; #5849
+塊 > kuài; #584A
+塋 > yíng; #584B
+塌 > tā; #584C
+塍 > chéng; #584D
+塎 > yŏng; #584E
+塏 > kăi; #584F
+塐 > sù; #5850
+塑 > sù; #5851
+塒 > shí; #5852
+塓 > mì; #5853
+塔 > tă; #5854
+塕 > wĕng; #5855
+塖 > chéng; #5856
+塗 > tú; #5857
+塘 > táng; #5858
+塙 > què; #5859
+塚 > zhŏng; #585A
+塛 > lì; #585B
+塜 > péng; #585C
+塝 > bàng; #585D
+塞 > sāi; #585E
+塟 > zàng; #585F
+塠 > dūi; #5860
+塡 > tián; #5861
+塢 > wù; #5862
+塣 > chĕng; #5863
+塤 > xūn; #5864
+塥 > gé; #5865
+塦 > zhèn; #5866
+塧 > ài; #5867
+塨 > gōng; #5868
+塩 > yán; #5869
+塪 > kăn; #586A
+填 > tián; #586B
+塬 > yuán; #586C
+塭 > wēn; #586D
+塮 > xiè; #586E
+塯 > lìu; #586F
+塱 > lăng; #5871
+塲 > cháng; #5872
+塳 > péng; #5873
+塴 > bèng; #5874
+塵 > chén; #5875
+塶 > cù; #5876
+塷 > lŭ; #5877
+塸 > ŏu; #5878
+塹 > qiàn; #5879
+塺 > méi; #587A
+塻 > mò; #587B
+塼 > zhuān; #587C
+塽 > shuăng; #587D
+塾 > shú; #587E
+塿 > lŏu; #587F
+墀 > chí; #5880
+墁 > màn; #5881
+墂 > biāo; #5882
+境 > jìng; #5883
+墄 > qī; #5884
+墅 > shù; #5885
+墆 > dì; #5886
+墇 > zhāng; #5887
+墈 > kàn; #5888
+墉 > yōng; #5889
+墊 > diàn; #588A
+墋 > chĕn; #588B
+墌 > zhī; #588C
+墍 > xì; #588D
+墎 > gūo; #588E
+墏 > qiăng; #588F
+墐 > jìn; #5890
+墑 > dī; #5891
+墒 > shāng; #5892
+墓 > mù; #5893
+墔 > cūi; #5894
+墕 > yàn; #5895
+墖 > tă; #5896
+増 > zēng; #5897
+墘 > qí; #5898
+墙 > qiáng; #5899
+墚 > liáng; #589A
+墜 > zhùi; #589C
+墝 > qiāo; #589D
+增 > zēng; #589E
+墟 > xū; #589F
+墠 > shàn; #58A0
+墡 > shàn; #58A1
+墢 > bá; #58A2
+墣 > pū; #58A3
+墤 > kuài; #58A4
+墥 > dŏng; #58A5
+墦 > fán; #58A6
+墧 > què; #58A7
+墨 > mò; #58A8
+墩 > dūn; #58A9
+墪 > dūn; #58AA
+墫 > dūn; #58AB
+墬 > dì; #58AC
+墭 > shèng; #58AD
+墮 > dùo; #58AE
+墯 > dùo; #58AF
+墰 > tán; #58B0
+墱 > dèng; #58B1
+墲 > wŭ; #58B2
+墳 > fén; #58B3
+墴 > huáng; #58B4
+墵 > tán; #58B5
+墶 > dā; #58B6
+墷 > yè; #58B7
+墺 > yù; #58BA
+墻 > qiáng; #58BB
+墼 > jī; #58BC
+墽 > qiāo; #58BD
+墾 > kĕn; #58BE
+墿 > yì; #58BF
+壀 > pí; #58C0
+壁 > bì; #58C1
+壂 > diàn; #58C2
+壃 > jiāng; #58C3
+壄 > yĕ; #58C4
+壅 > yōng; #58C5
+壆 > bó; #58C6
+壇 > tán; #58C7
+壈 > lăn; #58C8
+壉 > jù; #58C9
+壊 > huài; #58CA
+壋 > dàng; #58CB
+壌 > răng; #58CC
+壍 > qiàn; #58CD
+壎 > xūn; #58CE
+壏 > làn; #58CF
+壐 > xĭ; #58D0
+壑 > hè; #58D1
+壒 > ài; #58D2
+壓 > yā; #58D3
+壔 > dăo; #58D4
+壕 > háo; #58D5
+壖 > ruán; #58D6
+壘 > lĕi; #58D8
+壙 > kuàng; #58D9
+壚 > lú; #58DA
+壛 > yán; #58DB
+壜 > tán; #58DC
+壝 > wéi; #58DD
+壞 > huài; #58DE
+壟 > lŏng; #58DF
+壠 > lŏng; #58E0
+壡 > rùi; #58E1
+壢 > lì; #58E2
+壣 > lín; #58E3
+壤 > răng; #58E4
+壦 > xūn; #58E6
+壧 > yán; #58E7
+壨 > léi; #58E8
+壩 > bà; #58E9
+士 > shì; #58EB
+壬 > rén; #58EC
+壮 > zhuàng; #58EE
+壯 > zhuàng; #58EF
+声 > shēng; #58F0
+壱 > yī; #58F1
+売 > mài; #58F2
+壳 > ké; #58F3
+壴 > zhŭ; #58F4
+壵 > zhuàng; #58F5
+壶 > hú; #58F6
+壷 > hú; #58F7
+壸 > kŭn; #58F8
+壹 > yī; #58F9
+壺 > hú; #58FA
+壻 > xù; #58FB
+壼 > kŭn; #58FC
+壽 > shòu; #58FD
+壾 > măng; #58FE
+壿 > zŭn; #58FF
+夀 > shòu; #5900
+夁 > yī; #5901
+夂 > zhĭ; #5902
+夃 > gū; #5903
+处 > chù; #5904
+夅 > jiàng; #5905
+夆 > fēng; #5906
+备 > bèi; #5907
+変 > biàn; #5909
+夊 > sūi; #590A
+夋 > qūn; #590B
+夌 > líng; #590C
+复 > fù; #590D
+夎 > zùo; #590E
+夏 > xià; #590F
+夐 > xìong; #5910
+夒 > náo; #5912
+夓 > xià; #5913
+夔 > kúi; #5914
+夕 > xī; #5915
+外 > wài; #5916
+夗 > yuàn; #5917
+夘 > măo; #5918
+夙 > sù; #5919
+多 > dūo; #591A
+夛 > dūo; #591B
+夜 > yè; #591C
+夝 > qíng; #591D
+够 > gòu; #591F
+夠 > gòu; #5920
+夡 > qì; #5921
+夢 > mèng; #5922
+夣 > mèng; #5923
+夤 > yín; #5924
+夥 > hŭo; #5925
+夦 > chèn; #5926
+大 > dà; #5927
+夨 > zè; #5928
+天 > tiān; #5929
+太 > tài; #592A
+夫 > fū; #592B
+夬 > guài; #592C
+夭 > yăo; #592D
+央 > yāng; #592E
+夯 > hāng; #592F
+夰 > găo; #5930
+失 > shī; #5931
+夲 > bĕn; #5932
+夳 > tài; #5933
+头 > tóu; #5934
+夵 > yăn; #5935
+夶 > bĭ; #5936
+夷 > yí; #5937
+夸 > kuā; #5938
+夹 > jiā; #5939
+夺 > dúo; #593A
+夼 > kuăng; #593C
+夽 > yùn; #593D
+夾 > jiā; #593E
+夿 > pā; #593F
+奀 > ēn; #5940
+奁 > lián; #5941
+奂 > huàn; #5942
+奃 > dì; #5943
+奄 > yăn; #5944
+奅 > pào; #5945
+奆 > quăn; #5946
+奇 > qí; #5947
+奈 > nài; #5948
+奉 > fèng; #5949
+奊 > xié; #594A
+奋 > fèn; #594B
+奌 > diăn; #594C
+奎 > kúi; #594E
+奏 > zòu; #594F
+奐 > huàn; #5950
+契 > qì; #5951
+奒 > kāi; #5952
+奓 > zhà; #5953
+奔 > bēn; #5954
+奕 > yì; #5955
+奖 > jiăng; #5956
+套 > tào; #5957
+奘 > zàng; #5958
+奙 > bĕn; #5959
+奚 > xī; #595A
+奛 > xiăng; #595B
+奜 > fĕi; #595C
+奝 > diāo; #595D
+奞 > xùn; #595E
+奟 > kēng; #595F
+奠 > diàn; #5960
+奡 > ào; #5961
+奢 > shē; #5962
+奣 > wĕng; #5963
+奤 > păn; #5964
+奥 > ào; #5965
+奦 > wù; #5966
+奧 > ào; #5967
+奨 > jiăng; #5968
+奩 > lián; #5969
+奪 > dúo; #596A
+奫 > yūn; #596B
+奬 > jiăng; #596C
+奭 > shì; #596D
+奮 > fèn; #596E
+奯 > hùo; #596F
+奰 > bì; #5970
+奱 > lián; #5971
+奲 > dŭo; #5972
+女 > nǚ; #5973
+奴 > nú; #5974
+奵 > dīng; #5975
+奶 > năi; #5976
+奷 > qiān; #5977
+奸 > jiān; #5978
+她 > tā; #5979
+奺 > jĭu; #597A
+奻 > nán; #597B
+奼 > chà; #597C
+好 > hăo; #597D
+奾 > xiān; #597E
+奿 > fàn; #597F
+妀 > jĭ; #5980
+妁 > shùo; #5981
+如 > rú; #5982
+妃 > fēi; #5983
+妄 > wàng; #5984
+妅 > hóng; #5985
+妆 > zhuāng; #5986
+妇 > fù; #5987
+妈 > mā; #5988
+妉 > dān; #5989
+妊 > rèn; #598A
+妋 > fū; #598B
+妌 > jìng; #598C
+妍 > yán; #598D
+妎 > xiè; #598E
+妏 > wèn; #598F
+妐 > zhōng; #5990
+妑 > pā; #5991
+妒 > dù; #5992
+妓 > jì; #5993
+妔 > kēng; #5994
+妕 > zhòng; #5995
+妖 > yāo; #5996
+妗 > jìn; #5997
+妘 > yún; #5998
+妙 > miào; #5999
+妚 > pēi; #599A
+妜 > yuè; #599C
+妝 > zhuāng; #599D
+妞 > nīu; #599E
+妟 > yàn; #599F
+妠 > nà; #59A0
+妡 > xīn; #59A1
+妢 > fén; #59A2
+妣 > bĭ; #59A3
+妤 > yú; #59A4
+妥 > tŭo; #59A5
+妦 > fēng; #59A6
+妧 > yuán; #59A7
+妨 > fáng; #59A8
+妩 > wŭ; #59A9
+妪 > yù; #59AA
+妫 > gūi; #59AB
+妬 > dù; #59AC
+妭 > bá; #59AD
+妮 > nī; #59AE
+妯 > zhóu; #59AF
+妰 > zhúo; #59B0
+妱 > zhāo; #59B1
+妲 > dá; #59B2
+妳 > năi; #59B3
+妴 > yuăn; #59B4
+妵 > tŏu; #59B5
+妶 > xuán; #59B6
+妷 > zhí; #59B7
+妸 > ē; #59B8
+妹 > mèi; #59B9
+妺 > mò; #59BA
+妻 > qī; #59BB
+妼 > bì; #59BC
+妽 > shēn; #59BD
+妾 > qiè; #59BE
+妿 > ē; #59BF
+姀 > hé; #59C0
+姁 > xŭ; #59C1
+姂 > fá; #59C2
+姃 > zhēng; #59C3
+姄 > mín; #59C4
+姅 > bàn; #59C5
+姆 > mŭ; #59C6
+姇 > fū; #59C7
+姈 > líng; #59C8
+姉 > zĭ; #59C9
+姊 > zĭ; #59CA
+始 > shĭ; #59CB
+姌 > răn; #59CC
+姍 > shān; #59CD
+姎 > yāng; #59CE
+姏 > mán; #59CF
+姐 > jiĕ; #59D0
+姑 > gū; #59D1
+姒 > sì; #59D2
+姓 > xìng; #59D3
+委 > wĕi; #59D4
+姕 > zī; #59D5
+姖 > jù; #59D6
+姗 > shān; #59D7
+姘 > pīn; #59D8
+姙 > rèn; #59D9
+姚 > yáo; #59DA
+姛 > tŏng; #59DB
+姜 > jiāng; #59DC
+姝 > shū; #59DD
+姞 > jí; #59DE
+姟 > gāi; #59DF
+姠 > shàng; #59E0
+姡 > kùo; #59E1
+姢 > juān; #59E2
+姣 > jiāo; #59E3
+姤 > gòu; #59E4
+姥 > mŭ; #59E5
+姦 > jiān; #59E6
+姧 > jiān; #59E7
+姨 > yí; #59E8
+姩 > niàn; #59E9
+姪 > zhí; #59EA
+姫 > jī; #59EB
+姬 > jī; #59EC
+姭 > xiàn; #59ED
+姮 > héng; #59EE
+姯 > guāng; #59EF
+姰 > jūn; #59F0
+姱 > kuā; #59F1
+姲 > yàn; #59F2
+姳 > mĭng; #59F3
+姴 > liè; #59F4
+姵 > pèi; #59F5
+姶 > yăn; #59F6
+姷 > yòu; #59F7
+姸 > yán; #59F8
+姹 > chà; #59F9
+姺 > shēn; #59FA
+姻 > yīn; #59FB
+姼 > chĭ; #59FC
+姽 > gŭi; #59FD
+姾 > quān; #59FE
+姿 > zī; #59FF
+娀 > sōng; #5A00
+威 > wēi; #5A01
+娂 > hóng; #5A02
+娃 > wá; #5A03
+娄 > lóu; #5A04
+娅 > yà; #5A05
+娆 > răo; #5A06
+娇 > jiāo; #5A07
+娈 > luán; #5A08
+娉 > pīng; #5A09
+娊 > xiàn; #5A0A
+娋 > shào; #5A0B
+娌 > lĭ; #5A0C
+娍 > chéng; #5A0D
+娎 > xiào; #5A0E
+娏 > máng; #5A0F
+娐 > fu; #5A10
+娑 > sūo; #5A11
+娒 > wŭ; #5A12
+娓 > wĕi; #5A13
+娔 > kè; #5A14
+娕 > lài; #5A15
+娖 > chùo; #5A16
+娗 > dìng; #5A17
+娘 > niáng; #5A18
+娙 > xíng; #5A19
+娚 > nán; #5A1A
+娛 > yú; #5A1B
+娜 > núo; #5A1C
+娝 > pēi; #5A1D
+娞 > nĕi; #5A1E
+娟 > juān; #5A1F
+娠 > shēn; #5A20
+娡 > zhì; #5A21
+娢 > hán; #5A22
+娣 > dì; #5A23
+娤 > zhuāng; #5A24
+娥 > é; #5A25
+娦 > pín; #5A26
+娧 > tùi; #5A27
+娨 > hàn; #5A28
+娩 > miăn; #5A29
+娪 > wú; #5A2A
+娫 > yán; #5A2B
+娬 > wŭ; #5A2C
+娭 > xī; #5A2D
+娮 > yán; #5A2E
+娯 > yú; #5A2F
+娰 > sì; #5A30
+娱 > yú; #5A31
+娲 > wā; #5A32
+娴 > xián; #5A34
+娵 > jū; #5A35
+娶 > qŭ; #5A36
+娷 > shùi; #5A37
+娸 > qī; #5A38
+娹 > xián; #5A39
+娺 > zhūi; #5A3A
+娻 > dōng; #5A3B
+娼 > chāng; #5A3C
+娽 > lù; #5A3D
+娾 > ăi; #5A3E
+娿 > ē; #5A3F
+婀 > ē; #5A40
+婁 > lóu; #5A41
+婂 > mián; #5A42
+婃 > cóng; #5A43
+婄 > pŏu; #5A44
+婅 > jú; #5A45
+婆 > pó; #5A46
+婇 > căi; #5A47
+婈 > díng; #5A48
+婉 > wăn; #5A49
+婊 > biăo; #5A4A
+婋 > xiāo; #5A4B
+婌 > shŭ; #5A4C
+婍 > qĭ; #5A4D
+婎 > hūi; #5A4E
+婏 > fù; #5A4F
+婐 > ē; #5A50
+婑 > wŏ; #5A51
+婒 > tán; #5A52
+婓 > fēi; #5A53
+婔 > wei; #5A54
+婕 > jié; #5A55
+婖 > tiān; #5A56
+婗 > ní; #5A57
+婘 > quán; #5A58
+婙 > jìng; #5A59
+婚 > hūn; #5A5A
+婛 > jīng; #5A5B
+婜 > qiān; #5A5C
+婝 > diàn; #5A5D
+婞 > xìng; #5A5E
+婟 > hù; #5A5F
+婠 > wà; #5A60
+婡 > lái; #5A61
+婢 > bì; #5A62
+婣 > yīn; #5A63
+婤 > chōu; #5A64
+婥 > chùo; #5A65
+婦 > fù; #5A66
+婧 > jìng; #5A67
+婨 > lún; #5A68
+婩 > yàn; #5A69
+婪 > lán; #5A6A
+婫 > kūn; #5A6B
+婬 > yín; #5A6C
+婭 > yà; #5A6D
+婮 > ju; #5A6E
+婯 > lì; #5A6F
+婰 > diăn; #5A70
+婱 > xián; #5A71
+婳 > huà; #5A73
+婴 > yīng; #5A74
+婵 > chán; #5A75
+婶 > shĕn; #5A76
+婷 > tíng; #5A77
+婸 > dàng; #5A78
+婹 > yăo; #5A79
+婺 > wù; #5A7A
+婻 > nàn; #5A7B
+婼 > rùo; #5A7C
+婽 > jiă; #5A7D
+婾 > tōu; #5A7E
+婿 > xù; #5A7F
+媀 > yú; #5A80
+媁 > wēi; #5A81
+媂 > tí; #5A82
+媃 > róu; #5A83
+媄 > mĕi; #5A84
+媅 > dān; #5A85
+媆 > ruăn; #5A86
+媇 > qīn; #5A87
+媈 > hui; #5A88
+媉 > wū; #5A89
+媊 > qián; #5A8A
+媋 > chūn; #5A8B
+媌 > máo; #5A8C
+媍 > fù; #5A8D
+媎 > jiĕ; #5A8E
+媏 > duān; #5A8F
+媐 > xī; #5A90
+媑 > zhòng; #5A91
+媒 > méi; #5A92
+媓 > huáng; #5A93
+媔 > mián; #5A94
+媕 > ān; #5A95
+媖 > yīng; #5A96
+媗 > xuān; #5A97
+媘 > jie; #5A98
+媙 > wēi; #5A99
+媚 > mèi; #5A9A
+媛 > yuàn; #5A9B
+媜 > zhēn; #5A9C
+媝 > qīu; #5A9D
+媞 > tí; #5A9E
+媟 > xiè; #5A9F
+媠 > tŭo; #5AA0
+媡 > liàn; #5AA1
+媢 > mào; #5AA2
+媣 > răn; #5AA3
+媤 > sī; #5AA4
+媥 > piān; #5AA5
+媦 > wèi; #5AA6
+媧 > wā; #5AA7
+媨 > jìu; #5AA8
+媩 > hú; #5AA9
+媪 > ăo; #5AAA
+媭 > xū; #5AAD
+媮 > tōu; #5AAE
+媯 > gūi; #5AAF
+媰 > zōu; #5AB0
+媱 > yáo; #5AB1
+媲 > pì; #5AB2
+媳 > xí; #5AB3
+媴 > yuán; #5AB4
+媵 > yìng; #5AB5
+媶 > róng; #5AB6
+媷 > rù; #5AB7
+媸 > chī; #5AB8
+媹 > líu; #5AB9
+媺 > mĕi; #5ABA
+媻 > pán; #5ABB
+媼 > ăo; #5ABC
+媽 > mā; #5ABD
+媾 > gòu; #5ABE
+媿 > kùi; #5ABF
+嫀 > qín; #5AC0
+嫁 > jià; #5AC1
+嫂 > săo; #5AC2
+嫃 > zhēn; #5AC3
+嫄 > yuán; #5AC4
+嫅 > chā; #5AC5
+嫆 > yóng; #5AC6
+嫇 > míng; #5AC7
+嫈 > yīng; #5AC8
+嫉 > jí; #5AC9
+嫊 > sù; #5ACA
+嫋 > niăo; #5ACB
+嫌 > xián; #5ACC
+嫍 > tāo; #5ACD
+嫎 > páng; #5ACE
+嫏 > láng; #5ACF
+嫐 > năo; #5AD0
+嫑 > báo; #5AD1
+嫒 > ài; #5AD2
+嫓 > pì; #5AD3
+嫔 > pín; #5AD4
+嫕 > yì; #5AD5
+嫖 > piào; #5AD6
+嫗 > yù; #5AD7
+嫘 > léi; #5AD8
+嫙 > xuán; #5AD9
+嫚 > màn; #5ADA
+嫛 > yī; #5ADB
+嫜 > zhāng; #5ADC
+嫝 > kāng; #5ADD
+嫞 > yóng; #5ADE
+嫟 > nì; #5ADF
+嫠 > lí; #5AE0
+嫡 > dí; #5AE1
+嫢 > gūi; #5AE2
+嫣 > yān; #5AE3
+嫤 > jìn; #5AE4
+嫥 > zhuān; #5AE5
+嫦 > cháng; #5AE6
+嫧 > cè; #5AE7
+嫨 > hān; #5AE8
+嫩 > nèn; #5AE9
+嫪 > lào; #5AEA
+嫫 > mó; #5AEB
+嫬 > zhē; #5AEC
+嫭 > hù; #5AED
+嫮 > hù; #5AEE
+嫯 > ào; #5AEF
+嫰 > nèn; #5AF0
+嫱 > qiáng; #5AF1
+嫳 > piè; #5AF3
+嫴 > gū; #5AF4
+嫵 > wŭ; #5AF5
+嫶 > jiáo; #5AF6
+嫷 > tŭo; #5AF7
+嫸 > zhăn; #5AF8
+嫹 > máo; #5AF9
+嫺 > xián; #5AFA
+嫻 > xián; #5AFB
+嫼 > mò; #5AFC
+嫽 > liáo; #5AFD
+嫾 > lián; #5AFE
+嫿 > huà; #5AFF
+嬀 > gūi; #5B00
+嬁 > dēng; #5B01
+嬂 > zhī; #5B02
+嬃 > xū; #5B03
+嬄 > yi; #5B04
+嬅 > huá; #5B05
+嬆 > xī; #5B06
+嬇 > hùi; #5B07
+嬈 > răo; #5B08
+嬉 > xī; #5B09
+嬊 > yàn; #5B0A
+嬋 > chán; #5B0B
+嬌 > jiāo; #5B0C
+嬍 > mĕi; #5B0D
+嬎 > fàn; #5B0E
+嬏 > fān; #5B0F
+嬐 > xiān; #5B10
+嬑 > yì; #5B11
+嬒 > wèi; #5B12
+嬓 > jiào; #5B13
+嬔 > fù; #5B14
+嬕 > shì; #5B15
+嬖 > bì; #5B16
+嬗 > shàn; #5B17
+嬘 > sùi; #5B18
+嬙 > qiáng; #5B19
+嬚 > liăn; #5B1A
+嬛 > huán; #5B1B
+嬜 > xin; #5B1C
+嬝 > niăo; #5B1D
+嬞 > dŏng; #5B1E
+嬟 > yì; #5B1F
+嬠 > cán; #5B20
+嬡 > ài; #5B21
+嬢 > niáng; #5B22
+嬣 > néng; #5B23
+嬤 > mā; #5B24
+嬥 > tiăo; #5B25
+嬦 > chóu; #5B26
+嬧 > jìn; #5B27
+嬨 > cí; #5B28
+嬩 > yú; #5B29
+嬪 > pín; #5B2A
+嬫 > yong; #5B2B
+嬬 > xū; #5B2C
+嬭 > năi; #5B2D
+嬮 > yān; #5B2E
+嬯 > tái; #5B2F
+嬰 > yīng; #5B30
+嬱 > cán; #5B31
+嬲 > niăo; #5B32
+嬳 > wo; #5B33
+嬴 > yíng; #5B34
+嬵 > mián; #5B35
+嬷 > mā; #5B37
+嬸 > shĕn; #5B38
+嬹 > xìng; #5B39
+嬺 > nì; #5B3A
+嬻 > dú; #5B3B
+嬼 > lĭu; #5B3C
+嬽 > yuān; #5B3D
+嬾 > lăn; #5B3E
+嬿 > yàn; #5B3F
+孀 > shuāng; #5B40
+孁 > líng; #5B41
+孂 > jiăo; #5B42
+孃 > niáng; #5B43
+孄 > lăn; #5B44
+孅 > xiān; #5B45
+孆 > yīng; #5B46
+孇 > shuāng; #5B47
+孈 > shuāi; #5B48
+孉 > quán; #5B49
+孊 > mĭ; #5B4A
+孋 > lí; #5B4B
+孌 > luán; #5B4C
+孍 > yán; #5B4D
+孎 > zhŭ; #5B4E
+孏 > lăn; #5B4F
+子 > zĭ; #5B50
+孑 > jié; #5B51
+孒 > jué; #5B52
+孓 > jué; #5B53
+孔 > kŏng; #5B54
+孕 > yùn; #5B55
+孖 > zī; #5B56
+字 > zì; #5B57
+存 > cún; #5B58
+孙 > sūn; #5B59
+孚 > fú; #5B5A
+孛 > bèi; #5B5B
+孜 > zī; #5B5C
+孝 > xiào; #5B5D
+孞 > xìn; #5B5E
+孟 > mèng; #5B5F
+孠 > sì; #5B60
+孡 > tāi; #5B61
+孢 > bāo; #5B62
+季 > jì; #5B63
+孤 > gū; #5B64
+孥 > nú; #5B65
+学 > xué; #5B66
+孨 > zhuăn; #5B68
+孩 > hái; #5B69
+孪 > luán; #5B6A
+孫 > sūn; #5B6B
+孬 > huài; #5B6C
+孭 > miē; #5B6D
+孮 > cóng; #5B6E
+孯 > qiān; #5B6F
+孰 > shú; #5B70
+孱 > chán; #5B71
+孲 > yā; #5B72
+孳 > zī; #5B73
+孴 > nĭ; #5B74
+孵 > fū; #5B75
+孶 > zī; #5B76
+孷 > lí; #5B77
+學 > xué; #5B78
+孹 > bò; #5B79
+孺 > rú; #5B7A
+孻 > lái; #5B7B
+孼 > niè; #5B7C
+孽 > niè; #5B7D
+孾 > yīng; #5B7E
+孿 > luán; #5B7F
+宀 > mián; #5B80
+宁 > zhù; #5B81
+宂 > rŏng; #5B82
+它 > tā; #5B83
+宄 > gŭi; #5B84
+宅 > zhái; #5B85
+宆 > qíong; #5B86
+宇 > yŭ; #5B87
+守 > shŏu; #5B88
+安 > ān; #5B89
+宊 > tú; #5B8A
+宋 > sòng; #5B8B
+完 > wán; #5B8C
+宍 > ròu; #5B8D
+宎 > yăo; #5B8E
+宏 > hóng; #5B8F
+宐 > yí; #5B90
+宑 > jĭng; #5B91
+宒 > zhūn; #5B92
+宓 > mì; #5B93
+宔 > zhŭ; #5B94
+宕 > dàng; #5B95
+宖 > hóng; #5B96
+宗 > zōng; #5B97
+官 > guān; #5B98
+宙 > zhòu; #5B99
+定 > dìng; #5B9A
+宛 > wăn; #5B9B
+宜 > yí; #5B9C
+宝 > băo; #5B9D
+实 > shí; #5B9E
+実 > shí; #5B9F
+宠 > chŏng; #5BA0
+审 > shĕn; #5BA1
+客 > kè; #5BA2
+宣 > xuān; #5BA3
+室 > shì; #5BA4
+宥 > yòu; #5BA5
+宦 > huàn; #5BA6
+宧 > yí; #5BA7
+宨 > tiăo; #5BA8
+宩 > shĭ; #5BA9
+宪 > xiàn; #5BAA
+宫 > gōng; #5BAB
+宬 > chéng; #5BAC
+宭 > qún; #5BAD
+宮 > gōng; #5BAE
+宯 > xiāo; #5BAF
+宰 > zăi; #5BB0
+宱 > zhà; #5BB1
+宲 > băo; #5BB2
+害 > hài; #5BB3
+宴 > yàn; #5BB4
+宵 > xiāo; #5BB5
+家 > jiā; #5BB6
+宷 > shĕn; #5BB7
+宸 > chén; #5BB8
+容 > róng; #5BB9
+宺 > huăng; #5BBA
+宻 > mì; #5BBB
+宼 > kòu; #5BBC
+宽 > kuān; #5BBD
+宾 > bīn; #5BBE
+宿 > sù; #5BBF
+寀 > cài; #5BC0
+寁 > zăn; #5BC1
+寂 > jì; #5BC2
+寃 > yuān; #5BC3
+寄 > jì; #5BC4
+寅 > yín; #5BC5
+密 > mì; #5BC6
+寇 > kòu; #5BC7
+寈 > qīng; #5BC8
+寉 > què; #5BC9
+寊 > zhēn; #5BCA
+寋 > jiăn; #5BCB
+富 > fù; #5BCC
+寍 > níng; #5BCD
+寎 > bìng; #5BCE
+寏 > huán; #5BCF
+寐 > mèi; #5BD0
+寑 > qĭn; #5BD1
+寒 > hán; #5BD2
+寓 > yù; #5BD3
+寔 > shí; #5BD4
+寕 > níng; #5BD5
+寖 > qìn; #5BD6
+寗 > níng; #5BD7
+寘 > zhì; #5BD8
+寙 > yŭ; #5BD9
+寚 > băo; #5BDA
+寛 > kuān; #5BDB
+寜 > níng; #5BDC
+寝 > qĭn; #5BDD
+寞 > mò; #5BDE
+察 > chá; #5BDF
+寠 > jù; #5BE0
+寡 > guă; #5BE1
+寢 > qĭn; #5BE2
+寣 > hū; #5BE3
+寤 > wù; #5BE4
+寥 > liáo; #5BE5
+實 > shí; #5BE6
+寧 > zhù; #5BE7
+寨 > zhài; #5BE8
+審 > shĕn; #5BE9
+寪 > wĕi; #5BEA
+寫 > xiĕ; #5BEB
+寬 > kuān; #5BEC
+寭 > hùi; #5BED
+寮 > liáo; #5BEE
+寯 > jùn; #5BEF
+寰 > huán; #5BF0
+寱 > yì; #5BF1
+寲 > yí; #5BF2
+寳 > băo; #5BF3
+寴 > qìn; #5BF4
+寵 > chŏng; #5BF5
+寶 > băo; #5BF6
+寷 > fēng; #5BF7
+寸 > cùn; #5BF8
+对 > dùi; #5BF9
+寺 > sì; #5BFA
+寻 > xún; #5BFB
+导 > dăo; #5BFC
+寽 > lǜ; #5BFD
+対 > dùi; #5BFE
+寿 > shòu; #5BFF
+尀 > pŏ; #5C00
+封 > fēng; #5C01
+専 > zhuān; #5C02
+尃 > fū; #5C03
+射 > shè; #5C04
+尅 > kè; #5C05
+将 > jiāng; #5C06
+將 > jiāng; #5C07
+專 > zhuān; #5C08
+尉 > wèi; #5C09
+尊 > zūn; #5C0A
+尋 > xún; #5C0B
+尌 > shù; #5C0C
+對 > dùi; #5C0D
+導 > dăo; #5C0E
+小 > xiăo; #5C0F
+尐 > jī; #5C10
+少 > shăo; #5C11
+尒 > ĕr; #5C12
+尓 > ĕr; #5C13
+尔 > ĕr; #5C14
+尕 > gă; #5C15
+尖 > jiān; #5C16
+尗 > shú; #5C17
+尘 > chén; #5C18
+尙 > shàng; #5C19
+尚 > shàng; #5C1A
+尛 > mo; #5C1B
+尜 > gá; #5C1C
+尝 > cháng; #5C1D
+尞 > liào; #5C1E
+尟 > xiăn; #5C1F
+尠 > xiăn; #5C20
+尢 > wāng; #5C22
+尣 > wāng; #5C23
+尤 > yóu; #5C24
+尥 > liào; #5C25
+尦 > liào; #5C26
+尧 > yáo; #5C27
+尨 > máng; #5C28
+尩 > wāng; #5C29
+尪 > wāng; #5C2A
+尫 > wāng; #5C2B
+尬 > gà; #5C2C
+尭 > yáo; #5C2D
+尮 > dùo; #5C2E
+尯 > kùi; #5C2F
+尰 > zhŏng; #5C30
+就 > jìu; #5C31
+尲 > gān; #5C32
+尳 > gŭ; #5C33
+尴 > gān; #5C34
+尵 > túi; #5C35
+尶 > gān; #5C36
+尷 > gān; #5C37
+尸 > shī; #5C38
+尹 > yĭn; #5C39
+尺 > chĭ; #5C3A
+尻 > kāo; #5C3B
+尼 > ní; #5C3C
+尽 > jĭn; #5C3D
+尾 > wĕi; #5C3E
+尿 > niào; #5C3F
+局 > jú; #5C40
+屁 > pì; #5C41
+层 > céng; #5C42
+屃 > xì; #5C43
+屄 > bī; #5C44
+居 > jū; #5C45
+屆 > jiè; #5C46
+屇 > tián; #5C47
+屈 > qū; #5C48
+屉 > tì; #5C49
+届 > jiè; #5C4A
+屋 > wū; #5C4B
+屌 > diăo; #5C4C
+屍 > shī; #5C4D
+屎 > shĭ; #5C4E
+屏 > píng; #5C4F
+屐 > jī; #5C50
+屑 > xiè; #5C51
+屒 > chén; #5C52
+屓 > xì; #5C53
+屔 > ní; #5C54
+展 > zhăn; #5C55
+屖 > xī; #5C56
+屘 > măn; #5C58
+屙 > ē; #5C59
+屚 > lòu; #5C5A
+屛 > píng; #5C5B
+屜 > tì; #5C5C
+屝 > fèi; #5C5D
+属 > shŭ; #5C5E
+屟 > xiè; #5C5F
+屠 > tú; #5C60
+屡 > lǚ; #5C61
+屢 > lǚ; #5C62
+屣 > xĭ; #5C63
+層 > céng; #5C64
+履 > lǚ; #5C65
+屦 > jù; #5C66
+屧 > xiè; #5C67
+屨 > jù; #5C68
+屩 > juē; #5C69
+屪 > liáo; #5C6A
+屫 > jué; #5C6B
+屬 > shŭ; #5C6C
+屭 > xì; #5C6D
+屮 > chè; #5C6E
+屯 > tún; #5C6F
+屰 > nì; #5C70
+山 > shān; #5C71
+屳 > xiān; #5C73
+屴 > lì; #5C74
+屵 > xuē; #5C75
+屸 > lóng; #5C78
+屹 > yì; #5C79
+屺 > qĭ; #5C7A
+屻 > rèn; #5C7B
+屼 > wù; #5C7C
+屽 > hàn; #5C7D
+屾 > shēn; #5C7E
+屿 > yŭ; #5C7F
+岀 > chū; #5C80
+岁 > sùi; #5C81
+岂 > qĭ; #5C82
+岄 > yuè; #5C84
+岅 > băn; #5C85
+岆 > yăo; #5C86
+岇 > áng; #5C87
+岈 > yá; #5C88
+岉 > wù; #5C89
+岊 > jié; #5C8A
+岋 > è; #5C8B
+岌 > jí; #5C8C
+岍 > qiān; #5C8D
+岎 > fēn; #5C8E
+岏 > yuán; #5C8F
+岐 > qí; #5C90
+岑 > cén; #5C91
+岒 > qián; #5C92
+岓 > qí; #5C93
+岔 > chà; #5C94
+岕 > jiè; #5C95
+岖 > qū; #5C96
+岗 > găng; #5C97
+岘 > xiàn; #5C98
+岙 > ào; #5C99
+岚 > lán; #5C9A
+岛 > dăo; #5C9B
+岜 > bā; #5C9C
+岝 > zùo; #5C9D
+岞 > zùo; #5C9E
+岟 > yăng; #5C9F
+岠 > jù; #5CA0
+岡 > gāng; #5CA1
+岢 > kĕ; #5CA2
+岣 > gŏu; #5CA3
+岤 > xuè; #5CA4
+岥 > bēi; #5CA5
+岦 > lì; #5CA6
+岧 > tiáo; #5CA7
+岨 > jū; #5CA8
+岩 > yán; #5CA9
+岪 > fú; #5CAA
+岫 > xìu; #5CAB
+岬 > jiă; #5CAC
+岭 > líng; #5CAD
+岮 > túo; #5CAE
+岯 > pēi; #5CAF
+岰 > yŏu; #5CB0
+岱 > dài; #5CB1
+岲 > kuàng; #5CB2
+岳 > yuè; #5CB3
+岴 > qū; #5CB4
+岵 > hù; #5CB5
+岶 > pò; #5CB6
+岷 > mín; #5CB7
+岸 > àn; #5CB8
+岹 > tiáo; #5CB9
+岺 > líng; #5CBA
+岻 > chí; #5CBB
+岽 > dōng; #5CBD
+岿 > kūi; #5CBF
+峀 > xìu; #5CC0
+峁 > măo; #5CC1
+峂 > tóng; #5CC2
+峃 > xué; #5CC3
+峄 > yì; #5CC4
+峆 > hē; #5CC6
+峇 > kē; #5CC7
+峈 > lùo; #5CC8
+峉 > ē; #5CC9
+峊 > fù; #5CCA
+峋 > xún; #5CCB
+峌 > dié; #5CCC
+峍 > lù; #5CCD
+峎 > ān; #5CCE
+峏 > ĕr; #5CCF
+峐 > gāi; #5CD0
+峑 > quán; #5CD1
+峒 > tóng; #5CD2
+峓 > yí; #5CD3
+峔 > mŭ; #5CD4
+峕 > shí; #5CD5
+峖 > ān; #5CD6
+峗 > wéi; #5CD7
+峘 > hū; #5CD8
+峙 > zhì; #5CD9
+峚 > mì; #5CDA
+峛 > lĭ; #5CDB
+峜 > jī; #5CDC
+峝 > tóng; #5CDD
+峞 > wéi; #5CDE
+峟 > yòu; #5CDF
+峡 > xiá; #5CE1
+峢 > lĭ; #5CE2
+峣 > yáo; #5CE3
+峤 > jiào; #5CE4
+峥 > zhēng; #5CE5
+峦 > luán; #5CE6
+峧 > jiāo; #5CE7
+峨 > é; #5CE8
+峩 > é; #5CE9
+峪 > yù; #5CEA
+峫 > yé; #5CEB
+峬 > bū; #5CEC
+峭 > qiào; #5CED
+峮 > qūn; #5CEE
+峯 > fēng; #5CEF
+峰 > fēng; #5CF0
+峱 > náo; #5CF1
+峲 > lĭ; #5CF2
+峳 > yóu; #5CF3
+峴 > xiàn; #5CF4
+峵 > hóng; #5CF5
+島 > dăo; #5CF6
+峷 > shēn; #5CF7
+峸 > chéng; #5CF8
+峹 > tú; #5CF9
+峺 > gĕng; #5CFA
+峻 > jùn; #5CFB
+峼 > hào; #5CFC
+峽 > xiá; #5CFD
+峾 > yīn; #5CFE
+峿 > yŭ; #5CFF
+崀 > làng; #5D00
+崁 > kăn; #5D01
+崂 > láo; #5D02
+崃 > lái; #5D03
+崄 > xiăn; #5D04
+崅 > què; #5D05
+崆 > kōng; #5D06
+崇 > chóng; #5D07
+崈 > chóng; #5D08
+崉 > tà; #5D09
+崊 > lin; #5D0A
+崋 > huá; #5D0B
+崌 > jū; #5D0C
+崍 > lái; #5D0D
+崎 > qí; #5D0E
+崏 > mín; #5D0F
+崐 > kūn; #5D10
+崑 > kūn; #5D11
+崒 > zú; #5D12
+崓 > gù; #5D13
+崔 > cūi; #5D14
+崕 > yá; #5D15
+崖 > yá; #5D16
+崗 > găng; #5D17
+崘 > lún; #5D18
+崙 > lún; #5D19
+崚 > léng; #5D1A
+崛 > jué; #5D1B
+崜 > dūo; #5D1C
+崝 > zhēng; #5D1D
+崞 > gūo; #5D1E
+崟 > yín; #5D1F
+崠 > dōng; #5D20
+崡 > hán; #5D21
+崢 > zhēng; #5D22
+崣 > wĕi; #5D23
+崤 > yáo; #5D24
+崥 > pĭ; #5D25
+崦 > yān; #5D26
+崧 > sōng; #5D27
+崨 > jié; #5D28
+崩 > bēng; #5D29
+崪 > zú; #5D2A
+崫 > jué; #5D2B
+崬 > dōng; #5D2C
+崭 > zhăn; #5D2D
+崮 > gù; #5D2E
+崯 > yín; #5D2F
+崱 > zé; #5D31
+崲 > huáng; #5D32
+崳 > yú; #5D33
+崴 > wēi; #5D34
+崵 > yáng; #5D35
+崶 > fēng; #5D36
+崷 > qíu; #5D37
+崸 > dùn; #5D38
+崹 > tí; #5D39
+崺 > yĭ; #5D3A
+崻 > zhì; #5D3B
+崼 > shì; #5D3C
+崽 > zăi; #5D3D
+崾 > yăo; #5D3E
+崿 > è; #5D3F
+嵀 > zhù; #5D40
+嵁 > kān; #5D41
+嵂 > lǜ; #5D42
+嵃 > yăn; #5D43
+嵄 > mĕi; #5D44
+嵅 > gān; #5D45
+嵆 > jī; #5D46
+嵇 > jī; #5D47
+嵈 > huăn; #5D48
+嵉 > tíng; #5D49
+嵊 > shèng; #5D4A
+嵋 > méi; #5D4B
+嵌 > qiàn; #5D4C
+嵍 > wù; #5D4D
+嵎 > yú; #5D4E
+嵏 > zōng; #5D4F
+嵐 > lán; #5D50
+嵑 > jué; #5D51
+嵒 > yán; #5D52
+嵓 > yán; #5D53
+嵔 > wĕi; #5D54
+嵕 > zōng; #5D55
+嵖 > chá; #5D56
+嵗 > sùi; #5D57
+嵘 > róng; #5D58
+嵚 > qīn; #5D5A
+嵛 > yú; #5D5B
+嵝 > lŏu; #5D5D
+嵞 > tú; #5D5E
+嵟 > dūi; #5D5F
+嵠 > xī; #5D60
+嵡 > wēng; #5D61
+嵢 > cāng; #5D62
+嵣 > dāng; #5D63
+嵤 > hóng; #5D64
+嵥 > jié; #5D65
+嵦 > ái; #5D66
+嵧 > líu; #5D67
+嵨 > wŭ; #5D68
+嵩 > sōng; #5D69
+嵪 > qiāo; #5D6A
+嵫 > zī; #5D6B
+嵬 > wéi; #5D6C
+嵭 > bēng; #5D6D
+嵮 > diān; #5D6E
+嵯 > cúo; #5D6F
+嵰 > qiăn; #5D70
+嵱 > yŏng; #5D71
+嵲 > niè; #5D72
+嵳 > cúo; #5D73
+嵴 > jí; #5D74
+嵷 > sŏng; #5D77
+嵸 > zōng; #5D78
+嵹 > jiàng; #5D79
+嵺 > liáo; #5D7A
+嵻 > kang; #5D7B
+嵼 > chăn; #5D7C
+嵽 > dié; #5D7D
+嵾 > cēn; #5D7E
+嵿 > dĭng; #5D7F
+嶀 > tū; #5D80
+嶁 > lŏu; #5D81
+嶂 > zhàng; #5D82
+嶃 > zhăn; #5D83
+嶄 > zhăn; #5D84
+嶅 > áo; #5D85
+嶆 > cáo; #5D86
+嶇 > qū; #5D87
+嶈 > qiāng; #5D88
+嶉 > zūi; #5D89
+嶊 > zŭi; #5D8A
+嶋 > dăo; #5D8B
+嶌 > dăo; #5D8C
+嶍 > xí; #5D8D
+嶎 > yù; #5D8E
+嶏 > bó; #5D8F
+嶐 > lóng; #5D90
+嶑 > xiăng; #5D91
+嶒 > céng; #5D92
+嶓 > bō; #5D93
+嶔 > qīn; #5D94
+嶕 > jiāo; #5D95
+嶖 > yăn; #5D96
+嶗 > láo; #5D97
+嶘 > zhàn; #5D98
+嶙 > lín; #5D99
+嶚 > liáo; #5D9A
+嶛 > liáo; #5D9B
+嶜 > jīn; #5D9C
+嶝 > dèng; #5D9D
+嶞 > dùo; #5D9E
+嶟 > zūn; #5D9F
+嶠 > jiào; #5DA0
+嶡 > gùi; #5DA1
+嶢 > yáo; #5DA2
+嶣 > qiáo; #5DA3
+嶤 > yáo; #5DA4
+嶥 > jué; #5DA5
+嶦 > zhān; #5DA6
+嶧 > yì; #5DA7
+嶨 > xué; #5DA8
+嶩 > náo; #5DA9
+嶪 > yè; #5DAA
+嶫 > yè; #5DAB
+嶬 > yí; #5DAC
+嶭 > è; #5DAD
+嶮 > xiăn; #5DAE
+嶯 > jí; #5DAF
+嶰 > xiè; #5DB0
+嶱 > kĕ; #5DB1
+嶲 > xī; #5DB2
+嶳 > dì; #5DB3
+嶴 > ào; #5DB4
+嶵 > zŭi; #5DB5
+嶷 > nì; #5DB7
+嶸 > róng; #5DB8
+嶹 > dăo; #5DB9
+嶺 > lĭng; #5DBA
+嶻 > zá; #5DBB
+嶼 > yŭ; #5DBC
+嶽 > yuè; #5DBD
+嶾 > yĭn; #5DBE
+巀 > jiē; #5DC0
+巁 > lì; #5DC1
+巂 > sŭi; #5DC2
+巃 > lóng; #5DC3
+巄 > lóng; #5DC4
+巅 > diān; #5DC5
+巆 > yíng; #5DC6
+巇 > xī; #5DC7
+巈 > jú; #5DC8
+巉 > chán; #5DC9
+巊 > yĭng; #5DCA
+巋 > kūi; #5DCB
+巌 > yán; #5DCC
+巍 > wēi; #5DCD
+巎 > náo; #5DCE
+巏 > quán; #5DCF
+巐 > chăo; #5DD0
+巑 > cuán; #5DD1
+巒 > luán; #5DD2
+巓 > diān; #5DD3
+巔 > diān; #5DD4
+巖 > yán; #5DD6
+巗 > yán; #5DD7
+巘 > yăn; #5DD8
+巙 > náo; #5DD9
+巚 > yăn; #5DDA
+巛 > chuān; #5DDB
+巜 > gùi; #5DDC
+川 > chuān; #5DDD
+州 > zhōu; #5DDE
+巟 > huāng; #5DDF
+巠 > jīng; #5DE0
+巡 > xún; #5DE1
+巢 > cháo; #5DE2
+巣 > cháo; #5DE3
+巤 > liē; #5DE4
+工 > gōng; #5DE5
+左 > zŭo; #5DE6
+巧 > qiăo; #5DE7
+巨 > jù; #5DE8
+巩 > gŏng; #5DE9
+巫 > wū; #5DEB
+差 > chāi; #5DEE
+巯 > qíu; #5DEF
+巰 > qíu; #5DF0
+己 > jĭ; #5DF1
+已 > yĭ; #5DF2
+巳 > sì; #5DF3
+巴 > bā; #5DF4
+巵 > zhī; #5DF5
+巶 > zhāo; #5DF6
+巷 > xiàng; #5DF7
+巸 > yí; #5DF8
+巹 > jĭn; #5DF9
+巺 > xùn; #5DFA
+巻 > juàn; #5DFB
+巽 > xùn; #5DFD
+巾 > jīn; #5DFE
+巿 > fú; #5DFF
+帀 > zā; #5E00
+币 > bì; #5E01
+市 > shì; #5E02
+布 > bù; #5E03
+帄 > dīng; #5E04
+帅 > shuài; #5E05
+帆 > fān; #5E06
+帇 > niè; #5E07
+师 > shī; #5E08
+帉 > fēn; #5E09
+帊 > pà; #5E0A
+帋 > zhĭ; #5E0B
+希 > xī; #5E0C
+帍 > hù; #5E0D
+帎 > dàn; #5E0E
+帏 > wéi; #5E0F
+帐 > zhàng; #5E10
+帑 > tăng; #5E11
+帒 > dài; #5E12
+帓 > mà; #5E13
+帔 > pèi; #5E14
+帕 > pà; #5E15
+帖 > tiē; #5E16
+帗 > fú; #5E17
+帘 > lián; #5E18
+帙 > zhì; #5E19
+帚 > zhŏu; #5E1A
+帛 > bó; #5E1B
+帜 > zhì; #5E1C
+帝 > dì; #5E1D
+帞 > mò; #5E1E
+帟 > yì; #5E1F
+帠 > yì; #5E20
+帡 > píng; #5E21
+帢 > qià; #5E22
+帣 > juàn; #5E23
+帤 > rú; #5E24
+帥 > shuài; #5E25
+带 > dài; #5E26
+帧 > zhèng; #5E27
+帨 > shùi; #5E28
+帩 > qiào; #5E29
+帪 > zhēn; #5E2A
+師 > shī; #5E2B
+帬 > qún; #5E2C
+席 > xí; #5E2D
+帮 > bāng; #5E2E
+帯 > dài; #5E2F
+帰 > gūi; #5E30
+帱 > chóu; #5E31
+帲 > píng; #5E32
+帳 > zhàng; #5E33
+帴 > shā; #5E34
+帵 > wān; #5E35
+帶 > dài; #5E36
+帷 > wéi; #5E37
+常 > cháng; #5E38
+帹 > shà; #5E39
+帺 > qí; #5E3A
+帻 > zé; #5E3B
+帼 > gúo; #5E3C
+帽 > mào; #5E3D
+帾 > dŭ; #5E3E
+帿 > hóu; #5E3F
+幀 > zhèng; #5E40
+幁 > xū; #5E41
+幂 > mì; #5E42
+幃 > wéi; #5E43
+幄 > wò; #5E44
+幅 > fú; #5E45
+幆 > yì; #5E46
+幇 > bāng; #5E47
+幈 > píng; #5E48
+幊 > gōng; #5E4A
+幋 > pán; #5E4B
+幌 > huăng; #5E4C
+幍 > dāo; #5E4D
+幎 > mì; #5E4E
+幏 > jiā; #5E4F
+幐 > téng; #5E50
+幑 > hūi; #5E51
+幒 > zhōng; #5E52
+幓 > shān; #5E53
+幔 > màn; #5E54
+幕 > mù; #5E55
+幖 > biāo; #5E56
+幗 > gúo; #5E57
+幘 > zé; #5E58
+幙 > mù; #5E59
+幚 > bāng; #5E5A
+幛 > zhàng; #5E5B
+幜 > jĭong; #5E5C
+幝 > chăn; #5E5D
+幞 > fú; #5E5E
+幟 > zhì; #5E5F
+幠 > hū; #5E60
+幡 > fān; #5E61
+幢 > chuáng; #5E62
+幣 > bì; #5E63
+幦 > mì; #5E66
+幧 > qiāo; #5E67
+幨 > chān; #5E68
+幩 > fén; #5E69
+幪 > méng; #5E6A
+幫 > bāng; #5E6B
+幬 > chóu; #5E6C
+幭 > miè; #5E6D
+幮 > chú; #5E6E
+幯 > jié; #5E6F
+幰 > xiăn; #5E70
+幱 > lán; #5E71
+干 > gān; #5E72
+平 > píng; #5E73
+年 > nián; #5E74
+幵 > qiān; #5E75
+并 > bìng; #5E76
+幷 > bìng; #5E77
+幸 > xìng; #5E78
+幹 > gàn; #5E79
+幺 > yāo; #5E7A
+幻 > huàn; #5E7B
+幼 > yòu; #5E7C
+幽 > yōu; #5E7D
+幾 > jĭ; #5E7E
+广 > yăn; #5E7F
+庀 > pĭ; #5E80
+庁 > tīng; #5E81
+庂 > zè; #5E82
+広 > guăng; #5E83
+庄 > zhuāng; #5E84
+庅 > mo; #5E85
+庆 > qìng; #5E86
+庇 > bì; #5E87
+庈 > qín; #5E88
+庉 > dùn; #5E89
+床 > chuáng; #5E8A
+庋 > gŭi; #5E8B
+庌 > yă; #5E8C
+庍 > bài; #5E8D
+庎 > jiè; #5E8E
+序 > xù; #5E8F
+庐 > lú; #5E90
+庑 > wŭ; #5E91
+库 > kù; #5E93
+应 > yìng; #5E94
+底 > dĭ; #5E95
+庖 > páo; #5E96
+店 > diàn; #5E97
+庘 > yā; #5E98
+庙 > miào; #5E99
+庚 > gēng; #5E9A
+庛 > cī; #5E9B
+府 > fŭ; #5E9C
+庝 > tóng; #5E9D
+庞 > páng; #5E9E
+废 > fèi; #5E9F
+庠 > xiáng; #5EA0
+庡 > yĭ; #5EA1
+庢 > zhì; #5EA2
+庣 > tiāo; #5EA3
+庤 > zhì; #5EA4
+庥 > xīu; #5EA5
+度 > dù; #5EA6
+座 > zùo; #5EA7
+庨 > xiāo; #5EA8
+庩 > tú; #5EA9
+庪 > gŭi; #5EAA
+庫 > kù; #5EAB
+庬 > páng; #5EAC
+庭 > tíng; #5EAD
+庮 > yŏu; #5EAE
+庯 > bū; #5EAF
+庰 > dīng; #5EB0
+庱 > chĕng; #5EB1
+庲 > lái; #5EB2
+庳 > bēi; #5EB3
+庴 > jí; #5EB4
+庵 > ān; #5EB5
+庶 > shù; #5EB6
+康 > kāng; #5EB7
+庸 > yōng; #5EB8
+庹 > tŭo; #5EB9
+庺 > sōng; #5EBA
+庻 > shù; #5EBB
+庼 > qĭng; #5EBC
+庽 > yù; #5EBD
+庾 > yŭ; #5EBE
+庿 > miào; #5EBF
+廀 > sōu; #5EC0
+廁 > cè; #5EC1
+廂 > xiāng; #5EC2
+廃 > fèi; #5EC3
+廄 > jìu; #5EC4
+廅 > hé; #5EC5
+廆 > hùi; #5EC6
+廇 > lìu; #5EC7
+廈 > shà; #5EC8
+廉 > lián; #5EC9
+廊 > láng; #5ECA
+廋 > sōu; #5ECB
+廌 > jiàn; #5ECC
+廍 > pŏu; #5ECD
+廎 > qĭng; #5ECE
+廏 > jìu; #5ECF
+廐 > jìu; #5ED0
+廑 > qín; #5ED1
+廒 > áo; #5ED2
+廓 > kùo; #5ED3
+廔 > lóu; #5ED4
+廕 > yīn; #5ED5
+廖 > liào; #5ED6
+廗 > dài; #5ED7
+廘 > lù; #5ED8
+廙 > yì; #5ED9
+廚 > chú; #5EDA
+廛 > chán; #5EDB
+廜 > tū; #5EDC
+廝 > sī; #5EDD
+廞 > xīn; #5EDE
+廟 > miào; #5EDF
+廠 > chăng; #5EE0
+廡 > wŭ; #5EE1
+廢 > fèi; #5EE2
+廣 > guăng; #5EE3
+廥 > kuài; #5EE5
+廦 > bì; #5EE6
+廧 > qiáng; #5EE7
+廨 > xiè; #5EE8
+廩 > lĭn; #5EE9
+廪 > lĭn; #5EEA
+廫 > liáo; #5EEB
+廬 > lú; #5EEC
+廮 > yíng; #5EEE
+廯 > xiān; #5EEF
+廰 > tīng; #5EF0
+廱 > yōng; #5EF1
+廲 > lí; #5EF2
+廳 > tīng; #5EF3
+廴 > yĭn; #5EF4
+廵 > xún; #5EF5
+延 > yán; #5EF6
+廷 > tíng; #5EF7
+廸 > dí; #5EF8
+廹 > pò; #5EF9
+建 > jiàn; #5EFA
+廻 > húi; #5EFB
+廼 > năi; #5EFC
+廽 > húi; #5EFD
+廾 > gòng; #5EFE
+廿 > niàn; #5EFF
+开 > kāi; #5F00
+弁 > biàn; #5F01
+异 > yì; #5F02
+弃 > qì; #5F03
+弄 > nòng; #5F04
+弅 > fén; #5F05
+弆 > jŭ; #5F06
+弇 > yăn; #5F07
+弈 > yì; #5F08
+弉 > zàng; #5F09
+弊 > bì; #5F0A
+弋 > yì; #5F0B
+弌 > yī; #5F0C
+弍 > èr; #5F0D
+弎 > sān; #5F0E
+式 > shì; #5F0F
+弐 > èr; #5F10
+弑 > shì; #5F11
+弒 > shì; #5F12
+弓 > gōng; #5F13
+弔 > diào; #5F14
+引 > yĭn; #5F15
+弖 > hù; #5F16
+弗 > fú; #5F17
+弘 > hóng; #5F18
+弙 > wū; #5F19
+弚 > túi; #5F1A
+弛 > chí; #5F1B
+弜 > jiàng; #5F1C
+弝 > bà; #5F1D
+弞 > shĕn; #5F1E
+弟 > dì; #5F1F
+张 > zhāng; #5F20
+弡 > jué; #5F21
+弢 > tāo; #5F22
+弣 > fŭ; #5F23
+弤 > dĭ; #5F24
+弥 > mí; #5F25
+弦 > xián; #5F26
+弧 > hú; #5F27
+弨 > chāo; #5F28
+弩 > nŭ; #5F29
+弪 > jìng; #5F2A
+弫 > zhĕn; #5F2B
+弬 > yí; #5F2C
+弭 > mĭ; #5F2D
+弮 > quān; #5F2E
+弯 > wān; #5F2F
+弰 > shāo; #5F30
+弱 > rùo; #5F31
+弲 > xuān; #5F32
+弳 > jìng; #5F33
+弴 > dūn; #5F34
+張 > zhāng; #5F35
+弶 > jiàng; #5F36
+強 > qiáng; #5F37
+弸 > péng; #5F38
+弹 > dàn; #5F39
+强 > qiáng; #5F3A
+弻 > bì; #5F3B
+弼 > bì; #5F3C
+弽 > shè; #5F3D
+弾 > dàn; #5F3E
+弿 > jiăn; #5F3F
+彀 > gòu; #5F40
+彂 > fā; #5F42
+彃 > bì; #5F43
+彄 > kōu; #5F44
+彆 > biè; #5F46
+彇 > xiāo; #5F47
+彈 > dàn; #5F48
+彉 > kùo; #5F49
+彊 > qiáng; #5F4A
+彋 > hóng; #5F4B
+彌 > mí; #5F4C
+彍 > kùo; #5F4D
+彎 > wān; #5F4E
+彏 > jué; #5F4F
+彐 > jì; #5F50
+彑 > jì; #5F51
+归 > gūi; #5F52
+当 > dāng; #5F53
+彔 > lù; #5F54
+录 > lù; #5F55
+彖 > tuàn; #5F56
+彗 > hùi; #5F57
+彘 > zhì; #5F58
+彙 > hùi; #5F59
+彚 > hùi; #5F5A
+彛 > yí; #5F5B
+彜 > yí; #5F5C
+彝 > yí; #5F5D
+彞 > yí; #5F5E
+彟 > hùo; #5F5F
+彠 > hùo; #5F60
+彡 > shān; #5F61
+形 > xíng; #5F62
+彣 > wén; #5F63
+彤 > tóng; #5F64
+彥 > yàn; #5F65
+彦 > yàn; #5F66
+彧 > yù; #5F67
+彨 > chī; #5F68
+彩 > căi; #5F69
+彪 > biāo; #5F6A
+彫 > diāo; #5F6B
+彬 > bīn; #5F6C
+彭 > péng; #5F6D
+彮 > yŏng; #5F6E
+彯 > piāo; #5F6F
+彰 > zhāng; #5F70
+影 > yĭng; #5F71
+彲 > chī; #5F72
+彳 > chì; #5F73
+彴 > zhúo; #5F74
+彵 > tŭo; #5F75
+彶 > jí; #5F76
+彷 > páng; #5F77
+彸 > zhōng; #5F78
+役 > yì; #5F79
+彺 > wáng; #5F7A
+彻 > chè; #5F7B
+彼 > bĭ; #5F7C
+彽 > chí; #5F7D
+彾 > lĭng; #5F7E
+彿 > fú; #5F7F
+往 > wăng; #5F80
+征 > zhēng; #5F81
+徂 > cú; #5F82
+徃 > wăng; #5F83
+径 > jìng; #5F84
+待 > dài; #5F85
+徆 > xī; #5F86
+徇 > xùn; #5F87
+很 > hĕn; #5F88
+徉 > yáng; #5F89
+徊 > huái; #5F8A
+律 > lǜ; #5F8B
+後 > hòu; #5F8C
+徍 > wā; #5F8D
+徎 > chĕng; #5F8E
+徏 > zhì; #5F8F
+徐 > xú; #5F90
+徑 > jìng; #5F91
+徒 > tú; #5F92
+従 > cóng; #5F93
+徕 > lái; #5F95
+徖 > cóng; #5F96
+得 > dé; #5F97
+徘 > pái; #5F98
+徙 > xĭ; #5F99
+徛 > qì; #5F9B
+徜 > cháng; #5F9C
+徝 > zhì; #5F9D
+從 > cóng; #5F9E
+徟 > zhōu; #5F9F
+徠 > lái; #5FA0
+御 > yù; #5FA1
+徢 > xiè; #5FA2
+徣 > jiè; #5FA3
+徤 > jiàn; #5FA4
+徥 > chí; #5FA5
+徦 > jiă; #5FA6
+徧 > biàn; #5FA7
+徨 > huáng; #5FA8
+復 > fù; #5FA9
+循 > xún; #5FAA
+徫 > wĕi; #5FAB
+徬 > páng; #5FAC
+徭 > yáo; #5FAD
+微 > wēi; #5FAE
+徯 > xī; #5FAF
+徰 > zhēng; #5FB0
+徱 > piào; #5FB1
+徲 > chí; #5FB2
+徳 > dé; #5FB3
+徴 > zhēng; #5FB4
+徵 > zhēng; #5FB5
+徶 > biè; #5FB6
+德 > dé; #5FB7
+徸 > chōng; #5FB8
+徹 > chè; #5FB9
+徺 > jiăo; #5FBA
+徻 > wèi; #5FBB
+徼 > jiào; #5FBC
+徽 > hūi; #5FBD
+徾 > méi; #5FBE
+徿 > lòng; #5FBF
+忀 > xiāng; #5FC0
+忁 > bào; #5FC1
+忂 > qú; #5FC2
+心 > xīn; #5FC3
+忄 > shù' 'xīn' 'páng; #5FC4
+必 > bì; #5FC5
+忆 > yì; #5FC6
+忇 > lè; #5FC7
+忈 > rén; #5FC8
+忉 > dāo; #5FC9
+忊 > dìng; #5FCA
+忋 > găi; #5FCB
+忌 > jì; #5FCC
+忍 > rĕn; #5FCD
+忎 > rén; #5FCE
+忏 > chàn; #5FCF
+忐 > tăn; #5FD0
+忑 > tè; #5FD1
+忒 > tè; #5FD2
+忓 > gān; #5FD3
+忔 > qì; #5FD4
+忕 > shì; #5FD5
+忖 > cŭn; #5FD6
+志 > zhì; #5FD7
+忘 > wàng; #5FD8
+忙 > máng; #5FD9
+忚 > xī; #5FDA
+忛 > fán; #5FDB
+応 > yīng; #5FDC
+忝 > tiăn; #5FDD
+忞 > mín; #5FDE
+忟 > mín; #5FDF
+忠 > zhōng; #5FE0
+忡 > chōng; #5FE1
+忢 > wù; #5FE2
+忣 > jí; #5FE3
+忤 > wŭ; #5FE4
+忥 > xì; #5FE5
+忦 > yè; #5FE6
+忧 > yōu; #5FE7
+忨 > wàn; #5FE8
+忩 > cōng; #5FE9
+忪 > zhōng; #5FEA
+快 > kuài; #5FEB
+忬 > yù; #5FEC
+忭 > biàn; #5FED
+忮 > zhì; #5FEE
+忯 > qí; #5FEF
+忰 > cùi; #5FF0
+忱 > chén; #5FF1
+忲 > tài; #5FF2
+忳 > tún; #5FF3
+忴 > qián; #5FF4
+念 > niàn; #5FF5
+忶 > hún; #5FF6
+忷 > xīong; #5FF7
+忸 > nĭu; #5FF8
+忹 > wăng; #5FF9
+忺 > xiān; #5FFA
+忻 > xīn; #5FFB
+忼 > kāng; #5FFC
+忽 > hū; #5FFD
+忾 > kài; #5FFE
+忿 > fèn; #5FFF
+怀 > huái; #6000
+态 > tài; #6001
+怂 > sŏng; #6002
+怃 > wŭ; #6003
+怄 > òu; #6004
+怅 > chàng; #6005
+怆 > chuàng; #6006
+怇 > jù; #6007
+怈 > yì; #6008
+怉 > băo; #6009
+怊 > chāo; #600A
+怋 > mín; #600B
+怌 > pēi; #600C
+怍 > zùo; #600D
+怎 > zĕn; #600E
+怏 > yàng; #600F
+怐 > kòu; #6010
+怑 > bàn; #6011
+怒 > nù; #6012
+怓 > náo; #6013
+怔 > zhēng; #6014
+怕 > pà; #6015
+怖 > bù; #6016
+怗 > tiē; #6017
+怘 > gù; #6018
+怙 > hù; #6019
+怚 > jù; #601A
+怛 > dá; #601B
+怜 > lián; #601C
+思 > sī; #601D
+怞 > chōu; #601E
+怟 > dì; #601F
+怠 > dài; #6020
+怡 > yí; #6021
+怢 > tú; #6022
+怣 > yóu; #6023
+怤 > fū; #6024
+急 > jí; #6025
+怦 > pēng; #6026
+性 > xìng; #6027
+怨 > yuàn; #6028
+怩 > ní; #6029
+怪 > guài; #602A
+怫 > fú; #602B
+怬 > xì; #602C
+怭 > bì; #602D
+怮 > yōu; #602E
+怯 > qiè; #602F
+怰 > xuàn; #6030
+怱 > cōng; #6031
+怲 > bĭng; #6032
+怳 > huăng; #6033
+怴 > xù; #6034
+怵 > chù; #6035
+怶 > pī; #6036
+怷 > xī; #6037
+怸 > xī; #6038
+怹 > tān; #6039
+总 > zŏng; #603B
+怼 > dùi; #603C
+怿 > yì; #603F
+恀 > chĭ; #6040
+恁 > rèn; #6041
+恂 > xún; #6042
+恃 > shì; #6043
+恄 > xì; #6044
+恅 > lăo; #6045
+恆 > héng; #6046
+恇 > kuāng; #6047
+恈 > mú; #6048
+恉 > zhĭ; #6049
+恊 > xié; #604A
+恋 > liàn; #604B
+恌 > tiāo; #604C
+恍 > huăng; #604D
+恎 > dié; #604E
+恏 > hăo; #604F
+恐 > kŏng; #6050
+恑 > gŭi; #6051
+恒 > héng; #6052
+恓 > xī; #6053
+恔 > xiào; #6054
+恕 > shù; #6055
+恖 > sī; #6056
+恗 > kuă; #6057
+恘 > qīu; #6058
+恙 > yàng; #6059
+恚 > hùi; #605A
+恛 > húi; #605B
+恜 > chì; #605C
+恝 > jiá; #605D
+恞 > yí; #605E
+恟 > xīong; #605F
+恠 > guài; #6060
+恡 > lìn; #6061
+恢 > hūi; #6062
+恣 > zì; #6063
+恤 > xù; #6064
+恥 > chĭ; #6065
+恦 > xiàng; #6066
+恧 > nǜ; #6067
+恨 > hèn; #6068
+恩 > ēn; #6069
+恪 > kè; #606A
+恫 > tōng; #606B
+恬 > tián; #606C
+恭 > gōng; #606D
+恮 > quán; #606E
+息 > xī; #606F
+恰 > qià; #6070
+恱 > yuè; #6071
+恲 > pēng; #6072
+恳 > kĕn; #6073
+恴 > dé; #6074
+恵 > hùi; #6075
+恶 > è; #6076
+恸 > tòng; #6078
+恹 > yàn; #6079
+恺 > kăi; #607A
+恻 > cè; #607B
+恼 > năo; #607C
+恽 > yùn; #607D
+恾 > máng; #607E
+恿 > yŏng; #607F
+悀 > yŏng; #6080
+悁 > yuān; #6081
+悂 > pī; #6082
+悃 > kŭn; #6083
+悄 > qiăo; #6084
+悅 > yuè; #6085
+悆 > yù; #6086
+悇 > yù; #6087
+悈 > jiè; #6088
+悉 > xī; #6089
+悊 > zhé; #608A
+悋 > lìn; #608B
+悌 > tì; #608C
+悍 > hàn; #608D
+悎 > hào; #608E
+悏 > qiè; #608F
+悐 > tì; #6090
+悑 > bù; #6091
+悒 > yì; #6092
+悓 > qiàn; #6093
+悔 > hŭi; #6094
+悕 > xī; #6095
+悖 > bèi; #6096
+悗 > mán; #6097
+悘 > yī; #6098
+悙 > hēng; #6099
+悚 > sŏng; #609A
+悛 > quān; #609B
+悜 > chĕng; #609C
+悝 > hūi; #609D
+悞 > wù; #609E
+悟 > wù; #609F
+悠 > yōu; #60A0
+悡 > lí; #60A1
+悢 > liàng; #60A2
+患 > huàn; #60A3
+悤 > cōng; #60A4
+悥 > yì; #60A5
+悦 > yuè; #60A6
+悧 > lì; #60A7
+您 > nín; #60A8
+悩 > năo; #60A9
+悪 > è; #60AA
+悫 > què; #60AB
+悬 > xuán; #60AC
+悭 > qiān; #60AD
+悮 > wù; #60AE
+悯 > mĭn; #60AF
+悰 > cóng; #60B0
+悱 > fĕi; #60B1
+悲 > bēi; #60B2
+悳 > dúo; #60B3
+悴 > cùi; #60B4
+悵 > chàng; #60B5
+悶 > mèn; #60B6
+悷 > lì; #60B7
+悸 > jì; #60B8
+悹 > guàn; #60B9
+悺 > guàn; #60BA
+悻 > xìng; #60BB
+悼 > dào; #60BC
+悽 > qī; #60BD
+悾 > kōng; #60BE
+悿 > tiăn; #60BF
+惀 > lún; #60C0
+惁 > xī; #60C1
+惂 > kăn; #60C2
+惃 > kūn; #60C3
+惄 > nì; #60C4
+情 > qíng; #60C5
+惆 > chóu; #60C6
+惇 > dūn; #60C7
+惈 > gŭo; #60C8
+惉 > chān; #60C9
+惊 > liáng; #60CA
+惋 > wăn; #60CB
+惌 > yuān; #60CC
+惍 > jīn; #60CD
+惎 > jì; #60CE
+惏 > lín; #60CF
+惐 > yù; #60D0
+惑 > hùo; #60D1
+惒 > hé; #60D2
+惓 > quán; #60D3
+惔 > tán; #60D4
+惕 > tì; #60D5
+惖 > tì; #60D6
+惗 > niē; #60D7
+惘 > wăng; #60D8
+惙 > chùo; #60D9
+惚 > bū; #60DA
+惛 > hūn; #60DB
+惜 > xī; #60DC
+惝 > tăng; #60DD
+惞 > xīn; #60DE
+惟 > wéi; #60DF
+惠 > hùi; #60E0
+惡 > è; #60E1
+惢 > rŭi; #60E2
+惣 > zŏng; #60E3
+惤 > jiān; #60E4
+惥 > yŏng; #60E5
+惦 > diàn; #60E6
+惧 > jù; #60E7
+惨 > căn; #60E8
+惩 > chéng; #60E9
+惪 > dé; #60EA
+惫 > bèi; #60EB
+惬 > qiè; #60EC
+惭 > cán; #60ED
+惮 > dàn; #60EE
+惯 > guàn; #60EF
+惰 > dùo; #60F0
+惱 > năo; #60F1
+惲 > yùn; #60F2
+想 > xiăng; #60F3
+惴 > zhùi; #60F4
+惵 > diè; #60F5
+惶 > huáng; #60F6
+惷 > chŭn; #60F7
+惸 > qíong; #60F8
+惹 > rĕ; #60F9
+惺 > xīng; #60FA
+惻 > cè; #60FB
+惼 > biăn; #60FC
+惽 > hūn; #60FD
+惾 > zōng; #60FE
+惿 > tí; #60FF
+愀 > qiăo; #6100
+愁 > chóu; #6101
+愂 > bèi; #6102
+愃 > xuān; #6103
+愄 > wēi; #6104
+愅 > gé; #6105
+愆 > qiān; #6106
+愇 > wĕi; #6107
+愈 > yù; #6108
+愉 > yú; #6109
+愊 > bì; #610A
+愋 > xuān; #610B
+愌 > huàn; #610C
+愍 > mĭn; #610D
+愎 > bì; #610E
+意 > yì; #610F
+愐 > miăn; #6110
+愑 > yŏng; #6111
+愒 > kài; #6112
+愓 > dàng; #6113
+愔 > yīn; #6114
+愕 > è; #6115
+愖 > chén; #6116
+愗 > mòu; #6117
+愘 > kè; #6118
+愙 > kè; #6119
+愚 > yú; #611A
+愛 > ài; #611B
+愜 > qiè; #611C
+愝 > yăn; #611D
+愞 > nùo; #611E
+感 > găn; #611F
+愠 > yùn; #6120
+愡 > zŏng; #6121
+愢 > sāi; #6122
+愣 > léng; #6123
+愤 > fèn; #6124
+愦 > kùi; #6126
+愧 > kùi; #6127
+愨 > què; #6128
+愩 > gōng; #6129
+愪 > yún; #612A
+愫 > sù; #612B
+愬 > sù; #612C
+愭 > qí; #612D
+愮 > yáo; #612E
+愯 > sŏng; #612F
+愰 > huăng; #6130
+愱 > jí; #6131
+愲 > gŭ; #6132
+愳 > jù; #6133
+愴 > chuàng; #6134
+愵 > nì; #6135
+愶 > xié; #6136
+愷 > kăi; #6137
+愸 > zhĕng; #6138
+愹 > yŏng; #6139
+愺 > căo; #613A
+愻 > sùn; #613B
+愼 > shèn; #613C
+愽 > bó; #613D
+愾 > kài; #613E
+愿 > yuàn; #613F
+慀 > xié; #6140
+慁 > hùn; #6141
+慂 > yŏng; #6142
+慃 > yăng; #6143
+慄 > lì; #6144
+慅 > sāo; #6145
+慆 > tāo; #6146
+慇 > yīn; #6147
+慈 > cí; #6148
+慉 > xù; #6149
+慊 > qiàn; #614A
+態 > tài; #614B
+慌 > huāng; #614C
+慍 > yùn; #614D
+慎 > shèn; #614E
+慏 > mĭng; #614F
+慑 > shè; #6151
+慒 > cóng; #6152
+慓 > piào; #6153
+慔 > mò; #6154
+慕 > mù; #6155
+慖 > gúo; #6156
+慗 > chì; #6157
+慘 > căn; #6158
+慙 > cán; #6159
+慚 > cán; #615A
+慛 > cúi; #615B
+慜 > mĭn; #615C
+慝 > tè; #615D
+慞 > zhāng; #615E
+慟 > tòng; #615F
+慠 > ào; #6160
+慡 > shuăng; #6161
+慢 > màn; #6162
+慣 > guàn; #6163
+慤 > què; #6164
+慥 > zào; #6165
+慦 > jìu; #6166
+慧 > hùi; #6167
+慨 > kăi; #6168
+慩 > lián; #6169
+慪 > òu; #616A
+慫 > sŏng; #616B
+慬 > jĭn; #616C
+慭 > yìn; #616D
+慮 > lǜ; #616E
+慯 > shāng; #616F
+慰 > wèi; #6170
+慱 > tuán; #6171
+慲 > mán; #6172
+慳 > qiān; #6173
+慴 > shè; #6174
+慵 > yōng; #6175
+慶 > qìng; #6176
+慷 > kāng; #6177
+慸 > dì; #6178
+慹 > zhí; #6179
+慺 > lóu; #617A
+慻 > juàn; #617B
+慼 > qī; #617C
+慽 > qī; #617D
+慾 > yù; #617E
+慿 > píng; #617F
+憀 > liáo; #6180
+憁 > cōng; #6181
+憂 > yōu; #6182
+憃 > chōng; #6183
+憄 > zhì; #6184
+憅 > tòng; #6185
+憆 > chēng; #6186
+憇 > qì; #6187
+憈 > qū; #6188
+憉 > péng; #6189
+憊 > bèi; #618A
+憋 > biē; #618B
+憌 > chún; #618C
+憍 > jiāo; #618D
+憎 > zēng; #618E
+憏 > chì; #618F
+憐 > lián; #6190
+憑 > píng; #6191
+憒 > kùi; #6192
+憓 > hùi; #6193
+憔 > qiáo; #6194
+憕 > chéng; #6195
+憖 > yìn; #6196
+憗 > yìn; #6197
+憘 > xĭ; #6198
+憙 > xĭ; #6199
+憚 > dàn; #619A
+憛 > tán; #619B
+憜 > dŭo; #619C
+憝 > dùi; #619D
+憞 > dùi; #619E
+憟 > sù; #619F
+憠 > jué; #61A0
+憡 > cè; #61A1
+憢 > xiāo; #61A2
+憣 > fán; #61A3
+憤 > fèn; #61A4
+憥 > láo; #61A5
+憦 > lào; #61A6
+憧 > chōng; #61A7
+憨 > hān; #61A8
+憩 > qì; #61A9
+憪 > xián; #61AA
+憫 > mĭn; #61AB
+憬 > jĭng; #61AC
+憭 > liăo; #61AD
+憮 > wŭ; #61AE
+憯 > căn; #61AF
+憰 > jué; #61B0
+憱 > cù; #61B1
+憲 > xiàn; #61B2
+憳 > tăn; #61B3
+憴 > shéng; #61B4
+憵 > pī; #61B5
+憶 > yì; #61B6
+憷 > chŭ; #61B7
+憸 > xiān; #61B8
+憹 > náo; #61B9
+憺 > dàn; #61BA
+憻 > tăn; #61BB
+憼 > jĭng; #61BC
+憽 > sōng; #61BD
+憾 > hàn; #61BE
+憿 > jiāo; #61BF
+懀 > wài; #61C0
+懁 > huán; #61C1
+懂 > dŏng; #61C2
+懃 > qín; #61C3
+懄 > qín; #61C4
+懅 > qú; #61C5
+懆 > căo; #61C6
+懇 > kĕn; #61C7
+懈 > xiè; #61C8
+應 > yìng; #61C9
+懊 > ào; #61CA
+懋 > mào; #61CB
+懌 > yì; #61CC
+懍 > lĭn; #61CD
+懎 > sè; #61CE
+懏 > jùn; #61CF
+懐 > huái; #61D0
+懑 > mèn; #61D1
+懒 > lăn; #61D2
+懓 > ài; #61D3
+懔 > lĭn; #61D4
+懕 > yān; #61D5
+懖 > guā; #61D6
+懗 > xià; #61D7
+懘 > chì; #61D8
+懙 > yŭ; #61D9
+懚 > yìn; #61DA
+懛 > dāi; #61DB
+懜 > mèng; #61DC
+懝 > ài; #61DD
+懞 > méng; #61DE
+懟 > dùi; #61DF
+懠 > qí; #61E0
+懡 > mŏ; #61E1
+懢 > lán; #61E2
+懣 > mèn; #61E3
+懤 > chóu; #61E4
+懥 > zhì; #61E5
+懦 > nùo; #61E6
+懧 > nùo; #61E7
+懨 > yān; #61E8
+懩 > yăng; #61E9
+懪 > bó; #61EA
+懫 > zhí; #61EB
+懬 > kuàng; #61EC
+懭 > kuàng; #61ED
+懮 > yŏu; #61EE
+懯 > fū; #61EF
+懰 > líu; #61F0
+懱 > miè; #61F1
+懲 > chéng; #61F2
+懴 > chàn; #61F4
+懵 > méng; #61F5
+懶 > lăn; #61F6
+懷 > huái; #61F7
+懸 > xuán; #61F8
+懹 > ràng; #61F9
+懺 > chàn; #61FA
+懻 > jì; #61FB
+懼 > jù; #61FC
+懽 > huān; #61FD
+懾 > shè; #61FE
+懿 > yì; #61FF
+戀 > liàn; #6200
+戁 > năn; #6201
+戂 > mí; #6202
+戃 > tăng; #6203
+戄 > jué; #6204
+戅 > gàng; #6205
+戆 > gàng; #6206
+戇 > gàng; #6207
+戈 > gē; #6208
+戉 > yuè; #6209
+戊 > wù; #620A
+戋 > jiān; #620B
+戌 > xū; #620C
+戍 > shù; #620D
+戎 > róng; #620E
+戏 > xì; #620F
+成 > chéng; #6210
+我 > wŏ; #6211
+戒 > jiè; #6212
+戓 > gē; #6213
+戔 > jiān; #6214
+戕 > qiāng; #6215
+或 > hùo; #6216
+戗 > qiāng; #6217
+战 > zhàn; #6218
+戙 > dòng; #6219
+戚 > qī; #621A
+戛 > jiá; #621B
+戜 > dié; #621C
+戝 > zéi; #621D
+戞 > jiá; #621E
+戟 > jĭ; #621F
+戠 > shì; #6220
+戡 > kān; #6221
+戢 > jí; #6222
+戣 > kúi; #6223
+戤 > gài; #6224
+戥 > dĕng; #6225
+戦 > zhàn; #6226
+戧 > chuāng; #6227
+戨 > gē; #6228
+戩 > jiăn; #6229
+截 > jié; #622A
+戫 > yù; #622B
+戬 > jiăn; #622C
+戭 > yăn; #622D
+戮 > lù; #622E
+戯 > xì; #622F
+戰 > zhàn; #6230
+戱 > xì; #6231
+戲 > xì; #6232
+戳 > chūo; #6233
+戴 > dài; #6234
+戵 > qú; #6235
+戶 > hù; #6236
+户 > hù; #6237
+戸 > hù; #6238
+戹 > è; #6239
+戺 > shì; #623A
+戻 > lì; #623B
+戼 > măo; #623C
+戽 > hù; #623D
+戾 > lì; #623E
+房 > fáng; #623F
+所 > sŭo; #6240
+扁 > biăn; #6241
+扂 > diàn; #6242
+扃 > jīong; #6243
+扄 > shăng; #6244
+扅 > yí; #6245
+扆 > yĭ; #6246
+扇 > shàn; #6247
+扈 > hù; #6248
+扉 > fēi; #6249
+扊 > yăn; #624A
+手 > shŏu; #624B
+扌 > t̄' 'shŏu' 'páng; #624C
+才 > cái; #624D
+扎 > zhā; #624E
+扏 > qíu; #624F
+扐 > lè; #6250
+扑 > bū; #6251
+扒 > bā; #6252
+打 > dă; #6253
+扔 > rēng; #6254
+払 > fú; #6255
+扗 > zài; #6257
+托 > tūo; #6258
+扙 > zhàng; #6259
+扚 > diāo; #625A
+扛 > káng; #625B
+扜 > yū; #625C
+扝 > kū; #625D
+扞 > hàn; #625E
+扟 > shēn; #625F
+扠 > chā; #6260
+扡 > yĭ; #6261
+扢 > gŭ; #6262
+扣 > kòu; #6263
+扤 > wù; #6264
+扥 > tūo; #6265
+扦 > qiān; #6266
+执 > zhí; #6267
+扨 > rèn; #6268
+扩 > kùo; #6269
+扪 > mén; #626A
+扫 > săo; #626B
+扬 > yáng; #626C
+扭 > nĭu; #626D
+扮 > bàn; #626E
+扯 > chĕ; #626F
+扰 > răo; #6270
+扱 > xī; #6271
+扲 > qián; #6272
+扳 > bān; #6273
+扴 > jiá; #6274
+扵 > yú; #6275
+扶 > fú; #6276
+扷 > ào; #6277
+扸 > xī; #6278
+批 > pī; #6279
+扺 > zhĭ; #627A
+扻 > zì; #627B
+扼 > è; #627C
+扽 > dùn; #627D
+找 > zhăo; #627E
+承 > chéng; #627F
+技 > jì; #6280
+抁 > yăn; #6281
+抂 > kuáng; #6282
+抃 > biàn; #6283
+抄 > chāo; #6284
+抅 > jū; #6285
+抆 > wèn; #6286
+抇 > hú; #6287
+抈 > yuè; #6288
+抉 > jué; #6289
+把 > bă; #628A
+抋 > qìn; #628B
+抌 > zhĕn; #628C
+抍 > zhĕng; #628D
+抎 > yŭn; #628E
+抏 > wán; #628F
+抐 > nù; #6290
+抑 > yì; #6291
+抒 > shū; #6292
+抓 > zhuā; #6293
+抔 > póu; #6294
+投 > tóu; #6295
+抖 > dŏu; #6296
+抗 > kàng; #6297
+折 > zhé; #6298
+抙 > póu; #6299
+抚 > fŭ; #629A
+抛 > pāo; #629B
+抜 > bá; #629C
+抝 > ăo; #629D
+択 > zé; #629E
+抟 > tuán; #629F
+抠 > kōu; #62A0
+抡 > lún; #62A1
+抢 > qiăng; #62A2
+护 > hù; #62A4
+报 > bào; #62A5
+抦 > bĭng; #62A6
+抧 > zhĭ; #62A7
+抨 > pēng; #62A8
+抩 > tān; #62A9
+抪 > pū; #62AA
+披 > pī; #62AB
+抬 > tái; #62AC
+抭 > yăo; #62AD
+抮 > zhĕn; #62AE
+抯 > zhā; #62AF
+抰 > yăng; #62B0
+抱 > bào; #62B1
+抲 > hē; #62B2
+抳 > nĭ; #62B3
+抴 > yì; #62B4
+抵 > dĭ; #62B5
+抶 > chì; #62B6
+抷 > pī; #62B7
+抸 > zā; #62B8
+抹 > mŏ; #62B9
+抺 > mŏ; #62BA
+抻 > shèn; #62BB
+押 > yā; #62BC
+抽 > chōu; #62BD
+抾 > qū; #62BE
+抿 > mĭn; #62BF
+拀 > chù; #62C0
+拁 > jiā; #62C1
+拂 > fú; #62C2
+拃 > zhăn; #62C3
+拄 > zhŭ; #62C4
+担 > dàn; #62C5
+拆 > chāi; #62C6
+拇 > mŭ; #62C7
+拈 > nián; #62C8
+拉 > lā; #62C9
+拊 > fŭ; #62CA
+拋 > pāo; #62CB
+拌 > bàn; #62CC
+拍 > pāi; #62CD
+拎 > līng; #62CE
+拏 > ná; #62CF
+拐 > guăi; #62D0
+拑 > qián; #62D1
+拒 > jù; #62D2
+拓 > tùo; #62D3
+拔 > bá; #62D4
+拕 > tūo; #62D5
+拖 > tūo; #62D6
+拗 > ăo; #62D7
+拘 > jū; #62D8
+拙 > zhúo; #62D9
+拚 > pàn; #62DA
+招 > zhāo; #62DB
+拜 > bài; #62DC
+拝 > bài; #62DD
+拞 > dĭ; #62DE
+拟 > nĭ; #62DF
+拠 > jù; #62E0
+拡 > kùo; #62E1
+拢 > lŏng; #62E2
+拣 > jiăn; #62E3
+拥 > yŏng; #62E5
+拦 > lán; #62E6
+拧 > níng; #62E7
+拨 > bō; #62E8
+择 > zé; #62E9
+拪 > qiān; #62EA
+拫 > hén; #62EB
+括 > guā; #62EC
+拭 > shì; #62ED
+拮 > jié; #62EE
+拯 > zhĕng; #62EF
+拰 > nĭn; #62F0
+拱 > gŏng; #62F1
+拲 > gŏng; #62F2
+拳 > quán; #62F3
+拴 > shuān; #62F4
+拵 > cún; #62F5
+拶 > zăn; #62F6
+拷 > kăo; #62F7
+拸 > chĭ; #62F8
+拹 > xié; #62F9
+拺 > cè; #62FA
+拻 > hūi; #62FB
+拼 > pīn; #62FC
+拽 > zhuāi; #62FD
+拾 > shí; #62FE
+拿 > ná; #62FF
+挀 > bò; #6300
+持 > chí; #6301
+挂 > guà; #6302
+挃 > zhì; #6303
+挄 > kùo; #6304
+挅 > dŭo; #6305
+挆 > dŭo; #6306
+指 > zhĭ; #6307
+挈 > qiè; #6308
+按 > àn; #6309
+挊 > nòng; #630A
+挋 > zhèn; #630B
+挌 > gé; #630C
+挍 > jiào; #630D
+挎 > kū; #630E
+挏 > dòng; #630F
+挐 > rú; #6310
+挑 > tiāo; #6311
+挒 > liè; #6312
+挓 > zhā; #6313
+挔 > lǚ; #6314
+挕 > dié; #6315
+挖 > wā; #6316
+挗 > jué; #6317
+挙 > jŭ; #6319
+挚 > zhì; #631A
+挛 > luán; #631B
+挜 > yà; #631C
+挝 > zhuā; #631D
+挞 > tà; #631E
+挟 > xié; #631F
+挠 > náo; #6320
+挡 > dăng; #6321
+挢 > jiăo; #6322
+挣 > zhēng; #6323
+挤 > jĭ; #6324
+挥 > hūi; #6325
+挦 > xún; #6326
+挨 > āi; #6328
+挩 > tūo; #6329
+挪 > núo; #632A
+挫 > cùo; #632B
+挬 > bó; #632C
+挭 > gĕng; #632D
+挮 > tĭ; #632E
+振 > zhèn; #632F
+挰 > chéng; #6330
+挱 > sūo; #6331
+挲 > sūo; #6332
+挳 > kēng; #6333
+挴 > mĕi; #6334
+挵 > lòng; #6335
+挶 > jú; #6336
+挷 > péng; #6337
+挸 > jiăn; #6338
+挹 > yì; #6339
+挺 > tĭng; #633A
+挻 > shān; #633B
+挼 > nùo; #633C
+挽 > wăn; #633D
+挾 > xié; #633E
+挿 > chā; #633F
+捀 > fēng; #6340
+捁 > jiăo; #6341
+捂 > wŭ; #6342
+捃 > jùn; #6343
+捄 > jìu; #6344
+捅 > tŏng; #6345
+捆 > kŭn; #6346
+捇 > hùo; #6347
+捈 > tú; #6348
+捉 > zhūo; #6349
+捊 > póu; #634A
+捋 > lè; #634B
+捌 > bā; #634C
+捍 > hàn; #634D
+捎 > shāo; #634E
+捏 > niē; #634F
+捐 > juān; #6350
+捑 > zé; #6351
+捒 > sŏng; #6352
+捓 > yé; #6353
+捔 > jué; #6354
+捕 > bŭ; #6355
+捖 > huán; #6356
+捗 > bù; #6357
+捘 > zùn; #6358
+捙 > yì; #6359
+捚 > zhāi; #635A
+捛 > lǚ; #635B
+捜 > sōu; #635C
+捝 > tūo; #635D
+捞 > lāo; #635E
+损 > sŭn; #635F
+捠 > bāng; #6360
+捡 > jiăn; #6361
+换 > huàn; #6362
+捣 > dăo; #6363
+捥 > wàn; #6365
+捦 > qín; #6366
+捧 > pĕng; #6367
+捨 > shĕ; #6368
+捩 > liè; #6369
+捪 > mín; #636A
+捫 > mén; #636B
+捬 > fŭ; #636C
+捭 > băi; #636D
+据 > jù; #636E
+捯 > dăo; #636F
+捰 > wŏ; #6370
+捱 > ái; #6371
+捲 > juăn; #6372
+捳 > yuè; #6373
+捴 > zŏng; #6374
+捵 > chĕn; #6375
+捶 > chúi; #6376
+捷 > jié; #6377
+捸 > tū; #6378
+捹 > bèn; #6379
+捺 > nà; #637A
+捻 > niăn; #637B
+捼 > núo; #637C
+捽 > zú; #637D
+捾 > wò; #637E
+捿 > xī; #637F
+掀 > xiān; #6380
+掁 > chéng; #6381
+掂 > diān; #6382
+掃 > săo; #6383
+掄 > lún; #6384
+掅 > qìng; #6385
+掆 > gāng; #6386
+掇 > dúo; #6387
+授 > shòu; #6388
+掉 > diào; #6389
+掊 > póu; #638A
+掋 > dĭ; #638B
+掌 > zhăng; #638C
+掍 > gŭn; #638D
+掎 > jĭ; #638E
+掏 > tāo; #638F
+掐 > qiā; #6390
+掑 > qí; #6391
+排 > pái; #6392
+掓 > shú; #6393
+掔 > qiān; #6394
+掕 > lìng; #6395
+掖 > yì; #6396
+掗 > yà; #6397
+掘 > jué; #6398
+掙 > zhēng; #6399
+掚 > liăng; #639A
+掛 > guà; #639B
+掜 > yĭ; #639C
+掝 > hùo; #639D
+掞 > shàn; #639E
+掟 > zhĕng; #639F
+掠 > lǜe; #63A0
+採 > căi; #63A1
+探 > tàn; #63A2
+掣 > chè; #63A3
+掤 > bīng; #63A4
+接 > jiē; #63A5
+掦 > tì; #63A6
+控 > kòng; #63A7
+推 > tūi; #63A8
+掩 > yăn; #63A9
+措 > cùo; #63AA
+掫 > zōu; #63AB
+掬 > jú; #63AC
+掭 > tiàn; #63AD
+掮 > qián; #63AE
+掯 > kèn; #63AF
+掰 > bāi; #63B0
+掱 > shŏu; #63B1
+掲 > jiē; #63B2
+掳 > lŭ; #63B3
+掴 > gúo; #63B4
+掷 > zhí; #63B7
+掸 > dăn; #63B8
+掺 > xiān; #63BA
+掻 > sāo; #63BB
+掼 > guàn; #63BC
+掽 > pèng; #63BD
+掾 > yuàn; #63BE
+掿 > nùo; #63BF
+揀 > jiăn; #63C0
+揁 > zhēn; #63C1
+揂 > jīu; #63C2
+揃 > jiān; #63C3
+揄 > yú; #63C4
+揅 > yán; #63C5
+揆 > kúi; #63C6
+揇 > năn; #63C7
+揈 > hōng; #63C8
+揉 > róu; #63C9
+揊 > pì; #63CA
+揋 > wēi; #63CB
+揌 > sāi; #63CC
+揍 > zòu; #63CD
+揎 > xuān; #63CE
+描 > miáo; #63CF
+提 > tí; #63D0
+揑 > niē; #63D1
+插 > chā; #63D2
+揓 > shì; #63D3
+揔 > zŏng; #63D4
+揕 > zhèn; #63D5
+揖 > yī; #63D6
+揗 > shŭn; #63D7
+揘 > héng; #63D8
+揙 > biàn; #63D9
+揚 > yáng; #63DA
+換 > huàn; #63DB
+揜 > yăn; #63DC
+揝 > zuàn; #63DD
+揞 > ăn; #63DE
+揟 > xū; #63DF
+揠 > yà; #63E0
+握 > wò; #63E1
+揢 > kè; #63E2
+揣 > chuăi; #63E3
+揤 > jí; #63E4
+揥 > tì; #63E5
+揦 > lá; #63E6
+揧 > là; #63E7
+揨 > chéng; #63E8
+揩 > kāi; #63E9
+揪 > jīu; #63EA
+揫 > jīu; #63EB
+揬 > tú; #63EC
+揭 > jiē; #63ED
+揮 > hūi; #63EE
+揯 > gēng; #63EF
+揰 > chòng; #63F0
+揱 > shùo; #63F1
+揲 > shé; #63F2
+揳 > xiè; #63F3
+援 > yuán; #63F4
+揵 > qián; #63F5
+揶 > yé; #63F6
+揷 > chā; #63F7
+揸 > zhā; #63F8
+揹 > bēi; #63F9
+揺 > yáo; #63FA
+揽 > lăn; #63FD
+揾 > wèn; #63FE
+揿 > qìn; #63FF
+搀 > chān; #6400
+搁 > gē; #6401
+搂 > lŏu; #6402
+搃 > zŏng; #6403
+搄 > gēng; #6404
+搅 > jiăo; #6405
+搆 > gòu; #6406
+搇 > qìn; #6407
+搈 > yŏng; #6408
+搉 > què; #6409
+搊 > chōu; #640A
+搋 > chĭ; #640B
+搌 > zhăn; #640C
+損 > sŭn; #640D
+搎 > sūn; #640E
+搏 > bó; #640F
+搐 > chù; #6410
+搑 > rŏng; #6411
+搒 > bèng; #6412
+搓 > cūo; #6413
+搔 > sāo; #6414
+搕 > kè; #6415
+搖 > yáo; #6416
+搗 > dăo; #6417
+搘 > zhī; #6418
+搙 > nù; #6419
+搚 > xié; #641A
+搛 > jiān; #641B
+搜 > sōu; #641C
+搝 > qĭu; #641D
+搞 > găo; #641E
+搟 > xiăn; #641F
+搠 > shùo; #6420
+搡 > săng; #6421
+搢 > jìn; #6422
+搣 > miè; #6423
+搤 > è; #6424
+搥 > chúi; #6425
+搦 > nùo; #6426
+搧 > shān; #6427
+搨 > tà; #6428
+搩 > jié; #6429
+搪 > táng; #642A
+搫 > pán; #642B
+搬 > bān; #642C
+搭 > dā; #642D
+搮 > lì; #642E
+搯 > tāo; #642F
+搰 > hú; #6430
+搱 > zhì; #6431
+搲 > wā; #6432
+搳 > xiá; #6433
+搴 > qiān; #6434
+搵 > wèn; #6435
+搶 > qiăng; #6436
+搷 > tián; #6437
+搸 > zhēn; #6438
+搹 > è; #6439
+携 > xī; #643A
+搻 > nùo; #643B
+搼 > quán; #643C
+搽 > chá; #643D
+搾 > zhà; #643E
+搿 > gé; #643F
+摀 > wŭ; #6440
+摁 > èn; #6441
+摂 > shè; #6442
+摃 > káng; #6443
+摄 > shè; #6444
+摅 > shū; #6445
+摆 > băi; #6446
+摇 > yáo; #6447
+摈 > bìn; #6448
+摉 > sōu; #6449
+摊 > tān; #644A
+摋 > sà; #644B
+摌 > chăn; #644C
+摍 > sūo; #644D
+摎 > liáo; #644E
+摏 > chōng; #644F
+摐 > chuāng; #6450
+摑 > gúo; #6451
+摒 > bìng; #6452
+摓 > féng; #6453
+摔 > shuāi; #6454
+摕 > dì; #6455
+摖 > qì; #6456
+摗 > sou; #6457
+摘 > zhāi; #6458
+摙 > liăn; #6459
+摚 > táng; #645A
+摛 > chī; #645B
+摜 > guàn; #645C
+摝 > lù; #645D
+摞 > lúo; #645E
+摟 > lŏu; #645F
+摠 > zŏng; #6460
+摡 > gài; #6461
+摢 > hù; #6462
+摣 > zhā; #6463
+摤 > chuăng; #6464
+摥 > tàng; #6465
+摦 > huà; #6466
+摧 > cūi; #6467
+摨 > nái; #6468
+摩 > mó; #6469
+摪 > jiāng; #646A
+摫 > gūi; #646B
+摬 > yìng; #646C
+摭 > zhí; #646D
+摮 > áo; #646E
+摯 > zhì; #646F
+摰 > niè; #6470
+摱 > mán; #6471
+摲 > shàn; #6472
+摳 > kōu; #6473
+摴 > shū; #6474
+摵 > sŭo; #6475
+摶 > tuán; #6476
+摷 > jiăo; #6477
+摸 > mō; #6478
+摹 > mó; #6479
+摺 > zhé; #647A
+摻 > xiān; #647B
+摼 > kēng; #647C
+摽 > piăo; #647D
+摾 > jiàng; #647E
+摿 > yīn; #647F
+撀 > gòu; #6480
+撁 > qiān; #6481
+撂 > lǜe; #6482
+撃 > jí; #6483
+撄 > yīng; #6484
+撅 > juē; #6485
+撆 > piē; #6486
+撇 > piĕ; #6487
+撈 > lāo; #6488
+撉 > dūn; #6489
+撊 > xiàn; #648A
+撋 > ruán; #648B
+撌 > kùi; #648C
+撍 > zăn; #648D
+撎 > yì; #648E
+撏 > xún; #648F
+撐 > chēng; #6490
+撑 > chēng; #6491
+撒 > să; #6492
+撓 > náo; #6493
+撔 > hèng; #6494
+撕 > sī; #6495
+撖 > qiăn; #6496
+撗 > huáng; #6497
+撘 > dā; #6498
+撙 > zŭn; #6499
+撚 > niăn; #649A
+撛 > lĭn; #649B
+撜 > zhĕng; #649C
+撝 > hūi; #649D
+撞 > zhuàng; #649E
+撟 > jiăo; #649F
+撠 > jĭ; #64A0
+撡 > cāo; #64A1
+撢 > dăn; #64A2
+撣 > dăn; #64A3
+撤 > chè; #64A4
+撥 > bō; #64A5
+撦 > chĕ; #64A6
+撧 > jué; #64A7
+撨 > xiāo; #64A8
+撩 > liáo; #64A9
+撪 > bèn; #64AA
+撫 > fŭ; #64AB
+撬 > qiào; #64AC
+播 > bò; #64AD
+撮 > cūo; #64AE
+撯 > zhúo; #64AF
+撰 > zhuàn; #64B0
+撱 > tŭo; #64B1
+撲 > pū; #64B2
+撳 > qìn; #64B3
+撴 > dūn; #64B4
+撵 > niăn; #64B5
+撷 > xié; #64B7
+撸 > lŭ; #64B8
+撹 > jiăo; #64B9
+撺 > cuān; #64BA
+撻 > tà; #64BB
+撼 > hàn; #64BC
+撽 > qiào; #64BD
+撾 > zhuā; #64BE
+撿 > jiăn; #64BF
+擀 > găn; #64C0
+擁 > yŏng; #64C1
+擂 > léi; #64C2
+擃 > kŭo; #64C3
+擄 > lŭ; #64C4
+擅 > shàn; #64C5
+擆 > zhúo; #64C6
+擇 > zé; #64C7
+擈 > pū; #64C8
+擉 > chùo; #64C9
+擊 > jí; #64CA
+擋 > dăng; #64CB
+擌 > sŭo; #64CC
+操 > cāo; #64CD
+擎 > qíng; #64CE
+擏 > jìng; #64CF
+擐 > huàn; #64D0
+擑 > jiē; #64D1
+擒 > qín; #64D2
+擓 > kuăi; #64D3
+擔 > dān; #64D4
+擕 > xī; #64D5
+擖 > gĕ; #64D6
+擗 > pì; #64D7
+擘 > bò; #64D8
+擙 > ào; #64D9
+據 > jù; #64DA
+擛 > yè; #64DB
+擞 > sŏu; #64DE
+擟 > mí; #64DF
+擠 > jĭ; #64E0
+擡 > tái; #64E1
+擢 > zhúo; #64E2
+擣 > dăo; #64E3
+擤 > xĭng; #64E4
+擥 > lăn; #64E5
+擦 > cā; #64E6
+擧 > jŭ; #64E7
+擨 > yé; #64E8
+擩 > rŭ; #64E9
+擪 > yè; #64EA
+擫 > yè; #64EB
+擬 > nĭ; #64EC
+擭 > hù; #64ED
+擮 > jí; #64EE
+擯 > bìn; #64EF
+擰 > níng; #64F0
+擱 > gē; #64F1
+擲 > zhí; #64F2
+擳 > jié; #64F3
+擴 > kùo; #64F4
+擵 > mó; #64F5
+擶 > jiàn; #64F6
+擷 > xié; #64F7
+擸 > liè; #64F8
+擹 > tān; #64F9
+擺 > băi; #64FA
+擻 > sŏu; #64FB
+擼 > lŭ; #64FC
+擽 > lǜe; #64FD
+擾 > răo; #64FE
+擿 > zhí; #64FF
+攀 > pān; #6500
+攁 > yăng; #6501
+攂 > lèi; #6502
+攃 > sà; #6503
+攄 > shū; #6504
+攅 > zăn; #6505
+攆 > niăn; #6506
+攇 > xiăn; #6507
+攈 > jùn; #6508
+攉 > hùo; #6509
+攊 > lì; #650A
+攋 > là; #650B
+攌 > hàn; #650C
+攍 > yíng; #650D
+攎 > lú; #650E
+攏 > lŏng; #650F
+攐 > qiān; #6510
+攑 > qiān; #6511
+攒 > zăn; #6512
+攓 > qiān; #6513
+攔 > lán; #6514
+攕 > sān; #6515
+攖 > yīng; #6516
+攗 > méi; #6517
+攘 > ráng; #6518
+攙 > chān; #6519
+攛 > cuān; #651B
+攜 > xī; #651C
+攝 > shè; #651D
+攞 > lŭo; #651E
+攟 > jùn; #651F
+攠 > mí; #6520
+攡 > lí; #6521
+攢 > zăn; #6522
+攣 > lǘan; #6523
+攤 > tān; #6524
+攥 > zuàn; #6525
+攦 > lì; #6526
+攧 > diān; #6527
+攨 > wā; #6528
+攩 > dăng; #6529
+攪 > jiăo; #652A
+攫 > jué; #652B
+攬 > lăn; #652C
+攭 > lì; #652D
+攮 > năng; #652E
+支 > zhī; #652F
+攰 > gùi; #6530
+攱 > gŭi; #6531
+攲 > qī; #6532
+攳 > xín; #6533
+攴 > pū; #6534
+攵 > sūi; #6535
+收 > shōu; #6536
+攷 > káo; #6537
+攸 > yōu; #6538
+改 > găi; #6539
+攺 > yĭ; #653A
+攻 > gōng; #653B
+攼 > gān; #653C
+攽 > bān; #653D
+放 > fàng; #653E
+政 > zhèng; #653F
+敀 > bó; #6540
+敁 > diān; #6541
+敂 > kòu; #6542
+敃 > mĭn; #6543
+敄 > wù; #6544
+故 > gù; #6545
+敆 > hé; #6546
+敇 > cè; #6547
+效 > xiào; #6548
+敉 > mĭ; #6549
+敊 > chù; #654A
+敋 > gé; #654B
+敌 > dí; #654C
+敍 > xù; #654D
+敎 > jiào; #654E
+敏 > mĭn; #654F
+敐 > chén; #6550
+救 > jìu; #6551
+敒 > zhèn; #6552
+敓 > dúo; #6553
+敔 > yŭ; #6554
+敕 > chì; #6555
+敖 > áo; #6556
+敗 > bài; #6557
+敘 > xù; #6558
+教 > jiào; #6559
+敚 > dúo; #655A
+敛 > liàn; #655B
+敜 > niè; #655C
+敝 > bì; #655D
+敞 > chăng; #655E
+敟 > diăn; #655F
+敠 > dúo; #6560
+敡 > yì; #6561
+敢 > găn; #6562
+散 > sàn; #6563
+敤 > kĕ; #6564
+敥 > yàn; #6565
+敦 > dūn; #6566
+敧 > qĭ; #6567
+敨 > dŏu; #6568
+敩 > xiào; #6569
+敪 > dúo; #656A
+敫 > jiào; #656B
+敬 > jìng; #656C
+敭 > yáng; #656D
+敮 > xiá; #656E
+敯 > mín; #656F
+数 > shù; #6570
+敱 > ái; #6571
+敲 > qiāo; #6572
+敳 > ái; #6573
+整 > zhĕng; #6574
+敵 > dí; #6575
+敶 > zhèn; #6576
+敷 > fū; #6577
+數 > shù; #6578
+敹 > liáo; #6579
+敺 > qū; #657A
+敻 > xìong; #657B
+敼 > xĭ; #657C
+敽 > jiăo; #657D
+敿 > jiăo; #657F
+斀 > zhúo; #6580
+斁 > yì; #6581
+斂 > liàn; #6582
+斃 > bì; #6583
+斄 > lì; #6584
+斅 > xiào; #6585
+斆 > xiào; #6586
+文 > wén; #6587
+斈 > xué; #6588
+斉 > qí; #6589
+斊 > qí; #658A
+斋 > zhāi; #658B
+斌 > bīn; #658C
+斍 > jué; #658D
+斎 > zhāi; #658E
+斐 > fĕi; #6590
+斑 > bān; #6591
+斒 > bān; #6592
+斓 > lán; #6593
+斔 > yŭ; #6594
+斕 > lán; #6595
+斖 > wĕi; #6596
+斗 > dŏu; #6597
+斘 > shēng; #6598
+料 > liào; #6599
+斚 > jiă; #659A
+斛 > hú; #659B
+斜 > xié; #659C
+斝 > jiă; #659D
+斞 > yŭ; #659E
+斟 > zhēn; #659F
+斠 > jiào; #65A0
+斡 > wò; #65A1
+斢 > tŏu; #65A2
+斣 > chù; #65A3
+斤 > jīn; #65A4
+斥 > chì; #65A5
+斦 > yín; #65A6
+斧 > fŭ; #65A7
+斨 > qiāng; #65A8
+斩 > zhăn; #65A9
+斪 > qú; #65AA
+斫 > zhúo; #65AB
+斬 > zhăn; #65AC
+断 > duàn; #65AD
+斮 > zhúo; #65AE
+斯 > sī; #65AF
+新 > xīn; #65B0
+斱 > zhúo; #65B1
+斲 > zhúo; #65B2
+斳 > qín; #65B3
+斴 > lín; #65B4
+斵 > zhúo; #65B5
+斶 > chù; #65B6
+斷 > duàn; #65B7
+斸 > zhŭ; #65B8
+方 > fāng; #65B9
+斺 > xiè; #65BA
+斻 > háng; #65BB
+於 > yú; #65BC
+施 > shī; #65BD
+斾 > pèi; #65BE
+斿 > yóu; #65BF
+旁 > páng; #65C1
+旂 > qí; #65C2
+旃 > zhān; #65C3
+旄 > máo; #65C4
+旅 > lǚ; #65C5
+旆 > pèi; #65C6
+旇 > pī; #65C7
+旈 > líu; #65C8
+旉 > fū; #65C9
+旊 > făng; #65CA
+旋 > xuán; #65CB
+旌 > jīng; #65CC
+旍 > jīng; #65CD
+旎 > nĭ; #65CE
+族 > zú; #65CF
+旐 > zhào; #65D0
+旑 > yĭ; #65D1
+旒 > líu; #65D2
+旓 > shāo; #65D3
+旔 > jiàn; #65D4
+旖 > yĭ; #65D6
+旗 > qí; #65D7
+旘 > zhì; #65D8
+旙 > fān; #65D9
+旚 > piāo; #65DA
+旛 > fān; #65DB
+旜 > zhān; #65DC
+旝 > guài; #65DD
+旞 > sùi; #65DE
+旟 > yú; #65DF
+无 > wú; #65E0
+旡 > jì; #65E1
+既 > jì; #65E2
+旣 > jì; #65E3
+旤 > hùo; #65E4
+日 > rì; #65E5
+旦 > dàn; #65E6
+旧 > jìu; #65E7
+旨 > zhĭ; #65E8
+早 > zăo; #65E9
+旪 > xié; #65EA
+旫 > tiāo; #65EB
+旬 > xún; #65EC
+旭 > xù; #65ED
+旮 > xù; #65EE
+旯 > xù; #65EF
+旰 > gàn; #65F0
+旱 > hàn; #65F1
+旲 > tái; #65F2
+旳 > dì; #65F3
+旴 > xū; #65F4
+旵 > chăn; #65F5
+时 > shí; #65F6
+旷 > kuàng; #65F7
+旸 > yáng; #65F8
+旹 > shí; #65F9
+旺 > wàng; #65FA
+旻 > mín; #65FB
+旼 > mín; #65FC
+旽 > tūn; #65FD
+旾 > chūn; #65FE
+旿 > wŭ; #65FF
+昀 > yún; #6600
+昁 > bèi; #6601
+昂 > áng; #6602
+昃 > zè; #6603
+昄 > băn; #6604
+昅 > jié; #6605
+昆 > kūn; #6606
+昇 > shēng; #6607
+昈 > hù; #6608
+昉 > făng; #6609
+昊 > hào; #660A
+昋 > gùi; #660B
+昌 > chāng; #660C
+昍 > xuān; #660D
+明 > míng; #660E
+昏 > hūn; #660F
+昐 > fēn; #6610
+昑 > qĭn; #6611
+昒 > hū; #6612
+易 > yì; #6613
+昔 > xí; #6614
+昕 > xīn; #6615
+昖 > yán; #6616
+昗 > zè; #6617
+昘 > făng; #6618
+昙 > tán; #6619
+昚 > shèn; #661A
+昛 > jù; #661B
+昜 > yáng; #661C
+昝 > zăn; #661D
+昞 > bĭng; #661E
+星 > xīng; #661F
+映 > yìng; #6620
+昡 > xuàn; #6621
+昢 > pĕi; #6622
+昣 > zhĕn; #6623
+昤 > līng; #6624
+春 > chūn; #6625
+昦 > hào; #6626
+昧 > mèi; #6627
+昨 > zúo; #6628
+昩 > mò; #6629
+昪 > biàn; #662A
+昫 > xŭ; #662B
+昬 > hūn; #662C
+昭 > zhāo; #662D
+昮 > zòng; #662E
+是 > shì; #662F
+昰 > shì; #6630
+昱 > yù; #6631
+昲 > fèi; #6632
+昳 > dié; #6633
+昴 > măo; #6634
+昵 > nì; #6635
+昶 > chăng; #6636
+昷 > wēn; #6637
+昸 > dōng; #6638
+昹 > ăi; #6639
+昺 > bĭng; #663A
+昻 > áng; #663B
+昼 > zhòu; #663C
+昽 > lóng; #663D
+显 > xiăn; #663E
+昿 > kuàng; #663F
+晀 > tiăo; #6640
+晁 > cháo; #6641
+時 > shí; #6642
+晃 > huăng; #6643
+晄 > huăng; #6644
+晅 > xuān; #6645
+晆 > kúi; #6646
+晇 > xū; #6647
+晈 > jiăo; #6648
+晉 > jìn; #6649
+晊 > zhĭ; #664A
+晋 > jìn; #664B
+晌 > shăng; #664C
+晍 > tóng; #664D
+晎 > hŏng; #664E
+晏 > yàn; #664F
+晐 > gāi; #6650
+晑 > xiăng; #6651
+晒 > shài; #6652
+晓 > xiăo; #6653
+晔 > yē; #6654
+晕 > yūn; #6655
+晖 > hūi; #6656
+晗 > hán; #6657
+晘 > hàn; #6658
+晙 > jùn; #6659
+晚 > wăn; #665A
+晛 > xiàn; #665B
+晜 > kūn; #665C
+晝 > zhòu; #665D
+晞 > xī; #665E
+晟 > chéng; #665F
+晠 > shéng; #6660
+晡 > bū; #6661
+晢 > zhē; #6662
+晣 > zhē; #6663
+晤 > wù; #6664
+晥 > hàn; #6665
+晦 > hùi; #6666
+晧 > hào; #6667
+晨 > chén; #6668
+晩 > wăn; #6669
+晪 > tiăn; #666A
+晫 > zhúo; #666B
+晬 > zùi; #666C
+晭 > zhŏu; #666D
+普 > pŭ; #666E
+景 > jĭng; #666F
+晰 > xī; #6670
+晱 > shăn; #6671
+晲 > yĭ; #6672
+晳 > xì; #6673
+晴 > qíng; #6674
+晵 > qĭ; #6675
+晶 > jīng; #6676
+晷 > gŭi; #6677
+晸 > zhĕn; #6678
+晹 > yì; #6679
+智 > zhì; #667A
+晻 > ăn; #667B
+晼 > wăn; #667C
+晽 > lín; #667D
+晾 > liàng; #667E
+晿 > chāng; #667F
+暀 > wăng; #6680
+暁 > xiăo; #6681
+暂 > zàn; #6682
+暄 > xuān; #6684
+暅 > xuăn; #6685
+暆 > yí; #6686
+暇 > xiá; #6687
+暈 > yūn; #6688
+暉 > hūi; #6689
+暊 > fŭ; #668A
+暋 > mĭn; #668B
+暌 > kúi; #668C
+暍 > hè; #668D
+暎 > yìng; #668E
+暏 > dŭ; #668F
+暐 > wĕi; #6690
+暑 > shŭ; #6691
+暒 > qíng; #6692
+暓 > mào; #6693
+暔 > nán; #6694
+暕 > jiăn; #6695
+暖 > nuăn; #6696
+暗 > àn; #6697
+暘 > yáng; #6698
+暙 > chūn; #6699
+暚 > yáo; #669A
+暛 > sŭo; #669B
+暜 > jìn; #669C
+暝 > míng; #669D
+暞 > jiăo; #669E
+暟 > kăi; #669F
+暠 > găo; #66A0
+暡 > wĕng; #66A1
+暢 > chàng; #66A2
+暣 > qì; #66A3
+暤 > hào; #66A4
+暥 > yàn; #66A5
+暦 > lì; #66A6
+暧 > ài; #66A7
+暨 > jì; #66A8
+暩 > gùi; #66A9
+暪 > mĕn; #66AA
+暫 > zàn; #66AB
+暬 > xiè; #66AC
+暭 > hào; #66AD
+暮 > mù; #66AE
+暯 > mò; #66AF
+暰 > cōng; #66B0
+暱 > nì; #66B1
+暲 > zhāng; #66B2
+暳 > hùi; #66B3
+暴 > bào; #66B4
+暵 > hàn; #66B5
+暶 > xuán; #66B6
+暷 > chuán; #66B7
+暸 > liáo; #66B8
+暹 > xiān; #66B9
+暺 > dàn; #66BA
+暻 > jĭng; #66BB
+暼 > piē; #66BC
+暽 > lín; #66BD
+暾 > tūn; #66BE
+暿 > xĭ; #66BF
+曀 > yì; #66C0
+曁 > jì; #66C1
+曂 > huàng; #66C2
+曃 > tài; #66C3
+曄 > yè; #66C4
+曅 > yè; #66C5
+曆 > lì; #66C6
+曇 > tán; #66C7
+曈 > tóng; #66C8
+曉 > xiăo; #66C9
+曊 > fèi; #66CA
+曋 > qĭn; #66CB
+曌 > zhào; #66CC
+曍 > hào; #66CD
+曎 > yì; #66CE
+曏 > xiàng; #66CF
+曐 > xīng; #66D0
+曑 > sēn; #66D1
+曒 > jiăo; #66D2
+曓 > bào; #66D3
+曔 > jìng; #66D4
+曕 > yiàn; #66D5
+曖 > ài; #66D6
+曗 > yè; #66D7
+曘 > rú; #66D8
+曙 > shù; #66D9
+曚 > méng; #66DA
+曛 > xūn; #66DB
+曜 > yào; #66DC
+曝 > pù; #66DD
+曞 > lì; #66DE
+曟 > chén; #66DF
+曠 > kuàng; #66E0
+曡 > dié; #66E1
+曣 > yàn; #66E3
+曤 > hùo; #66E4
+曥 > lú; #66E5
+曦 > xī; #66E6
+曧 > róng; #66E7
+曨 > lóng; #66E8
+曩 > năng; #66E9
+曪 > lŭo; #66EA
+曫 > luán; #66EB
+曬 > shài; #66EC
+曭 > tăng; #66ED
+曮 > yăn; #66EE
+曯 > chú; #66EF
+曰 > yuē; #66F0
+曱 > yuē; #66F1
+曲 > qŭ; #66F2
+曳 > yì; #66F3
+更 > gèng; #66F4
+曵 > yè; #66F5
+曶 > hū; #66F6
+曷 > hé; #66F7
+書 > shū; #66F8
+曹 > cáo; #66F9
+曺 > cáo; #66FA
+曼 > màn; #66FC
+曽 > cēng; #66FD
+曾 > céng; #66FE
+替 > tì; #66FF
+最 > zùi; #6700
+朁 > căn; #6701
+朂 > xù; #6702
+會 > hùi; #6703
+朄 > yìn; #6704
+朅 > qiè; #6705
+朆 > fēn; #6706
+朇 > pí; #6707
+月 > yuè; #6708
+有 > yŏu; #6709
+朊 > ruăn; #670A
+朋 > péng; #670B
+朌 > bān; #670C
+服 > fú; #670D
+朎 > líng; #670E
+朏 > fĕi; #670F
+朐 > qú; #6710
+朒 > nǜ; #6712
+朓 > tiào; #6713
+朔 > shùo; #6714
+朕 > zhèn; #6715
+朖 > lăng; #6716
+朗 > lăng; #6717
+朘 > juān; #6718
+朙 > míng; #6719
+朚 > huāng; #671A
+望 > wàng; #671B
+朜 > tūn; #671C
+朝 > zhāo; #671D
+朞 > jī; #671E
+期 > qí; #671F
+朠 > yīng; #6720
+朡 > zōng; #6721
+朢 > wàng; #6722
+朣 > tóng; #6723
+朤 > lăng; #6724
+朦 > méng; #6726
+朧 > lóng; #6727
+木 > mù; #6728
+朩 > dĕng; #6729
+未 > wèi; #672A
+末 > mò; #672B
+本 > bĕn; #672C
+札 > zhá; #672D
+朮 > zhú; #672E
+术 > zhú; #672F
+朱 > zhū; #6731
+朲 > rén; #6732
+朳 > bā; #6733
+朴 > pò; #6734
+朵 > dŭo; #6735
+朶 > dŭo; #6736
+朷 > dāo; #6737
+朸 > lì; #6738
+朹 > qíu; #6739
+机 > jī; #673A
+朻 > jīu; #673B
+朼 > bĭ; #673C
+朽 > xĭu; #673D
+朾 > tíng; #673E
+朿 > cì; #673F
+杀 > shā; #6740
+杂 > zá; #6742
+权 > quán; #6743
+杄 > qiān; #6744
+杅 > yú; #6745
+杆 > gān; #6746
+杇 > wū; #6747
+杈 > chā; #6748
+杉 > shān; #6749
+杊 > xún; #674A
+杋 > fān; #674B
+杌 > wù; #674C
+杍 > zĭ; #674D
+李 > lĭ; #674E
+杏 > xìng; #674F
+材 > cái; #6750
+村 > cūn; #6751
+杒 > rèn; #6752
+杓 > sháo; #6753
+杔 > tūo; #6754
+杕 > dì; #6755
+杖 > zhàng; #6756
+杗 > máng; #6757
+杘 > chì; #6758
+杙 > yì; #6759
+杚 > gŭ; #675A
+杛 > gōng; #675B
+杜 > dù; #675C
+杝 > yí; #675D
+杞 > qĭ; #675E
+束 > shù; #675F
+杠 > gāng; #6760
+条 > tiáo; #6761
+来 > lái; #6765
+杧 > máng; #6767
+杨 > yáng; #6768
+杩 > mà; #6769
+杪 > miăo; #676A
+杫 > sì; #676B
+杬 > yuán; #676C
+杭 > háng; #676D
+杮 > fèi; #676E
+杯 > bēi; #676F
+杰 > jié; #6770
+東 > dōng; #6771
+杲 > găo; #6772
+杳 > yăo; #6773
+杴 > xiān; #6774
+杵 > chŭ; #6775
+杶 > qūn; #6776
+杷 > pá; #6777
+杸 > shū; #6778
+杹 > huà; #6779
+杺 > xīn; #677A
+杻 > chŏu; #677B
+杼 > zhù; #677C
+杽 > chŏu; #677D
+松 > sōng; #677E
+板 > băn; #677F
+枀 > sōng; #6780
+极 > jí; #6781
+枂 > yuè; #6782
+枃 > jìn; #6783
+构 > gōu; #6784
+枅 > jī; #6785
+枆 > máo; #6786
+枇 > pí; #6787
+枈 > bì; #6788
+枉 > wăng; #6789
+枊 > àng; #678A
+枋 > fāng; #678B
+枌 > fén; #678C
+枍 > yì; #678D
+枎 > fú; #678E
+枏 > nán; #678F
+析 > xī; #6790
+枑 > hù; #6791
+枒 > yá; #6792
+枓 > dŏu; #6793
+枔 > xún; #6794
+枕 > zhĕn; #6795
+枖 > yāo; #6796
+林 > lín; #6797
+枘 > rùi; #6798
+枙 > é; #6799
+枚 > méi; #679A
+枛 > zhào; #679B
+果 > gŭo; #679C
+枝 > zhī; #679D
+枞 > cōng; #679E
+枟 > yùn; #679F
+枡 > dŏu; #67A1
+枢 > shū; #67A2
+枣 > zăo; #67A3
+枥 > lì; #67A5
+枧 > jiàn; #67A7
+枨 > chéng; #67A8
+枪 > qiāng; #67AA
+枫 > fēng; #67AB
+枬 > nán; #67AC
+枭 > xiāo; #67AD
+枮 > xiān; #67AE
+枯 > kū; #67AF
+枰 > píng; #67B0
+枱 > yí; #67B1
+枲 > xĭ; #67B2
+枳 > zhī; #67B3
+枴 > guăi; #67B4
+枵 > xiāo; #67B5
+架 > jià; #67B6
+枷 > jiā; #67B7
+枸 > gŏu; #67B8
+枹 > fū; #67B9
+枺 > mò; #67BA
+枻 > yì; #67BB
+枼 > yè; #67BC
+枽 > yè; #67BD
+枾 > shì; #67BE
+枿 > niè; #67BF
+柀 > bĭ; #67C0
+柁 > dùo; #67C1
+柂 > yí; #67C2
+柃 > líng; #67C3
+柄 > bĭng; #67C4
+柅 > nĭ; #67C5
+柆 > lā; #67C6
+柇 > hé; #67C7
+柈 > pán; #67C8
+柉 > fán; #67C9
+柊 > zhōng; #67CA
+柋 > dài; #67CB
+柌 > cí; #67CC
+柍 > yāng; #67CD
+柎 > fū; #67CE
+柏 > bó; #67CF
+某 > mŏu; #67D0
+柑 > gān; #67D1
+柒 > qī; #67D2
+染 > răn; #67D3
+柔 > róu; #67D4
+柕 > mào; #67D5
+柖 > zhāo; #67D6
+柗 > sōng; #67D7
+柘 > zhè; #67D8
+柙 > xiá; #67D9
+柚 > yòu; #67DA
+柛 > shēn; #67DB
+柜 > jŭ; #67DC
+柝 > tùo; #67DD
+柞 > zùo; #67DE
+柟 > nán; #67DF
+柠 > níng; #67E0
+柡 > yŏng; #67E1
+柢 > dĭ; #67E2
+柣 > zhí; #67E3
+柤 > zhā; #67E4
+查 > chá; #67E5
+柦 > dàn; #67E6
+柧 > gū; #67E7
+柨 > pu; #67E8
+柩 > jìu; #67E9
+柪 > āo; #67EA
+柫 > fú; #67EB
+柬 > jiăn; #67EC
+柭 > bō; #67ED
+柮 > dùo; #67EE
+柯 > kē; #67EF
+柰 > nài; #67F0
+柱 > zhù; #67F1
+柲 > bì; #67F2
+柳 > lĭu; #67F3
+柴 > chái; #67F4
+柵 > zhà; #67F5
+柶 > sì; #67F6
+柷 > zhù; #67F7
+柸 > pēi; #67F8
+柹 > shì; #67F9
+柺 > guăi; #67FA
+査 > chá; #67FB
+柼 > yăo; #67FC
+柽 > jué; #67FD
+柾 > jìu; #67FE
+柿 > shì; #67FF
+栀 > zhī; #6800
+栁 > lĭu; #6801
+栂 > méi; #6802
+栄 > róng; #6804
+栅 > zhà; #6805
+标 > biāo; #6807
+栈 > zhàn; #6808
+栉 > jié; #6809
+栊 > lóng; #680A
+栋 > dòng; #680B
+栌 > lú; #680C
+栎 > lì; #680E
+栏 > lán; #680F
+栐 > yŏng; #6810
+树 > shù; #6811
+栒 > xún; #6812
+栓 > shuān; #6813
+栔 > qì; #6814
+栕 > zhēn; #6815
+栖 > qī; #6816
+栗 > lì; #6817
+栘 > yĭ; #6818
+栙 > xiáng; #6819
+栚 > zhèn; #681A
+栛 > lì; #681B
+栜 > sù; #681C
+栝 > guā; #681D
+栞 > kān; #681E
+栟 > bīng; #681F
+栠 > rĕn; #6820
+校 > xiào; #6821
+栢 > bó; #6822
+栣 > rĕn; #6823
+栤 > bìng; #6824
+栥 > zī; #6825
+栦 > chóu; #6826
+栧 > yì; #6827
+栨 > jié; #6828
+栩 > xŭ; #6829
+株 > zhū; #682A
+栫 > jiàn; #682B
+栬 > zùi; #682C
+栭 > ér; #682D
+栮 > ĕr; #682E
+栯 > yŏu; #682F
+栰 > fá; #6830
+栱 > gŏng; #6831
+栲 > kăo; #6832
+栳 > lăo; #6833
+栴 > zhān; #6834
+栵 > lì; #6835
+栶 > yin; #6836
+样 > yáng; #6837
+核 > hé; #6838
+根 > gēn; #6839
+栺 > zhĭ; #683A
+栻 > chì; #683B
+格 > gé; #683C
+栽 > zāi; #683D
+栾 > luán; #683E
+栿 > fú; #683F
+桀 > jié; #6840
+桁 > háng; #6841
+桂 > gùi; #6842
+桃 > táo; #6843
+桄 > guàng; #6844
+桅 > wéi; #6845
+框 > kuàng; #6846
+桇 > rú; #6847
+案 > àn; #6848
+桉 > àn; #6849
+桊 > juàn; #684A
+桋 > yí; #684B
+桌 > zhūo; #684C
+桍 > kū; #684D
+桎 > zhí; #684E
+桏 > qíong; #684F
+桐 > tóng; #6850
+桑 > sāng; #6851
+桒 > sāng; #6852
+桓 > huán; #6853
+桔 > jié; #6854
+桕 > jìu; #6855
+桖 > xuè; #6856
+桗 > dùo; #6857
+桘 > zhùi; #6858
+桙 > yú; #6859
+桚 > zăn; #685A
+桜 > yīng; #685C
+桟 > zhàn; #685F
+桠 > yá; #6860
+桡 > náo; #6861
+桢 > zhēn; #6862
+档 > dăng; #6863
+桤 > qī; #6864
+桥 > qiáo; #6865
+桦 > huà; #6866
+桧 > kuài; #6867
+桨 > jiăng; #6868
+桩 > zhuāng; #6869
+桪 > xún; #686A
+桫 > sūo; #686B
+桬 > shā; #686C
+桭 > zhēn; #686D
+桮 > bēi; #686E
+桯 > tīng; #686F
+桰 > guā; #6870
+桱 > jìng; #6871
+桲 > bó; #6872
+桳 > bèn; #6873
+桴 > fú; #6874
+桵 > rŭi; #6875
+桶 > tŏng; #6876
+桷 > jué; #6877
+桸 > xī; #6878
+桹 > láng; #6879
+桺 > lĭu; #687A
+桻 > fēng; #687B
+桼 > qī; #687C
+桽 > wĕn; #687D
+桾 > jūn; #687E
+桿 > găn; #687F
+梀 > cù; #6880
+梁 > liáng; #6881
+梂 > qíu; #6882
+梃 > tĭng; #6883
+梄 > yŏu; #6884
+梅 > méi; #6885
+梆 > bāng; #6886
+梇 > lòng; #6887
+梈 > pēng; #6888
+梉 > zhuāng; #6889
+梊 > dì; #688A
+梋 > xuān; #688B
+梌 > tú; #688C
+梍 > zào; #688D
+梎 > āo; #688E
+梏 > gù; #688F
+梐 > bì; #6890
+梑 > dí; #6891
+梒 > hán; #6892
+梓 > zĭ; #6893
+梔 > zhī; #6894
+梕 > rèn; #6895
+梖 > bèi; #6896
+梗 > gĕng; #6897
+梘 > jiàn; #6898
+梙 > huàn; #6899
+梚 > wăn; #689A
+梛 > núo; #689B
+梜 > jiá; #689C
+條 > tiáo; #689D
+梞 > jì; #689E
+梟 > xiāo; #689F
+梠 > lǚ; #68A0
+梡 > huán; #68A1
+梢 > shāo; #68A2
+梣 > cén; #68A3
+梤 > fén; #68A4
+梥 > sōng; #68A5
+梦 > mèng; #68A6
+梧 > wú; #68A7
+梨 > lí; #68A8
+梩 > lí; #68A9
+梪 > dòu; #68AA
+梫 > cēn; #68AB
+梬 > yĭng; #68AC
+梭 > sūo; #68AD
+梮 > jú; #68AE
+梯 > tī; #68AF
+械 > jiè; #68B0
+梱 > kŭn; #68B1
+梲 > zhúo; #68B2
+梳 > shū; #68B3
+梴 > chān; #68B4
+梵 > fàn; #68B5
+梶 > wĕi; #68B6
+梷 > jìng; #68B7
+梸 > lí; #68B8
+梹 > bīng; #68B9
+梼 > táo; #68BC
+梽 > zhì; #68BD
+梾 > lái; #68BE
+梿 > lián; #68BF
+检 > jiăn; #68C0
+棁 > zhúo; #68C1
+棂 > líng; #68C2
+棃 > lí; #68C3
+棄 > qì; #68C4
+棅 > bìng; #68C5
+棆 > zhūn; #68C6
+棇 > cōng; #68C7
+棈 > qiàn; #68C8
+棉 > mián; #68C9
+棊 > qí; #68CA
+棋 > qí; #68CB
+棌 > căi; #68CC
+棍 > gùn; #68CD
+棎 > chán; #68CE
+棏 > tè; #68CF
+棐 > fĕi; #68D0
+棑 > pái; #68D1
+棒 > bàng; #68D2
+棓 > pŏu; #68D3
+棔 > hūn; #68D4
+棕 > zōng; #68D5
+棖 > chéng; #68D6
+棗 > zăo; #68D7
+棘 > jí; #68D8
+棙 > lì; #68D9
+棚 > péng; #68DA
+棛 > yù; #68DB
+棜 > yù; #68DC
+棝 > gù; #68DD
+棞 > hún; #68DE
+棟 > dòng; #68DF
+棠 > táng; #68E0
+棡 > gāng; #68E1
+棢 > wăng; #68E2
+棣 > dì; #68E3
+棤 > xí; #68E4
+棥 > fán; #68E5
+棦 > chēng; #68E6
+棧 > zhàn; #68E7
+棨 > qĭ; #68E8
+棩 > yuān; #68E9
+棪 > yăn; #68EA
+棫 > yù; #68EB
+棬 > quān; #68EC
+棭 > yì; #68ED
+森 > sēn; #68EE
+棯 > rĕn; #68EF
+棰 > chúi; #68F0
+棱 > léng; #68F1
+棲 > qī; #68F2
+棳 > zhúo; #68F3
+棴 > fú; #68F4
+棵 > kē; #68F5
+棶 > lái; #68F6
+棷 > zōu; #68F7
+棸 > zōu; #68F8
+棹 > zhūo; #68F9
+棺 > guān; #68FA
+棻 > fén; #68FB
+棼 > fén; #68FC
+棽 > chēn; #68FD
+棾 > qíong; #68FE
+棿 > niè; #68FF
+椀 > wăn; #6900
+椁 > gŭo; #6901
+椂 > lù; #6902
+椃 > háo; #6903
+椄 > jiē; #6904
+椅 > yĭ; #6905
+椆 > chóu; #6906
+椇 > jŭ; #6907
+椈 > jú; #6908
+椉 > chéng; #6909
+椊 > zúo; #690A
+椋 > liáng; #690B
+椌 > qiāng; #690C
+植 > zhí; #690D
+椎 > zhūi; #690E
+椏 > yā; #690F
+椐 > jū; #6910
+椑 > bēi; #6911
+椒 > jiāo; #6912
+椓 > zhúo; #6913
+椔 > zī; #6914
+椕 > bīn; #6915
+椖 > péng; #6916
+椗 > dìng; #6917
+椘 > chŭ; #6918
+検 > jiăn; #691C
+椝 > gūi; #691D
+椞 > xì; #691E
+椟 > dú; #691F
+椠 > qiàn; #6920
+椤 > lúo; #6924
+椥 > zhī; #6925
+椪 > pèng; #692A
+椫 > zhăn; #692B
+椭 > tŭo; #692D
+椮 > sēn; #692E
+椯 > dúo; #692F
+椰 > yé; #6930
+椱 > fòu; #6931
+椲 > wĕi; #6932
+椳 > wēi; #6933
+椴 > duàn; #6934
+椵 > jiă; #6935
+椶 > zōng; #6936
+椷 > jiān; #6937
+椸 > yí; #6938
+椹 > shèn; #6939
+椺 > xí; #693A
+椻 > yàn; #693B
+椼 > yăn; #693C
+椽 > chuán; #693D
+椾 > zhàn; #693E
+椿 > chūn; #693F
+楀 > yŭ; #6940
+楁 > hé; #6941
+楂 > zhā; #6942
+楃 > wò; #6943
+楄 > pián; #6944
+楅 > bì; #6945
+楆 > yāo; #6946
+楇 > hùo; #6947
+楈 > xū; #6948
+楉 > rùo; #6949
+楊 > yáng; #694A
+楋 > là; #694B
+楌 > yán; #694C
+楍 > bĕn; #694D
+楎 > hún; #694E
+楏 > kúi; #694F
+楐 > jiè; #6950
+楑 > kúi; #6951
+楒 > sī; #6952
+楓 > fēng; #6953
+楔 > xiè; #6954
+楕 > tŭo; #6955
+楖 > zhì; #6956
+楗 > jiàn; #6957
+楘 > mù; #6958
+楙 > mào; #6959
+楚 > chŭ; #695A
+楛 > hù; #695B
+楜 > hú; #695C
+楝 > liàn; #695D
+楞 > léng; #695E
+楟 > tíng; #695F
+楠 > nán; #6960
+楡 > yú; #6961
+楢 > yóu; #6962
+楣 > méi; #6963
+楤 > sŏng; #6964
+楥 > xuàn; #6965
+楦 > xuàn; #6966
+楧 > yīng; #6967
+楨 > zhēn; #6968
+楩 > pián; #6969
+楪 > yè; #696A
+楫 > jí; #696B
+楬 > jié; #696C
+業 > yè; #696D
+楮 > chŭ; #696E
+楯 > shŭn; #696F
+楰 > yú; #6970
+楱 > còu; #6971
+楲 > wēi; #6972
+楳 > méi; #6973
+楴 > dì; #6974
+極 > jí; #6975
+楶 > jié; #6976
+楷 > kăi; #6977
+楸 > qīu; #6978
+楹 > yíng; #6979
+楺 > róu; #697A
+楻 > héng; #697B
+楼 > lóu; #697C
+楽 > lè; #697D
+榀 > pĭn; #6980
+概 > gài; #6982
+榃 > tán; #6983
+榄 > lăn; #6984
+榅 > yún; #6985
+榆 > yú; #6986
+榇 > chèn; #6987
+榈 > lǘ; #6988
+榉 > jŭ; #6989
+榍 > xiè; #698D
+榎 > jiă; #698E
+榏 > yì; #698F
+榐 > zhăn; #6990
+榑 > fù; #6991
+榒 > nài; #6992
+榓 > mì; #6993
+榔 > láng; #6994
+榕 > róng; #6995
+榖 > gŭ; #6996
+榗 > jiàn; #6997
+榘 > jŭ; #6998
+榙 > tă; #6999
+榚 > yăo; #699A
+榛 > zhēn; #699B
+榜 > băng; #699C
+榝 > shā; #699D
+榞 > yuán; #699E
+榟 > zĭ; #699F
+榠 > mīng; #69A0
+榡 > sù; #69A1
+榢 > jià; #69A2
+榣 > yáo; #69A3
+榤 > jié; #69A4
+榥 > huăng; #69A5
+榦 > gàn; #69A6
+榧 > fĕi; #69A7
+榨 > zhà; #69A8
+榩 > qián; #69A9
+榪 > mà; #69AA
+榫 > sŭn; #69AB
+榬 > yuán; #69AC
+榭 > xiè; #69AD
+榮 > róng; #69AE
+榯 > shí; #69AF
+榰 > zhī; #69B0
+榱 > cūi; #69B1
+榲 > yún; #69B2
+榳 > tíng; #69B3
+榴 > líu; #69B4
+榵 > róng; #69B5
+榶 > táng; #69B6
+榷 > què; #69B7
+榸 > zhāi; #69B8
+榹 > sī; #69B9
+榺 > shèng; #69BA
+榻 > tà; #69BB
+榼 > kè; #69BC
+榽 > xī; #69BD
+榾 > gù; #69BE
+榿 > qī; #69BF
+槀 > kăo; #69C0
+槁 > găo; #69C1
+槂 > sūn; #69C2
+槃 > pán; #69C3
+槄 > tāo; #69C4
+槅 > gé; #69C5
+槆 > xún; #69C6
+槇 > diān; #69C7
+槈 > nòu; #69C8
+槉 > jí; #69C9
+槊 > shùo; #69CA
+構 > gòu; #69CB
+槌 > chúi; #69CC
+槍 > qiāng; #69CD
+槎 > chā; #69CE
+槏 > qiăn; #69CF
+槐 > huái; #69D0
+槑 > méi; #69D1
+槒 > xù; #69D2
+槓 > gàng; #69D3
+槔 > gāo; #69D4
+槕 > zhúo; #69D5
+槖 > tùo; #69D6
+様 > yàng; #69D8
+槙 > diān; #69D9
+槚 > jiă; #69DA
+槛 > jiàn; #69DB
+槜 > zùi; #69DC
+槟 > bīn; #69DF
+槠 > zhū; #69E0
+槢 > xí; #69E2
+槣 > qĭ; #69E3
+槤 > lián; #69E4
+槥 > hùi; #69E5
+槦 > yóng; #69E6
+槧 > qiàn; #69E7
+槨 > gŭo; #69E8
+槩 > gài; #69E9
+槪 > gài; #69EA
+槫 > tuán; #69EB
+槬 > huà; #69EC
+槭 > cù; #69ED
+槮 > sēn; #69EE
+槯 > cūi; #69EF
+槰 > bèng; #69F0
+槱 > yŏu; #69F1
+槲 > hú; #69F2
+槳 > jiăng; #69F3
+槴 > hù; #69F4
+槵 > huàn; #69F5
+槶 > kùi; #69F6
+槷 > yì; #69F7
+槸 > niè; #69F8
+槹 > gāo; #69F9
+槺 > kāng; #69FA
+槻 > gūi; #69FB
+槼 > gūi; #69FC
+槽 > cáo; #69FD
+槾 > mán; #69FE
+槿 > jĭn; #69FF
+樀 > dì; #6A00
+樁 > zhuāng; #6A01
+樂 > lè; #6A02
+樃 > láng; #6A03
+樄 > chén; #6A04
+樅 > cōng; #6A05
+樆 > lí; #6A06
+樇 > xīu; #6A07
+樈 > qíng; #6A08
+樉 > shuăng; #6A09
+樊 > fán; #6A0A
+樋 > tōng; #6A0B
+樌 > guàn; #6A0C
+樍 > jī; #6A0D
+樎 > sūo; #6A0E
+樏 > lĕi; #6A0F
+樐 > lŭ; #6A10
+樑 > liáng; #6A11
+樒 > mì; #6A12
+樓 > lóu; #6A13
+樔 > cháo; #6A14
+樕 > sù; #6A15
+樖 > kē; #6A16
+樗 > shū; #6A17
+樘 > táng; #6A18
+標 > biāo; #6A19
+樚 > lù; #6A1A
+樛 > jīu; #6A1B
+樜 > shù; #6A1C
+樝 > zhā; #6A1D
+樞 > shū; #6A1E
+樟 > zhāng; #6A1F
+樠 > mén; #6A20
+模 > mó; #6A21
+樢 > niăo; #6A22
+樣 > yàng; #6A23
+樤 > tiáo; #6A24
+樥 > péng; #6A25
+樦 > zhù; #6A26
+樧 > shā; #6A27
+樨 > xī; #6A28
+権 > quán; #6A29
+横 > héng; #6A2A
+樫 > jiān; #6A2B
+樬 > cōng; #6A2C
+樯 > qiáng; #6A2F
+樱 > yīng; #6A31
+樲 > èr; #6A32
+樳 > xín; #6A33
+樴 > zhí; #6A34
+樵 > qiáo; #6A35
+樶 > zūi; #6A36
+樷 > cōng; #6A37
+樸 > pú; #6A38
+樹 > shù; #6A39
+樺 > huà; #6A3A
+樻 > kùi; #6A3B
+樼 > zhēn; #6A3C
+樽 > zūn; #6A3D
+樾 > yuè; #6A3E
+樿 > zhăn; #6A3F
+橀 > xī; #6A40
+橁 > xún; #6A41
+橂 > diàn; #6A42
+橃 > fā; #6A43
+橄 > găn; #6A44
+橅 > mó; #6A45
+橆 > wŭ; #6A46
+橇 > qiāo; #6A47
+橈 > náo; #6A48
+橉 > lìn; #6A49
+橊 > líu; #6A4A
+橋 > qiáo; #6A4B
+橌 > xiàn; #6A4C
+橍 > rùn; #6A4D
+橎 > fán; #6A4E
+橏 > zhăn; #6A4F
+橐 > tùo; #6A50
+橑 > lăo; #6A51
+橒 > yún; #6A52
+橓 > shùn; #6A53
+橔 > túi; #6A54
+橕 > chēng; #6A55
+橖 > táng; #6A56
+橗 > méng; #6A57
+橘 > jú; #6A58
+橙 > chéng; #6A59
+橚 > sù; #6A5A
+橛 > jué; #6A5B
+橜 > jué; #6A5C
+橝 > tān; #6A5D
+橞 > hùi; #6A5E
+機 > jī; #6A5F
+橠 > nŭo; #6A60
+橡 > xiàng; #6A61
+橢 > tŭo; #6A62
+橣 > nĭng; #6A63
+橤 > rŭi; #6A64
+橥 > zhū; #6A65
+橦 > chuáng; #6A66
+橧 > zēng; #6A67
+橨 > fén; #6A68
+橩 > qíong; #6A69
+橪 > răn; #6A6A
+橫 > héng; #6A6B
+橬 > cén; #6A6C
+橭 > gū; #6A6D
+橮 > lĭu; #6A6E
+橯 > lào; #6A6F
+橰 > gāo; #6A70
+橱 > chú; #6A71
+橶 > jí; #6A76
+橷 > dōu; #6A77
+橹 > lŭ; #6A79
+橼 > yuán; #6A7C
+橽 > tà; #6A7D
+橾 > shū; #6A7E
+橿 > jiāng; #6A7F
+檀 > tán; #6A80
+檁 > lĭn; #6A81
+檂 > nóng; #6A82
+檃 > yĭn; #6A83
+檄 > xí; #6A84
+檅 > sùi; #6A85
+檆 > shān; #6A86
+檇 > zùi; #6A87
+檈 > xuán; #6A88
+檉 > chēng; #6A89
+檊 > gàn; #6A8A
+檋 > jū; #6A8B
+檌 > zùi; #6A8C
+檍 > yì; #6A8D
+檎 > qín; #6A8E
+檏 > pŭ; #6A8F
+檐 > yán; #6A90
+檑 > léi; #6A91
+檒 > fēng; #6A92
+檓 > hŭi; #6A93
+檔 > dăng; #6A94
+檕 > jì; #6A95
+檖 > sùi; #6A96
+檗 > bò; #6A97
+檘 > bì; #6A98
+檙 > dĭng; #6A99
+檚 > chŭ; #6A9A
+檛 > zhuā; #6A9B
+檜 > kuài; #6A9C
+檝 > jí; #6A9D
+檞 > jiĕ; #6A9E
+檟 > jiă; #6A9F
+檠 > qíng; #6AA0
+檡 > zhè; #6AA1
+檢 > jiăn; #6AA2
+檣 > qiáng; #6AA3
+檤 > dào; #6AA4
+檥 > yĭ; #6AA5
+檦 > biăo; #6AA6
+檧 > sōng; #6AA7
+檨 > shē; #6AA8
+檩 > lĭn; #6AA9
+檫 > chá; #6AAB
+檬 > méng; #6AAC
+檭 > yín; #6AAD
+檮 > táo; #6AAE
+檯 > tái; #6AAF
+檰 > mián; #6AB0
+檱 > qí; #6AB1
+檲 > tóan; #6AB2
+檳 > bīn; #6AB3
+檴 > hùo; #6AB4
+檵 > jì; #6AB5
+檶 > qiān; #6AB6
+檷 > mí; #6AB7
+檸 > níng; #6AB8
+檹 > yī; #6AB9
+檺 > găo; #6ABA
+檻 > jiàn; #6ABB
+檼 > yìn; #6ABC
+檽 > ér; #6ABD
+檾 > qĭng; #6ABE
+檿 > yăn; #6ABF
+櫀 > qí; #6AC0
+櫁 > mì; #6AC1
+櫂 > zhào; #6AC2
+櫃 > gùi; #6AC3
+櫄 > chūn; #6AC4
+櫅 > jī; #6AC5
+櫆 > kúi; #6AC6
+櫇 > pó; #6AC7
+櫈 > dèng; #6AC8
+櫉 > chú; #6AC9
+櫋 > mián; #6ACB
+櫌 > yōu; #6ACC
+櫍 > zhì; #6ACD
+櫎 > guàng; #6ACE
+櫏 > qiān; #6ACF
+櫐 > lĕi; #6AD0
+櫑 > lĕi; #6AD1
+櫒 > sà; #6AD2
+櫓 > lŭ; #6AD3
+櫔 > lì; #6AD4
+櫕 > cuán; #6AD5
+櫖 > lǘ; #6AD6
+櫗 > miè; #6AD7
+櫘 > hùi; #6AD8
+櫙 > ōu; #6AD9
+櫚 > lǘ; #6ADA
+櫛 > jié; #6ADB
+櫜 > gāo; #6ADC
+櫝 > dú; #6ADD
+櫞 > yuán; #6ADE
+櫟 > lì; #6ADF
+櫠 > fèi; #6AE0
+櫡 > zhúo; #6AE1
+櫢 > sŏu; #6AE2
+櫣 > lián; #6AE3
+櫥 > chú; #6AE5
+櫧 > zhū; #6AE7
+櫨 > lú; #6AE8
+櫩 > yán; #6AE9
+櫪 > lì; #6AEA
+櫫 > zhū; #6AEB
+櫬 > chèn; #6AEC
+櫭 > jié; #6AED
+櫮 > è; #6AEE
+櫯 > sū; #6AEF
+櫰 > huái; #6AF0
+櫱 > niè; #6AF1
+櫲 > yù; #6AF2
+櫳 > lóng; #6AF3
+櫴 > lài; #6AF4
+櫶 > xiăn; #6AF6
+櫸 > jŭ; #6AF8
+櫹 > xiāo; #6AF9
+櫺 > líng; #6AFA
+櫻 > yīng; #6AFB
+櫼 > jiān; #6AFC
+櫽 > yĭn; #6AFD
+櫾 > yóu; #6AFE
+櫿 > yíng; #6AFF
+欀 > xiāng; #6B00
+欁 > nóng; #6B01
+欂 > bó; #6B02
+欃 > chán; #6B03
+欄 > lán; #6B04
+欅 > jŭ; #6B05
+欆 > shuāng; #6B06
+欇 > shè; #6B07
+欈 > wéi; #6B08
+欉 > còng; #6B09
+權 > quán; #6B0A
+欋 > qú; #6B0B
+欎 > yù; #6B0E
+欏 > lúo; #6B0F
+欐 > lĭ; #6B10
+欑 > zàn; #6B11
+欒 > luán; #6B12
+欓 > dăng; #6B13
+欔 > jué; #6B14
+欖 > lăn; #6B16
+欗 > lán; #6B17
+欘 > zhŭ; #6B18
+欙 > léi; #6B19
+欚 > lĭ; #6B1A
+欛 > bà; #6B1B
+欜 > náng; #6B1C
+欝 > yù; #6B1D
+欞 > líng; #6B1E
+欠 > qiàn; #6B20
+次 > cì; #6B21
+欢 > huān; #6B22
+欣 > xīn; #6B23
+欤 > yú; #6B24
+欥 > yù; #6B25
+欦 > qiān; #6B26
+欧 > ōu; #6B27
+欨 > xū; #6B28
+欩 > chāo; #6B29
+欪 > chù; #6B2A
+欫 > chī; #6B2B
+欬 > kài; #6B2C
+欭 > yì; #6B2D
+欮 > jué; #6B2E
+欯 > xí; #6B2F
+欰 > xū; #6B30
+欱 > xià; #6B31
+欲 > yù; #6B32
+欳 > kuài; #6B33
+欴 > láng; #6B34
+欵 > kuăn; #6B35
+欶 > shùo; #6B36
+欷 > xī; #6B37
+欸 > ăi; #6B38
+欹 > yī; #6B39
+欺 > qī; #6B3A
+欻 > hū; #6B3B
+欼 > chĭ; #6B3C
+欽 > qīn; #6B3D
+款 > kuăn; #6B3E
+欿 > kăn; #6B3F
+歀 > kuăn; #6B40
+歁 > kăn; #6B41
+歂 > chuán; #6B42
+歃 > shà; #6B43
+歄 > gua; #6B44
+歅 > yīn; #6B45
+歆 > xīn; #6B46
+歇 > xiē; #6B47
+歈 > yú; #6B48
+歉 > qiàn; #6B49
+歊 > xiāo; #6B4A
+歋 > yí; #6B4B
+歌 > gē; #6B4C
+歍 > wū; #6B4D
+歎 > tàn; #6B4E
+歏 > jìn; #6B4F
+歐 > ōu; #6B50
+歑 > hū; #6B51
+歒 > tì; #6B52
+歓 > huān; #6B53
+歔 > xū; #6B54
+歕 > pèn; #6B55
+歖 > xī; #6B56
+歗 > xiào; #6B57
+歘 > xū; #6B58
+歙 > xì; #6B59
+歛 > liàn; #6B5B
+歜 > chù; #6B5C
+歝 > yì; #6B5D
+歞 > kăn; #6B5E
+歟 > yú; #6B5F
+歠 > chùo; #6B60
+歡 > huān; #6B61
+止 > zhĭ; #6B62
+正 > zhèng; #6B63
+此 > cĭ; #6B64
+步 > bù; #6B65
+武 > wŭ; #6B66
+歧 > qí; #6B67
+歨 > bù; #6B68
+歩 > bù; #6B69
+歪 > wāi; #6B6A
+歫 > jù; #6B6B
+歬 > qián; #6B6C
+歭 > chí; #6B6D
+歮 > sè; #6B6E
+歯 > chĭ; #6B6F
+歰 > sè; #6B70
+歱 > zhŏng; #6B71
+歲 > sùi; #6B72
+歳 > sùi; #6B73
+歴 > lì; #6B74
+歵 > cùo; #6B75
+歶 > yú; #6B76
+歷 > lì; #6B77
+歸 > gūi; #6B78
+歹 > dăi; #6B79
+歺 > dăi; #6B7A
+死 > sĭ; #6B7B
+歼 > jiān; #6B7C
+歽 > zhé; #6B7D
+歾 > mò; #6B7E
+歿 > mò; #6B7F
+殀 > yăo; #6B80
+殁 > mò; #6B81
+殂 > cú; #6B82
+殃 > yāng; #6B83
+殄 > tiăn; #6B84
+殅 > shēng; #6B85
+殆 > dài; #6B86
+殇 > shāng; #6B87
+殈 > xù; #6B88
+殉 > xùn; #6B89
+殊 > shū; #6B8A
+残 > cán; #6B8B
+殌 > jué; #6B8C
+殍 > piăo; #6B8D
+殎 > qià; #6B8E
+殏 > qìu; #6B8F
+殐 > sù; #6B90
+殑 > qíng; #6B91
+殒 > yŭn; #6B92
+殓 > liàn; #6B93
+殔 > yì; #6B94
+殕 > fŏu; #6B95
+殖 > zhí; #6B96
+殗 > yè; #6B97
+殘 > cán; #6B98
+殙 > hūn; #6B99
+殚 > dān; #6B9A
+殛 > jí; #6B9B
+殜 > yè; #6B9C
+殝 > zhen; #6B9D
+殞 > yŭn; #6B9E
+殟 > wēn; #6B9F
+殠 > chòu; #6BA0
+殡 > bìn; #6BA1
+殢 > tì; #6BA2
+殣 > jĭn; #6BA3
+殤 > shāng; #6BA4
+殥 > yín; #6BA5
+殦 > diāo; #6BA6
+殧 > cù; #6BA7
+殨 > hùi; #6BA8
+殩 > cuàn; #6BA9
+殪 > yì; #6BAA
+殫 > dān; #6BAB
+殬 > dù; #6BAC
+殭 > jiāng; #6BAD
+殮 > liàn; #6BAE
+殯 > bìn; #6BAF
+殰 > dú; #6BB0
+殲 > jiān; #6BB2
+殳 > shū; #6BB3
+殴 > ōu; #6BB4
+段 > duàn; #6BB5
+殶 > zhù; #6BB6
+殷 > yīn; #6BB7
+殸 > qìng; #6BB8
+殹 > yì; #6BB9
+殺 > shā; #6BBA
+殻 > què; #6BBB
+殼 > ké; #6BBC
+殽 > yáo; #6BBD
+殾 > jùn; #6BBE
+殿 > diàn; #6BBF
+毀 > hŭi; #6BC0
+毁 > hŭi; #6BC1
+毂 > gŭ; #6BC2
+毃 > què; #6BC3
+毄 > jī; #6BC4
+毅 > yì; #6BC5
+毆 > ōu; #6BC6
+毇 > hŭi; #6BC7
+毈 > duàn; #6BC8
+毉 > yī; #6BC9
+毊 > xiāo; #6BCA
+毋 > wú; #6BCB
+毌 > guàn; #6BCC
+母 > mŭ; #6BCD
+毎 > mĕi; #6BCE
+每 > mĕi; #6BCF
+毐 > ăi; #6BD0
+毑 > zŭo; #6BD1
+毒 > dú; #6BD2
+毓 > yù; #6BD3
+比 > bĭ; #6BD4
+毕 > bì; #6BD5
+毖 > bì; #6BD6
+毗 > pí; #6BD7
+毘 > pí; #6BD8
+毙 > bì; #6BD9
+毚 > chán; #6BDA
+毛 > máo; #6BDB
+毞 > pú; #6BDE
+毠 > jiā; #6BE0
+毡 > zhān; #6BE1
+毢 > sāi; #6BE2
+毣 > mù; #6BE3
+毤 > tùo; #6BE4
+毥 > xún; #6BE5
+毦 > èr; #6BE6
+毧 > róng; #6BE7
+毨 > xiăn; #6BE8
+毩 > jú; #6BE9
+毪 > mú; #6BEA
+毫 > háo; #6BEB
+毬 > qíu; #6BEC
+毭 > dòu; #6BED
+毯 > tăn; #6BEF
+毰 > péi; #6BF0
+毱 > jú; #6BF1
+毲 > dúo; #6BF2
+毳 > cùi; #6BF3
+毴 > bī; #6BF4
+毵 > sān; #6BF5
+毷 > mào; #6BF7
+毸 > sūi; #6BF8
+毹 > yū; #6BF9
+毺 > yū; #6BFA
+毻 > tùo; #6BFB
+毼 > hé; #6BFC
+毽 > jiàn; #6BFD
+毾 > tà; #6BFE
+毿 > sān; #6BFF
+氀 > lǘ; #6C00
+氁 > mú; #6C01
+氂 > lí; #6C02
+氃 > tóng; #6C03
+氄 > rŏng; #6C04
+氅 > chăng; #6C05
+氆 > pŭ; #6C06
+氇 > lúo; #6C07
+氈 > zhān; #6C08
+氉 > sào; #6C09
+氊 > zhān; #6C0A
+氋 > méng; #6C0B
+氌 > lúo; #6C0C
+氍 > qú; #6C0D
+氎 > dié; #6C0E
+氏 > shì; #6C0F
+氐 > dĭ; #6C10
+民 > mín; #6C11
+氒 > jué; #6C12
+氓 > máng; #6C13
+气 > qì; #6C14
+氕 > piē; #6C15
+氖 > năi; #6C16
+気 > qì; #6C17
+氘 > dāo; #6C18
+氙 > xiān; #6C19
+氚 > chuān; #6C1A
+氛 > fēn; #6C1B
+氜 > rì; #6C1C
+氝 > nèi; #6C1D
+氟 > fú; #6C1F
+氠 > shēn; #6C20
+氡 > dōng; #6C21
+氢 > qīng; #6C22
+氣 > qì; #6C23
+氤 > yīn; #6C24
+氥 > xī; #6C25
+氦 > hài; #6C26
+氧 > yăng; #6C27
+氨 > ān; #6C28
+氩 > yà; #6C29
+氪 > kè; #6C2A
+氫 > qīng; #6C2B
+氬 > yà; #6C2C
+氭 > dōng; #6C2D
+氮 > dàn; #6C2E
+氯 > lǜ; #6C2F
+氰 > qīng; #6C30
+氱 > yăng; #6C31
+氲 > yūn; #6C32
+氳 > yūn; #6C33
+水 > shŭi; #6C34
+氵 > sān' 'diăn' 'shŭi; #6C35
+氶 > zhĕng; #6C36
+氷 > bīng; #6C37
+永 > yŏng; #6C38
+氹 > dàng; #6C39
+氻 > lè; #6C3B
+氼 > nì; #6C3C
+氽 > tŭn; #6C3D
+氾 > fàn; #6C3E
+氿 > gŭi; #6C3F
+汀 > tīng; #6C40
+汁 > zhī; #6C41
+求 > qíu; #6C42
+汃 > bīn; #6C43
+汄 > zè; #6C44
+汅 > miăn; #6C45
+汆 > cuān; #6C46
+汇 > hùi; #6C47
+汈 > diāo; #6C48
+汉 > yì; #6C49
+汊 > chà; #6C4A
+汋 > zhúo; #6C4B
+汌 > chuàn; #6C4C
+汍 > wán; #6C4D
+汎 > fàn; #6C4E
+汏 > dài; #6C4F
+汐 > xì; #6C50
+汑 > tūo; #6C51
+汒 > máng; #6C52
+汓 > qíu; #6C53
+汔 > qì; #6C54
+汕 > shàn; #6C55
+汖 > pài; #6C56
+汗 > hàn; #6C57
+汘 > qiān; #6C58
+汙 > wū; #6C59
+汚 > wū; #6C5A
+汛 > xùn; #6C5B
+汜 > sì; #6C5C
+汝 > rŭ; #6C5D
+汞 > gŏng; #6C5E
+江 > jiāng; #6C5F
+池 > chí; #6C60
+污 > wū; #6C61
+汤 > tāng; #6C64
+汥 > zhī; #6C65
+汦 > chí; #6C66
+汧 > qiān; #6C67
+汨 > mì; #6C68
+汩 > yù; #6C69
+汪 > wāng; #6C6A
+汫 > qìng; #6C6B
+汬 > jĭng; #6C6C
+汭 > rùi; #6C6D
+汮 > jūn; #6C6E
+汯 > hóng; #6C6F
+汰 > tài; #6C70
+汱 > quăn; #6C71
+汲 > jí; #6C72
+汳 > biàn; #6C73
+汴 > biàn; #6C74
+汵 > gàn; #6C75
+汶 > wèn; #6C76
+汷 > zhōng; #6C77
+汸 > fāng; #6C78
+汹 > xīong; #6C79
+決 > jué; #6C7A
+汻 > hăng; #6C7B
+汼 > niōu; #6C7C
+汽 > qì; #6C7D
+汾 > fén; #6C7E
+汿 > xù; #6C7F
+沀 > xù; #6C80
+沁 > qìn; #6C81
+沂 > yí; #6C82
+沃 > wò; #6C83
+沄 > yún; #6C84
+沅 > yuán; #6C85
+沆 > háng; #6C86
+沇 > yăn; #6C87
+沈 > chén; #6C88
+沉 > chén; #6C89
+沊 > dàn; #6C8A
+沋 > yóu; #6C8B
+沌 > dùn; #6C8C
+沍 > hù; #6C8D
+沎 > hùo; #6C8E
+沏 > qiē; #6C8F
+沐 > mù; #6C90
+沑 > róu; #6C91
+沒 > méi; #6C92
+沓 > tà; #6C93
+沔 > miăn; #6C94
+沕 > wù; #6C95
+沖 > chōng; #6C96
+沗 > tiān; #6C97
+沘 > bĭ; #6C98
+沙 > shā; #6C99
+沚 > zhĭ; #6C9A
+沛 > pèi; #6C9B
+沜 > pàn; #6C9C
+沝 > zhŭi; #6C9D
+沞 > zā; #6C9E
+沟 > gōu; #6C9F
+沠 > líu; #6CA0
+没 > méi; #6CA1
+沢 > zé; #6CA2
+沣 > fēng; #6CA3
+沤 > òu; #6CA4
+沥 > lì; #6CA5
+沦 > lún; #6CA6
+沧 > cāng; #6CA7
+沨 > féng; #6CA8
+沩 > wéi; #6CA9
+沪 > hù; #6CAA
+沫 > mò; #6CAB
+沬 > mèi; #6CAC
+沭 > shù; #6CAD
+沮 > jū; #6CAE
+沯 > zăn; #6CAF
+沰 > tūo; #6CB0
+沱 > túo; #6CB1
+沲 > túo; #6CB2
+河 > hé; #6CB3
+沴 > lì; #6CB4
+沵 > mĭ; #6CB5
+沶 > yí; #6CB6
+沷 > fā; #6CB7
+沸 > fèi; #6CB8
+油 > yóu; #6CB9
+沺 > tián; #6CBA
+治 > zhì; #6CBB
+沼 > zhăo; #6CBC
+沽 > gū; #6CBD
+沾 > zhān; #6CBE
+沿 > yán; #6CBF
+泀 > sī; #6CC0
+況 > kuàng; #6CC1
+泂 > jĭong; #6CC2
+泃 > jù; #6CC3
+泄 > xiè; #6CC4
+泅 > qíu; #6CC5
+泆 > yī; #6CC6
+泇 > jiā; #6CC7
+泈 > zhōng; #6CC8
+泉 > quán; #6CC9
+泊 > bó; #6CCA
+泋 > hùi; #6CCB
+泌 > mì; #6CCC
+泍 > bēn; #6CCD
+泎 > zhúo; #6CCE
+泏 > chù; #6CCF
+泐 > lè; #6CD0
+泑 > yŏu; #6CD1
+泒 > gū; #6CD2
+泓 > hóng; #6CD3
+泔 > gān; #6CD4
+法 > fă; #6CD5
+泖 > măo; #6CD6
+泗 > sì; #6CD7
+泘 > hū; #6CD8
+泙 > píng; #6CD9
+泚 > cĭ; #6CDA
+泛 > fàn; #6CDB
+泜 > chí; #6CDC
+泝 > sù; #6CDD
+泞 > nìng; #6CDE
+泟 > chēng; #6CDF
+泠 > líng; #6CE0
+泡 > pào; #6CE1
+波 > bō; #6CE2
+泣 > qì; #6CE3
+泤 > sì; #6CE4
+泥 > ní; #6CE5
+泦 > jú; #6CE6
+泧 > yuè; #6CE7
+注 > zhù; #6CE8
+泩 > shēng; #6CE9
+泪 > lèi; #6CEA
+泫 > xuàn; #6CEB
+泬 > xuè; #6CEC
+泭 > fū; #6CED
+泮 > pàn; #6CEE
+泯 > mĭn; #6CEF
+泰 > tài; #6CF0
+泱 > yāng; #6CF1
+泲 > jĭ; #6CF2
+泳 > yŏng; #6CF3
+泴 > guàn; #6CF4
+泵 > bèng; #6CF5
+泶 > xué; #6CF6
+泷 > lóng; #6CF7
+泸 > lú; #6CF8
+泺 > bó; #6CFA
+泻 > xiè; #6CFB
+泼 > pō; #6CFC
+泽 > zé; #6CFD
+泾 > jīng; #6CFE
+泿 > yín; #6CFF
+洀 > zhōu; #6D00
+洁 > jí; #6D01
+洂 > yì; #6D02
+洃 > hūi; #6D03
+洄 > húi; #6D04
+洅 > zŭi; #6D05
+洆 > chéng; #6D06
+洇 > yīn; #6D07
+洈 > wéi; #6D08
+洉 > hòu; #6D09
+洊 > jiàn; #6D0A
+洋 > yáng; #6D0B
+洌 > liè; #6D0C
+洍 > sì; #6D0D
+洎 > jì; #6D0E
+洏 > ér; #6D0F
+洐 > xíng; #6D10
+洑 > fú; #6D11
+洒 > să; #6D12
+洓 > sŭo; #6D13
+洔 > zhĭ; #6D14
+洕 > yīn; #6D15
+洖 > wú; #6D16
+洗 > xĭ; #6D17
+洘 > kăo; #6D18
+洙 > zhū; #6D19
+洚 > jiàng; #6D1A
+洛 > lùo; #6D1B
+洝 > àn; #6D1D
+洞 > dòng; #6D1E
+洟 > yí; #6D1F
+洠 > móu; #6D20
+洡 > lĕi; #6D21
+洢 > yī; #6D22
+洣 > mĭ; #6D23
+洤 > quán; #6D24
+津 > jīn; #6D25
+洦 > mò; #6D26
+洧 > wĕi; #6D27
+洨 > xiáo; #6D28
+洩 > xiè; #6D29
+洪 > hóng; #6D2A
+洫 > xù; #6D2B
+洬 > shùo; #6D2C
+洭 > kuāng; #6D2D
+洮 > tāo; #6D2E
+洯 > qiè; #6D2F
+洰 > jù; #6D30
+洱 > ĕr; #6D31
+洲 > zhōu; #6D32
+洳 > rù; #6D33
+洴 > píng; #6D34
+洵 > xún; #6D35
+洶 > xīong; #6D36
+洷 > zhì; #6D37
+洸 > guāng; #6D38
+洹 > huán; #6D39
+洺 > míng; #6D3A
+活 > húo; #6D3B
+洼 > wā; #6D3C
+洽 > qià; #6D3D
+派 > pài; #6D3E
+洿 > wū; #6D3F
+浀 > qŭ; #6D40
+流 > líu; #6D41
+浂 > yì; #6D42
+浃 > jiá; #6D43
+浄 > jìng; #6D44
+浅 > qiăn; #6D45
+浆 > jiāng; #6D46
+浇 > jiāo; #6D47
+浈 > chéng; #6D48
+浉 > shī; #6D49
+浊 > zhúo; #6D4A
+测 > cè; #6D4B
+浍 > kuài; #6D4D
+济 > jì; #6D4E
+浏 > líu; #6D4F
+浐 > chăn; #6D50
+浑 > hún; #6D51
+浒 > hŭ; #6D52
+浓 > nóng; #6D53
+浔 > xún; #6D54
+浕 > jìn; #6D55
+浖 > liè; #6D56
+浗 > qíu; #6D57
+浘 > wĕi; #6D58
+浙 > zhè; #6D59
+浚 > jùn; #6D5A
+浛 > hàn; #6D5B
+浜 > bāng; #6D5C
+浝 > máng; #6D5D
+浞 > zhúo; #6D5E
+浟 > yóu; #6D5F
+浠 > xī; #6D60
+浡 > bó; #6D61
+浢 > dòu; #6D62
+浣 > wăn; #6D63
+浤 > hóng; #6D64
+浥 > yì; #6D65
+浦 > pŭ; #6D66
+浧 > yĭng; #6D67
+浨 > lăn; #6D68
+浩 > hào; #6D69
+浪 > làng; #6D6A
+浫 > hăn; #6D6B
+浬 > lĭ; #6D6C
+浭 > gēng; #6D6D
+浮 > fú; #6D6E
+浯 > wú; #6D6F
+浰 > liàn; #6D70
+浱 > chún; #6D71
+浲 > féng; #6D72
+浳 > yì; #6D73
+浴 > yù; #6D74
+浵 > tóng; #6D75
+浶 > láo; #6D76
+海 > hăi; #6D77
+浸 > jìn; #6D78
+浹 > jiá; #6D79
+浺 > chōng; #6D7A
+浻 > wĕng; #6D7B
+浼 > mĕi; #6D7C
+浽 > sūi; #6D7D
+浾 > chēng; #6D7E
+浿 > pèi; #6D7F
+涀 > xiàn; #6D80
+涁 > shèn; #6D81
+涂 > tú; #6D82
+涃 > kùn; #6D83
+涄 > pīn; #6D84
+涅 > niè; #6D85
+涆 > hàn; #6D86
+涇 > jīng; #6D87
+消 > xiāo; #6D88
+涉 > shè; #6D89
+涊 > niàn; #6D8A
+涋 > tū; #6D8B
+涌 > yŏng; #6D8C
+涍 > xiào; #6D8D
+涎 > xián; #6D8E
+涏 > tĭng; #6D8F
+涐 > é; #6D90
+涑 > sù; #6D91
+涒 > tūn; #6D92
+涓 > juān; #6D93
+涔 > cén; #6D94
+涕 > tì; #6D95
+涖 > lì; #6D96
+涗 > shùi; #6D97
+涘 > sì; #6D98
+涙 > lèi; #6D99
+涚 > shùi; #6D9A
+涛 > tāo; #6D9B
+涜 > dú; #6D9C
+涝 > lào; #6D9D
+涞 > lái; #6D9E
+涟 > lián; #6D9F
+涠 > wéi; #6DA0
+涡 > wō; #6DA1
+涢 > yún; #6DA2
+涣 > huàn; #6DA3
+涤 > dí; #6DA4
+润 > rùn; #6DA6
+涧 > jiàn; #6DA7
+涨 > zhăng; #6DA8
+涩 > sè; #6DA9
+涪 > fú; #6DAA
+涫 > guàn; #6DAB
+涬 > xìng; #6DAC
+涭 > shòu; #6DAD
+涮 > shuàn; #6DAE
+涯 > yá; #6DAF
+涰 > chùo; #6DB0
+涱 > zhàng; #6DB1
+液 > yè; #6DB2
+涳 > kōng; #6DB3
+涴 > wò; #6DB4
+涵 > hán; #6DB5
+涶 > tūo; #6DB6
+涷 > dōng; #6DB7
+涸 > hé; #6DB8
+涹 > wō; #6DB9
+涺 > jū; #6DBA
+涻 > gàn; #6DBB
+涼 > liáng; #6DBC
+涽 > hūn; #6DBD
+涾 > tà; #6DBE
+涿 > zhúo; #6DBF
+淀 > diàn; #6DC0
+淁 > qiè; #6DC1
+淂 > dé; #6DC2
+淃 > juàn; #6DC3
+淄 > zī; #6DC4
+淅 > xī; #6DC5
+淆 > yáo; #6DC6
+淇 > qí; #6DC7
+淈 > gŭ; #6DC8
+淉 > gŭo; #6DC9
+淊 > hàn; #6DCA
+淋 > lín; #6DCB
+淌 > tăng; #6DCC
+淍 > zhōu; #6DCD
+淎 > pĕng; #6DCE
+淏 > hào; #6DCF
+淐 > chāng; #6DD0
+淑 > shú; #6DD1
+淒 > qī; #6DD2
+淓 > fāng; #6DD3
+淔 > chì; #6DD4
+淕 > lù; #6DD5
+淖 > nào; #6DD6
+淗 > jú; #6DD7
+淘 > táo; #6DD8
+淙 > cóng; #6DD9
+淚 > lèi; #6DDA
+淛 > zhì; #6DDB
+淜 > péng; #6DDC
+淝 > féi; #6DDD
+淞 > sōng; #6DDE
+淟 > tiăn; #6DDF
+淠 > pì; #6DE0
+淡 > dàn; #6DE1
+淢 > yù; #6DE2
+淣 > ní; #6DE3
+淤 > yū; #6DE4
+淥 > lù; #6DE5
+淦 > gàn; #6DE6
+淧 > mì; #6DE7
+淨 > jìng; #6DE8
+淩 > líng; #6DE9
+淪 > lún; #6DEA
+淫 > yín; #6DEB
+淬 > cùi; #6DEC
+淭 > qú; #6DED
+淮 > huái; #6DEE
+淯 > yù; #6DEF
+淰 > niàn; #6DF0
+深 > shēn; #6DF1
+淲 > piáo; #6DF2
+淳 > chún; #6DF3
+淴 > wà; #6DF4
+淵 > yuān; #6DF5
+淶 > lái; #6DF6
+混 > hŭn; #6DF7
+淸 > qīng; #6DF8
+淹 > yān; #6DF9
+淺 > qiăn; #6DFA
+添 > tiān; #6DFB
+淼 > miăo; #6DFC
+淽 > zhĭ; #6DFD
+淾 > yĭn; #6DFE
+淿 > mì; #6DFF
+渀 > bēn; #6E00
+渁 > yuān; #6E01
+渂 > wèn; #6E02
+渃 > rè; #6E03
+渄 > fēi; #6E04
+清 > qīng; #6E05
+渆 > yuān; #6E06
+渇 > kĕ; #6E07
+済 > jì; #6E08
+渉 > shè; #6E09
+渊 > yuān; #6E0A
+渌 > lù; #6E0C
+渍 > zì; #6E0D
+渎 > dú; #6E0E
+渐 > jiàn; #6E10
+渑 > mĭn; #6E11
+渒 > pì; #6E12
+渔 > yú; #6E14
+渕 > yuān; #6E15
+渖 > shĕn; #6E16
+渗 > shèn; #6E17
+渘 > róu; #6E18
+渙 > huàn; #6E19
+渚 > zhŭ; #6E1A
+減 > jiăn; #6E1B
+渜 > nuăn; #6E1C
+渝 > yú; #6E1D
+渞 > qíu; #6E1E
+渟 > tíng; #6E1F
+渠 > qú; #6E20
+渡 > dù; #6E21
+渢 > féng; #6E22
+渣 > zhā; #6E23
+渤 > bó; #6E24
+渥 > wò; #6E25
+渦 > wō; #6E26
+渧 > dì; #6E27
+渨 > wēi; #6E28
+温 > wēn; #6E29
+渪 > rú; #6E2A
+渫 > xiè; #6E2B
+測 > cè; #6E2C
+渭 > wèi; #6E2D
+渮 > gē; #6E2E
+港 > găng; #6E2F
+渰 > yăn; #6E30
+渱 > hóng; #6E31
+渲 > xuàn; #6E32
+渳 > mĭ; #6E33
+渴 > kĕ; #6E34
+渵 > máo; #6E35
+渶 > yīng; #6E36
+渷 > yăn; #6E37
+游 > yóu; #6E38
+渹 > hōng; #6E39
+渺 > miăo; #6E3A
+渻 > xĭng; #6E3B
+渼 > mĕi; #6E3C
+渽 > zāi; #6E3D
+渾 > hún; #6E3E
+渿 > nài; #6E3F
+湀 > kúi; #6E40
+湁 > shí; #6E41
+湂 > è; #6E42
+湃 > pài; #6E43
+湄 > méi; #6E44
+湅 > liàn; #6E45
+湆 > qì; #6E46
+湇 > qì; #6E47
+湈 > méi; #6E48
+湉 > tián; #6E49
+湊 > còu; #6E4A
+湋 > wéi; #6E4B
+湌 > cān; #6E4C
+湍 > tuān; #6E4D
+湎 > miăn; #6E4E
+湏 > hùi; #6E4F
+湐 > mò; #6E50
+湑 > xŭ; #6E51
+湒 > jí; #6E52
+湓 > pén; #6E53
+湔 > jiān; #6E54
+湕 > jiăn; #6E55
+湖 > hú; #6E56
+湗 > fèng; #6E57
+湘 > xiāng; #6E58
+湙 > yì; #6E59
+湚 > yìn; #6E5A
+湛 > zhàn; #6E5B
+湜 > shí; #6E5C
+湝 > jiē; #6E5D
+湞 > chéng; #6E5E
+湟 > huáng; #6E5F
+湠 > tàn; #6E60
+湡 > yú; #6E61
+湢 > bì; #6E62
+湣 > mĭn; #6E63
+湤 > shī; #6E64
+湥 > tú; #6E65
+湦 > shēng; #6E66
+湧 > yŏng; #6E67
+湨 > qù; #6E68
+湩 > zhòng; #6E69
+湪 > suèi; #6E6A
+湫 > jīu; #6E6B
+湬 > jiăo; #6E6C
+湭 > qióu; #6E6D
+湮 > yīn; #6E6E
+湯 > tāng; #6E6F
+湰 > lóng; #6E70
+湱 > hùo; #6E71
+湲 > yuán; #6E72
+湳 > năn; #6E73
+湴 > bàn; #6E74
+湵 > yŏu; #6E75
+湶 > quán; #6E76
+湷 > chúi; #6E77
+湸 > liàng; #6E78
+湹 > chán; #6E79
+湺 > yán; #6E7A
+湻 > chún; #6E7B
+湼 > niè; #6E7C
+湽 > zī; #6E7D
+湾 > wān; #6E7E
+湿 > shī; #6E7F
+満 > măn; #6E80
+溁 > yíng; #6E81
+溃 > kùi; #6E83
+溅 > jiàn; #6E85
+溆 > xù; #6E86
+溇 > lǚ; #6E87
+溈 > gūi; #6E88
+溉 > gài; #6E89
+溌 > pō; #6E8C
+溍 > jìn; #6E8D
+溎 > gùi; #6E8E
+溏 > táng; #6E8F
+源 > yuán; #6E90
+溑 > sŭo; #6E91
+溒 > yuán; #6E92
+溓 > lián; #6E93
+溔 > yăo; #6E94
+溕 > mèng; #6E95
+準 > zhŭn; #6E96
+溗 > shéng; #6E97
+溘 > kè; #6E98
+溙 > tài; #6E99
+溚 > dá; #6E9A
+溛 > wā; #6E9B
+溜 > līu; #6E9C
+溝 > gōu; #6E9D
+溞 > sāo; #6E9E
+溟 > míng; #6E9F
+溠 > zhà; #6EA0
+溡 > shí; #6EA1
+溢 > yì; #6EA2
+溣 > lún; #6EA3
+溤 > mă; #6EA4
+溥 > pŭ; #6EA5
+溦 > wéi; #6EA6
+溧 > lì; #6EA7
+溨 > cái; #6EA8
+溩 > wù; #6EA9
+溪 > xī; #6EAA
+溫 > wēn; #6EAB
+溬 > qiāng; #6EAC
+溭 > zé; #6EAD
+溮 > shī; #6EAE
+溯 > sù; #6EAF
+溰 > yī; #6EB0
+溱 > zhēn; #6EB1
+溲 > sōu; #6EB2
+溳 > yún; #6EB3
+溴 > xìu; #6EB4
+溵 > yīn; #6EB5
+溶 > róng; #6EB6
+溷 > hùn; #6EB7
+溸 > sù; #6EB8
+溹 > sù; #6EB9
+溺 > nì; #6EBA
+溻 > tà; #6EBB
+溼 > shī; #6EBC
+溽 > rù; #6EBD
+溾 > wēi; #6EBE
+溿 > pàn; #6EBF
+滀 > chù; #6EC0
+滁 > chú; #6EC1
+滂 > pāng; #6EC2
+滃 > wĕng; #6EC3
+滄 > cāng; #6EC4
+滅 > miè; #6EC5
+滆 > hé; #6EC6
+滇 > diān; #6EC7
+滈 > hào; #6EC8
+滉 > huăng; #6EC9
+滊 > xì; #6ECA
+滋 > zī; #6ECB
+滌 > dí; #6ECC
+滍 > zhĭ; #6ECD
+滎 > yíng; #6ECE
+滏 > fŭ; #6ECF
+滐 > jié; #6ED0
+滑 > huá; #6ED1
+滒 > gē; #6ED2
+滓 > zĭ; #6ED3
+滔 > tāo; #6ED4
+滕 > téng; #6ED5
+滖 > sūi; #6ED6
+滗 > bĭ; #6ED7
+滘 > jiào; #6ED8
+滙 > hùi; #6ED9
+滚 > gŭn; #6EDA
+滛 > yín; #6EDB
+滜 > gāo; #6EDC
+滝 > lóng; #6EDD
+滞 > zhì; #6EDE
+滟 > yàn; #6EDF
+滠 > shè; #6EE0
+满 > măn; #6EE1
+滢 > yìng; #6EE2
+滣 > chún; #6EE3
+滤 > lǜ; #6EE4
+滥 > làn; #6EE5
+滦 > luán; #6EE6
+滨 > bīn; #6EE8
+滩 > tān; #6EE9
+滪 > yù; #6EEA
+滫 > sŏu; #6EEB
+滬 > hù; #6EEC
+滭 > bì; #6EED
+滮 > biāo; #6EEE
+滯 > zhì; #6EEF
+滰 > jiăng; #6EF0
+滱 > kòu; #6EF1
+滲 > shèn; #6EF2
+滳 > shāng; #6EF3
+滴 > dī; #6EF4
+滵 > mì; #6EF5
+滶 > áo; #6EF6
+滷 > lŭ; #6EF7
+滸 > hŭ; #6EF8
+滹 > hū; #6EF9
+滺 > yóu; #6EFA
+滻 > chăn; #6EFB
+滼 > fàn; #6EFC
+滽 > yóng; #6EFD
+滾 > gŭn; #6EFE
+滿 > măn; #6EFF
+漀 > qìng; #6F00
+漁 > yú; #6F01
+漂 > piāo; #6F02
+漃 > jí; #6F03
+漄 > yá; #6F04
+漅 > jiăo; #6F05
+漆 > qī; #6F06
+漇 > xĭ; #6F07
+漈 > jì; #6F08
+漉 > lù; #6F09
+漊 > lǚ; #6F0A
+漋 > lóng; #6F0B
+漌 > jĭn; #6F0C
+漍 > gúo; #6F0D
+漎 > cóng; #6F0E
+漏 > lòu; #6F0F
+漐 > zhí; #6F10
+漑 > gài; #6F11
+漒 > qiáng; #6F12
+漓 > lí; #6F13
+演 > yăn; #6F14
+漕 > cáo; #6F15
+漖 > jiào; #6F16
+漗 > cōng; #6F17
+漘 > qún; #6F18
+漙 > tuán; #6F19
+漚 > òu; #6F1A
+漛 > téng; #6F1B
+漜 > yĕ; #6F1C
+漝 > xí; #6F1D
+漞 > mì; #6F1E
+漟 > táng; #6F1F
+漠 > mò; #6F20
+漡 > shāng; #6F21
+漢 > hàn; #6F22
+漣 > lián; #6F23
+漤 > lăn; #6F24
+漥 > wā; #6F25
+漦 > lí; #6F26
+漧 > qián; #6F27
+漨 > féng; #6F28
+漩 > xuán; #6F29
+漪 > yī; #6F2A
+漫 > màn; #6F2B
+漬 > zì; #6F2C
+漭 > măng; #6F2D
+漮 > kāng; #6F2E
+漯 > lĕi; #6F2F
+漰 > pēng; #6F30
+漱 > shù; #6F31
+漲 > zhăng; #6F32
+漳 > zhāng; #6F33
+漴 > chóng; #6F34
+漵 > xù; #6F35
+漶 > huàn; #6F36
+漷 > kùo; #6F37
+漸 > jiàn; #6F38
+漹 > yān; #6F39
+漺 > chuăng; #6F3A
+漻 > liáo; #6F3B
+漼 > cŭi; #6F3C
+漽 > tí; #6F3D
+漾 > yàng; #6F3E
+漿 > jiāng; #6F3F
+潀 > cóng; #6F40
+潁 > yĭng; #6F41
+潂 > hóng; #6F42
+潃 > xún; #6F43
+潄 > shù; #6F44
+潅 > guàn; #6F45
+潆 > yíng; #6F46
+潇 > xiāo; #6F47
+潊 > xù; #6F4A
+潋 > liàn; #6F4B
+潌 > zhì; #6F4C
+潍 > wéi; #6F4D
+潎 > pì; #6F4E
+潏 > jué; #6F4F
+潐 > jiào; #6F50
+潑 > pō; #6F51
+潒 > dàng; #6F52
+潓 > hùi; #6F53
+潔 > jié; #6F54
+潕 > wŭ; #6F55
+潖 > pá; #6F56
+潗 > jí; #6F57
+潘 > pān; #6F58
+潙 > gúi; #6F59
+潚 > xiāo; #6F5A
+潛 > qián; #6F5B
+潜 > qián; #6F5C
+潝 > xī; #6F5D
+潞 > lù; #6F5E
+潟 > xì; #6F5F
+潠 > xuàn; #6F60
+潡 > dùn; #6F61
+潢 > huáng; #6F62
+潣 > mĭn; #6F63
+潤 > rùn; #6F64
+潥 > sù; #6F65
+潦 > liáo; #6F66
+潧 > zhēn; #6F67
+潨 > zhōng; #6F68
+潩 > yì; #6F69
+潪 > dí; #6F6A
+潫 > wān; #6F6B
+潬 > dàn; #6F6C
+潭 > tán; #6F6D
+潮 > cháo; #6F6E
+潯 > xún; #6F6F
+潰 > kùi; #6F70
+潱 > yie; #6F71
+潲 > shào; #6F72
+潳 > tú; #6F73
+潴 > zhū; #6F74
+潵 > sàn; #6F75
+潶 > hēi; #6F76
+潷 > bĭ; #6F77
+潸 > shān; #6F78
+潹 > chán; #6F79
+潺 > chán; #6F7A
+潻 > shŭ; #6F7B
+潼 > tóng; #6F7C
+潽 > pŭ; #6F7D
+潾 > lín; #6F7E
+潿 > wéi; #6F7F
+澀 > sè; #6F80
+澁 > sè; #6F81
+澂 > chéng; #6F82
+澃 > jìong; #6F83
+澄 > chéng; #6F84
+澅 > huà; #6F85
+澆 > jiāo; #6F86
+澇 > lào; #6F87
+澈 > chè; #6F88
+澉 > găn; #6F89
+澊 > cūn; #6F8A
+澋 > hèng; #6F8B
+澌 > sī; #6F8C
+澍 > shù; #6F8D
+澎 > péng; #6F8E
+澏 > hàn; #6F8F
+澐 > yún; #6F90
+澑 > lìu; #6F91
+澒 > hòng; #6F92
+澓 > fú; #6F93
+澔 > hào; #6F94
+澕 > hé; #6F95
+澖 > xiān; #6F96
+澗 > jiàn; #6F97
+澘 > shān; #6F98
+澙 > xì; #6F99
+澜 > lán; #6F9C
+澞 > yú; #6F9E
+澟 > lĭn; #6F9F
+澠 > mĭn; #6FA0
+澡 > zăo; #6FA1
+澢 > dāng; #6FA2
+澣 > wăn; #6FA3
+澤 > zé; #6FA4
+澥 > xiè; #6FA5
+澦 > yù; #6FA6
+澧 > lĭ; #6FA7
+澨 > shì; #6FA8
+澩 > xué; #6FA9
+澪 > líng; #6FAA
+澫 > màn; #6FAB
+澬 > zī; #6FAC
+澭 > yōng; #6FAD
+澮 > kuài; #6FAE
+澯 > càn; #6FAF
+澰 > liàn; #6FB0
+澱 > diàn; #6FB1
+澲 > yè; #6FB2
+澳 > ào; #6FB3
+澴 > huán; #6FB4
+澵 > zhēn; #6FB5
+澶 > chán; #6FB6
+澷 > màn; #6FB7
+澸 > dăn; #6FB8
+澹 > dàn; #6FB9
+澺 > yì; #6FBA
+澻 > sùi; #6FBB
+澼 > pì; #6FBC
+澽 > jù; #6FBD
+澾 > tà; #6FBE
+澿 > qín; #6FBF
+激 > jī; #6FC0
+濁 > zhúo; #6FC1
+濂 > lián; #6FC2
+濃 > nóng; #6FC3
+濄 > gūo; #6FC4
+濅 > jìn; #6FC5
+濆 > fén; #6FC6
+濇 > sè; #6FC7
+濈 > jí; #6FC8
+濉 > sūi; #6FC9
+濊 > hùi; #6FCA
+濋 > chŭ; #6FCB
+濌 > tà; #6FCC
+濍 > sōng; #6FCD
+濎 > dĭng; #6FCE
+濐 > zhŭ; #6FD0
+濑 > lài; #6FD1
+濒 > bīn; #6FD2
+濓 > lián; #6FD3
+濔 > mĭ; #6FD4
+濕 > shī; #6FD5
+濖 > shù; #6FD6
+濗 > mì; #6FD7
+濘 > nìng; #6FD8
+濙 > yíng; #6FD9
+濚 > yíng; #6FDA
+濛 > méng; #6FDB
+濜 > jìn; #6FDC
+濝 > qí; #6FDD
+濞 > pì; #6FDE
+濟 > jì; #6FDF
+濠 > háo; #6FE0
+濡 > rú; #6FE1
+濢 > zŭi; #6FE2
+濣 > wò; #6FE3
+濤 > tāo; #6FE4
+濥 > yìn; #6FE5
+濦 > yĭn; #6FE6
+濧 > dùi; #6FE7
+濨 > cí; #6FE8
+濩 > hùo; #6FE9
+濪 > jìng; #6FEA
+濫 > làn; #6FEB
+濬 > jùn; #6FEC
+濭 > ài; #6FED
+濮 > pū; #6FEE
+濯 > zhúo; #6FEF
+濰 > wéi; #6FF0
+濱 > bīn; #6FF1
+濲 > gŭ; #6FF2
+濳 > qián; #6FF3
+濴 > xíng; #6FF4
+濶 > kùo; #6FF6
+濷 > fèi; #6FF7
+濺 > jiàn; #6FFA
+濻 > wĕi; #6FFB
+濼 > lùo; #6FFC
+濽 > zàn; #6FFD
+濾 > lǜ; #6FFE
+濿 > lì; #6FFF
+瀀 > yōu; #7000
+瀁 > yàng; #7001
+瀂 > lŭ; #7002
+瀃 > sì; #7003
+瀄 > jié; #7004
+瀅 > yìng; #7005
+瀆 > dú; #7006
+瀇 > wăng; #7007
+瀈 > hūi; #7008
+瀉 > xiè; #7009
+瀊 > pán; #700A
+瀋 > shĕn; #700B
+瀌 > biāo; #700C
+瀍 > chán; #700D
+瀎 > mò; #700E
+瀏 > líu; #700F
+瀐 > jiān; #7010
+瀑 > pù; #7011
+瀒 > sè; #7012
+瀓 > chéng; #7013
+瀔 > gŭ; #7014
+瀕 > bīn; #7015
+瀖 > hùo; #7016
+瀗 > xiàn; #7017
+瀘 > lú; #7018
+瀙 > qīn; #7019
+瀚 > hàn; #701A
+瀛 > yíng; #701B
+瀜 > yōng; #701C
+瀝 > lì; #701D
+瀞 > jìng; #701E
+瀟 > xiāo; #701F
+瀠 > yíng; #7020
+瀡 > sŭi; #7021
+瀢 > wéi; #7022
+瀣 > xiè; #7023
+瀤 > huái; #7024
+瀥 > hào; #7025
+瀦 > zhū; #7026
+瀧 > lóng; #7027
+瀨 > lài; #7028
+瀩 > dùi; #7029
+瀪 > fán; #702A
+瀫 > hú; #702B
+瀬 > lài; #702C
+瀯 > yíng; #702F
+瀰 > mí; #7030
+瀱 > jì; #7031
+瀲 > liàn; #7032
+瀳 > jiàn; #7033
+瀴 > yĭng; #7034
+瀵 > fèn; #7035
+瀶 > lín; #7036
+瀷 > yì; #7037
+瀸 > jiān; #7038
+瀹 > yuè; #7039
+瀺 > chán; #703A
+瀻 > dài; #703B
+瀼 > ráng; #703C
+瀽 > jiăn; #703D
+瀾 > lán; #703E
+瀿 > fán; #703F
+灀 > shuàng; #7040
+灁 > yuān; #7041
+灂 > zhúo; #7042
+灃 > fēng; #7043
+灄 > shè; #7044
+灅 > lĕi; #7045
+灆 > lán; #7046
+灇 > cóng; #7047
+灈 > qú; #7048
+灉 > yōng; #7049
+灊 > qián; #704A
+灋 > fă; #704B
+灌 > guàn; #704C
+灍 > què; #704D
+灎 > yàn; #704E
+灏 > hào; #704F
+灑 > să; #7051
+灒 > zàn; #7052
+灓 > luán; #7053
+灔 > yàn; #7054
+灕 > lí; #7055
+灖 > mĭ; #7056
+灗 > shàn; #7057
+灘 > tān; #7058
+灙 > dăng; #7059
+灚 > jiăo; #705A
+灛 > chăn; #705B
+灝 > hào; #705D
+灞 > bà; #705E
+灟 > zhú; #705F
+灠 > lăn; #7060
+灡 > lán; #7061
+灢 > năng; #7062
+灣 > wān; #7063
+灤 > luán; #7064
+灥 > xún; #7065
+灦 > xiăn; #7066
+灧 > yàn; #7067
+灨 > găn; #7068
+灩 > yàn; #7069
+灪 > yù; #706A
+火 > hŭo; #706B
+灬 > sì' 'diăn' 'hŭo; #706C
+灭 > miè; #706D
+灮 > guāng; #706E
+灯 > dēng; #706F
+灰 > hūi; #7070
+灱 > xiāo; #7071
+灲 > xiāo; #7072
+灳 > hū1; #7073
+灴 > hóng; #7074
+灵 > líng; #7075
+灶 > zào; #7076
+灷 > zhuàn; #7077
+灸 > jĭu; #7078
+灹 > zhà; #7079
+灺 > xiè; #707A
+灻 > chì; #707B
+灼 > zhúo; #707C
+災 > zāi; #707D
+灾 > zāi; #707E
+灿 > càn; #707F
+炀 > yáng; #7080
+炁 > qì; #7081
+炂 > zhōng; #7082
+炃 > fén; #7083
+炄 > nĭu; #7084
+炅 > jĭong; #7085
+炆 > wén; #7086
+炇 > pò; #7087
+炈 > yì; #7088
+炉 > lú; #7089
+炊 > chūi; #708A
+炋 > pī; #708B
+炌 > kài; #708C
+炍 > pàn; #708D
+炎 > yán; #708E
+炏 > kài; #708F
+炐 > pàng; #7090
+炑 > mù; #7091
+炒 > chăo; #7092
+炓 > liào; #7093
+炔 > gùi; #7094
+炕 > kàng; #7095
+炖 > tūn; #7096
+炗 > guāng; #7097
+炘 > xīn; #7098
+炙 > zhì; #7099
+炚 > guang; #709A
+炛 > guāng; #709B
+炜 > wĕi; #709C
+炝 > qiàng; #709D
+炟 > dá; #709F
+炠 > xiá; #70A0
+炡 > zhēng; #70A1
+炢 > zhú; #70A2
+炣 > kĕ; #70A3
+炤 > zhào; #70A4
+炥 > fú; #70A5
+炦 > bá; #70A6
+炧 > dùo; #70A7
+炨 > dùo; #70A8
+炩 > lìng; #70A9
+炪 > zhúo; #70AA
+炫 > xuàn; #70AB
+炬 > jù; #70AC
+炭 > tàn; #70AD
+炮 > pào; #70AE
+炯 > jĭong; #70AF
+炰 > páo; #70B0
+炱 > tái; #70B1
+炲 > tái; #70B2
+炳 > bĭng; #70B3
+炴 > yăng; #70B4
+炵 > tōng; #70B5
+炶 > hān; #70B6
+炷 > zhù; #70B7
+炸 > zhà; #70B8
+点 > diăn; #70B9
+為 > wèi; #70BA
+炻 > shí; #70BB
+炼 > liàn; #70BC
+炽 > chì; #70BD
+炾 > huăng; #70BE
+烀 > hū; #70C0
+烁 > shùo; #70C1
+烂 > làn; #70C2
+烃 > jĭng; #70C3
+烄 > jiăo; #70C4
+烅 > xù; #70C5
+烆 > xíng; #70C6
+烇 > quàn; #70C7
+烈 > liè; #70C8
+烉 > huàn; #70C9
+烊 > yáng; #70CA
+烋 > xiāo; #70CB
+烌 > xīu; #70CC
+烍 > xiăn; #70CD
+烎 > yín; #70CE
+烏 > wū; #70CF
+烐 > zhōu; #70D0
+烑 > yáo; #70D1
+烒 > shì; #70D2
+烓 > wēi; #70D3
+烔 > tóng; #70D4
+烕 > xuè; #70D5
+烖 > zāi; #70D6
+烗 > kài; #70D7
+烘 > hōng; #70D8
+烙 > lùo; #70D9
+烚 > xiá; #70DA
+烛 > zhú; #70DB
+烜 > xuăn; #70DC
+烝 > zhēng; #70DD
+烞 > pò; #70DE
+烟 > yān; #70DF
+烠 > hŭi; #70E0
+烡 > guāng; #70E1
+烢 > zhè; #70E2
+烣 > hūi; #70E3
+烤 > kăo; #70E4
+烦 > fán; #70E6
+烧 > shāo; #70E7
+烨 > yè; #70E8
+烩 > hùi; #70E9
+烫 > tàng; #70EB
+烬 > jìn; #70EC
+热 > rè; #70ED
+烯 > xī; #70EF
+烰 > fú; #70F0
+烱 > jĭong; #70F1
+烲 > chè; #70F2
+烳 > pŭ; #70F3
+烴 > jĭng; #70F4
+烵 > zhúo; #70F5
+烶 > tĭng; #70F6
+烷 > wán; #70F7
+烸 > hăi; #70F8
+烹 > pēng; #70F9
+烺 > lăng; #70FA
+烻 > shān; #70FB
+烼 > hū; #70FC
+烽 > fēng; #70FD
+烾 > chì; #70FE
+烿 > róng; #70FF
+焀 > hú; #7100
+焁 > xi; #7101
+焂 > shú; #7102
+焃 > hè; #7103
+焄 > xūn; #7104
+焅 > kù; #7105
+焆 > jué; #7106
+焇 > xiāo; #7107
+焈 > xī; #7108
+焉 > yān; #7109
+焊 > hàn; #710A
+焋 > zhuàng; #710B
+焌 > jùn; #710C
+焍 > dì; #710D
+焎 > xiè; #710E
+焏 > jí; #710F
+焐 > wù; #7110
+焓 > hán; #7113
+焔 > yàn; #7114
+焕 > huàn; #7115
+焖 > mèn; #7116
+焗 > jú; #7117
+焘 > chóu; #7118
+焙 > bèi; #7119
+焚 > fén; #711A
+焛 > lìn; #711B
+焜 > kūn; #711C
+焝 > hùn; #711D
+焞 > tūn; #711E
+焟 > xí; #711F
+焠 > cùi; #7120
+無 > wú; #7121
+焢 > hōng; #7122
+焣 > jù; #7123
+焤 > fŭ; #7124
+焥 > wò; #7125
+焦 > jiāo; #7126
+焧 > cōng; #7127
+焨 > fèng; #7128
+焩 > pīng; #7129
+焪 > qīong; #712A
+焫 > rùo; #712B
+焬 > xí; #712C
+焭 > qíong; #712D
+焮 > xìn; #712E
+焯 > zhúo; #712F
+焰 > yàn; #7130
+焱 > yàn; #7131
+焲 > yì; #7132
+焳 > jué; #7133
+焴 > yù; #7134
+焵 > gàng; #7135
+然 > rán; #7136
+焷 > pí; #7137
+焸 > gŭ; #7138
+焺 > shēng; #713A
+焻 > chàng; #713B
+焼 > shāo; #713C
+煁 > chén; #7141
+煂 > hè; #7142
+煃 > kŭi; #7143
+煄 > zhōng; #7144
+煅 > duàn; #7145
+煆 > xiā; #7146
+煇 > hūi; #7147
+煈 > fèng; #7148
+煉 > liàn; #7149
+煊 > xuān; #714A
+煋 > xīng; #714B
+煌 > huáng; #714C
+煍 > jiăo; #714D
+煎 > jiān; #714E
+煏 > bì; #714F
+煐 > yīng; #7150
+煑 > zhŭ; #7151
+煒 > wĕi; #7152
+煓 > tuān; #7153
+煔 > tiàn; #7154
+煕 > xī; #7155
+煖 > nuăn; #7156
+煗 > nuăn; #7157
+煘 > chán; #7158
+煙 > yān; #7159
+煚 > jĭong; #715A
+煛 > jĭong; #715B
+煜 > yù; #715C
+煝 > mèi; #715D
+煞 > shà; #715E
+煟 > wèi; #715F
+煠 > yè; #7160
+煡 > xìn; #7161
+煢 > qíong; #7162
+煣 > rŏu; #7163
+煤 > méi; #7164
+煥 > huàn; #7165
+煦 > xŭ; #7166
+照 > zhào; #7167
+煨 > wēi; #7168
+煩 > fán; #7169
+煪 > qíu; #716A
+煫 > sùi; #716B
+煬 > yáng; #716C
+煭 > liè; #716D
+煮 > zhŭ; #716E
+煯 > jie; #716F
+煰 > gào; #7170
+煱 > guā; #7171
+煲 > bào; #7172
+煳 > hú; #7173
+煴 > yūn; #7174
+煵 > xiā; #7175
+煸 > biān; #7178
+煹 > gòu; #7179
+煺 > tùi; #717A
+煻 > táng; #717B
+煼 > chăo; #717C
+煽 > shān; #717D
+煾 > n; #717E
+煿 > bó; #717F
+熀 > huăng; #7180
+熁 > xié; #7181
+熂 > xì; #7182
+熃 > wù; #7183
+熄 > xí; #7184
+熅 > yún; #7185
+熆 > hé; #7186
+熇 > hè; #7187
+熈 > xī; #7188
+熉 > yún; #7189
+熊 > xíong; #718A
+熋 > nái; #718B
+熌 > shàn; #718C
+熍 > qiong; #718D
+熎 > yào; #718E
+熏 > xūn; #718F
+熐 > mì; #7190
+熑 > lián; #7191
+熒 > yíng; #7192
+熓 > wèn; #7193
+熔 > róng; #7194
+熗 > qiàng; #7197
+熘 > līu; #7198
+熙 > xī; #7199
+熚 > bì; #719A
+熛 > biāo; #719B
+熜 > zŏng; #719C
+熝 > lù; #719D
+熞 > jiān; #719E
+熟 > shóu; #719F
+熠 > yì; #71A0
+熡 > lóu; #71A1
+熢 > fēng; #71A2
+熣 > sūi; #71A3
+熤 > yì; #71A4
+熥 > tōng; #71A5
+熦 > jué; #71A6
+熧 > zōng; #71A7
+熨 > yùn; #71A8
+熩 > hù; #71A9
+熪 > yí; #71AA
+熫 > zhì; #71AB
+熬 > áo; #71AC
+熭 > wèi; #71AD
+熮 > liáo; #71AE
+熯 > hàn; #71AF
+熰 > ōu; #71B0
+熱 > rè; #71B1
+熲 > jĭong; #71B2
+熳 > màn; #71B3
+熵 > shāng; #71B5
+熶 > cuàn; #71B6
+熷 > zēng; #71B7
+熸 > jiān; #71B8
+熹 > xī; #71B9
+熺 > xī; #71BA
+熻 > xī; #71BB
+熼 > yì; #71BC
+熽 > xiào; #71BD
+熾 > chì; #71BE
+熿 > huáng; #71BF
+燀 > chăn; #71C0
+燁 > yè; #71C1
+燂 > qián; #71C2
+燃 > rán; #71C3
+燄 > yàn; #71C4
+燅 > xián; #71C5
+燆 > qiáo; #71C6
+燇 > zùn; #71C7
+燈 > dēng; #71C8
+燉 > dùn; #71C9
+燊 > shēn; #71CA
+燋 > jiāo; #71CB
+燌 > fén; #71CC
+燍 > sī; #71CD
+燎 > liào; #71CE
+燏 > yù; #71CF
+燐 > lín; #71D0
+燑 > tóng; #71D1
+燒 > shāo; #71D2
+燓 > fēn; #71D3
+燔 > fán; #71D4
+燕 > yàn; #71D5
+燖 > xún; #71D6
+燗 > làn; #71D7
+燘 > mĕi; #71D8
+燙 > tàng; #71D9
+燚 > yī; #71DA
+燛 > jĭng; #71DB
+燜 > mèn; #71DC
+營 > yíng; #71DF
+燠 > yù; #71E0
+燡 > yì; #71E1
+燢 > xué; #71E2
+燣 > lán; #71E3
+燤 > tài; #71E4
+燥 > zào; #71E5
+燦 > càn; #71E6
+燧 > sùi; #71E7
+燨 > xī; #71E8
+燩 > què; #71E9
+燪 > cōng; #71EA
+燫 > lián; #71EB
+燬 > hŭi; #71EC
+燭 > zhú; #71ED
+燮 > xiè; #71EE
+燯 > líng; #71EF
+燰 > wēi; #71F0
+燱 > yì; #71F1
+燲 > xié; #71F2
+燳 > zhào; #71F3
+燴 > hùi; #71F4
+燷 > lán; #71F7
+燸 > rú; #71F8
+燹 > xiăn; #71F9
+燺 > kăo; #71FA
+燻 > xūn; #71FB
+燼 > jìn; #71FC
+燽 > chóu; #71FD
+燾 > chóu; #71FE
+燿 > yào; #71FF
+爀 > hè; #7200
+爁 > làn; #7201
+爂 > biāo; #7202
+爃 > róng; #7203
+爄 > lì; #7204
+爅 > mò; #7205
+爆 > bào; #7206
+爇 > rùo; #7207
+爈 > lǘ; #7208
+爉 > là; #7209
+爊 > áo; #720A
+爋 > xùn; #720B
+爌 > kuàng; #720C
+爍 > shùo; #720D
+爏 > lì; #720F
+爐 > lú; #7210
+爑 > jué; #7211
+爒 > liào; #7212
+爓 > yàn; #7213
+爔 > xī; #7214
+爕 > xiè; #7215
+爖 > lóng; #7216
+爗 > yè; #7217
+爙 > răng; #7219
+爚 > yuè; #721A
+爛 > làn; #721B
+爜 > cóng; #721C
+爝 > jué; #721D
+爞 > tóng; #721E
+爟 > guàn; #721F
+爡 > chè; #7221
+爢 > mí; #7222
+爣 > tăng; #7223
+爤 > làn; #7224
+爥 > zhú; #7225
+爧 > líng; #7227
+爨 > cuàn; #7228
+爩 > yù; #7229
+爪 > zhuă; #722A
+爬 > pá; #722C
+爭 > zhēng; #722D
+爮 > páo; #722E
+爯 > chēng; #722F
+爰 > yuán; #7230
+爱 > ài; #7231
+爲 > wèi; #7232
+爴 > jué; #7234
+爵 > jué; #7235
+父 > fù; #7236
+爷 > yé; #7237
+爸 > bà; #7238
+爹 > diē; #7239
+爺 > yé; #723A
+爻 > yáo; #723B
+爼 > zŭ; #723C
+爽 > shuăng; #723D
+爾 > ĕr; #723E
+爿 > qiáng; #723F
+牀 > chuáng; #7240
+牁 > gē; #7241
+牂 > zāng; #7242
+牃 > dié; #7243
+牄 > qiāng; #7244
+牅 > yóng; #7245
+牆 > qiáng; #7246
+片 > piàn; #7247
+版 > băn; #7248
+牉 > pàn; #7249
+牊 > sháo; #724A
+牋 > jiān; #724B
+牌 > pái; #724C
+牍 > dú; #724D
+牎 > chuāng; #724E
+牏 > tóu; #724F
+牐 > zhá; #7250
+牑 > biān; #7251
+牒 > dié; #7252
+牓 > băng; #7253
+牔 > bó; #7254
+牕 > chuāng; #7255
+牖 > yŏu; #7256
+牘 > dú; #7258
+牙 > yá; #7259
+牚 > chèng; #725A
+牛 > níu; #725B
+牝 > pìn; #725D
+牞 > jīu; #725E
+牟 > móu; #725F
+牠 > tūo; #7260
+牡 > mŭ; #7261
+牢 > láo; #7262
+牣 > rèn; #7263
+牤 > máng; #7264
+牥 > fāng; #7265
+牦 > máo; #7266
+牧 > mù; #7267
+牨 > gāng; #7268
+物 > wù; #7269
+牪 > yàn; #726A
+牫 > gē; #726B
+牬 > bèi; #726C
+牭 > sì; #726D
+牮 > jiàn; #726E
+牯 > gŭ; #726F
+牰 > yòu; #7270
+牱 > gē; #7271
+牲 > shēng; #7272
+牳 > mŭ; #7273
+牴 > dĭ; #7274
+牵 > qiān; #7275
+牶 > quàn; #7276
+牷 > quán; #7277
+牸 > zì; #7278
+特 > tè; #7279
+牺 > xī; #727A
+牻 > máng; #727B
+牼 > kēng; #727C
+牽 > qiān; #727D
+牾 > wú; #727E
+牿 > gù; #727F
+犀 > xī; #7280
+犁 > lí; #7281
+犂 > lí; #7282
+犃 > pŏu; #7283
+犄 > jī; #7284
+犅 > gāng; #7285
+犆 > zhí; #7286
+犇 > bēn; #7287
+犈 > quán; #7288
+犉 > rún; #7289
+犊 > dú; #728A
+犋 > jù; #728B
+犌 > jiā; #728C
+犍 > jiān; #728D
+犎 > fēng; #728E
+犏 > piān; #728F
+犐 > kē; #7290
+犑 > jú; #7291
+犒 > kào; #7292
+犓 > chú; #7293
+犔 > xì; #7294
+犕 > bèi; #7295
+犖 > lùo; #7296
+犗 > jiè; #7297
+犘 > má; #7298
+犙 > sān; #7299
+犚 > wèi; #729A
+犛 > lí; #729B
+犜 > dūn; #729C
+犝 > tóng; #729D
+犟 > jiàng; #729F
+犡 > lì; #72A1
+犢 > dú; #72A2
+犣 > liè; #72A3
+犤 > pí; #72A4
+犥 > piăo; #72A5
+犦 > bào; #72A6
+犧 > xī; #72A7
+犨 > chōu; #72A8
+犩 > wèi; #72A9
+犪 > kúi; #72AA
+犫 > chōu; #72AB
+犬 > quăn; #72AC
+犭 > făn' 'quăn' 'páng; #72AD
+犮 > bá; #72AE
+犯 > fàn; #72AF
+犰 > qíu; #72B0
+犱 > jĭ; #72B1
+犲 > cái; #72B2
+犳 > chúo; #72B3
+犴 > àn; #72B4
+犵 > jié; #72B5
+状 > zhuàng; #72B6
+犷 > guăng; #72B7
+犸 > mà; #72B8
+犹 > yóu; #72B9
+犺 > kàng; #72BA
+犻 > bó; #72BB
+犼 > hŏu; #72BC
+犽 > yá; #72BD
+犾 > yín; #72BE
+犿 > huān; #72BF
+狀 > zhuàng; #72C0
+狁 > yŭn; #72C1
+狂 > kuáng; #72C2
+狃 > nĭu; #72C3
+狄 > dí; #72C4
+狅 > qīng; #72C5
+狆 > zhòng; #72C6
+狇 > mù; #72C7
+狈 > bèi; #72C8
+狉 > pī; #72C9
+狊 > jú; #72CA
+狋 > ní; #72CB
+狌 > shēng; #72CC
+狍 > páo; #72CD
+狎 > xiá; #72CE
+狏 > túo; #72CF
+狐 > hú; #72D0
+狑 > líng; #72D1
+狒 > fèi; #72D2
+狓 > pī; #72D3
+狔 > nĭ; #72D4
+狕 > ăo; #72D5
+狖 > yòu; #72D6
+狗 > gŏu; #72D7
+狘 > yuè; #72D8
+狙 > jū; #72D9
+狚 > dàn; #72DA
+狛 > pò; #72DB
+狜 > gŭ; #72DC
+狝 > xiăn; #72DD
+狞 > níng; #72DE
+狟 > huán; #72DF
+狠 > hĕn; #72E0
+狡 > jiăo; #72E1
+狢 > hé; #72E2
+狣 > zhào; #72E3
+狤 > jí; #72E4
+狥 > xùn; #72E5
+狦 > shān; #72E6
+狧 > tà; #72E7
+狨 > róng; #72E8
+狩 > shòu; #72E9
+狪 > tōng; #72EA
+狫 > lăo; #72EB
+独 > dú; #72EC
+狭 > xiá; #72ED
+狮 > shī; #72EE
+狯 > huá; #72EF
+狰 > zhēng; #72F0
+狱 > yù; #72F1
+狲 > sūn; #72F2
+狳 > yú; #72F3
+狴 > bì; #72F4
+狵 > máng; #72F5
+狶 > xĭ; #72F6
+狷 > juàn; #72F7
+狸 > lí; #72F8
+狹 > xiá; #72F9
+狺 > yín; #72FA
+狻 > suān; #72FB
+狼 > láng; #72FC
+狽 > bèi; #72FD
+狾 > zhì; #72FE
+狿 > yán; #72FF
+猀 > shā; #7300
+猁 > lì; #7301
+猂 > hàn; #7302
+猃 > xiăn; #7303
+猄 > jīng; #7304
+猅 > pái; #7305
+猆 > fēi; #7306
+猇 > yáo; #7307
+猈 > bà; #7308
+猉 > qí; #7309
+猊 > ní; #730A
+猋 > biāo; #730B
+猌 > yìn; #730C
+猍 > lái; #730D
+猎 > xí; #730E
+猏 > jiān; #730F
+猐 > qiāng; #7310
+猑 > kūn; #7311
+猒 > yān; #7312
+猓 > gŭo; #7313
+猔 > zòng; #7314
+猕 > mí; #7315
+猖 > chāng; #7316
+猗 > yī; #7317
+猘 > zhì; #7318
+猙 > zhēng; #7319
+猚 > yá; #731A
+猛 > mĕng; #731B
+猜 > cāi; #731C
+猝 > cù; #731D
+猞 > shè; #731E
+猡 > lúo; #7321
+猢 > hú; #7322
+猣 > zōng; #7323
+猤 > jì; #7324
+猥 > wĕi; #7325
+猦 > fēng; #7326
+猧 > wō; #7327
+猨 > yuán; #7328
+猩 > xīng; #7329
+猪 > zhū; #732A
+猫 > māo; #732B
+猬 > wèi; #732C
+猭 > yuán; #732D
+献 > xiàn; #732E
+猯 > tuān; #732F
+猰 > yà; #7330
+猱 > náo; #7331
+猲 > xiē; #7332
+猳 > jiā; #7333
+猴 > hóu; #7334
+猵 > biān; #7335
+猶 > yóu; #7336
+猷 > yóu; #7337
+猸 > méi; #7338
+猹 > zhā; #7339
+猺 > yáo; #733A
+猻 > sūn; #733B
+猼 > bó; #733C
+猽 > míng; #733D
+猾 > huá; #733E
+猿 > yuán; #733F
+獀 > sōu; #7340
+獁 > mà; #7341
+獂 > yuán; #7342
+獃 > dāi; #7343
+獄 > yù; #7344
+獅 > shī; #7345
+獆 > háo; #7346
+獈 > yì; #7348
+獉 > zhēn; #7349
+獊 > chuàng; #734A
+獋 > háo; #734B
+獌 > màn; #734C
+獍 > jìng; #734D
+獎 > jiăng; #734E
+獏 > mú; #734F
+獐 > zhāng; #7350
+獑 > chán; #7351
+獒 > áo; #7352
+獓 > áo; #7353
+獔 > háo; #7354
+獕 > cūi; #7355
+獖 > fén; #7356
+獗 > jué; #7357
+獘 > bì; #7358
+獙 > bì; #7359
+獚 > huáng; #735A
+獛 > pú; #735B
+獜 > lín; #735C
+獝 > yù; #735D
+獞 > tóng; #735E
+獟 > yào; #735F
+獠 > liáo; #7360
+獡 > shùo; #7361
+獢 > xiāo; #7362
+獥 > xí; #7365
+獦 > gé; #7366
+獧 > juàn; #7367
+獨 > dú; #7368
+獩 > hùi; #7369
+獪 > kuài; #736A
+獫 > xiăn; #736B
+獬 > xiè; #736C
+獭 > tà; #736D
+獮 > xiăn; #736E
+獯 > xūn; #736F
+獰 > níng; #7370
+獱 > pín; #7371
+獲 > hùo; #7372
+獳 > nòu; #7373
+獴 > méng; #7374
+獵 > liè; #7375
+獶 > náo; #7376
+獷 > guăng; #7377
+獸 > shòu; #7378
+獹 > lú; #7379
+獺 > tà; #737A
+獻 > xiàn; #737B
+獼 > mí; #737C
+獽 > ráng; #737D
+獾 > huān; #737E
+獿 > náo; #737F
+玀 > lúo; #7380
+玁 > xiăn; #7381
+玂 > qí; #7382
+玃 > jué; #7383
+玄 > xuán; #7384
+玅 > miào; #7385
+玆 > zī; #7386
+率 > lǜ; #7387
+玈 > lú; #7388
+玉 > yù; #7389
+玊 > sù; #738A
+王 > wáng; #738B
+玌 > qíu; #738C
+玍 > gă; #738D
+玎 > dīng; #738E
+玏 > lè; #738F
+玐 > bā; #7390
+玑 > jī; #7391
+玒 > hóng; #7392
+玓 > dì; #7393
+玔 > quàn; #7394
+玕 > gān; #7395
+玖 > jĭu; #7396
+玗 > yú; #7397
+玘 > jĭ; #7398
+玙 > yú; #7399
+玚 > yáng; #739A
+玛 > mă; #739B
+玜 > gōng; #739C
+玝 > wŭ; #739D
+玞 > fū; #739E
+玟 > wén; #739F
+玠 > jiè; #73A0
+玡 > yà; #73A1
+玢 > fén; #73A2
+玣 > biàn; #73A3
+玤 > bĕng; #73A4
+玥 > yuè; #73A5
+玦 > jué; #73A6
+玧 > yŭn; #73A7
+玨 > jué; #73A8
+玩 > wán; #73A9
+玪 > jiān; #73AA
+玫 > méi; #73AB
+玬 > dăn; #73AC
+玭 > pí; #73AD
+玮 > wĕi; #73AE
+环 > huán; #73AF
+现 > xiàn; #73B0
+玱 > qiāng; #73B1
+玲 > líng; #73B2
+玳 > dài; #73B3
+玴 > yì; #73B4
+玵 > án; #73B5
+玶 > píng; #73B6
+玷 > diàn; #73B7
+玸 > fú; #73B8
+玹 > xuán; #73B9
+玺 > xĭ; #73BA
+玻 > bō; #73BB
+玼 > cĭ; #73BC
+玽 > gŏu; #73BD
+玾 > jiă; #73BE
+玿 > sháo; #73BF
+珀 > pò; #73C0
+珁 > cí; #73C1
+珂 > kē; #73C2
+珃 > răn; #73C3
+珄 > shēng; #73C4
+珅 > shēn; #73C5
+珆 > yí; #73C6
+珇 > zŭ; #73C7
+珈 > jiā; #73C8
+珉 > mín; #73C9
+珊 > shān; #73CA
+珋 > lĭu; #73CB
+珌 > bì; #73CC
+珍 > zhēn; #73CD
+珎 > zhēn; #73CE
+珏 > jué; #73CF
+珐 > fà; #73D0
+珑 > lóng; #73D1
+珒 > jīn; #73D2
+珓 > jiào; #73D3
+珔 > jiàn; #73D4
+珕 > lì; #73D5
+珖 > guāng; #73D6
+珗 > xiān; #73D7
+珘 > zhōu; #73D8
+珙 > gŏng; #73D9
+珚 > yān; #73DA
+珛 > xìu; #73DB
+珜 > yáng; #73DC
+珝 > xŭ; #73DD
+珞 > lùo; #73DE
+珟 > sù; #73DF
+珠 > zhū; #73E0
+珡 > qín; #73E1
+珢 > kèn; #73E2
+珣 > xún; #73E3
+珤 > băo; #73E4
+珥 > ĕr; #73E5
+珦 > xiàng; #73E6
+珧 > yáo; #73E7
+珨 > xiá; #73E8
+珩 > héng; #73E9
+珪 > gūi; #73EA
+珫 > chōng; #73EB
+珬 > xù; #73EC
+班 > bān; #73ED
+珮 > pèi; #73EE
+珰 > dāng; #73F0
+珲 > hún; #73F2
+珳 > wén; #73F3
+珴 > é; #73F4
+珵 > chéng; #73F5
+珶 > tí; #73F6
+珷 > wŭ; #73F7
+珸 > wú; #73F8
+珹 > chéng; #73F9
+珺 > jùn; #73FA
+珻 > méi; #73FB
+珼 > bèi; #73FC
+珽 > tĭng; #73FD
+現 > xiàn; #73FE
+珿 > chùo; #73FF
+琀 > hán; #7400
+琁 > xuan; #7401
+琂 > yán; #7402
+球 > qíu; #7403
+琄 > quăn; #7404
+琅 > láng; #7405
+理 > lĭ; #7406
+琇 > xìu; #7407
+琈 > fú; #7408
+琉 > líu; #7409
+琊 > yé; #740A
+琋 > xī; #740B
+琌 > líng; #740C
+琍 > lì; #740D
+琎 > jìn; #740E
+琏 > lián; #740F
+琐 > sŭo; #7410
+琓 > wán; #7413
+琔 > diàn; #7414
+琕 > pín; #7415
+琖 > zhăn; #7416
+琗 > cùi; #7417
+琘 > mín; #7418
+琙 > yù; #7419
+琚 > jū; #741A
+琛 > chēn; #741B
+琜 > lái; #741C
+琝 > wén; #741D
+琞 > shèng; #741E
+琟 > wéi; #741F
+琠 > diăn; #7420
+琡 > chù; #7421
+琢 > zhúo; #7422
+琣 > pĕi; #7423
+琤 > chēng; #7424
+琥 > hŭ; #7425
+琦 > qí; #7426
+琧 > è; #7427
+琨 > kūn; #7428
+琩 > chāng; #7429
+琪 > qí; #742A
+琫 > bĕng; #742B
+琬 > wăn; #742C
+琭 > lù; #742D
+琮 > cóng; #742E
+琯 > guăn; #742F
+琰 > yăn; #7430
+琱 > diāo; #7431
+琲 > bèi; #7432
+琳 > lín; #7433
+琴 > qín; #7434
+琵 > pí; #7435
+琶 > pá; #7436
+琷 > què; #7437
+琸 > zhúo; #7438
+琹 > qín; #7439
+琺 > fà; #743A
+琼 > qíong; #743C
+琽 > dŭ; #743D
+琾 > jiè; #743E
+琿 > hún; #743F
+瑀 > yŭ; #7440
+瑁 > mào; #7441
+瑂 > méi; #7442
+瑃 > chun; #7443
+瑄 > xuān; #7444
+瑅 > tí; #7445
+瑆 > xīng; #7446
+瑇 > dài; #7447
+瑈 > róu; #7448
+瑉 > mín; #7449
+瑊 > zhēn; #744A
+瑋 > wĕi; #744B
+瑌 > ruăn; #744C
+瑍 > huàn; #744D
+瑎 > jiē; #744E
+瑏 > chuān; #744F
+瑐 > jiăn; #7450
+瑑 > zhuàn; #7451
+瑒 > yáng; #7452
+瑓 > liàn; #7453
+瑔 > quán; #7454
+瑕 > xiá; #7455
+瑖 > duàn; #7456
+瑗 > yuàn; #7457
+瑘 > yé; #7458
+瑙 > năo; #7459
+瑚 > hú; #745A
+瑛 > yīng; #745B
+瑜 > yú; #745C
+瑝 > huáng; #745D
+瑞 > rùi; #745E
+瑟 > sè; #745F
+瑠 > líu; #7460
+瑡 > shi; #7461
+瑢 > róng; #7462
+瑣 > sŭo; #7463
+瑤 > yáo; #7464
+瑥 > wēn; #7465
+瑦 > wū; #7466
+瑧 > jīn; #7467
+瑨 > jìn; #7468
+瑩 > yíng; #7469
+瑪 > mă; #746A
+瑫 > tāo; #746B
+瑬 > líu; #746C
+瑭 > táng; #746D
+瑮 > lì; #746E
+瑯 > láng; #746F
+瑰 > gūi; #7470
+瑱 > zhèn; #7471
+瑲 > qiāng; #7472
+瑳 > cŭo; #7473
+瑴 > jué; #7474
+瑵 > zhăo; #7475
+瑶 > yáo; #7476
+瑷 > ài; #7477
+瑸 > bīn; #7478
+瑹 > tú; #7479
+瑺 > cháng; #747A
+瑻 > kūn; #747B
+瑼 > zhuān; #747C
+瑽 > cōng; #747D
+瑾 > jĭn; #747E
+瑿 > yī; #747F
+璀 > cŭi; #7480
+璁 > cōng; #7481
+璂 > qí; #7482
+璃 > lí; #7483
+璄 > yĭng; #7484
+璅 > sŭo; #7485
+璆 > qíu; #7486
+璇 > xuán; #7487
+璈 > áo; #7488
+璉 > lián; #7489
+璊 > mán; #748A
+璋 > zhāng; #748B
+璌 > yín; #748C
+璎 > yīng; #748E
+璏 > zhì; #748F
+璐 > lù; #7490
+璑 > wú; #7491
+璒 > dēng; #7492
+璓 > xiòu; #7493
+璔 > zēng; #7494
+璕 > xún; #7495
+璖 > qú; #7496
+璗 > dàng; #7497
+璘 > lín; #7498
+璙 > liáo; #7499
+璚 > qíong; #749A
+璛 > sù; #749B
+璜 > huáng; #749C
+璝 > gūi; #749D
+璞 > pú; #749E
+璟 > jĭng; #749F
+璠 > fán; #74A0
+璡 > jìn; #74A1
+璢 > líu; #74A2
+璣 > jī; #74A3
+璥 > jĭng; #74A5
+璦 > ài; #74A6
+璧 > bì; #74A7
+璨 > càn; #74A8
+璩 > qú; #74A9
+璪 > zăo; #74AA
+璫 > dāng; #74AB
+璬 > jiăo; #74AC
+璭 > gùn; #74AD
+璮 > tăn; #74AE
+璯 > hùi; #74AF
+環 > huán; #74B0
+璱 > sè; #74B1
+璲 > sùi; #74B2
+璳 > tián; #74B3
+璵 > yú; #74B5
+璶 > jìn; #74B6
+璷 > lú; #74B7
+璸 > bīn; #74B8
+璹 > shòu; #74B9
+璺 > wèn; #74BA
+璻 > zŭi; #74BB
+璼 > lán; #74BC
+璽 > xĭ; #74BD
+璾 > jì; #74BE
+璿 > xuán; #74BF
+瓀 > ruăn; #74C0
+瓁 > hùo; #74C1
+瓂 > gài; #74C2
+瓃 > léi; #74C3
+瓄 > dú; #74C4
+瓅 > lì; #74C5
+瓆 > zhí; #74C6
+瓇 > róu; #74C7
+瓈 > lí; #74C8
+瓉 > zàn; #74C9
+瓊 > qíong; #74CA
+瓋 > zhé; #74CB
+瓌 > gūi; #74CC
+瓍 > sùi; #74CD
+瓎 > là; #74CE
+瓏 > lóng; #74CF
+瓐 > lú; #74D0
+瓑 > lì; #74D1
+瓒 > zàn; #74D2
+瓓 > làn; #74D3
+瓔 > yīng; #74D4
+瓕 > mí; #74D5
+瓖 > xiāng; #74D6
+瓗 > xī; #74D7
+瓘 > guàn; #74D8
+瓙 > dào; #74D9
+瓚 > zàn; #74DA
+瓛 > huán; #74DB
+瓜 > guā; #74DC
+瓝 > bó; #74DD
+瓞 > dié; #74DE
+瓟 > báo; #74DF
+瓠 > hù; #74E0
+瓡 > zhí; #74E1
+瓢 > piáo; #74E2
+瓣 > bàn; #74E3
+瓤 > ráng; #74E4
+瓥 > lì; #74E5
+瓦 > wă; #74E6
+瓨 > jiāng; #74E8
+瓩 > qián' 'wă; #74E9
+瓪 > făn; #74EA
+瓫 > pén; #74EB
+瓬 > făng; #74EC
+瓭 > dăn; #74ED
+瓮 > wèng; #74EE
+瓯 > ōu; #74EF
+瓳 > hú; #74F3
+瓴 > líng; #74F4
+瓵 > yí; #74F5
+瓶 > píng; #74F6
+瓷 > cí; #74F7
+瓹 > juàn; #74F9
+瓺 > cháng; #74FA
+瓻 > chī; #74FB
+瓽 > dàng; #74FD
+瓾 > mĕng; #74FE
+瓿 > pŏu; #74FF
+甀 > zhùi; #7500
+甁 > píng; #7501
+甂 > biān; #7502
+甃 > zhòu; #7503
+甄 > zhēn; #7504
+甆 > cí; #7506
+甇 > yīng; #7507
+甈 > qì; #7508
+甉 > xián; #7509
+甊 > lŏu; #750A
+甋 > dì; #750B
+甌 > ōu; #750C
+甍 > méng; #750D
+甎 > zhuān; #750E
+甏 > pèng; #750F
+甐 > lín; #7510
+甑 > zèng; #7511
+甒 > wŭ; #7512
+甓 > pì; #7513
+甔 > dān; #7514
+甕 > wèng; #7515
+甖 > yīng; #7516
+甗 > yăn; #7517
+甘 > gān; #7518
+甙 > dài; #7519
+甚 > shén; #751A
+甛 > tián; #751B
+甜 > tián; #751C
+甝 > hān; #751D
+甞 > cháng; #751E
+生 > shēng; #751F
+甠 > qíng; #7520
+甡 > shēng; #7521
+產 > chăn; #7522
+産 > chăn; #7523
+甤 > rúi; #7524
+甥 > shēng; #7525
+甦 > sū; #7526
+甧 > sēn; #7527
+用 > yòng; #7528
+甩 > shuăi; #7529
+甪 > lù; #752A
+甫 > fŭ; #752B
+甬 > yŏng; #752C
+甭 > béng; #752D
+甮 > fèng; #752E
+甯 > níng; #752F
+田 > tián; #7530
+由 > yóu; #7531
+甲 > jiă; #7532
+申 > shēn; #7533
+甴 > zhá; #7534
+电 > diàn; #7535
+甶 > fú; #7536
+男 > nán; #7537
+甸 > diàn; #7538
+甹 > píng; #7539
+町 > tĭng; #753A
+画 > huà; #753B
+甼 > tĭng; #753C
+甽 > quăn; #753D
+甾 > zī; #753E
+甿 > méng; #753F
+畀 > bì; #7540
+畁 > qí; #7541
+畂 > lìu; #7542
+畃 > xún; #7543
+畄 > líu; #7544
+畅 > chàng; #7545
+畆 > mŭ; #7546
+畇 > yún; #7547
+畈 > fàn; #7548
+畉 > fú; #7549
+畊 > gēng; #754A
+畋 > tián; #754B
+界 > jiè; #754C
+畍 > jiè; #754D
+畎 > quăn; #754E
+畏 > wèi; #754F
+畐 > fú; #7550
+畑 > tián; #7551
+畒 > mŭ; #7552
+畔 > pàn; #7554
+畕 > jiāng; #7555
+畖 > wā; #7556
+畗 > dá; #7557
+畘 > nán; #7558
+留 > líu; #7559
+畚 > bĕn; #755A
+畛 > zhĕn; #755B
+畜 > chù; #755C
+畝 > mŭ; #755D
+畞 > mŭ; #755E
+畟 > cè; #755F
+畡 > gāi; #7561
+畢 > bì; #7562
+畣 > dá; #7563
+畤 > zhì; #7564
+略 > lǜe; #7565
+畦 > qí; #7566
+畧 > lǜe; #7567
+畨 > pān; #7568
+番 > fān; #756A
+畫 > huà; #756B
+畬 > yú; #756C
+畭 > yú; #756D
+畮 > mŭ; #756E
+畯 > jùn; #756F
+異 > yì; #7570
+畱 > líu; #7571
+畲 > yú; #7572
+畳 > dié; #7573
+畴 > chóu; #7574
+畵 > huà; #7575
+當 > dāng; #7576
+畷 > chùo; #7577
+畸 > jī; #7578
+畹 > wăn; #7579
+畺 > jiāng; #757A
+畻 > shéng; #757B
+畼 > chàng; #757C
+畽 > tuăn; #757D
+畾 > léi; #757E
+畿 > jī; #757F
+疀 > chā; #7580
+疁 > líu; #7581
+疃 > tuăn; #7583
+疄 > lín; #7584
+疅 > jiāng; #7585
+疆 > jiāng; #7586
+疇 > chóu; #7587
+疈 > bò; #7588
+疉 > dié; #7589
+疊 > dié; #758A
+疋 > pĭ; #758B
+疌 > niè; #758C
+疍 > dàn; #758D
+疎 > shū; #758E
+疏 > shū; #758F
+疐 > zhì; #7590
+疑 > yí; #7591
+疒 > chuáng; #7592
+疓 > năi; #7593
+疔 > dīng; #7594
+疕 > bĭ; #7595
+疖 > jié; #7596
+疗 > liáo; #7597
+疘 > gōng; #7598
+疙 > gē; #7599
+疚 > jìu; #759A
+疛 > zhŏu; #759B
+疜 > xià; #759C
+疝 > shàn; #759D
+疞 > xū; #759E
+疟 > nǜe; #759F
+疠 > lì; #75A0
+疡 > yáng; #75A1
+疢 > chèn; #75A2
+疣 > yóu; #75A3
+疤 > bā; #75A4
+疥 > jiè; #75A5
+疦 > jué; #75A6
+疧 > zhī; #75A7
+疨 > xiā; #75A8
+疩 > cùi; #75A9
+疪 > bì; #75AA
+疫 > yì; #75AB
+疬 > lì; #75AC
+疭 > zòng; #75AD
+疮 > chuāng; #75AE
+疯 > fēng; #75AF
+疰 > zhù; #75B0
+疱 > pào; #75B1
+疲 > pí; #75B2
+疳 > gān; #75B3
+疴 > kē; #75B4
+疵 > cī; #75B5
+疶 > xiè; #75B6
+疷 > qí; #75B7
+疸 > dăn; #75B8
+疹 > zhĕn; #75B9
+疺 > fá; #75BA
+疻 > zhĭ; #75BB
+疼 > téng; #75BC
+疽 > jū; #75BD
+疾 > jí; #75BE
+疿 > fèi; #75BF
+痀 > qú; #75C0
+痁 > diàn; #75C1
+痂 > jiā; #75C2
+痃 > xián; #75C3
+痄 > chá; #75C4
+病 > bìng; #75C5
+痆 > nì; #75C6
+症 > zhèng; #75C7
+痈 > yōng; #75C8
+痉 > jìng; #75C9
+痊 > quán; #75CA
+痋 > chóng; #75CB
+痌 > tōng; #75CC
+痍 > yí; #75CD
+痎 > kāi; #75CE
+痏 > wĕi; #75CF
+痐 > húi; #75D0
+痑 > dŭo; #75D1
+痒 > yăng; #75D2
+痓 > chì; #75D3
+痔 > zhì; #75D4
+痕 > hén; #75D5
+痖 > yă; #75D6
+痗 > mèi; #75D7
+痘 > dòu; #75D8
+痙 > jìng; #75D9
+痚 > xiāo; #75DA
+痛 > tòng; #75DB
+痜 > tū; #75DC
+痝 > máng; #75DD
+痞 > pĭ; #75DE
+痟 > xiāo; #75DF
+痠 > suān; #75E0
+痡 > pū; #75E1
+痢 > lì; #75E2
+痣 > zhì; #75E3
+痤 > cúo; #75E4
+痥 > dúo; #75E5
+痦 > wù; #75E6
+痧 > shā; #75E7
+痨 > láo; #75E8
+痩 > shòu; #75E9
+痪 > huàn; #75EA
+痫 > xián; #75EB
+痬 > yì; #75EC
+痭 > péng; #75ED
+痮 > zhàng; #75EE
+痯 > guăn; #75EF
+痰 > tán; #75F0
+痱 > fèi; #75F1
+痲 > má; #75F2
+痳 > lín; #75F3
+痴 > chī; #75F4
+痵 > jì; #75F5
+痶 > diăn; #75F6
+痷 > ān; #75F7
+痸 > chì; #75F8
+痹 > bì; #75F9
+痺 > bēi; #75FA
+痻 > mín; #75FB
+痼 > gū; #75FC
+痽 > dūi; #75FD
+痾 > ē; #75FE
+痿 > wĕi; #75FF
+瘀 > yū; #7600
+瘁 > cùi; #7601
+瘂 > yă; #7602
+瘃 > zhŭ; #7603
+瘄 > cù; #7604
+瘅 > dàn; #7605
+瘆 > shèn; #7606
+瘇 > zhŭng; #7607
+瘈 > jì; #7608
+瘉 > yù; #7609
+瘊 > hóu; #760A
+瘋 > fēng; #760B
+瘌 > là; #760C
+瘍 > yáng; #760D
+瘎 > shèn; #760E
+瘏 > tú; #760F
+瘐 > yŭ; #7610
+瘑 > guā; #7611
+瘒 > wén; #7612
+瘓 > huàn; #7613
+瘔 > kù; #7614
+瘕 > jiă; #7615
+瘖 > yīn; #7616
+瘗 > yì; #7617
+瘘 > lǘ; #7618
+瘙 > sāo; #7619
+瘚 > jué; #761A
+瘛 > chì; #761B
+瘜 > xí; #761C
+瘝 > guān; #761D
+瘞 > yì; #761E
+瘟 > wēn; #761F
+瘠 > jí; #7620
+瘡 > chuāng; #7621
+瘢 > bān; #7622
+瘣 > lĕi; #7623
+瘤 > líu; #7624
+瘥 > chài; #7625
+瘦 > shòu; #7626
+瘧 > nǜe; #7627
+瘨 > diān; #7628
+瘩 > dā; #7629
+瘪 > piē; #762A
+瘫 > tān; #762B
+瘬 > zhàng; #762C
+瘭 > biāo; #762D
+瘮 > shen; #762E
+瘯 > cù; #762F
+瘰 > lŭo; #7630
+瘱 > yì; #7631
+瘲 > zòng; #7632
+瘳 > chōu; #7633
+瘴 > zhàng; #7634
+瘵 > zhài; #7635
+瘶 > sòu; #7636
+瘷 > sŭo; #7637
+瘸 > qué; #7638
+瘹 > diào; #7639
+瘺 > lòu; #763A
+瘻 > lǘ; #763B
+瘼 > mò; #763C
+瘽 > jìn; #763D
+瘾 > yĭn; #763E
+瘿 > yĭng; #763F
+癀 > huáng; #7640
+癁 > fú; #7641
+療 > liáo; #7642
+癃 > lóng; #7643
+癄 > qiáo; #7644
+癅 > líu; #7645
+癆 > láo; #7646
+癇 > xián; #7647
+癈 > fèi; #7648
+癉 > dàn; #7649
+癊 > yìn; #764A
+癋 > hè; #764B
+癌 > yán; #764C
+癍 > bān; #764D
+癎 > xián; #764E
+癏 > guān; #764F
+癐 > guài; #7650
+癑 > nóng; #7651
+癒 > yù; #7652
+癓 > wéi; #7653
+癔 > yì; #7654
+癕 > yōng; #7655
+癖 > pĭ; #7656
+癗 > lĕi; #7657
+癘 > lì; #7658
+癙 > shŭ; #7659
+癚 > dàn; #765A
+癛 > lĭn; #765B
+癜 > diàn; #765C
+癝 > lĭn; #765D
+癞 > lài; #765E
+癟 > piē; #765F
+癠 > jì; #7660
+癡 > chī; #7661
+癢 > yăng; #7662
+癣 > xiăn; #7663
+癤 > jié; #7664
+癥 > zhēng; #7665
+癧 > lì; #7667
+癨 > hùo; #7668
+癩 > lài; #7669
+癫 > diān; #766B
+癬 > xiăn; #766C
+癭 > yĭng; #766D
+癮 > yĭn; #766E
+癯 > qú; #766F
+癰 > yōng; #7670
+癱 > tān; #7671
+癲 > diān; #7672
+癳 > lŭo; #7673
+癴 > lǘan; #7674
+癵 > luán; #7675
+癶 > bō; #7676
+癸 > gŭi; #7678
+癹 > pō; #7679
+発 > fā; #767A
+登 > dēng; #767B
+發 > fā; #767C
+白 > bái; #767D
+百 > băi; #767E
+癿 > qié; #767F
+皀 > bī; #7680
+皁 > zào; #7681
+皂 > zào; #7682
+皃 > mào; #7683
+的 > de; #7684
+皅 > pā; #7685
+皆 > jiē; #7686
+皇 > huáng; #7687
+皈 > gūi; #7688
+皉 > cĭ; #7689
+皊 > líng; #768A
+皋 > gāo; #768B
+皌 > mò; #768C
+皍 > jí; #768D
+皎 > jiăo; #768E
+皏 > pĕng; #768F
+皐 > gāo; #7690
+皑 > ái; #7691
+皒 > é; #7692
+皓 > hào; #7693
+皔 > hàn; #7694
+皕 > bī; #7695
+皖 > wăn; #7696
+皗 > chóu; #7697
+皘 > qiàn; #7698
+皙 > xī; #7699
+皚 > ái; #769A
+皛 > jĭong; #769B
+皜 > hào; #769C
+皝 > huăng; #769D
+皞 > hào; #769E
+皟 > zé; #769F
+皠 > cŭi; #76A0
+皡 > hào; #76A1
+皢 > xiăo; #76A2
+皣 > yè; #76A3
+皤 > pó; #76A4
+皥 > hào; #76A5
+皦 > jiăo; #76A6
+皧 > ài; #76A7
+皨 > xīng; #76A8
+皩 > huàng; #76A9
+皪 > lì; #76AA
+皫 > piăo; #76AB
+皬 > hè; #76AC
+皭 > jiào; #76AD
+皮 > pí; #76AE
+皯 > găn; #76AF
+皰 > pào; #76B0
+皱 > zhòu; #76B1
+皲 > jūn; #76B2
+皳 > qíu; #76B3
+皴 > cūn; #76B4
+皵 > què; #76B5
+皶 > zhā; #76B6
+皷 > gŭ; #76B7
+皸 > jūn; #76B8
+皹 > jūn; #76B9
+皺 > zhòu; #76BA
+皻 > zhā; #76BB
+皼 > gŭ; #76BC
+皽 > zhăn; #76BD
+皾 > dú; #76BE
+皿 > mĭn; #76BF
+盀 > qĭ; #76C0
+盁 > yíng; #76C1
+盂 > yú; #76C2
+盃 > bēi; #76C3
+盄 > zhāo; #76C4
+盅 > zhōng; #76C5
+盆 > pén; #76C6
+盇 > hé; #76C7
+盈 > yíng; #76C8
+盉 > hé; #76C9
+益 > yì; #76CA
+盋 > bō; #76CB
+盌 > wăn; #76CC
+盍 > hé; #76CD
+盎 > àng; #76CE
+盏 > zhăn; #76CF
+盐 > yán; #76D0
+监 > jiān; #76D1
+盒 > hé; #76D2
+盓 > yū; #76D3
+盔 > kūi; #76D4
+盕 > fàn; #76D5
+盖 > gài; #76D6
+盗 > dào; #76D7
+盘 > pán; #76D8
+盙 > fŭ; #76D9
+盚 > qíu; #76DA
+盛 > shèng; #76DB
+盜 > dào; #76DC
+盝 > lù; #76DD
+盞 > zhăn; #76DE
+盟 > méng; #76DF
+盠 > lĭ; #76E0
+盡 > jìn; #76E1
+盢 > xù; #76E2
+監 > jiān; #76E3
+盤 > pán; #76E4
+盥 > guàn; #76E5
+盦 > ān; #76E6
+盧 > lú; #76E7
+盨 > shŭ; #76E8
+盩 > zhōu; #76E9
+盪 > dàng; #76EA
+盫 > ān; #76EB
+盬 > gŭ; #76EC
+盭 > lì; #76ED
+目 > mù; #76EE
+盯 > chéng; #76EF
+盰 > găn; #76F0
+盱 > xū; #76F1
+盲 > máng; #76F2
+盳 > máng; #76F3
+直 > zhí; #76F4
+盵 > qì; #76F5
+盶 > ruăn; #76F6
+盷 > tián; #76F7
+相 > xiāng; #76F8
+盹 > dùn; #76F9
+盺 > xīn; #76FA
+盻 > xì; #76FB
+盼 > pàn; #76FC
+盽 > fēng; #76FD
+盾 > dùn; #76FE
+盿 > mín; #76FF
+眀 > míng; #7700
+省 > shĕng; #7701
+眂 > shì; #7702
+眃 > yún; #7703
+眄 > miăn; #7704
+眅 > pān; #7705
+眆 > făng; #7706
+眇 > miăo; #7707
+眈 > dān; #7708
+眉 > méi; #7709
+眊 > mào; #770A
+看 > kàn; #770B
+県 > xiàn; #770C
+眍 > ōu; #770D
+眎 > shì; #770E
+眏 > yāng; #770F
+眐 > zhēng; #7710
+眑 > yăo; #7711
+眒 > shèn; #7712
+眓 > hùo; #7713
+眔 > dà; #7714
+眕 > zhĕn; #7715
+眖 > kuàng; #7716
+眗 > jū; #7717
+眘 > shèn; #7718
+眙 > chì; #7719
+眚 > shĕng; #771A
+眛 > mèi; #771B
+眜 > mò; #771C
+眝 > zhù; #771D
+眞 > zhēn; #771E
+真 > zhēn; #771F
+眠 > mián; #7720
+眡 > dī; #7721
+眢 > yuān; #7722
+眣 > dié; #7723
+眤 > yí; #7724
+眥 > zì; #7725
+眦 > zì; #7726
+眧 > chăo; #7727
+眨 > zhă; #7728
+眩 > xuàn; #7729
+眪 > bĭng; #772A
+眫 > mĭ; #772B
+眬 > lóng; #772C
+眭 > sūi; #772D
+眮 > dòng; #772E
+眯 > mĭ; #772F
+眰 > dié; #7730
+眱 > yí; #7731
+眲 > èr; #7732
+眳 > mĭng; #7733
+眴 > xuàn; #7734
+眵 > chī; #7735
+眶 > kuàng; #7736
+眷 > juàn; #7737
+眸 > móu; #7738
+眹 > zhèn; #7739
+眺 > tiào; #773A
+眻 > yáng; #773B
+眼 > yăn; #773C
+眽 > mò; #773D
+眾 > zhòng; #773E
+眿 > mài; #773F
+着 > zháo; #7740
+睁 > zhēng; #7741
+睂 > méi; #7742
+睃 > jùn; #7743
+睄 > shào; #7744
+睅 > hàn; #7745
+睆 > huăn; #7746
+睇 > dì; #7747
+睈 > chĕng; #7748
+睉 > cūo; #7749
+睊 > juàn; #774A
+睋 > é; #774B
+睌 > wăn; #774C
+睍 > xiàn; #774D
+睎 > xī; #774E
+睏 > kùn; #774F
+睐 > lài; #7750
+睑 > jiăn; #7751
+睒 > shăn; #7752
+睓 > tiăn; #7753
+睔 > hŭn; #7754
+睕 > wăn; #7755
+睖 > líng; #7756
+睗 > shì; #7757
+睘 > qíong; #7758
+睙 > liè; #7759
+睚 > yái; #775A
+睛 > jīng; #775B
+睜 > zhēng; #775C
+睝 > lí; #775D
+睞 > lài; #775E
+睟 > sùi; #775F
+睠 > juàn; #7760
+睡 > shùi; #7761
+睢 > sūi; #7762
+督 > dū; #7763
+睤 > bì; #7764
+睥 > bì; #7765
+睦 > mù; #7766
+睧 > hūn; #7767
+睨 > nì; #7768
+睩 > lù; #7769
+睪 > yì; #776A
+睫 > jié; #776B
+睬 > căi; #776C
+睭 > zhŏu; #776D
+睮 > yú; #776E
+睯 > hūn; #776F
+睰 > mà; #7770
+睱 > xià; #7771
+睲 > xĭng; #7772
+睳 > xī; #7773
+睴 > gùn; #7774
+睵 > cai; #7775
+睶 > chŭn; #7776
+睷 > jiān; #7777
+睸 > mèi; #7778
+睹 > dŭ; #7779
+睺 > hóu; #777A
+睻 > xuān; #777B
+睼 > tì; #777C
+睽 > kúi; #777D
+睾 > gāo; #777E
+睿 > rùi; #777F
+瞀 > mòu; #7780
+瞁 > xù; #7781
+瞂 > fā; #7782
+瞃 > wēn; #7783
+瞄 > miáo; #7784
+瞅 > chŏu; #7785
+瞆 > kùi; #7786
+瞇 > mī; #7787
+瞈 > wĕng; #7788
+瞉 > kòu; #7789
+瞊 > dàng; #778A
+瞋 > chēn; #778B
+瞌 > kē; #778C
+瞍 > sŏu; #778D
+瞎 > xiā; #778E
+瞏 > qíong; #778F
+瞐 > mào; #7790
+瞑 > míng; #7791
+瞒 > mán; #7792
+瞓 > shùi; #7793
+瞔 > zé; #7794
+瞕 > zhàng; #7795
+瞖 > yì; #7796
+瞗 > diāo; #7797
+瞘 > ōu; #7798
+瞙 > mò; #7799
+瞚 > shùn; #779A
+瞛 > cōng; #779B
+瞜 > lōu; #779C
+瞝 > chī; #779D
+瞞 > mán; #779E
+瞟 > piăo; #779F
+瞠 > chēng; #77A0
+瞡 > jì; #77A1
+瞢 > méng; #77A2
+瞤 > rún; #77A4
+瞥 > piē; #77A5
+瞦 > xī; #77A6
+瞧 > qiáo; #77A7
+瞨 > pú; #77A8
+瞩 > zhŭ; #77A9
+瞪 > dèng; #77AA
+瞫 > shĕn; #77AB
+瞬 > shùn; #77AC
+瞭 > liăo; #77AD
+瞮 > chè; #77AE
+瞯 > xián; #77AF
+瞰 > kàn; #77B0
+瞱 > yè; #77B1
+瞲 > xù; #77B2
+瞳 > tóng; #77B3
+瞴 > móu; #77B4
+瞵 > lín; #77B5
+瞶 > kùi; #77B6
+瞷 > xián; #77B7
+瞸 > yè; #77B8
+瞹 > ài; #77B9
+瞺 > hùi; #77BA
+瞻 > zhān; #77BB
+瞼 > jiăn; #77BC
+瞽 > gŭ; #77BD
+瞾 > zhào; #77BE
+瞿 > qū; #77BF
+矀 > wéi; #77C0
+矁 > chŏu; #77C1
+矂 > sào; #77C2
+矃 > nĭng; #77C3
+矄 > xūn; #77C4
+矅 > yào; #77C5
+矆 > hùo; #77C6
+矇 > méng; #77C7
+矈 > mián; #77C8
+矉 > bīn; #77C9
+矊 > mián; #77CA
+矋 > lì; #77CB
+矌 > kuàng; #77CC
+矍 > jué; #77CD
+矎 > xuān; #77CE
+矏 > mián; #77CF
+矐 > hùo; #77D0
+矑 > lú; #77D1
+矒 > méng; #77D2
+矓 > lóng; #77D3
+矔 > guàn; #77D4
+矕 > măn; #77D5
+矖 > xĭ; #77D6
+矗 > chù; #77D7
+矘 > tăng; #77D8
+矙 > kàn; #77D9
+矚 > zhŭ; #77DA
+矛 > máo; #77DB
+矜 > jīn; #77DC
+矝 > lín; #77DD
+矞 > yù; #77DE
+矟 > shùo; #77DF
+矠 > cè; #77E0
+矡 > jué; #77E1
+矢 > shĭ; #77E2
+矣 > yĭ; #77E3
+矤 > shĕn; #77E4
+知 > zhī; #77E5
+矦 > hóu; #77E6
+矧 > shĕn; #77E7
+矨 > yĭng; #77E8
+矩 > jŭ; #77E9
+矪 > zhōu; #77EA
+矫 > jiăo; #77EB
+矬 > cúo; #77EC
+短 > duăn; #77ED
+矮 > ăi; #77EE
+矯 > jiăo; #77EF
+矰 > zēng; #77F0
+矱 > hùo; #77F1
+矲 > băi; #77F2
+石 > shí; #77F3
+矴 > dìng; #77F4
+矵 > qì; #77F5
+矶 > jī; #77F6
+矷 > zĭ; #77F7
+矸 > gān; #77F8
+矹 > wù; #77F9
+矺 > tūo; #77FA
+矻 > kù; #77FB
+矼 > qiāng; #77FC
+矽 > xì; #77FD
+矾 > fán; #77FE
+矿 > kuàng; #77FF
+砀 > dàng; #7800
+码 > mă; #7801
+砂 > shā; #7802
+砃 > dān; #7803
+砄 > jué; #7804
+砅 > lì; #7805
+砆 > fū; #7806
+砇 > mín; #7807
+砈 > nŭo; #7808
+砉 > hùo; #7809
+砊 > kàng; #780A
+砋 > zhĭ; #780B
+砌 > qì; #780C
+砍 > kăn; #780D
+砎 > jiè; #780E
+砏 > fēn; #780F
+砐 > è; #7810
+砑 > yà; #7811
+砒 > pī; #7812
+砓 > zhé; #7813
+研 > yán; #7814
+砕 > sùi; #7815
+砖 > zhuān; #7816
+砗 > chē; #7817
+砘 > dùn; #7818
+砙 > pān; #7819
+砚 > yàn; #781A
+砜 > fēng; #781C
+砝 > fá; #781D
+砞 > mò; #781E
+砟 > zhà; #781F
+砠 > qū; #7820
+砡 > yù; #7821
+砢 > lŭo; #7822
+砣 > túo; #7823
+砤 > túo; #7824
+砥 > dĭ; #7825
+砦 > zhài; #7826
+砧 > zhēn; #7827
+砨 > ài; #7828
+砩 > fèi; #7829
+砪 > mŭ; #782A
+砫 > zhŭ; #782B
+砬 > lì; #782C
+砭 > biān; #782D
+砮 > nŭ; #782E
+砯 > pīng; #782F
+砰 > pēng; #7830
+砱 > líng; #7831
+砲 > pào; #7832
+砳 > lè; #7833
+破 > pò; #7834
+砵 > bō; #7835
+砶 > pò; #7836
+砷 > shēn; #7837
+砸 > zá; #7838
+砹 > nŭo; #7839
+砺 > lì; #783A
+砻 > lóng; #783B
+砼 > tóng; #783C
+砾 > lì; #783E
+础 > chŭ; #7840
+硁 > kēng; #7841
+硂 > quán; #7842
+硃 > zhū; #7843
+硄 > kuāng; #7844
+硅 > hùo; #7845
+硆 > è; #7846
+硇 > náo; #7847
+硈 > jiá; #7848
+硉 > lù; #7849
+硊 > wĕi; #784A
+硋 > ài; #784B
+硌 > lùo; #784C
+硍 > kèn; #784D
+硎 > xíng; #784E
+硏 > yán; #784F
+硐 > tóng; #7850
+硑 > pēng; #7851
+硒 > xī; #7852
+硔 > hóng; #7854
+硕 > shùo; #7855
+硖 > xiá; #7856
+硗 > qiāo; #7857
+硙 > wèi; #7859
+硚 > qiáo; #785A
+硜 > kēng; #785C
+硝 > xiāo; #785D
+硞 > què; #785E
+硟 > chàn; #785F
+硠 > lăng; #7860
+硡 > hóng; #7861
+硢 > yú; #7862
+硣 > xiāo; #7863
+硤 > xiá; #7864
+硥 > măng; #7865
+硦 > lòng; #7866
+硧 > ĭong; #7867
+硨 > chē; #7868
+硩 > chè; #7869
+硪 > é; #786A
+硫 > líu; #786B
+硬 > yìng; #786C
+硭 > máng; #786D
+确 > què; #786E
+硯 > yàn; #786F
+硰 > shā; #7870
+硱 > kŭn; #7871
+硲 > yù; #7872
+硵 > lŭ; #7875
+硶 > chĕn; #7876
+硷 > jiăn; #7877
+硸 > nuè; #7878
+硹 > sōng; #7879
+硺 > zhúo; #787A
+硻 > kēng; #787B
+硼 > péng; #787C
+硽 > yăn; #787D
+硾 > zhùi; #787E
+硿 > kōng; #787F
+碀 > céng; #7880
+碁 > qí; #7881
+碂 > zòng; #7882
+碃 > qìng; #7883
+碄 > lín; #7884
+碅 > jūn; #7885
+碆 > bō; #7886
+碇 > dìng; #7887
+碈 > mín; #7888
+碉 > diāo; #7889
+碊 > jiān; #788A
+碋 > hè; #788B
+碌 > lù; #788C
+碍 > ài; #788D
+碎 > sùi; #788E
+碏 > què; #788F
+碐 > líng; #7890
+碑 > bēi; #7891
+碒 > yín; #7892
+碓 > dùi; #7893
+碔 > wŭ; #7894
+碕 > qí; #7895
+碖 > lùn; #7896
+碗 > wăn; #7897
+碘 > diăn; #7898
+碙 > gāng; #7899
+碚 > péi; #789A
+碛 > qì; #789B
+碜 > chĕn; #789C
+碝 > ruăn; #789D
+碞 > yán; #789E
+碟 > dié; #789F
+碠 > dìng; #78A0
+碡 > dú; #78A1
+碢 > túo; #78A2
+碣 > jié; #78A3
+碤 > yīng; #78A4
+碥 > biăn; #78A5
+碦 > kè; #78A6
+碧 > bì; #78A7
+碨 > wēi; #78A8
+碩 > shùo; #78A9
+碪 > zhēn; #78AA
+碫 > duàn; #78AB
+碬 > xiá; #78AC
+碭 > dàng; #78AD
+碮 > tí; #78AE
+碯 > năo; #78AF
+碰 > pèng; #78B0
+碱 > jiăn; #78B1
+碲 > dì; #78B2
+碳 > tàn; #78B3
+碴 > chá; #78B4
+碶 > qì; #78B6
+碸 > fēng; #78B8
+碹 > xuàn; #78B9
+確 > què; #78BA
+碻 > què; #78BB
+碼 > mă; #78BC
+碽 > gōng; #78BD
+碾 > niàn; #78BE
+碿 > sù; #78BF
+磀 > é; #78C0
+磁 > cí; #78C1
+磂 > lìu; #78C2
+磃 > sī; #78C3
+磄 > táng; #78C4
+磅 > bàng; #78C5
+磆 > huá; #78C6
+磇 > pī; #78C7
+磈 > wĕi; #78C8
+磉 > săng; #78C9
+磊 > lĕi; #78CA
+磋 > cūo; #78CB
+磌 > zhēn; #78CC
+磍 > xiá; #78CD
+磎 > qī; #78CE
+磏 > lián; #78CF
+磐 > pán; #78D0
+磑 > wèi; #78D1
+磒 > yŭn; #78D2
+磓 > dūi; #78D3
+磔 > zhé; #78D4
+磕 > kē; #78D5
+磖 > lā; #78D6
+磘 > qìng; #78D8
+磙 > gŭn; #78D9
+磚 > zhuān; #78DA
+磛 > chán; #78DB
+磜 > qì; #78DC
+磝 > áo; #78DD
+磞 > pēng; #78DE
+磟 > lù; #78DF
+磠 > lŭ; #78E0
+磡 > kàn; #78E1
+磢 > qiăng; #78E2
+磣 > chĕn; #78E3
+磤 > yĭn; #78E4
+磥 > lĕi; #78E5
+磦 > biāo; #78E6
+磧 > qì; #78E7
+磨 > mó; #78E8
+磩 > qī; #78E9
+磪 > cūi; #78EA
+磫 > zōng; #78EB
+磬 > qìng; #78EC
+磭 > chùo; #78ED
+磯 > jī; #78EF
+磰 > shàn; #78F0
+磱 > láo; #78F1
+磲 > qú; #78F2
+磳 > zēng; #78F3
+磴 > dèng; #78F4
+磵 > jiàn; #78F5
+磶 > xì; #78F6
+磷 > lìn; #78F7
+磸 > dìng; #78F8
+磹 > diàn; #78F9
+磺 > huáng; #78FA
+磻 > pán; #78FB
+磼 > zá; #78FC
+磽 > qiāo; #78FD
+磾 > dī; #78FE
+磿 > lì; #78FF
+礁 > jiāo; #7901
+礃 > zhăng; #7903
+礄 > qiáo; #7904
+礅 > dūn; #7905
+礆 > xiăn; #7906
+礇 > yù; #7907
+礈 > zhùi; #7908
+礉 > hé; #7909
+礊 > hùo; #790A
+礋 > zhái; #790B
+礌 > lèi; #790C
+礍 > kĕ; #790D
+礎 > chŭ; #790E
+礏 > jí; #790F
+礐 > què; #7910
+礑 > dàng; #7911
+礒 > yĭ; #7912
+礓 > jiāng; #7913
+礔 > pì; #7914
+礕 > pī; #7915
+礖 > yù; #7916
+礗 > pīn; #7917
+礘 > qì; #7918
+礙 > ài; #7919
+礚 > kài; #791A
+礛 > jiān; #791B
+礜 > yù; #791C
+礝 > ruăn; #791D
+礞 > méng; #791E
+礟 > pào; #791F
+礠 > cí; #7920
+礣 > miè; #7923
+礤 > că; #7924
+礥 > xián; #7925
+礦 > kuàng; #7926
+礧 > lèi; #7927
+礨 > lĕi; #7928
+礩 > zhì; #7929
+礪 > lì; #792A
+礫 > lì; #792B
+礬 > fán; #792C
+礭 > què; #792D
+礮 > pào; #792E
+礯 > yīng; #792F
+礰 > lì; #7930
+礱 > lóng; #7931
+礲 > lóng; #7932
+礳 > mò; #7933
+礴 > bó; #7934
+礵 > shuāng; #7935
+礶 > guàn; #7936
+礷 > lán; #7937
+礸 > zăn; #7938
+礹 > yán; #7939
+示 > shì; #793A
+礻 > shì' 'zì' 'páng; #793B
+礼 > lĭ; #793C
+礽 > réng; #793D
+社 > shè; #793E
+礿 > yuè; #793F
+祀 > sì; #7940
+祁 > qí; #7941
+祂 > tā; #7942
+祃 > mà; #7943
+祄 > xiè; #7944
+祅 > xiān; #7945
+祆 > xiān; #7946
+祇 > zhī; #7947
+祈 > qí; #7948
+祉 > zhĭ; #7949
+祊 > bēng; #794A
+祋 > dùi; #794B
+祌 > zhòng; #794C
+祎 > yī; #794E
+祏 > shí; #794F
+祐 > yòu; #7950
+祑 > zhì; #7951
+祒 > tiáo; #7952
+祓 > fú; #7953
+祔 > fù; #7954
+祕 > mì; #7955
+祖 > zŭ; #7956
+祗 > zhī; #7957
+祘 > suàn; #7958
+祙 > mèi; #7959
+祚 > zùo; #795A
+祛 > qū; #795B
+祜 > hù; #795C
+祝 > zhù; #795D
+神 > shén; #795E
+祟 > sùi; #795F
+祠 > cí; #7960
+祡 > chái; #7961
+祢 > mí; #7962
+祣 > lǚ; #7963
+祤 > yŭ; #7964
+祥 > xiáng; #7965
+祦 > wú; #7966
+祧 > tiāo; #7967
+票 > piào; #7968
+祩 > zhū; #7969
+祪 > gŭi; #796A
+祫 > xiá; #796B
+祬 > zhī; #796C
+祭 > jì; #796D
+祮 > gào; #796E
+祯 > zhēn; #796F
+祰 > gào; #7970
+祱 > shùi; #7971
+祲 > jīn; #7972
+祳 > chĕn; #7973
+祴 > gāi; #7974
+祵 > kŭn; #7975
+祶 > dì; #7976
+祷 > dăo; #7977
+祸 > hùo; #7978
+祹 > táo; #7979
+祺 > qí; #797A
+祻 > gù; #797B
+祼 > guàn; #797C
+祽 > zùi; #797D
+祾 > líng; #797E
+祿 > lù; #797F
+禀 > bĭng; #7980
+禁 > jìn; #7981
+禂 > dăo; #7982
+禃 > zhí; #7983
+禄 > lù; #7984
+禅 > shàn; #7985
+禆 > bēi; #7986
+禇 > zhĕ; #7987
+禈 > hūi; #7988
+禉 > yŏu; #7989
+禊 > xì; #798A
+禋 > yīn; #798B
+禌 > zī; #798C
+禍 > hùo; #798D
+禎 > zhēn; #798E
+福 > fú; #798F
+禐 > yuàn; #7990
+禑 > wú; #7991
+禒 > xiăn; #7992
+禓 > yáng; #7993
+禔 > tí; #7994
+禕 > yī; #7995
+禖 > méi; #7996
+禗 > sī; #7997
+禘 > dì; #7998
+禚 > zhúo; #799A
+禛 > zhēn; #799B
+禜 > yŏng; #799C
+禝 > jí; #799D
+禞 > gào; #799E
+禟 > táng; #799F
+禠 > sī; #79A0
+禡 > mà; #79A1
+禢 > tā; #79A2
+禤 > xuān; #79A4
+禥 > qí; #79A5
+禦 > yù; #79A6
+禧 > xī; #79A7
+禨 > jī; #79A8
+禩 > sì; #79A9
+禪 > chán; #79AA
+禫 > tăn; #79AB
+禬 > kuài; #79AC
+禭 > sùi; #79AD
+禮 > lĭ; #79AE
+禯 > nóng; #79AF
+禰 > nĭ; #79B0
+禱 > dăo; #79B1
+禲 > lì; #79B2
+禳 > ráng; #79B3
+禴 > yuè; #79B4
+禵 > tí; #79B5
+禶 > zăn; #79B6
+禷 > lèi; #79B7
+禸 > róu; #79B8
+禹 > yŭ; #79B9
+禺 > yú; #79BA
+离 > chī; #79BB
+禼 > xiè; #79BC
+禽 > qín; #79BD
+禾 > hé; #79BE
+禿 > tū; #79BF
+秀 > xìu; #79C0
+私 > sī; #79C1
+秂 > rén; #79C2
+秃 > tū; #79C3
+秄 > zĭ; #79C4
+秅 > chá; #79C5
+秆 > găn; #79C6
+秇 > yì; #79C7
+秈 > xiān; #79C8
+秉 > bĭng; #79C9
+秊 > nián; #79CA
+秋 > qīu; #79CB
+秌 > qīu; #79CC
+种 > chóng; #79CD
+秎 > fén; #79CE
+秏 > hào; #79CF
+秐 > yún; #79D0
+科 > kē; #79D1
+秒 > miăo; #79D2
+秓 > zhī; #79D3
+秔 > gēng; #79D4
+秕 > bĭ; #79D5
+秖 > zhī; #79D6
+秗 > yù; #79D7
+秘 > mì; #79D8
+秙 > kù; #79D9
+秚 > bàn; #79DA
+秛 > pī; #79DB
+秜 > ní; #79DC
+秝 > lì; #79DD
+秞 > yóu; #79DE
+租 > zū; #79DF
+秠 > pī; #79E0
+秡 > bá; #79E1
+秢 > líng; #79E2
+秣 > mò; #79E3
+秤 > chèng; #79E4
+秥 > nián; #79E5
+秦 > qín; #79E6
+秧 > yāng; #79E7
+秨 > zúo; #79E8
+秩 > zhì; #79E9
+秪 > zhī; #79EA
+秫 > shú; #79EB
+秬 > jù; #79EC
+秭 > zĭ; #79ED
+秮 > húo; #79EE
+积 > jī; #79EF
+称 > chēng; #79F0
+秱 > tóng; #79F1
+秲 > zhì; #79F2
+秳 > húo; #79F3
+秴 > hé; #79F4
+秵 > yīn; #79F5
+秶 > zī; #79F6
+秷 > zhí; #79F7
+秸 > jiē; #79F8
+秹 > rĕn; #79F9
+秺 > dù; #79FA
+移 > yí; #79FB
+秼 > zhū; #79FC
+秽 > hùi; #79FD
+秾 > nóng; #79FE
+秿 > fŭ; #79FF
+稀 > xī; #7A00
+稁 > kăo; #7A01
+稂 > láng; #7A02
+稃 > fū; #7A03
+稄 > zè; #7A04
+稅 > shùi; #7A05
+稆 > lǚ; #7A06
+稇 > kŭn; #7A07
+稈 > găn; #7A08
+稉 > gēng; #7A09
+稊 > tí; #7A0A
+程 > chéng; #7A0B
+稌 > tú; #7A0C
+稍 > shāo; #7A0D
+税 > shùi; #7A0E
+稏 > yà; #7A0F
+稐 > lŭn; #7A10
+稑 > lù; #7A11
+稒 > gù; #7A12
+稓 > zúo; #7A13
+稔 > rĕn; #7A14
+稕 > zhùn; #7A15
+稖 > bàng; #7A16
+稗 > bài; #7A17
+稘 > jī; #7A18
+稙 > zhí; #7A19
+稚 > zhì; #7A1A
+稛 > kŭn; #7A1B
+稜 > léng; #7A1C
+稝 > péng; #7A1D
+稞 > kē; #7A1E
+稟 > bĭng; #7A1F
+稠 > chóu; #7A20
+稡 > zú; #7A21
+稢 > yù; #7A22
+稣 > sū; #7A23
+稤 > lǜe; #7A24
+稦 > yī; #7A26
+稧 > xì; #7A27
+稨 > biān; #7A28
+稩 > jì; #7A29
+稪 > fù; #7A2A
+稫 > bī; #7A2B
+稬 > nùo; #7A2C
+稭 > jiē; #7A2D
+種 > zhŏng; #7A2E
+稯 > zōng; #7A2F
+稰 > xū; #7A30
+稱 > chēng; #7A31
+稲 > dào; #7A32
+稳 > wĕn; #7A33
+稴 > lián; #7A34
+稵 > zī; #7A35
+稶 > yù; #7A36
+稷 > jì; #7A37
+稸 > xù; #7A38
+稹 > zhĕn; #7A39
+稺 > zhì; #7A3A
+稻 > dào; #7A3B
+稼 > jià; #7A3C
+稽 > jī; #7A3D
+稾 > găo; #7A3E
+稿 > găo; #7A3F
+穀 > gŭ; #7A40
+穁 > róng; #7A41
+穂 > sùi; #7A42
+穄 > jì; #7A44
+穅 > kāng; #7A45
+穆 > mù; #7A46
+穇 > shān; #7A47
+穈 > mén; #7A48
+穉 > zhì; #7A49
+穊 > jì; #7A4A
+穋 > lù; #7A4B
+穌 > sū; #7A4C
+積 > jī; #7A4D
+穎 > yĭng; #7A4E
+穏 > wĕn; #7A4F
+穐 > qīu; #7A50
+穑 > sè; #7A51
+穓 > yì; #7A53
+穔 > huáng; #7A54
+穕 > qiè; #7A55
+穖 > jĭ; #7A56
+穗 > sùi; #7A57
+穘 > xiāo; #7A58
+穙 > pú; #7A59
+穚 > jiāo; #7A5A
+穛 > zhūo; #7A5B
+穜 > tóng; #7A5C
+穞 > lǚ; #7A5E
+穟 > sùi; #7A5F
+穠 > nóng; #7A60
+穡 > sè; #7A61
+穢 > hùi; #7A62
+穣 > ráng; #7A63
+穤 > nùo; #7A64
+穥 > yù; #7A65
+穦 > bin; #7A66
+穧 > jì; #7A67
+穨 > túi; #7A68
+穩 > wĕn; #7A69
+穪 > chēng; #7A6A
+穫 > hùo; #7A6B
+穬 > gŏng; #7A6C
+穭 > lǚ; #7A6D
+穮 > biāo; #7A6E
+穰 > ráng; #7A70
+穱 > zhūo; #7A71
+穲 > lí; #7A72
+穳 > zàn; #7A73
+穴 > xuè; #7A74
+穵 > wā; #7A75
+究 > jìu; #7A76
+穷 > qíong; #7A77
+穸 > xì; #7A78
+穹 > qīong; #7A79
+空 > kōng; #7A7A
+穻 > yū; #7A7B
+穼 > sēn; #7A7C
+穽 > jĭng; #7A7D
+穾 > yào; #7A7E
+穿 > chuān; #7A7F
+窀 > zhūn; #7A80
+突 > tú; #7A81
+窂 > láo; #7A82
+窃 > qiè; #7A83
+窄 > zhăi; #7A84
+窅 > yăo; #7A85
+窆 > biăn; #7A86
+窇 > báo; #7A87
+窈 > yăo; #7A88
+窉 > bĭng; #7A89
+窊 > wā; #7A8A
+窋 > zhú; #7A8B
+窌 > jiào; #7A8C
+窍 > qiào; #7A8D
+窎 > diào; #7A8E
+窏 > wū; #7A8F
+窐 > gūi; #7A90
+窑 > yáo; #7A91
+窒 > zhì; #7A92
+窓 > chuāng; #7A93
+窔 > yăo; #7A94
+窕 > tiăo; #7A95
+窖 > jiào; #7A96
+窗 > chuāng; #7A97
+窘 > jĭong; #7A98
+窙 > xiāo; #7A99
+窚 > chéng; #7A9A
+窛 > kòu; #7A9B
+窜 > cuàn; #7A9C
+窝 > wō; #7A9D
+窞 > dàn; #7A9E
+窟 > kū; #7A9F
+窠 > kē; #7AA0
+窡 > zhùi; #7AA1
+窢 > xù; #7AA2
+窣 > sù; #7AA3
+窤 > guan; #7AA4
+窥 > kūi; #7AA5
+窦 > dòu; #7AA6
+窨 > yìn; #7AA8
+窩 > wō; #7AA9
+窪 > wā; #7AAA
+窫 > yà; #7AAB
+窬 > yú; #7AAC
+窭 > jù; #7AAD
+窮 > qíong; #7AAE
+窯 > yáo; #7AAF
+窰 > yáo; #7AB0
+窱 > tiào; #7AB1
+窲 > cháo; #7AB2
+窳 > yŭ; #7AB3
+窴 > tián; #7AB4
+窵 > diào; #7AB5
+窶 > jù; #7AB6
+窷 > liáo; #7AB7
+窸 > xī; #7AB8
+窹 > wù; #7AB9
+窺 > kūi; #7ABA
+窻 > chuāng; #7ABB
+窼 > zhāo; #7ABC
+窾 > kuăn; #7ABE
+窿 > lóng; #7ABF
+竀 > chēng; #7AC0
+竁 > cùi; #7AC1
+竂 > piáo; #7AC2
+竃 > zào; #7AC3
+竄 > cuàn; #7AC4
+竅 > qiào; #7AC5
+竆 > qíong; #7AC6
+竇 > dòu; #7AC7
+竈 > zào; #7AC8
+竉 > lŏng; #7AC9
+竊 > qiè; #7ACA
+立 > lì; #7ACB
+竌 > chù; #7ACC
+竍 > shí' 'gōng' 'shēng; #7ACD
+竎 > fòu; #7ACE
+竏 > qiān' 'gōng' 'shēng; #7ACF
+竐 > chù; #7AD0
+竑 > hóng; #7AD1
+竒 > qí; #7AD2
+竓 > qiān' 'fēn' 'zhī' 'yī' 'gōng' 'shēng; #7AD3
+竔 > gōng' 'shēng; #7AD4
+竕 > shí' 'fēn' 'zhī' 'yī' 'gōng' 'shēng; #7AD5
+竖 > shù; #7AD6
+竗 > miào; #7AD7
+竘 > jŭ; #7AD8
+站 > zhàn; #7AD9
+竚 > zhù; #7ADA
+竛 > líng; #7ADB
+竜 > lóng; #7ADC
+竝 > bìng; #7ADD
+竞 > jìng; #7ADE
+竟 > jìng; #7ADF
+章 > zhāng; #7AE0
+竡 > yī' 'gōng' 'shēng' 'de' 'yī' 'băi' 'bèi; #7AE1
+竢 > sì; #7AE2
+竣 > jùn; #7AE3
+竤 > hóng; #7AE4
+童 > tóng; #7AE5
+竦 > sŏng; #7AE6
+竧 > jìng; #7AE7
+竨 > diào; #7AE8
+竩 > yì; #7AE9
+竪 > shù; #7AEA
+竫 > jìng; #7AEB
+竬 > qŭ; #7AEC
+竭 > jié; #7AED
+竮 > píng; #7AEE
+端 > duān; #7AEF
+竰 > sháo; #7AF0
+竱 > zhuăn; #7AF1
+竲 > céng; #7AF2
+竳 > dēng; #7AF3
+竴 > cūi; #7AF4
+竵 > huāi; #7AF5
+競 > jìng; #7AF6
+竷 > kàn; #7AF7
+竸 > jìng; #7AF8
+竹 > zhú; #7AF9
+竺 > zhú; #7AFA
+竻 > lè; #7AFB
+竼 > péng; #7AFC
+竽 > yú; #7AFD
+竾 > chí; #7AFE
+竿 > gān; #7AFF
+笀 > máng; #7B00
+笁 > zhú; #7B01
+笃 > dŭ; #7B03
+笄 > jī; #7B04
+笅 > xiáo; #7B05
+笆 > bā; #7B06
+笇 > suàn; #7B07
+笈 > jí; #7B08
+笉 > zhĕn; #7B09
+笊 > zhào; #7B0A
+笋 > sŭn; #7B0B
+笌 > yá; #7B0C
+笍 > zhùi; #7B0D
+笎 > yuán; #7B0E
+笏 > hù; #7B0F
+笐 > gāng; #7B10
+笑 > xiào; #7B11
+笒 > cén; #7B12
+笓 > pí; #7B13
+笔 > bĭ; #7B14
+笕 > jiăn; #7B15
+笖 > yĭ; #7B16
+笗 > dōng; #7B17
+笘 > shān; #7B18
+笙 > shēng; #7B19
+笚 > xiá; #7B1A
+笛 > dí; #7B1B
+笜 > zhú; #7B1C
+笝 > nà; #7B1D
+笞 > chī; #7B1E
+笟 > gū; #7B1F
+笠 > lì; #7B20
+笡 > qiè; #7B21
+笢 > mĭn; #7B22
+笣 > bāo; #7B23
+笤 > tiáo; #7B24
+笥 > sì; #7B25
+符 > fú; #7B26
+笧 > cè; #7B27
+笨 > bèn; #7B28
+笩 > pèi; #7B29
+笪 > dá; #7B2A
+笫 > zĭ; #7B2B
+第 > dì; #7B2C
+笭 > líng; #7B2D
+笮 > zé; #7B2E
+笯 > nú; #7B2F
+笰 > fú; #7B30
+笱 > gŏu; #7B31
+笲 > fān; #7B32
+笳 > jiā; #7B33
+笴 > gĕ; #7B34
+笵 > fàn; #7B35
+笶 > shĭ; #7B36
+笷 > măo; #7B37
+笸 > pŏ; #7B38
+笺 > jiān; #7B3A
+笻 > qíong; #7B3B
+笼 > lóng; #7B3C
+笾 > biān; #7B3E
+笿 > lùo; #7B3F
+筀 > gùi; #7B40
+筁 > qŭ; #7B41
+筂 > chí; #7B42
+筃 > yīn; #7B43
+筄 > yào; #7B44
+筅 > xiăn; #7B45
+筆 > bĭ; #7B46
+筇 > qíong; #7B47
+筈 > guā; #7B48
+等 > dĕng; #7B49
+筊 > jiăo; #7B4A
+筋 > jīn; #7B4B
+筌 > quán; #7B4C
+筍 > sŭn; #7B4D
+筎 > rú; #7B4E
+筏 > fá; #7B4F
+筐 > kuāng; #7B50
+筑 > zhú; #7B51
+筒 > tŏng; #7B52
+筓 > jī; #7B53
+答 > dá; #7B54
+筕 > xíng; #7B55
+策 > cè; #7B56
+筗 > zhòng; #7B57
+筘 > kòu; #7B58
+筙 > lái; #7B59
+筚 > bì; #7B5A
+筛 > shāi; #7B5B
+筜 > dāng; #7B5C
+筝 > zhēng; #7B5D
+筞 > cè; #7B5E
+筟 > fū; #7B5F
+筠 > yún; #7B60
+筡 > tú; #7B61
+筢 > pá; #7B62
+筣 > lì; #7B63
+筤 > láng; #7B64
+筥 > jŭ; #7B65
+筦 > guăn; #7B66
+筧 > jiăn; #7B67
+筨 > hán; #7B68
+筩 > tóng; #7B69
+筪 > xiá; #7B6A
+筫 > zhì; #7B6B
+筬 > chéng; #7B6C
+筭 > suàn; #7B6D
+筮 > shì; #7B6E
+筯 > zhù; #7B6F
+筰 > zúo; #7B70
+筱 > xiăo; #7B71
+筲 > shāo; #7B72
+筳 > tíng; #7B73
+筴 > cè; #7B74
+筵 > yán; #7B75
+筶 > găo; #7B76
+筷 > kuài; #7B77
+筸 > gān; #7B78
+筹 > chóu; #7B79
+筻 > gàng; #7B7B
+筼 > yún; #7B7C
+签 > qiān; #7B7E
+筿 > xiăo; #7B7F
+简 > jiăn; #7B80
+箁 > pú; #7B81
+箂 > lái; #7B82
+箃 > zōu; #7B83
+箄 > bì; #7B84
+箅 > bì; #7B85
+箆 > bì; #7B86
+箇 > gè; #7B87
+箈 > chí; #7B88
+箉 > guăi; #7B89
+箊 > yū; #7B8A
+箋 > jiān; #7B8B
+箌 > zhào; #7B8C
+箍 > gū; #7B8D
+箎 > chí; #7B8E
+箏 > zhēng; #7B8F
+箐 > jīng; #7B90
+箑 > shà; #7B91
+箒 > zhŏu; #7B92
+箓 > lù; #7B93
+箔 > bó; #7B94
+箕 > jī; #7B95
+箖 > lín; #7B96
+算 > suàn; #7B97
+箘 > jùn; #7B98
+箙 > fú; #7B99
+箚 > zhá; #7B9A
+箛 > gū; #7B9B
+箜 > kōng; #7B9C
+箝 > qián; #7B9D
+箞 > quān; #7B9E
+箟 > jùn; #7B9F
+箠 > chúi; #7BA0
+管 > guăn; #7BA1
+箢 > yuān; #7BA2
+箣 > cè; #7BA3
+箤 > jú; #7BA4
+箥 > bŏ; #7BA5
+箦 > zé; #7BA6
+箧 > qiè; #7BA7
+箨 > tùo; #7BA8
+箩 > lúo; #7BA9
+箪 > dān; #7BAA
+箫 > xiāo; #7BAB
+箬 > rùo; #7BAC
+箭 > jiàn; #7BAD
+箮 > xuan; #7BAE
+箯 > biān; #7BAF
+箰 > sŭn; #7BB0
+箱 > xiāng; #7BB1
+箲 > xiăn; #7BB2
+箳 > píng; #7BB3
+箴 > zhēn; #7BB4
+箵 > shĕng; #7BB5
+箶 > hú; #7BB6
+箷 > shī; #7BB7
+箸 > zhù; #7BB8
+箹 > yuē; #7BB9
+箺 > chŭn; #7BBA
+箻 > lǜ; #7BBB
+箼 > wū; #7BBC
+箽 > dŏng; #7BBD
+箾 > xiāo; #7BBE
+箿 > jí; #7BBF
+節 > jié; #7BC0
+篁 > huáng; #7BC1
+篂 > xīng; #7BC2
+篃 > méi; #7BC3
+範 > fàn; #7BC4
+篅 > chúi; #7BC5
+篆 > zhuàn; #7BC6
+篇 > piān; #7BC7
+篈 > fēng; #7BC8
+築 > zhú; #7BC9
+篊 > hóng; #7BCA
+篋 > qiè; #7BCB
+篌 > hóu; #7BCC
+篍 > qīu; #7BCD
+篎 > miăo; #7BCE
+篏 > qiàn; #7BCF
+篑 > kùi; #7BD1
+篓 > lŏu; #7BD3
+篔 > yún; #7BD4
+篕 > hé; #7BD5
+篖 > táng; #7BD6
+篗 > yuè; #7BD7
+篘 > chōu; #7BD8
+篙 > gāo; #7BD9
+篚 > fĕi; #7BDA
+篛 > rùo; #7BDB
+篜 > zhēng; #7BDC
+篝 > gōu; #7BDD
+篞 > niè; #7BDE
+篟 > qiàn; #7BDF
+篠 > xiăo; #7BE0
+篡 > cuàn; #7BE1
+篢 > gōng; #7BE2
+篣 > páng; #7BE3
+篤 > dŭ; #7BE4
+篥 > lì; #7BE5
+篦 > bì; #7BE6
+篧 > zhúo; #7BE7
+篨 > chú; #7BE8
+篩 > shāi; #7BE9
+篪 > chí; #7BEA
+篫 > zhú; #7BEB
+篬 > qiāng; #7BEC
+篭 > lóng; #7BED
+篮 > lán; #7BEE
+篯 > jiān; #7BEF
+篰 > bù; #7BF0
+篱 > lí; #7BF1
+篲 > hùi; #7BF2
+篳 > bì; #7BF3
+篴 > dí; #7BF4
+篵 > cōng; #7BF5
+篶 > yān; #7BF6
+篷 > péng; #7BF7
+篸 > sēn; #7BF8
+篹 > zhuàn; #7BF9
+篺 > pái; #7BFA
+篻 > piào; #7BFB
+篼 > dōu; #7BFC
+篽 > yŭ; #7BFD
+篾 > miè; #7BFE
+篿 > zhuān; #7BFF
+簀 > zé; #7C00
+簁 > xĭ; #7C01
+簂 > gúo; #7C02
+簃 > yí; #7C03
+簄 > hù; #7C04
+簅 > chăn; #7C05
+簆 > kòu; #7C06
+簇 > cù; #7C07
+簈 > píng; #7C08
+簉 > chòu; #7C09
+簊 > jī; #7C0A
+簋 > gŭi; #7C0B
+簌 > sù; #7C0C
+簍 > lŏu; #7C0D
+簎 > zhà; #7C0E
+簏 > lù; #7C0F
+簐 > niăn; #7C10
+簑 > sūo; #7C11
+簒 > cuàn; #7C12
+簔 > sūo; #7C14
+簕 > lè; #7C15
+簖 > duàn; #7C16
+簘 > xiāo; #7C18
+簙 > bó; #7C19
+簚 > mì; #7C1A
+簛 > sī; #7C1B
+簜 > dàng; #7C1C
+簝 > liáo; #7C1D
+簞 > dān; #7C1E
+簟 > diàn; #7C1F
+簠 > fŭ; #7C20
+簡 > jiăn; #7C21
+簢 > mĭn; #7C22
+簣 > kùi; #7C23
+簤 > dài; #7C24
+簥 > qiáo; #7C25
+簦 > dēng; #7C26
+簧 > huáng; #7C27
+簨 > sŭn; #7C28
+簩 > láo; #7C29
+簪 > zān; #7C2A
+簫 > xiāo; #7C2B
+簬 > dù; #7C2C
+簭 > shì; #7C2D
+簮 > zān; #7C2E
+簰 > pái; #7C30
+簲 > pái; #7C32
+簳 > gàn; #7C33
+簴 > jù; #7C34
+簵 > dù; #7C35
+簶 > lù; #7C36
+簷 > yán; #7C37
+簸 > bò; #7C38
+簹 > dāng; #7C39
+簺 > sài; #7C3A
+簻 > kē; #7C3B
+簼 > lóng; #7C3C
+簽 > qiān; #7C3D
+簾 > lián; #7C3E
+簿 > bó; #7C3F
+籀 > zhòu; #7C40
+籁 > lài; #7C41
+籃 > lán; #7C43
+籄 > kùi; #7C44
+籅 > yú; #7C45
+籆 > yuè; #7C46
+籇 > háo; #7C47
+籈 > zhēn; #7C48
+籉 > tái; #7C49
+籊 > tì; #7C4A
+籋 > mí; #7C4B
+籌 > chóu; #7C4C
+籍 > jí; #7C4D
+籐 > téng; #7C50
+籑 > zhuàn; #7C51
+籒 > zhòu; #7C52
+籓 > fān; #7C53
+籔 > sŏu; #7C54
+籕 > zhòu; #7C55
+籗 > zhúo; #7C57
+籘 > téng; #7C58
+籙 > lù; #7C59
+籚 > lú; #7C5A
+籛 > jiān; #7C5B
+籜 > tùo; #7C5C
+籝 > yíng; #7C5D
+籞 > yù; #7C5E
+籟 > lài; #7C5F
+籠 > lóng; #7C60
+籢 > lián; #7C62
+籣 > lán; #7C63
+籤 > qiān; #7C64
+籥 > yuè; #7C65
+籦 > zhōng; #7C66
+籧 > qú; #7C67
+籨 > lián; #7C68
+籩 > biān; #7C69
+籪 > duàn; #7C6A
+籫 > zuăn; #7C6B
+籬 > lí; #7C6C
+籭 > sī; #7C6D
+籮 > lúo; #7C6E
+籯 > yíng; #7C6F
+籰 > yuè; #7C70
+籱 > zhúo; #7C71
+籲 > xū; #7C72
+米 > mĭ; #7C73
+籴 > dí; #7C74
+籵 > fán; #7C75
+籶 > shēn; #7C76
+籷 > zhé; #7C77
+籸 > shēn; #7C78
+籹 > nǚ; #7C79
+籺 > xié; #7C7A
+类 > lèi; #7C7B
+籼 > xiān; #7C7C
+籽 > zĭ; #7C7D
+籾 > ní; #7C7E
+籿 > cùn; #7C7F
+粁 > qiān; #7C81
+粃 > bĭ; #7C83
+粄 > băn; #7C84
+粅 > wù; #7C85
+粆 > shā; #7C86
+粇 > kāng; #7C87
+粈 > rŏu; #7C88
+粉 > fĕn; #7C89
+粊 > bì; #7C8A
+粋 > cùi; #7C8B
+粍 > lí; #7C8D
+粎 > chĭ; #7C8E
+粑 > bā; #7C91
+粒 > lì; #7C92
+粓 > gān; #7C93
+粔 > jù; #7C94
+粕 > pò; #7C95
+粖 > mò; #7C96
+粗 > cū; #7C97
+粘 > nián; #7C98
+粙 > zhòu; #7C99
+粚 > lí; #7C9A
+粛 > sù; #7C9B
+粜 > tiào; #7C9C
+粝 > lì; #7C9D
+粞 > qī; #7C9E
+粟 > sù; #7C9F
+粠 > hóng; #7CA0
+粡 > tóng; #7CA1
+粢 > zī; #7CA2
+粣 > cè; #7CA3
+粤 > yuè; #7CA4
+粥 > zhōu; #7CA5
+粦 > lìn; #7CA6
+粧 > zhuāng; #7CA7
+粨 > băi; #7CA8
+粪 > fèn; #7CAA
+粮 > liáng; #7CAE
+粯 > xiàn; #7CAF
+粰 > fú; #7CB0
+粱 > liáng; #7CB1
+粲 > càn; #7CB2
+粳 > gēng; #7CB3
+粴 > lĭ; #7CB4
+粵 > yuè; #7CB5
+粶 > lù; #7CB6
+粷 > jú; #7CB7
+粸 > qí; #7CB8
+粹 > cùi; #7CB9
+粺 > bài; #7CBA
+粻 > zhāng; #7CBB
+粼 > lín; #7CBC
+粽 > zòng; #7CBD
+精 > jīng; #7CBE
+粿 > gŭo; #7CBF
+糁 > sān; #7CC1
+糂 > săn; #7CC2
+糃 > táng; #7CC3
+糄 > biān; #7CC4
+糅 > rŏu; #7CC5
+糆 > miàn; #7CC6
+糇 > hóu; #7CC7
+糈 > xŭ; #7CC8
+糉 > zòng; #7CC9
+糊 > hú; #7CCA
+糋 > jiàn; #7CCB
+糌 > zán; #7CCC
+糍 > cí; #7CCD
+糎 > lí; #7CCE
+糏 > xiè; #7CCF
+糐 > fū; #7CD0
+糑 > nì; #7CD1
+糒 > bèi; #7CD2
+糓 > gŭ; #7CD3
+糔 > xĭu; #7CD4
+糕 > gāo; #7CD5
+糖 > táng; #7CD6
+糗 > qĭu; #7CD7
+糙 > cāo; #7CD9
+糚 > zhuāng; #7CDA
+糛 > táng; #7CDB
+糜 > mí; #7CDC
+糝 > sān; #7CDD
+糞 > fèn; #7CDE
+糟 > zāo; #7CDF
+糠 > kāng; #7CE0
+糡 > jiàng; #7CE1
+糢 > mó; #7CE2
+糣 > săn; #7CE3
+糤 > săn; #7CE4
+糥 > nùo; #7CE5
+糦 > xī; #7CE6
+糧 > liáng; #7CE7
+糨 > jiàng; #7CE8
+糩 > kuài; #7CE9
+糪 > bó; #7CEA
+糫 > huán; #7CEB
+糭 > zòng; #7CED
+糮 > xiàn; #7CEE
+糯 > nùo; #7CEF
+糰 > tuán; #7CF0
+糱 > niè; #7CF1
+糲 > lì; #7CF2
+糳 > zùo; #7CF3
+糴 > dí; #7CF4
+糵 > niè; #7CF5
+糶 > tiào; #7CF6
+糷 > lán; #7CF7
+糸 > mì; #7CF8
+糹 > jiăo' 'sī' 'páng; #7CF9
+糺 > jīu; #7CFA
+系 > xì; #7CFB
+糼 > gōng; #7CFC
+糽 > zhĕng; #7CFD
+糾 > jīu; #7CFE
+糿 > yòu; #7CFF
+紀 > jì; #7D00
+紁 > chà; #7D01
+紂 > zhòu; #7D02
+紃 > xún; #7D03
+約 > yuē; #7D04
+紅 > hóng; #7D05
+紆 > yū; #7D06
+紇 > hé; #7D07
+紈 > wán; #7D08
+紉 > rèn; #7D09
+紊 > wèn; #7D0A
+紋 > wén; #7D0B
+紌 > qíu; #7D0C
+納 > nà; #7D0D
+紎 > zī; #7D0E
+紏 > tŏu; #7D0F
+紐 > nĭu; #7D10
+紑 > fóu; #7D11
+紒 > jiè; #7D12
+紓 > shū; #7D13
+純 > chún; #7D14
+紕 > pí; #7D15
+紖 > yĭn; #7D16
+紗 > shā; #7D17
+紘 > hóng; #7D18
+紙 > zhĭ; #7D19
+級 > jí; #7D1A
+紛 > fēn; #7D1B
+紜 > yún; #7D1C
+紝 > rén; #7D1D
+紞 > dăn; #7D1E
+紟 > jīn; #7D1F
+素 > sù; #7D20
+紡 > făng; #7D21
+索 > sŭo; #7D22
+紣 > cùi; #7D23
+紤 > jĭu; #7D24
+紥 > zhá; #7D25
+紧 > jĭn; #7D27
+紨 > fù; #7D28
+紩 > zhì; #7D29
+紪 > cĭ; #7D2A
+紫 > zĭ; #7D2B
+紬 > chóu; #7D2C
+紭 > hóng; #7D2D
+紮 > zhá; #7D2E
+累 > lèi; #7D2F
+細 > xì; #7D30
+紱 > fú; #7D31
+紲 > xiè; #7D32
+紳 > shēn; #7D33
+紴 > bèi; #7D34
+紵 > zhù; #7D35
+紶 > qŭ; #7D36
+紷 > líng; #7D37
+紸 > zhù; #7D38
+紹 > shào; #7D39
+紺 > gàn; #7D3A
+紻 > yāng; #7D3B
+紼 > fú; #7D3C
+紽 > túo; #7D3D
+紾 > zhĕn; #7D3E
+紿 > dài; #7D3F
+絀 > zhúo; #7D40
+絁 > shī; #7D41
+終 > zhōng; #7D42
+絃 > xián; #7D43
+組 > zŭ; #7D44
+絅 > jĭong; #7D45
+絆 > bàn; #7D46
+絇 > jù; #7D47
+絈 > mò; #7D48
+絉 > shù; #7D49
+絊 > zùi; #7D4A
+経 > jīng; #7D4C
+絍 > rén; #7D4D
+絎 > hèng; #7D4E
+絏 > xiè; #7D4F
+結 > jié; #7D50
+絑 > zhū; #7D51
+絒 > chóu; #7D52
+絓 > guà; #7D53
+絔 > băi; #7D54
+絕 > jué; #7D55
+絖 > kuàng; #7D56
+絗 > hú; #7D57
+絘 > cì; #7D58
+絙 > gēng; #7D59
+絚 > gēng; #7D5A
+絛 > tāo; #7D5B
+絜 > xié; #7D5C
+絝 > kù; #7D5D
+絞 > jiăo; #7D5E
+絟 > quān; #7D5F
+絠 > găi; #7D60
+絡 > lùo; #7D61
+絢 > xuàn; #7D62
+絣 > bīng; #7D63
+絤 > xiàn; #7D64
+絥 > fú; #7D65
+給 > gĕi; #7D66
+絧 > tóng; #7D67
+絨 > róng; #7D68
+絩 > tiào; #7D69
+絪 > yīn; #7D6A
+絫 > lĕi; #7D6B
+絬 > xiè; #7D6C
+絭 > quàn; #7D6D
+絮 > xù; #7D6E
+絯 > lǜn; #7D6F
+絰 > dié; #7D70
+統 > tŏng; #7D71
+絲 > sī; #7D72
+絳 > jiàng; #7D73
+絴 > xiáng; #7D74
+絵 > hùi; #7D75
+絶 > jué; #7D76
+絷 > zhí; #7D77
+絸 > jiăn; #7D78
+絹 > juàn; #7D79
+絺 > chī; #7D7A
+絻 > miăn; #7D7B
+絼 > zhĕn; #7D7C
+絽 > lǚ; #7D7D
+絾 > chéng; #7D7E
+絿 > qíu; #7D7F
+綀 > shū; #7D80
+綁 > băng; #7D81
+綂 > tŏng; #7D82
+綃 > xiāo; #7D83
+綄 > wàn; #7D84
+綅 > qīn; #7D85
+綆 > gĕng; #7D86
+綇 > xĭu; #7D87
+綈 > tí; #7D88
+綉 > xìu; #7D89
+綊 > xié; #7D8A
+綋 > hóng; #7D8B
+綌 > xì; #7D8C
+綍 > fú; #7D8D
+綎 > tīng; #7D8E
+綏 > sūi; #7D8F
+綐 > dùi; #7D90
+綑 > kŭn; #7D91
+綒 > fū; #7D92
+經 > jīng; #7D93
+綔 > hù; #7D94
+綕 > zhī; #7D95
+綖 > yán; #7D96
+綗 > jĭong; #7D97
+綘 > féng; #7D98
+継 > jì; #7D99
+綜 > zòng; #7D9C
+綝 > lín; #7D9D
+綞 > dŭo; #7D9E
+綟 > lì; #7D9F
+綠 > lǜ; #7DA0
+綡 > liáng; #7DA1
+綢 > chóu; #7DA2
+綣 > quăn; #7DA3
+綤 > shào; #7DA4
+綥 > qì; #7DA5
+綦 > qí; #7DA6
+綧 > zhŭn; #7DA7
+綨 > qí; #7DA8
+綩 > wăn; #7DA9
+綪 > qiàn; #7DAA
+綫 > xiàn; #7DAB
+綬 > shòu; #7DAC
+維 > wéi; #7DAD
+綮 > qĭ; #7DAE
+綯 > táo; #7DAF
+綰 > wăn; #7DB0
+綱 > gāng; #7DB1
+網 > wăng; #7DB2
+綳 > bēng; #7DB3
+綴 > zhùi; #7DB4
+綵 > căi; #7DB5
+綶 > gŭo; #7DB6
+綷 > cùi; #7DB7
+綸 > lún; #7DB8
+綹 > lĭu; #7DB9
+綺 > qĭ; #7DBA
+綻 > zhàn; #7DBB
+綼 > bēi; #7DBC
+綽 > chùo; #7DBD
+綾 > líng; #7DBE
+綿 > mián; #7DBF
+緀 > qī; #7DC0
+緁 > qiè; #7DC1
+緂 > tān; #7DC2
+緃 > zōng; #7DC3
+緄 > gŭn; #7DC4
+緅 > zōu; #7DC5
+緆 > yì; #7DC6
+緇 > zī; #7DC7
+緈 > xìng; #7DC8
+緉 > liăng; #7DC9
+緊 > jĭn; #7DCA
+緋 > fēi; #7DCB
+緌 > rúi; #7DCC
+緍 > mín; #7DCD
+緎 > yù; #7DCE
+総 > zŏng; #7DCF
+緐 > fán; #7DD0
+緑 > lǜ; #7DD1
+緒 > xù; #7DD2
+緓 > yingl; #7DD3
+緔 > zhàng; #7DD4
+緖 > xù; #7DD6
+緗 > xiāng; #7DD7
+緘 > jiān; #7DD8
+緙 > kè; #7DD9
+線 > xiàn; #7DDA
+緛 > ruăn; #7DDB
+緜 > mián; #7DDC
+緝 > qì; #7DDD
+緞 > duàn; #7DDE
+緟 > zhòng; #7DDF
+締 > dì; #7DE0
+緡 > mín; #7DE1
+緢 > miáo; #7DE2
+緣 > yuán; #7DE3
+緤 > xiè; #7DE4
+緥 > băo; #7DE5
+緦 > sī; #7DE6
+緧 > qīu; #7DE7
+編 > biān; #7DE8
+緩 > huăn; #7DE9
+緪 > gēng; #7DEA
+緫 > cōng; #7DEB
+緬 > miăn; #7DEC
+緭 > wèi; #7DED
+緮 > fù; #7DEE
+緯 > wĕi; #7DEF
+緰 > yú; #7DF0
+緱 > gōu; #7DF1
+緲 > miăo; #7DF2
+緳 > xié; #7DF3
+練 > liàn; #7DF4
+緵 > zōng; #7DF5
+緶 > biàn; #7DF6
+緷 > yùn; #7DF7
+緸 > yīn; #7DF8
+緹 > tí; #7DF9
+緺 > guā; #7DFA
+緻 > zhì; #7DFB
+緼 > yūn; #7DFC
+緽 > chēng; #7DFD
+緾 > chán; #7DFE
+緿 > dài; #7DFF
+縀 > xiá; #7E00
+縁 > yuán; #7E01
+縂 > zŏng; #7E02
+縃 > xū; #7E03
+縆 > gēng; #7E06
+縈 > yíng; #7E08
+縉 > jìn; #7E09
+縊 > yì; #7E0A
+縋 > zhùi; #7E0B
+縌 > nì; #7E0C
+縍 > bāng; #7E0D
+縎 > gŭ; #7E0E
+縏 > pán; #7E0F
+縐 > zhòu; #7E10
+縑 > jiān; #7E11
+縒 > cŭo; #7E12
+縓 > quăn; #7E13
+縔 > shuăng; #7E14
+縕 > yūn; #7E15
+縖 > xiá; #7E16
+縗 > shuāi; #7E17
+縘 > xī; #7E18
+縙 > róng; #7E19
+縚 > tāo; #7E1A
+縛 > fú; #7E1B
+縜 > yún; #7E1C
+縝 > zhēn; #7E1D
+縞 > găo; #7E1E
+縟 > rù; #7E1F
+縠 > hú; #7E20
+縡 > zăi; #7E21
+縢 > téng; #7E22
+縣 > xiàn; #7E23
+縤 > sù; #7E24
+縥 > zhĕn; #7E25
+縦 > zòng; #7E26
+縧 > tāo; #7E27
+縩 > cài; #7E29
+縪 > bì; #7E2A
+縫 > féng; #7E2B
+縬 > cù; #7E2C
+縭 > lí; #7E2D
+縮 > sūo; #7E2E
+縯 > yĭn; #7E2F
+縰 > xĭ; #7E30
+縱 > zòng; #7E31
+縲 > léi; #7E32
+縳 > zhuàn; #7E33
+縴 > qiān; #7E34
+縵 > màn; #7E35
+縶 > zhí; #7E36
+縷 > lǚ; #7E37
+縸 > mò; #7E38
+縹 > piăo; #7E39
+縺 > lián; #7E3A
+縻 > mí; #7E3B
+縼 > xuàn; #7E3C
+總 > zŏng; #7E3D
+績 > jī; #7E3E
+縿 > shān; #7E3F
+繀 > sùi; #7E40
+繁 > fán; #7E41
+繂 > shuài; #7E42
+繃 > bēng; #7E43
+繄 > yī; #7E44
+繅 > sāo; #7E45
+繆 > móu; #7E46
+繇 > zhòu; #7E47
+繈 > qiăng; #7E48
+繉 > hún; #7E49
+繋 > xì; #7E4B
+繍 > xìu; #7E4D
+繎 > rán; #7E4E
+繏 > xuàn; #7E4F
+繐 > hùi; #7E50
+繑 > qiāo; #7E51
+繒 > zēng; #7E52
+繓 > zŭo; #7E53
+織 > zhī; #7E54
+繕 > shàn; #7E55
+繖 > săn; #7E56
+繗 > lín; #7E57
+繘 > yù; #7E58
+繙 > fān; #7E59
+繚 > liáo; #7E5A
+繛 > chùo; #7E5B
+繜 > zūn; #7E5C
+繝 > jiàn; #7E5D
+繞 > rào; #7E5E
+繟 > chăn; #7E5F
+繠 > rŭi; #7E60
+繡 > xìu; #7E61
+繢 > hùi; #7E62
+繣 > huà; #7E63
+繤 > zuăn; #7E64
+繥 > xī; #7E65
+繦 > qiăng; #7E66
+繨 > dá; #7E68
+繩 > shéng; #7E69
+繪 > hùi; #7E6A
+繫 > xì; #7E6B
+繬 > sè; #7E6C
+繭 > jiăn; #7E6D
+繮 > jiāng; #7E6E
+繯 > huán; #7E6F
+繰 > zăo; #7E70
+繱 > cōng; #7E71
+繲 > jiè; #7E72
+繳 > jiăo; #7E73
+繴 > bò; #7E74
+繵 > chán; #7E75
+繶 > yì; #7E76
+繷 > náo; #7E77
+繸 > sùi; #7E78
+繹 > yì; #7E79
+繺 > shăi; #7E7A
+繻 > xū; #7E7B
+繼 > jì; #7E7C
+繽 > bīn; #7E7D
+繾 > qiăn; #7E7E
+繿 > lán; #7E7F
+纀 > pú; #7E80
+纁 > xūn; #7E81
+纂 > zuăn; #7E82
+纃 > qí; #7E83
+纄 > péng; #7E84
+纅 > lì; #7E85
+纆 > mò; #7E86
+纇 > lèi; #7E87
+纈 > xié; #7E88
+纉 > zuăn; #7E89
+纊 > kuàng; #7E8A
+纋 > yōu; #7E8B
+續 > xù; #7E8C
+纍 > léi; #7E8D
+纎 > xiān; #7E8E
+纏 > chán; #7E8F
+纑 > lú; #7E91
+纒 > chán; #7E92
+纓 > yīng; #7E93
+纔 > cái; #7E94
+纕 > xiāng; #7E95
+纖 > xiān; #7E96
+纗 > zūi; #7E97
+纘 > zuăn; #7E98
+纙 > lùo; #7E99
+纚 > xĭ; #7E9A
+纛 > dào; #7E9B
+纜 > làn; #7E9C
+纝 > léi; #7E9D
+纞 > liàn; #7E9E
+纟 > sī; #7E9F
+纠 > jīu; #7EA0
+纡 > yū; #7EA1
+红 > hóng; #7EA2
+纣 > zhòu; #7EA3
+纤 > xiān; #7EA4
+纥 > hé; #7EA5
+约 > yuē; #7EA6
+级 > jí; #7EA7
+纨 > wán; #7EA8
+纩 > kuàng; #7EA9
+纪 > jì; #7EAA
+纫 > rèn; #7EAB
+纬 > wĕi; #7EAC
+纭 > yún; #7EAD
+纮 > hóng; #7EAE
+纯 > chún; #7EAF
+纰 > pí; #7EB0
+纱 > shā; #7EB1
+纲 > gāng; #7EB2
+纳 > nà; #7EB3
+纴 > rén; #7EB4
+纵 > zòng; #7EB5
+纶 > lún; #7EB6
+纷 > fēn; #7EB7
+纸 > zhĭ; #7EB8
+纹 > wén; #7EB9
+纺 > făng; #7EBA
+纻 > zhù; #7EBB
+纼 > yĭn; #7EBC
+纽 > nĭu; #7EBD
+纾 > shū; #7EBE
+线 > xiàn; #7EBF
+绀 > gàn; #7EC0
+绁 > xiè; #7EC1
+绂 > fú; #7EC2
+练 > liàn; #7EC3
+组 > zŭ; #7EC4
+绅 > shēn; #7EC5
+细 > xì; #7EC6
+织 > zhī; #7EC7
+终 > zhōng; #7EC8
+绉 > zhòu; #7EC9
+绊 > bàn; #7ECA
+绋 > fú; #7ECB
+绌 > zhúo; #7ECC
+绍 > shào; #7ECD
+绎 > yì; #7ECE
+经 > jīng; #7ECF
+绐 > dài; #7ED0
+绑 > băng; #7ED1
+绒 > róng; #7ED2
+结 > jié; #7ED3
+绔 > kù; #7ED4
+绕 > rào; #7ED5
+绖 > dié; #7ED6
+绗 > hèng; #7ED7
+绘 > hùi; #7ED8
+给 > gĕi; #7ED9
+绚 > xuàn; #7EDA
+绛 > jiàng; #7EDB
+络 > lùo; #7EDC
+绝 > jué; #7EDD
+绞 > jiăo; #7EDE
+统 > tŏng; #7EDF
+绠 > gĕng; #7EE0
+绡 > xiāo; #7EE1
+绢 > juàn; #7EE2
+绣 > xìu; #7EE3
+绤 > xì; #7EE4
+绥 > sūi; #7EE5
+绦 > tāo; #7EE6
+继 > jì; #7EE7
+绨 > tí; #7EE8
+绩 > jī; #7EE9
+绪 > xù; #7EEA
+绫 > líng; #7EEB
+续 > xù; #7EED
+绮 > qĭ; #7EEE
+绯 > fēi; #7EEF
+绰 > chùo; #7EF0
+绱 > zhăng; #7EF1
+绲 > gŭn; #7EF2
+绳 > shéng; #7EF3
+维 > wéi; #7EF4
+绵 > mián; #7EF5
+绶 > shòu; #7EF6
+绷 > bēng; #7EF7
+绸 > chóu; #7EF8
+绹 > táo; #7EF9
+绺 > lĭu; #7EFA
+绻 > quăn; #7EFB
+综 > zòng; #7EFC
+绽 > zhàn; #7EFD
+绾 > wăn; #7EFE
+绿 > lǜ; #7EFF
+缀 > zhùi; #7F00
+缁 > zī; #7F01
+缂 > kè; #7F02
+缃 > xiāng; #7F03
+缄 > jiān; #7F04
+缅 > miăn; #7F05
+缆 > làn; #7F06
+缇 > tí; #7F07
+缈 > miăo; #7F08
+缉 > qì; #7F09
+缊 > yūn; #7F0A
+缋 > hùi; #7F0B
+缌 > sī; #7F0C
+缍 > dŭo; #7F0D
+缎 > duàn; #7F0E
+缏 > biàn; #7F0F
+缐 > xiàn; #7F10
+缑 > gōu; #7F11
+缒 > zhùi; #7F12
+缓 > huăn; #7F13
+缔 > dì; #7F14
+缕 > lǚ; #7F15
+编 > biān; #7F16
+缗 > mín; #7F17
+缘 > yuán; #7F18
+缙 > jìn; #7F19
+缚 > fú; #7F1A
+缛 > rù; #7F1B
+缜 > zhēn; #7F1C
+缝 > féng; #7F1D
+缞 > shuāi; #7F1E
+缟 > găo; #7F1F
+缠 > chán; #7F20
+缡 > lí; #7F21
+缢 > yì; #7F22
+缣 > jiān; #7F23
+缤 > bīn; #7F24
+缥 > piăo; #7F25
+缦 > màn; #7F26
+缧 > léi; #7F27
+缨 > yīng; #7F28
+缩 > sūo; #7F29
+缪 > móu; #7F2A
+缫 > sāo; #7F2B
+缬 > xié; #7F2C
+缭 > liáo; #7F2D
+缮 > shàn; #7F2E
+缯 > zēng; #7F2F
+缰 > jiāng; #7F30
+缱 > qiăn; #7F31
+缲 > zăo; #7F32
+缳 > huán; #7F33
+缴 > jiăo; #7F34
+缵 > zuăn; #7F35
+缶 > fŏu; #7F36
+缷 > xiè; #7F37
+缸 > gāng; #7F38
+缹 > fŏu; #7F39
+缺 > quē; #7F3A
+缻 > fŏu; #7F3B
+缽 > bō; #7F3D
+缾 > píng; #7F3E
+缿 > hòu; #7F3F
+罁 > gāng; #7F41
+罂 > yīng; #7F42
+罃 > yīng; #7F43
+罄 > qìng; #7F44
+罅 > xià; #7F45
+罆 > guàn; #7F46
+罇 > zūn; #7F47
+罈 > tán; #7F48
+罊 > qì; #7F4A
+罋 > wèng; #7F4B
+罌 > yīng; #7F4C
+罍 > léi; #7F4D
+罎 > tán; #7F4E
+罏 > lú; #7F4F
+罐 > guàn; #7F50
+网 > wăng; #7F51
+罒 > wăng; #7F52
+罓 > gāng; #7F53
+罔 > wăng; #7F54
+罕 > hăn; #7F55
+罗 > lūo; #7F57
+罘 > fú; #7F58
+罙 > mí; #7F59
+罚 > fá; #7F5A
+罛 > gū; #7F5B
+罜 > zhŭ; #7F5C
+罝 > jū; #7F5D
+罞 > máo; #7F5E
+罟 > gŭ; #7F5F
+罠 > mín; #7F60
+罡 > gāng; #7F61
+罢 > bà; #7F62
+罣 > guà; #7F63
+罤 > tí; #7F64
+罥 > juàn; #7F65
+罦 > fū; #7F66
+罧 > lín; #7F67
+罨 > yăn; #7F68
+罩 > zhào; #7F69
+罪 > zùi; #7F6A
+罫 > guà; #7F6B
+罬 > zhúo; #7F6C
+罭 > yù; #7F6D
+置 > zhì; #7F6E
+罯 > ăn; #7F6F
+罰 > fá; #7F70
+罱 > năn; #7F71
+署 > shŭ; #7F72
+罳 > sī; #7F73
+罴 > pí; #7F74
+罵 > mà; #7F75
+罶 > lĭu; #7F76
+罷 > bà; #7F77
+罸 > fá; #7F78
+罹 > lí; #7F79
+罺 > chāo; #7F7A
+罻 > wèi; #7F7B
+罼 > bì; #7F7C
+罽 > jì; #7F7D
+罾 > zēng; #7F7E
+罿 > tóng; #7F7F
+羀 > lĭu; #7F80
+羁 > jī; #7F81
+羂 > juàn; #7F82
+羃 > mì; #7F83
+羄 > zhào; #7F84
+羅 > lúo; #7F85
+羆 > pí; #7F86
+羇 > jī; #7F87
+羈 > jī; #7F88
+羉 > luán; #7F89
+羊 > yáng; #7F8A
+羋 > miē; #7F8B
+羌 > qiāng; #7F8C
+羍 > tà; #7F8D
+美 > mĕi; #7F8E
+羏 > yáng; #7F8F
+羐 > yŏu; #7F90
+羑 > yŏu; #7F91
+羒 > fén; #7F92
+羓 > bā; #7F93
+羔 > gāo; #7F94
+羕 > yàng; #7F95
+羖 > gŭ; #7F96
+羗 > qiāng; #7F97
+羘 > zāng; #7F98
+羙 > gāo; #7F99
+羚 > líng; #7F9A
+羛 > yì; #7F9B
+羜 > zhù; #7F9C
+羝 > dī; #7F9D
+羞 > xīu; #7F9E
+羟 > qiān; #7F9F
+羠 > yí; #7FA0
+羡 > xiàn; #7FA1
+羢 > róng; #7FA2
+羣 > qún; #7FA3
+群 > qún; #7FA4
+羥 > qiān; #7FA5
+羦 > huán; #7FA6
+羧 > zūi; #7FA7
+羨 > xiàn; #7FA8
+義 > yì; #7FA9
+羫 > qiāng; #7FAB
+羬 > xián; #7FAC
+羭 > yú; #7FAD
+羮 > gēng; #7FAE
+羯 > jié; #7FAF
+羰 > tāng; #7FB0
+羱 > yuán; #7FB1
+羲 > xī; #7FB2
+羳 > fán; #7FB3
+羴 > shān; #7FB4
+羵 > fĕn; #7FB5
+羶 > shān; #7FB6
+羷 > liăn; #7FB7
+羸 > léi; #7FB8
+羹 > gēng; #7FB9
+羺 > nóu; #7FBA
+羻 > qiàng; #7FBB
+羼 > chàn; #7FBC
+羽 > yŭ; #7FBD
+羾 > gòng; #7FBE
+羿 > yì; #7FBF
+翀 > chóng; #7FC0
+翁 > wēng; #7FC1
+翂 > fēn; #7FC2
+翃 > hóng; #7FC3
+翄 > chì; #7FC4
+翅 > chì; #7FC5
+翆 > cùi; #7FC6
+翇 > fú; #7FC7
+翈 > xiá; #7FC8
+翉 > pĕn; #7FC9
+翊 > yì; #7FCA
+翋 > lā; #7FCB
+翌 > yì; #7FCC
+翍 > pī; #7FCD
+翎 > líng; #7FCE
+翏 > lìu; #7FCF
+翐 > zhì; #7FD0
+翑 > qú; #7FD1
+習 > xí; #7FD2
+翓 > xié; #7FD3
+翔 > xiáng; #7FD4
+翕 > xì; #7FD5
+翖 > xì; #7FD6
+翗 > qí; #7FD7
+翘 > qiáo; #7FD8
+翙 > hùi; #7FD9
+翚 > hūi; #7FDA
+翛 > xiāo; #7FDB
+翜 > sè; #7FDC
+翝 > hóng; #7FDD
+翞 > jiāng; #7FDE
+翟 > dí; #7FDF
+翠 > cùi; #7FE0
+翡 > fĕi; #7FE1
+翢 > tāo; #7FE2
+翣 > shà; #7FE3
+翤 > chì; #7FE4
+翥 > zhù; #7FE5
+翦 > jiăn; #7FE6
+翧 > xuān; #7FE7
+翨 > shì; #7FE8
+翩 > piān; #7FE9
+翪 > zōng; #7FEA
+翫 > wàn; #7FEB
+翬 > hūi; #7FEC
+翭 > hóu; #7FED
+翮 > hé; #7FEE
+翯 > hè; #7FEF
+翰 > hàn; #7FF0
+翱 > áo; #7FF1
+翲 > piāo; #7FF2
+翳 > yì; #7FF3
+翴 > lián; #7FF4
+翵 > qú; #7FF5
+翷 > lín; #7FF7
+翸 > pĕn; #7FF8
+翹 > qiáo; #7FF9
+翺 > áo; #7FFA
+翻 > fān; #7FFB
+翼 > yì; #7FFC
+翽 > hùi; #7FFD
+翾 > xuān; #7FFE
+翿 > dào; #7FFF
+耀 > yào; #8000
+老 > lăo; #8001
+考 > kăo; #8003
+耄 > mào; #8004
+者 > zhĕ; #8005
+耆 > qí; #8006
+耇 > gŏu; #8007
+耈 > gŏu; #8008
+耉 > gŏu; #8009
+耊 > diè; #800A
+耋 > diè; #800B
+而 > ér; #800C
+耍 > shuă; #800D
+耎 > ruăn; #800E
+耏 > ér; #800F
+耐 > nài; #8010
+耑 > zhuān; #8011
+耒 > lĕi; #8012
+耓 > tīng; #8013
+耔 > zĭ; #8014
+耕 > gēng; #8015
+耖 > chào; #8016
+耗 > hào; #8017
+耘 > yún; #8018
+耙 > pá; #8019
+耚 > pī; #801A
+耛 > chí; #801B
+耜 > sì; #801C
+耝 > chú; #801D
+耞 > jiā; #801E
+耟 > jù; #801F
+耠 > hé; #8020
+耡 > chú; #8021
+耢 > lào; #8022
+耣 > lŭn; #8023
+耤 > jí; #8024
+耥 > tăng; #8025
+耦 > ŏu; #8026
+耧 > lóu; #8027
+耨 > nòu; #8028
+耩 > gōu; #8029
+耪 > păng; #802A
+耫 > zé; #802B
+耬 > lóu; #802C
+耭 > jī; #802D
+耮 > lào; #802E
+耯 > hùo; #802F
+耰 > yōu; #8030
+耱 > mò; #8031
+耲 > huái; #8032
+耳 > ĕr; #8033
+耴 > zhé; #8034
+耵 > tīng; #8035
+耶 > yé; #8036
+耷 > dā; #8037
+耸 > sŏng; #8038
+耹 > qín; #8039
+耺 > yún; #803A
+耻 > chĭ; #803B
+耼 > dān; #803C
+耽 > dān; #803D
+耾 > hóng; #803E
+耿 > gĕng; #803F
+聀 > zhí; #8040
+聂 > niè; #8042
+聃 > dān; #8043
+聄 > zhĕn; #8044
+聅 > chè; #8045
+聆 > líng; #8046
+聇 > zhēng; #8047
+聈 > yŏu; #8048
+聉 > wā; #8049
+聊 > liáo; #804A
+聋 > lóng; #804B
+职 > zhí; #804C
+聍 > níng; #804D
+聎 > tiāo; #804E
+聏 > ér; #804F
+聐 > yà; #8050
+聑 > dié; #8051
+聒 > guā; #8052
+联 > lián; #8054
+聕 > hào; #8055
+聖 > shèng; #8056
+聗 > liè; #8057
+聘 > pìn; #8058
+聙 > jīng; #8059
+聚 > jù; #805A
+聛 > bì; #805B
+聜 > dĭ; #805C
+聝 > gúo; #805D
+聞 > wén; #805E
+聟 > xù; #805F
+聠 > píng; #8060
+聡 > cōng; #8061
+聤 > tíng; #8064
+聥 > yŭ; #8065
+聦 > cōng; #8066
+聧 > kúi; #8067
+聩 > kùi; #8069
+聪 > cōng; #806A
+聫 > lián; #806B
+聬 > wĕng; #806C
+聭 > kùi; #806D
+聮 > lián; #806E
+聯 > lián; #806F
+聰 > cōng; #8070
+聱 > áo; #8071
+聲 > shēng; #8072
+聳 > sŏng; #8073
+聴 > tīng; #8074
+聵 > kùi; #8075
+聶 > niè; #8076
+職 > zhí; #8077
+聸 > dān; #8078
+聹 > níng; #8079
+聺 > qie; #807A
+聻 > jī; #807B
+聼 > tīng; #807C
+聽 > tīng; #807D
+聾 > lóng; #807E
+聿 > yù; #807F
+肀 > yù; #8080
+肁 > zhào; #8081
+肂 > sì; #8082
+肃 > sù; #8083
+肄 > yì; #8084
+肅 > sù; #8085
+肆 > sì; #8086
+肇 > zhào; #8087
+肈 > zhào; #8088
+肉 > ròu; #8089
+肊 > yì; #808A
+肋 > lè; #808B
+肌 > jī; #808C
+肍 > qíu; #808D
+肎 > kĕn; #808E
+肏 > cào; #808F
+肐 > gē; #8090
+肑 > dì; #8091
+肒 > huán; #8092
+肓 > huāng; #8093
+肔 > yĭ; #8094
+肕 > rèn; #8095
+肖 > xiào; #8096
+肗 > rŭ; #8097
+肘 > zhŏu; #8098
+肙 > yuān; #8099
+肚 > dù; #809A
+肛 > gāng; #809B
+肜 > róng; #809C
+肝 > gān; #809D
+肞 > chā; #809E
+肟 > wò; #809F
+肠 > cháng; #80A0
+股 > gŭ; #80A1
+肢 > zhī; #80A2
+肣 > hán; #80A3
+肤 > fū; #80A4
+肥 > féi; #80A5
+肦 > fén; #80A6
+肧 > pēi; #80A7
+肨 > pàng; #80A8
+肩 > jiān; #80A9
+肪 > fáng; #80AA
+肫 > zhūn; #80AB
+肬 > yóu; #80AC
+肭 > nà; #80AD
+肮 > háng; #80AE
+肯 > kĕn; #80AF
+肰 > rán; #80B0
+肱 > gōng; #80B1
+育 > yù; #80B2
+肳 > wĕn; #80B3
+肴 > yáo; #80B4
+肵 > jìn; #80B5
+肶 > pí; #80B6
+肷 > qiān; #80B7
+肸 > xì; #80B8
+肹 > xī; #80B9
+肺 > fèi; #80BA
+肻 > kĕn; #80BB
+肼 > jĭng; #80BC
+肽 > tài; #80BD
+肾 > shèn; #80BE
+肿 > zhŏng; #80BF
+胀 > zhàng; #80C0
+胁 > xié; #80C1
+胂 > shēn; #80C2
+胃 > wèi; #80C3
+胄 > zhòu; #80C4
+胅 > dié; #80C5
+胆 > dăn; #80C6
+胇 > fèi; #80C7
+胈 > bá; #80C8
+胉 > bó; #80C9
+胊 > qú; #80CA
+胋 > tián; #80CB
+背 > bèi; #80CC
+胍 > guā; #80CD
+胎 > tāi; #80CE
+胏 > zĭ; #80CF
+胐 > kū; #80D0
+胑 > zhī; #80D1
+胒 > nì; #80D2
+胓 > píng; #80D3
+胔 > zì; #80D4
+胕 > fù; #80D5
+胖 > pàng; #80D6
+胗 > zhēn; #80D7
+胘 > xián; #80D8
+胙 > zùo; #80D9
+胚 > pēi; #80DA
+胛 > jiă; #80DB
+胜 > shèng; #80DC
+胝 > zhī; #80DD
+胞 > bāo; #80DE
+胟 > mŭ; #80DF
+胠 > qū; #80E0
+胡 > hú; #80E1
+胢 > kē; #80E2
+胣 > yĭ; #80E3
+胤 > yìn; #80E4
+胥 > xū; #80E5
+胦 > yāng; #80E6
+胧 > lóng; #80E7
+胨 > dòng; #80E8
+胩 > kă; #80E9
+胪 > lú; #80EA
+胫 > jìng; #80EB
+胬 > nŭ; #80EC
+胭 > yān; #80ED
+胮 > páng; #80EE
+胯 > kuà; #80EF
+胰 > yí; #80F0
+胱 > guāng; #80F1
+胲 > gāi; #80F2
+胳 > gē; #80F3
+胴 > dòng; #80F4
+胵 > zhì; #80F5
+胶 > xiáo; #80F6
+胷 > xīong; #80F7
+胸 > xīong; #80F8
+胹 > ér; #80F9
+胺 > è; #80FA
+胻 > xíng; #80FB
+胼 > pián; #80FC
+能 > néng; #80FD
+胾 > zì; #80FE
+胿 > gui; #80FF
+脀 > chéng; #8100
+脁 > tiào; #8101
+脂 > zhī; #8102
+脃 > cùi; #8103
+脄 > méi; #8104
+脅 > xié; #8105
+脆 > cùi; #8106
+脇 > xié; #8107
+脈 > mò; #8108
+脉 > mài; #8109
+脊 > jí; #810A
+脍 > kuài; #810D
+脎 > sà; #810E
+脏 > zāng; #810F
+脐 > qí; #8110
+脑 > năo; #8111
+脒 > mĭ; #8112
+脓 > nóng; #8113
+脔 > luán; #8114
+脕 > wăn; #8115
+脖 > bó; #8116
+脗 > wĕn; #8117
+脘 > guăn; #8118
+脙 > qíu; #8119
+脚 > jiăo; #811A
+脛 > jìng; #811B
+脜 > róu; #811C
+脝 > hēng; #811D
+脞 > cŭo; #811E
+脟 > liè; #811F
+脠 > shān; #8120
+脡 > tĭng; #8121
+脢 > méi; #8122
+脣 > chún; #8123
+脤 > shèn; #8124
+脥 > xié; #8125
+脦 > de; #8126
+脧 > zūi; #8127
+脨 > cù; #8128
+脩 > xīu; #8129
+脪 > xìn; #812A
+脫 > tūo; #812B
+脬 > pāo; #812C
+脭 > chéng; #812D
+脮 > nĕi; #812E
+脯 > fŭ; #812F
+脰 > dòu; #8130
+脱 > tūo; #8131
+脲 > niào; #8132
+脴 > pĭ; #8134
+脵 > gŭ; #8135
+脶 > guā; #8136
+脷 > lì; #8137
+脸 > liăn; #8138
+脹 > zhàng; #8139
+脺 > cùi; #813A
+脻 > jié; #813B
+脼 > liăng; #813C
+脽 > zhōu; #813D
+脾 > pí; #813E
+脿 > biāo; #813F
+腀 > lún; #8140
+腁 > pián; #8141
+腂 > gùo; #8142
+腃 > kùi; #8143
+腄 > chúi; #8144
+腅 > dàn; #8145
+腆 > tiăn; #8146
+腇 > nĕi; #8147
+腈 > jīng; #8148
+腉 > jiē; #8149
+腊 > là; #814A
+腋 > yì; #814B
+腌 > ān; #814C
+腍 > rĕn; #814D
+腎 > shèn; #814E
+腏 > chùo; #814F
+腐 > fŭ; #8150
+腑 > fŭ; #8151
+腒 > jū; #8152
+腓 > féi; #8153
+腔 > qiāng; #8154
+腕 > wàn; #8155
+腖 > dòng; #8156
+腗 > pí; #8157
+腘 > gúo; #8158
+腙 > zōng; #8159
+腚 > dìng; #815A
+腛 > wū; #815B
+腜 > méi; #815C
+腝 > ruăn; #815D
+腞 > zhuàn; #815E
+腟 > zhì; #815F
+腠 > còu; #8160
+腡 > guā; #8161
+腢 > ŏu; #8162
+腣 > dì; #8163
+腤 > ān; #8164
+腥 > xīng; #8165
+腦 > năo; #8166
+腧 > yú; #8167
+腨 > chuăn; #8168
+腩 > năn; #8169
+腪 > yùn; #816A
+腫 > zhŏng; #816B
+腬 > róu; #816C
+腭 > è; #816D
+腮 > sāi; #816E
+腯 > tú; #816F
+腰 > yāo; #8170
+腱 > jiàn; #8171
+腲 > wĕi; #8172
+腳 > jiăo; #8173
+腴 > yú; #8174
+腵 > jiā; #8175
+腶 > duàn; #8176
+腷 > bì; #8177
+腸 > cháng; #8178
+腹 > fù; #8179
+腺 > xiàn; #817A
+腻 > nì; #817B
+腼 > miăn; #817C
+腽 > wà; #817D
+腾 > téng; #817E
+腿 > tŭi; #817F
+膀 > băng; #8180
+膁 > qiān; #8181
+膂 > lǚ; #8182
+膃 > wà; #8183
+膄 > sòu; #8184
+膅 > táng; #8185
+膆 > sù; #8186
+膇 > zhùi; #8187
+膈 > gé; #8188
+膉 > yì; #8189
+膊 > bó; #818A
+膋 > liáo; #818B
+膌 > jí; #818C
+膍 > pí; #818D
+膎 > xié; #818E
+膏 > gāo; #818F
+膐 > lǚ; #8190
+膑 > bìn; #8191
+膒 > ou; #8192
+膓 > cháng; #8193
+膔 > lù; #8194
+膕 > gúo; #8195
+膖 > pāng; #8196
+膗 > chuái; #8197
+膘 > piăo; #8198
+膙 > jiăng; #8199
+膚 > fū; #819A
+膛 > táng; #819B
+膜 > mò; #819C
+膝 > xī; #819D
+膞 > zhuān; #819E
+膟 > lǜ; #819F
+膠 > jiāo; #81A0
+膡 > yìng; #81A1
+膢 > lǘ; #81A2
+膣 > zhì; #81A3
+膥 > chūn; #81A5
+膦 > liăn; #81A6
+膧 > tóng; #81A7
+膨 > péng; #81A8
+膩 > nì; #81A9
+膪 > zhà; #81AA
+膫 > liáo; #81AB
+膬 > cùi; #81AC
+膭 > gūi; #81AD
+膮 > xiāo; #81AE
+膯 > tēng; #81AF
+膰 > fán; #81B0
+膱 > zhí; #81B1
+膲 > jiāo; #81B2
+膳 > shàn; #81B3
+膴 > wú; #81B4
+膵 > cùi; #81B5
+膶 > rùn; #81B6
+膷 > xiāng; #81B7
+膸 > sŭi; #81B8
+膹 > fèn; #81B9
+膺 > yīng; #81BA
+膻 > tăn; #81BB
+膼 > zhuā; #81BC
+膽 > dăn; #81BD
+膾 > kuài; #81BE
+膿 > nóng; #81BF
+臀 > tún; #81C0
+臁 > lián; #81C1
+臂 > bì; #81C2
+臃 > yŏng; #81C3
+臄 > jué; #81C4
+臅 > chù; #81C5
+臆 > yì; #81C6
+臇 > juăn; #81C7
+臈 > là; #81C8
+臉 > liăn; #81C9
+臊 > sāo; #81CA
+臋 > tún; #81CB
+臌 > gŭ; #81CC
+臍 > qí; #81CD
+臎 > cùi; #81CE
+臏 > bìn; #81CF
+臐 > xūn; #81D0
+臑 > rú; #81D1
+臒 > hùo; #81D2
+臓 > zàng; #81D3
+臔 > xiàn; #81D4
+臕 > biāo; #81D5
+臖 > xìng; #81D6
+臗 > kuān; #81D7
+臘 > là; #81D8
+臙 > yān; #81D9
+臚 > lú; #81DA
+臛 > hùo; #81DB
+臜 > zāng; #81DC
+臝 > lŭo; #81DD
+臞 > qú; #81DE
+臟 > zàng; #81DF
+臠 > luán; #81E0
+臡 > ní; #81E1
+臢 > zāng; #81E2
+臣 > chén; #81E3
+臤 > qiān; #81E4
+臥 > wò; #81E5
+臦 > guàng; #81E6
+臧 > záng; #81E7
+臨 > lín; #81E8
+臩 > guàng; #81E9
+自 > zì; #81EA
+臫 > jiăo; #81EB
+臬 > niè; #81EC
+臭 > chòu; #81ED
+臮 > jì; #81EE
+臯 > gāo; #81EF
+臰 > chòu; #81F0
+臱 > mián; #81F1
+臲 > niè; #81F2
+至 > zhì; #81F3
+致 > zhì; #81F4
+臵 > gé; #81F5
+臶 > jiàn; #81F6
+臷 > dié; #81F7
+臸 > zhì; #81F8
+臹 > xīu; #81F9
+臺 > tái; #81FA
+臻 > zhēn; #81FB
+臼 > jìu; #81FC
+臽 > xiàn; #81FD
+臾 > yú; #81FE
+臿 > chā; #81FF
+舀 > yăo; #8200
+舁 > yú; #8201
+舂 > chōng; #8202
+舃 > xì; #8203
+舄 > xì; #8204
+舅 > jìu; #8205
+舆 > yú; #8206
+與 > yŭ; #8207
+興 > xīng; #8208
+舉 > jŭ; #8209
+舊 > jìu; #820A
+舋 > xìn; #820B
+舌 > shé; #820C
+舍 > shè; #820D
+舏 > jĭu; #820F
+舐 > shì; #8210
+舑 > tān; #8211
+舒 > shū; #8212
+舓 > shì; #8213
+舔 > tiăn; #8214
+舕 > dàn; #8215
+舖 > pù; #8216
+舗 > pù; #8217
+舘 > guăn; #8218
+舙 > huà; #8219
+舚 > tān; #821A
+舛 > chuăn; #821B
+舜 > shùn; #821C
+舝 > xiá; #821D
+舞 > wŭ; #821E
+舟 > zhōu; #821F
+舠 > dāo; #8220
+舡 > gāng; #8221
+舢 > shān; #8222
+舣 > yĭ; #8223
+舥 > pā; #8225
+舦 > tài; #8226
+舧 > fán; #8227
+舨 > băn; #8228
+舩 > chuán; #8229
+航 > háng; #822A
+舫 > făng; #822B
+般 > bān; #822C
+舭 > què; #822D
+舯 > zhōng; #822F
+舰 > jiàn; #8230
+舱 > cāng; #8231
+舲 > líng; #8232
+舳 > zhú; #8233
+舴 > zé; #8234
+舵 > dùo; #8235
+舶 > bó; #8236
+舷 > xián; #8237
+舸 > gĕ; #8238
+船 > chuán; #8239
+舺 > jiá; #823A
+舻 > lŭ; #823B
+舼 > hóng; #823C
+舽 > páng; #823D
+舾 > xī; #823E
+艀 > fú; #8240
+艁 > zào; #8241
+艂 > féng; #8242
+艃 > lí; #8243
+艄 > shāo; #8244
+艅 > yú; #8245
+艆 > láng; #8246
+艇 > tĭng; #8247
+艉 > wĕi; #8249
+艊 > bó; #824A
+艋 > mĕng; #824B
+艌 > niàn; #824C
+艍 > jū; #824D
+艎 > huáng; #824E
+艏 > shŏu; #824F
+艐 > zōng; #8250
+艑 > biàn; #8251
+艒 > mào; #8252
+艓 > dié; #8253
+艕 > bàng; #8255
+艖 > chā; #8256
+艗 > yì; #8257
+艘 > sāo; #8258
+艙 > cāng; #8259
+艚 > cáo; #825A
+艛 > lóu; #825B
+艜 > dài; #825C
+艞 > yào; #825E
+艟 > tóng; #825F
+艡 > dāng; #8261
+艢 > tán; #8262
+艣 > lŭ; #8263
+艤 > yĭ; #8264
+艥 > jiè; #8265
+艦 > jiàn; #8266
+艧 > hùo; #8267
+艨 > méng; #8268
+艩 > qí; #8269
+艪 > lŭ; #826A
+艫 > lú; #826B
+艬 > chán; #826C
+艭 > shuāng; #826D
+艮 > gèn; #826E
+良 > liáng; #826F
+艰 > jiān; #8270
+艱 > jiān; #8271
+色 > sè; #8272
+艳 > yàn; #8273
+艴 > fú; #8274
+艵 > píng; #8275
+艶 > yàn; #8276
+艷 > yàn; #8277
+艸 > căo; #8278
+艹 > căo' 'zì' 'tóu; #8279
+艺 > yì; #827A
+艻 > lè; #827B
+艼 > tīng; #827C
+艽 > qíu; #827D
+艾 > ài; #827E
+艿 > năi; #827F
+芀 > tiáo; #8280
+芁 > jiāo; #8281
+节 > jié; #8282
+芃 > péng; #8283
+芄 > wán; #8284
+芅 > yì; #8285
+芆 > chāi; #8286
+芇 > mián; #8287
+芈 > miē; #8288
+芉 > gān; #8289
+芊 > qiān; #828A
+芋 > yù; #828B
+芌 > yù; #828C
+芍 > shùo; #828D
+芎 > qīong; #828E
+芏 > tŭ; #828F
+芐 > xià; #8290
+芑 > qĭ; #8291
+芒 > máng; #8292
+芓 > zĭ; #8293
+芔 > hŭi; #8294
+芕 > sūi; #8295
+芖 > zhì; #8296
+芗 > xiāng; #8297
+芘 > bī; #8298
+芙 > fú; #8299
+芚 > tún; #829A
+芛 > wĕi; #829B
+芜 > wú; #829C
+芝 > zhī; #829D
+芞 > qĭ; #829E
+芟 > shān; #829F
+芠 > wén; #82A0
+芡 > qiàn; #82A1
+芢 > rén; #82A2
+芣 > fŏu; #82A3
+芤 > kōu; #82A4
+芥 > jiè; #82A5
+芦 > lú; #82A6
+芧 > xù; #82A7
+芨 > jí; #82A8
+芩 > qín; #82A9
+芪 > qí; #82AA
+芫 > yuán; #82AB
+芬 > fēn; #82AC
+芭 > bā; #82AD
+芮 > rùi; #82AE
+芯 > xīn; #82AF
+芰 > jì; #82B0
+花 > huā; #82B1
+芲 > huā; #82B2
+芳 > fāng; #82B3
+芴 > wù; #82B4
+芵 > jué; #82B5
+芶 > gōu; #82B6
+芷 > zhĭ; #82B7
+芸 > yún; #82B8
+芹 > qín; #82B9
+芺 > ăo; #82BA
+芻 > chú; #82BB
+芼 > mào; #82BC
+芽 > yá; #82BD
+芾 > fèi; #82BE
+芿 > rèng; #82BF
+苀 > háng; #82C0
+苁 > cōng; #82C1
+苂 > yín; #82C2
+苃 > yŏu; #82C3
+苄 > biàn; #82C4
+苅 > yì; #82C5
+苇 > wĕi; #82C7
+苈 > lì; #82C8
+苉 > pĭ; #82C9
+苊 > è; #82CA
+苋 > xiàn; #82CB
+苌 > cháng; #82CC
+苍 > cāng; #82CD
+苎 > méng; #82CE
+苏 > sū; #82CF
+苐 > yí; #82D0
+苑 > yuàn; #82D1
+苒 > răn; #82D2
+苓 > líng; #82D3
+苔 > tái; #82D4
+苕 > tiáo; #82D5
+苖 > dĭ; #82D6
+苗 > miáo; #82D7
+苘 > qĭong; #82D8
+苙 > lì; #82D9
+苚 > yòng; #82DA
+苛 > kē; #82DB
+苜 > mù; #82DC
+苝 > pèi; #82DD
+苞 > bāo; #82DE
+苟 > gŏu; #82DF
+苠 > mín; #82E0
+苡 > yĭ; #82E1
+苢 > yĭ; #82E2
+苣 > jù; #82E3
+苤 > pĭ; #82E4
+若 > rùo; #82E5
+苦 > kŭ; #82E6
+苧 > zhù; #82E7
+苨 > nĭ; #82E8
+苩 > bó; #82E9
+苪 > bĭng; #82EA
+苫 > shān; #82EB
+苬 > qíu; #82EC
+苭 > yăo; #82ED
+苮 > xiān; #82EE
+苯 > bĕn; #82EF
+苰 > hóng; #82F0
+英 > yīng; #82F1
+苲 > zhă; #82F2
+苳 > dōng; #82F3
+苴 > jū; #82F4
+苵 > dié; #82F5
+苶 > nié; #82F6
+苷 > gān; #82F7
+苸 > hū; #82F8
+苹 > píng; #82F9
+苺 > méi; #82FA
+苻 > fú; #82FB
+苼 > shēng; #82FC
+苽 > gū; #82FD
+苾 > bì; #82FE
+苿 > wèi; #82FF
+茀 > fú; #8300
+茁 > zhúo; #8301
+茂 > mào; #8302
+范 > fàn; #8303
+茄 > qié; #8304
+茅 > máo; #8305
+茆 > măo; #8306
+茇 > bá; #8307
+茈 > zĭ; #8308
+茉 > mò; #8309
+茊 > zī; #830A
+茋 > dĭ; #830B
+茌 > chí; #830C
+茍 > jì; #830D
+茎 > jīng; #830E
+茏 > lóng; #830F
+茑 > niăo; #8311
+茓 > xué; #8313
+茔 > yíng; #8314
+茕 > qíong; #8315
+茖 > gé; #8316
+茗 > mĭng; #8317
+茘 > lì; #8318
+茙 > róng; #8319
+茚 > yìn; #831A
+茛 > gèn; #831B
+茜 > qiàn; #831C
+茝 > chăi; #831D
+茞 > chén; #831E
+茟 > yù; #831F
+茠 > xīu; #8320
+茡 > zì; #8321
+茢 > liè; #8322
+茣 > wú; #8323
+茤 > jì; #8324
+茥 > kūi; #8325
+茦 > cè; #8326
+茧 > chóng; #8327
+茨 > cí; #8328
+茩 > gŏu; #8329
+茪 > guāng; #832A
+茫 > máng; #832B
+茬 > chí; #832C
+茭 > jiāo; #832D
+茮 > jiāo; #832E
+茯 > fú; #832F
+茰 > yú; #8330
+茱 > zhū; #8331
+茲 > zī; #8332
+茳 > jiāng; #8333
+茴 > húi; #8334
+茵 > yīn; #8335
+茶 > chá; #8336
+茷 > fá; #8337
+茸 > róng; #8338
+茹 > rú; #8339
+茺 > chōng; #833A
+茻 > măng; #833B
+茼 > tóng; #833C
+茽 > zhòng; #833D
+茿 > zhú; #833F
+荀 > xún; #8340
+荁 > huán; #8341
+荂 > kuā; #8342
+荃 > quán; #8343
+荄 > gāi; #8344
+荅 > dā; #8345
+荆 > jīng; #8346
+荇 > xìng; #8347
+荈 > quàn; #8348
+草 > căo; #8349
+荊 > jīng; #834A
+荋 > ér; #834B
+荌 > àn; #834C
+荍 > shōu; #834D
+荎 > chí; #834E
+荏 > rĕn; #834F
+荐 > jiàn; #8350
+荑 > tí; #8351
+荒 > huāng; #8352
+荓 > píng; #8353
+荔 > lì; #8354
+荕 > jīn; #8355
+荖 > lăo; #8356
+荗 > shù; #8357
+荘 > zhuāng; #8358
+荙 > dá; #8359
+荚 > jiá; #835A
+荛 > ráo; #835B
+荜 > bì; #835C
+荝 > zé; #835D
+荞 > qiáo; #835E
+荟 > hùi; #835F
+荠 > qí; #8360
+荡 > dàng; #8361
+荣 > róng; #8363
+荤 > hūn; #8364
+荥 > yíng; #8365
+荦 > lùo; #8366
+荧 > yíng; #8367
+荨 > xún; #8368
+荩 > jìn; #8369
+荪 > sūn; #836A
+荫 > yìn; #836B
+荬 > măi; #836C
+荭 > hóng; #836D
+荮 > zhòu; #836E
+药 > yào; #836F
+荰 > dù; #8370
+荱 > wĕi; #8371
+荲 > chù; #8372
+荳 > dòu; #8373
+荴 > fū; #8374
+荵 > rĕn; #8375
+荶 > yín; #8376
+荷 > hé; #8377
+荸 > bí; #8378
+荹 > bù; #8379
+荺 > yún; #837A
+荻 > dí; #837B
+荼 > tú; #837C
+荽 > sūi; #837D
+荾 > sūi; #837E
+荿 > chéng; #837F
+莀 > chén; #8380
+莁 > wú; #8381
+莂 > bié; #8382
+莃 > xī; #8383
+莄 > gĕng; #8384
+莅 > lì; #8385
+莆 > fŭ; #8386
+莇 > zhù; #8387
+莈 > mò; #8388
+莉 > lì; #8389
+莊 > zhuāng; #838A
+莋 > jí; #838B
+莌 > dúo; #838C
+莍 > qíu; #838D
+莎 > shā; #838E
+莏 > sūo; #838F
+莐 > chén; #8390
+莑 > fēng; #8391
+莒 > jŭ; #8392
+莓 > méi; #8393
+莔 > méng; #8394
+莕 > xìng; #8395
+莖 > jīng; #8396
+莗 > chē; #8397
+莘 > xīn; #8398
+莙 > jūn; #8399
+莚 > yán; #839A
+莛 > tíng; #839B
+莜 > diào; #839C
+莝 > cùo; #839D
+莞 > wăn; #839E
+莟 > hàn; #839F
+莠 > yŏu; #83A0
+莡 > cùo; #83A1
+莢 > jiá; #83A2
+莣 > wáng; #83A3
+莤 > yóu; #83A4
+莥 > nĭu; #83A5
+莦 > shāo; #83A6
+莧 > xiàn; #83A7
+莨 > láng; #83A8
+莩 > fú; #83A9
+莪 > é; #83AA
+莫 > mò; #83AB
+莬 > wèn; #83AC
+莭 > jié; #83AD
+莮 > nán; #83AE
+莯 > mù; #83AF
+莰 > kăn; #83B0
+莱 > lái; #83B1
+莲 > lián; #83B2
+莳 > shí; #83B3
+莴 > wō; #83B4
+莶 > liăn; #83B6
+获 > hùo; #83B7
+莸 > yóu; #83B8
+莹 > yíng; #83B9
+莺 > yīng; #83BA
+莼 > chún; #83BC
+莽 > măng; #83BD
+莾 > măng; #83BE
+莿 > cì; #83BF
+菀 > wăn; #83C0
+菁 > jīng; #83C1
+菂 > dī; #83C2
+菃 > qú; #83C3
+菄 > dōng; #83C4
+菅 > jiān; #83C5
+菆 > zōu; #83C6
+菇 > gū; #83C7
+菈 > lā; #83C8
+菉 > lù; #83C9
+菊 > jú; #83CA
+菋 > wèi; #83CB
+菌 > jùn; #83CC
+菍 > niè; #83CD
+菎 > kūn; #83CE
+菏 > hé; #83CF
+菐 > pú; #83D0
+菑 > zī; #83D1
+菒 > găo; #83D2
+菓 > gŭo; #83D3
+菔 > fú; #83D4
+菕 > lún; #83D5
+菖 > chāng; #83D6
+菗 > chóu; #83D7
+菘 > sōng; #83D8
+菙 > chúi; #83D9
+菚 > zhàn; #83DA
+菛 > mén; #83DB
+菜 > cài; #83DC
+菝 > bá; #83DD
+菞 > lí; #83DE
+菟 > tù; #83DF
+菠 > bō; #83E0
+菡 > hàn; #83E1
+菢 > bào; #83E2
+菣 > qìn; #83E3
+菤 > juăn; #83E4
+菥 > xī; #83E5
+菦 > qín; #83E6
+菧 > dĭ; #83E7
+菨 > jiē; #83E8
+菩 > pú; #83E9
+菪 > dàng; #83EA
+菫 > jĭn; #83EB
+菬 > zhăo; #83EC
+菭 > tái; #83ED
+菮 > gēng; #83EE
+華 > huá; #83EF
+菰 > gū; #83F0
+菱 > líng; #83F1
+菲 > fēi; #83F2
+菳 > jīn; #83F3
+菴 > ān; #83F4
+菵 > wăng; #83F5
+菶 > bĕng; #83F6
+菷 > zhŏu; #83F7
+菸 > yān; #83F8
+菹 > jū; #83F9
+菺 > jiān; #83FA
+菻 > lĭn; #83FB
+菼 > tăn; #83FC
+菽 > shú; #83FD
+菾 > tián; #83FE
+菿 > dào; #83FF
+萀 > hŭ; #8400
+萁 > qí; #8401
+萂 > hé; #8402
+萃 > cùi; #8403
+萄 > táo; #8404
+萅 > chūn; #8405
+萆 > bēi; #8406
+萇 > cháng; #8407
+萈 > huán; #8408
+萉 > féi; #8409
+萊 > lái; #840A
+萋 > qī; #840B
+萌 > méng; #840C
+萍 > píng; #840D
+萎 > wēi; #840E
+萏 > dàn; #840F
+萐 > shà; #8410
+萑 > huán; #8411
+萒 > yăn; #8412
+萓 > yí; #8413
+萔 > tiáo; #8414
+萕 > qí; #8415
+萖 > wăn; #8416
+萗 > cè; #8417
+萘 > nài; #8418
+萚 > tùo; #841A
+萛 > jīu; #841B
+萜 > tiē; #841C
+萝 > lúo; #841D
+萠 > méng; #8420
+萤 > yíng; #8424
+营 > yíng; #8425
+萦 > yíng; #8426
+萧 > xiāo; #8427
+萨 > sà; #8428
+萩 > qīu; #8429
+萪 > kē; #842A
+萫 > xiàng; #842B
+萬 > wàn; #842C
+萭 > yŭ; #842D
+萮 > yù; #842E
+萯 > fù; #842F
+萰 > liàn; #8430
+萱 > xuān; #8431
+萲 > yuán; #8432
+萳 > nán; #8433
+萴 > zé; #8434
+萵 > wō; #8435
+萶 > chŭn; #8436
+萷 > xiāo; #8437
+萸 > yú; #8438
+萹 > piān; #8439
+萺 > mào; #843A
+萻 > ān; #843B
+萼 > è; #843C
+落 > lùo; #843D
+萾 > yíng; #843E
+萿 > húo; #843F
+葀 > guā; #8440
+葁 > jiāng; #8441
+葂 > miăn; #8442
+葃 > zúo; #8443
+葄 > zùo; #8444
+葅 > jū; #8445
+葆 > băo; #8446
+葇 > róu; #8447
+葈 > xĭ; #8448
+葉 > xié; #8449
+葊 > ān; #844A
+葋 > qú; #844B
+葌 > jiān; #844C
+葍 > fú; #844D
+葎 > lǜ; #844E
+葏 > jīng; #844F
+葐 > pén; #8450
+葑 > fēng; #8451
+葒 > hóng; #8452
+葓 > hóng; #8453
+葔 > hóu; #8454
+葕 > yán; #8455
+葖 > tú; #8456
+著 > zhù; #8457
+葘 > zī; #8458
+葙 > xiāng; #8459
+葚 > shèn; #845A
+葛 > gĕ; #845B
+葜 > jié; #845C
+葝 > jìng; #845D
+葞 > mĭ; #845E
+葟 > huáng; #845F
+葠 > shēn; #8460
+葡 > pú; #8461
+葢 > gài; #8462
+董 > dŏng; #8463
+葤 > zhòu; #8464
+葥 > qián; #8465
+葦 > wĕi; #8466
+葧 > bó; #8467
+葨 > wēi; #8468
+葩 > pā; #8469
+葪 > jì; #846A
+葫 > hú; #846B
+葬 > zàng; #846C
+葭 > jiā; #846D
+葮 > duàn; #846E
+葯 > yào; #846F
+葰 > jùn; #8470
+葱 > cōng; #8471
+葲 > quán; #8472
+葳 > wēi; #8473
+葴 > xián; #8474
+葵 > kúi; #8475
+葶 > tíng; #8476
+葷 > hūn; #8477
+葸 > xĭ; #8478
+葹 > shī; #8479
+葺 > qì; #847A
+葻 > lán; #847B
+葼 > zōng; #847C
+葽 > yāo; #847D
+葾 > yuān; #847E
+葿 > méi; #847F
+蒀 > yūn; #8480
+蒁 > shù; #8481
+蒂 > dì; #8482
+蒃 > zhuàn; #8483
+蒄 > guān; #8484
+蒆 > xuē; #8486
+蒇 > chăn; #8487
+蒈 > kăi; #8488
+蒉 > kùi; #8489
+蒋 > jiăng; #848B
+蒌 > lóu; #848C
+蒍 > wéi; #848D
+蒎 > pài; #848E
+蒐 > sōu; #8490
+蒑 > yīn; #8491
+蒒 > shī; #8492
+蒓 > chún; #8493
+蒔 > shí; #8494
+蒕 > yūn; #8495
+蒖 > zhēn; #8496
+蒗 > làng; #8497
+蒘 > nú; #8498
+蒙 > méng; #8499
+蒚 > hé; #849A
+蒛 > quē; #849B
+蒜 > suàn; #849C
+蒝 > yuán; #849D
+蒞 > lì; #849E
+蒟 > jŭ; #849F
+蒠 > xí; #84A0
+蒡 > páng; #84A1
+蒢 > chú; #84A2
+蒣 > xú; #84A3
+蒤 > tú; #84A4
+蒥 > líu; #84A5
+蒦 > wò; #84A6
+蒧 > zhēn; #84A7
+蒨 > qiàn; #84A8
+蒩 > zū; #84A9
+蒪 > pò; #84AA
+蒫 > cūo; #84AB
+蒬 > yuān; #84AC
+蒭 > chú; #84AD
+蒮 > yù; #84AE
+蒯 > kuăi; #84AF
+蒰 > pán; #84B0
+蒱 > pú; #84B1
+蒲 > pú; #84B2
+蒳 > nà; #84B3
+蒴 > shùo; #84B4
+蒵 > xī; #84B5
+蒶 > fén; #84B6
+蒷 > yún; #84B7
+蒸 > zhēng; #84B8
+蒹 > jiān; #84B9
+蒺 > jí; #84BA
+蒻 > rùo; #84BB
+蒼 > cāng; #84BC
+蒽 > ēn; #84BD
+蒾 > mí; #84BE
+蒿 > hāo; #84BF
+蓀 > sūn; #84C0
+蓁 > zhēn; #84C1
+蓂 > míng; #84C2
+蓃 > sou; #84C3
+蓄 > xù; #84C4
+蓅 > líu; #84C5
+蓆 > xí; #84C6
+蓇 > gŭ; #84C7
+蓈 > láng; #84C8
+蓉 > róng; #84C9
+蓊 > wĕng; #84CA
+蓋 > gài; #84CB
+蓌 > cùo; #84CC
+蓍 > shī; #84CD
+蓎 > táng; #84CE
+蓏 > lŭo; #84CF
+蓐 > rù; #84D0
+蓑 > sūo; #84D1
+蓒 > xiān; #84D2
+蓓 > bèi; #84D3
+蓔 > yăo; #84D4
+蓕 > gùi; #84D5
+蓖 > bī; #84D6
+蓗 > zŏng; #84D7
+蓘 > gŭn; #84D8
+蓚 > xīu; #84DA
+蓛 > cè; #84DB
+蓝 > lán; #84DD
+蓟 > jì; #84DF
+蓠 > lí; #84E0
+蓡 > cān; #84E1
+蓢 > láng; #84E2
+蓣 > yù; #84E3
+蓥 > yìng; #84E5
+蓦 > mò; #84E6
+蓧 > diào; #84E7
+蓨 > tiāo; #84E8
+蓩 > mào; #84E9
+蓪 > tōng; #84EA
+蓫 > zhú; #84EB
+蓬 > péng; #84EC
+蓭 > ān; #84ED
+蓮 > lián; #84EE
+蓯 > cōng; #84EF
+蓰 > xĭ; #84F0
+蓱 > píng; #84F1
+蓲 > qīu; #84F2
+蓳 > jìn; #84F3
+蓴 > chún; #84F4
+蓵 > jié; #84F5
+蓶 > wĕi; #84F6
+蓷 > tūi; #84F7
+蓸 > cáo; #84F8
+蓹 > yŭ; #84F9
+蓺 > yì; #84FA
+蓻 > jí; #84FB
+蓼 > liăo; #84FC
+蓽 > bì; #84FD
+蓾 > lŭ; #84FE
+蓿 > sù; #84FF
+蔀 > bù; #8500
+蔁 > zhāng; #8501
+蔂 > lúo; #8502
+蔃 > jiàng; #8503
+蔄 > màn; #8504
+蔅 > yán; #8505
+蔆 > líng; #8506
+蔇 > jì; #8507
+蔈 > piăo; #8508
+蔉 > gŭn; #8509
+蔊 > hăn; #850A
+蔋 > dí; #850B
+蔌 > sù; #850C
+蔍 > lù; #850D
+蔎 > shè; #850E
+蔏 > shāng; #850F
+蔐 > dí; #8510
+蔑 > miè; #8511
+蔒 > xūn; #8512
+蔓 > màn; #8513
+蔔 > bó; #8514
+蔕 > dì; #8515
+蔖 > cúo; #8516
+蔗 > zhè; #8517
+蔘 > sēn; #8518
+蔙 > xuàn; #8519
+蔚 > wèi; #851A
+蔛 > hú; #851B
+蔜 > áo; #851C
+蔝 > mĭ; #851D
+蔞 > lóu; #851E
+蔟 > cù; #851F
+蔠 > zhōng; #8520
+蔡 > cài; #8521
+蔢 > pó; #8522
+蔣 > jiăng; #8523
+蔤 > mì; #8524
+蔥 > cōng; #8525
+蔦 > niăo; #8526
+蔧 > hùi; #8527
+蔨 > jùn; #8528
+蔩 > yín; #8529
+蔪 > jiàn; #852A
+蔫 > yān; #852B
+蔬 > shū; #852C
+蔭 > yìn; #852D
+蔮 > kùi; #852E
+蔯 > chén; #852F
+蔰 > hù; #8530
+蔱 > shā; #8531
+蔲 > kòu; #8532
+蔳 > qiàn; #8533
+蔴 > má; #8534
+蔵 > zāng; #8535
+蔷 > qiáng; #8537
+蔸 > dōu; #8538
+蔹 > liàn; #8539
+蔺 > lìn; #853A
+蔻 > kòu; #853B
+蔼 > ăi; #853C
+蔽 > bì; #853D
+蔾 > lí; #853E
+蔿 > wéi; #853F
+蕀 > jí; #8540
+蕁 > xún; #8541
+蕂 > shèng; #8542
+蕃 > fán; #8543
+蕄 > méng; #8544
+蕅 > ŏu; #8545
+蕆 > chăn; #8546
+蕇 > diăn; #8547
+蕈 > xùn; #8548
+蕉 > jiāo; #8549
+蕊 > rŭi; #854A
+蕋 > rŭi; #854B
+蕌 > lĕi; #854C
+蕍 > yú; #854D
+蕎 > qiáo; #854E
+蕏 > chú; #854F
+蕐 > huá; #8550
+蕑 > jiān; #8551
+蕒 > măi; #8552
+蕓 > yún; #8553
+蕔 > bāo; #8554
+蕕 > yóu; #8555
+蕖 > qú; #8556
+蕗 > lù; #8557
+蕘 > ráo; #8558
+蕙 > hùi; #8559
+蕚 > è; #855A
+蕛 > téng; #855B
+蕜 > fĕi; #855C
+蕝 > jué; #855D
+蕞 > zùi; #855E
+蕟 > fà; #855F
+蕠 > rú; #8560
+蕡 > fén; #8561
+蕢 > kùi; #8562
+蕣 > shùn; #8563
+蕤 > rúi; #8564
+蕥 > yă; #8565
+蕦 > xū; #8566
+蕧 > fù; #8567
+蕨 > jué; #8568
+蕩 > dàng; #8569
+蕪 > wú; #856A
+蕫 > tóng; #856B
+蕬 > sī; #856C
+蕭 > xiāo; #856D
+蕮 > xì; #856E
+蕯 > lóng; #856F
+蕰 > yùn; #8570
+蕲 > qí; #8572
+蕳 > jiān; #8573
+蕴 > yùn; #8574
+蕵 > sūn; #8575
+蕶 > líng; #8576
+蕷 > yù; #8577
+蕸 > xiá; #8578
+蕹 > yōng; #8579
+蕺 > jí; #857A
+蕻 > hòng; #857B
+蕼 > sì; #857C
+蕽 > nóng; #857D
+蕾 > lĕi; #857E
+蕿 > xuān; #857F
+薀 > yùn; #8580
+薁 > yù; #8581
+薂 > xí; #8582
+薃 > hào; #8583
+薄 > bó; #8584
+薅 > hāo; #8585
+薆 > ài; #8586
+薇 > wéi; #8587
+薈 > hùi; #8588
+薉 > wèi; #8589
+薊 > jì; #858A
+薋 > cī; #858B
+薌 > xiāng; #858C
+薍 > luàn; #858D
+薎 > miè; #858E
+薏 > yì; #858F
+薐 > léng; #8590
+薑 > jiāng; #8591
+薒 > càn; #8592
+薓 > shēn; #8593
+薔 > qiáng; #8594
+薕 > lián; #8595
+薖 > kē; #8596
+薗 > yuán; #8597
+薘 > dá; #8598
+薙 > tì; #8599
+薚 > táng; #859A
+薛 > xiē; #859B
+薜 > bì; #859C
+薝 > zhán; #859D
+薞 > sūn; #859E
+薟 > liăn; #859F
+薠 > fán; #85A0
+薡 > dĭng; #85A1
+薢 > jiē; #85A2
+薣 > gŭ; #85A3
+薤 > xiè; #85A4
+薥 > shŭ; #85A5
+薦 > jiàn; #85A6
+薧 > kăo; #85A7
+薨 > hōng; #85A8
+薩 > sà; #85A9
+薪 > xīn; #85AA
+薫 > xūn; #85AB
+薬 > yào; #85AC
+薮 > sŏu; #85AE
+薯 > shŭ; #85AF
+薰 > xūn; #85B0
+薱 > dùi; #85B1
+薲 > pín; #85B2
+薳 > wĕi; #85B3
+薴 > néng; #85B4
+薵 > chóu; #85B5
+薶 > mái; #85B6
+薷 > rú; #85B7
+薸 > piāo; #85B8
+薹 > tái; #85B9
+薺 > qí; #85BA
+薻 > zăo; #85BB
+薼 > chén; #85BC
+薽 > zhēn; #85BD
+薾 > ĕr; #85BE
+薿 > nĭ; #85BF
+藀 > yíng; #85C0
+藁 > găo; #85C1
+藂 > còng; #85C2
+藃 > xiāo; #85C3
+藄 > qí; #85C4
+藅 > fá; #85C5
+藆 > jiăn; #85C6
+藇 > xù; #85C7
+藈 > kūi; #85C8
+藉 > jiè; #85C9
+藊 > biăn; #85CA
+藋 > diào; #85CB
+藌 > mì; #85CC
+藍 > lán; #85CD
+藎 > jìn; #85CE
+藏 > cáng; #85CF
+藐 > miăo; #85D0
+藑 > qíong; #85D1
+藒 > qiè; #85D2
+藓 > xiăn; #85D3
+藕 > ŏu; #85D5
+藖 > xián; #85D6
+藗 > sù; #85D7
+藘 > lǘ; #85D8
+藙 > yì; #85D9
+藚 > xù; #85DA
+藛 > xiĕ; #85DB
+藜 > lí; #85DC
+藝 > yì; #85DD
+藞 > lă; #85DE
+藟 > lĕi; #85DF
+藠 > xiào; #85E0
+藡 > dí; #85E1
+藢 > zhĭ; #85E2
+藣 > bēi; #85E3
+藤 > téng; #85E4
+藥 > yào; #85E5
+藦 > mò; #85E6
+藧 > huăn; #85E7
+藨 > piăo; #85E8
+藩 > fán; #85E9
+藪 > sŏu; #85EA
+藫 > tán; #85EB
+藬 > tūi; #85EC
+藭 > qíong; #85ED
+藮 > qiáo; #85EE
+藯 > wèi; #85EF
+藰 > líu; #85F0
+藱 > hùi; #85F1
+藳 > găo; #85F3
+藴 > yùn; #85F4
+藶 > lì; #85F6
+藷 > shŭ; #85F7
+藸 > chú; #85F8
+藹 > ăi; #85F9
+藺 > lìn; #85FA
+藻 > zăo; #85FB
+藼 > xuān; #85FC
+藽 > chèn; #85FD
+藾 > lài; #85FE
+藿 > hùo; #85FF
+蘀 > tùo; #8600
+蘁 > wù; #8601
+蘂 > rŭi; #8602
+蘃 > rŭi; #8603
+蘄 > qí; #8604
+蘅 > héng; #8605
+蘆 > lú; #8606
+蘇 > sū; #8607
+蘈 > túi; #8608
+蘉 > máng; #8609
+蘊 > yùn; #860A
+蘋 > pín; #860B
+蘌 > yŭ; #860C
+蘍 > xūn; #860D
+蘎 > jì; #860E
+蘏 > jīong; #860F
+蘐 > xiān; #8610
+蘑 > mó; #8611
+蘓 > sū; #8613
+蘔 > jīong; #8614
+蘖 > niè; #8616
+蘗 > bò; #8617
+蘘 > ráng; #8618
+蘙 > yì; #8619
+蘚 > xiăn; #861A
+蘛 > yú; #861B
+蘜 > jú; #861C
+蘝 > liàn; #861D
+蘞 > liàn; #861E
+蘟 > yĭn; #861F
+蘠 > qiáng; #8620
+蘡 > yīng; #8621
+蘢 > lóng; #8622
+蘣 > tòng; #8623
+蘤 > wĕi; #8624
+蘥 > yuè; #8625
+蘦 > líng; #8626
+蘧 > qú; #8627
+蘨 > yáo; #8628
+蘩 > fán; #8629
+蘪 > mí; #862A
+蘫 > lán; #862B
+蘬 > kūi; #862C
+蘭 > lán; #862D
+蘮 > jì; #862E
+蘯 > dàng; #862F
+蘱 > lèi; #8631
+蘲 > léi; #8632
+蘳 > huă; #8633
+蘴 > fēng; #8634
+蘵 > zhí; #8635
+蘶 > wèi; #8636
+蘷 > kúi; #8637
+蘸 > zhàn; #8638
+蘹 > huài; #8639
+蘺 > lí; #863A
+蘻 > jì; #863B
+蘼 > mí; #863C
+蘽 > lĕi; #863D
+蘾 > huài; #863E
+蘿 > lúo; #863F
+虀 > jī; #8640
+虁 > kúi; #8641
+虂 > lù; #8642
+虃 > jiān; #8643
+虆 > léi; #8646
+虇 > quăn; #8647
+虈 > xiāo; #8648
+虉 > yì; #8649
+虊 > luán; #864A
+虋 > mén; #864B
+虌 > biē; #864C
+虍 > hū; #864D
+虎 > hŭ; #864E
+虏 > lŭ; #864F
+虐 > nǜe; #8650
+虑 > lǜ; #8651
+虒 > sī; #8652
+虓 > xiāo; #8653
+虔 > qián; #8654
+處 > chù; #8655
+虖 > hū; #8656
+虗 > xū; #8657
+虘 > cúo; #8658
+虙 > fú; #8659
+虚 > xū; #865A
+虛 > xū; #865B
+虜 > lŭ; #865C
+虝 > hŭ; #865D
+虞 > yú; #865E
+號 > hào; #865F
+虠 > jiăo; #8660
+虡 > jù; #8661
+虢 > gúo; #8662
+虣 > bào; #8663
+虤 > yán; #8664
+虥 > zhàn; #8665
+虦 > zhàn; #8666
+虧 > kūi; #8667
+虨 > bān; #8668
+虩 > xì; #8669
+虪 > shú; #866A
+虫 > chóng; #866B
+虬 > qíu; #866C
+虭 > diāo; #866D
+虮 > jī; #866E
+虯 > qíu; #866F
+虰 > chéng; #8670
+虱 > shī; #8671
+虳 > dì; #8673
+虴 > zhé; #8674
+虵 > shé; #8675
+虶 > yū; #8676
+虷 > gān; #8677
+虸 > zĭ; #8678
+虹 > hóng; #8679
+虺 > hŭi; #867A
+虻 > méng; #867B
+虼 > gè; #867C
+虽 > sūi; #867D
+虾 > xiā; #867E
+虿 > chài; #867F
+蚀 > shí; #8680
+蚁 > yĭ; #8681
+蚂 > mă; #8682
+蚃 > xiàng; #8683
+蚄 > fāng; #8684
+蚅 > è; #8685
+蚆 > pā; #8686
+蚇 > chĭ; #8687
+蚈 > qiān; #8688
+蚉 > wén; #8689
+蚊 > wén; #868A
+蚋 > rùi; #868B
+蚌 > bàng; #868C
+蚍 > bĭ; #868D
+蚎 > yuè; #868E
+蚏 > yuè; #868F
+蚐 > jūn; #8690
+蚑 > qí; #8691
+蚒 > rán; #8692
+蚓 > yĭn; #8693
+蚔 > qí; #8694
+蚕 > tiăn; #8695
+蚖 > yuán; #8696
+蚗 > jué; #8697
+蚘 > húi; #8698
+蚙 > qín; #8699
+蚚 > qí; #869A
+蚛 > zhòng; #869B
+蚜 > yá; #869C
+蚝 > cì; #869D
+蚞 > mù; #869E
+蚟 > wáng; #869F
+蚠 > fén; #86A0
+蚡 > fén; #86A1
+蚢 > háng; #86A2
+蚣 > gōng; #86A3
+蚤 > zăo; #86A4
+蚥 > fŭ; #86A5
+蚦 > rán; #86A6
+蚧 > jiè; #86A7
+蚨 > fú; #86A8
+蚩 > chī; #86A9
+蚪 > dŏu; #86AA
+蚫 > piáo; #86AB
+蚬 > xiàn; #86AC
+蚭 > ní; #86AD
+蚮 > tè; #86AE
+蚯 > qīu; #86AF
+蚰 > yóu; #86B0
+蚱 > zhà; #86B1
+蚲 > píng; #86B2
+蚳 > chí; #86B3
+蚴 > yŏu; #86B4
+蚵 > hé; #86B5
+蚶 > hān; #86B6
+蚷 > jù; #86B7
+蚸 > lì; #86B8
+蚹 > fù; #86B9
+蚺 > rán; #86BA
+蚻 > zhá; #86BB
+蚼 > gŏu; #86BC
+蚽 > pí; #86BD
+蚾 > bŏ; #86BE
+蚿 > xián; #86BF
+蛀 > zhù; #86C0
+蛁 > diāo; #86C1
+蛂 > biĕ; #86C2
+蛃 > bĭng; #86C3
+蛄 > gū; #86C4
+蛅 > rán; #86C5
+蛆 > qū; #86C6
+蛇 > shé; #86C7
+蛈 > tiè; #86C8
+蛉 > líng; #86C9
+蛊 > gŭ; #86CA
+蛋 > dàn; #86CB
+蛌 > gŭ; #86CC
+蛍 > yíng; #86CD
+蛎 > lì; #86CE
+蛏 > chēng; #86CF
+蛐 > qū; #86D0
+蛑 > móu; #86D1
+蛒 > gé; #86D2
+蛓 > cì; #86D3
+蛔 > húi; #86D4
+蛕 > húi; #86D5
+蛖 > máng; #86D6
+蛗 > fù; #86D7
+蛘 > yáng; #86D8
+蛙 > wā; #86D9
+蛚 > liè; #86DA
+蛛 > zhū; #86DB
+蛜 > yī; #86DC
+蛝 > xián; #86DD
+蛞 > kùo; #86DE
+蛟 > jiāo; #86DF
+蛠 > lì; #86E0
+蛡 > yì; #86E1
+蛢 > píng; #86E2
+蛣 > jī; #86E3
+蛤 > há; #86E4
+蛥 > shé; #86E5
+蛦 > yí; #86E6
+蛧 > wăng; #86E7
+蛨 > mò; #86E8
+蛩 > qíong; #86E9
+蛪 > qiè; #86EA
+蛫 > gŭi; #86EB
+蛬 > gŏng; #86EC
+蛭 > zhì; #86ED
+蛮 > mán; #86EE
+蛰 > zhí; #86F0
+蛱 > jiá; #86F1
+蛲 > ráo; #86F2
+蛳 > sī; #86F3
+蛴 > qí; #86F4
+蛵 > xīng; #86F5
+蛶 > liè; #86F6
+蛷 > qíu; #86F7
+蛸 > shāo; #86F8
+蛹 > yŏng; #86F9
+蛺 > jiá; #86FA
+蛻 > shùi; #86FB
+蛼 > chē; #86FC
+蛽 > bài; #86FD
+蛾 > é; #86FE
+蛿 > hàn; #86FF
+蜀 > shŭ; #8700
+蜁 > xuán; #8701
+蜂 > fēng; #8702
+蜃 > shèn; #8703
+蜄 > zhèn; #8704
+蜅 > fŭ; #8705
+蜆 > xiàn; #8706
+蜇 > zhé; #8707
+蜈 > wú; #8708
+蜉 > fú; #8709
+蜊 > lí; #870A
+蜋 > láng; #870B
+蜌 > bì; #870C
+蜍 > chú; #870D
+蜎 > yuān; #870E
+蜏 > yŏu; #870F
+蜐 > jié; #8710
+蜑 > dàn; #8711
+蜒 > yán; #8712
+蜓 > tíng; #8713
+蜔 > diàn; #8714
+蜕 > shùi; #8715
+蜖 > húi; #8716
+蜗 > guā; #8717
+蜘 > zhī; #8718
+蜙 > sōng; #8719
+蜚 > fēi; #871A
+蜛 > jū; #871B
+蜜 > mì; #871C
+蜝 > qí; #871D
+蜞 > qí; #871E
+蜟 > yù; #871F
+蜠 > jŭn; #8720
+蜡 > zhà; #8721
+蜢 > mĕng; #8722
+蜣 > qiāng; #8723
+蜤 > sī; #8724
+蜥 > xī; #8725
+蜦 > lún; #8726
+蜧 > lì; #8727
+蜨 > dié; #8728
+蜩 > tiáo; #8729
+蜪 > tāo; #872A
+蜫 > kūn; #872B
+蜬 > gān; #872C
+蜭 > hàn; #872D
+蜮 > yù; #872E
+蜯 > bàng; #872F
+蜰 > féi; #8730
+蜱 > pí; #8731
+蜲 > wĕi; #8732
+蜳 > dūn; #8733
+蜴 > yì; #8734
+蜵 > yuān; #8735
+蜶 > sù; #8736
+蜷 > quán; #8737
+蜸 > qiăn; #8738
+蜹 > rùi; #8739
+蜺 > ní; #873A
+蜻 > qīng; #873B
+蜼 > wèi; #873C
+蜽 > liăng; #873D
+蜾 > gŭo; #873E
+蜿 > wān; #873F
+蝀 > dōng; #8740
+蝁 > è; #8741
+蝂 > băn; #8742
+蝃 > dì; #8743
+蝄 > wăng; #8744
+蝅 > cán; #8745
+蝆 > yăng; #8746
+蝇 > yíng; #8747
+蝈 > gūo; #8748
+蝉 > chán; #8749
+蝋 > là; #874B
+蝌 > kē; #874C
+蝍 > jí; #874D
+蝎 > hé; #874E
+蝏 > tíng; #874F
+蝐 > mài; #8750
+蝑 > xū; #8751
+蝒 > mián; #8752
+蝓 > yú; #8753
+蝔 > jiē; #8754
+蝕 > shí; #8755
+蝖 > xuān; #8756
+蝗 > huáng; #8757
+蝘 > yăn; #8758
+蝙 > biān; #8759
+蝚 > róu; #875A
+蝛 > wēi; #875B
+蝜 > fù; #875C
+蝝 > yuán; #875D
+蝞 > mèi; #875E
+蝟 > wèi; #875F
+蝠 > fú; #8760
+蝡 > ruăn; #8761
+蝢 > xié; #8762
+蝣 > yóu; #8763
+蝤 > qíu; #8764
+蝥 > máo; #8765
+蝦 > xiā; #8766
+蝧 > yīng; #8767
+蝨 > shī; #8768
+蝩 > chóng; #8769
+蝪 > tāng; #876A
+蝫 > zhū; #876B
+蝬 > zōng; #876C
+蝭 > tí; #876D
+蝮 > fù; #876E
+蝯 > yuán; #876F
+蝰 > hŭi; #8770
+蝱 > méng; #8771
+蝲 > là; #8772
+蝳 > dú; #8773
+蝴 > hú; #8774
+蝵 > qīu; #8775
+蝶 > dié; #8776
+蝷 > lì; #8777
+蝸 > guā; #8778
+蝹 > yūn; #8779
+蝺 > jŭ; #877A
+蝻 > năn; #877B
+蝼 > lóu; #877C
+蝽 > qŭn; #877D
+蝾 > róng; #877E
+蝿 > yíng; #877F
+螀 > jiāng; #8780
+螂 > láng; #8782
+螃 > páng; #8783
+螄 > sī; #8784
+螅 > xī; #8785
+螆 > cì; #8786
+螇 > xī; #8787
+螈 > yuán; #8788
+螉 > wēng; #8789
+螊 > lián; #878A
+螋 > sōu; #878B
+螌 > bān; #878C
+融 > róng; #878D
+螎 > róng; #878E
+螏 > jí; #878F
+螐 > wū; #8790
+螑 > qìu; #8791
+螒 > hàn; #8792
+螓 > qín; #8793
+螔 > yí; #8794
+螕 > bī; #8795
+螖 > huá; #8796
+螗 > táng; #8797
+螘 > yĭ; #8798
+螙 > dù; #8799
+螚 > nài; #879A
+螛 > hé; #879B
+螜 > hú; #879C
+螝 > hùi; #879D
+螞 > mă; #879E
+螟 > míng; #879F
+螠 > yì; #87A0
+螡 > wén; #87A1
+螢 > yíng; #87A2
+螣 > téng; #87A3
+螤 > yŭ; #87A4
+螥 > cāng; #87A5
+螨 > măn; #87A8
+螪 > shāng; #87AA
+螫 > zhē; #87AB
+螬 > cáo; #87AC
+螭 > chī; #87AD
+螮 > dì; #87AE
+螯 > áo; #87AF
+螰 > lù; #87B0
+螱 > wèi; #87B1
+螲 > zhì; #87B2
+螳 > táng; #87B3
+螴 > chén; #87B4
+螵 > piāo; #87B5
+螶 > qú; #87B6
+螷 > pí; #87B7
+螸 > yú; #87B8
+螹 > jiàn; #87B9
+螺 > lúo; #87BA
+螻 > lóu; #87BB
+螼 > qĭn; #87BC
+螽 > zhōng; #87BD
+螾 > yĭn; #87BE
+螿 > jiāng; #87BF
+蟀 > shuài; #87C0
+蟁 > wén; #87C1
+蟂 > jiāo; #87C2
+蟃 > wàn; #87C3
+蟄 > zhí; #87C4
+蟅 > zhè; #87C5
+蟆 > má; #87C6
+蟇 > má; #87C7
+蟈 > gūo; #87C8
+蟉 > líu; #87C9
+蟊 > máo; #87CA
+蟋 > xī; #87CB
+蟌 > cōng; #87CC
+蟍 > lí; #87CD
+蟎 > măn; #87CE
+蟏 > xiāo; #87CF
+蟑 > zhāng; #87D1
+蟒 > măng; #87D2
+蟓 > xiàng; #87D3
+蟔 > mò; #87D4
+蟕 > zūi; #87D5
+蟖 > sī; #87D6
+蟗 > qīu; #87D7
+蟘 > tè; #87D8
+蟙 > zhí; #87D9
+蟚 > péng; #87DA
+蟛 > péng; #87DB
+蟜 > jiăo; #87DC
+蟝 > qú; #87DD
+蟞 > bié; #87DE
+蟟 > liáo; #87DF
+蟠 > pán; #87E0
+蟡 > gŭi; #87E1
+蟢 > xĭ; #87E2
+蟣 > jĭ; #87E3
+蟤 > zhuān; #87E4
+蟥 > huáng; #87E5
+蟦 > fèi; #87E6
+蟧 > láo; #87E7
+蟨 > jué; #87E8
+蟩 > jué; #87E9
+蟪 > hùi; #87EA
+蟫 > yín; #87EB
+蟬 > chán; #87EC
+蟭 > jiāo; #87ED
+蟮 > shàn; #87EE
+蟯 > ráo; #87EF
+蟰 > xiāo; #87F0
+蟱 > móu; #87F1
+蟲 > chóng; #87F2
+蟳 > xún; #87F3
+蟴 > sī; #87F4
+蟶 > chēng; #87F6
+蟷 > dāng; #87F7
+蟸 > lĭ; #87F8
+蟹 > xiè; #87F9
+蟺 > shàn; #87FA
+蟻 > yĭ; #87FB
+蟼 > jĭng; #87FC
+蟽 > dá; #87FD
+蟾 > chán; #87FE
+蟿 > qì; #87FF
+蠀 > cī; #8800
+蠁 > xiàng; #8801
+蠂 > shè; #8802
+蠃 > lŭo; #8803
+蠄 > qín; #8804
+蠅 > yíng; #8805
+蠆 > chài; #8806
+蠇 > lì; #8807
+蠈 > zé; #8808
+蠉 > xuān; #8809
+蠊 > lián; #880A
+蠋 > zhú; #880B
+蠌 > zé; #880C
+蠍 > xiē; #880D
+蠎 > măng; #880E
+蠏 > xiè; #880F
+蠐 > qí; #8810
+蠑 > róng; #8811
+蠒 > jiăn; #8812
+蠓 > mĕng; #8813
+蠔 > háo; #8814
+蠕 > ruăn; #8815
+蠖 > hùo; #8816
+蠗 > zhúo; #8817
+蠘 > jié; #8818
+蠙 > bīn; #8819
+蠚 > hè; #881A
+蠛 > miè; #881B
+蠜 > fán; #881C
+蠝 > léi; #881D
+蠞 > jié; #881E
+蠟 > là; #881F
+蠠 > mì; #8820
+蠡 > lĭ; #8821
+蠢 > chŭn; #8822
+蠣 > lì; #8823
+蠤 > qīu; #8824
+蠥 > niè; #8825
+蠦 > lú; #8826
+蠧 > dù; #8827
+蠨 > xiāo; #8828
+蠩 > zhū; #8829
+蠪 > lóng; #882A
+蠫 > lì; #882B
+蠬 > lóng; #882C
+蠭 > fēng; #882D
+蠮 > yē; #882E
+蠯 > bèng; #882F
+蠰 > shàng; #8830
+蠱 > gŭ; #8831
+蠲 > juān; #8832
+蠳 > yīng; #8833
+蠵 > xī; #8835
+蠶 > cán; #8836
+蠷 > qú; #8837
+蠸 > quán; #8838
+蠹 > dù; #8839
+蠺 > cán; #883A
+蠻 > mán; #883B
+蠼 > jué; #883C
+蠽 > jié; #883D
+蠾 > zhú; #883E
+蠿 > zhá; #883F
+血 > xiĕ; #8840
+衁 > huāng; #8841
+衂 > nìu; #8842
+衃 > pēi; #8843
+衄 > nǜ; #8844
+衅 > xìn; #8845
+衆 > zhòng; #8846
+衇 > mò; #8847
+衈 > èr; #8848
+衉 > kè; #8849
+衊 > miè; #884A
+衋 > xì; #884B
+行 > xíng; #884C
+衍 > yăn; #884D
+衎 > kàn; #884E
+衏 > yuàn; #884F
+衑 > líng; #8851
+衒 > xuàn; #8852
+術 > shù; #8853
+衔 > xián; #8854
+衕 > tòng; #8855
+衖 > lòng; #8856
+街 > jiē; #8857
+衘 > xián; #8858
+衙 > yá; #8859
+衚 > hú; #885A
+衛 > wèi; #885B
+衜 > dào; #885C
+衝 > chōng; #885D
+衞 > wèi; #885E
+衟 > dào; #885F
+衠 > zhūn; #8860
+衡 > héng; #8861
+衢 > qú; #8862
+衣 > yī; #8863
+衤 > yī' 'zì' 'páng; #8864
+补 > bŭ; #8865
+衦 > găn; #8866
+衧 > yú; #8867
+表 > biăo; #8868
+衩 > chà; #8869
+衪 > yĭ; #886A
+衫 > shān; #886B
+衬 > chèn; #886C
+衭 > fū; #886D
+衮 > gŭn; #886E
+衯 > fēn; #886F
+衰 > shuāi; #8870
+衱 > jié; #8871
+衲 > nà; #8872
+衳 > zhōng; #8873
+衴 > dăn; #8874
+衵 > rì; #8875
+衶 > zhòng; #8876
+衷 > zhōng; #8877
+衸 > xiè; #8878
+衹 > qí; #8879
+衺 > xié; #887A
+衻 > rán; #887B
+衼 > zhī; #887C
+衽 > rèn; #887D
+衾 > qīn; #887E
+衿 > jīn; #887F
+袀 > jūn; #8880
+袁 > yuán; #8881
+袂 > mèi; #8882
+袃 > chài; #8883
+袄 > ăo; #8884
+袅 > niăo; #8885
+袆 > hūi; #8886
+袇 > rán; #8887
+袈 > jiā; #8888
+袉 > túo; #8889
+袊 > lĭng; #888A
+袋 > dài; #888B
+袌 > bào; #888C
+袍 > páo; #888D
+袎 > yào; #888E
+袏 > zùo; #888F
+袐 > bì; #8890
+袑 > shào; #8891
+袒 > tăn; #8892
+袓 > jŭ; #8893
+袔 > hè; #8894
+袕 > shù; #8895
+袖 > xìu; #8896
+袗 > zhĕn; #8897
+袘 > yí; #8898
+袙 > pà; #8899
+袚 > bō; #889A
+袛 > dī; #889B
+袜 > wà; #889C
+袝 > fù; #889D
+袞 > gŭn; #889E
+袟 > zhì; #889F
+袠 > zhì; #88A0
+袡 > rán; #88A1
+袢 > pàn; #88A2
+袣 > yì; #88A3
+袤 > mào; #88A4
+袥 > tuo; #88A5
+袦 > nà; #88A6
+袧 > kōu; #88A7
+袨 > xiàn; #88A8
+袩 > chān; #88A9
+袪 > qū; #88AA
+被 > bèi; #88AB
+袬 > gŭn; #88AC
+袭 > xí; #88AD
+袯 > bó; #88AF
+袱 > fú; #88B1
+袲 > yí; #88B2
+袳 > chĭ; #88B3
+袴 > kù; #88B4
+袵 > rèn; #88B5
+袶 > jiàng; #88B6
+袷 > jiá; #88B7
+袸 > cún; #88B8
+袹 > mò; #88B9
+袺 > jié; #88BA
+袻 > ér; #88BB
+袼 > lùo; #88BC
+袽 > rú; #88BD
+袾 > zhū; #88BE
+袿 > gūi; #88BF
+裀 > yīn; #88C0
+裁 > cái; #88C1
+裂 > liè; #88C2
+装 > zhuāng; #88C5
+裆 > dāng; #88C6
+裈 > kūn; #88C8
+裉 > kèn; #88C9
+裊 > niăo; #88CA
+裋 > shù; #88CB
+裌 > jiá; #88CC
+裍 > kŭn; #88CD
+裎 > chéng; #88CE
+裏 > lĭ; #88CF
+裐 > juān; #88D0
+裑 > shēn; #88D1
+裒 > póu; #88D2
+裓 > gé; #88D3
+裔 > yì; #88D4
+裕 > yù; #88D5
+裖 > zhĕn; #88D6
+裗 > líu; #88D7
+裘 > qíu; #88D8
+裙 > qún; #88D9
+裚 > jì; #88DA
+裛 > yì; #88DB
+補 > bŭ; #88DC
+裝 > zhuāng; #88DD
+裞 > shùi; #88DE
+裟 > shā; #88DF
+裠 > qún; #88E0
+裡 > lĭ; #88E1
+裢 > lián; #88E2
+裣 > liàn; #88E3
+裤 > kù; #88E4
+裥 > jiăn; #88E5
+裦 > fóu; #88E6
+裧 > chān; #88E7
+裨 > bì; #88E8
+裩 > gūn; #88E9
+裪 > táo; #88EA
+裫 > yuàn; #88EB
+裬 > líng; #88EC
+裭 > chĭ; #88ED
+裮 > chāng; #88EE
+裯 > chóu; #88EF
+裰 > dúo; #88F0
+裱 > biăo; #88F1
+裲 > liăng; #88F2
+裳 > cháng; #88F3
+裴 > péi; #88F4
+裵 > péi; #88F5
+裶 > fēi; #88F6
+裷 > yuān; #88F7
+裸 > lŭo; #88F8
+裹 > gŭo; #88F9
+裺 > yăn; #88FA
+裻 > dŭ; #88FB
+裼 > xí; #88FC
+製 > zhì; #88FD
+裾 > jū; #88FE
+裿 > qĭ; #88FF
+褀 > jì; #8900
+褁 > zhí; #8901
+褂 > guà; #8902
+褃 > kèn; #8903
+褅 > tì; #8905
+褆 > tí; #8906
+複 > fù; #8907
+褈 > chóng; #8908
+褉 > xiē; #8909
+褊 > biăn; #890A
+褋 > dié; #890B
+褌 > kūn; #890C
+褍 > duān; #890D
+褎 > xìu; #890E
+褏 > xìu; #890F
+褐 > hé; #8910
+褑 > yuàn; #8911
+褒 > bāo; #8912
+褓 > băo; #8913
+褔 > fù; #8914
+褕 > yú; #8915
+褖 > tuàn; #8916
+褗 > yăn; #8917
+褘 > hūi; #8918
+褙 > bèi; #8919
+褚 > chŭ; #891A
+褛 > lǚ; #891B
+褞 > yŭn; #891E
+褟 > dá; #891F
+褠 > gōu; #8920
+褡 > dā; #8921
+褢 > huái; #8922
+褣 > róng; #8923
+褤 > yuàn; #8924
+褥 > rù; #8925
+褦 > nài; #8926
+褧 > jĭong; #8927
+褨 > sŭo; #8928
+褩 > bān; #8929
+褪 > tùn; #892A
+褫 > chĭ; #892B
+褬 > săng; #892C
+褭 > niăo; #892D
+褮 > yīng; #892E
+褯 > jiè; #892F
+褰 > qiān; #8930
+褱 > huái; #8931
+褲 > kù; #8932
+褳 > lián; #8933
+褴 > băo; #8934
+褵 > lí; #8935
+褶 > zhé; #8936
+褷 > shī; #8937
+褸 > lǚ; #8938
+褹 > yì; #8939
+褺 > dié; #893A
+褻 > xiè; #893B
+褼 > xiān; #893C
+褽 > wèi; #893D
+褾 > biăo; #893E
+褿 > cáo; #893F
+襀 > jī; #8940
+襁 > jiăng; #8941
+襂 > sēn; #8942
+襃 > bāo; #8943
+襄 > xiāng; #8944
+襆 > pú; #8946
+襇 > jiăn; #8947
+襈 > zhuàn; #8948
+襉 > jiàn; #8949
+襊 > zùi; #894A
+襋 > jí; #894B
+襌 > dān; #894C
+襍 > zá; #894D
+襎 > fán; #894E
+襏 > bó; #894F
+襐 > xiàng; #8950
+襑 > xín; #8951
+襒 > bié; #8952
+襓 > ráo; #8953
+襔 > măn; #8954
+襕 > lán; #8955
+襖 > ăo; #8956
+襗 > dúo; #8957
+襘 > gùi; #8958
+襙 > cào; #8959
+襚 > sùi; #895A
+襛 > nóng; #895B
+襜 > chān; #895C
+襝 > liàn; #895D
+襞 > bì; #895E
+襟 > jīn; #895F
+襠 > dāng; #8960
+襡 > shú; #8961
+襢 > tăn; #8962
+襣 > bì; #8963
+襤 > lán; #8964
+襥 > pú; #8965
+襦 > rú; #8966
+襧 > zhĭ; #8967
+襩 > shŭ; #8969
+襪 > wà; #896A
+襫 > shì; #896B
+襬 > băi; #896C
+襭 > xié; #896D
+襮 > bó; #896E
+襯 > chèn; #896F
+襰 > lài; #8970
+襱 > lóng; #8971
+襲 > xí; #8972
+襳 > xiān; #8973
+襴 > lán; #8974
+襵 > zhé; #8975
+襶 > dài; #8976
+襸 > zàn; #8978
+襹 > shī; #8979
+襺 > jiăn; #897A
+襻 > pàn; #897B
+襼 > yì; #897C
+襾 > yà; #897E
+西 > xī; #897F
+覀 > xī; #8980
+要 > yào; #8981
+覂 > fĕng; #8982
+覃 > tán; #8983
+覅 > biào; #8985
+覆 > fù; #8986
+覇 > bà; #8987
+覈 > hé; #8988
+覉 > jī; #8989
+覊 > jī; #898A
+見 > jiàn; #898B
+覌 > guān; #898C
+覍 > biàn; #898D
+覎 > yàn; #898E
+規 > gūi; #898F
+覐 > jué; #8990
+覑 > piăn; #8991
+覒 > máo; #8992
+覓 > mì; #8993
+覔 > mì; #8994
+覕 > miè; #8995
+視 > shì; #8996
+覗 > sī; #8997
+覘 > zhān; #8998
+覙 > lúo; #8999
+覚 > jué; #899A
+覛 > mì; #899B
+覜 > tiào; #899C
+覝 > lián; #899D
+覞 > yào; #899E
+覟 > zhì; #899F
+覠 > jūn; #89A0
+覡 > xí; #89A1
+覢 > shăn; #89A2
+覣 > wēi; #89A3
+覤 > xì; #89A4
+覥 > tiăn; #89A5
+覦 > yú; #89A6
+覧 > lăn; #89A7
+覨 > è; #89A8
+覩 > dŭ; #89A9
+親 > qīn; #89AA
+覫 > păng; #89AB
+覬 > jì; #89AC
+覭 > míng; #89AD
+覮 > yíng; #89AE
+覯 > gòu; #89AF
+覰 > qù; #89B0
+覱 > zhàn; #89B1
+覲 > jĭn; #89B2
+観 > guān; #89B3
+覴 > dēng; #89B4
+覵 > jiàn; #89B5
+覶 > lúo; #89B6
+覷 > qù; #89B7
+覸 > jiàn; #89B8
+覹 > wéi; #89B9
+覺 > jué; #89BA
+覻 > qù; #89BB
+覼 > lúo; #89BC
+覽 > lăn; #89BD
+覾 > shĕn; #89BE
+覿 > dí; #89BF
+觀 > guān; #89C0
+见 > jiàn; #89C1
+观 > guān; #89C2
+觃 > yàn; #89C3
+规 > gūi; #89C4
+觅 > mì; #89C5
+视 > shì; #89C6
+觇 > zhān; #89C7
+览 > lăn; #89C8
+觉 > jué; #89C9
+觊 > jì; #89CA
+觋 > xí; #89CB
+觌 > dí; #89CC
+觍 > tiăn; #89CD
+觎 > yú; #89CE
+觏 > gòu; #89CF
+觐 > jĭn; #89D0
+觑 > qù; #89D1
+角 > jiăo; #89D2
+觓 > jīu; #89D3
+觔 > jīn; #89D4
+觕 > cū; #89D5
+觖 > jué; #89D6
+觗 > zhì; #89D7
+觘 > chào; #89D8
+觙 > jí; #89D9
+觚 > gū; #89DA
+觛 > dàn; #89DB
+觜 > zŭi; #89DC
+觝 > dĭ; #89DD
+觞 > shāng; #89DE
+觟 > huà; #89DF
+觠 > quán; #89E0
+觡 > gé; #89E1
+觢 > chì; #89E2
+解 > jiĕ; #89E3
+觤 > gŭi; #89E4
+觥 > gōng; #89E5
+触 > hóng; #89E6
+觧 > jiĕ; #89E7
+觨 > hùn; #89E8
+觩 > qíu; #89E9
+觪 > xīng; #89EA
+觫 > sù; #89EB
+觬 > ní; #89EC
+觭 > jī; #89ED
+觮 > lù; #89EE
+觯 > zhì; #89EF
+觰 > zhā; #89F0
+觱 > bì; #89F1
+觲 > xīng; #89F2
+觳 > hú; #89F3
+觴 > shāng; #89F4
+觵 > gōng; #89F5
+觶 > zhì; #89F6
+觷 > xué; #89F7
+觸 > chù; #89F8
+觹 > xī; #89F9
+觺 > yí; #89FA
+觻 > lù; #89FB
+觼 > jué; #89FC
+觽 > xī; #89FD
+觾 > yàn; #89FE
+觿 > xī; #89FF
+言 > yán; #8A00
+訁 > yán' 'zì' 'páng; #8A01
+訂 > dìng; #8A02
+訃 > fù; #8A03
+訄 > qíu; #8A04
+訅 > qíu; #8A05
+訆 > jiào; #8A06
+訇 > hōng; #8A07
+計 > jì; #8A08
+訉 > fàn; #8A09
+訊 > xùn; #8A0A
+訋 > diào; #8A0B
+訌 > hóng; #8A0C
+訍 > chà; #8A0D
+討 > tăo; #8A0E
+訏 > xū; #8A0F
+訐 > jié; #8A10
+訑 > yí; #8A11
+訒 > rèn; #8A12
+訓 > xùn; #8A13
+訔 > yín; #8A14
+訕 > shàn; #8A15
+訖 > qì; #8A16
+託 > tūo; #8A17
+記 > jì; #8A18
+訙 > xùn; #8A19
+訚 > yín; #8A1A
+訛 > é; #8A1B
+訜 > fēn; #8A1C
+訝 > yà; #8A1D
+訞 > yāo; #8A1E
+訟 > sòng; #8A1F
+訠 > shĕn; #8A20
+訡 > yín; #8A21
+訢 > xīn; #8A22
+訣 > jué; #8A23
+訤 > xiáo; #8A24
+訥 > nè; #8A25
+訦 > chén; #8A26
+訧 > yóu; #8A27
+訨 > zhĭ; #8A28
+訩 > xīong; #8A29
+訪 > făng; #8A2A
+訫 > xìn; #8A2B
+訬 > chāo; #8A2C
+設 > shè; #8A2D
+訮 > xiān; #8A2E
+訯 > shă; #8A2F
+訰 > tún; #8A30
+許 > xŭ; #8A31
+訲 > yì; #8A32
+訳 > yì; #8A33
+訴 > sù; #8A34
+訵 > chī; #8A35
+訶 > hē; #8A36
+訷 > shēn; #8A37
+訸 > hé; #8A38
+訹 > xù; #8A39
+診 > zhĕn; #8A3A
+註 > zhù; #8A3B
+証 > zhèng; #8A3C
+訽 > gòu; #8A3D
+訾 > zĭ; #8A3E
+訿 > zĭ; #8A3F
+詀 > zhān; #8A40
+詁 > gŭ; #8A41
+詂 > fù; #8A42
+詃 > quăn; #8A43
+詄 > dié; #8A44
+詅 > líng; #8A45
+詆 > dĭ; #8A46
+詇 > yàng; #8A47
+詈 > lì; #8A48
+詉 > náo; #8A49
+詊 > pàn; #8A4A
+詋 > zhòu; #8A4B
+詌 > gàn; #8A4C
+詍 > yì; #8A4D
+詎 > jù; #8A4E
+詏 > ào; #8A4F
+詐 > zhà; #8A50
+詑 > túo; #8A51
+詒 > yí; #8A52
+詓 > qŭ; #8A53
+詔 > zhào; #8A54
+評 > píng; #8A55
+詖 > bì; #8A56
+詗 > xìong; #8A57
+詘 > qù; #8A58
+詙 > bá; #8A59
+詚 > dá; #8A5A
+詛 > zŭ; #8A5B
+詜 > tāo; #8A5C
+詝 > zhŭ; #8A5D
+詞 > cí; #8A5E
+詟 > zhé; #8A5F
+詠 > yŏng; #8A60
+詡 > xŭ; #8A61
+詢 > xún; #8A62
+詣 > yì; #8A63
+詤 > huăng; #8A64
+詥 > hé; #8A65
+試 > shì; #8A66
+詧 > chá; #8A67
+詨 > jiāo; #8A68
+詩 > shī; #8A69
+詪 > hĕn; #8A6A
+詫 > chà; #8A6B
+詬 > gòu; #8A6C
+詭 > gŭi; #8A6D
+詮 > quán; #8A6E
+詯 > hùi; #8A6F
+詰 > jié; #8A70
+話 > huà; #8A71
+該 > gāi; #8A72
+詳 > xiáng; #8A73
+詴 > wēi; #8A74
+詵 > shēn; #8A75
+詶 > chóu; #8A76
+詷 > tóng; #8A77
+詸 > mí; #8A78
+詹 > zhān; #8A79
+詺 > mìng; #8A7A
+詻 > è; #8A7B
+詼 > hūi; #8A7C
+詽 > yán; #8A7D
+詾 > xīong; #8A7E
+詿 > guà; #8A7F
+誀 > èr; #8A80
+誁 > bĕng; #8A81
+誂 > tiăo; #8A82
+誃 > chĭ; #8A83
+誄 > lĕi; #8A84
+誅 > zhū; #8A85
+誆 > kuāng; #8A86
+誇 > kuā; #8A87
+誈 > wú; #8A88
+誉 > yù; #8A89
+誊 > téng; #8A8A
+誋 > jì; #8A8B
+誌 > zhì; #8A8C
+認 > rèn; #8A8D
+誎 > sù; #8A8E
+誏 > lăng; #8A8F
+誐 > é; #8A90
+誑 > kuáng; #8A91
+誒 > è; #8A92
+誓 > shì; #8A93
+誔 > tĭng; #8A94
+誕 > dàn; #8A95
+誖 > bó; #8A96
+誗 > chán; #8A97
+誘 > yòu; #8A98
+誙 > héng; #8A99
+誚 > qiào; #8A9A
+誛 > qīn; #8A9B
+誜 > shuà; #8A9C
+誝 > ān; #8A9D
+語 > yŭ; #8A9E
+誟 > xiào; #8A9F
+誠 > chéng; #8AA0
+誡 > jiè; #8AA1
+誢 > xiàn; #8AA2
+誣 > wú; #8AA3
+誤 > wù; #8AA4
+誥 > gào; #8AA5
+誦 > sòng; #8AA6
+誧 > pŭ; #8AA7
+誨 > hùi; #8AA8
+誩 > jìng; #8AA9
+說 > shūo; #8AAA
+誫 > zhèn; #8AAB
+説 > shūo; #8AAC
+読 > dú; #8AAD
+誯 > chàng; #8AAF
+誰 > shúi; #8AB0
+誱 > jié; #8AB1
+課 > kè; #8AB2
+誳 > qū; #8AB3
+誴 > cóng; #8AB4
+誵 > xiáo; #8AB5
+誶 > sùi; #8AB6
+誷 > wăng; #8AB7
+誸 > xuán; #8AB8
+誹 > fĕi; #8AB9
+誺 > chī; #8ABA
+誻 > tà; #8ABB
+誼 > yí; #8ABC
+誽 > ná; #8ABD
+誾 > yín; #8ABE
+調 > diào; #8ABF
+諀 > pĭ; #8AC0
+諁 > chùo; #8AC1
+諂 > chăn; #8AC2
+諃 > chēn; #8AC3
+諄 > zhūn; #8AC4
+諅 > jī; #8AC5
+諆 > qī; #8AC6
+談 > tán; #8AC7
+諈 > zhùi; #8AC8
+諉 > wĕi; #8AC9
+諊 > jú; #8ACA
+請 > qĭng; #8ACB
+諌 > jiàn; #8ACC
+諍 > zhēng; #8ACD
+諎 > zé; #8ACE
+諏 > zōu; #8ACF
+諐 > qiān; #8AD0
+諑 > zhúo; #8AD1
+諒 > liàng; #8AD2
+諓 > jiàn; #8AD3
+諔 > zhù; #8AD4
+諕 > háo; #8AD5
+論 > lùn; #8AD6
+諗 > shĕn; #8AD7
+諘 > biăo; #8AD8
+諙 > huài; #8AD9
+諚 > pián; #8ADA
+諛 > yú; #8ADB
+諜 > dié; #8ADC
+諝 > xŭ; #8ADD
+諞 > pián; #8ADE
+諟 > shì; #8ADF
+諠 > xuān; #8AE0
+諡 > shì; #8AE1
+諢 > hùn; #8AE2
+諣 > huà; #8AE3
+諤 > è; #8AE4
+諥 > zhòng; #8AE5
+諦 > dì; #8AE6
+諧 > xié; #8AE7
+諨 > fú; #8AE8
+諩 > pŭ; #8AE9
+諪 > tíng; #8AEA
+諫 > jiàn; #8AEB
+諬 > qĭ; #8AEC
+諭 > yù; #8AED
+諮 > zī; #8AEE
+諯 > chuán; #8AEF
+諰 > xĭ; #8AF0
+諱 > hùi; #8AF1
+諲 > yīn; #8AF2
+諳 > ān; #8AF3
+諴 > xián; #8AF4
+諵 > nán; #8AF5
+諶 > chén; #8AF6
+諷 > fēng; #8AF7
+諸 > zhū; #8AF8
+諹 > yáng; #8AF9
+諺 > yàn; #8AFA
+諻 > hēng; #8AFB
+諼 > xuān; #8AFC
+諽 > gé; #8AFD
+諾 > nùo; #8AFE
+諿 > qì; #8AFF
+謀 > móu; #8B00
+謁 > yè; #8B01
+謂 > wèi; #8B02
+謄 > téng; #8B04
+謅 > zōu; #8B05
+謆 > shàn; #8B06
+謇 > jiăn; #8B07
+謈 > bó; #8B08
+謉 > kù1; #8B09
+謊 > huăng; #8B0A
+謋 > hùo; #8B0B
+謌 > gē; #8B0C
+謍 > yíng; #8B0D
+謎 > mí; #8B0E
+謏 > xiăo; #8B0F
+謐 > mì; #8B10
+謑 > xì; #8B11
+謒 > qiāng; #8B12
+謓 > chēn; #8B13
+謔 > nǜe; #8B14
+謕 > tí; #8B15
+謖 > sù; #8B16
+謗 > bàng; #8B17
+謘 > chí; #8B18
+謙 > qiān; #8B19
+謚 > shì; #8B1A
+講 > jiăng; #8B1B
+謜 > yuàn; #8B1C
+謝 > xiè; #8B1D
+謞 > xuè; #8B1E
+謟 > tāo; #8B1F
+謠 > yáo; #8B20
+謡 > yáo; #8B21
+謣 > yú; #8B23
+謤 > biāo; #8B24
+謥 > còng; #8B25
+謦 > qìng; #8B26
+謧 > lí; #8B27
+謨 > mó; #8B28
+謩 > mò; #8B29
+謪 > shāng; #8B2A
+謫 > zhé; #8B2B
+謬 > mìu; #8B2C
+謭 > jiăn; #8B2D
+謮 > zé; #8B2E
+謯 > jiē; #8B2F
+謰 > lián; #8B30
+謱 > lóu; #8B31
+謲 > cān; #8B32
+謳 > ōu; #8B33
+謴 > guàn; #8B34
+謵 > xí; #8B35
+謶 > zhúo; #8B36
+謷 > áo; #8B37
+謸 > áo; #8B38
+謹 > jĭn; #8B39
+謺 > zhé; #8B3A
+謻 > yí; #8B3B
+謼 > hù; #8B3C
+謽 > jiàng; #8B3D
+謾 > mán; #8B3E
+謿 > cháo; #8B3F
+譀 > hàn; #8B40
+譁 > huá; #8B41
+譂 > chăn; #8B42
+譃 > xū; #8B43
+譄 > zēng; #8B44
+譅 > sè; #8B45
+譆 > xī; #8B46
+譇 > shē; #8B47
+譈 > dùi; #8B48
+證 > zhèng; #8B49
+譊 > náo; #8B4A
+譋 > lán; #8B4B
+譌 > é; #8B4C
+譍 > yìng; #8B4D
+譎 > jué; #8B4E
+譏 > jī; #8B4F
+譐 > zŭn; #8B50
+譑 > jiăo; #8B51
+譒 > bò; #8B52
+譓 > hùi; #8B53
+譔 > zhuàn; #8B54
+譕 > mú; #8B55
+譖 > zèn; #8B56
+譗 > zhá; #8B57
+識 > shì; #8B58
+譙 > qiáo; #8B59
+譚 > tán; #8B5A
+譛 > zèn; #8B5B
+譜 > pŭ; #8B5C
+譝 > shéng; #8B5D
+譞 > xuān; #8B5E
+譟 > zào; #8B5F
+譠 > tān; #8B60
+譡 > dăng; #8B61
+譢 > sùi; #8B62
+譣 > qiān; #8B63
+譤 > jī; #8B64
+譥 > jiào; #8B65
+警 > jĭng; #8B66
+譧 > lián; #8B67
+譨 > nóu; #8B68
+譩 > yī; #8B69
+譪 > ài; #8B6A
+譫 > zhān; #8B6B
+譬 > pì; #8B6C
+譭 > hŭi; #8B6D
+譮 > huà; #8B6E
+譯 > yì; #8B6F
+議 > yì; #8B70
+譱 > shàn; #8B71
+譲 > ràng; #8B72
+譳 > nòu; #8B73
+譴 > qiăn; #8B74
+譵 > zhùi; #8B75
+譶 > tà; #8B76
+護 > hù; #8B77
+譸 > zhōu; #8B78
+譹 > háo; #8B79
+譺 > yè; #8B7A
+譻 > yīng; #8B7B
+譼 > jiàn; #8B7C
+譽 > yù; #8B7D
+譾 > jiăn; #8B7E
+譿 > hùi; #8B7F
+讀 > dú; #8B80
+讁 > zhé; #8B81
+讂 > xuàn; #8B82
+讃 > zàn; #8B83
+讄 > lĕi; #8B84
+讅 > shĕn; #8B85
+讆 > wèi; #8B86
+讇 > chăn; #8B87
+讈 > lì; #8B88
+讉 > yí; #8B89
+變 > biàn; #8B8A
+讋 > zhé; #8B8B
+讌 > yàn; #8B8C
+讍 > è; #8B8D
+讎 > chóu; #8B8E
+讏 > wèi; #8B8F
+讐 > chóu; #8B90
+讑 > yào; #8B91
+讒 > chán; #8B92
+讓 > ràng; #8B93
+讔 > yĭn; #8B94
+讕 > lán; #8B95
+讖 > chèn; #8B96
+讗 > hùo; #8B97
+讘 > zhé; #8B98
+讙 > huān; #8B99
+讚 > zàn; #8B9A
+讛 > yì; #8B9B
+讜 > dăng; #8B9C
+讝 > zhān; #8B9D
+讞 > yàn; #8B9E
+讟 > dú; #8B9F
+讠 > yán; #8BA0
+计 > jì; #8BA1
+订 > dìng; #8BA2
+讣 > fù; #8BA3
+认 > rèn; #8BA4
+讥 > jī; #8BA5
+讦 > jié; #8BA6
+讧 > hóng; #8BA7
+讨 > tăo; #8BA8
+让 > ràng; #8BA9
+讪 > shàn; #8BAA
+讫 > qì; #8BAB
+讬 > tūo; #8BAC
+训 > xùn; #8BAD
+议 > yì; #8BAE
+讯 > xùn; #8BAF
+记 > jì; #8BB0
+讱 > rèn; #8BB1
+讲 > jiăng; #8BB2
+讳 > hùi; #8BB3
+讴 > ōu; #8BB4
+讵 > jù; #8BB5
+讶 > yà; #8BB6
+讷 > nè; #8BB7
+许 > xŭ; #8BB8
+讹 > é; #8BB9
+论 > lùn; #8BBA
+讻 > xīong; #8BBB
+讼 > sòng; #8BBC
+讽 > fēng; #8BBD
+设 > shè; #8BBE
+访 > făng; #8BBF
+诀 > jué; #8BC0
+证 > zhèng; #8BC1
+诂 > gŭ; #8BC2
+诃 > hē; #8BC3
+评 > píng; #8BC4
+诅 > zŭ; #8BC5
+识 > shì; #8BC6
+诇 > xìong; #8BC7
+诈 > zhà; #8BC8
+诉 > sù; #8BC9
+诊 > zhĕn; #8BCA
+诋 > dĭ; #8BCB
+诌 > zōu; #8BCC
+词 > cí; #8BCD
+诎 > qù; #8BCE
+诏 > zhào; #8BCF
+诐 > bì; #8BD0
+译 > yì; #8BD1
+诒 > yí; #8BD2
+诓 > kuāng; #8BD3
+诔 > lĕi; #8BD4
+试 > shì; #8BD5
+诖 > guà; #8BD6
+诗 > shī; #8BD7
+诘 > jié; #8BD8
+诙 > hūi; #8BD9
+诚 > chéng; #8BDA
+诛 > zhū; #8BDB
+诜 > shēn; #8BDC
+话 > huà; #8BDD
+诞 > dàn; #8BDE
+诟 > gòu; #8BDF
+诠 > quán; #8BE0
+诡 > gŭi; #8BE1
+询 > xún; #8BE2
+诣 > yì; #8BE3
+诤 > zhēng; #8BE4
+该 > gāi; #8BE5
+详 > xiáng; #8BE6
+诧 > chà; #8BE7
+诨 > hùn; #8BE8
+诩 > xŭ; #8BE9
+诪 > zhōu; #8BEA
+诫 > jiè; #8BEB
+诬 > wú; #8BEC
+语 > yŭ; #8BED
+诮 > qiào; #8BEE
+误 > wù; #8BEF
+诰 > gào; #8BF0
+诱 > yòu; #8BF1
+诲 > hùi; #8BF2
+诳 > kuáng; #8BF3
+说 > shūo; #8BF4
+诵 > sòng; #8BF5
+诶 > āi; #8BF6
+请 > qĭng; #8BF7
+诸 > zhū; #8BF8
+诹 > zōu; #8BF9
+诺 > nùo; #8BFA
+读 > dú; #8BFB
+诼 > zhúo; #8BFC
+诽 > fĕi; #8BFD
+课 > kè; #8BFE
+诿 > wĕi; #8BFF
+谀 > yú; #8C00
+谁 > shúi; #8C01
+谂 > shĕn; #8C02
+调 > diào; #8C03
+谄 > chăn; #8C04
+谅 > liàng; #8C05
+谆 > zhūn; #8C06
+谇 > sùi; #8C07
+谈 > tán; #8C08
+谉 > shĕn; #8C09
+谊 > yí; #8C0A
+谋 > móu; #8C0B
+谌 > chén; #8C0C
+谍 > dié; #8C0D
+谎 > huăng; #8C0E
+谏 > jiàn; #8C0F
+谐 > xié; #8C10
+谑 > nǜe; #8C11
+谒 > yè; #8C12
+谓 > wèi; #8C13
+谔 > è; #8C14
+谕 > yù; #8C15
+谖 > xuān; #8C16
+谗 > chán; #8C17
+谘 > zī; #8C18
+谙 > ān; #8C19
+谚 > yàn; #8C1A
+谛 > dì; #8C1B
+谜 > mí; #8C1C
+谝 > pián; #8C1D
+谞 > xŭ; #8C1E
+谟 > mó; #8C1F
+谠 > dăng; #8C20
+谡 > sù; #8C21
+谢 > xiè; #8C22
+谣 > yáo; #8C23
+谤 > bàng; #8C24
+谥 > shì; #8C25
+谦 > qiān; #8C26
+谧 > mì; #8C27
+谨 > jĭn; #8C28
+谩 > mán; #8C29
+谪 > zhé; #8C2A
+谫 > jiăn; #8C2B
+谬 > mìu; #8C2C
+谭 > tán; #8C2D
+谮 > zèn; #8C2E
+谯 > qiáo; #8C2F
+谰 > lán; #8C30
+谱 > pŭ; #8C31
+谲 > jué; #8C32
+谳 > yàn; #8C33
+谴 > qiăn; #8C34
+谵 > zhān; #8C35
+谶 > chèn; #8C36
+谷 > gŭ; #8C37
+谸 > qiān; #8C38
+谹 > hóng; #8C39
+谺 > xiā; #8C3A
+谻 > jué; #8C3B
+谼 > hóng; #8C3C
+谽 > hān; #8C3D
+谾 > hōng; #8C3E
+谿 > xī; #8C3F
+豀 > xī; #8C40
+豁 > hùo; #8C41
+豂 > liáo; #8C42
+豃 > hăn; #8C43
+豄 > dú; #8C44
+豅 > lóng; #8C45
+豆 > dòu; #8C46
+豇 > jiāng; #8C47
+豈 > qĭ; #8C48
+豉 > shì; #8C49
+豊 > lĭ; #8C4A
+豋 > dēng; #8C4B
+豌 > wān; #8C4C
+豍 > bī; #8C4D
+豎 > shù; #8C4E
+豏 > xiàn; #8C4F
+豐 > fēng; #8C50
+豑 > zhì; #8C51
+豒 > zhì; #8C52
+豓 > yàn; #8C53
+豔 > yàn; #8C54
+豕 > shĭ; #8C55
+豖 > chù; #8C56
+豗 > hūi; #8C57
+豘 > tún; #8C58
+豙 > yì; #8C59
+豚 > tún; #8C5A
+豛 > yì; #8C5B
+豜 > jiān; #8C5C
+豝 > bā; #8C5D
+豞 > hòu; #8C5E
+豟 > è; #8C5F
+豠 > cú; #8C60
+象 > xiàng; #8C61
+豢 > huàn; #8C62
+豣 > jiān; #8C63
+豤 > kĕn; #8C64
+豥 > gāi; #8C65
+豦 > qú; #8C66
+豧 > fū; #8C67
+豨 > xī; #8C68
+豩 > bīn; #8C69
+豪 > háo; #8C6A
+豫 > yù; #8C6B
+豬 > zhū; #8C6C
+豭 > jiā; #8C6D
+豯 > xī; #8C6F
+豰 > bó; #8C70
+豱 > wēn; #8C71
+豲 > huán; #8C72
+豳 > bīn; #8C73
+豴 > dí; #8C74
+豵 > zōng; #8C75
+豶 > fén; #8C76
+豷 > yì; #8C77
+豸 > zhì; #8C78
+豹 > bào; #8C79
+豺 > chái; #8C7A
+豻 > hàn; #8C7B
+豼 > pí; #8C7C
+豽 > nà; #8C7D
+豾 > pī; #8C7E
+豿 > gŏu; #8C7F
+貀 > nà; #8C80
+貁 > yòu; #8C81
+貂 > diāo; #8C82
+貃 > mò; #8C83
+貄 > sì; #8C84
+貅 > xīu; #8C85
+貆 > huán; #8C86
+貇 > kūn; #8C87
+貈 > hé; #8C88
+貉 > hé; #8C89
+貊 > mò; #8C8A
+貋 > hàn; #8C8B
+貌 > mào; #8C8C
+貍 > lí; #8C8D
+貎 > ní; #8C8E
+貏 > bĭ; #8C8F
+貐 > yŭ; #8C90
+貑 > jiā; #8C91
+貒 > tuān; #8C92
+貓 > māo; #8C93
+貔 > pí; #8C94
+貕 > xī; #8C95
+貖 > è; #8C96
+貗 > jù; #8C97
+貘 > mò; #8C98
+貙 > chū; #8C99
+貚 > tán; #8C9A
+貛 > huān; #8C9B
+貜 > jué; #8C9C
+貝 > bèi; #8C9D
+貞 > zhēn; #8C9E
+貟 > yuán; #8C9F
+負 > fù; #8CA0
+財 > cái; #8CA1
+貢 > gòng; #8CA2
+貣 > tè; #8CA3
+貤 > yí; #8CA4
+貥 > háng; #8CA5
+貦 > wàn; #8CA6
+貧 > pín; #8CA7
+貨 > hùo; #8CA8
+販 > fàn; #8CA9
+貪 > tān; #8CAA
+貫 > guàn; #8CAB
+責 > zé; #8CAC
+貭 > zhí; #8CAD
+貮 > èr; #8CAE
+貯 > zhŭ; #8CAF
+貰 > shì; #8CB0
+貱 > bì; #8CB1
+貲 > zī; #8CB2
+貳 > èr; #8CB3
+貴 > gùi; #8CB4
+貵 > piăn; #8CB5
+貶 > biăn; #8CB6
+買 > măi; #8CB7
+貸 > dài; #8CB8
+貹 > shèng; #8CB9
+貺 > kuàng; #8CBA
+費 > fèi; #8CBB
+貼 > tiē; #8CBC
+貽 > yí; #8CBD
+貾 > chí; #8CBE
+貿 > mào; #8CBF
+賀 > hè; #8CC0
+賁 > bì; #8CC1
+賂 > lù; #8CC2
+賃 > rèn; #8CC3
+賄 > hùi; #8CC4
+賅 > gāi; #8CC5
+賆 > pián; #8CC6
+資 > zī; #8CC7
+賈 > jiă; #8CC8
+賉 > xù; #8CC9
+賊 > zéi; #8CCA
+賋 > jiăo; #8CCB
+賌 > gài; #8CCC
+賍 > zāng; #8CCD
+賎 > jiàn; #8CCE
+賏 > yìng; #8CCF
+賐 > xùn; #8CD0
+賑 > zhèn; #8CD1
+賒 > shē; #8CD2
+賓 > bīn; #8CD3
+賔 > bīn; #8CD4
+賕 > qíu; #8CD5
+賖 > shē; #8CD6
+賗 > chuàn; #8CD7
+賘 > zāng; #8CD8
+賙 > zhōu; #8CD9
+賚 > lài; #8CDA
+賛 > zàn; #8CDB
+賜 > sì; #8CDC
+賝 > chēn; #8CDD
+賞 > shăng; #8CDE
+賟 > tiăn; #8CDF
+賠 > péi; #8CE0
+賡 > gēng; #8CE1
+賢 > xián; #8CE2
+賣 > mài; #8CE3
+賤 > jiàn; #8CE4
+賥 > sùi; #8CE5
+賦 > fù; #8CE6
+賧 > tàn; #8CE7
+賨 > cóng; #8CE8
+賩 > cóng; #8CE9
+質 > zhí; #8CEA
+賫 > jī; #8CEB
+賬 > zhàng; #8CEC
+賭 > dŭ; #8CED
+賮 > jìn; #8CEE
+賯 > xīong; #8CEF
+賰 > shŭn; #8CF0
+賱 > yŭn; #8CF1
+賲 > băo; #8CF2
+賳 > zāi; #8CF3
+賴 > lài; #8CF4
+賵 > fèng; #8CF5
+賶 > càng; #8CF6
+賷 > jī; #8CF7
+賸 > shèng; #8CF8
+賹 > ài; #8CF9
+賺 > zhuàn; #8CFA
+賻 > fù; #8CFB
+購 > gòu; #8CFC
+賽 > sài; #8CFD
+賾 > zé; #8CFE
+賿 > liáo; #8CFF
+贀 > wèi; #8D00
+贁 > bài; #8D01
+贂 > chĕn; #8D02
+贃 > zhuàn; #8D03
+贄 > zhì; #8D04
+贅 > zhùi; #8D05
+贆 > biāo; #8D06
+贇 > yūn; #8D07
+贈 > zèng; #8D08
+贉 > tăn; #8D09
+贊 > zàn; #8D0A
+贋 > yàn; #8D0B
+贍 > shàn; #8D0D
+贎 > wàn; #8D0E
+贏 > yíng; #8D0F
+贐 > jìn; #8D10
+贑 > găn; #8D11
+贒 > xián; #8D12
+贓 > zāng; #8D13
+贔 > bì; #8D14
+贕 > dú; #8D15
+贖 > shú; #8D16
+贗 > yàn; #8D17
+贙 > xuàn; #8D19
+贚 > lòng; #8D1A
+贛 > gàn; #8D1B
+贜 > zāng; #8D1C
+贝 > bèi; #8D1D
+贞 > zhēn; #8D1E
+负 > fù; #8D1F
+贠 > yuán; #8D20
+贡 > gòng; #8D21
+财 > cái; #8D22
+责 > zé; #8D23
+贤 > xián; #8D24
+败 > bài; #8D25
+账 > zhàng; #8D26
+货 > hùo; #8D27
+质 > zhí; #8D28
+贩 > fàn; #8D29
+贪 > tān; #8D2A
+贫 > pín; #8D2B
+贬 > biăn; #8D2C
+购 > gòu; #8D2D
+贮 > zhŭ; #8D2E
+贯 > guàn; #8D2F
+贰 > èr; #8D30
+贱 > jiàn; #8D31
+贲 > bì; #8D32
+贳 > shì; #8D33
+贴 > tiē; #8D34
+贵 > gùi; #8D35
+贶 > kuàng; #8D36
+贷 > dài; #8D37
+贸 > mào; #8D38
+费 > fèi; #8D39
+贺 > hè; #8D3A
+贻 > yí; #8D3B
+贼 > zéi; #8D3C
+贽 > zhì; #8D3D
+贾 > jiă; #8D3E
+贿 > hùi; #8D3F
+赀 > zī; #8D40
+赁 > rèn; #8D41
+赂 > lù; #8D42
+赃 > zāng; #8D43
+资 > zī; #8D44
+赅 > gāi; #8D45
+赆 > jìn; #8D46
+赇 > qíu; #8D47
+赈 > zhèn; #8D48
+赉 > lài; #8D49
+赊 > shē; #8D4A
+赋 > fù; #8D4B
+赌 > dŭ; #8D4C
+赍 > jī; #8D4D
+赎 > shú; #8D4E
+赏 > shăng; #8D4F
+赐 > sì; #8D50
+赑 > bì; #8D51
+赒 > zhōu; #8D52
+赓 > gēng; #8D53
+赔 > péi; #8D54
+赕 > tàn; #8D55
+赖 > lài; #8D56
+赗 > fèng; #8D57
+赘 > zhùi; #8D58
+赙 > fù; #8D59
+赚 > zhuàn; #8D5A
+赛 > sài; #8D5B
+赜 > zé; #8D5C
+赝 > yàn; #8D5D
+赞 > zàn; #8D5E
+赟 > yūn; #8D5F
+赠 > zèng; #8D60
+赡 > shàn; #8D61
+赢 > yíng; #8D62
+赣 > gàn; #8D63
+赤 > chì; #8D64
+赥 > xì; #8D65
+赦 > shè; #8D66
+赧 > năn; #8D67
+赨 > xíong; #8D68
+赩 > xì; #8D69
+赪 > chēng; #8D6A
+赫 > hè; #8D6B
+赬 > chēng; #8D6C
+赭 > zhĕ; #8D6D
+赮 > xiá; #8D6E
+赯 > táng; #8D6F
+走 > zŏu; #8D70
+赱 > zŏu; #8D71
+赲 > lì; #8D72
+赳 > jĭu; #8D73
+赴 > fù; #8D74
+赵 > zhào; #8D75
+赶 > găn; #8D76
+起 > qĭ; #8D77
+赸 > shàn; #8D78
+赹 > qíong; #8D79
+赺 > qín; #8D7A
+赻 > xiăn; #8D7B
+赼 > cī; #8D7C
+赽 > jué; #8D7D
+赾 > qĭn; #8D7E
+赿 > chí; #8D7F
+趀 > cī; #8D80
+趁 > chèn; #8D81
+趂 > chèn; #8D82
+趃 > dié; #8D83
+趄 > jū; #8D84
+超 > chāo; #8D85
+趆 > dī; #8D86
+趇 > sè; #8D87
+趈 > zhān; #8D88
+趉 > zhú; #8D89
+越 > yuè; #8D8A
+趋 > qū; #8D8B
+趌 > jié; #8D8C
+趍 > chí; #8D8D
+趎 > chú; #8D8E
+趏 > guā; #8D8F
+趐 > xuè; #8D90
+趑 > cī; #8D91
+趒 > tiáo; #8D92
+趓 > dŭo; #8D93
+趔 > liè; #8D94
+趕 > găn; #8D95
+趖 > sūo; #8D96
+趗 > cù; #8D97
+趘 > xí; #8D98
+趙 > zhào; #8D99
+趚 > sù; #8D9A
+趛 > yĭn; #8D9B
+趜 > jú; #8D9C
+趝 > jiàn; #8D9D
+趞 > què; #8D9E
+趟 > tàng; #8D9F
+趠 > chùo; #8DA0
+趡 > cŭi; #8DA1
+趢 > lù; #8DA2
+趣 > qù; #8DA3
+趤 > dàng; #8DA4
+趥 > qīu; #8DA5
+趦 > zī; #8DA6
+趧 > tí; #8DA7
+趨 > qū; #8DA8
+趩 > chì; #8DA9
+趪 > huáng; #8DAA
+趫 > qiáo; #8DAB
+趬 > qiáo; #8DAC
+趭 > yào; #8DAD
+趮 > zào; #8DAE
+趯 > tì; #8DAF
+趱 > zăn; #8DB1
+趲 > zăn; #8DB2
+足 > zú; #8DB3
+趴 > pā; #8DB4
+趵 > bào; #8DB5
+趶 > kù; #8DB6
+趷 > kē; #8DB7
+趸 > dŭn; #8DB8
+趹 > jué; #8DB9
+趺 > fū; #8DBA
+趻 > chĕn; #8DBB
+趼 > jiăn; #8DBC
+趽 > fàng; #8DBD
+趾 > zhĭ; #8DBE
+趿 > sà; #8DBF
+跀 > yuè; #8DC0
+跁 > pá; #8DC1
+跂 > qí; #8DC2
+跃 > yuè; #8DC3
+跄 > qiāng; #8DC4
+跅 > tùo; #8DC5
+跆 > tái; #8DC6
+跇 > yì; #8DC7
+跈 > niăn; #8DC8
+跉 > líng; #8DC9
+跊 > mèi; #8DCA
+跋 > bá; #8DCB
+跌 > diē; #8DCC
+跍 > kū; #8DCD
+跎 > túo; #8DCE
+跏 > jiā; #8DCF
+跐 > cĭ; #8DD0
+跑 > păo; #8DD1
+跒 > qiă; #8DD2
+跓 > zhù; #8DD3
+跔 > jū; #8DD4
+跕 > dié; #8DD5
+跖 > zhī; #8DD6
+跗 > fū; #8DD7
+跘 > pán; #8DD8
+跙 > jŭ; #8DD9
+跚 > shān; #8DDA
+跛 > bŏ; #8DDB
+跜 > ní; #8DDC
+距 > jù; #8DDD
+跞 > lì; #8DDE
+跟 > gēn; #8DDF
+跠 > yí; #8DE0
+跡 > jī; #8DE1
+跢 > dài; #8DE2
+跣 > xiăn; #8DE3
+跤 > jiāo; #8DE4
+跥 > dùo; #8DE5
+跦 > zhū; #8DE6
+跧 > zhuān; #8DE7
+跨 > kuà; #8DE8
+跩 > zhuăi; #8DE9
+跪 > gùi; #8DEA
+跫 > qíong; #8DEB
+跬 > kŭi; #8DEC
+跭 > xiáng; #8DED
+跮 > chì; #8DEE
+路 > lù; #8DEF
+跰 > bèng; #8DF0
+跱 > zhì; #8DF1
+跲 > jiá; #8DF2
+跳 > tiào; #8DF3
+跴 > căi; #8DF4
+践 > jiàn; #8DF5
+跶 > tà; #8DF6
+跷 > qiāo; #8DF7
+跸 > bì; #8DF8
+跹 > xiān; #8DF9
+跺 > dùo; #8DFA
+跻 > jī; #8DFB
+跼 > jú; #8DFC
+跽 > jì; #8DFD
+跾 > shú; #8DFE
+跿 > tú; #8DFF
+踀 > chù; #8E00
+踁 > jìng; #8E01
+踂 > niè; #8E02
+踃 > xiāo; #8E03
+踄 > bó; #8E04
+踅 > chì; #8E05
+踆 > qūn; #8E06
+踇 > mŏu; #8E07
+踈 > shū; #8E08
+踉 > láng; #8E09
+踊 > yŏng; #8E0A
+踋 > jiăo; #8E0B
+踌 > chóu; #8E0C
+踍 > qiāo; #8E0D
+踏 > tà; #8E0F
+踐 > jiàn; #8E10
+踑 > qí; #8E11
+踒 > wō; #8E12
+踓 > wĕi; #8E13
+踔 > zhúo; #8E14
+踕 > jié; #8E15
+踖 > jí; #8E16
+踗 > niē; #8E17
+踘 > jú; #8E18
+踙 > jū; #8E19
+踚 > lún; #8E1A
+踛 > lù; #8E1B
+踜 > lèng; #8E1C
+踝 > huái; #8E1D
+踞 > jù; #8E1E
+踟 > chí; #8E1F
+踠 > wăn; #8E20
+踡 > quán; #8E21
+踢 > tī; #8E22
+踣 > bó; #8E23
+踤 > zú; #8E24
+踥 > qiè; #8E25
+踦 > jĭ; #8E26
+踧 > cù; #8E27
+踨 > zōng; #8E28
+踩 > căi; #8E29
+踪 > zōng; #8E2A
+踫 > pèng; #8E2B
+踬 > zhì; #8E2C
+踭 > zhēng; #8E2D
+踮 > diăn; #8E2E
+踯 > zhí; #8E2F
+踰 > yú; #8E30
+踱 > dùo; #8E31
+踲 > dùn; #8E32
+踳 > chŭn; #8E33
+踴 > yŏng; #8E34
+踵 > zhŏng; #8E35
+踶 > dì; #8E36
+踷 > zhĕ; #8E37
+踸 > chĕn; #8E38
+踹 > chuài; #8E39
+踺 > jiàn; #8E3A
+踻 > guā; #8E3B
+踼 > táng; #8E3C
+踽 > jŭ; #8E3D
+踾 > fú; #8E3E
+踿 > zú; #8E3F
+蹀 > dié; #8E40
+蹁 > pián; #8E41
+蹂 > róu; #8E42
+蹃 > nùo; #8E43
+蹄 > tí; #8E44
+蹅 > chă; #8E45
+蹆 > tŭi; #8E46
+蹇 > jiăn; #8E47
+蹈 > dào; #8E48
+蹉 > cūo; #8E49
+蹊 > xī; #8E4A
+蹋 > tà; #8E4B
+蹌 > qiāng; #8E4C
+蹍 > zhăn; #8E4D
+蹎 > diān; #8E4E
+蹏 > tí; #8E4F
+蹐 > jí; #8E50
+蹑 > niè; #8E51
+蹒 > mán; #8E52
+蹓 > līu; #8E53
+蹔 > zhàn; #8E54
+蹕 > bì; #8E55
+蹖 > chōng; #8E56
+蹗 > lù; #8E57
+蹘 > liáo; #8E58
+蹙 > cù; #8E59
+蹚 > tāng; #8E5A
+蹛 > dài; #8E5B
+蹜 > sūo; #8E5C
+蹝 > xĭ; #8E5D
+蹞 > kŭi; #8E5E
+蹟 > jī; #8E5F
+蹠 > zhí; #8E60
+蹡 > qiāng; #8E61
+蹢 > dí; #8E62
+蹣 > mán; #8E63
+蹤 > zōng; #8E64
+蹥 > lián; #8E65
+蹦 > bèng; #8E66
+蹧 > zāo; #8E67
+蹨 > niăn; #8E68
+蹩 > bié; #8E69
+蹪 > túi; #8E6A
+蹫 > jú; #8E6B
+蹬 > dèng; #8E6C
+蹭 > cèng; #8E6D
+蹮 > xiān; #8E6E
+蹯 > fán; #8E6F
+蹰 > chú; #8E70
+蹱 > zhōng; #8E71
+蹲 > dūn; #8E72
+蹳 > bō; #8E73
+蹴 > cù; #8E74
+蹵 > zú; #8E75
+蹶 > jué; #8E76
+蹷 > jué; #8E77
+蹸 > lìn; #8E78
+蹹 > tà; #8E79
+蹺 > qiāo; #8E7A
+蹻 > qiāo; #8E7B
+蹼 > pú; #8E7C
+蹽 > liāo; #8E7D
+蹾 > dūn; #8E7E
+蹿 > cuān; #8E7F
+躀 > kuàng; #8E80
+躁 > zào; #8E81
+躂 > tà; #8E82
+躃 > bì; #8E83
+躄 > bì; #8E84
+躅 > zhú; #8E85
+躆 > jù; #8E86
+躇 > chú; #8E87
+躈 > qiào; #8E88
+躉 > dŭn; #8E89
+躊 > chóu; #8E8A
+躋 > jī; #8E8B
+躌 > wŭ; #8E8C
+躍 > yuè; #8E8D
+躎 > niăn; #8E8E
+躏 > lìn; #8E8F
+躐 > liè; #8E90
+躑 > zhí; #8E91
+躒 > lì; #8E92
+躓 > zhì; #8E93
+躔 > chán; #8E94
+躕 > chú; #8E95
+躖 > duàn; #8E96
+躗 > wèi; #8E97
+躘 > lóng; #8E98
+躙 > lìn; #8E99
+躚 > xiān; #8E9A
+躛 > wèi; #8E9B
+躜 > zuān; #8E9C
+躝 > lán; #8E9D
+躞 > xiè; #8E9E
+躟 > ráng; #8E9F
+躠 > xiĕ; #8EA0
+躡 > niè; #8EA1
+躢 > tà; #8EA2
+躣 > qú; #8EA3
+躤 > jiè; #8EA4
+躥 > cuān; #8EA5
+躦 > zuān; #8EA6
+躧 > xĭ; #8EA7
+躨 > kúi; #8EA8
+躩 > jué; #8EA9
+躪 > lìn; #8EAA
+身 > shēn; #8EAB
+躬 > gōng; #8EAC
+躭 > dān; #8EAD
+躯 > qū; #8EAF
+躰 > tĭ; #8EB0
+躱 > dŭo; #8EB1
+躲 > dŭo; #8EB2
+躳 > gōng; #8EB3
+躴 > láng; #8EB4
+躶 > lŭo; #8EB6
+躷 > ăi; #8EB7
+躸 > jī; #8EB8
+躹 > jú; #8EB9
+躺 > tăng; #8EBA
+躽 > yăn; #8EBD
+躿 > kāng; #8EBF
+軀 > qū; #8EC0
+軁 > lóu; #8EC1
+軂 > lào; #8EC2
+軃 > tŭo; #8EC3
+軄 > zhí; #8EC4
+軆 > tĭ; #8EC6
+軇 > dào; #8EC7
+軉 > yù; #8EC9
+車 > chē; #8ECA
+軋 > yà; #8ECB
+軌 > gŭi; #8ECC
+軍 > jūn; #8ECD
+軎 > wèi; #8ECE
+軏 > yuè; #8ECF
+軐 > xìn; #8ED0
+軑 > dì; #8ED1
+軒 > xuān; #8ED2
+軓 > fàn; #8ED3
+軔 > rèn; #8ED4
+軕 > shān; #8ED5
+軖 > qiáng; #8ED6
+軗 > shū; #8ED7
+軘 > tún; #8ED8
+軙 > chén; #8ED9
+軚 > dài; #8EDA
+軛 > è; #8EDB
+軜 > nà; #8EDC
+軝 > qí; #8EDD
+軞 > máo; #8EDE
+軟 > ruăn; #8EDF
+軠 > rèn; #8EE0
+軡 > făn; #8EE1
+転 > zhuăn; #8EE2
+軣 > hōng; #8EE3
+軤 > hū; #8EE4
+軥 > qú; #8EE5
+軦 > huàng; #8EE6
+軧 > dĭ; #8EE7
+軨 > líng; #8EE8
+軩 > dài; #8EE9
+軪 > āo; #8EEA
+軫 > zhĕn; #8EEB
+軬 > fàn; #8EEC
+軭 > kuāng; #8EED
+軮 > ăng; #8EEE
+軯 > pēng; #8EEF
+軰 > bèi; #8EF0
+軱 > gū; #8EF1
+軲 > kū; #8EF2
+軳 > páo; #8EF3
+軴 > zhù; #8EF4
+軵 > rŏng; #8EF5
+軶 > è; #8EF6
+軷 > bá; #8EF7
+軸 > zhóu; #8EF8
+軹 > zhĭ; #8EF9
+軺 > yáo; #8EFA
+軻 > kē; #8EFB
+軼 > yì; #8EFC
+軽 > qīng; #8EFD
+軾 > shì; #8EFE
+軿 > píng; #8EFF
+輀 > ér; #8F00
+輁 > qíong; #8F01
+輂 > jú; #8F02
+較 > jiào; #8F03
+輄 > guāng; #8F04
+輅 > lù; #8F05
+輆 > kăi; #8F06
+輇 > quán; #8F07
+輈 > zhōu; #8F08
+載 > zài; #8F09
+輊 > zhì; #8F0A
+輋 > shē; #8F0B
+輌 > liàng; #8F0C
+輍 > yù; #8F0D
+輎 > shāo; #8F0E
+輏 > yóu; #8F0F
+輐 > huăn; #8F10
+輑 > yŭn; #8F11
+輒 > zhé; #8F12
+輓 > wăn; #8F13
+輔 > fŭ; #8F14
+輕 > qīng; #8F15
+輖 > zhōu; #8F16
+輗 > ní; #8F17
+輘 > líng; #8F18
+輙 > zhé; #8F19
+輚 > zhàn; #8F1A
+輛 > liàng; #8F1B
+輜 > zī; #8F1C
+輝 > hūi; #8F1D
+輞 > wăng; #8F1E
+輟 > chùo; #8F1F
+輠 > gŭo; #8F20
+輡 > kăn; #8F21
+輢 > yĭ; #8F22
+輣 > péng; #8F23
+輤 > qiàn; #8F24
+輥 > gŭn; #8F25
+輦 > niăn; #8F26
+輧 > pián; #8F27
+輨 > guăn; #8F28
+輩 > bèi; #8F29
+輪 > lún; #8F2A
+輫 > pái; #8F2B
+輬 > liáng; #8F2C
+輭 > ruăn; #8F2D
+輮 > róu; #8F2E
+輯 > jí; #8F2F
+輰 > yáng; #8F30
+輱 > xián; #8F31
+輲 > chuán; #8F32
+輳 > còu; #8F33
+輴 > qūn; #8F34
+輵 > gé; #8F35
+輶 > yóu; #8F36
+輷 > hōng; #8F37
+輸 > shū; #8F38
+輹 > fù; #8F39
+輺 > zī; #8F3A
+輻 > fú; #8F3B
+輼 > wēn; #8F3C
+輽 > bèn; #8F3D
+輾 > zhăn; #8F3E
+輿 > yú; #8F3F
+轀 > wēn; #8F40
+轁 > tāo; #8F41
+轂 > gŭ; #8F42
+轃 > zhēn; #8F43
+轄 > xiá; #8F44
+轅 > yuán; #8F45
+轆 > lù; #8F46
+轇 > jīu; #8F47
+轈 > cháo; #8F48
+轉 > zhuăn; #8F49
+轊 > wèi; #8F4A
+轋 > hún; #8F4B
+轍 > chè; #8F4D
+轎 > jiào; #8F4E
+轏 > zhàn; #8F4F
+轐 > pú; #8F50
+轑 > lăo; #8F51
+轒 > fén; #8F52
+轓 > fān; #8F53
+轔 > lín; #8F54
+轕 > gé; #8F55
+轖 > sè; #8F56
+轗 > kăn; #8F57
+轘 > huàn; #8F58
+轙 > yĭ; #8F59
+轚 > jí; #8F5A
+轛 > dùi; #8F5B
+轜 > ér; #8F5C
+轝 > yú; #8F5D
+轞 > xiàn; #8F5E
+轟 > hōng; #8F5F
+轠 > lĕi; #8F60
+轡 > pèi; #8F61
+轢 > lì; #8F62
+轣 > lì; #8F63
+轤 > lú; #8F64
+轥 > lìn; #8F65
+车 > chē; #8F66
+轧 > yà; #8F67
+轨 > gŭi; #8F68
+轩 > xuān; #8F69
+轪 > dì; #8F6A
+轫 > rèn; #8F6B
+转 > zhuăn; #8F6C
+轭 > è; #8F6D
+轮 > lún; #8F6E
+软 > ruăn; #8F6F
+轰 > hōng; #8F70
+轱 > kū; #8F71
+轲 > kē; #8F72
+轳 > lú; #8F73
+轴 > zhóu; #8F74
+轵 > zhĭ; #8F75
+轶 > yì; #8F76
+轷 > hū; #8F77
+轸 > zhĕn; #8F78
+轹 > lì; #8F79
+轺 > yáo; #8F7A
+轻 > qīng; #8F7B
+轼 > shì; #8F7C
+载 > zài; #8F7D
+轾 > zhì; #8F7E
+轿 > jiào; #8F7F
+辀 > zhōu; #8F80
+辁 > quán; #8F81
+辂 > lù; #8F82
+较 > jiào; #8F83
+辄 > zhé; #8F84
+辅 > fŭ; #8F85
+辆 > liàng; #8F86
+辇 > niăn; #8F87
+辈 > bèi; #8F88
+辉 > hūi; #8F89
+辊 > gŭn; #8F8A
+辋 > wăng; #8F8B
+辌 > liáng; #8F8C
+辍 > chùo; #8F8D
+辎 > zī; #8F8E
+辏 > còu; #8F8F
+辐 > fú; #8F90
+辑 > jí; #8F91
+辒 > wēn; #8F92
+输 > shū; #8F93
+辔 > pèi; #8F94
+辕 > yuán; #8F95
+辖 > xiá; #8F96
+辗 > zhăn; #8F97
+辘 > lù; #8F98
+辙 > chè; #8F99
+辚 > lín; #8F9A
+辛 > xīn; #8F9B
+辜 > gū; #8F9C
+辝 > cí; #8F9D
+辞 > cí; #8F9E
+辟 > pì; #8F9F
+辠 > zùi; #8FA0
+辡 > biàn; #8FA1
+辢 > là; #8FA2
+辣 > là; #8FA3
+辤 > cí; #8FA4
+辥 > xuē; #8FA5
+辦 > bàn; #8FA6
+辧 > biàn; #8FA7
+辨 > biàn; #8FA8
+辩 > biàn; #8FA9
+辫 > biàn; #8FAB
+辬 > bān; #8FAC
+辭 > cí; #8FAD
+辮 > biàn; #8FAE
+辯 > biàn; #8FAF
+辰 > chén; #8FB0
+辱 > rù; #8FB1
+農 > nóng; #8FB2
+辳 > nóng; #8FB3
+辴 > zhĕn; #8FB4
+辵 > chùo; #8FB5
+辶 > chùo; #8FB6
+辸 > réng; #8FB8
+边 > biān; #8FB9
+辺 > biān; #8FBA
+辽 > liáo; #8FBD
+达 > dá; #8FBE
+辿 > chān; #8FBF
+迀 > gān; #8FC0
+迁 > qiān; #8FC1
+迂 > yū; #8FC2
+迃 > yū; #8FC3
+迄 > qì; #8FC4
+迅 > xùn; #8FC5
+迆 > yĭ; #8FC6
+过 > gùo; #8FC7
+迈 > mài; #8FC8
+迉 > qí; #8FC9
+迊 > zā; #8FCA
+迋 > wàng; #8FCB
+迌 > jia; #8FCC
+迍 > zhūn; #8FCD
+迎 > yíng; #8FCE
+迏 > tì; #8FCF
+运 > yùn; #8FD0
+近 > jìn; #8FD1
+迒 > háng; #8FD2
+迓 > yà; #8FD3
+返 > făn; #8FD4
+迕 > wù; #8FD5
+迖 > dá; #8FD6
+迗 > é; #8FD7
+还 > huán; #8FD8
+这 > zhè; #8FD9
+进 > jìn; #8FDB
+远 > yuăn; #8FDC
+违 > wéi; #8FDD
+连 > lián; #8FDE
+迟 > chí; #8FDF
+迠 > chè; #8FE0
+迡 > nì; #8FE1
+迢 > tiáo; #8FE2
+迣 > zhì; #8FE3
+迤 > yĭ; #8FE4
+迥 > jĭong; #8FE5
+迦 > jiā; #8FE6
+迧 > chén; #8FE7
+迨 > dài; #8FE8
+迩 > ĕr; #8FE9
+迪 > dí; #8FEA
+迫 > pò; #8FEB
+迬 > wăng; #8FEC
+迭 > dié; #8FED
+迮 > zé; #8FEE
+迯 > táo; #8FEF
+述 > shù; #8FF0
+迱 > túo; #8FF1
+迳 > jìng; #8FF3
+迴 > húi; #8FF4
+迵 > tóng; #8FF5
+迶 > yòu; #8FF6
+迷 > mí; #8FF7
+迸 > bèng; #8FF8
+迹 > jī; #8FF9
+迺 > năi; #8FFA
+迻 > yí; #8FFB
+迼 > jié; #8FFC
+追 > zhūi; #8FFD
+迾 > liè; #8FFE
+迿 > xùn; #8FFF
+退 > tùi; #9000
+送 > sòng; #9001
+适 > guā; #9002
+逃 > táo; #9003
+逄 > páng; #9004
+逅 > hòu; #9005
+逆 > nì; #9006
+逇 > dùn; #9007
+逈 > jĭong; #9008
+选 > xuăn; #9009
+逊 > xùn; #900A
+逋 > bū; #900B
+逌 > yóu; #900C
+逍 > xiāo; #900D
+逎 > qíu; #900E
+透 > tòu; #900F
+逐 > zhú; #9010
+逑 > qíu; #9011
+递 > dì; #9012
+逓 > dì; #9013
+途 > tú; #9014
+逕 > jìng; #9015
+逖 > tì; #9016
+逗 > dòu; #9017
+逘 > yĭ; #9018
+這 > zhè; #9019
+通 > tōng; #901A
+逛 > guàng; #901B
+逜 > wù; #901C
+逝 > shì; #901D
+逞 > chĕng; #901E
+速 > sù; #901F
+造 > zào; #9020
+逡 > qūn; #9021
+逢 > féng; #9022
+連 > lián; #9023
+逤 > sùo; #9024
+逥 > húi; #9025
+逦 > lĭ; #9026
+逨 > lái; #9028
+逩 > bèn; #9029
+逪 > cùo; #902A
+逫 > jué; #902B
+逬 > bèng; #902C
+逭 > huàn; #902D
+逮 > dài; #902E
+逯 > lù; #902F
+逰 > yóu; #9030
+週 > zhōu; #9031
+進 > jìn; #9032
+逳 > yù; #9033
+逴 > chùo; #9034
+逵 > kúi; #9035
+逶 > wēi; #9036
+逷 > tì; #9037
+逸 > yì; #9038
+逹 > dá; #9039
+逺 > yuăn; #903A
+逻 > lúo; #903B
+逼 > bī; #903C
+逽 > nùo; #903D
+逾 > yú; #903E
+逿 > dàng; #903F
+遀 > súi; #9040
+遁 > dùn; #9041
+遂 > sùi; #9042
+遃 > yăn; #9043
+遄 > chuán; #9044
+遅 > chí; #9045
+遆 > tí; #9046
+遇 > yù; #9047
+遈 > shí; #9048
+遉 > zhēn; #9049
+遊 > yóu; #904A
+運 > yùn; #904B
+遌 > è; #904C
+遍 > biàn; #904D
+過 > gùo; #904E
+遏 > è; #904F
+遐 > xiá; #9050
+遑 > huáng; #9051
+遒 > qíu; #9052
+道 > dào; #9053
+達 > dá; #9054
+違 > wéi; #9055
+遗 > yí; #9057
+遘 > gòu; #9058
+遙 > yáo; #9059
+遚 > chù; #905A
+遛 > líu; #905B
+遜 > xùn; #905C
+遝 > tà; #905D
+遞 > dì; #905E
+遟 > chí; #905F
+遠 > yuăn; #9060
+遡 > sù; #9061
+遢 > tà; #9062
+遣 > qiăn; #9063
+遥 > yáo; #9065
+遦 > guàn; #9066
+遧 > zhāng; #9067
+遨 > áo; #9068
+適 > shì; #9069
+遪 > cè; #906A
+遫 > chì; #906B
+遬 > sù; #906C
+遭 > zāo; #906D
+遮 > zhē; #906E
+遯 > dùn; #906F
+遰 > dì; #9070
+遱 > lóu; #9071
+遲 > chí; #9072
+遳 > cūo; #9073
+遴 > lín; #9074
+遵 > zūn; #9075
+遶 > rào; #9076
+遷 > qiān; #9077
+選 > xuăn; #9078
+遹 > yù; #9079
+遺 > yí; #907A
+遻 > wù; #907B
+遼 > liáo; #907C
+遽 > jù; #907D
+遾 > shì; #907E
+避 > bì; #907F
+邀 > yāo; #9080
+邁 > mài; #9081
+邂 > xiè; #9082
+邃 > sùi; #9083
+還 > huán; #9084
+邅 > zhān; #9085
+邆 > téng; #9086
+邇 > ĕr; #9087
+邈 > miăo; #9088
+邉 > biān; #9089
+邊 > biān; #908A
+邋 > lá; #908B
+邌 > lí; #908C
+邍 > yuán; #908D
+邎 > yáo; #908E
+邏 > lúo; #908F
+邐 > lĭ; #9090
+邑 > yì; #9091
+邒 > tíng; #9092
+邓 > dèng; #9093
+邔 > qĭ; #9094
+邕 > yōng; #9095
+邖 > shān; #9096
+邗 > hán; #9097
+邘 > yú; #9098
+邙 > máng; #9099
+邚 > rú; #909A
+邛 > qíong; #909B
+邝 > kuàng; #909D
+邞 > fū; #909E
+邟 > kàng; #909F
+邠 > bīn; #90A0
+邡 > fāng; #90A1
+邢 > xíng; #90A2
+那 > nà; #90A3
+邤 > xin; #90A4
+邥 > shĕn; #90A5
+邦 > bāng; #90A6
+邧 > yuán; #90A7
+邨 > cūn; #90A8
+邩 > hŭo; #90A9
+邪 > xié; #90AA
+邫 > bāng; #90AB
+邬 > wū; #90AC
+邭 > jù; #90AD
+邮 > yóu; #90AE
+邯 > hán; #90AF
+邰 > tái; #90B0
+邱 > qīu; #90B1
+邲 > bì; #90B2
+邳 > péi; #90B3
+邴 > bĭng; #90B4
+邵 > shào; #90B5
+邶 > bèi; #90B6
+邷 > wă; #90B7
+邸 > dĭ; #90B8
+邹 > zōu; #90B9
+邺 > yè; #90BA
+邻 > lín; #90BB
+邼 > kuāng; #90BC
+邽 > gūi; #90BD
+邾 > zhū; #90BE
+邿 > shī; #90BF
+郀 > kū; #90C0
+郁 > yù; #90C1
+郂 > gāi; #90C2
+郃 > gé; #90C3
+郄 > xì; #90C4
+郅 > zhì; #90C5
+郆 > jí; #90C6
+郇 > xún; #90C7
+郈 > hòu; #90C8
+郉 > xíng; #90C9
+郊 > jiāo; #90CA
+郋 > xí; #90CB
+郌 > gūi; #90CC
+郍 > núo; #90CD
+郎 > láng; #90CE
+郏 > jiá; #90CF
+郐 > kuài; #90D0
+郑 > zhèng; #90D1
+郓 > yùn; #90D3
+郔 > yán; #90D4
+郕 > chéng; #90D5
+郖 > dōu; #90D6
+郗 > chī; #90D7
+郘 > lǚ; #90D8
+郙 > fŭ; #90D9
+郚 > wú; #90DA
+郛 > fú; #90DB
+郜 > gào; #90DC
+郝 > hăo; #90DD
+郞 > láng; #90DE
+郟 > jiá; #90DF
+郠 > gĕng; #90E0
+郡 > jùn; #90E1
+郢 > yĭng; #90E2
+郣 > bó; #90E3
+郤 > xì; #90E4
+郥 > bèi; #90E5
+郦 > lì; #90E6
+郧 > yún; #90E7
+部 > bù; #90E8
+郩 > xiáo; #90E9
+郪 > qī; #90EA
+郫 > pí; #90EB
+郬 > qīng; #90EC
+郭 > gūo; #90ED
+郮 > zhou; #90EE
+郯 > tán; #90EF
+郰 > zōu; #90F0
+郱 > píng; #90F1
+郲 > lái; #90F2
+郳 > ní; #90F3
+郴 > chēn; #90F4
+郵 > yóu; #90F5
+郶 > bù; #90F6
+郷 > xiāng; #90F7
+郸 > dān; #90F8
+郹 > jú; #90F9
+郺 > yōng; #90FA
+郻 > qiāo; #90FB
+郼 > yī; #90FC
+都 > dū; #90FD
+郾 > yăn; #90FE
+郿 > méi; #90FF
+鄀 > rùo; #9100
+鄁 > bèi; #9101
+鄂 > è; #9102
+鄃 > yú; #9103
+鄄 > juàn; #9104
+鄅 > yŭ; #9105
+鄆 > yùn; #9106
+鄇 > hòu; #9107
+鄈 > kúi; #9108
+鄉 > xiāng; #9109
+鄊 > xiāng; #910A
+鄋 > sōu; #910B
+鄌 > táng; #910C
+鄍 > míng; #910D
+鄎 > xì; #910E
+鄏 > rù; #910F
+鄐 > chù; #9110
+鄑 > zī; #9111
+鄒 > zōu; #9112
+鄓 > jú; #9113
+鄔 > wū; #9114
+鄕 > xiāng; #9115
+鄖 > yún; #9116
+鄗 > hào; #9117
+鄘 > yōng; #9118
+鄙 > bĭ; #9119
+鄚 > mò; #911A
+鄛 > cháo; #911B
+鄜 > fū; #911C
+鄝 > liăo; #911D
+鄞 > yín; #911E
+鄟 > zhuān; #911F
+鄠 > hù; #9120
+鄡 > qiāo; #9121
+鄢 > yān; #9122
+鄣 > zhāng; #9123
+鄤 > fàn; #9124
+鄥 > qiāo; #9125
+鄦 > xŭ; #9126
+鄧 > dèng; #9127
+鄨 > bì; #9128
+鄩 > xín; #9129
+鄪 > bì; #912A
+鄫 > céng; #912B
+鄬 > wéi; #912C
+鄭 > zhèng; #912D
+鄮 > mào; #912E
+鄯 > shàn; #912F
+鄰 > lín; #9130
+鄱 > pó; #9131
+鄲 > dān; #9132
+鄳 > méng; #9133
+鄴 > yè; #9134
+鄵 > cāo; #9135
+鄶 > kuài; #9136
+鄷 > fēng; #9137
+鄸 > méng; #9138
+鄹 > zōu; #9139
+鄺 > kuàng; #913A
+鄻 > lián; #913B
+鄼 > zàn; #913C
+鄽 > chán; #913D
+鄾 > yōu; #913E
+鄿 > qí; #913F
+酀 > yān; #9140
+酁 > chán; #9141
+酂 > zàn; #9142
+酃 > líng; #9143
+酄 > huān; #9144
+酅 > xī; #9145
+酆 > fēng; #9146
+酇 > zàn; #9147
+酈 > lì; #9148
+酉 > yŏu; #9149
+酊 > dĭng; #914A
+酋 > qíu; #914B
+酌 > zhúo; #914C
+配 > pèi; #914D
+酎 > zhòu; #914E
+酏 > yí; #914F
+酐 > hăng; #9150
+酑 > yŭ; #9151
+酒 > jĭu; #9152
+酓 > yăn; #9153
+酔 > zùi; #9154
+酕 > máo; #9155
+酖 > dān; #9156
+酗 > xù; #9157
+酘 > tóu; #9158
+酙 > zhēn; #9159
+酚 > fēn; #915A
+酝 > yùn; #915D
+酞 > tài; #915E
+酟 > tiān; #915F
+酠 > qiă; #9160
+酡 > túo; #9161
+酢 > zùo; #9162
+酣 > hān; #9163
+酤 > gū; #9164
+酥 > sū; #9165
+酦 > pò; #9166
+酧 > chóu; #9167
+酨 > zài; #9168
+酩 > míng; #9169
+酪 > lùo; #916A
+酫 > chùo; #916B
+酬 > chóu; #916C
+酭 > yòu; #916D
+酮 > tóng; #916E
+酯 > zhĭ; #916F
+酰 > xiān; #9170
+酱 > jiàng; #9171
+酲 > chéng; #9172
+酳 > yìn; #9173
+酴 > tú; #9174
+酵 > xiào; #9175
+酶 > méi; #9176
+酷 > kù; #9177
+酸 > suān; #9178
+酹 > lèi; #9179
+酺 > pú; #917A
+酻 > zùi; #917B
+酼 > hăi; #917C
+酽 > yàn; #917D
+酾 > xĭ; #917E
+酿 > niàng; #917F
+醀 > wéi; #9180
+醁 > lù; #9181
+醂 > lăn; #9182
+醃 > yān; #9183
+醄 > táo; #9184
+醅 > pēi; #9185
+醆 > zhăn; #9186
+醇 > chún; #9187
+醈 > tán; #9188
+醉 > zùi; #9189
+醊 > chùo; #918A
+醋 > cù; #918B
+醌 > kūn; #918C
+醍 > tí; #918D
+醎 > mián; #918E
+醏 > dū; #918F
+醐 > hú; #9190
+醑 > xŭ; #9191
+醒 > xĭng; #9192
+醓 > tăn; #9193
+醔 > jīu; #9194
+醕 > chún; #9195
+醖 > yùn; #9196
+醗 > pò; #9197
+醘 > kè; #9198
+醙 > sōu; #9199
+醚 > mí; #919A
+醛 > quán; #919B
+醜 > chŏu; #919C
+醝 > cúo; #919D
+醞 > yùn; #919E
+醟 > yòng; #919F
+醠 > àng; #91A0
+醡 > zhà; #91A1
+醢 > hăi; #91A2
+醣 > táng; #91A3
+醤 > jiàng; #91A4
+醥 > piăo; #91A5
+醦 > shăn; #91A6
+醧 > yù; #91A7
+醨 > lí; #91A8
+醩 > záo; #91A9
+醪 > láo; #91AA
+醫 > yī; #91AB
+醬 > jiàng; #91AC
+醭 > pū; #91AD
+醮 > jiào; #91AE
+醯 > xī; #91AF
+醰 > tán; #91B0
+醱 > pò; #91B1
+醲 > nóng; #91B2
+醳 > yì; #91B3
+醴 > lĭ; #91B4
+醵 > jù; #91B5
+醶 > jiào; #91B6
+醷 > yì; #91B7
+醸 > niàng; #91B8
+醹 > rú; #91B9
+醺 > xūn; #91BA
+醻 > chóu; #91BB
+醼 > yàn; #91BC
+醽 > líng; #91BD
+醾 > mí; #91BE
+醿 > mí; #91BF
+釀 > niàng; #91C0
+釁 > xìn; #91C1
+釂 > jiào; #91C2
+釃 > xĭ; #91C3
+釄 > mí; #91C4
+釅 > yàn; #91C5
+釆 > biàn; #91C6
+采 > căi; #91C7
+釈 > shì; #91C8
+釉 > yòu; #91C9
+释 > shì; #91CA
+釋 > shì; #91CB
+里 > lĭ; #91CC
+重 > zhòng; #91CD
+野 > yĕ; #91CE
+量 > liàng; #91CF
+釐 > lí; #91D0
+金 > jīn; #91D1
+釒 > jīn' 'zì' 'páng; #91D2
+釓 > qíu; #91D3
+釔 > yĭ; #91D4
+釕 > diăo; #91D5
+釖 > dāo; #91D6
+釗 > zhāo; #91D7
+釘 > dīng; #91D8
+釙 > pò; #91D9
+釚 > qíu; #91DA
+釛 > hé; #91DB
+釜 > fŭ; #91DC
+針 > zhēn; #91DD
+釞 > zhí; #91DE
+釟 > bā; #91DF
+釠 > luàn; #91E0
+釡 > fŭ; #91E1
+釢 > nái; #91E2
+釣 > diào; #91E3
+釤 > shàn; #91E4
+釥 > qiăo; #91E5
+釦 > kòu; #91E6
+釧 > chuàn; #91E7
+釨 > zĭ; #91E8
+釩 > fán; #91E9
+釪 > yú; #91EA
+釫 > huá; #91EB
+釬 > hàn; #91EC
+釭 > gōng; #91ED
+釮 > qí; #91EE
+釯 > máng; #91EF
+釰 > rì; #91F0
+釱 > dì; #91F1
+釲 > sì; #91F2
+釳 > xì; #91F3
+釴 > yì; #91F4
+釵 > chāi; #91F5
+釶 > shī; #91F6
+釷 > tŭ; #91F7
+釸 > xì; #91F8
+釹 > nǚ; #91F9
+釺 > qiān; #91FA
+釼 > jiàn; #91FC
+釽 > pī; #91FD
+釾 > yé; #91FE
+釿 > yín; #91FF
+鈀 > bă; #9200
+鈁 > fāng; #9201
+鈂 > chén; #9202
+鈃 > xíng; #9203
+鈄 > tŏu; #9204
+鈅 > yuè; #9205
+鈆 > yán; #9206
+鈇 > fū; #9207
+鈈 > pī; #9208
+鈉 > nà; #9209
+鈊 > xīn; #920A
+鈋 > é; #920B
+鈌 > jué; #920C
+鈍 > dùn; #920D
+鈎 > gōu; #920E
+鈏 > yĭn; #920F
+鈐 > qián; #9210
+鈑 > băn; #9211
+鈒 > jí; #9212
+鈓 > rén; #9213
+鈔 > chāo; #9214
+鈕 > nĭu; #9215
+鈖 > fēn; #9216
+鈗 > yŭn; #9217
+鈘 > jĭ; #9218
+鈙 > qín; #9219
+鈚 > pí; #921A
+鈛 > gūo; #921B
+鈜 > hóng; #921C
+鈝 > yín; #921D
+鈞 > jūn; #921E
+鈟 > shī; #921F
+鈠 > yì; #9220
+鈡 > zhōng; #9221
+鈢 > niē; #9222
+鈣 > gài; #9223
+鈤 > rì; #9224
+鈥 > húo; #9225
+鈦 > tài; #9226
+鈧 > kàng; #9227
+鈬 > dúo; #922C
+鈭 > zī; #922D
+鈮 > nĭ; #922E
+鈯 > tú; #922F
+鈰 > shì; #9230
+鈱 > mín; #9231
+鈲 > gū; #9232
+鈳 > ē; #9233
+鈴 > líng; #9234
+鈵 > bìng; #9235
+鈶 > yí; #9236
+鈷 > gū; #9237
+鈸 > bá; #9238
+鈹 > pī; #9239
+鈺 > yù; #923A
+鈻 > sì; #923B
+鈼 > zúo; #923C
+鈽 > bù; #923D
+鈾 > yóu; #923E
+鈿 > diàn; #923F
+鉀 > jiă; #9240
+鉁 > zhēn; #9241
+鉂 > shĭ; #9242
+鉃 > shì; #9243
+鉄 > tiĕ; #9244
+鉅 > jù; #9245
+鉆 > zhān; #9246
+鉇 > shī; #9247
+鉈 > shé; #9248
+鉉 > xuàn; #9249
+鉊 > zhāo; #924A
+鉋 > bào; #924B
+鉌 > hé; #924C
+鉍 > bì; #924D
+鉎 > shēng; #924E
+鉏 > chú; #924F
+鉐 > shí; #9250
+鉑 > bó; #9251
+鉒 > zhù; #9252
+鉓 > chì; #9253
+鉔 > zā; #9254
+鉕 > pō; #9255
+鉖 > tóng; #9256
+鉗 > qián; #9257
+鉘 > fú; #9258
+鉙 > zhăi; #9259
+鉚 > lĭu; #925A
+鉛 > qiān; #925B
+鉜 > fú; #925C
+鉝 > lì; #925D
+鉞 > yuè; #925E
+鉟 > pī; #925F
+鉠 > yāng; #9260
+鉡 > bàn; #9261
+鉢 > bō; #9262
+鉣 > jié; #9263
+鉤 > gōu; #9264
+鉥 > shù; #9265
+鉦 > zhēng; #9266
+鉧 > mŭ; #9267
+鉨 > nĭ; #9268
+鉩 > niē; #9269
+鉪 > dì; #926A
+鉫 > jiā; #926B
+鉬 > mù; #926C
+鉭 > dàn; #926D
+鉮 > shēn; #926E
+鉯 > yĭ; #926F
+鉰 > sī; #9270
+鉱 > kuàng; #9271
+鉲 > kă; #9272
+鉳 > bĕi; #9273
+鉴 > jiàn; #9274
+鉵 > tóng; #9275
+鉶 > xíng; #9276
+鉷 > hóng; #9277
+鉸 > jiăo; #9278
+鉹 > chĭ; #9279
+鉺 > èr; #927A
+鉻 > gè; #927B
+鉼 > bĭng; #927C
+鉽 > shì; #927D
+鉾 > móu; #927E
+鉿 > jiá; #927F
+銀 > yín; #9280
+銁 > jūn; #9281
+銂 > zhōu; #9282
+銃 > chòng; #9283
+銄 > shàng; #9284
+銅 > tóng; #9285
+銆 > mò; #9286
+銇 > lèi; #9287
+銈 > jī; #9288
+銉 > yù; #9289
+銊 > xù; #928A
+銋 > rén; #928B
+銌 > zùn; #928C
+銍 > zhì; #928D
+銎 > qīong; #928E
+銏 > shàn; #928F
+銐 > chì; #9290
+銑 > xiăn; #9291
+銒 > xíng; #9292
+銓 > quán; #9293
+銔 > pī; #9294
+銕 > tiĕ; #9295
+銖 > zhū; #9296
+銗 > hóu; #9297
+銘 > míng; #9298
+銙 > kuă; #9299
+銚 > yáo; #929A
+銛 > xiān; #929B
+銜 > xián; #929C
+銝 > xīu; #929D
+銞 > jūn; #929E
+銟 > chā; #929F
+銠 > lăo; #92A0
+銡 > jí; #92A1
+銢 > pĭ; #92A2
+銣 > rŭ; #92A3
+銤 > mĭ; #92A4
+銥 > yĭ; #92A5
+銦 > yīn; #92A6
+銧 > guāng; #92A7
+銨 > ān; #92A8
+銩 > diōu; #92A9
+銪 > yŏu; #92AA
+銫 > sè; #92AB
+銬 > kào; #92AC
+銭 > qián; #92AD
+銮 > luán; #92AE
+銰 > āi; #92B0
+銱 > diào; #92B1
+銲 > hàn; #92B2
+銳 > rùi; #92B3
+銴 > shì; #92B4
+銵 > kēng; #92B5
+銶 > qíu; #92B6
+銷 > xiāo; #92B7
+銸 > zhé; #92B8
+銹 > xìu; #92B9
+銺 > zàng; #92BA
+銻 > tì; #92BB
+銼 > cùo; #92BC
+銽 > guā; #92BD
+銾 > gŏng; #92BE
+銿 > zhōng; #92BF
+鋀 > dòu; #92C0
+鋁 > lǚ; #92C1
+鋂 > méi; #92C2
+鋃 > láng; #92C3
+鋄 > wăn; #92C4
+鋅 > xīn; #92C5
+鋆 > yún; #92C6
+鋇 > bèi; #92C7
+鋈 > wù; #92C8
+鋉 > sù; #92C9
+鋊 > yù; #92CA
+鋋 > chán; #92CB
+鋌 > tĭng; #92CC
+鋍 > bó; #92CD
+鋎 > hàn; #92CE
+鋏 > jiá; #92CF
+鋐 > hóng; #92D0
+鋑 > cuān; #92D1
+鋒 > fēng; #92D2
+鋓 > chān; #92D3
+鋔 > wăn; #92D4
+鋕 > zhì; #92D5
+鋖 > sī; #92D6
+鋗 > xuān; #92D7
+鋘 > wú; #92D8
+鋙 > wú; #92D9
+鋚 > tiáo; #92DA
+鋛 > gŏng; #92DB
+鋜 > zhúo; #92DC
+鋝 > lǜe; #92DD
+鋞 > xíng; #92DE
+鋟 > qiān; #92DF
+鋠 > shèn; #92E0
+鋡 > hán; #92E1
+鋢 > lǜe; #92E2
+鋣 > xié; #92E3
+鋤 > chú; #92E4
+鋥 > zhèng; #92E5
+鋦 > jú; #92E6
+鋧 > xiàn; #92E7
+鋨 > tiĕ; #92E8
+鋩 > máng; #92E9
+鋪 > pū; #92EA
+鋫 > lí; #92EB
+鋬 > pàn; #92EC
+鋭 > rùi; #92ED
+鋮 > chéng; #92EE
+鋯 > gào; #92EF
+鋰 > lĭ; #92F0
+鋱 > tè; #92F1
+鋳 > zhù; #92F3
+鋵 > tū; #92F5
+鋶 > lĭu; #92F6
+鋷 > zùi; #92F7
+鋸 > jù; #92F8
+鋹 > chăng; #92F9
+鋺 > yuān; #92FA
+鋻 > jiàn; #92FB
+鋼 > gāng; #92FC
+鋽 > diào; #92FD
+鋾 > táo; #92FE
+鋿 > cháng; #92FF
+錀 > lún; #9300
+錁 > kuă; #9301
+錂 > líng; #9302
+錃 > bēi; #9303
+錄 > lù; #9304
+錅 > lí; #9305
+錆 > qiāng; #9306
+錇 > póu; #9307
+錈 > juàn; #9308
+錉 > mín; #9309
+錊 > zùi; #930A
+錋 > péng; #930B
+錌 > àn; #930C
+錍 > pí; #930D
+錎 > xiàn; #930E
+錏 > yà; #930F
+錐 > zhūi; #9310
+錑 > lèi; #9311
+錒 > ā; #9312
+錓 > kōng; #9313
+錔 > tà; #9314
+錕 > kūn; #9315
+錖 > dŭ; #9316
+錗 > wèi; #9317
+錘 > chúi; #9318
+錙 > zī; #9319
+錚 > zhēng; #931A
+錛 > bēn; #931B
+錜 > niē; #931C
+錝 > cóng; #931D
+錞 > qún; #931E
+錟 > tán; #931F
+錠 > dìng; #9320
+錡 > qí; #9321
+錢 > qián; #9322
+錣 > zhúo; #9323
+錤 > qí; #9324
+錥 > yù; #9325
+錦 > jĭn; #9326
+錧 > guăn; #9327
+錨 > máo; #9328
+錩 > chāng; #9329
+錪 > tiăn; #932A
+錫 > xí; #932B
+錬 > liàn; #932C
+錭 > táo; #932D
+錮 > gù; #932E
+錯 > cùo; #932F
+錰 > shù; #9330
+錱 > zhēn; #9331
+録 > lù; #9332
+錳 > mĕng; #9333
+錴 > lù; #9334
+錵 > huā; #9335
+錶 > biăo; #9336
+錷 > gá; #9337
+錸 > lái; #9338
+錹 > kĕn; #9339
+錼 > nài; #933C
+錽 > wăn; #933D
+錾 > zàn; #933E
+鍀 > dé; #9340
+鍁 > xiān; #9341
+鍃 > hūo; #9343
+鍄 > liàng; #9344
+鍆 > mén; #9346
+鍇 > kăi; #9347
+鍈 > yīng; #9348
+鍉 > dī; #9349
+鍊 > liàn; #934A
+鍋 > gūo; #934B
+鍌 > xiăn; #934C
+鍍 > dù; #934D
+鍎 > tú; #934E
+鍏 > wéi; #934F
+鍐 > cōng; #9350
+鍑 > fù; #9351
+鍒 > róu; #9352
+鍓 > jí; #9353
+鍔 > è; #9354
+鍕 > róu; #9355
+鍖 > chĕn; #9356
+鍗 > tí; #9357
+鍘 > zhá; #9358
+鍙 > hòng; #9359
+鍚 > yáng; #935A
+鍛 > duàn; #935B
+鍜 > xiā; #935C
+鍝 > yú; #935D
+鍞 > kēng; #935E
+鍟 > xīng; #935F
+鍠 > huáng; #9360
+鍡 > wĕi; #9361
+鍢 > fù; #9362
+鍣 > zhāo; #9363
+鍤 > chá; #9364
+鍥 > qiè; #9365
+鍦 > shé; #9366
+鍧 > hōng; #9367
+鍨 > kúi; #9368
+鍩 > tiăn; #9369
+鍪 > móu; #936A
+鍫 > qiāo; #936B
+鍬 > qiāo; #936C
+鍭 > hóu; #936D
+鍮 > tōu; #936E
+鍯 > cōng; #936F
+鍰 > huán; #9370
+鍱 > yè; #9371
+鍲 > mín; #9372
+鍳 > jiàn; #9373
+鍴 > duān; #9374
+鍵 > jiàn; #9375
+鍶 > sōng; #9376
+鍷 > kūi; #9377
+鍸 > hú; #9378
+鍹 > xuān; #9379
+鍺 > dŭo; #937A
+鍻 > jié; #937B
+鍼 > zhēn; #937C
+鍽 > biān; #937D
+鍾 > zhōng; #937E
+鍿 > zī; #937F
+鎀 > xīu; #9380
+鎁 > yé; #9381
+鎂 > mĕi; #9382
+鎃 > pài; #9383
+鎄 > āi; #9384
+鎅 > jiè; #9385
+鎇 > méi; #9387
+鎈 > chūo; #9388
+鎉 > tà; #9389
+鎊 > bàng; #938A
+鎋 > xiá; #938B
+鎌 > lián; #938C
+鎍 > sŭo; #938D
+鎎 > xì; #938E
+鎏 > líu; #938F
+鎐 > zú; #9390
+鎑 > yè; #9391
+鎒 > nòu; #9392
+鎓 > wēng; #9393
+鎔 > róng; #9394
+鎕 > táng; #9395
+鎖 > sŭo; #9396
+鎗 > qiāng; #9397
+鎘 > gé; #9398
+鎙 > shùo; #9399
+鎚 > chúi; #939A
+鎛 > bó; #939B
+鎜 > pán; #939C
+鎝 > sà; #939D
+鎞 > bì; #939E
+鎟 > săng; #939F
+鎠 > gāng; #93A0
+鎡 > zī; #93A1
+鎢 > wù; #93A2
+鎣 > yìng; #93A3
+鎤 > huăng; #93A4
+鎥 > tiáo; #93A5
+鎦 > líu; #93A6
+鎧 > kăi; #93A7
+鎨 > sŭn; #93A8
+鎩 > shā; #93A9
+鎪 > sōu; #93AA
+鎫 > wàn; #93AB
+鎬 > hào; #93AC
+鎭 > zhèn; #93AD
+鎮 > zhèn; #93AE
+鎯 > lŭo; #93AF
+鎰 > yì; #93B0
+鎱 > yuán; #93B1
+鎲 > tăng; #93B2
+鎳 > niè; #93B3
+鎴 > xí; #93B4
+鎵 > jiā; #93B5
+鎶 > gē; #93B6
+鎷 > mă; #93B7
+鎸 > juān; #93B8
+鎻 > sŭo; #93BB
+鎿 > ná; #93BF
+鏀 > lŭ; #93C0
+鏁 > sŭo; #93C1
+鏂 > ōu; #93C2
+鏃 > zú; #93C3
+鏄 > tuán; #93C4
+鏅 > xīu; #93C5
+鏆 > guàn; #93C6
+鏇 > xuàn; #93C7
+鏈 > liàn; #93C8
+鏉 > shòu; #93C9
+鏊 > áo; #93CA
+鏋 > măn; #93CB
+鏌 > mò; #93CC
+鏍 > lúo; #93CD
+鏎 > bì; #93CE
+鏏 > wèi; #93CF
+鏐 > líu; #93D0
+鏑 > dí; #93D1
+鏒 > qiāo; #93D2
+鏓 > cōng; #93D3
+鏔 > yí; #93D4
+鏕 > lù; #93D5
+鏖 > áo; #93D6
+鏗 > kēng; #93D7
+鏘 > qiāng; #93D8
+鏙 > cūi; #93D9
+鏚 > qì; #93DA
+鏛 > cháng; #93DB
+鏜 > tāng; #93DC
+鏝 > màn; #93DD
+鏞 > yōng; #93DE
+鏟 > chăn; #93DF
+鏠 > fēng; #93E0
+鏡 > jìng; #93E1
+鏢 > biāo; #93E2
+鏣 > shù; #93E3
+鏤 > lòu; #93E4
+鏥 > xìu; #93E5
+鏦 > cōng; #93E6
+鏧 > lóng; #93E7
+鏨 > zàn; #93E8
+鏩 > jiàn; #93E9
+鏪 > cáo; #93EA
+鏫 > lí; #93EB
+鏬 > xià; #93EC
+鏭 > xī; #93ED
+鏮 > kāng; #93EE
+鏰 > bèng; #93F0
+鏳 > zhēng; #93F3
+鏴 > lù; #93F4
+鏵 > huá; #93F5
+鏶 > jí; #93F6
+鏷 > pú; #93F7
+鏸 > hùi; #93F8
+鏹 > qiāng; #93F9
+鏺 > pō; #93FA
+鏻 > lín; #93FB
+鏼 > sŭo; #93FC
+鏽 > xìu; #93FD
+鏾 > săn; #93FE
+鏿 > chēng; #93FF
+鐀 > kùi; #9400
+鐁 > sī; #9401
+鐂 > lìu; #9402
+鐃 > náo; #9403
+鐄 > héng; #9404
+鐅 > piĕ; #9405
+鐆 > sùi; #9406
+鐇 > fán; #9407
+鐈 > qiáo; #9408
+鐉 > quān; #9409
+鐊 > yáng; #940A
+鐋 > tàng; #940B
+鐌 > xiàng; #940C
+鐍 > jué; #940D
+鐎 > jiāo; #940E
+鐏 > zūn; #940F
+鐐 > liáo; #9410
+鐑 > jié; #9411
+鐒 > láo; #9412
+鐓 > dùi; #9413
+鐔 > tán; #9414
+鐕 > zān; #9415
+鐖 > jī; #9416
+鐗 > jiăn; #9417
+鐘 > zhōng; #9418
+鐙 > dēng; #9419
+鐚 > yà; #941A
+鐛 > yìng; #941B
+鐜 > dùi; #941C
+鐝 > jué; #941D
+鐞 > nòu; #941E
+鐟 > tì; #941F
+鐠 > pŭ; #9420
+鐡 > tiĕ; #9421
+鐤 > dĭng; #9424
+鐥 > shàn; #9425
+鐦 > kāi; #9426
+鐧 > jiăn; #9427
+鐨 > fèi; #9428
+鐩 > sùi; #9429
+鐪 > lŭ; #942A
+鐫 > juān; #942B
+鐬 > hùi; #942C
+鐭 > yù; #942D
+鐮 > lián; #942E
+鐯 > zhúo; #942F
+鐰 > qiāo; #9430
+鐱 > qiān; #9431
+鐲 > zhúo; #9432
+鐳 > léi; #9433
+鐴 > bì; #9434
+鐵 > tiĕ; #9435
+鐶 > huán; #9436
+鐷 > yè; #9437
+鐸 > dúo; #9438
+鐹 > gŭo; #9439
+鐺 > dāng; #943A
+鐻 > jù; #943B
+鐼 > fén; #943C
+鐽 > dá; #943D
+鐾 > bèi; #943E
+鐿 > yì; #943F
+鑀 > ài; #9440
+鑁 > zōng; #9441
+鑂 > xùn; #9442
+鑃 > diào; #9443
+鑄 > zhù; #9444
+鑅 > héng; #9445
+鑆 > zhùi; #9446
+鑇 > jī; #9447
+鑈 > niē; #9448
+鑉 > tà; #9449
+鑊 > hùo; #944A
+鑋 > qìng; #944B
+鑌 > bīn; #944C
+鑍 > yīng; #944D
+鑎 > kùi; #944E
+鑏 > níng; #944F
+鑐 > xū; #9450
+鑑 > jiàn; #9451
+鑒 > jiàn; #9452
+鑔 > chă; #9454
+鑕 > zhì; #9455
+鑖 > miè; #9456
+鑗 > lí; #9457
+鑘 > léi; #9458
+鑙 > jī; #9459
+鑚 > zuàn; #945A
+鑛 > kuàng; #945B
+鑜 > shàng; #945C
+鑝 > péng; #945D
+鑞 > là; #945E
+鑟 > dú; #945F
+鑠 > shùo; #9460
+鑡 > chùo; #9461
+鑢 > lǜ; #9462
+鑣 > biāo; #9463
+鑤 > bào; #9464
+鑥 > lŭ; #9465
+鑨 > lóng; #9468
+鑩 > è; #9469
+鑪 > lú; #946A
+鑫 > xīn; #946B
+鑬 > jiàn; #946C
+鑭 > làn; #946D
+鑮 > bó; #946E
+鑯 > jiān; #946F
+鑰 > yào; #9470
+鑱 > chán; #9471
+鑲 > xiāng; #9472
+鑳 > jiàn; #9473
+鑴 > xī; #9474
+鑵 > guàn; #9475
+鑶 > cáng; #9476
+鑷 > niè; #9477
+鑸 > lĕi; #9478
+鑹 > cuàn; #9479
+鑺 > qú; #947A
+鑻 > pàn; #947B
+鑼 > lúo; #947C
+鑽 > zuàn; #947D
+鑾 > luán; #947E
+鑿 > záo; #947F
+钀 > niè; #9480
+钁 > jué; #9481
+钂 > tăng; #9482
+钃 > shŭ; #9483
+钄 > lán; #9484
+钅 > jīn; #9485
+钆 > qíu; #9486
+钇 > yĭ; #9487
+针 > zhēn; #9488
+钉 > dīng; #9489
+钊 > zhāo; #948A
+钋 > pò; #948B
+钌 > diăo; #948C
+钍 > tŭ; #948D
+钎 > qiān; #948E
+钏 > chuàn; #948F
+钐 > shàn; #9490
+钑 > jí; #9491
+钒 > fán; #9492
+钓 > diào; #9493
+钔 > mén; #9494
+钕 > nǚ; #9495
+钖 > xí; #9496
+钗 > chāi; #9497
+钘 > xíng; #9498
+钙 > gài; #9499
+钚 > bù; #949A
+钛 > tài; #949B
+钜 > jù; #949C
+钝 > dùn; #949D
+钞 > chāo; #949E
+钟 > zhōng; #949F
+钠 > nà; #94A0
+钡 > bèi; #94A1
+钢 > gāng; #94A2
+钣 > băn; #94A3
+钤 > qián; #94A4
+钥 > yào; #94A5
+钦 > qīn; #94A6
+钧 > jūn; #94A7
+钨 > wù; #94A8
+钩 > gōu; #94A9
+钪 > kàng; #94AA
+钫 > fāng; #94AB
+钬 > húo; #94AC
+钭 > tŏu; #94AD
+钮 > nĭu; #94AE
+钯 > bă; #94AF
+钰 > yù; #94B0
+钱 > qián; #94B1
+钲 > zhēng; #94B2
+钳 > qián; #94B3
+钴 > gū; #94B4
+钵 > bō; #94B5
+钶 > ē; #94B6
+钷 > pō; #94B7
+钸 > bù; #94B8
+钹 > bá; #94B9
+钺 > yuè; #94BA
+钻 > zuàn; #94BB
+钼 > mù; #94BC
+钽 > dàn; #94BD
+钾 > jiă; #94BE
+钿 > diàn; #94BF
+铀 > yóu; #94C0
+铁 > tiĕ; #94C1
+铂 > bó; #94C2
+铃 > líng; #94C3
+铄 > shùo; #94C4
+铅 > qiān; #94C5
+铆 > lĭu; #94C6
+铇 > bào; #94C7
+铈 > shì; #94C8
+铉 > xuàn; #94C9
+铊 > shé; #94CA
+铋 > bì; #94CB
+铌 > nĭ; #94CC
+铍 > pī; #94CD
+铎 > dúo; #94CE
+铏 > xíng; #94CF
+铐 > kào; #94D0
+铑 > lăo; #94D1
+铒 > èr; #94D2
+铓 > máng; #94D3
+铔 > yà; #94D4
+铕 > yŏu; #94D5
+铖 > chéng; #94D6
+铗 > jiá; #94D7
+铘 > yé; #94D8
+铙 > náo; #94D9
+铚 > zhì; #94DA
+铛 > dāng; #94DB
+铜 > tóng; #94DC
+铝 > lǚ; #94DD
+铞 > diào; #94DE
+铟 > yīn; #94DF
+铠 > kăi; #94E0
+铡 > zhá; #94E1
+铢 > zhū; #94E2
+铣 > xiăn; #94E3
+铤 > tĭng; #94E4
+铥 > dīu; #94E5
+铦 > xiān; #94E6
+铧 > huá; #94E7
+铨 > quán; #94E8
+铩 > shā; #94E9
+铪 > jiá; #94EA
+铫 > yáo; #94EB
+铬 > gè; #94EC
+铭 > míng; #94ED
+铮 > zhēng; #94EE
+铯 > sè; #94EF
+铰 > jiăo; #94F0
+铱 > yĭ; #94F1
+铲 > chăn; #94F2
+铳 > chòng; #94F3
+铴 > tàng; #94F4
+铵 > ān; #94F5
+银 > yín; #94F6
+铷 > rŭ; #94F7
+铸 > zhù; #94F8
+铹 > láo; #94F9
+铺 > pū; #94FA
+铻 > wú; #94FB
+铼 > lái; #94FC
+铽 > tè; #94FD
+链 > liàn; #94FE
+铿 > kēng; #94FF
+销 > xiāo; #9500
+锁 > sŭo; #9501
+锂 > lĭ; #9502
+锃 > zhèng; #9503
+锄 > chú; #9504
+锅 > gūo; #9505
+锆 > gào; #9506
+锇 > tiĕ; #9507
+锈 > xìu; #9508
+锉 > cùo; #9509
+锊 > lǜe; #950A
+锋 > fēng; #950B
+锌 > xīn; #950C
+锍 > lĭu; #950D
+锎 > kāi; #950E
+锏 > jiăn; #950F
+锐 > rùi; #9510
+锑 > tì; #9511
+锒 > láng; #9512
+锓 > qiān; #9513
+锔 > jú; #9514
+锕 > ā; #9515
+锖 > qiāng; #9516
+锗 > dŭo; #9517
+锘 > tiăn; #9518
+错 > cùo; #9519
+锚 > máo; #951A
+锛 > bēn; #951B
+锜 > qí; #951C
+锝 > dé; #951D
+锞 > kuă; #951E
+锟 > kūn; #951F
+锠 > chāng; #9520
+锡 > xí; #9521
+锢 > gù; #9522
+锣 > lúo; #9523
+锤 > chúi; #9524
+锥 > zhūi; #9525
+锦 > jĭn; #9526
+锧 > zhì; #9527
+锨 > xiān; #9528
+锩 > juàn; #9529
+锪 > hūo; #952A
+锫 > póu; #952B
+锬 > tán; #952C
+锭 > dìng; #952D
+键 > jiàn; #952E
+锯 > jù; #952F
+锰 > mĕng; #9530
+锱 > zī; #9531
+锲 > qiè; #9532
+锳 > yīng; #9533
+锴 > kăi; #9534
+锵 > qiāng; #9535
+锶 > sōng; #9536
+锷 > è; #9537
+锸 > chá; #9538
+锹 > qiāo; #9539
+锺 > zhōng; #953A
+锻 > duàn; #953B
+锼 > sōu; #953C
+锽 > huáng; #953D
+锾 > huán; #953E
+锿 > āi; #953F
+镀 > dù; #9540
+镁 > mĕi; #9541
+镂 > lòu; #9542
+镃 > zī; #9543
+镄 > fèi; #9544
+镅 > méi; #9545
+镆 > mò; #9546
+镇 > zhèn; #9547
+镈 > bó; #9548
+镉 > gé; #9549
+镊 > niè; #954A
+镋 > tăng; #954B
+镌 > juān; #954C
+镍 > niè; #954D
+镎 > ná; #954E
+镏 > líu; #954F
+镐 > hào; #9550
+镑 > bàng; #9551
+镒 > yì; #9552
+镓 > jiā; #9553
+镔 > bīn; #9554
+镕 > róng; #9555
+镖 > biāo; #9556
+镗 > tāng; #9557
+镘 > màn; #9558
+镙 > lúo; #9559
+镚 > bèng; #955A
+镛 > yōng; #955B
+镜 > jìng; #955C
+镝 > dí; #955D
+镞 > zú; #955E
+镟 > xuàn; #955F
+镠 > líu; #9560
+镡 > tán; #9561
+镢 > jué; #9562
+镣 > liáo; #9563
+镤 > pú; #9564
+镥 > lŭ; #9565
+镦 > dùi; #9566
+镧 > làn; #9567
+镨 > pŭ; #9568
+镩 > cuàn; #9569
+镪 > qiāng; #956A
+镫 > dēng; #956B
+镬 > hùo; #956C
+镭 > léi; #956D
+镮 > huán; #956E
+镯 > zhúo; #956F
+镰 > lián; #9570
+镱 > yì; #9571
+镲 > chă; #9572
+镳 > biāo; #9573
+镴 > là; #9574
+镵 > chán; #9575
+镶 > xiāng; #9576
+長 > cháng; #9577
+镸 > cháng; #9578
+镹 > jĭu; #9579
+镺 > ăo; #957A
+镻 > dié; #957B
+镼 > qū; #957C
+镽 > liăo; #957D
+镾 > mí; #957E
+长 > cháng; #957F
+門 > mén; #9580
+閁 > mà; #9581
+閂 > shuān; #9582
+閃 > shăn; #9583
+閄 > hùo; #9584
+閅 > mén; #9585
+閆 > yàn; #9586
+閇 > bì; #9587
+閈 > hàn; #9588
+閉 > bì; #9589
+開 > kāi; #958B
+閌 > kàng; #958C
+閍 > bēng; #958D
+閎 > hóng; #958E
+閏 > rùn; #958F
+閐 > sàn; #9590
+閑 > xián; #9591
+閒 > xián; #9592
+間 > jiān; #9593
+閔 > mĭn; #9594
+閕 > xiā; #9595
+閗 > dòu; #9597
+閘 > zhá; #9598
+閙 > nào; #9599
+閚 > jian; #959A
+閛 > pēng; #959B
+閜 > xiă; #959C
+閝 > líng; #959D
+閞 > biàn; #959E
+閟 > bì; #959F
+閠 > rùn; #95A0
+閡 > hé; #95A1
+関 > guān; #95A2
+閣 > gé; #95A3
+閤 > gé; #95A4
+閥 > fá; #95A5
+閦 > chù; #95A6
+閧 > hòng; #95A7
+閨 > gūi; #95A8
+閩 > mĭn; #95A9
+閫 > kŭn; #95AB
+閬 > lăng; #95AC
+閭 > lǘ; #95AD
+閮 > tíng; #95AE
+閯 > shà; #95AF
+閰 > jú; #95B0
+閱 > yuè; #95B1
+閲 > yuè; #95B2
+閳 > chăn; #95B3
+閴 > qù; #95B4
+閵 > lìn; #95B5
+閶 > chāng; #95B6
+閷 > shài; #95B7
+閸 > kŭn; #95B8
+閹 > yān; #95B9
+閺 > mín; #95BA
+閻 > yán; #95BB
+閼 > è; #95BC
+閽 > hūn; #95BD
+閾 > yù; #95BE
+閿 > wén; #95BF
+闀 > xiàng; #95C0
+闁 > bao; #95C1
+闂 > xiàng; #95C2
+闃 > qù; #95C3
+闄 > yăo; #95C4
+闅 > wén; #95C5
+闆 > băn; #95C6
+闇 > àn; #95C7
+闈 > wéi; #95C8
+闉 > yīn; #95C9
+闊 > kùo; #95CA
+闋 > què; #95CB
+闌 > lán; #95CC
+闍 > dū; #95CD
+闐 > tián; #95D0
+闑 > niè; #95D1
+闒 > tà; #95D2
+闓 > kăi; #95D3
+闔 > hé; #95D4
+闕 > què; #95D5
+闖 > chuăng; #95D6
+闗 > guān; #95D7
+闘 > dòu; #95D8
+闙 > qĭ; #95D9
+闚 > kūi; #95DA
+闛 > táng; #95DB
+關 > guān; #95DC
+闝 > piáo; #95DD
+闞 > kàn; #95DE
+闟 > xì; #95DF
+闠 > hùi; #95E0
+闡 > chăn; #95E1
+闢 > pì; #95E2
+闣 > dàng; #95E3
+闤 > huán; #95E4
+闥 > tà; #95E5
+闦 > wén; #95E6
+门 > mén; #95E8
+闩 > shuān; #95E9
+闪 > shăn; #95EA
+闫 > yàn; #95EB
+闬 > hàn; #95EC
+闭 > bì; #95ED
+问 > wèn; #95EE
+闯 > chuăng; #95EF
+闰 > rùn; #95F0
+闱 > wéi; #95F1
+闲 > xián; #95F2
+闳 > hóng; #95F3
+间 > jiān; #95F4
+闵 > mĭn; #95F5
+闶 > kàng; #95F6
+闷 > mèn; #95F7
+闸 > zhá; #95F8
+闹 > nào; #95F9
+闺 > gūi; #95FA
+闻 > wén; #95FB
+闼 > tà; #95FC
+闽 > mĭn; #95FD
+闾 > lǘ; #95FE
+闿 > kăi; #95FF
+阀 > fá; #9600
+阁 > gé; #9601
+阂 > hé; #9602
+阃 > kŭn; #9603
+阄 > jīu; #9604
+阅 > yuè; #9605
+阆 > lăng; #9606
+阇 > dū; #9607
+阈 > yù; #9608
+阉 > yān; #9609
+阊 > chāng; #960A
+阋 > xì; #960B
+阌 > wén; #960C
+阍 > hūn; #960D
+阎 > yán; #960E
+阏 > è; #960F
+阐 > chăn; #9610
+阑 > lán; #9611
+阒 > qù; #9612
+阓 > hùi; #9613
+阔 > kùo; #9614
+阕 > què; #9615
+阖 > gé; #9616
+阗 > tián; #9617
+阘 > tà; #9618
+阙 > què; #9619
+阚 > kàn; #961A
+阛 > huán; #961B
+阜 > fù; #961C
+阝 > fù; #961D
+阞 > lè; #961E
+队 > dùi; #961F
+阠 > xìn; #9620
+阡 > qiān; #9621
+阢 > wù; #9622
+阣 > yì; #9623
+阤 > túo; #9624
+阥 > yīn; #9625
+阦 > yáng; #9626
+阧 > dŏu; #9627
+阨 > è; #9628
+阩 > shēng; #9629
+阪 > băn; #962A
+阫 > péi; #962B
+阬 > kēng; #962C
+阭 > yŭn; #962D
+阮 > ruăn; #962E
+阯 > zhĭ; #962F
+阰 > pí; #9630
+阱 > jĭng; #9631
+防 > fáng; #9632
+阳 > yáng; #9633
+阴 > yīn; #9634
+阵 > zhèn; #9635
+阶 > jiē; #9636
+阷 > chēng; #9637
+阸 > è; #9638
+阹 > qū; #9639
+阺 > dĭ; #963A
+阻 > zŭ; #963B
+阼 > zùo; #963C
+阽 > diàn; #963D
+阾 > lĭng; #963E
+阿 > ā; #963F
+陀 > túo; #9640
+陁 > túo; #9641
+陂 > pō; #9642
+陃 > bĭng; #9643
+附 > fù; #9644
+际 > jì; #9645
+陆 > lù; #9646
+陇 > lŏng; #9647
+陈 > chén; #9648
+陉 > xíng; #9649
+陊 > dùo; #964A
+陋 > lòu; #964B
+陌 > mò; #964C
+降 > jiàng; #964D
+陎 > shū; #964E
+陏 > dùo; #964F
+限 > xiàn; #9650
+陑 > ér; #9651
+陒 > gŭi; #9652
+陓 > yū; #9653
+陔 > gāi; #9654
+陕 > shăn; #9655
+陖 > xùn; #9656
+陗 > qiào; #9657
+陘 > xíng; #9658
+陙 > chún; #9659
+陚 > fù; #965A
+陛 > bì; #965B
+陜 > xiá; #965C
+陝 > shăn; #965D
+陞 > shēng; #965E
+陟 > zhì; #965F
+陠 > pū; #9660
+陡 > dŏu; #9661
+院 > yuàn; #9662
+陣 > zhèn; #9663
+除 > chú; #9664
+陥 > xiàn; #9665
+陧 > niè; #9667
+陨 > yŭn; #9668
+险 > xiăn; #9669
+陪 > péi; #966A
+陫 > péi; #966B
+陬 > zōu; #966C
+陭 > yī; #966D
+陮 > dŭi; #966E
+陯 > lún; #966F
+陰 > yīn; #9670
+陱 > jū; #9671
+陲 > chúi; #9672
+陳 > chén; #9673
+陴 > pí; #9674
+陵 > líng; #9675
+陶 > táo; #9676
+陷 > xiàn; #9677
+陸 > lù; #9678
+陹 > sheng; #9679
+険 > xiăn; #967A
+陻 > yīn; #967B
+陼 > zhŭ; #967C
+陽 > yáng; #967D
+陾 > réng; #967E
+陿 > shăn; #967F
+隀 > chóng; #9680
+隁 > yàn; #9681
+隂 > yīn; #9682
+隃 > yú; #9683
+隄 > tí; #9684
+隅 > yú; #9685
+隆 > lóng; #9686
+隇 > wēi; #9687
+隈 > wēi; #9688
+隉 > niè; #9689
+隊 > dùi; #968A
+隋 > súi; #968B
+隌 > ăn; #968C
+隍 > huáng; #968D
+階 > jiē; #968E
+随 > súi; #968F
+隐 > yĭn; #9690
+隑 > gāi; #9691
+隒 > yăn; #9692
+隓 > hūi; #9693
+隔 > gé; #9694
+隕 > yŭn; #9695
+隖 > wù; #9696
+隗 > wĕi; #9697
+隘 > ài; #9698
+隙 > xì; #9699
+隚 > táng; #969A
+際 > jì; #969B
+障 > zhàng; #969C
+隝 > dăo; #969D
+隞 > áo; #969E
+隟 > xì; #969F
+隠 > yĭn; #96A0
+隢 > rào; #96A2
+隣 > lín; #96A3
+隤 > túi; #96A4
+隥 > dèng; #96A5
+隦 > pĭ; #96A6
+隧 > sùi; #96A7
+隨 > súi; #96A8
+隩 > yù; #96A9
+險 > xiăn; #96AA
+隫 > fēn; #96AB
+隬 > nĭ; #96AC
+隭 > ér; #96AD
+隮 > jī; #96AE
+隯 > dăo; #96AF
+隰 > xí; #96B0
+隱 > yĭn; #96B1
+隲 > é; #96B2
+隳 > hūi; #96B3
+隴 > lŏng; #96B4
+隵 > xī; #96B5
+隶 > lì; #96B6
+隷 > lì; #96B7
+隸 > lì; #96B8
+隹 > zhūi; #96B9
+隺 > hè; #96BA
+隻 > zhī; #96BB
+隼 > zhŭn; #96BC
+隽 > jùn; #96BD
+难 > nán; #96BE
+隿 > yì; #96BF
+雀 > què; #96C0
+雁 > yàn; #96C1
+雂 > qián; #96C2
+雃 > yă; #96C3
+雄 > xíong; #96C4
+雅 > yă; #96C5
+集 > jí; #96C6
+雇 > gù; #96C7
+雈 > huán; #96C8
+雉 > zhì; #96C9
+雊 > gòu; #96CA
+雋 > jùn; #96CB
+雌 > cí; #96CC
+雍 > yōng; #96CD
+雎 > jū; #96CE
+雏 > chú; #96CF
+雐 > hū; #96D0
+雑 > zá; #96D1
+雒 > lùo; #96D2
+雓 > yú; #96D3
+雔 > chóu; #96D4
+雕 > diāo; #96D5
+雖 > sūi; #96D6
+雗 > hàn; #96D7
+雘 > hùo; #96D8
+雙 > shuāng; #96D9
+雚 > guàn; #96DA
+雛 > chú; #96DB
+雜 > zá; #96DC
+雝 > yōng; #96DD
+雞 > jī; #96DE
+雟 > xī; #96DF
+雠 > chóu; #96E0
+雡 > lìu; #96E1
+離 > lí; #96E2
+難 > nán; #96E3
+雤 > xué; #96E4
+雥 > zá; #96E5
+雦 > jí; #96E6
+雧 > jí; #96E7
+雨 > yŭ; #96E8
+雩 > yú; #96E9
+雪 > xuĕ; #96EA
+雫 > nă; #96EB
+雬 > fŏu; #96EC
+雭 > sè; #96ED
+雮 > mù; #96EE
+雯 > wén; #96EF
+雰 > fēn; #96F0
+雱 > páng; #96F1
+雲 > yún; #96F2
+雳 > lì; #96F3
+雴 > lì; #96F4
+雵 > ăng; #96F5
+零 > líng; #96F6
+雷 > léi; #96F7
+雸 > án; #96F8
+雹 > báo; #96F9
+雺 > méng; #96FA
+電 > diàn; #96FB
+雼 > dàng; #96FC
+雽 > xíng; #96FD
+雾 > wù; #96FE
+雿 > zhào; #96FF
+需 > xū; #9700
+霁 > jì; #9701
+霂 > mù; #9702
+霃 > chén; #9703
+霄 > xiāo; #9704
+霅 > zhá; #9705
+霆 > tíng; #9706
+震 > zhèn; #9707
+霈 > pèi; #9708
+霉 > méi; #9709
+霊 > líng; #970A
+霋 > qī; #970B
+霌 > chōu; #970C
+霍 > hùo; #970D
+霎 > shà; #970E
+霏 > fēi; #970F
+霐 > wēng; #9710
+霑 > zhān; #9711
+霒 > yīn; #9712
+霓 > ní; #9713
+霔 > chòu; #9714
+霕 > tún; #9715
+霖 > lín; #9716
+霘 > dòng; #9718
+霙 > yīng; #9719
+霚 > wù; #971A
+霛 > líng; #971B
+霜 > shuāng; #971C
+霝 > líng; #971D
+霞 > xiá; #971E
+霟 > hóng; #971F
+霠 > yīn; #9720
+霡 > mò; #9721
+霢 > mài; #9722
+霣 > yŭn; #9723
+霤 > lìu; #9724
+霥 > mèng; #9725
+霦 > bīn; #9726
+霧 > wù; #9727
+霨 > wèi; #9728
+霩 > hùo; #9729
+霪 > yín; #972A
+霫 > xí; #972B
+霬 > yì; #972C
+霭 > ăi; #972D
+霮 > dàn; #972E
+霯 > dèng; #972F
+霰 > xiàn; #9730
+霱 > yù; #9731
+露 > lù; #9732
+霳 > lóng; #9733
+霴 > dài; #9734
+霵 > jí; #9735
+霶 > páng; #9736
+霷 > yáng; #9737
+霸 > bà; #9738
+霹 > pī; #9739
+霺 > wéi; #973A
+霼 > xĭ; #973C
+霽 > jì; #973D
+霾 > mái; #973E
+霿 > mèng; #973F
+靀 > méng; #9740
+靁 > léi; #9741
+靂 > lì; #9742
+靃 > hùo; #9743
+靄 > ăi; #9744
+靅 > fèi; #9745
+靆 > dài; #9746
+靇 > lóng; #9747
+靈 > líng; #9748
+靉 > ài; #9749
+靊 > fēng; #974A
+靋 > lì; #974B
+靌 > băo; #974C
+靎 > hè; #974E
+靏 > hè; #974F
+靐 > bìng; #9750
+靑 > qīng; #9751
+青 > qīng; #9752
+靓 > jìng; #9753
+靔 > tiān; #9754
+靕 > zhēn; #9755
+靖 > jìng; #9756
+靗 > chèng; #9757
+靘 > qìng; #9758
+静 > jìng; #9759
+靚 > jìng; #975A
+靛 > diàn; #975B
+靜 > jìng; #975C
+靝 > tiān; #975D
+非 > fēi; #975E
+靟 > fēi; #975F
+靠 > kào; #9760
+靡 > mĭ; #9761
+面 > miàn; #9762
+靣 > miàn; #9763
+靤 > pào; #9764
+靥 > yè; #9765
+靦 > tiăn; #9766
+靧 > hùi; #9767
+靨 > yè; #9768
+革 > gé; #9769
+靪 > dīng; #976A
+靫 > chā; #976B
+靬 > jiān; #976C
+靭 > rèn; #976D
+靮 > dí; #976E
+靯 > dù; #976F
+靰 > wù; #9770
+靱 > rèn; #9771
+靲 > qín; #9772
+靳 > jìn; #9773
+靴 > xuē; #9774
+靵 > nĭu; #9775
+靶 > bă; #9776
+靷 > yĭn; #9777
+靸 > să; #9778
+靹 > nà; #9779
+靺 > mò; #977A
+靻 > zŭ; #977B
+靼 > dá; #977C
+靽 > bàn; #977D
+靾 > yì; #977E
+靿 > yào; #977F
+鞀 > táo; #9780
+鞁 > túo; #9781
+鞂 > jiá; #9782
+鞃 > hóng; #9783
+鞄 > páo; #9784
+鞅 > yăng; #9785
+鞇 > yīn; #9787
+鞈 > jiá; #9788
+鞉 > táo; #9789
+鞊 > jí; #978A
+鞋 > xié; #978B
+鞌 > ān; #978C
+鞍 > ān; #978D
+鞎 > hén; #978E
+鞏 > gŏng; #978F
+鞑 > dá; #9791
+鞒 > qiāo; #9792
+鞓 > tīng; #9793
+鞔 > wăn; #9794
+鞕 > yìng; #9795
+鞖 > sūi; #9796
+鞗 > tiáo; #9797
+鞘 > qiào; #9798
+鞙 > xuàn; #9799
+鞚 > kòng; #979A
+鞛 > bĕng; #979B
+鞜 > tà; #979C
+鞝 > zhăng; #979D
+鞞 > bĭng; #979E
+鞟 > kùo; #979F
+鞠 > jú; #97A0
+鞡 > la; #97A1
+鞢 > xiè; #97A2
+鞣 > róu; #97A3
+鞤 > bāng; #97A4
+鞥 > yì; #97A5
+鞦 > qīu; #97A6
+鞧 > qīu; #97A7
+鞨 > hé; #97A8
+鞩 > xiào; #97A9
+鞪 > mù; #97AA
+鞫 > jú; #97AB
+鞬 > jiān; #97AC
+鞭 > biān; #97AD
+鞮 > dī; #97AE
+鞯 > jiān; #97AF
+鞱 > tāo; #97B1
+鞲 > gōu; #97B2
+鞳 > tà; #97B3
+鞴 > bèi; #97B4
+鞵 > xié; #97B5
+鞶 > pán; #97B6
+鞷 > gé; #97B7
+鞸 > bì; #97B8
+鞹 > kùo; #97B9
+鞺 > tang; #97BA
+鞻 > lóu; #97BB
+鞼 > gùi; #97BC
+鞽 > qiáo; #97BD
+鞾 > xuē; #97BE
+鞿 > jī; #97BF
+韀 > jiān; #97C0
+韁 > jiāng; #97C1
+韂 > chàn; #97C2
+韃 > dá; #97C3
+韄 > hùo; #97C4
+韅 > xiăn; #97C5
+韆 > qiān; #97C6
+韇 > dú; #97C7
+韈 > wà; #97C8
+韉 > jiān; #97C9
+韊 > lán; #97CA
+韋 > wéi; #97CB
+韌 > rèn; #97CC
+韍 > fú; #97CD
+韎 > mèi; #97CE
+韏 > juàn; #97CF
+韐 > gé; #97D0
+韑 > wĕi; #97D1
+韒 > qiào; #97D2
+韓 > hán; #97D3
+韔 > chàng; #97D4
+韖 > róu; #97D6
+韗 > xùn; #97D7
+韘 > shè; #97D8
+韙 > wĕi; #97D9
+韚 > gé; #97DA
+韛 > bèi; #97DB
+韜 > tāo; #97DC
+韝 > gōu; #97DD
+韞 > yùn; #97DE
+韠 > bì; #97E0
+韡 > wĕi; #97E1
+韢 > hùi; #97E2
+韣 > dú; #97E3
+韤 > wà; #97E4
+韥 > dú; #97E5
+韦 > wéi; #97E6
+韧 > rèn; #97E7
+韨 > fú; #97E8
+韩 > hán; #97E9
+韪 > wĕi; #97EA
+韫 > yùn; #97EB
+韬 > tāo; #97EC
+韭 > jĭu; #97ED
+韮 > jĭu; #97EE
+韯 > xiān; #97EF
+韰 > xiè; #97F0
+韱 > xiān; #97F1
+韲 > jī; #97F2
+音 > yīn; #97F3
+韴 > zá; #97F4
+韵 > yùn; #97F5
+韶 > sháo; #97F6
+韷 > lè; #97F7
+韸 > péng; #97F8
+韹 > héng; #97F9
+韺 > yīng; #97FA
+韻 > yùn; #97FB
+韼 > péng; #97FC
+韽 > yīn; #97FD
+韾 > yīn; #97FE
+響 > xiăng; #97FF
+頀 > hù; #9800
+頁 > yè; #9801
+頂 > dĭng; #9802
+頃 > qĭng; #9803
+頄 > pàn; #9804
+項 > xiàng; #9805
+順 > shùn; #9806
+頇 > hān; #9807
+須 > xū; #9808
+頉 > yí; #9809
+頊 > xù; #980A
+頋 > gù; #980B
+頌 > sòng; #980C
+頍 > kŭi; #980D
+頎 > qí; #980E
+頏 > háng; #980F
+預 > yù; #9810
+頑 > wán; #9811
+頒 > bān; #9812
+頓 > dùn; #9813
+頔 > dí; #9814
+頕 > dān; #9815
+頖 > pàn; #9816
+頗 > pŏ; #9817
+領 > lĭng; #9818
+頙 > cè; #9819
+頚 > jĭng; #981A
+頛 > lĕi; #981B
+頜 > hé; #981C
+頝 > qiāo; #981D
+頞 > è; #981E
+頟 > é; #981F
+頠 > wĕi; #9820
+頡 > jié; #9821
+頢 > guā; #9822
+頣 > shĕn; #9823
+頤 > yí; #9824
+頥 > shĕn; #9825
+頦 > hái; #9826
+頧 > dūi; #9827
+頨 > piān; #9828
+頩 > pīng; #9829
+頪 > lèi; #982A
+頫 > fŭ; #982B
+頬 > jiá; #982C
+頭 > tóu; #982D
+頮 > hùi; #982E
+頯 > kúi; #982F
+頰 > jiá; #9830
+頱 > lè; #9831
+頲 > tian; #9832
+頳 > chēng; #9833
+頴 > yĭng; #9834
+頵 > jūn; #9835
+頶 > hú; #9836
+頷 > hàn; #9837
+頸 > jĭng; #9838
+頹 > túi; #9839
+頺 > túi; #983A
+頻 > pín; #983B
+頼 > lài; #983C
+頽 > túi; #983D
+頾 > zī; #983E
+頿 > zī; #983F
+顀 > chúi; #9840
+顁 > dìng; #9841
+顂 > lài; #9842
+顃 > yán; #9843
+顄 > hàn; #9844
+顅 > jiān; #9845
+顆 > kē; #9846
+顇 > cùi; #9847
+顈 > jĭong; #9848
+顉 > qīn; #9849
+顊 > yí; #984A
+顋 > sāi; #984B
+題 > tí; #984C
+額 > é; #984D
+顎 > è; #984E
+顏 > yán; #984F
+顐 > hún; #9850
+顑 > kăn; #9851
+顒 > yóng; #9852
+顓 > zhuān; #9853
+顔 > yán; #9854
+顕 > xiăn; #9855
+顖 > xìn; #9856
+顗 > yĭ; #9857
+願 > yuàn; #9858
+顙 > săng; #9859
+顚 > diān; #985A
+顛 > diān; #985B
+顜 > jiăng; #985C
+顝 > kū; #985D
+類 > lèi; #985E
+顟 > liáo; #985F
+顠 > piào; #9860
+顡 > yì; #9861
+顢 > mán; #9862
+顣 > qī; #9863
+顤 > rào; #9864
+顥 > hào; #9865
+顦 > qiáo; #9866
+顧 > gù; #9867
+顨 > xùn; #9868
+顩 > qiān; #9869
+顪 > hūi; #986A
+顫 > zhàn; #986B
+顬 > rú; #986C
+顭 > hōng; #986D
+顮 > bīn; #986E
+顯 > xiăn; #986F
+顰 > pín; #9870
+顱 > lú; #9871
+顲 > lăn; #9872
+顳 > niè; #9873
+顴 > quán; #9874
+页 > yè; #9875
+顶 > dĭng; #9876
+顷 > qĭng; #9877
+顸 > hān; #9878
+项 > xiàng; #9879
+顺 > shùn; #987A
+须 > xū; #987B
+顼 > xù; #987C
+顽 > wán; #987D
+顾 > gù; #987E
+顿 > dùn; #987F
+颀 > qí; #9880
+颁 > bān; #9881
+颂 > sòng; #9882
+颃 > háng; #9883
+预 > yù; #9884
+颅 > lú; #9885
+领 > lĭng; #9886
+颇 > pŏ; #9887
+颈 > jĭng; #9888
+颉 > jié; #9889
+颊 > jiá; #988A
+颋 > tian; #988B
+颌 > hàn; #988C
+颍 > yĭng; #988D
+颎 > jĭong; #988E
+颏 > hái; #988F
+颐 > yí; #9890
+频 > pín; #9891
+颒 > hùi; #9892
+颓 > túi; #9893
+颔 > hàn; #9894
+颕 > yĭng; #9895
+颖 > yĭng; #9896
+颗 > kē; #9897
+题 > tí; #9898
+颙 > yóng; #9899
+颚 > è; #989A
+颛 > zhuān; #989B
+颜 > yán; #989C
+额 > é; #989D
+颞 > niè; #989E
+颟 > mán; #989F
+颠 > diān; #98A0
+颡 > săng; #98A1
+颢 > hào; #98A2
+颣 > lèi; #98A3
+颤 > zhàn; #98A4
+颥 > rú; #98A5
+颦 > pín; #98A6
+颧 > quán; #98A7
+風 > fēng; #98A8
+颩 > biāo; #98A9
+颫 > fú; #98AB
+颬 > xiā; #98AC
+颭 > zhăn; #98AD
+颮 > biāo; #98AE
+颯 > sà; #98AF
+颰 > bá; #98B0
+颱 > tái; #98B1
+颲 > liè; #98B2
+颳 > guā; #98B3
+颴 > xuàn; #98B4
+颵 > shào; #98B5
+颶 > jù; #98B6
+颷 > bī; #98B7
+颸 > sī; #98B8
+颹 > wĕi; #98B9
+颺 > yáng; #98BA
+颻 > yáo; #98BB
+颼 > sōu; #98BC
+颽 > kăi; #98BD
+颾 > sāo; #98BE
+颿 > fán; #98BF
+飀 > líu; #98C0
+飁 > xí; #98C1
+飂 > liáo; #98C2
+飃 > piāo; #98C3
+飄 > piāo; #98C4
+飅 > líu; #98C5
+飆 > biāo; #98C6
+飇 > biāo; #98C7
+飈 > biăo; #98C8
+飉 > liáo; #98C9
+飋 > sè; #98CB
+飌 > fēng; #98CC
+飍 > biāo; #98CD
+风 > fēng; #98CE
+飏 > yáng; #98CF
+飐 > zhăn; #98D0
+飑 > biāo; #98D1
+飒 > sà; #98D2
+飓 > jù; #98D3
+飔 > sī; #98D4
+飕 > sōu; #98D5
+飖 > yáo; #98D6
+飗 > líu; #98D7
+飘 > piāo; #98D8
+飙 > biāo; #98D9
+飚 > biāo; #98DA
+飛 > fēi; #98DB
+飜 > fān; #98DC
+飝 > fēi; #98DD
+飞 > fēi; #98DE
+食 > shí; #98DF
+飠 > shí; #98E0
+飡 > cān; #98E1
+飢 > jī; #98E2
+飣 > dìng; #98E3
+飤 > sì; #98E4
+飥 > tūo; #98E5
+飦 > zhān; #98E6
+飧 > sūn; #98E7
+飨 > xiăng; #98E8
+飩 > tún; #98E9
+飪 > rèn; #98EA
+飫 > yù; #98EB
+飬 > juàn; #98EC
+飭 > chì; #98ED
+飮 > yĭn; #98EE
+飯 > fàn; #98EF
+飰 > fàn; #98F0
+飱 > sūn; #98F1
+飲 > yĭn; #98F2
+飳 > zhù; #98F3
+飴 > yí; #98F4
+飵 > zhăi; #98F5
+飶 > bì; #98F6
+飷 > jiĕ; #98F7
+飸 > tāo; #98F8
+飹 > lĭu; #98F9
+飺 > cí; #98FA
+飻 > tiè; #98FB
+飼 > sì; #98FC
+飽 > băo; #98FD
+飾 > shì; #98FE
+飿 > dùo; #98FF
+餀 > hài; #9900
+餁 > rèn; #9901
+餂 > tiăn; #9902
+餃 > jiăo; #9903
+餄 > jiá; #9904
+餅 > bĭng; #9905
+餆 > yáo; #9906
+餇 > tóng; #9907
+餈 > cí; #9908
+餉 > xiăng; #9909
+養 > yăng; #990A
+餋 > yăng; #990B
+餌 > ĕr; #990C
+餍 > yàn; #990D
+餎 > le; #990E
+餏 > yī; #990F
+餐 > cān; #9910
+餑 > bó; #9911
+餒 > nĕi; #9912
+餓 > è; #9913
+餔 > bū; #9914
+餕 > jùn; #9915
+餖 > dòu; #9916
+餗 > sù; #9917
+餘 > yú; #9918
+餙 > shì; #9919
+餚 > yáo; #991A
+餛 > hún; #991B
+餜 > gŭo; #991C
+餝 > shì; #991D
+餞 > jiàn; #991E
+餟 > zhùi; #991F
+餠 > bĭng; #9920
+餡 > xiàn; #9921
+餢 > bù; #9922
+餣 > yè; #9923
+餤 > tán; #9924
+餥 > fĕi; #9925
+餦 > zhāng; #9926
+餧 > wèi; #9927
+館 > guăn; #9928
+餩 > è; #9929
+餪 > nuăn; #992A
+餫 > hún; #992B
+餬 > hú; #992C
+餭 > huáng; #992D
+餮 > tiè; #992E
+餯 > hùi; #992F
+餰 > jiān; #9930
+餱 > hóu; #9931
+餲 > hé; #9932
+餳 > xíng; #9933
+餴 > fēn; #9934
+餵 > wèi; #9935
+餶 > gŭ; #9936
+餷 > chā; #9937
+餸 > sòng; #9938
+餹 > táng; #9939
+餺 > bó; #993A
+餻 > gāo; #993B
+餼 > xì; #993C
+餽 > kùi; #993D
+餾 > lìu; #993E
+餿 > sōu; #993F
+饀 > táo; #9940
+饁 > yè; #9941
+饂 > yún; #9942
+饃 > mó; #9943
+饄 > táng; #9944
+饅 > mán; #9945
+饆 > bì; #9946
+饇 > yù; #9947
+饈 > xīu; #9948
+饉 > jĭn; #9949
+饊 > săn; #994A
+饋 > kùi; #994B
+饌 > zhuàn; #994C
+饍 > shàn; #994D
+饎 > chì; #994E
+饏 > dàn; #994F
+饐 > yì; #9950
+饑 > jī; #9951
+饒 > ráo; #9952
+饓 > chēng; #9953
+饔 > yōng; #9954
+饕 > tāo; #9955
+饖 > hùi; #9956
+饗 > xiăng; #9957
+饘 > zhān; #9958
+饙 > fēn; #9959
+饚 > hài; #995A
+饛 > méng; #995B
+饜 > yàn; #995C
+饝 > mó; #995D
+饞 > chán; #995E
+饟 > xiăng; #995F
+饠 > lúo; #9960
+饡 > zuàn; #9961
+饢 > năng; #9962
+饣 > shí; #9963
+饤 > dìng; #9964
+饥 > jī; #9965
+饦 > tūo; #9966
+饧 > xíng; #9967
+饨 > tún; #9968
+饩 > xì; #9969
+饪 > rèn; #996A
+饫 > yù; #996B
+饬 > chì; #996C
+饭 > fàn; #996D
+饮 > yĭn; #996E
+饯 > jiàn; #996F
+饰 > shì; #9970
+饱 > băo; #9971
+饲 > sì; #9972
+饳 > dùo; #9973
+饴 > yí; #9974
+饵 > ĕr; #9975
+饶 > ráo; #9976
+饷 > xiăng; #9977
+饸 > jiá; #9978
+饹 > le; #9979
+饺 > jiăo; #997A
+饻 > yī; #997B
+饼 > bĭng; #997C
+饽 > bó; #997D
+饾 > dòu; #997E
+饿 > è; #997F
+馀 > yú; #9980
+馁 > nĕi; #9981
+馂 > jùn; #9982
+馃 > gŭo; #9983
+馄 > hún; #9984
+馅 > xiàn; #9985
+馆 > guăn; #9986
+馇 > chā; #9987
+馈 > kùi; #9988
+馉 > gŭ; #9989
+馊 > sōu; #998A
+馋 > chán; #998B
+馌 > yè; #998C
+馍 > mó; #998D
+馎 > bó; #998E
+馏 > lìu; #998F
+馐 > xīu; #9990
+馑 > jĭn; #9991
+馒 > mán; #9992
+馓 > săn; #9993
+馔 > zhuàn; #9994
+馕 > năng; #9995
+首 > shŏu; #9996
+馗 > kúi; #9997
+馘 > gúo; #9998
+香 > xiāng; #9999
+馚 > fén; #999A
+馛 > bá; #999B
+馜 > nĭ; #999C
+馝 > bì; #999D
+馞 > bó; #999E
+馟 > tú; #999F
+馠 > hān; #99A0
+馡 > fēi; #99A1
+馢 > jiān; #99A2
+馣 > ān; #99A3
+馤 > ăi; #99A4
+馥 > fù; #99A5
+馦 > xiān; #99A6
+馧 > wēn; #99A7
+馨 > xīn; #99A8
+馩 > fén; #99A9
+馪 > bīn; #99AA
+馫 > xīng; #99AB
+馬 > mă; #99AC
+馭 > yù; #99AD
+馮 > féng; #99AE
+馯 > hàn; #99AF
+馰 > dì; #99B0
+馱 > túo; #99B1
+馲 > tūo; #99B2
+馳 > chí; #99B3
+馴 > xún; #99B4
+馵 > zhù; #99B5
+馶 > zhī; #99B6
+馷 > pèi; #99B7
+馸 > xìn; #99B8
+馹 > rì; #99B9
+馺 > sà; #99BA
+馻 > yĭn; #99BB
+馼 > wén; #99BC
+馽 > zhí; #99BD
+馾 > dàn; #99BE
+馿 > lǘ; #99BF
+駀 > yóu; #99C0
+駁 > bó; #99C1
+駂 > băo; #99C2
+駃 > kuài; #99C3
+駄 > túo; #99C4
+駅 > yì; #99C5
+駆 > qū; #99C6
+駈 > qū; #99C8
+駉 > jīong; #99C9
+駊 > bŏ; #99CA
+駋 > zhāo; #99CB
+駌 > yuān; #99CC
+駍 > pēng; #99CD
+駎 > zhòu; #99CE
+駏 > jù; #99CF
+駐 > zhù; #99D0
+駑 > nú; #99D1
+駒 > jū; #99D2
+駓 > pí; #99D3
+駔 > zăng; #99D4
+駕 > jià; #99D5
+駖 > líng; #99D6
+駗 > zhēn; #99D7
+駘 > tái; #99D8
+駙 > fù; #99D9
+駚 > yăng; #99DA
+駛 > shĭ; #99DB
+駜 > bì; #99DC
+駝 > túo; #99DD
+駞 > túo; #99DE
+駟 > sì; #99DF
+駠 > líu; #99E0
+駡 > mà; #99E1
+駢 > pián; #99E2
+駣 > táo; #99E3
+駤 > zhì; #99E4
+駥 > róng; #99E5
+駦 > téng; #99E6
+駧 > dòng; #99E7
+駨 > xún; #99E8
+駩 > quán; #99E9
+駪 > shēn; #99EA
+駫 > jīong; #99EB
+駬 > ĕr; #99EC
+駭 > hài; #99ED
+駮 > bó; #99EE
+駯 > zhu; #99EF
+駰 > yīn; #99F0
+駱 > lùo; #99F1
+駳 > dàn; #99F3
+駴 > xiè; #99F4
+駵 > líu; #99F5
+駶 > jú; #99F6
+駷 > sŏng; #99F7
+駸 > qīn; #99F8
+駹 > máng; #99F9
+駺 > liáng; #99FA
+駻 > hàn; #99FB
+駼 > tú; #99FC
+駽 > xuàn; #99FD
+駾 > tùi; #99FE
+駿 > jùn; #99FF
+騀 > é; #9A00
+騁 > chĕng; #9A01
+騂 > xīn; #9A02
+騃 > ái; #9A03
+騄 > lù; #9A04
+騅 > zhūi; #9A05
+騆 > zhōu; #9A06
+騇 > shĕ; #9A07
+騈 > pián; #9A08
+騉 > kūn; #9A09
+騊 > táo; #9A0A
+騋 > lái; #9A0B
+騌 > zōng; #9A0C
+騍 > kè; #9A0D
+騎 > qí; #9A0E
+騏 > qí; #9A0F
+騐 > yàn; #9A10
+騑 > fēi; #9A11
+騒 > sāo; #9A12
+験 > yăn; #9A13
+騔 > jié; #9A14
+騕 > yăo; #9A15
+騖 > wù; #9A16
+騗 > piàn; #9A17
+騘 > cōng; #9A18
+騙 > piàn; #9A19
+騚 > qián; #9A1A
+騛 > fēi; #9A1B
+騜 > huáng; #9A1C
+騝 > jiān; #9A1D
+騞 > hùo; #9A1E
+騟 > yù; #9A1F
+騠 > tí; #9A20
+騡 > quán; #9A21
+騢 > xiá; #9A22
+騣 > zōng; #9A23
+騤 > kúi; #9A24
+騥 > róu; #9A25
+騦 > sī; #9A26
+騧 > guā; #9A27
+騨 > túo; #9A28
+騩 > kùi; #9A29
+騪 > sōu; #9A2A
+騫 > qiān; #9A2B
+騬 > chéng; #9A2C
+騭 > zhì; #9A2D
+騮 > líu; #9A2E
+騯 > páng; #9A2F
+騰 > téng; #9A30
+騱 > xī; #9A31
+騲 > căo; #9A32
+騳 > dú; #9A33
+騴 > yàn; #9A34
+騵 > yuán; #9A35
+騶 > zōu; #9A36
+騷 > sāo; #9A37
+騸 > shàn; #9A38
+騹 > lí; #9A39
+騺 > zhì; #9A3A
+騻 > shuăng; #9A3B
+騼 > lù; #9A3C
+騽 > xí; #9A3D
+騾 > lúo; #9A3E
+騿 > zhāng; #9A3F
+驀 > mò; #9A40
+驁 > áo; #9A41
+驂 > cān; #9A42
+驃 > piào; #9A43
+驄 > cōng; #9A44
+驅 > qū; #9A45
+驆 > bì; #9A46
+驇 > zhì; #9A47
+驈 > yù; #9A48
+驉 > xū; #9A49
+驊 > huá; #9A4A
+驋 > bō; #9A4B
+驌 > sù; #9A4C
+驍 > xiāo; #9A4D
+驎 > lín; #9A4E
+驏 > chăn; #9A4F
+驐 > dūn; #9A50
+驑 > líu; #9A51
+驒 > túo; #9A52
+驓 > zēng; #9A53
+驔 > tán; #9A54
+驕 > jiāo; #9A55
+驖 > tiĕ; #9A56
+驗 > yàn; #9A57
+驘 > lúo; #9A58
+驙 > zhān; #9A59
+驚 > jīng; #9A5A
+驛 > yì; #9A5B
+驜 > yè; #9A5C
+驝 > tūo; #9A5D
+驞 > bīn; #9A5E
+驟 > zòu; #9A5F
+驠 > yàn; #9A60
+驡 > péng; #9A61
+驢 > lǘ; #9A62
+驣 > téng; #9A63
+驤 > xiāng; #9A64
+驥 > jì; #9A65
+驦 > shuāng; #9A66
+驧 > jú; #9A67
+驨 > xī; #9A68
+驩 > huān; #9A69
+驪 > lí; #9A6A
+驫 > biāo; #9A6B
+马 > mă; #9A6C
+驭 > yù; #9A6D
+驮 > túo; #9A6E
+驯 > xún; #9A6F
+驰 > chí; #9A70
+驱 > qū; #9A71
+驲 > rì; #9A72
+驳 > bó; #9A73
+驴 > lǘ; #9A74
+驵 > zăng; #9A75
+驶 > shĭ; #9A76
+驷 > sì; #9A77
+驸 > fù; #9A78
+驹 > jū; #9A79
+驺 > zōu; #9A7A
+驻 > zhù; #9A7B
+驼 > túo; #9A7C
+驽 > nú; #9A7D
+驾 > jià; #9A7E
+驿 > yì; #9A7F
+骀 > tái; #9A80
+骁 > xiāo; #9A81
+骂 > mà; #9A82
+骃 > yīn; #9A83
+骄 > jiāo; #9A84
+骅 > huá; #9A85
+骆 > lùo; #9A86
+骇 > hài; #9A87
+骈 > pián; #9A88
+骉 > biāo; #9A89
+骊 > lí; #9A8A
+骋 > chĕng; #9A8B
+验 > yàn; #9A8C
+骍 > xīn; #9A8D
+骎 > qīn; #9A8E
+骏 > jùn; #9A8F
+骐 > qí; #9A90
+骑 > qí; #9A91
+骒 > kè; #9A92
+骓 > zhūi; #9A93
+骔 > zōng; #9A94
+骕 > sù; #9A95
+骖 > cān; #9A96
+骗 > piàn; #9A97
+骘 > zhì; #9A98
+骙 > kúi; #9A99
+骚 > sāo; #9A9A
+骛 > wù; #9A9B
+骜 > áo; #9A9C
+骝 > líu; #9A9D
+骞 > qiān; #9A9E
+骟 > shàn; #9A9F
+骠 > piào; #9AA0
+骡 > lúo; #9AA1
+骢 > cōng; #9AA2
+骣 > chăn; #9AA3
+骤 > zòu; #9AA4
+骥 > jì; #9AA5
+骦 > shuāng; #9AA6
+骧 > xiāng; #9AA7
+骨 > gŭ; #9AA8
+骩 > wĕi; #9AA9
+骪 > wĕi; #9AAA
+骫 > wĕi; #9AAB
+骬 > yú; #9AAC
+骭 > gàn; #9AAD
+骮 > yì; #9AAE
+骯 > āng; #9AAF
+骰 > tóu; #9AB0
+骱 > xiè; #9AB1
+骲 > bāo; #9AB2
+骳 > bì; #9AB3
+骴 > chī; #9AB4
+骵 > tĭ; #9AB5
+骶 > dĭ; #9AB6
+骷 > kū; #9AB7
+骸 > hái; #9AB8
+骹 > qiāo; #9AB9
+骺 > gòu; #9ABA
+骻 > kuà; #9ABB
+骼 > gé; #9ABC
+骽 > tŭi; #9ABD
+骾 > gĕng; #9ABE
+骿 > pián; #9ABF
+髀 > bì; #9AC0
+髁 > kē; #9AC1
+髂 > kà; #9AC2
+髃 > yú; #9AC3
+髄 > sŭi; #9AC4
+髅 > lóu; #9AC5
+髆 > bó; #9AC6
+髇 > xiāo; #9AC7
+髈 > páng; #9AC8
+髉 > bō; #9AC9
+髊 > cī; #9ACA
+髋 > kuān; #9ACB
+髌 > bìn; #9ACC
+髍 > mó; #9ACD
+髎 > liáo; #9ACE
+髏 > lóu; #9ACF
+髐 > náo; #9AD0
+髑 > dú; #9AD1
+髒 > zāng; #9AD2
+髓 > sŭi; #9AD3
+體 > tĭ; #9AD4
+髕 > bìn; #9AD5
+髖 > kuān; #9AD6
+髗 > lú; #9AD7
+高 > gāo; #9AD8
+髙 > gāo; #9AD9
+髚 > qiào; #9ADA
+髛 > kāo; #9ADB
+髜 > qiāo; #9ADC
+髝 > lào; #9ADD
+髞 > zào; #9ADE
+髟 > biāo; #9ADF
+髠 > kūn; #9AE0
+髡 > kūn; #9AE1
+髢 > tì; #9AE2
+髣 > făng; #9AE3
+髤 > xīu; #9AE4
+髥 > rán; #9AE5
+髦 > máo; #9AE6
+髧 > dàn; #9AE7
+髨 > kūn; #9AE8
+髩 > bìn; #9AE9
+髪 > fà; #9AEA
+髫 > tiáo; #9AEB
+髬 > peng; #9AEC
+髭 > zī; #9AED
+髮 > fă; #9AEE
+髯 > rán; #9AEF
+髰 > tì; #9AF0
+髱 > pào; #9AF1
+髲 > pī; #9AF2
+髳 > máo; #9AF3
+髴 > fú; #9AF4
+髵 > ér; #9AF5
+髶 > róng; #9AF6
+髷 > qū; #9AF7
+髸 > gong; #9AF8
+髹 > xīu; #9AF9
+髺 > guà; #9AFA
+髻 > jì; #9AFB
+髼 > péng; #9AFC
+髽 > zhuā; #9AFD
+髾 > shāo; #9AFE
+髿 > shā; #9AFF
+鬀 > tì; #9B00
+鬁 > lì; #9B01
+鬂 > bìn; #9B02
+鬃 > zōng; #9B03
+鬄 > tì; #9B04
+鬅 > péng; #9B05
+鬆 > sōng; #9B06
+鬇 > zhēng; #9B07
+鬈 > quán; #9B08
+鬉 > zōng; #9B09
+鬊 > shùn; #9B0A
+鬋 > jiān; #9B0B
+鬌 > dŭo; #9B0C
+鬍 > hú; #9B0D
+鬎 > là; #9B0E
+鬏 > jīu; #9B0F
+鬐 > qí; #9B10
+鬑 > lián; #9B11
+鬒 > zhĕn; #9B12
+鬓 > bìn; #9B13
+鬔 > péng; #9B14
+鬕 > mò; #9B15
+鬖 > sān; #9B16
+鬗 > màn; #9B17
+鬘 > mán; #9B18
+鬙 > sēng; #9B19
+鬚 > xū; #9B1A
+鬛 > liè; #9B1B
+鬜 > qiān; #9B1C
+鬝 > qiān; #9B1D
+鬞 > nóng; #9B1E
+鬟 > huán; #9B1F
+鬠 > kuài; #9B20
+鬡 > níng; #9B21
+鬢 > bìn; #9B22
+鬣 > liè; #9B23
+鬤 > ráng; #9B24
+鬥 > dòu; #9B25
+鬦 > dòu; #9B26
+鬧 > nào; #9B27
+鬨 > hōng; #9B28
+鬩 > xì; #9B29
+鬪 > dòu; #9B2A
+鬫 > hăn; #9B2B
+鬬 > dòu; #9B2C
+鬭 > dòu; #9B2D
+鬮 > jīu; #9B2E
+鬯 > chàng; #9B2F
+鬰 > yù; #9B30
+鬱 > yù; #9B31
+鬲 > lì; #9B32
+鬳 > juàn; #9B33
+鬴 > fŭ; #9B34
+鬵 > qián; #9B35
+鬶 > gūi; #9B36
+鬷 > zōng; #9B37
+鬸 > lìu; #9B38
+鬹 > gūi; #9B39
+鬺 > shāng; #9B3A
+鬻 > yù; #9B3B
+鬼 > gŭi; #9B3C
+鬽 > mèi; #9B3D
+鬾 > jì; #9B3E
+鬿 > qí; #9B3F
+魀 > jiè; #9B40
+魁 > kúi; #9B41
+魂 > hún; #9B42
+魃 > bá; #9B43
+魄 > pò; #9B44
+魅 > mèi; #9B45
+魆 > xù; #9B46
+魇 > yăn; #9B47
+魈 > xiāo; #9B48
+魉 > liăng; #9B49
+魊 > yù; #9B4A
+魋 > túi; #9B4B
+魌 > qī; #9B4C
+魍 > wăng; #9B4D
+魎 > liăng; #9B4E
+魏 > wèi; #9B4F
+魐 > jiān; #9B50
+魑 > chī; #9B51
+魒 > piāo; #9B52
+魓 > bì; #9B53
+魔 > mó; #9B54
+魕 > jĭ; #9B55
+魖 > xū; #9B56
+魗 > chŏu; #9B57
+魘 > yăn; #9B58
+魙 > zhăn; #9B59
+魚 > yú; #9B5A
+魛 > dāo; #9B5B
+魜 > rén; #9B5C
+魝 > jì; #9B5D
+魟 > gōng; #9B5F
+魠 > túo; #9B60
+魡 > diào; #9B61
+魢 > jĭ; #9B62
+魣 > xù; #9B63
+魤 > é; #9B64
+魥 > è; #9B65
+魦 > shā; #9B66
+魧 > háng; #9B67
+魨 > tún; #9B68
+魩 > mò; #9B69
+魪 > jiè; #9B6A
+魫 > shĕn; #9B6B
+魬 > făn; #9B6C
+魭 > yuán; #9B6D
+魮 > bí; #9B6E
+魯 > lŭ; #9B6F
+魰 > wén; #9B70
+魱 > hú; #9B71
+魲 > lú; #9B72
+魳 > zá; #9B73
+魴 > fáng; #9B74
+魵 > fén; #9B75
+魶 > nà; #9B76
+魷 > yóu; #9B77
+魺 > hé; #9B7A
+魻 > xiá; #9B7B
+魼 > qū; #9B7C
+魽 > hān; #9B7D
+魾 > pí; #9B7E
+魿 > líng; #9B7F
+鮀 > túo; #9B80
+鮁 > bō; #9B81
+鮂 > qíu; #9B82
+鮃 > píng; #9B83
+鮄 > fú; #9B84
+鮅 > bì; #9B85
+鮆 > jì; #9B86
+鮇 > wèi; #9B87
+鮈 > jū; #9B88
+鮉 > diāo; #9B89
+鮊 > bó; #9B8A
+鮋 > yóu; #9B8B
+鮌 > gŭn; #9B8C
+鮍 > pī; #9B8D
+鮎 > nián; #9B8E
+鮏 > xīng; #9B8F
+鮐 > tái; #9B90
+鮑 > bào; #9B91
+鮒 > fù; #9B92
+鮓 > zhă; #9B93
+鮔 > jù; #9B94
+鮕 > gū; #9B95
+鮙 > tà; #9B99
+鮚 > jié; #9B9A
+鮛 > shù; #9B9B
+鮜 > hòu; #9B9C
+鮝 > xiăng; #9B9D
+鮞 > ér; #9B9E
+鮟 > àn; #9B9F
+鮠 > wéi; #9BA0
+鮡 > tiāo; #9BA1
+鮢 > zhū; #9BA2
+鮣 > yìn; #9BA3
+鮤 > liè; #9BA4
+鮥 > lùo; #9BA5
+鮦 > tóng; #9BA6
+鮧 > yí; #9BA7
+鮨 > qí; #9BA8
+鮩 > bìng; #9BA9
+鮪 > wĕi; #9BAA
+鮫 > jiăo; #9BAB
+鮬 > bù; #9BAC
+鮭 > gūi; #9BAD
+鮮 > xiān; #9BAE
+鮯 > gé; #9BAF
+鮰 > húi; #9BB0
+鮳 > kăo; #9BB3
+鮵 > dúo; #9BB5
+鮶 > jūn; #9BB6
+鮷 > tí; #9BB7
+鮸 > măn; #9BB8
+鮹 > xiāo; #9BB9
+鮺 > ză; #9BBA
+鮻 > shā; #9BBB
+鮼 > qīn; #9BBC
+鮽 > yú; #9BBD
+鮾 > nĕi; #9BBE
+鮿 > zhé; #9BBF
+鯀 > gŭn; #9BC0
+鯁 > gĕng; #9BC1
+鯂 > su; #9BC2
+鯃 > wú; #9BC3
+鯄 > qíu; #9BC4
+鯅 > tíng; #9BC5
+鯆 > fŭ; #9BC6
+鯇 > wăn; #9BC7
+鯈 > yóu; #9BC8
+鯉 > lĭ; #9BC9
+鯊 > shā; #9BCA
+鯋 > shā; #9BCB
+鯌 > gào; #9BCC
+鯍 > méng; #9BCD
+鯒 > yŏng; #9BD2
+鯓 > ní; #9BD3
+鯔 > zī; #9BD4
+鯕 > qí; #9BD5
+鯖 > qīng; #9BD6
+鯗 > xiăng; #9BD7
+鯘 > nĕi; #9BD8
+鯙 > chún; #9BD9
+鯚 > jì; #9BDA
+鯛 > diāo; #9BDB
+鯜 > qiè; #9BDC
+鯝 > gù; #9BDD
+鯞 > zhŏu; #9BDE
+鯟 > dōng; #9BDF
+鯠 > lái; #9BE0
+鯡 > fēi; #9BE1
+鯢 > ní; #9BE2
+鯣 > yì; #9BE3
+鯤 > kūn; #9BE4
+鯥 > lù; #9BE5
+鯦 > jìu; #9BE6
+鯧 > chāng; #9BE7
+鯨 > jīng; #9BE8
+鯩 > lún; #9BE9
+鯪 > líng; #9BEA
+鯫 > zōu; #9BEB
+鯬 > lí; #9BEC
+鯭 > mĕng; #9BED
+鯮 > zōng; #9BEE
+鯯 > zhì; #9BEF
+鯰 > nián; #9BF0
+鯴 > shī; #9BF4
+鯵 > shēn; #9BF5
+鯶 > hŭn; #9BF6
+鯷 > shì; #9BF7
+鯸 > hóu; #9BF8
+鯹 > xīng; #9BF9
+鯺 > zhū; #9BFA
+鯻 > là; #9BFB
+鯼 > zōng; #9BFC
+鯽 > jì; #9BFD
+鯾 > biān; #9BFE
+鯿 > biān; #9BFF
+鰀 > huàn; #9C00
+鰁 > quán; #9C01
+鰂 > zé; #9C02
+鰃 > wēi; #9C03
+鰄 > wēi; #9C04
+鰅 > yú; #9C05
+鰆 > qūn; #9C06
+鰇 > róu; #9C07
+鰈 > dié; #9C08
+鰉 > huáng; #9C09
+鰊 > liàn; #9C0A
+鰋 > yăn; #9C0B
+鰌 > qíu; #9C0C
+鰍 > qīu; #9C0D
+鰎 > jiàn; #9C0E
+鰏 > bì; #9C0F
+鰐 > è; #9C10
+鰑 > yáng; #9C11
+鰒 > fù; #9C12
+鰓 > sāi; #9C13
+鰔 > jiăn; #9C14
+鰕 > xiá; #9C15
+鰖 > tŭo; #9C16
+鰗 > hú; #9C17
+鰙 > rùo; #9C19
+鰛 > wēn; #9C1B
+鰜 > jiān; #9C1C
+鰝 > hào; #9C1D
+鰞 > wū; #9C1E
+鰟 > fáng; #9C1F
+鰠 > sāo; #9C20
+鰡 > líu; #9C21
+鰢 > mă; #9C22
+鰣 > shí; #9C23
+鰤 > shī; #9C24
+鰥 > yín; #9C25
+鰦 > z̄; #9C26
+鰧 > téng; #9C27
+鰨 > tà; #9C28
+鰩 > yáo; #9C29
+鰪 > gé; #9C2A
+鰫 > róng; #9C2B
+鰬 > qián; #9C2C
+鰭 > qí; #9C2D
+鰮 > wēn; #9C2E
+鰯 > rùo; #9C2F
+鰱 > lián; #9C31
+鰲 > áo; #9C32
+鰳 > lè; #9C33
+鰴 > hūi; #9C34
+鰵 > mĭn; #9C35
+鰶 > jì; #9C36
+鰷 > tiáo; #9C37
+鰸 > qū; #9C38
+鰹 > jiān; #9C39
+鰺 > sāo; #9C3A
+鰻 > mán; #9C3B
+鰼 > xí; #9C3C
+鰽 > qíu; #9C3D
+鰾 > biào; #9C3E
+鰿 > jī; #9C3F
+鱀 > jì; #9C40
+鱁 > zhú; #9C41
+鱂 > jiāng; #9C42
+鱃 > qīu; #9C43
+鱄 > zhuān; #9C44
+鱅 > yóng; #9C45
+鱆 > zhāng; #9C46
+鱇 > kāng; #9C47
+鱈 > xuĕ; #9C48
+鱉 > biē; #9C49
+鱊 > jué; #9C4A
+鱋 > qū; #9C4B
+鱌 > xiàng; #9C4C
+鱍 > bō; #9C4D
+鱎 > jiāo; #9C4E
+鱏 > xún; #9C4F
+鱐 > sù; #9C50
+鱑 > huáng; #9C51
+鱒 > zùn; #9C52
+鱓 > shàn; #9C53
+鱔 > shàn; #9C54
+鱕 > fān; #9C55
+鱖 > jué; #9C56
+鱗 > lín; #9C57
+鱘 > xún; #9C58
+鱙 > miáo; #9C59
+鱚 > xĭ; #9C5A
+鱝 > fèn; #9C5D
+鱞 > guān; #9C5E
+鱟 > hòu; #9C5F
+鱠 > kuài; #9C60
+鱡 > zéi; #9C61
+鱢 > sāo; #9C62
+鱣 > zhān; #9C63
+鱤 > găn; #9C64
+鱥 > gùi; #9C65
+鱦 > shéng; #9C66
+鱧 > lĭ; #9C67
+鱨 > cháng; #9C68
+鱬 > rú; #9C6C
+鱭 > jì; #9C6D
+鱮 > xù; #9C6E
+鱯 > hùo; #9C6F
+鱱 > lì; #9C71
+鱲 > liè; #9C72
+鱳 > lì; #9C73
+鱴 > miè; #9C74
+鱵 > zhēn; #9C75
+鱶 > xiăng; #9C76
+鱷 > è; #9C77
+鱸 > lú; #9C78
+鱹 > guàn; #9C79
+鱺 > lí; #9C7A
+鱻 > xiān; #9C7B
+鱼 > yú; #9C7C
+鱽 > dāo; #9C7D
+鱾 > jĭ; #9C7E
+鱿 > yóu; #9C7F
+鲀 > tún; #9C80
+鲁 > lŭ; #9C81
+鲂 > fáng; #9C82
+鲃 > bā; #9C83
+鲄 > hé; #9C84
+鲅 > bō; #9C85
+鲆 > píng; #9C86
+鲇 > nián; #9C87
+鲈 > lú; #9C88
+鲉 > yóu; #9C89
+鲊 > zhă; #9C8A
+鲋 > fù; #9C8B
+鲌 > bó; #9C8C
+鲍 > bào; #9C8D
+鲎 > hòu; #9C8E
+鲏 > pī; #9C8F
+鲐 > tái; #9C90
+鲑 > gūi; #9C91
+鲒 > jié; #9C92
+鲓 > kăo; #9C93
+鲔 > wĕi; #9C94
+鲕 > ér; #9C95
+鲖 > tóng; #9C96
+鲗 > zé; #9C97
+鲘 > hòu; #9C98
+鲙 > kuài; #9C99
+鲚 > jì; #9C9A
+鲛 > jiăo; #9C9B
+鲜 > xiān; #9C9C
+鲝 > ză; #9C9D
+鲞 > xiăng; #9C9E
+鲟 > xún; #9C9F
+鲠 > gĕng; #9CA0
+鲡 > lí; #9CA1
+鲢 > lián; #9CA2
+鲣 > jiān; #9CA3
+鲤 > lĭ; #9CA4
+鲥 > shí; #9CA5
+鲦 > tiáo; #9CA6
+鲧 > gŭn; #9CA7
+鲨 > shā; #9CA8
+鲩 > wăn; #9CA9
+鲪 > jūn; #9CAA
+鲫 > jì; #9CAB
+鲬 > yŏng; #9CAC
+鲭 > qīng; #9CAD
+鲮 > líng; #9CAE
+鲯 > qí; #9CAF
+鲰 > zōu; #9CB0
+鲱 > fēi; #9CB1
+鲲 > kūn; #9CB2
+鲳 > chāng; #9CB3
+鲴 > gù; #9CB4
+鲵 > ní; #9CB5
+鲶 > nián; #9CB6
+鲷 > diāo; #9CB7
+鲸 > jīng; #9CB8
+鲹 > shēn; #9CB9
+鲺 > shī; #9CBA
+鲻 > zī; #9CBB
+鲼 > fèn; #9CBC
+鲽 > dié; #9CBD
+鲾 > bì; #9CBE
+鲿 > cháng; #9CBF
+鳀 > shì; #9CC0
+鳁 > wēn; #9CC1
+鳂 > wēi; #9CC2
+鳃 > sāi; #9CC3
+鳄 > è; #9CC4
+鳅 > qīu; #9CC5
+鳆 > fù; #9CC6
+鳇 > huáng; #9CC7
+鳈 > quán; #9CC8
+鳉 > jiāng; #9CC9
+鳊 > biān; #9CCA
+鳋 > sāo; #9CCB
+鳌 > áo; #9CCC
+鳍 > qí; #9CCD
+鳎 > tà; #9CCE
+鳏 > yín; #9CCF
+鳐 > yáo; #9CD0
+鳑 > fáng; #9CD1
+鳒 > jiān; #9CD2
+鳓 > lè; #9CD3
+鳔 > biào; #9CD4
+鳕 > xuĕ; #9CD5
+鳖 > biē; #9CD6
+鳗 > mán; #9CD7
+鳘 > mĭn; #9CD8
+鳙 > yóng; #9CD9
+鳚 > wèi; #9CDA
+鳛 > xí; #9CDB
+鳜 > jué; #9CDC
+鳝 > shàn; #9CDD
+鳞 > lín; #9CDE
+鳟 > zùn; #9CDF
+鳠 > hùo; #9CE0
+鳡 > găn; #9CE1
+鳢 > lĭ; #9CE2
+鳣 > zhān; #9CE3
+鳤 > guăn; #9CE4
+鳥 > niăo; #9CE5
+鳦 > yĭ; #9CE6
+鳧 > fú; #9CE7
+鳨 > lì; #9CE8
+鳩 > jīu; #9CE9
+鳪 > bŭ; #9CEA
+鳫 > yàn; #9CEB
+鳬 > fú; #9CEC
+鳭 > diāo; #9CED
+鳮 > jī; #9CEE
+鳯 > fèng; #9CEF
+鳱 > gān; #9CF1
+鳲 > shī; #9CF2
+鳳 > fèng; #9CF3
+鳴 > míng; #9CF4
+鳵 > băo; #9CF5
+鳶 > yuān; #9CF6
+鳷 > zhī; #9CF7
+鳸 > hù; #9CF8
+鳹 > qín; #9CF9
+鳺 > fū; #9CFA
+鳻 > fēn; #9CFB
+鳼 > wén; #9CFC
+鳽 > jiān; #9CFD
+鳾 > shī; #9CFE
+鳿 > yù; #9CFF
+鴀 > fŏu; #9D00
+鴁 > yiāo; #9D01
+鴂 > juè; #9D02
+鴃 > jué; #9D03
+鴄 > pī; #9D04
+鴅 > huān; #9D05
+鴆 > zhèn; #9D06
+鴇 > băo; #9D07
+鴈 > yàn; #9D08
+鴉 > yā; #9D09
+鴊 > zhèng; #9D0A
+鴋 > fāng; #9D0B
+鴌 > fèng; #9D0C
+鴍 > wén; #9D0D
+鴎 > ōu; #9D0E
+鴏 > tè; #9D0F
+鴐 > jiā; #9D10
+鴑 > nú; #9D11
+鴒 > líng; #9D12
+鴓 > miè; #9D13
+鴔 > fú; #9D14
+鴕 > túo; #9D15
+鴖 > wén; #9D16
+鴗 > lì; #9D17
+鴘 > biàn; #9D18
+鴙 > zhì; #9D19
+鴚 > gē; #9D1A
+鴛 > yuān; #9D1B
+鴜 > zī; #9D1C
+鴝 > qú; #9D1D
+鴞 > xiāo; #9D1E
+鴟 > zhī; #9D1F
+鴠 > dàn; #9D20
+鴡 > jū; #9D21
+鴢 > yòu; #9D22
+鴣 > gū; #9D23
+鴤 > zhōng; #9D24
+鴥 > yù; #9D25
+鴦 > yāng; #9D26
+鴧 > ròng; #9D27
+鴨 > yā; #9D28
+鴩 > tiĕ; #9D29
+鴪 > yù; #9D2A
+鴬 > yīng; #9D2C
+鴭 > zhūi; #9D2D
+鴮 > wū; #9D2E
+鴯 > ér; #9D2F
+鴰 > guā; #9D30
+鴱 > ài; #9D31
+鴲 > zhī; #9D32
+鴳 > yàn; #9D33
+鴴 > héng; #9D34
+鴵 > jiāo; #9D35
+鴶 > jí; #9D36
+鴷 > liè; #9D37
+鴸 > zhū; #9D38
+鴹 > rén; #9D39
+鴺 > yí; #9D3A
+鴻 > hóng; #9D3B
+鴼 > lùo; #9D3C
+鴽 > rú; #9D3D
+鴾 > móu; #9D3E
+鴿 > gē; #9D3F
+鵀 > rèn; #9D40
+鵁 > jiāo; #9D41
+鵂 > xīu; #9D42
+鵃 > zhōu; #9D43
+鵄 > zhī; #9D44
+鵅 > lùo; #9D45
+鵉 > luán; #9D49
+鵊 > jiá; #9D4A
+鵋 > jì; #9D4B
+鵌 > yú; #9D4C
+鵍 > huān; #9D4D
+鵎 > tŭo; #9D4E
+鵏 > bū; #9D4F
+鵐 > wú; #9D50
+鵑 > juān; #9D51
+鵒 > yù; #9D52
+鵓 > bó; #9D53
+鵔 > xùn; #9D54
+鵕 > xùn; #9D55
+鵖 > bì; #9D56
+鵗 > xī; #9D57
+鵘 > jùn; #9D58
+鵙 > jú; #9D59
+鵚 > tú; #9D5A
+鵛 > jīng; #9D5B
+鵜 > tí; #9D5C
+鵝 > é; #9D5D
+鵞 > é; #9D5E
+鵟 > kuáng; #9D5F
+鵠 > hú; #9D60
+鵡 > wŭ; #9D61
+鵢 > shēn; #9D62
+鵣 > lài; #9D63
+鵦 > lù; #9D66
+鵧 > píng; #9D67
+鵨 > shū; #9D68
+鵩 > fú; #9D69
+鵪 > ān; #9D6A
+鵫 > zhào; #9D6B
+鵬 > péng; #9D6C
+鵭 > qín; #9D6D
+鵮 > qiān; #9D6E
+鵯 > bēi; #9D6F
+鵰 > diāo; #9D70
+鵱 > lù; #9D71
+鵲 > què; #9D72
+鵳 > jiān; #9D73
+鵴 > jú; #9D74
+鵵 > tù; #9D75
+鵶 > yā; #9D76
+鵷 > yuān; #9D77
+鵸 > qí; #9D78
+鵹 > lí; #9D79
+鵺 > yè; #9D7A
+鵻 > zhūi; #9D7B
+鵼 > kōng; #9D7C
+鵽 > zhùi; #9D7D
+鵾 > kūn; #9D7E
+鵿 > shēng; #9D7F
+鶀 > qí; #9D80
+鶁 > jīng; #9D81
+鶂 > yì; #9D82
+鶃 > yì; #9D83
+鶄 > jīng; #9D84
+鶅 > zī; #9D85
+鶆 > lái; #9D86
+鶇 > dōng; #9D87
+鶈 > qī; #9D88
+鶉 > chún; #9D89
+鶊 > gēng; #9D8A
+鶋 > jū; #9D8B
+鶌 > qū; #9D8C
+鶏 > jī; #9D8F
+鶐 > shù; #9D90
+鶒 > chì; #9D92
+鶓 > miáo; #9D93
+鶔 > róu; #9D94
+鶕 > ān; #9D95
+鶖 > qīu; #9D96
+鶗 > tí; #9D97
+鶘 > hú; #9D98
+鶙 > tí; #9D99
+鶚 > è; #9D9A
+鶛 > jiē; #9D9B
+鶜 > máo; #9D9C
+鶝 > fú; #9D9D
+鶞 > chūn; #9D9E
+鶟 > tú; #9D9F
+鶠 > yăn; #9DA0
+鶡 > hé; #9DA1
+鶢 > yuán; #9DA2
+鶣 > piān; #9DA3
+鶤 > yùn; #9DA4
+鶥 > méi; #9DA5
+鶦 > hú; #9DA6
+鶧 > yīng; #9DA7
+鶨 > dùn; #9DA8
+鶩 > mù; #9DA9
+鶪 > jú; #9DAA
+鶬 > cāng; #9DAC
+鶭 > făng; #9DAD
+鶮 > gù; #9DAE
+鶯 > yīng; #9DAF
+鶰 > yuán; #9DB0
+鶱 > xuān; #9DB1
+鶲 > wēng; #9DB2
+鶳 > shī; #9DB3
+鶴 > hè; #9DB4
+鶵 > chú; #9DB5
+鶶 > táng; #9DB6
+鶷 > xià; #9DB7
+鶸 > rùo; #9DB8
+鶹 > líu; #9DB9
+鶺 > jí; #9DBA
+鶻 > gú; #9DBB
+鶼 > jiān; #9DBC
+鶽 > zhŭn; #9DBD
+鶾 > hàn; #9DBE
+鶿 > zī; #9DBF
+鷀 > zī; #9DC0
+鷁 > nì; #9DC1
+鷂 > yào; #9DC2
+鷃 > yàn; #9DC3
+鷄 > jī; #9DC4
+鷅 > lì; #9DC5
+鷆 > tián; #9DC6
+鷇 > kòu; #9DC7
+鷈 > tī; #9DC8
+鷉 > tī; #9DC9
+鷊 > nì; #9DCA
+鷋 > tú; #9DCB
+鷌 > mă; #9DCC
+鷍 > jiāo; #9DCD
+鷎 > gāo; #9DCE
+鷏 > tián; #9DCF
+鷐 > chén; #9DD0
+鷑 > lì; #9DD1
+鷒 > zhuān; #9DD2
+鷓 > zhè; #9DD3
+鷔 > áo; #9DD4
+鷕 > yăo; #9DD5
+鷖 > yī; #9DD6
+鷗 > ōu; #9DD7
+鷘 > chì; #9DD8
+鷙 > zhì; #9DD9
+鷚 > liáo; #9DDA
+鷛 > róng; #9DDB
+鷜 > lóu; #9DDC
+鷝 > bì; #9DDD
+鷞 > shuāng; #9DDE
+鷟 > zhúo; #9DDF
+鷠 > yú; #9DE0
+鷡 > wú; #9DE1
+鷢 > jué; #9DE2
+鷣 > yín; #9DE3
+鷤 > quán; #9DE4
+鷥 > sī; #9DE5
+鷦 > jiāo; #9DE6
+鷧 > yì; #9DE7
+鷨 > huā; #9DE8
+鷩 > bì; #9DE9
+鷪 > yīng; #9DEA
+鷫 > sù; #9DEB
+鷬 > huáng; #9DEC
+鷭 > fán; #9DED
+鷮 > jiāo; #9DEE
+鷯 > liáo; #9DEF
+鷰 > yàn; #9DF0
+鷱 > kāo; #9DF1
+鷲 > jìu; #9DF2
+鷳 > xián; #9DF3
+鷴 > xián; #9DF4
+鷵 > tú; #9DF5
+鷶 > măi; #9DF6
+鷷 > zūn; #9DF7
+鷸 > yù; #9DF8
+鷹 > yīng; #9DF9
+鷺 > lù; #9DFA
+鷻 > tuán; #9DFB
+鷼 > xián; #9DFC
+鷽 > xué; #9DFD
+鷾 > yì; #9DFE
+鷿 > pì; #9DFF
+鸀 > shú; #9E00
+鸁 > lúo; #9E01
+鸂 > qī; #9E02
+鸃 > yí; #9E03
+鸄 > jí; #9E04
+鸅 > zhé; #9E05
+鸆 > yú; #9E06
+鸇 > zhān; #9E07
+鸈 > yè; #9E08
+鸉 > yáng; #9E09
+鸊 > pì; #9E0A
+鸋 > níng; #9E0B
+鸌 > hùo; #9E0C
+鸍 > mí; #9E0D
+鸎 > yīng; #9E0E
+鸏 > méng; #9E0F
+鸐 > dí; #9E10
+鸑 > yuè; #9E11
+鸒 > yú; #9E12
+鸓 > lĕi; #9E13
+鸔 > bào; #9E14
+鸕 > lú; #9E15
+鸖 > hè; #9E16
+鸗 > lóng; #9E17
+鸘 > shuāng; #9E18
+鸙 > yuè; #9E19
+鸚 > yīng; #9E1A
+鸛 > guàn; #9E1B
+鸜 > qú; #9E1C
+鸝 > lí; #9E1D
+鸞 > luán; #9E1E
+鸟 > niăo; #9E1F
+鸠 > jīu; #9E20
+鸡 > jī; #9E21
+鸢 > yuān; #9E22
+鸣 > míng; #9E23
+鸤 > shī; #9E24
+鸥 > ōu; #9E25
+鸦 > yā; #9E26
+鸧 > cāng; #9E27
+鸨 > băo; #9E28
+鸩 > zhèn; #9E29
+鸪 > gū; #9E2A
+鸫 > dōng; #9E2B
+鸬 > lú; #9E2C
+鸭 > yā; #9E2D
+鸮 > xiāo; #9E2E
+鸯 > yāng; #9E2F
+鸰 > líng; #9E30
+鸱 > zhī; #9E31
+鸲 > qú; #9E32
+鸳 > yuān; #9E33
+鸴 > xué; #9E34
+鸵 > túo; #9E35
+鸶 > sī; #9E36
+鸷 > zhì; #9E37
+鸸 > ér; #9E38
+鸹 > guā; #9E39
+鸺 > xīu; #9E3A
+鸻 > héng; #9E3B
+鸼 > zhōu; #9E3C
+鸽 > gē; #9E3D
+鸾 > luán; #9E3E
+鸿 > hóng; #9E3F
+鹀 > wú; #9E40
+鹁 > bó; #9E41
+鹂 > lí; #9E42
+鹃 > juān; #9E43
+鹄 > hú; #9E44
+鹅 > é; #9E45
+鹆 > yù; #9E46
+鹇 > xián; #9E47
+鹈 > tí; #9E48
+鹉 > wŭ; #9E49
+鹊 > què; #9E4A
+鹋 > miáo; #9E4B
+鹌 > ān; #9E4C
+鹍 > kūn; #9E4D
+鹎 > bēi; #9E4E
+鹏 > péng; #9E4F
+鹐 > qiān; #9E50
+鹑 > chún; #9E51
+鹒 > gēng; #9E52
+鹓 > yuān; #9E53
+鹔 > sù; #9E54
+鹕 > hú; #9E55
+鹖 > hé; #9E56
+鹗 > è; #9E57
+鹘 > gú; #9E58
+鹙 > qīu; #9E59
+鹚 > zī; #9E5A
+鹛 > méi; #9E5B
+鹜 > mù; #9E5C
+鹝 > nì; #9E5D
+鹞 > yào; #9E5E
+鹟 > wēng; #9E5F
+鹠 > líu; #9E60
+鹡 > jí; #9E61
+鹢 > nì; #9E62
+鹣 > jiān; #9E63
+鹤 > hè; #9E64
+鹥 > yī; #9E65
+鹦 > yīng; #9E66
+鹧 > zhè; #9E67
+鹨 > liáo; #9E68
+鹩 > liáo; #9E69
+鹪 > jiāo; #9E6A
+鹫 > jìu; #9E6B
+鹬 > yù; #9E6C
+鹭 > lù; #9E6D
+鹮 > xuán; #9E6E
+鹯 > zhān; #9E6F
+鹰 > yīng; #9E70
+鹱 > hùo; #9E71
+鹲 > méng; #9E72
+鹳 > guàn; #9E73
+鹴 > shuāng; #9E74
+鹵 > lŭ; #9E75
+鹶 > jīn; #9E76
+鹷 > líng; #9E77
+鹸 > jiăn; #9E78
+鹹 > xián; #9E79
+鹺 > cúo; #9E7A
+鹻 > jiăn; #9E7B
+鹼 > jiăn; #9E7C
+鹽 > yán; #9E7D
+鹾 > cúo; #9E7E
+鹿 > lù; #9E7F
+麀 > yōu; #9E80
+麁 > cū; #9E81
+麂 > jĭ; #9E82
+麃 > biāo; #9E83
+麄 > cū; #9E84
+麅 > biāo; #9E85
+麆 > zhù; #9E86
+麇 > jūn; #9E87
+麈 > zhŭ; #9E88
+麉 > jiān; #9E89
+麊 > mí; #9E8A
+麋 > mí; #9E8B
+麌 > wú; #9E8C
+麍 > líu; #9E8D
+麎 > chén; #9E8E
+麏 > jūn; #9E8F
+麐 > lín; #9E90
+麑 > ní; #9E91
+麒 > qí; #9E92
+麓 > lù; #9E93
+麔 > jìu; #9E94
+麕 > jūn; #9E95
+麖 > jīng; #9E96
+麗 > lì; #9E97
+麘 > xiāng; #9E98
+麙 > yán; #9E99
+麚 > jiā; #9E9A
+麛 > mí; #9E9B
+麜 > lì; #9E9C
+麝 > shè; #9E9D
+麞 > zhāng; #9E9E
+麟 > lín; #9E9F
+麠 > jīng; #9EA0
+麡 > jī; #9EA1
+麢 > líng; #9EA2
+麣 > yán; #9EA3
+麤 > cū; #9EA4
+麥 > mài; #9EA5
+麦 > mài; #9EA6
+麧 > gē; #9EA7
+麨 > chăo; #9EA8
+麩 > fū; #9EA9
+麪 > miăn; #9EAA
+麫 > miăn; #9EAB
+麬 > fū; #9EAC
+麭 > pào; #9EAD
+麮 > qù; #9EAE
+麯 > qú; #9EAF
+麰 > móu; #9EB0
+麱 > fū; #9EB1
+麲 > xiàn; #9EB2
+麳 > lái; #9EB3
+麴 > qú; #9EB4
+麵 > miàn; #9EB5
+麷 > fēng; #9EB7
+麸 > fū; #9EB8
+麹 > qú; #9EB9
+麺 > miàn; #9EBA
+麻 > má; #9EBB
+麼 > mo; #9EBC
+麽 > mo; #9EBD
+麾 > hūi; #9EBE
+黀 > zōu; #9EC0
+黁 > nēn; #9EC1
+黂 > fén; #9EC2
+黃 > huáng; #9EC3
+黄 > huáng; #9EC4
+黅 > jīn; #9EC5
+黆 > guāng; #9EC6
+黇 > tiān; #9EC7
+黈 > tŏu; #9EC8
+黉 > héng; #9EC9
+黊 > xī; #9ECA
+黋 > kuăng; #9ECB
+黌 > héng; #9ECC
+黍 > shŭ; #9ECD
+黎 > lí; #9ECE
+黏 > nián; #9ECF
+黐 > chī; #9ED0
+黑 > hēi; #9ED1
+黒 > hēi; #9ED2
+黓 > yì; #9ED3
+黔 > qián; #9ED4
+黕 > dān; #9ED5
+黖 > xì; #9ED6
+黗 > tuăn; #9ED7
+默 > mò; #9ED8
+黙 > mò; #9ED9
+黚 > qián; #9EDA
+黛 > dài; #9EDB
+黜 > chù; #9EDC
+黝 > yŏu; #9EDD
+點 > diăn; #9EDE
+黟 > yī; #9EDF
+黠 > xiá; #9EE0
+黡 > yăn; #9EE1
+黢 > qū; #9EE2
+黣 > mĕi; #9EE3
+黤 > yăn; #9EE4
+黥 > jīng; #9EE5
+黦 > yù; #9EE6
+黧 > lí; #9EE7
+黨 > dăng; #9EE8
+黩 > dú; #9EE9
+黪 > căn; #9EEA
+黫 > yīn; #9EEB
+黬 > àn; #9EEC
+黭 > yān; #9EED
+黮 > tăn; #9EEE
+黯 > àn; #9EEF
+黰 > zhĕn; #9EF0
+黱 > dài; #9EF1
+黲 > căn; #9EF2
+黳 > yī; #9EF3
+黴 > méi; #9EF4
+黵 > dăn; #9EF5
+黶 > yăn; #9EF6
+黷 > dú; #9EF7
+黸 > lú; #9EF8
+黹 > zhĭ; #9EF9
+黺 > fĕn; #9EFA
+黻 > fù; #9EFB
+黼 > fŭ; #9EFC
+黽 > mĭn; #9EFD
+黾 > mĭn; #9EFE
+黿 > yuán; #9EFF
+鼀 > cù; #9F00
+鼁 > qù; #9F01
+鼂 > cháo; #9F02
+鼃 > wā; #9F03
+鼄 > zhū; #9F04
+鼅 > zhī; #9F05
+鼆 > máng; #9F06
+鼇 > áo; #9F07
+鼈 > biē; #9F08
+鼉 > túo; #9F09
+鼊 > bì; #9F0A
+鼋 > yuán; #9F0B
+鼌 > cháo; #9F0C
+鼍 > túo; #9F0D
+鼎 > dĭng; #9F0E
+鼏 > mì; #9F0F
+鼐 > nài; #9F10
+鼑 > dĭng; #9F11
+鼒 > zī; #9F12
+鼓 > gŭ; #9F13
+鼔 > gŭ; #9F14
+鼕 > dōng; #9F15
+鼖 > fén; #9F16
+鼗 > táo; #9F17
+鼘 > yuān; #9F18
+鼙 > pí; #9F19
+鼚 > chāng; #9F1A
+鼛 > gāo; #9F1B
+鼜 > qì; #9F1C
+鼝 > yuān; #9F1D
+鼞 > tāng; #9F1E
+鼟 > tēng; #9F1F
+鼠 > shŭ; #9F20
+鼡 > shŭ; #9F21
+鼢 > fén; #9F22
+鼣 > fèi; #9F23
+鼤 > wén; #9F24
+鼥 > bá; #9F25
+鼦 > diāo; #9F26
+鼧 > túo; #9F27
+鼨 > tóng; #9F28
+鼩 > qú; #9F29
+鼪 > shēng; #9F2A
+鼫 > shí; #9F2B
+鼬 > yòu; #9F2C
+鼭 > shí; #9F2D
+鼮 > tíng; #9F2E
+鼯 > wú; #9F2F
+鼰 > niàn; #9F30
+鼱 > jīng; #9F31
+鼲 > hún; #9F32
+鼳 > jú; #9F33
+鼴 > yăn; #9F34
+鼵 > tú; #9F35
+鼶 > tí; #9F36
+鼷 > xī; #9F37
+鼸 > xiăn; #9F38
+鼹 > yăn; #9F39
+鼺 > léi; #9F3A
+鼻 > bí; #9F3B
+鼼 > yăo; #9F3C
+鼽 > qíu; #9F3D
+鼾 > hān; #9F3E
+鼿 > wū; #9F3F
+齀 > wù; #9F40
+齁 > hóu; #9F41
+齂 > xì; #9F42
+齃 > gé; #9F43
+齄 > zhā; #9F44
+齅 > xìu; #9F45
+齆 > wèng; #9F46
+齇 > zhā; #9F47
+齈 > nóng; #9F48
+齉 > nàng; #9F49
+齊 > qí; #9F4A
+齋 > zhāi; #9F4B
+齌 > jì; #9F4C
+齍 > zī; #9F4D
+齎 > jī; #9F4E
+齏 > jī; #9F4F
+齐 > qí; #9F50
+齑 > jī; #9F51
+齒 > chĭ; #9F52
+齓 > chèn; #9F53
+齔 > chèn; #9F54
+齕 > hé; #9F55
+齖 > yá; #9F56
+齗 > kĕn; #9F57
+齘 > xiè; #9F58
+齙 > páo; #9F59
+齚 > cùo; #9F5A
+齛 > shì; #9F5B
+齜 > zī; #9F5C
+齝 > chī; #9F5D
+齞 > niàn; #9F5E
+齟 > jŭ; #9F5F
+齠 > tiáo; #9F60
+齡 > líng; #9F61
+齢 > líng; #9F62
+齣 > chū; #9F63
+齤 > quán; #9F64
+齥 > xiè; #9F65
+齦 > kĕn; #9F66
+齧 > niè; #9F67
+齨 > jìu; #9F68
+齩 > yăo; #9F69
+齪 > chùo; #9F6A
+齫 > kŭn; #9F6B
+齬 > yŭ; #9F6C
+齭 > chŭ; #9F6D
+齮 > yĭ; #9F6E
+齯 > ní; #9F6F
+齰 > cùo; #9F70
+齱 > zōu; #9F71
+齲 > qŭ; #9F72
+齳 > nĕn; #9F73
+齴 > xiăn; #9F74
+齵 > óu; #9F75
+齶 > è; #9F76
+齷 > wò; #9F77
+齸 > yì; #9F78
+齹 > chūo; #9F79
+齺 > zōu; #9F7A
+齻 > diān; #9F7B
+齼 > chŭ; #9F7C
+齽 > jìn; #9F7D
+齾 > yà; #9F7E
+齿 > chĭ; #9F7F
+龀 > chèn; #9F80
+龁 > hé; #9F81
+龂 > kĕn; #9F82
+龃 > jŭ; #9F83
+龄 > líng; #9F84
+龅 > páo; #9F85
+龆 > tiáo; #9F86
+龇 > zī; #9F87
+龈 > kĕn; #9F88
+龉 > yŭ; #9F89
+龊 > chùo; #9F8A
+龋 > qŭ; #9F8B
+龌 > wò; #9F8C
+龍 > lóng; #9F8D
+龎 > páng; #9F8E
+龏 > gōng; #9F8F
+龐 > páng; #9F90
+龑 > yăn; #9F91
+龒 > lóng; #9F92
+龓 > lóng; #9F93
+龔 > gōng; #9F94
+龕 > kān; #9F95
+龖 > tà; #9F96
+龗 > líng; #9F97
+龘 > tà; #9F98
+龙 > lóng; #9F99
+龚 > gōng; #9F9A
+龛 > kān; #9F9B
+龜 > gūi; #9F9C
+龝 > qīu; #9F9D
+龞 > biē; #9F9E
+龟 > gūi; #9F9F
+龠 > yuè; #9FA0
+龡 > chùi; #9FA1
+龢 > hé; #9FA2
+龣 > jué; #9FA3
+龤 > xié; #9FA4
+龥 > yù; #9FA5
+癩 > là; #F90E
+兀 > wù; #FA0C
+嗀 > hùo; #FA0D
+塚 > zhŏng; #FA10
+晴 > qíng; #FA12
+凞 > xī; #FA15
+猪 > zhū; #FA16
+益 > yì; #FA17
+礼 > lĭ; #FA18
+神 > shén; #FA19
+祥 > xiáng; #FA1A
+福 > fú; #FA1B
+靖 > jìng; #FA1C
+精 > jīng; #FA1D
+羽 > yŭ; #FA1E
+諸 > zhū; #FA22
+逸 > yì; #FA25
+都 > dū; #FA26
+飯 > fàn; #FA2A
+飼 > sì; #FA2B
+館 > guăn; #FA2C
+鶴 > hè; #FA2D
+
+# eof
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt
new file mode 100644
index 00000000000..fe353f3a024
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_English.txt
@@ -0,0 +1,6366 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2001, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+# Date: Tue Jan 23 12:42:02 2001
+#--------------------------------------------------------------------
+
+# Kanji-English
+
+丁>'[male adult]';
+七>'[seven]';
+万>'[ten thousand]';
+丈>'[unit of length equal 3.3 meters]';
+三>'[three]';
+上>'[top]';
+下>'[under]';
+不>'[no]';
+与>'[and]';
+丐>'[beggar]';
+丑>'[clown]';
+且>'[moreover]';
+丕>'[great]';
+世>'[generation]';
+丗>'[thirty]';
+丘>'[hill]';
+丙>'[third of heavenly stems]';
+丞>'[assist]';
+両>'[two]';
+並>'[equal to]';
+个>'[numerary adjunct]';
+中>'[central]';
+丱>'[child''s hairstyle bound in two tufts]';
+串>'[string]';
+丶>'[dot]';
+丸>'[small round object]';
+丹>'[cinnabar (native HgS)]';
+主>'[master]';
+丼>'[bowl of food]';
+丿>'[line]';
+乂>'[govern]';
+乃>'[then]';
+久>'[long time (ago)]';
+之>'[''s (marks preceding phrase as modifier of following phrase)]';
+乍>'[suddenly]';
+乎>'[interrogative or exclamatory final particle]';
+乏>'[lack]';
+乕>'[tiger]';
+乖>'[rebel]';
+乗>'[ride]';
+乘>'[ride]';
+乙>'[second heaven''s stem]';
+九>'[nine]';
+乞>'[beg]';
+也>'[also]';
+乢>'[lid]';
+乱>'[confusion]';
+乳>'[breast]';
+乾>'[dry]';
+亀>'[turtle or tortoise]';
+亂>'[confusion]';
+亅>'[hook]';
+了>'[to finish]';
+予>'[I]';
+争>'[dispute]';
+亊>'[affair]';
+事>'[affair]';
+二>'[two]';
+于>'[in]';
+云>'[say]';
+互>'[mutually]';
+五>'[five]';
+井>'[well]';
+亘>'[extend across]';
+亙>'[extend across]';
+些>'[little]';
+亜>'[asia]';
+亞>'[asia]';
+亟>'[urgently]';
+亠>'[head]';
+亡>'[death]';
+亢>'[high]';
+交>'[mix]';
+亥>'[last of 12 earth branches]';
+亦>'[also]';
+亨>'[smoothly]';
+享>'[enjoy]';
+京>'[capital city]';
+亭>'[pavilion]';
+亮>'[bright]';
+亰>'[capital city]';
+亳>'[name of district in Anhui]';
+亶>'[sincere]';
+人>'[man]';
+什>'[file of ten soldiers]';
+仁>'[humaneness]';
+仂>'[surplus or excess]';
+仄>'[slanting]';
+仆>'[fall forward]';
+仇>'[enemy]';
+今>'[now]';
+介>'[forerunner]';
+仍>'[yet]';
+从>'[from]';
+仏>'[buddha]';
+仔>'[small thing]';
+仕>'[official]';
+他>'[other]';
+仗>'[rely upon]';
+付>'[give]';
+仙>'[Taoist super-being]';
+仝>'[together]';
+仞>'[ancient unit of measure (8 feet)]';
+仟>'[one thousand]';
+代>'[replace]';
+令>'[command]';
+以>'[by means of]';
+仭>'[ancient unit of measure (8 feet)]';
+仮>'[falsehood]';
+仰>'[raise the head to look]';
+仲>'[middle brother]';
+件>'[numerary adjunct for article]';
+价>'[price]';
+任>'[trust to]';
+企>'[plan a project]';
+伉>'[compare]';
+伊>'[third person pronoun]';
+伍>'[five]';
+伎>'[talent]';
+伏>'[crouch]';
+伐>'[cut down]';
+休>'[rest]';
+会>'[assemble]';
+伜>'[deputy]';
+伝>'[summon]';
+伯>'[older brother]';
+估>'[merchant]';
+伴>'[companion]';
+伶>'[lonely]';
+伸>'[extend]';
+伺>'[serve]';
+似>'[resemble]';
+伽>'[transcription of sanskrit gha in buddhist texts ('ëmâæ' \"samgha\")]';
+佃>'[tenant farmer]';
+但>'[only]';
+佇>'[wait]';
+位>'[throne]';
+低>'[low]';
+住>'[reside]';
+佐>'[assist]';
+佑>'[help]';
+体>'[body]';
+何>'[what]';
+佗>'[other]';
+余>'[I]';
+佚>'[indulge in pleasures]';
+佛>'[buddha (contraction of MC 'bhiêtdha')]';
+作>'[make]';
+佝>'[rickets]';
+佞>'[flattery]';
+佩>'[belt ornament]';
+佯>'[pretend]';
+佰>'[hundred]';
+佳>'[good]';
+併>'[combine]';
+佶>'[strong]';
+佻>'[frivolous]';
+佼>'[beautiful]';
+使>'[cause]';
+侃>'[upright and strong]';
+來>'[come]';
+侈>'[luxurious]';
+例>'[precedent]';
+侍>'[serve]';
+侏>'[small]';
+侑>'[help]';
+侖>'[logical reasons]';
+侘>'[disappointed]';
+供>'[supply]';
+依>'[rely on]';
+侠>'[chivalrous person]';
+価>'[price]';
+侫>'[flattery]';
+侭>'[complete]';
+侮>'[insult]';
+侯>'[marquis]';
+侵>'[invade]';
+侶>'[companion]';
+便>'[convenience]';
+係>'[bind]';
+促>'[urge]';
+俄>'[sudden(ly)]';
+俊>'[talented]';
+俎>'[chopping board or block]';
+俐>'[smooth]';
+俑>'[wooden figure buried with dead]';
+俔>'[like]';
+俗>'[social customs]';
+俘>'[prisoner of war]';
+俚>'[rustic]';
+俛>'[make effort]';
+保>'[protect]';
+俟>'[wait for]';
+信>'[trust]';
+俣>'[big]';
+#"俣>'[big]'",
+俥>'[rickshaw]';
+修>'[study]';
+俯>'[bow down]';
+俳>'[actor]';
+俵>'[divide]';
+俶>'[start]';
+俸>'[wages]';
+俺>'[personal pronoun]';
+俾>'[so that]';
+倅>'[deputy]';
+倆>'[clever]';
+倉>'[granary]';
+個>'[numerary adjunct]';
+倍>'[times]';
+倏>'[hastily]';
+們>'[adjunct pronoun indicate plural]';
+倒>'[fall over]';
+倔>'[stubborn]';
+倖>'[lucky]';
+候>'[wait]';
+倚>'[rely on]';
+借>'[borrow]';
+倡>'[guide]';
+倣>'[imitate]';
+値>'[price]';
+倥>'[boorish]';
+倦>'[be tired of]';
+倨>'[arrogant]';
+倩>'[beautiful]';
+倪>'[feeble]';
+倫>'[normal human relationships]';
+倬>'[noticeable]';
+倭>'[dwarf]';
+倶>'[all]';
+倹>'[temperate]';
+偃>'[cease]';
+假>'[falsehood]';
+偈>'[brave]';
+偉>'[great]';
+偏>'[inclined one side]';
+偐>'[false]';
+偕>'[together]';
+偖>'[rip up]';
+做>'[work]';
+停>'[stop]';
+健>'[strong]';
+偬>'[urgent]';
+偲>'[talented]';
+側>'[side]';
+偵>'[spy]';
+偶>'[accidentally]';
+偸>'[to steal]';
+偽>'[false]';
+傀>'[great]';
+傅>'[tutor]';
+傍>'[by side of]';
+傑>'[hero]';
+傘>'[umbrella]';
+備>'[prepare]';
+傚>'[imitate]';
+催>'[press]';
+傭>'[hire]';
+傲>'[proud]';
+傳>'[summon]';
+傴>'[humpback]';
+債>'[debt]';
+傷>'[wound]';
+傾>'[upset]';
+僂>'[humpback]';
+僅>'[only]';
+僉>'[all]';
+僊>'[Taoist super-being]';
+働>'[labor]';
+像>'[picture]';
+僑>'[sojourn]';
+僕>'[slave]';
+僖>'[joy]';
+僚>'[companion]';
+僞>'[false]';
+僣>'[assume]';
+僥>'[be lucky]';
+僧>'[buddhist priest]';
+僭>'[assume]';
+僮>'[page]';
+僵>'[stiff and motionless]';
+價>'[price]';
+僻>'[out-of-the-way]';
+儀>'[ceremony]';
+儁>'[outstanding]';
+儂>'[I]';
+億>'[hundred million]';
+儉>'[temperate]';
+儒>'[confucian scholar]';
+儔>'[companion]';
+儕>'[a company]';
+#"儕>'[a company]'",
+儘>'[utmost]';
+#"儘>'[utmost]'",
+償>'[repay]';
+儡>'[puppet]';
+優>'[superior]';
+儲>'[save money]';
+儷>'[spouse]';
+儺>'[rich]';
+儻>'[if]';
+儼>'[grave]';
+儿>'[son]';
+兀>'[to cut off the feet]';
+允>'[to grant]';
+元>'[first]';
+兄>'[elder brother]';
+充>'[fill]';
+兆>'[omen]';
+兇>'[atrocious]';
+先>'[first]';
+光>'[light]';
+克>'[gram]';
+兌>'[cash]';
+免>'[spare]';
+兎>'[rabbit]';
+児>'[son]';
+兒>'[son]';
+兔>'[rabbit]';
+党>'[political party]';
+兜>'[pouch]';
+兢>'[fearful]';
+入>'[enter]';
+全>'[maintain]';
+兩>'[two]';
+兪>'[surname]';
+八>'[eight]';
+公>'[fair]';
+六>'[number six]';
+兮>'[exclamatory particle]';
+共>'[together with]';
+兵>'[soldier]';
+其>'[his]';
+具>'[tool]';
+典>'[law]';
+兼>'[unite]';
+冀>'[hope for]';
+冂>'[wide]';
+内>'[inside]';
+円>'[yen]';
+冉>'[tender]';
+冊>'[book]';
+册>'[book]';
+再>'[again]';
+冏>'[[not found in dictionary]]';
+冐>'[risk]';
+冑>'[helmet]';
+冒>'[risk]';
+冓>'[a secluded place]';
+冕>'[crown]';
+冖>'[cover]';
+冗>'[excessive]';
+写>'[write]';
+冠>'[cap]';
+冢>'[burial mound]';
+冤>'[grievance]';
+冥>'[dark]';
+冦>'[bandits]';
+冨>'[abundant]';
+冩>'[write]';
+冪>'[cover-cloth]';
+冫>'[ice]';
+冬>'[winter]';
+冰>'[ice]';
+冱>'[freezing]';
+冲>'[soar]';
+决>'[decide]';
+冴>'[freezing]';
+况>'[condition]';
+冶>'[smelt]';
+冷>'[cold]';
+冽>'[cold and raw]';
+凄>'[bitter cold]';
+凅>'[dried up]';
+准>'[approve]';
+凉>'[cool]';
+凋>'[be withered]';
+凌>'[pure]';
+凍>'[freeze]';
+凖>'[rule]';
+凛>'[to shiver with cold or fear]';
+凜>'[shiver with cold or fear]';
+凝>'[coagulate]';
+几>'[small table]';
+凡>'[all]';
+処>'[place]';
+凧>'[kite]';
+凩>'[wintry wind]';
+凪>'[calm]';
+凭>'[lean on]';
+凰>'[female phoenix]';
+凱>'[triumphant]';
+凵>'[receptacle]';
+凶>'[culprit]';
+凸>'[protrude]';
+凹>'[concave]';
+出>'[go out]';
+函>'[correspondence]';
+凾>'[correspondence]';
+刀>'[knife]';
+刃>'[edged tool]';
+刄>'[edged tool]';
+分>'[divide]';
+切>'[cut]';
+刈>'[cut off]';
+刊>'[publication]';
+刋>'[publication]';
+刎>'[behead]';
+刑>'[punishment]';
+刔>'[scoop out]';
+列>'[line]';
+初>'[beginning]';
+判>'[judge]';
+別>'[separate]';
+刧>'[disaster]';
+利>'[gains]';
+刪>'[to cut]';
+刮>'[shave]';
+到>'[go to]';
+刳>'[cut out]';
+制>'[system]';
+刷>'[brush]';
+券>'[certificate]';
+刹>'[temple]';
+刺>'[stab]';
+刻>'[carve]';
+剃>'[shave]';
+剄>'[cut throat]';
+則>'[rule]';
+削>'[scrape off]';
+剋>'[subdue]';
+剌>'[slash]';
+前>'[in front]';
+剏>'[establish]';
+剔>'[pick out]';
+剖>'[split in two]';
+剛>'[hard]';
+剞>'[carving or engraving knife]';
+剣>'[sword]';
+剤>'[medicinal preparation]';
+剥>'[peel]';
+剩>'[leftovers]';
+剪>'[scissors]';
+副>'[assist]';
+剰>'[leftovers]';
+剱>'[sword]';
+割>'[cut]';
+剳>'[brief note]';
+剴>'[sharpen]';
+創>'[establish]';
+剽>'[rob]';
+剿>'[destroy]';
+劃>'[divide]';
+劇>'[theatrical plays]';
+劈>'[cut apart]';
+劉>'[surname]';
+劍>'[sword]';
+劑>'[medicinal preparation]';
+劒>'[sword]';
+劔>'[sword]';
+力>'[power]';
+功>'[achievement]';
+加>'[add to]';
+劣>'[bad]';
+助>'[help]';
+努>'[exert]';
+劫>'[take by force]';
+劬>'[be diligent]';
+劭>'[encourage]';
+励>'[strive]';
+労>'[labor]';
+劵>'[certificate]';
+効>'[efficacious]';
+劼>'[be discreet]';
+劾>'[examine into]';
+勁>'[strong]';
+勃>'[suddenly]';
+勅>'[imperial decree]';
+勇>'[brave]';
+勉>'[endeavor]';
+勍>'[strong]';
+勒>'[strangle]';
+動>'[move]';
+勗>'[enjoin]';
+勘>'[investigate]';
+務>'[affairs]';
+勝>'[victory]';
+勞>'[labor]';
+募>'[levy]';
+勠>'[join forces]';
+勢>'[power]';
+勣>'[achievements]';
+勤>'[industrious]';
+勦>'[destroy]';
+勧>'[recommend]';
+勲>'[meritorious deed]';
+勳>'[meritorious deed]';
+勵>'[strive]';
+勸>'[recommend]';
+勹>'[wrap]';
+勺>'[spoon]';
+勾>'[hook]';
+勿>'[must not]';
+匁>'[Japanese unit of weight (1/1000 of a kan)]';
+匂>'[fragrance]';
+包>'[wrap]';
+匆>'[hastily]';
+匈>'[breast]';
+匍>'[crawl]';
+匏>'[gourd]';
+匐>'[fall prostrate]';
+匕>'[spoon]';
+化>'[change]';
+北>'[north]';
+匙>'[spoon]';
+匚>'[box]';
+匝>'[full circle]';
+匠>'[craftsman]';
+匡>'[correct]';
+匣>'[small box]';
+匪>'[bandits]';
+匯>'[concourse]';
+匱>'[to lack]';
+匳>'[ladies toilet case with mirror]';
+匸>'[box]';
+匹>'[bolt of cloth]';
+区>'[area]';
+医>'[cure]';
+匿>'[hide]';
+區>'[area]';
+十>'[ten]';
+千>'[thousand]';
+卅>'[thirty]';
+卆>'[soldier]';
+升>'[arise]';
+午>'[noon]';
+卉>'[general term for plants]';
+半>'[half]';
+卍>'[swastika - fourth of auspicious]';
+卑>'[humble]';
+卒>'[soldier]';
+卓>'[profound]';
+協>'[be united]';
+南>'[south]';
+#"南>'[south]'",
+博>'[gamble]';
+卜>'[fortune telling]';
+卞>'[be impatient]';
+占>'[divine]';
+卦>'[fortune telling]';
+卩>'[seal]';
+卮>'[measuring cup]';
+卯>'[4th of Earth Branches]';
+印>'[print]';
+危>'[dangerous]';
+即>'[promptly]';
+却>'[still]';
+卵>'[egg]';
+卷>'[scroll]';
+卸>'[lay down]';
+卻>'[still]';
+卿>'[noble]';
+厂>'[factory]';
+厄>'[adversity]';
+厖>'[bulky]';
+厘>'[thousandth part of tael]';
+厚>'[thick]';
+原>'[source]';
+厠>'[mingle with]';
+厥>'[personal pronoun - he]';
+厦>'[big building]';
+厨>'[kitchen]';
+厩>'[stable]';
+厭>'[dislike]';
+厮>'[servant]';
+厰>'[factory]';
+厳>'[strict]';
+厶>'[private]';
+去>'[go away]';
+参>'[take part in]';
+參>'[take part in]';
+又>'[and]';
+叉>'[crotch]';
+及>'[extend]';
+友>'[friend]';
+双>'[set of two]';
+反>'[reverse]';
+収>'[gather together]';
+叔>'[father''s younger brother]';
+取>'[take]';
+受>'[receive]';
+叙>'[express]';
+叛>'[rebel]';
+叟>'[old man]';
+叡>'[astute]';
+叢>'[bush]';
+口>'[mouth]';
+古>'[old]';
+句>'[sentence]';
+叨>'[talkative]';
+叩>'[knock]';
+只>'[only]';
+叫>'[cry]';
+召>'[imperial decree]';
+叭>'[trumpet]';
+叮>'[exhort or enjoin repeatedly]';
+可>'[may]';
+台>'[platform]';
+叱>'[scold]';
+史>'[history]';
+右>'[right]';
+叶>'[to harmonize]';
+号>'[mark]';
+司>'[take charge of]';
+#"叹>'[sigh]'",
+吁>'[interjection \"Alas!\"]';
+吃>'[eat]';
+各>'[each]';
+合>'[combine]';
+吉>'[lucky]';
+吊>'[condole]';
+吋>'[inch]';
+同>'[same]';
+名>'[name]';
+后>'[queen]';
+吏>'[government official]';
+吐>'[vomit]';
+向>'[toward]';
+君>'[sovereign]';
+吝>'[stingy]';
+吟>'[sing]';
+吠>'[bark]';
+否>'[not]';
+吩>'[order]';
+含>'[hold in mouth]';
+听>'[hear]';
+吭>'[throat]';
+吮>'[suck with mouth]';
+吶>'[raise voice]';
+吸>'[inhale]';
+吹>'[blow]';
+吻>'[kiss]';
+吼>'[roar]';
+吽>'[\"OM\"]';
+吾>'[I]';
+呀>'[particle used express surprise]';
+呂>'[surname]';
+呆>'[dull]';
+呈>'[submit]';
+呉>'[one of warring states]';
+告>'[tell]';
+呎>'[foot]';
+呑>'[swallow]';
+#"呜>'[sound of crying]'",
+周>'[zhou dynasty]';
+呪>'[curse]';
+#"呰>'[................................]'",
+呱>'[wail]';
+味>'[taste]';
+呵>'[scold]';
+呶>'[talkative]';
+呷>'[suck]';
+呻>'[groan]';
+呼>'[breathe sigh]';
+命>'[life]';
+咀>'[suck]';
+咄>'[noise of rage]';
+咆>'[roar]';
+咋>'[why? how? what?]';
+和>'[harmony]';
+咎>'[fault]';
+咏>'[sing song or poem]';
+咐>'[instruct]';
+咒>'[curse]';
+咢>'[sound]';
+咤>'[scold]';
+咥>'[sound of cat]';
+咨>'[inquire]';
+咫>'[foot measure of Zhou dynasty]';
+咬>'[bite]';
+咯>'[final particle]';
+咲>'[smile]';
+咳>'[cough]';
+咸>'[together]';
+咼>'[chat]';
+咽>'[throat]';
+咾>'[a noise]';
+哀>'[sad]';
+品>'[article]';
+哂>'[smile]';
+哄>'[coax]';
+哇>'[vomit]';
+哈>'[sound of laughter]';
+哉>'[final exclamatory particle]';
+#"哗>'[rushing sound]'",
+員>'[member]';
+哢>'[syllable]';
+哥>'[elder brother]';
+哦>'[oh? really? is that so?]';
+哨>'[whistle]';
+哩>'[mile]';
+哭>'[weep]';
+哮>'[cough]';
+哲>'[wise]';
+哺>'[chew food]';
+哽>'[choke]';
+唄>'[final particle of assertion pathaka]';
+唆>'[make mischief]';
+唇>'[lips]';
+唏>'[weep or sob]';
+唐>'[tang dynasty]';
+唔>'[hold in mouth]';
+唖>'[dumb]';
+售>'[sell]';
+唯>'[only]';
+唱>'[sing]';
+唳>'[cry of bird]';
+唸>'[recite]';
+唹>'[to smile at]';
+唾>'[spit]';
+啀>'[gnaw]';
+啄>'[to peck]';
+#"啄>'[to peck]'",
+商>'[commerce]';
+啌>'[animal disease]';
+問>'[ask (about)]';
+啓>'[open]';
+啖>'[eat]';
+啗>'[eat]';
+啜>'[sip]';
+#"啜>'[sip]'",
+啣>'[hold in mouth]';
+啻>'[only]';
+啼>'[weep]';
+啾>'[wailing of child]';
+喀>'[vomit]';
+喃>'[keep talking]';
+善>'[good]';
+喇>'[horn]';
+喉>'[throat]';
+喊>'[shout]';
+喋>'[nag]';
+喘>'[pant]';
+喙>'[beak]';
+喚>'[call]';
+喜>'[like]';
+喝>'[drink]';
+喞>'[chirping of insects]';
+喟>'[heave sigh]';
+喧>'[lively]';
+喨>'[wail]';
+喩>'[metaphor]';
+喪>'[mourning]';
+喫>'[eat]';
+喬>'[tall]';
+單>'[single]';
+喰>'[to eat]';
+営>'[encampment]';
+嗄>'[hoarse of voice]';
+嗅>'[smell]';
+嗇>'[miserly]';
+嗔>'[be angry at]';
+嗚>'[sound of crying]';
+嗜>'[be fond of]';
+嗟>'[sigh]';
+嗣>'[to connect]';
+嗤>'[laugh at]';
+嗷>'[loud clamor]';
+嗹>'[chatter]';
+嗽>'[cough]';
+嗾>'[to set a dog on]';
+嘆>'[sigh]';
+嘉>'[excellent]';
+嘔>'[vomit]';
+嘖>'[interjection of approval or admiration]';
+嘗>'[taste]';
+嘘>'[exhale]';
+嘛>'[final exclamatory particle]';
+嘩>'[rushing sound]';
+嘯>'[roar]';
+嘱>'[order]';
+嘲>'[ridicule]';
+嘴>'[mouth]';
+嘶>'[neighing of a horse]';
+嘸>'[unclear]';
+噂>'[meet]';
+噌>'[scold]';
+噎>'[choke]';
+噐>'[receptacle]';
+噛>'[bite]';
+噤>'[close]';
+器>'[receptacle]';
+噪>'[be noisy]';
+噫>'[belch]';
+噬>'[bite]';
+噴>'[spurt]';
+噸>'[metric ton]';
+噺>'[story]';
+嚀>'[enjoin]';
+嚆>'[give forth sound]';
+嚇>'[scare]';
+嚊>'[to pant]';
+嚏>'[sneeze]';
+嚔>'[sneeze]';
+#"嚜>'[be silent]'",
+嚢>'[bag]';
+嚥>'[swallow]';
+嚮>'[guide]';
+嚴>'[strict]';
+嚶>'[seek friends]';
+嚼>'[prattle]';
+囀>'[sing]';
+囁>'[move lip when speaking]';
+囂>'[be noisy]';
+#"囂>'[be noisy]'",
+囈>'[talk in one''s sleep]';
+#"囍>'[double happiness]'",
+囑>'[order]';
+囓>'[gnaw]';
+囗>'[erect]';
+囘>'[return]';
+囚>'[prisoner]';
+四>'[four]';
+回>'[return]';
+因>'[cause]';
+団>'[sphere]';
+囮>'[inveigle]';
+困>'[surround]';
+囲>'[surround]';
+図>'[diagram]';
+囹>'[prison]';
+固>'[become solid]';
+国>'[nation]';
+囿>'[pen up]';
+圀>'[nation]';
+圃>'[garden]';
+圄>'[prison]';
+圈>'[to circle]';
+圉>'[stable]';
+國>'[nation]';
+圍>'[surround]';
+圏>'[to circle]';
+園>'[garden]';
+圓>'[circle]';
+圖>'[diagram]';
+團>'[sphere]';
+圜>'[circle]';
+土>'[soil]';
+圦>'[(kokuji) water gate]';
+#"圦>'[(kokuji) water gate]'",
+在>'[be at]';
+圭>'[jade pointed at top]';
+地>'[earth]';
+#"圳>'[furrow in field]'",
+#"圳>'[furrow in field]'",
+圻>'[border]';
+址>'[site]';
+坂>'[hillside]';
+均>'[equal]';
+坊>'[neighborhood]';
+坎>'[pit]';
+坏>'[rotten]';
+坐>'[sit]';
+坑>'[pit]';
+坡>'[slope]';
+坤>'[earth]';
+坦>'[flat]';
+坩>'[earthenware]';
+坪>'[level ground]';
+坿>'[mound]';
+垂>'[let down]';
+#"垆>'[black clods of earth]'",
+#"垉>'[................................]'",
+型>'[pattern]';
+垓>'[border]';
+垠>'[boundary]';
+垢>'[dirt]';
+垣>'[low wall]';
+垤>'[ant-hill]';
+#"垩>'[holy]'",
+#"垮>'[be defeated]'",
+#"垲>'[high and dry place]'",
+埀>'[let down]';
+埃>'[fine dust]';
+埆>'[stony]';
+埋>'[bury]';
+城>'[castle]';
+埒>'[enclosure]';
+埓>'[enclosure]';
+埔>'[plain]';
+#"埔>'[plain]'",
+埜>'[open country]';
+域>'[district]';
+埠>'[port city]';
+#"埣>'[................................]'",
+埴>'[soil with large clay content]';
+執>'[hold in hand]';
+培>'[bank up with dirt]';
+基>'[foundation]';
+埼>'[headland]';
+堀>'[cave]';
+堂>'[hall]';
+堅>'[hard]';
+堆>'[heap]';
+堊>'[white earth]';
+堋>'[bury]';
+堕>'[fall]';
+堙>'[bury]';
+堝>'[crucible]';
+堡>'[fort]';
+堤>'[dike]';
+堪>'[adequately capable of]';
+堯>'[a legendary ancient emperor-sage]';
+堰>'[dam]';
+報>'[report]';
+場>'[open space]';
+堵>'[wall]';
+堺>'[person''s name]';
+堽>'[mound]';
+塀>'[wall]';
+塁>'[rampart]';
+塊>'[piece]';
+塋>'[grave]';
+塑>'[model in clay]';
+塒>'[roost]';
+塔>'[tower]';
+塗>'[smear]';
+塘>'[pond]';
+塙>'[truly]';
+塚>'[cemetery]';
+塞>'[stop up]';
+塢>'[entrenchment]';
+塩>'[salt]';
+填>'[fill in]';
+#"塭>'[[not found in any dictionary]]'",
+塲>'[open space]';
+塵>'[dust]';
+塹>'[moat]';
+塾>'[village school]';
+境>'[boundary]';
+墅>'[villa]';
+墓>'[grave]';
+増>'[increase]';
+墜>'[fall down]';
+墟>'[high mound]';
+墨>'[ink]';
+墫>'[cup]';
+墮>'[fall]';
+墳>'[grave]';
+#"墳>'[grave]'",
+#"墳>'[grave]'",
+墺>'[4 walls]';
+墻>'[wall]';
+墾>'[cultivate]';
+壁>'[partition wall]';
+壅>'[to obstruct]';
+壇>'[altar]';
+壊>'[bad]';
+壌>'[soil]';
+壑>'[bed of torrent]';
+壓>'[press]';
+壕>'[trench]';
+#"壖>'[open space along water]'",
+壘>'[rampart]';
+壙>'[tomb]';
+壜>'[earthen jar or jug]';
+壞>'[bad]';
+壟>'[grave]';
+壤>'[soil]';
+#"壥>'[................................]'",
+士>'[scholar]';
+壬>'[ninth of ten celestial stems]';
+壮>'[big]';
+壯>'[big]';
+声>'[sound]';
+壱>'[number one]';
+売>'[sell]';
+壷>'[jar]';
+壹>'[number one]';
+壺>'[jar]';
+壻>'[son-in-law]';
+壼>'[palace corridor or passageway]';
+壽>'[old age]';
+夂>'[go]';
+変>'[change]';
+夊>'[Radical No. 35]';
+夏>'[summer]';
+夐>'[long]';
+夕>'[evening]';
+外>'[out]';
+夘>'[4th of Earth Branches]';
+夙>'[early in morning]';
+多>'[much]';
+夛>'[much]';
+夜>'[night]';
+夢>'[dream]';
+夥>'[companion]';
+大>'[big]';
+天>'[sky]';
+太>'[very]';
+夫>'[man]';
+夬>'[parted]';
+夭>'[young]';
+央>'[center]';
+失>'[lose]';
+夲>'[advance quickly]';
+夷>'[ancient barbarian tribes]';
+夸>'[extravagant]';
+夾>'[be wedged or inserted between]';
+奄>'[ere long]';
+奇>'[strange]';
+奈>'[but]';
+奉>'[offer]';
+奎>'[stride of man]';
+奏>'[memorialize emperor]';
+奐>'[be numerous]';
+契>'[deed]';
+奔>'[run fast]';
+奕>'[in sequence]';
+套>'[case]';
+奘>'[large]';
+奚>'[where? what? how? why?]';
+奠>'[pay respect]';
+奢>'[extravagant]';
+奥>'[mysterious]';
+奧>'[mysterious]';
+奨>'[prize]';
+奩>'[lady''s vanity case]';
+奪>'[take by force]';
+奬>'[prize]';
+奮>'[strive]';
+女>'[woman]';
+奴>'[slave]';
+奸>'[crafty]';
+好>'[good]';
+妁>'[act as go-between]';
+如>'[if]';
+妃>'[wife]';
+妄>'[absurd]';
+妊>'[conceive]';
+妍>'[beautiful]';
+妓>'[prostitute]';
+妖>'[strange]';
+妙>'[mysterious]';
+#"妙>'[mysterious]'",
+妝>'[adorn oneself]';
+妣>'[one''s deceased mother]';
+妥>'[satisfactory]';
+妨>'[interfere with]';
+妬>'[jealous]';
+妲>'[concubine of last ruler of shang]';
+妹>'[younger sister]';
+妻>'[wife]';
+妾>'[concubine]';
+姆>'[child''s governess]';
+姉>'[elder sister]';
+始>'[begin]';
+姐>'[elder sister]';
+姑>'[father''s sister]';
+姓>'[one''s family name]';
+委>'[appoint]';
+姙>'[conceive]';
+姚>'[handsome]';
+姜>'[surname]';
+姥>'[maternal grandmother]';
+姦>'[adultery]';
+姨>'[mother/wife''s sister]';
+姪>'[niece]';
+姫>'[beauty]';
+#"姱>'[beautiful]'",
+姻>'[relatives by marriage]';
+姿>'[one''s manner]';
+威>'[pomp]';
+娃>'[baby]';
+娉>'[beautiful]';
+娑>'[dance]';
+娘>'[mother]';
+#"娚>'[................................]'",
+娜>'[elegant]';
+娟>'[beautiful]';
+娠>'[pregnant]';
+娥>'[be beautiful]';
+娩>'[give birth child]';
+娯>'[pleasure]';
+娵>'[star]';
+娶>'[marry]';
+娼>'[prostitute]';
+婀>'[be beautiful]';
+婁>'[surname]';
+婆>'[old woman]';
+婉>'[amiable]';
+婚>'[get married]';
+婢>'[servant girl]';
+婦>'[married women]';
+婪>'[covet]';
+婬>'[obscene]';
+婿>'[son-in-law]';
+媒>'[go-between]';
+媚>'[charming]';
+媛>'[beauty]';
+媼>'[old woman]';
+媽>'[mother]';
+媾>'[marry]';
+嫁>'[marry]';
+嫂>'[sister-in-law]';
+嫉>'[jealousy]';
+嫋>'[slender and delicate]';
+嫌>'[hate]';
+嫐>'[frolic]';
+嫖>'[patronize prostitutes]';
+嫗>'[old woman]';
+嫡>'[legal wife]';
+嫣>'[charming]';
+嫦>'[name of a moon goddess]';
+嫩>'[soft]';
+嫺>'[refined]';
+嫻>'[elegant]';
+嬉>'[enjoy]';
+嬋>'[beautiful]';
+嬌>'[seductive and loveable]';
+嬖>'[favorite]';
+嬢>'[troubled]';
+嬪>'[court lady]';
+嬬>'[mistress]';
+嬰>'[baby]';
+嬲>'[frolic]';
+#"嬴>'[to win]'",
+嬾>'[lazy]';
+孀>'[widow]';
+孃>'[troubled]';
+孅>'[slender]';
+子>'[offspring]';
+孑>'[remaining]';
+孔>'[opening]';
+孕>'[be pregnant]';
+字>'[letter]';
+存>'[exist]';
+孚>'[brood over eggs]';
+孛>'[comet]';
+孜>'[be as diligent as possible]';
+孝>'[filial piety]';
+孟>'[first in series]';
+季>'[quarter of year]';
+孤>'[orphan]';
+孥>'[one''s children]';
+学>'[learning]';
+孩>'[baby]';
+孫>'[grandchild]';
+孰>'[who? which? what? which one?]';
+孱>'[weak]';
+孳>'[breed in large numbers]';
+孵>'[sit on eggs]';
+學>'[learning]';
+孺>'[child]';
+宀>'[roof]';
+它>'[it]';
+宅>'[residence]';
+宇>'[house]';
+守>'[defend]';
+安>'[peaceful]';
+宋>'[Song dynasty]';
+完>'[complete]';
+宍>'[flesh]';
+宏>'[wide]';
+宕>'[stone quarry]';
+宗>'[lineage]';
+官>'[official]';
+宙>'[time as concept]';
+定>'[decide]';
+宛>'[seem]';
+宜>'[suitable]';
+宝>'[treasure]';
+実>'[real]';
+客>'[guest]';
+宣>'[declare]';
+室>'[room]';
+宥>'[forgive]';
+宦>'[officialdom]';
+宮>'[palace]';
+宰>'[to slaughter]';
+害>'[injure]';
+宴>'[entertain]';
+宵>'[night]';
+家>'[house]';
+宸>'[imperial]';
+容>'[looks]';
+宿>'[stop]';
+寂>'[still]';
+寃>'[grievance]';
+寄>'[send]';
+寅>'[respect]';
+密>'[dense]';
+寇>'[bandits]';
+#"寉>'[................................]'",
+富>'[abundant]';
+寐>'[sleep]';
+寒>'[cold]';
+寓>'[residence]';
+寔>'[real]';
+寛>'[broad]';
+寝>'[sleep]';
+寞>'[silent]';
+察>'[examine]';
+寡>'[widowed]';
+寢>'[sleep]';
+寤>'[few]';
+寥>'[few]';
+實>'[real]';
+寧>'[repose]';
+寨>'[stockade]';
+審>'[examine]';
+寫>'[write]';
+寮>'[shanty]';
+寰>'[great domain]';
+寳>'[treasure]';
+寵>'[favorite]';
+寶>'[treasure]';
+寸>'[inch]';
+寺>'[court]';
+対>'[correct]';
+寿>'[old age]';
+封>'[letter]';
+専>'[monopolize]';
+射>'[shoot]';
+尅>'[subdue]';
+将>'[will]';
+將>'[will]';
+專>'[monopolize]';
+尉>'[officer]';
+尊>'[respect]';
+尋>'[seek]';
+對>'[correct]';
+導>'[direct]';
+小>'[small]';
+少>'[few]';
+尓>'[you]';
+尖>'[sharp]';
+尚>'[still]';
+尠>'[very few]';
+尢>'[weak]';
+尤>'[especially]';
+尨>'[shaggy haired dog]';
+尭>'[a legendary ancient emperor-sage]';
+就>'[just]';
+尸>'[corpse]';
+尹>'[govern]';
+尺>'[chinese measure approx. \"foot\"]';
+尻>'[end of spine]';
+尼>'[buddhist nun]';
+尽>'[exhaust]';
+尾>'[tail]';
+尿>'[urine]';
+局>'[bureau]';
+屁>'[break wind]';
+居>'[live]';
+屆>'[numerary adjunct for time]';
+屈>'[bend]';
+届>'[numerary adjunct for time]';
+屋>'[house]';
+屍>'[corpse]';
+屎>'[excrement]';
+屏>'[folding screen]';
+屐>'[wooden shoes]';
+屑>'[bits]';
+屓>'[gigantic strength]';
+展>'[open]';
+属>'[class]';
+屠>'[butcher]';
+屡>'[frequently]';
+層>'[storey]';
+履>'[footwear]';
+屬>'[class]';
+屮>'[sprout]';
+屯>'[village]';
+山>'[mountain]';
+屶>'[lofty]';
+屹>'[to rise high]';
+岌>'[perilous]';
+岐>'[high]';
+岑>'[steep]';
+岔>'[diverge]';
+岡>'[ridge or crest of hill]';
+岨>'[uneven]';
+岩>'[cliff]';
+岫>'[mountain peak]';
+岬>'[cape]';
+岱>'[daishan one of five sacred mountains]';
+岳>'[mountain peak]';
+#"岶>'[................................]'",
+岷>'[min mountain]';
+岸>'[bank]';
+#"岺>'[mountain ridge]'",
+#"岺>'[mountain ridge]'",
+岾>'[mountain pass (korean)]';
+#"峄>'[range of peaks]'",
+峇>'[cave]';
+峙>'[stand erect]';
+峠>'[mountain pass]';
+峡>'[gorge]';
+峨>'[lofty]';
+峩>'[lofty]';
+峪>'[valley]';
+峭>'[steep]';
+峯>'[peak]';
+峰>'[peak]';
+島>'[island]';
+#"峺>'[................................]'",
+峻>'[high]';
+峽>'[gorge]';
+崇>'[esteem]';
+崋>'[flowery]';
+崎>'[rough]';
+崑>'[Kunlun mountains in Jiang Su province.]';
+崔>'[high]';
+崕>'[cliff]';
+崖>'[cliff]';
+崗>'[post]';
+崘>'[kunlun mountains in jiangsu]';
+崙>'[kunlun mountains in jiangsu]';
+崚>'[hilly]';
+崛>'[towering]';
+崟>'[cliffs]';
+崢>'[high]';
+崩>'[rupture]';
+嵋>'[omei mountain in sichuan]';
+嵌>'[inlay]';
+嵎>'[mountain recess]';
+嵐>'[mountain mist]';
+嵒>'[cliff]';
+嵜>'[rough]';
+嵩>'[high]';
+嵬>'[high]';
+嵯>'[high]';
+嵳>'[high]';
+嵶>'[low part of a mountain]';
+嶂>'[cliff]';
+嶄>'[high]';
+嶇>'[steep]';
+嶋>'[island]';
+嶌>'[island]';
+#"嶐>'[................................]'",
+嶝>'[path leading up a mountain]';
+嶢>'[high or tall]';
+#"嶬>'[................................]'",
+嶮>'[high]';
+嶷>'[range of mountains in hunan province]';
+嶺>'[mountain ridge]';
+嶼>'[island]';
+嶽>'[mountain peak]';
+巉>'[steep]';
+巌>'[cliff]';
+巍>'[high]';
+巒>'[mountain range]';
+巓>'[summit of mountain]';
+巖>'[cliff]';
+巛>'[river]';
+川>'[stream]';
+州>'[administrative division]';
+巡>'[patrol]';
+巣>'[nest]';
+工>'[labor]';
+左>'[left]';
+巧>'[skillful]';
+巨>'[large]';
+巫>'[wizard]';
+差>'[differ]';
+己>'[self]';
+已>'[already]';
+巳>'[sixth of twelve branches]';
+巴>'[greatly desire]';
+巵>'[measuring cup]';
+巷>'[alley]';
+巻>'[scroll]';
+巽>'[5th of the 8 trigrams]';
+巾>'[kerchief]';
+市>'[market]';
+布>'[cotton cloth]';
+帆>'[sail]';
+帋>'[paper]';
+希>'[rare]';
+帑>'[a treasury]';
+帖>'[invitation card]';
+帙>'[book cover]';
+帚>'[broom]';
+帛>'[silks]';
+帝>'[supreme ruler]';
+帥>'[commander]';
+師>'[teacher]';
+席>'[seat]';
+帯>'[belt]';
+帰>'[return]';
+帳>'[tent]';
+帶>'[belt]';
+帷>'[tent]';
+常>'[common]';
+帽>'[hat]';
+幀>'[picture]';
+幃>'[curtain that forms wall]';
+幄>'[tent]';
+幅>'[piece]';
+幇>'[help]';
+幌>'[curtain]';
+幎>'[cover-cloth]';
+幔>'[curtain]';
+幕>'[curtain]';
+幗>'[women''s headgear]';
+幟>'[flag]';
+幡>'[pennant]';
+幢>'[carriage curtain]';
+幣>'[currency]';
+幤>'[evil]';
+干>'[oppose]';
+平>'[flat]';
+年>'[year]';
+幵>'[even level. to raise in both hands]';
+并>'[combine]';
+幸>'[luck(ily)]';
+幹>'[trunk of tree or of human body]';
+幺>'[one]';
+幻>'[illusion]';
+幼>'[infant]';
+幽>'[quiet]';
+幾>'[how many? how much? (a)few]';
+广>'[wide]';
+庁>'[hall]';
+広>'[broad]';
+庄>'[village]';
+庇>'[cover]';
+床>'[bed]';
+序>'[series]';
+底>'[bottom]';
+庖>'[kitchen]';
+店>'[shop]';
+庚>'[seventh of ten cyclical stems]';
+府>'[prefecture]';
+庠>'[village school]';
+度>'[degree]';
+座>'[seat]';
+庫>'[armory]';
+庭>'[courtyard]';
+庵>'[buddhist monastery or nunnery]';
+庶>'[numerous]';
+康>'[peaceful]';
+庸>'[usual]';
+廁>'[toilet]';
+廂>'[side-room]';
+廃>'[abrogate]';
+廈>'[big building]';
+廉>'[upright]';
+廊>'[corridor]';
+廏>'[stable]';
+廐>'[stable]';
+廓>'[broad]';
+廖>'[surname]';
+廚>'[kitchen]';
+廛>'[store]';
+廝>'[servant]';
+廟>'[temple]';
+廠>'[factory]';
+廡>'[corridor]';
+廢>'[abrogate]';
+廣>'[broad]';
+廨>'[government office]';
+廩>'[granary]';
+廬>'[hut]';
+廰>'[hall]';
+廱>'[harmonious]';
+廳>'[hall]';
+廴>'[go]';
+延>'[delay]';
+廷>'[court]';
+廸>'[enlighten]';
+建>'[build]';
+廻>'[circle around]';
+廼>'[then]';
+廾>'[two hands]';
+廿>'[twenty]';
+弁>'[conical cap worn under zhou dynasty]';
+弃>'[reject]';
+弄>'[do]';
+弉>'[large]';
+弊>'[evil]';
+弋>'[catch]';
+弌>'[number one]';
+弍>'[number two]';
+式>'[style]';
+弐>'[number two]';
+弑>'[to kill one''s superior]';
+弓>'[bow]';
+弔>'[condole]';
+引>'[pull]';
+弖>'[phonetic for \"te\" (Japanese)]';
+弗>'[not]';
+弘>'[enlarge]';
+弛>'[loosen]';
+弟>'[young brother]';
+弥>'[extensive]';
+弦>'[string]';
+弧>'[wooden bow]';
+弩>'[cross-bow]';
+弭>'[stop]';
+弯>'[bend]';
+弱>'[weak]';
+張>'[stretch]';
+強>'[strong]';
+弸>'[bow stretched full]';
+弼>'[aid]';
+#"弼>'[aid]'",
+#"彁>'[................................]'",
+彈>'[pellet]';
+彊>'[stubborn]';
+彌>'[extensive]';
+彎>'[bend]';
+彑>'[snout]';
+当>'[bear]';
+彖>'[a hog]';
+彗>'[broomstick]';
+彙>'[collect]';
+彜>'[yi]';
+彝>'[yi]';
+彡>'[hair]';
+形>'[form]';
+彦>'[elegant]';
+彩>'[hue]';
+彪>'[tiger]';
+彫>'[carve]';
+彬>'[cultivated]';
+彭>'[name of ancient country]';
+彰>'[clear]';
+影>'[shadow]';
+彳>'[step with left foot]';
+彷>'[like]';
+役>'[service]';
+彼>'[that]';
+彿>'[resembling]';
+往>'[go]';
+征>'[invade]';
+徂>'[go]';
+徃>'[go]';
+径>'[narrow path]';
+待>'[treat]';
+徇>'[comply with]';
+很>'[very]';
+徊>'[linger]';
+律>'[statute]';
+後>'[behind]';
+徐>'[slowly]';
+徑>'[narrow path]';
+徒>'[disciple]';
+従>'[from]';
+得>'[obtain]';
+徘>'[walk back and forth]';
+徙>'[move one''s abode]';
+從>'[from]';
+徠>'[induce]';
+御>'[drive]';
+徨>'[doubtful]';
+復>'[return]';
+循>'[obey]';
+徭>'[conscript labor]';
+微>'[small]';
+徳>'[virtue]';
+徴>'[summon]';
+徹>'[penetrate]';
+徼>'[frontier]';
+徽>'[a badge]';
+心>'[heart]';
+必>'[surely]';
+忌>'[jealous]';
+忍>'[endure]';
+忖>'[guess]';
+志>'[purpose]';
+忘>'[forget]';
+忙>'[busy]';
+応>'[should]';
+忝>'[disgrace]';
+忠>'[loyalty]';
+忤>'[insubordinate]';
+快>'[rapid]';
+忰>'[suffer]';
+忱>'[truth]';
+念>'[think of]';
+忸>'[blush]';
+忻>'[delightful]';
+忽>'[suddenly]';
+忿>'[get angry]';
+怎>'[what? why? how?]';
+怏>'[discontented]';
+#"怐>'[................................]'",
+怒>'[anger]';
+怕>'[fear]';
+怖>'[terror]';
+怙>'[rely on]';
+怛>'[grieved]';
+怜>'[pity]';
+思>'[think]';
+怠>'[idle]';
+怡>'[harmony]';
+急>'[quick]';
+怦>'[eager]';
+性>'[nature]';
+怨>'[hatred]';
+怩>'[shy]';
+怪>'[strange]';
+怫>'[sorry]';
+怯>'[lacking in courage]';
+怱>'[hastily]';
+怺>'[to endure]';
+恁>'[that]';
+恂>'[careful]';
+恃>'[rely on]';
+恆>'[constant]';
+恊>'[be united]';
+恋>'[love]';
+恍>'[seemingly]';
+恐>'[fear]';
+恒>'[constant]';
+恕>'[forgive]';
+恙>'[illness]';
+恚>'[anger]';
+恟>'[scared]';
+恠>'[strange]';
+恢>'[restore]';
+恣>'[indulge oneself]';
+恤>'[show pity]';
+恥>'[shame]';
+恨>'[hatred]';
+恩>'[kindness]';
+恪>'[respectful]';
+恫>'[in pain]';
+恬>'[quiet]';
+恭>'[respectful]';
+息>'[rest]';
+恰>'[just]';
+恵>'[favor]';
+#"恶>'[evil]'",
+悁>'[irritable]';
+悃>'[sincere]';
+悄>'[silent]';
+悉>'[know]';
+悋>'[stingy]';
+悌>'[brotherly]';
+悍>'[courageous]';
+悒>'[sorrowful]';
+悔>'[repent]';
+悖>'[be contradictory to]';
+#"悖>'[be contradictory to]'",
+悚>'[be afraid]';
+悛>'[repent]';
+悟>'[apprehend]';
+悠>'[long]';
+患>'[suffer]';
+悦>'[pleased]';
+悧>'[smooth]';
+悩>'[angered]';
+悪>'[evil]';
+悲>'[sorrow]';
+悳>'[ethics]';
+悴>'[suffer]';
+悵>'[disappointed]';
+悶>'[gloomy]';
+悸>'[fearful]';
+悼>'[grieve]';
+悽>'[sorrowful]';
+情>'[feeling]';
+惆>'[distressed]';
+惇>'[be kind]';
+惑>'[confuse]';
+惓>'[careful]';
+惘>'[disconcerted]';
+惚>'[absent-minded]';
+惜>'[pity]';
+惟>'[but]';
+惠>'[favor]';
+惡>'[evil]';
+惣>'[overall [questionable variant]]';
+惧>'[fear]';
+惨>'[sad]';
+惰>'[indolent]';
+惱>'[angered]';
+想>'[think]';
+惴>'[afraid]';
+惶>'[fearful]';
+惷>'[wriggle]';
+惹>'[irritate]';
+惺>'[intelligent]';
+惻>'[feel anguish]';
+愀>'[change one''s countenance]';
+愁>'[anxiety]';
+愃>'[relax]';
+愆>'[fault]';
+愈>'[more and more]';
+愉>'[pleasant]';
+愍>'[pity]';
+愎>'[obstinate]';
+意>'[thought]';
+愕>'[startled]';
+愚>'[stupid]';
+愛>'[love]';
+感>'[feel]';
+愡>'[absent-minded]';
+愧>'[ashamed]';
+愨>'[sincerity]';
+愬>'[accuse]';
+愴>'[sad]';
+愼>'[act with care]';
+愽>'[gamble]';
+愾>'[anger]';
+愿>'[sincere]';
+慂>'[urge]';
+慄>'[shiver]';
+慇>'[careful]';
+慈>'[kind]';
+慊>'[to resent]';
+態>'[manner]';
+慌>'[nervous]';
+慍>'[angry]';
+慎>'[act with care]';
+#"慓>'[................................]'",
+慕>'[long for]';
+慘>'[sad]';
+慙>'[ashamed]';
+慚>'[ashamed]';
+慝>'[do evil in secret]';
+慟>'[sadness]';
+慢>'[slow(ly)]';
+慣>'[habit]';
+慥>'[sincere]';
+慧>'[bright]';
+慨>'[sigh]';
+慫>'[to alarm]';
+慮>'[be concerned]';
+#"慯>'[................................]'",
+慰>'[comfort]';
+慱>'[sad]';
+慳>'[miserly]';
+慴>'[fear]';
+慵>'[indolent]';
+慶>'[congratulate]';
+慷>'[ardent]';
+慾>'[lust]';
+憂>'[sad]';
+憇>'[rest]';
+憊>'[tired]';
+憎>'[hate]';
+憐>'[pity]';
+憑>'[lean on]';
+憔>'[be worn-out]';
+憖>'[cautious]';
+憙>'[like]';
+憚>'[dread]';
+憤>'[resent]';
+憧>'[irresolute]';
+憩>'[rest]';
+憫>'[pity]';
+憬>'[rouse]';
+憮>'[regretful]';
+憲>'[constitution]';
+憶>'[remember]';
+憺>'[peace]';
+憾>'[regret]';
+懃>'[courteous]';
+懆>'[anxious]';
+懇>'[sincere]';
+懈>'[idle]';
+應>'[should]';
+懊>'[vexed]';
+懋>'[splendid]';
+懌>'[enjoy]';
+懍>'[be afraid of]';
+懐>'[bosom]';
+懣>'[be sick at heart]';
+懦>'[weak]';
+懲>'[punish]';
+懴>'[regret]';
+懶>'[lazy]';
+懷>'[bosom]';
+懸>'[hang]';
+懺>'[regret]';
+懼>'[fear]';
+懽>'[happy]';
+懾>'[afraid]';
+懿>'[virtuous]';
+戀>'[love]';
+戈>'[halberd]';
+戉>'[a battle-axe]';
+戊>'[fifth of ten celestial stems]';
+戌>'[eleventh of terrestrial branches]';
+戍>'[defend borders]';
+戎>'[arms]';
+成>'[completed]';
+我>'[our]';
+戒>'[warn]';
+戔>'[small]';
+或>'[or]';
+戚>'[relative]';
+戛>'[lance]';
+戝>'[pirate]';
+戞>'[lance]';
+戟>'[halberd with crescent blade]';
+戡>'[subjugate]';
+戦>'[war]';
+截>'[cut off]';
+戮>'[kill]';
+戯>'[theatrical play]';
+戰>'[war]';
+戲>'[theatrical play]';
+戳>'[prick]';
+戴>'[wear on top]';
+戸>'[door]';
+戻>'[perverse]';
+房>'[house]';
+所>'[place]';
+扁>'[flat]';
+扇>'[fan]';
+扈>'[escort]';
+扉>'[door panel]';
+手>'[hand]';
+才>'[talent]';
+扎>'[pull up]';
+打>'[strike]';
+払>'[shake off]';
+托>'[hold up with palm]';
+扛>'[carry on shoulders]';
+扞>'[ward off]';
+扠>'[pick up with fork or pincers]';
+扣>'[knock]';
+扨>'[pick up with fork or pincers]';
+扮>'[dress up]';
+扱>'[collect]';
+扶>'[support]';
+批>'[comment]';
+扼>'[grasp]';
+找>'[search]';
+承>'[inherit]';
+技>'[skill]';
+#"抂>'[................................]'",
+抃>'[to clap hands]';
+抄>'[copy]';
+抉>'[choose]';
+把>'[hold]';
+抑>'[press down]';
+抒>'[express]';
+抓>'[scratch]';
+抔>'[take or hold up in both hands]';
+投>'[throw]';
+抖>'[tremble]';
+抗>'[resist]';
+折>'[break off]';
+抛>'[throw (away)]';
+抜>'[uproot]';
+択>'[select]';
+披>'[wear]';
+抬>'[lift]';
+抱>'[embrace]';
+抵>'[resist]';
+抹>'[smear]';
+抻>'[pull]';
+押>'[mortgage]';
+抽>'[draw out]';
+拂>'[shake off]';
+担>'[carry]';
+拆>'[break up]';
+拇>'[thumb]';
+拈>'[pick up with fingers]';
+拉>'[pull]';
+拊>'[slap]';
+拌>'[mix]';
+拍>'[clap]';
+拏>'[take]';
+拐>'[kidnap]';
+拑>'[to clamp]';
+拒>'[ward off with hand]';
+拓>'[expand]';
+拔>'[uproot]';
+拗>'[obstinate]';
+拘>'[restrain]';
+拙>'[stupid]';
+招>'[beckon]';
+拜>'[do obeisance]';
+拝>'[do obeisance]';
+拠>'[occupy]';
+拡>'[expand]';
+括>'[include]';
+拭>'[wipe away stains with cloth]';
+拮>'[laboring hard]';
+拯>'[help]';
+拱>'[fold hands on breast]';
+拳>'[fist]';
+#"拴>'[bind with rope]'",
+拶>'[press]';
+拷>'[torture and interrogate]';
+拾>'[pick up]';
+拿>'[take]';
+持>'[sustain]';
+挂>'[hang]';
+指>'[finger]';
+挈>'[assist]';
+按>'[put hand on]';
+挌>'[fight]';
+挑>'[load carried on shoulders]';
+挙>'[raise]';
+挟>'[clasp under arm]';
+#"挥>'[direct]'",
+挨>'[near]';
+挫>'[push down]';
+振>'[raise]';
+挺>'[stand upright]';
+挽>'[pull]';
+挾>'[clasp under arm]';
+挿>'[insert]';
+捉>'[grasp]';
+捌>'[break open]';
+捍>'[ward off]';
+捏>'[pick with fingers]';
+捐>'[contribute]';
+捕>'[arrest]';
+捗>'[make progress]';
+捜>'[search]';
+捧>'[hold up in two hands]';
+捨>'[discard]';
+捩>'[twist with hands]';
+捫>'[stroke]';
+据>'[occupy]';
+捲>'[curl]';
+捶>'[strike with stick]';
+捷>'[win]';
+捺>'[press down heavily with fingers]';
+捻>'[twist with fingers]';
+掀>'[lift]';
+掃>'[sweep]';
+授>'[give to]';
+掉>'[turn]';
+掌>'[palm of hand]';
+掎>'[drag aside]';
+掏>'[take out]';
+排>'[row]';
+掖>'[stick in]';
+掘>'[dig]';
+掛>'[hang]';
+#"掟>'[................................]'",
+掠>'[rob]';
+採>'[gather]';
+探>'[find]';
+掣>'[drag]';
+接>'[receive]';
+控>'[accuse]';
+推>'[push]';
+掩>'[cover]';
+措>'[place]';
+掫>'[be on night watch]';
+掬>'[grasp with both hands]';
+#"掱>'[pickpocket]'",
+掴>'[box one''s ears]';
+#"掴>'[box one''s ears]'",
+掻>'[scratch lightly]';
+掾>'[a general designation of officials]';
+揀>'[choose]';
+揃>'[shear]';
+揄>'[lift]';
+揆>'[prime minister]';
+揉>'[rub]';
+描>'[copy]';
+提>'[hold in hand]';
+插>'[insert]';
+揖>'[salute]';
+揚>'[scatter]';
+換>'[change]';
+握>'[grasp]';
+揣>'[put things under clothes]';
+揩>'[rub and wipe]';
+揮>'[direct]';
+援>'[aid]';
+揶>'[make fun of]';
+揺>'[wag]';
+搆>'[pull]';
+損>'[diminish]';
+搏>'[seize]';
+搓>'[trample]';
+搖>'[wag]';
+搗>'[hull]';
+搜>'[search]';
+搦>'[grasp]';
+搨>'[rub]';
+搬>'[transfer]';
+搭>'[join together]';
+搴>'[extract]';
+搶>'[plunder]';
+携>'[lead by hand]';
+搾>'[press]';
+摂>'[take in]';
+摎>'[to strangle]';
+摘>'[pluck]';
+摧>'[destroy]';
+摩>'[rub]';
+摯>'[sincere]';
+摶>'[roll around with hand]';
+摸>'[gently touch with hand]';
+摺>'[fold]';
+撃>'[strike]';
+撈>'[scoop out of water]';
+撒>'[release]';
+撓>'[scratch]';
+撕>'[rip]';
+撚>'[twirl in fingers]';
+撞>'[knock against]';
+撤>'[omit]';
+撥>'[move]';
+撩>'[lift up]';
+撫>'[pat]';
+播>'[sow]';
+撮>'[little bit]';
+撰>'[compose]';
+撲>'[pound]';
+撹>'[disturb]';
+撻>'[flog]';
+撼>'[move]';
+擁>'[embrace]';
+擂>'[rub]';
+擅>'[monopolize]';
+擇>'[select]';
+操>'[conduct]';
+擒>'[catch]';
+擔>'[carry]';
+擘>'[thumb]';
+據>'[occupy]';
+擠>'[crowd]';
+擡>'[carry]';
+擢>'[pull up]';
+擣>'[hull]';
+擦>'[wipe]';
+擧>'[raise]';
+擬>'[draft]';
+擯>'[exclude]';
+擱>'[place]';
+擲>'[throw]';
+擴>'[expand]';
+#"擶>'[................................]'",
+擺>'[put]';
+擽>'[tickle]';
+擾>'[disturb]';
+攀>'[climb]';
+攅>'[save]';
+攘>'[seize]';
+攜>'[lead by hand]';
+攝>'[take in]';
+攣>'[tangled]';
+攤>'[spread out]';
+攪>'[disturb]';
+攫>'[snatch away]';
+攬>'[grasp]';
+支>'[disperse]';
+攴>'[rap]';
+攵>'[rap]';
+收>'[gather together]';
+攷>'[examine]';
+攸>'[distant]';
+改>'[change]';
+攻>'[attack]';
+放>'[put]';
+政>'[government]';
+故>'[ancient]';
+效>'[result]';
+敍>'[express]';
+敏>'[fast]';
+救>'[save]';
+敕>'[an imperial order or decree]';
+敖>'[ramble]';
+敗>'[be defeated]';
+敘>'[express]';
+教>'[teach]';
+敝>'[break]';
+敞>'[roomy]';
+敢>'[dare]';
+散>'[scatter]';
+敦>'[esteem]';
+敬>'[respect]';
+数>'[number]';
+敲>'[strike]';
+整>'[orderly]';
+敵>'[enemy]';
+敷>'[spread]';
+數>'[number]';
+斂>'[draw back]';
+斃>'[kill]';
+文>'[literature]';
+斈>'[learning]';
+斉>'[even]';
+斌>'[refined]';
+斎>'[vegetarian diet]';
+斐>'[graceful]';
+斑>'[mottled]';
+斗>'[chinese peck]';
+料>'[consider]';
+斛>'[dry measure 10 or 5 times of dou]';
+斜>'[slanting]';
+斟>'[pour wine or tea into cup]';
+斡>'[revolve]';
+斤>'[catty]';
+斥>'[scold]';
+斧>'[axe]';
+斫>'[cut]';
+斬>'[cut]';
+断>'[sever]';
+斯>'[this]';
+新>'[new]';
+斷>'[sever]';
+方>'[square]';
+於>'[in]';
+施>'[grant]';
+旁>'[side]';
+旃>'[silk banner with bent pole]';
+旄>'[a kind of ancient flag]';
+旅>'[trip]';
+旆>'[flag ornament]';
+旋>'[revolve]';
+旌>'[banner or flag adorned with feat]';
+族>'[family clan]';
+旒>'[fringes of pearls on crowns]';
+旗>'[banner]';
+旙>'[a pennant]';
+旛>'[a pennant]';
+无>'[negative]';
+旡>'[choke on something eaten]';
+既>'[already]';
+日>'[sun]';
+旦>'[dawn]';
+旧>'[old]';
+旨>'[purpose]';
+早>'[early]';
+旬>'[ten-day period]';
+旭>'[rising sun]';
+旱>'[drought]';
+旺>'[prosper]';
+旻>'[heaven]';
+昂>'[rise]';
+昃>'[afternoon]';
+昆>'[elder brother]';
+昇>'[rise]';
+昊>'[summer time]';
+昌>'[light of sun]';
+明>'[bright]';
+昏>'[dusk]';
+易>'[change]';
+昔>'[formerly]';
+昜>'[to open out]';
+星>'[star]';
+映>'[project]';
+春>'[spring]';
+昧>'[obscure]';
+昨>'[yesterday]';
+昭>'[bright]';
+是>'[indeed]';
+昴>'[one of the 28 constellations]';
+昵>'[intimate]';
+昶>'[a long day. bright. extended. clear]';
+昼>'[daytime]';
+昿>'[extensive]';
+晁>'[morning]';
+時>'[time]';
+晃>'[bright]';
+晄>'[bright]';
+晉>'[advance]';
+晋>'[advance]';
+晏>'[peaceful]';
+晒>'[dry in sun]';
+晝>'[daytime]';
+晞>'[dry]';
+晟>'[clear]';
+晢>'[light of stars]';
+晤>'[have interview with]';
+晦>'[dark]';
+晧>'[daybreak]';
+晨>'[early morning]';
+晩>'[night]';
+普>'[universal]';
+景>'[scenery]';
+晰>'[clear]';
+晴>'[clear weather]';
+晶>'[crystal]';
+智>'[wisdom]';
+暁>'[dawn]';
+#"暂>'[temporary]'",
+暄>'[warm]';
+暇>'[leisure]';
+暈>'[halo in sky]';
+暉>'[sunshine]';
+暎>'[sun beginning decline]';
+暑>'[hot]';
+暖>'[warm]';
+暗>'[dark]';
+暘>'[rising sun]';
+暝>'[dark]';
+暢>'[smoothly]';
+暦>'[calendar]';
+暫>'[temporary]';
+暮>'[evening]';
+暴>'[violent]';
+暸>'[bright]';
+暹>'[rise]';
+暼>'[take fleeting glance at]';
+暾>'[morning sun]';
+曁>'[and]';
+曄>'[bright]';
+曇>'[become cloudy]';
+曉>'[dawn]';
+曖>'[obscure]';
+曙>'[bright]';
+曚>'[twilight just before sun rises]';
+曜>'[glorious]';
+曝>'[sun]';
+曠>'[extensive]';
+曦>'[sunlight]';
+曩>'[in ancient times]';
+曰>'[say]';
+曲>'[crooked]';
+曳>'[trail]';
+更>'[more]';
+曵>'[trail]';
+曷>'[why? what? where?]';
+書>'[book]';
+曹>'[ministry officials]';
+曼>'[long]';
+曽>'[already]';
+曾>'[already]';
+替>'[change]';
+最>'[most]';
+會>'[assemble]';
+月>'[moon]';
+有>'[have]';
+朋>'[friend]';
+服>'[clothes]';
+朏>'[light of crescent moon]';
+朔>'[first day of lunar month]';
+朕>'[pronoun \"i\"]';
+朖>'[clear]';
+朗>'[clear]';
+望>'[look at or forward]';
+朝>'[dynasty]';
+朞>'[full year]';
+期>'[period of time]';
+朦>'[condition or appearance of moon]';
+朧>'[condition or appearance of moon]';
+木>'[tree]';
+未>'[not yet]';
+末>'[final]';
+本>'[root]';
+札>'[letter]';
+朮>'[skill]';
+朱>'[cinnabar]';
+朴>'[simple]';
+朶>'[cluster of flowers]';
+#"朶>'[cluster of flowers]'",
+#"朶>'[cluster of flowers]'",
+机>'[desk]';
+朽>'[decayed]';
+朿>'[stab]';
+#"杀>'[kill]'",
+杆>'[pole]';
+杉>'[various species of pine and fir]';
+李>'[plum]';
+杏>'[apricot]';
+材>'[material]';
+村>'[village]';
+杓>'[handle of cup]';
+杖>'[cane]';
+杙>'[a tiny wooden post]';
+杜>'[stop]';
+杞>'[willow]';
+束>'[bind]';
+杠>'[lever]';
+条>'[clause]';
+杢>'[woodworker]';
+#"杢>'[woodworker]'",
+杤>'[type of oak]';
+来>'[come]';
+杪>'[tip of twig]';
+杭>'[cross stream]';
+杯>'[cup]';
+杰>'[hero]';
+東>'[east]';
+杲>'[bright sun]';
+杳>'[obscure]';
+杵>'[pestle]';
+杷>'[loquat]';
+杼>'[shuttle of loom]';
+松>'[pine tree]';
+板>'[plank]';
+#"枅>'[................................]'",
+枇>'[loquat]';
+枉>'[useless]';
+枋>'[sandalwood]';
+枌>'[variety of elm with small seeds]';
+析>'[split wood]';
+枕>'[pillow]';
+林>'[forest]';
+枚>'[stalk of shrub]';
+果>'[fruit]';
+枝>'[branches]';
+#"枟>'[wood streaks]'",
+#"枟>'[wood streaks]'",
+枢>'[door hinge]';
+#"枦>'[................................]'",
+枩>'[pine tree]';
+枯>'[dried out]';
+枳>'[trifoliate orange]';
+枴>'[cane]';
+架>'[rack]';
+枷>'[cangue scaffold]';
+枸>'[kind of aspen found in sichuan]';
+枹>'[drumstick]';
+柁>'[large tie-beams]';
+柄>'[handle]';
+#"柆>'[................................]'",
+#"柈>'[container]'",
+柎>'[calyx of flower]';
+柏>'[cypress]';
+某>'[certain thing or person]';
+柑>'[tangerine]';
+染>'[dye]';
+柔>'[soft]';
+柘>'[a thorny tree]';
+柚>'[pumelo]';
+柝>'[watchman''s rattle]';
+柞>'[oak]';
+柢>'[root]';
+柤>'[hawthorn]';
+#"柧>'[................................]'",
+柩>'[coffin which contains corpse]';
+柬>'[letter]';
+柮>'[flat pieces of wood]';
+柯>'[axe-handle]';
+柱>'[pillar]';
+柳>'[willow tree]';
+柴>'[firewood]';
+柵>'[fence]';
+査>'[investigate]';
+柾>'[straight grain]';
+柿>'[persimmon]';
+栂>'[a kind of evergreen tree]';
+栃>'[type of oak]';
+栄>'[glory]';
+栓>'[wooden peg]';
+栖>'[perch]';
+栗>'[chestnut tree]';
+栞>'[publication]';
+校>'[school]';
+栢>'[cypress]';
+栩>'[species of oak]';
+株>'[numerary adjunct for trees]';
+栫>'[fence]';
+栲>'[mangrove]';
+栴>'[sandalwood]';
+核>'[seed]';
+根>'[root]';
+格>'[pattern]';
+栽>'[cultivate]';
+桀>'[chicken roost]';
+桁>'[cross-beams of roof]';
+桂>'[cassia or cinnamon]';
+桃>'[peach]';
+框>'[frame]';
+案>'[table]';
+#"桍>'[................................]'",
+桎>'[fetters]';
+桐>'[name applied various trees]';
+桑>'[mulberry tree]';
+桓>'[variety of tree]';
+桔>'[chinese bellflower]';
+#"桙>'[................................]'",
+桜>'[cherry]';
+桝>'[................]';
+#"桝>'[................]'",
+档>'[shelf]';
+桧>'[chinese cypress]';
+桴>'[raft]';
+桶>'[pail]';
+桷>'[rafter]';
+#"桾>'[................................]'",
+桿>'[pole]';
+梁>'[bridge]';
+梃>'[a club]';
+梅>'[plums]';
+梍>'[tree name]';
+梏>'[handcuffs]';
+梓>'[catalpa ovata]';
+梔>'[gardenia]';
+梗>'[stem of flower]';
+#"梘>'[bamboo tube]'",
+條>'[clause]';
+梟>'[owl thus]';
+梠>'[small beam supporting rafters at]';
+梢>'[pointed tip of something long like a branch]';
+梦>'[dream]';
+梧>'[sterculia platanifolia]';
+梨>'[pear]';
+梭>'[weaver''s shuttle]';
+梯>'[ladder]';
+械>'[weapons]';
+梱>'[doorsill]';
+梳>'[comb]';
+梵>'[buddhist]';
+梶>'[oar]';
+梹>'[the areca-nut]';
+#"梹>'[the areca-nut]'",
+梼>'[block of wood]';
+棄>'[reject]';
+#"棆>'[................................]'",
+棉>'[cotton]';
+棊>'[chess]';
+棋>'[chess]';
+棍>'[stick]';
+棒>'[stick]';
+#"棔>'[................................]'",
+棕>'[hemp palm]';
+棗>'[date tree]';
+棘>'[jujube tree]';
+棚>'[tent]';
+棟>'[main beams supporting house]';
+棠>'[crab apple tree]';
+#"棡>'[................................]'",
+棣>'[kerria japonica plant]';
+棧>'[warehouse]';
+森>'[forest]';
+棯>'[jujube tree]';
+棲>'[perch]';
+棹>'[oar]';
+棺>'[coffin]';
+椀>'[bowl]';
+椁>'[outer-coffin]';
+椄>'[to graft]';
+椅>'[chair]';
+椈>'[cedar]';
+椋>'[fruit]';
+椌>'[instrument]';
+植>'[plant]';
+椎>'[hammer]';
+椏>'[the forking branch of a tree]';
+椒>'[pepper]';
+#"椓>'[strike]'",
+#"椚>'[................................]'",
+椛>'[type of birch]';
+検>'[check]';
+#"椡>'[................................]'",
+#"椢>'[................................]'",
+#"椣>'[................................]'",
+#"椥>'[................................]'",
+#"椦>'[................................]'",
+#"椨>'[................................]'",
+椪>'[machilus nanmu]';
+椰>'[palm tree]';
+椴>'[poplar]';
+椶>'[palm tree]';
+椹>'[a chopping board]';
+椽>'[beams]';
+椿>'[father]';
+楊>'[willow]';
+楓>'[maple tree]';
+楔>'[wedge]';
+楕>'[oval-shaped]';
+楙>'[name of plant]';
+楚>'[name of feudal state]';
+#"楜>'[................................]'",
+楝>'[melia japonica]';
+楞>'[used for ceylon in buddhist text]';
+楠>'[name of tree]';
+楡>'[elm tree]';
+楢>'[tinder]';
+楪>'[small dish]';
+楫>'[oar]';
+業>'[profession]';
+楮>'[mulberry]';
+楯>'[shield]';
+楳>'[plums]';
+#"楴>'[................................]'",
+極>'[extreme]';
+楷>'[model style of chinese writing]';
+楸>'[mallotus japonicus]';
+楹>'[column]';
+楼>'[building of two or more stories]';
+楽>'[happy]';
+#"楽>'[happy]'",
+#"榀>'[[not found in dictionary]]'",
+概>'[generally]';
+#"榉>'[type of elm]'",
+榎>'[small evergreen shrub]';
+#"榑>'[................................]'",
+榔>'[betel-nut tree]';
+榕>'[banyan tree]';
+榛>'[hazelnut]';
+榜>'[placard]';
+#"榠>'[................................]'",
+榧>'[type of yew]';
+榮>'[glory]';
+榱>'[rafter]';
+榲>'[pillar]';
+榴>'[pomegranate]';
+榻>'[cot]';
+榾>'[pieces of wood]';
+榿>'[alder]';
+槁>'[wither]';
+槃>'[tray]';
+槇>'[tip of a tree]';
+槊>'[spear]';
+構>'[frame]';
+槌>'[hammer]';
+槍>'[spear]';
+槎>'[raft]';
+槐>'[locust tree]';
+槓>'[lever]';
+様>'[shape]';
+#"様>'[shape]'",
+#"槛>'[threshold]'",
+#"槛>'[threshold]'",
+槧>'[wooden tablet]';
+槨>'[outer-coffin]';
+#"槫>'[................................]'",
+槭>'[maple]';
+槲>'[type of oak]';
+槹>'[spar]';
+槻>'[zelkova tree]';
+槽>'[trough]';
+槿>'[hibiscus]';
+樂>'[happy]';
+樅>'[fir tree]';
+樊>'[a railing]';
+樋>'[tree name]';
+#"樌>'[................................]'",
+#"樒>'[................................]'",
+樓>'[building of two or more stories]';
+#"樓>'[building of two or more stories]'",
+樗>'[kind of tree with useless timber]';
+標>'[mark]';
+樛>'[bending branches]';
+樞>'[door hinge]';
+樟>'[camphor tree]';
+模>'[model]';
+#"樢>'[................................]'",
+樣>'[shape]';
+権>'[power]';
+横>'[across]';
+#"横>'[across]'",
+#"横>'[across]'",
+樵>'[woodcutter]';
+樶>'[c]';
+樸>'[simple]';
+樹>'[tree]';
+樺>'[type of birch]';
+樽>'[goblet]';
+橄>'[olive]';
+橇>'[a sledge for transportation]';
+橈>'[bent or twisted piece of wood]';
+橋>'[bridge]';
+橘>'[orange]';
+橙>'[orange]';
+機>'[machine]';
+橡>'[chestnut oak]';
+橢>'[oval-shaped]';
+橦>'[tree]';
+#"橱>'[cabinet]'",
+#"橵>'[wood placed under roof tiles]'",
+#"橿>'[................................]'",
+檀>'[sandalwood]';
+檄>'[call arms]';
+檍>'[ilex]';
+檎>'[small red apple]';
+檐>'[eaves of house]';
+檗>'[tree]';
+檜>'[chinese cypress]';
+檠>'[stand for lamp]';
+檢>'[check]';
+檣>'[mast]';
+檪>'[chestnut-leaved oak]';
+檬>'[type of locust or acacia]';
+檮>'[block of wood]';
+檳>'[betelnut]';
+檸>'[lemon]';
+檻>'[threshold]';
+#"櫁>'[................................]'",
+櫂>'[oar]';
+櫃>'[cupboard]';
+#"櫑>'[................................]'",
+櫓>'[oar]';
+櫚>'[palm]';
+櫛>'[comb out]';
+櫞>'[citrus]';
+櫟>'[chestnut-leaved oak]';
+櫨>'[supporting block]';
+櫪>'[type of oak]';
+櫺>'[carved or patterned window sills]';
+櫻>'[cherry]';
+欄>'[railing]';
+欅>'[zelkova]';
+權>'[power]';
+欒>'[name of tree]';
+欖>'[olive]';
+欝>'[luxuriant]';
+#"欞>'[the lattice of a window a sill]'",
+欠>'[owe]';
+次>'[order]';
+欣>'[happy]';
+欧>'[translit.: europe]';
+欲>'[desire]';
+欷>'[sob]';
+欸>'[sighs]';
+欹>'[fierce dog]';
+欺>'[cheat]';
+欽>'[respect]';
+款>'[item]';
+歃>'[smear one''s mouth with blood of a victim when taking an oath]';
+歇>'[rest]';
+歉>'[deficient]';
+歌>'[song]';
+歎>'[sigh]';
+歐>'[translit.: europe]';
+歓>'[happy]';
+歔>'[blow through nose]';
+歙>'[to suck]';
+歛>'[draw back]';
+歟>'[final particle used express ques]';
+歡>'[happy]';
+止>'[stop]';
+正>'[right]';
+此>'[this]';
+武>'[military]';
+歩>'[step]';
+歪>'[slant]';
+歯>'[teeth]';
+歳>'[year]';
+歴>'[take place]';
+歸>'[return]';
+歹>'[bad]';
+死>'[die]';
+歿>'[die]';
+殀>'[die young]';
+殃>'[misfortune]';
+殄>'[to end]';
+殆>'[dangerous]';
+殉>'[die for cause]';
+殊>'[different]';
+残>'[injure]';
+殍>'[to starve to death]';
+殕>'[[not found in dictionary]]';
+殖>'[breed]';
+殘>'[injure]';
+殞>'[die]';
+殤>'[die young]';
+殪>'[die]';
+殫>'[utmost]';
+殯>'[encoffin]';
+殱>'[annihilate]';
+殲>'[annihilate]';
+殳>'[name of old weapon]';
+殴>'[beat]';
+段>'[section]';
+殷>'[many]';
+殺>'[kill]';
+殻>'[casing]';
+殼>'[casing]';
+殿>'[hall]';
+毀>'[destroy]';
+毅>'[resolute]';
+毆>'[beat]';
+毋>'[do not]';
+母>'[mother]';
+毎>'[every]';
+毒>'[poison]';
+毓>'[give birth to]';
+比>'[compare]';
+毘>'[help]';
+毛>'[hair]';
+#"毟>'[................................]'",
+毫>'[fine hair]';
+毬>'[ball]';
+毯>'[rug]';
+毳>'[fine hair or fur on animals]';
+氈>'[felt]';
+氏>'[clan]';
+民>'[people]';
+氓>'[people]';
+气>'[steam]';
+気>'[air]';
+氛>'[gas]';
+氣>'[air]';
+氤>'[hanging fog]';
+水>'[water]';
+氷>'[ice]';
+永>'[long]';
+氾>'[overflow]';
+汀>'[sandbar]';
+汁>'[juice]';
+求>'[seek]';
+汎>'[float]';
+汐>'[night tides]';
+汕>'[basket for catching fish]';
+汗>'[perspiration]';
+汚>'[filthy]';
+汝>'[you]';
+汞>'[element mercury]';
+江>'[large river]';
+池>'[pool]';
+#"汢>'[................................]'",
+汨>'[Mi(luo) river in hunan province where Qu Yuan drowned himself]';
+汪>'[vast]';
+汰>'[excessive]';
+汲>'[draw water from well]';
+#"汲>'[draw water from well]'",
+決>'[decide]';
+汽>'[steam]';
+汾>'[river in shanxi province]';
+沁>'[soak into]';
+沂>'[river in southeast shandong flow]';
+沃>'[water]';
+沈>'[sink]';
+沌>'[chaotic]';
+沍>'[freezing]';
+沐>'[bathe]';
+沒>'[not]';
+沓>'[connected]';
+沖>'[pour]';
+沙>'[sand]';
+沚>'[islet in stream]';
+沛>'[abundant]';
+没>'[not]';
+沢>'[marsh]';
+沫>'[froth]';
+沮>'[stop]';
+沱>'[rivers]';
+河>'[river]';
+沸>'[boil]';
+油>'[oil]';
+沺>'[turbulent]';
+治>'[govern]';
+沼>'[lake]';
+沽>'[buy and sell]';
+沾>'[moisten]';
+沿>'[follow course]';
+況>'[condition]';
+泄>'[leak]';
+泅>'[swim]';
+泉>'[spring]';
+泊>'[anchor vessel]';
+泌>'[to seep out]';
+泓>'[clear]';
+法>'[law]';
+泗>'[mucous]';
+泙>'[roar]';
+泛>'[drift]';
+泝>'[go upstream]';
+泡>'[bubbles]';
+波>'[waves]';
+泣>'[cry]';
+泥>'[mud]';
+注>'[concentrate]';
+泪>'[tears]';
+泯>'[destroy]';
+泰>'[great]';
+泱>'[great]';
+泳>'[dive]';
+洋>'[ocean]';
+洌>'[clear]';
+洒>'[sprinkle]';
+洗>'[wash]';
+洙>'[name of a river in shandong]';
+洛>'[river in shaanxi province]';
+洞>'[cave]';
+洟>'[snivel]';
+津>'[ferry]';
+洩>'[leak]';
+洪>'[vast]';
+洫>'[to ditch]';
+洲>'[continent]';
+洳>'[damp]';
+洵>'[true]';
+洶>'[the rush of water]';
+洸>'[sparkle]';
+活>'[live]';
+洽>'[spread]';
+派>'[school of thought]';
+流>'[flow]';
+浄>'[pure]';
+浅>'[shallow]';
+浙>'[zhejiang province]';
+浚>'[dredge]';
+浜>'[creek]';
+浣>'[to wash]';
+浤>'[beating of ocean]';
+浦>'[bank of river]';
+浩>'[great]';
+浪>'[wave]';
+浬>'[nautical mile]';
+浮>'[float]';
+浴>'[bathe]';
+海>'[sea]';
+浸>'[soak]';
+浹>'[saturate]';
+涅>'[blacken]';
+消>'[vanish]';
+涌>'[surge up]';
+涎>'[saliva]';
+涓>'[brook]';
+涕>'[tear]';
+涙>'[tears]';
+涛>'[large waves]';
+涜>'[ditch]';
+涯>'[shore]';
+液>'[sap]';
+涵>'[soak]';
+涸>'[dried up]';
+涼>'[cool]';
+淀>'[shallow water]';
+淅>'[water used wash rice]';
+淆>'[confused]';
+淇>'[river in henan province]';
+淋>'[drip]';
+淌>'[trickle]';
+淑>'[good]';
+淒>'[bitter cold]';
+#"淕>'[................................]'",
+淘>'[wash in sieve]';
+淙>'[gurgling sound of water]';
+淞>'[name of a river in Jiangsu]';
+淡>'[weak]';
+淤>'[mud]';
+淦>'[river in jiangxi province: water]';
+淨>'[pure]';
+淪>'[be lost]';
+淫>'[obscene]';
+淬>'[temper]';
+淮>'[river in anhui province]';
+深>'[deep]';
+淳>'[cyanogen]';
+淵>'[gulf]';
+混>'[mix]';
+淹>'[drown]';
+淺>'[shallow]';
+添>'[append]';
+清>'[clear]';
+渇>'[thirsty]';
+済>'[help]';
+渉>'[ford stream]';
+渊>'[surge up]';
+渋>'[astringent]';
+渓>'[mountain stream]';
+渕>'[surge up]';
+渙>'[scatter]';
+渚>'[small sand bank]';
+減>'[decrease]';
+渝>'[change]';
+渟>'[(of water) not flowing]';
+渠>'[ditch]';
+渡>'[cross]';
+渣>'[refuse]';
+渤>'[swelling]';
+渥>'[moisten]';
+渦>'[swirl]';
+温>'[lukewarm]';
+渫>'[beating of ocean]';
+測>'[measure]';
+渭>'[name of a river in shaanxi]';
+渮>'[river in shandong province]';
+港>'[port]';
+游>'[swim]';
+渺>'[endlessly long]';
+渾>'[muddy]';
+湃>'[sound of waves]';
+湊>'[piece together]';
+湍>'[rapid water current]';
+湎>'[flushed with drink]';
+湖>'[lake]';
+湘>'[hunan province]';
+湛>'[deep]';
+湟>'[river in qinghai province]';
+湧>'[well up]';
+湫>'[a small pond]';
+湮>'[bury]';
+湯>'[hot water]';
+湲>'[flow]';
+#"湳>'[[not found in dictionary]]'",
+湾>'[bay]';
+湿>'[wet]';
+#"湿>'[wet]'",
+#"湿>'[wet]'",
+溌>'[pour]';
+溏>'[pool]';
+源>'[spring]';
+準>'[rule]';
+溘>'[abruptly]';
+溜>'[slide]';
+溝>'[ditch]';
+溟>'[drizzling rain]';
+溢>'[overflow]';
+溥>'[big]';
+溪>'[mountain stream]';
+溯>'[go upstream]';
+溲>'[urinate]';
+溶>'[melt]';
+溷>'[privy]';
+溺>'[drown]';
+溽>'[moist]';
+滂>'[torrential]';
+滄>'[blue]';
+滅>'[extinguish]';
+滉>'[deep]';
+滋>'[grow]';
+滌>'[wash]';
+滑>'[slip]';
+滓>'[sediment]';
+滔>'[overflow]';
+滕>'[county in shandong province]';
+滝>'[raining]';
+滞>'[block up]';
+滬>'[shanghai]';
+滯>'[block up]';
+滲>'[soak through]';
+滴>'[drip]';
+滷>'[thick gravy]';
+滸>'[riverbank]';
+滾>'[turn]';
+滿>'[fill]';
+漁>'[to fish]';
+漂>'[float]';
+漆>'[varnish]';
+漉>'[filter]';
+漏>'[leak]';
+漑>'[water]';
+漓>'[river in guangxi province]';
+演>'[perform]';
+漕>'[transport by water]';
+漠>'[desert]';
+漢>'[chinese people]';
+漣>'[flowing water]';
+漫>'[overflow of water]';
+漬>'[soak]';
+漱>'[gargle]';
+漲>'[rise in price]';
+漸>'[gradually]';
+漾>'[overflow]';
+漿>'[any thick fluid]';
+潁>'[river in anhui]';
+潅>'[pour]';
+潔>'[clean]';
+潘>'[surname]';
+潛>'[hide]';
+潜>'[hide]';
+潟>'[land inundated with salt from ti]';
+潤>'[soft]';
+潦>'[to flood]';
+潭>'[deep pool]';
+潮>'[tide]';
+潯>'[steep bank by stream]';
+潰>'[flooding river]';
+潴>'[pond]';
+潸>'[weep]';
+潺>'[sound of flowing water]';
+潼>'[high]';
+澀>'[astringent]';
+澁>'[astringent]';
+澂>'[clear and still water]';
+澄>'[purify water by allowing sedimen]';
+澆>'[spray]';
+澎>'[splatter]';
+澑>'[slide]';
+澗>'[brook]';
+澡>'[wash]';
+澣>'[cleanse]';
+澤>'[marsh]';
+#"澪>'[................................]'",
+澱>'[sediment]';
+澳>'[inlet]';
+澹>'[calm]';
+激>'[arouse]';
+濁>'[muddy]';
+濂>'[waterfall]';
+濃>'[thick]';
+濆>'[river bank]';
+濔>'[many]';
+濕>'[wet]';
+濘>'[mud]';
+濛>'[drizzling]';
+濟>'[help]';
+濠>'[moat]';
+濡>'[immerse]';
+濤>'[large waves]';
+濫>'[flood]';
+濬>'[dredge]';
+濮>'[county in Henan province]';
+濯>'[wash out]';
+濱>'[beach]';
+濳>'[hide]';
+濶>'[broad]';
+濺>'[sprinkle]';
+濾>'[strain out]';
+瀁>'[waves]';
+瀉>'[drain off]';
+瀋>'[juice]';
+瀏>'[clear]';
+瀑>'[waterfall]';
+瀕>'[approach]';
+瀘>'[river in jiangxi province]';
+瀚>'[vast]';
+瀛>'[sea]';
+瀝>'[trickle]';
+瀞>'[pool in a river]';
+瀟>'[sound of beating wind and rain]';
+瀦>'[pond]';
+瀧>'[raining]';
+瀬>'[swift current]';
+瀰>'[overflow]';
+瀲>'[waves]';
+瀾>'[overflowing]';
+灌>'[pour]';
+灑>'[sprinkle]';
+灘>'[bank]';
+灣>'[bay]';
+火>'[fire]';
+灯>'[lantern]';
+灰>'[ashes]';
+灸>'[cauterize with moxa]';
+灼>'[burn]';
+災>'[calamity]';
+炉>'[fireplace]';
+炊>'[cook]';
+炎>'[flame]';
+炒>'[fry]';
+炙>'[roast]';
+炬>'[torch]';
+炭>'[charcoal]';
+炮>'[large gun]';
+炯>'[bright]';
+炳>'[bright]';
+炸>'[fry in oil]';
+点>'[dot]';
+為>'[do]';
+烈>'[fiery]';
+烋>'[boast]';
+烏>'[crow]';
+烙>'[brand]';
+烝>'[rise]';
+烟>'[smoke]';
+烱>'[bright]';
+烹>'[boil]';
+烽>'[signal fire]';
+焉>'[thereupon]';
+焔>'[flame]';
+焙>'[dry over slow fire]';
+焚>'[burn]';
+焜>'[fire]';
+無>'[negative]';
+焦>'[burned]';
+然>'[yes]';
+焼>'[burn]';
+煉>'[smelt]';
+煌>'[bright]';
+煎>'[fry in fat or oil]';
+煕>'[bright]';
+煖>'[warm]';
+煙>'[smoke]';
+煢>'[alone]';
+煤>'[coal]';
+煥>'[shining]';
+煦>'[kind]';
+照>'[shine]';
+煩>'[bother]';
+煬>'[roast]';
+煮>'[cook]';
+煽>'[stir up]';
+熄>'[put out]';
+熈>'[bright]';
+熊>'[bear]';
+熏>'[smoke]';
+熔>'[melt]';
+#"熕>'[................................]'",
+熙>'[bright]';
+熟>'[well cooked]';
+熨>'[iron]';
+熬>'[cook down]';
+熱>'[hot]';
+熹>'[dim light]';
+熾>'[burning hot]';
+燃>'[burn]';
+燈>'[lantern]';
+燉>'[heat with fire]';
+燎>'[burn]';
+燐>'[phosphorus]';
+燒>'[burn]';
+燔>'[to roast]';
+燕>'[swallow (bird)]';
+#"燗>'[................................]'",
+營>'[encampment]';
+燠>'[warm]';
+燥>'[dry]';
+燦>'[vivid]';
+燧>'[flintstone]';
+燬>'[burn down]';
+燭>'[candle]';
+燮>'[harmonize]';
+#"燵>'[................................]'",
+燹>'[fire]';
+燻>'[smoke]';
+燼>'[cinders]';
+燿>'[shine]';
+爆>'[crackle]';
+爍>'[shine]';
+爐>'[fireplace]';
+爛>'[rotten]';
+爨>'[oven]';
+爪>'[claw]';
+爬>'[crawl]';
+爭>'[dispute]';
+爰>'[lead on to]';
+爲>'[do]';
+爵>'[feudal title or rank]';
+父>'[father]';
+爺>'[father]';
+爻>'[diagrams for divination]';
+爼>'[chopping board or block]';
+爽>'[happy]';
+爾>'[you]';
+爿>'[half of tree trunk]';
+牀>'[bed]';
+牆>'[wall]';
+片>'[slice]';
+版>'[printing blocks]';
+牋>'[memorandum]';
+牌>'[signboard]';
+牒>'[documents]';
+牘>'[writing tablet]';
+牙>'[tooth]';
+牛>'[cow]';
+牝>'[female of species]';
+牟>'[make]';
+牡>'[male of animals]';
+牢>'[prison]';
+牧>'[tend cattle]';
+物>'[thing]';
+牲>'[sacrificial animal]';
+牴>'[gore]';
+特>'[special]';
+牽>'[drag]';
+牾>'[to oppose]';
+犀>'[rhinoceros]';
+犁>'[plow]';
+犂>'[plow]';
+犇>'[run fast]';
+犒>'[entertain victorious soldiers]';
+犖>'[brindled ox]';
+犠>'[sacrifice]';
+犢>'[calf]';
+犧>'[sacrifice]';
+犬>'[dog]';
+犯>'[commit crime]';
+犲>'[wolf]';
+状>'[form]';
+犹>'[like]';
+狂>'[insane]';
+狃>'[to covet]';
+狄>'[tribe from northern china]';
+狆>'[pekinese dog]';
+狎>'[be familiar with]';
+狐>'[species of fox]';
+狒>'[baboon]';
+狗>'[dog]';
+狙>'[ape]';
+#"狛>'[................................]'",
+狠>'[vicious]';
+狡>'[cunning]';
+狢>'[animal name]';
+狩>'[winter hunting]';
+独>'[alone]';
+狭>'[narrow]';
+狷>'[rash]';
+狸>'[fox]';
+狹>'[narrow]';
+狼>'[wolf]';
+狽>'[legendary animal with short fore]';
+猊>'[lion]';
+猖>'[mad]';
+猗>'[exclamation of admiration]';
+猛>'[violent]';
+猜>'[guess]';
+猝>'[abruptly]';
+猟>'[hunt]';
+猥>'[vulgar]';
+猩>'[species of orangutan]';
+猪>'[pig]';
+猫>'[cat]';
+献>'[offer]';
+#"猯>'[................................]'",
+猴>'[monkey]';
+猶>'[like]';
+猷>'[plan]';
+猾>'[crafty]';
+猿>'[ape]';
+獄>'[prison]';
+獅>'[lion]';
+獎>'[prize]';
+獏>'[the panther]';
+獗>'[unruly]';
+獣>'[beast]';
+獨>'[alone]';
+獪>'[sly]';
+獰>'[ferocious appearance]';
+獲>'[obtain]';
+獵>'[hunt]';
+獸>'[beast]';
+獺>'[otter]';
+獻>'[offer]';
+玄>'[deep]';
+率>'[to lead]';
+玉>'[jade]';
+王>'[king]';
+玖>'[black-colored jade]';
+玩>'[play with]';
+玲>'[tinkling of jade]';
+玳>'[tortoise shell]';
+玻>'[glass]';
+珀>'[amber]';
+珂>'[inferior kind of jade]';
+珈>'[ornament attached woman''s hairpin]';
+珊>'[coral]';
+珍>'[precious]';
+珎>'[precious]';
+珞>'[kind of necklace]';
+珠>'[precious stone]';
+珥>'[ear ornament]';
+珪>'[jade table conferred upon feudal]';
+班>'[class]';
+珮>'[jade ornament]';
+珱>'[necklace made of precious stones]';
+#"珸>'[................................]'",
+現>'[appear]';
+球>'[ball]';
+琅>'[variety of white carnelian]';
+理>'[reason]';
+琉>'[sparkling stone]';
+琢>'[polish jade]';
+琥>'[jewel in shape of tiger]';
+琲>'[necklace]';
+琳>'[beautiful jade]';
+琴>'[chinese lute or guitar]';
+琵>'[guitar-like instrument]';
+琶>'[guitar-like instrument]';
+琺>'[enamel]';
+琿>'[bright]';
+瑁>'[fine piece of jade]';
+瑕>'[flaw in gem]';
+瑙>'[agate]';
+瑚>'[coral]';
+瑛>'[luster of gem]';
+瑜>'[flawless gem or jewel]';
+瑞>'[felicitous omen]';
+瑟>'[large stringed musical instrument]';
+瑠>'[precious stone]';
+瑣>'[fragments]';
+瑤>'[precious jade]';
+瑩>'[lustre of gems]';
+瑪>'[agate]';
+瑯>'[kind of white cornelian]';
+瑰>'[extraordinary]';
+瑳>'[luster of gem]';
+瑶>'[precious jade]';
+瑾>'[brilliance of gems]';
+璃>'[glass]';
+璋>'[jade plaything]';
+璞>'[unpolished gem]';
+璢>'[precious stone]';
+璧>'[piece of jade with hole in it]';
+環>'[jade ring or bracelet]';
+璽>'[imperial signet]';
+瓊>'[jade]';
+瓏>'[gem cut like dragon]';
+瓔>'[necklace made of precious stones]';
+瓜>'[melon]';
+瓠>'[bottle gourd]';
+瓢>'[ladle made from dried gourd]';
+瓣>'[petal]';
+瓦>'[tile]';
+瓧>'[decagram]';
+瓩>'[kilowatt]';
+瓮>'[earthen jar]';
+瓰>'[[not found in dictionary]]';
+瓱>'[milligram]';
+#"瓲>'[................................]'",
+瓶>'[jug]';
+瓷>'[crockery]';
+瓸>'[hectogram]';
+甃>'[brick wall of a well]';
+甄>'[examine]';
+甅>'[centigram]';
+甌>'[bowl]';
+甍>'[rafters supporting roof tiles]';
+甎>'[brick]';
+甑>'[boiler for steaming rice]';
+甓>'[glazed tiles]';
+甕>'[earthen jar]';
+甘>'[sweetness]';
+甚>'[great extent]';
+甜>'[sweet]';
+甞>'[taste]';
+生>'[life]';
+産>'[give birth]';
+甥>'[sister''s child]';
+甦>'[be reborn]';
+用>'[use]';
+甫>'[begin]';
+甬>'[path]';
+田>'[field]';
+由>'[cause]';
+甲>'[armor]';
+申>'[to state to a superior]';
+男>'[male]';
+甸>'[suburbs of capital]';
+町>'[raised path between fields]';
+画>'[painting]';
+甼>'[raised path between fields]';
+畄>'[stop]';
+畆>'[chinese land measure]';
+#"畉>'[................................]'",
+畊>'[plow]';
+畋>'[till land]';
+界>'[boundary]';
+#"畍>'[................................]'",
+畏>'[fear]';
+畑>'[dry (as opposed to rice) field]';
+畔>'[boundary path dividing fields]';
+留>'[stop]';
+畚>'[straw basket]';
+畛>'[border]';
+畜>'[livestock]';
+畝>'[chinese land measure]';
+畠>'[garden]';
+畢>'[end]';
+畤>'[place for worshipping the heaven]';
+略>'[approximately]';
+畦>'[sections in vegetable farm]';
+畧>'[approximately]';
+#"畩>'[................................]'",
+番>'[take turns]';
+畫>'[delineate]';
+#"畭>'[................................]'",
+異>'[different]';
+畳>'[repeat]';
+畴>'[farmland]';
+當>'[bear]';
+畷>'[raised path between fields]';
+畸>'[odd]';
+畿>'[imperial domain]';
+疂>'[repeat]';
+疆>'[boundary]';
+疇>'[farmland]';
+疉>'[repeat]';
+疊>'[repeat]';
+疋>'[roll]';
+疎>'[neglect]';
+疏>'[neglect]';
+疑>'[doubt]';
+疔>'[carbuncle]';
+疚>'[chronic disease]';
+疝>'[hernia]';
+疣>'[wart]';
+疥>'[scabies]';
+疫>'[epidemic]';
+疱>'[acne]';
+疲>'[feel tired]';
+疳>'[childhood diseases]';
+疵>'[flaw]';
+疸>'[jaundice]';
+疹>'[measles]';
+疼>'[aches]';
+疽>'[ulcer]';
+疾>'[illness]';
+痂>'[scab]';
+痃>'[indigestion]';
+病>'[illness]';
+症>'[disease]';
+痊>'[be healed]';
+痍>'[wound]';
+痒>'[itch]';
+痔>'[hemorrhoids]';
+痕>'[scar]';
+痘>'[smallpox]';
+痙>'[convulsions]';
+痛>'[pain]';
+痞>'[dyspepsia]';
+痢>'[dysentery]';
+痣>'[spots]';
+痩>'[thin]';
+痰>'[phlegm]';
+痲>'[pock-marked]';
+痳>'[pock-marked]';
+痴>'[foolish]';
+痺>'[paralysis]';
+痼>'[chronic disease]';
+痾>'[chronic illness]';
+痿>'[paralysis]';
+瘁>'[feel tired]';
+瘉>'[get well]';
+瘋>'[crazy]';
+瘍>'[ulcers]';
+瘟>'[epidemic]';
+瘠>'[thin]';
+瘡>'[tumor]';
+瘢>'[scar]';
+瘤>'[tumor]';
+瘧>'[intermittent fever]';
+瘰>'[scrofula]';
+瘴>'[malaria pestilential vapors]';
+瘻>'[fistula]';
+療>'[be healed]';
+癆>'[consumption]';
+癇>'[epilepsy]';
+癈>'[abrogate]';
+癌>'[cancer]';
+癒>'[get well]';
+癖>'[craving]';
+癘>'[sore]';
+癜>'[erythema]';
+癡>'[silly]';
+癢>'[itch]';
+癧>'[scrofulous lumps or swellings]';
+癨>'[quickly]';
+癩>'[leprosy]';
+癪>'[spasms]';
+癬>'[ringworms]';
+癰>'[carbuncle]';
+癲>'[crazy]';
+癶>'[legs]';
+癸>'[last of ten celestial stems]';
+発>'[issue]';
+登>'[rise]';
+發>'[issue]';
+白>'[white]';
+百>'[one hundred]';
+皀>'[kernel]';
+皃>'[countenance]';
+的>'[possessive]';
+皆>'[all]';
+皇>'[royal]';
+皈>'[follow]';
+皋>'[the high land along a river]';
+皎>'[white]';
+皐>'[the high land along a river]';
+皓>'[bright]';
+皖>'[anhui province]';
+皙>'[white]';
+皚>'[brilliant white]';
+皮>'[skin]';
+皰>'[pimples]';
+皴>'[chapped]';
+皷>'[drum]';
+皸>'[crack]';
+皹>'[crack]';
+皺>'[wrinkles]';
+皿>'[shallow container]';
+盂>'[basin]';
+盃>'[glass]';
+盆>'[basin]';
+盈>'[fill]';
+益>'[profit]';
+盍>'[what? why not? correspond]';
+盒>'[small box or case]';
+盖>'[cover]';
+盗>'[rob]';
+盛>'[abundant]';
+盜>'[rob]';
+盞>'[small cup or container]';
+盟>'[swear]';
+盡>'[exhaust]';
+監>'[supervise]';
+盤>'[tray]';
+盥>'[wash]';
+盧>'[cottage]';
+盪>'[to toss about]';
+目>'[eye]';
+盲>'[blind]';
+直>'[straight]';
+相>'[mutual]';
+盻>'[glare]';
+盾>'[shield]';
+省>'[province]';
+眄>'[to look askance]';
+眇>'[blind in one eye]';
+眈>'[gloat]';
+眉>'[eyebrows]';
+看>'[look]';
+県>'[county]';
+眛>'[dim]';
+#"眞>'[real]'",
+真>'[real]';
+眠>'[close eyes]';
+#"眤>'[................................]'",
+眥>'[eye sockets]';
+眦>'[corner of the eyes]';
+眩>'[confuse]';
+眷>'[take interest in]';
+眸>'[pupil of eye]';
+眺>'[look at]';
+眼>'[eye]';
+着>'[make move]';
+睇>'[look at]';
+睚>'[corner of eye]';
+睛>'[eyeball]';
+睡>'[sleep]';
+督>'[supervise]';
+睥>'[look askance at]';
+睦>'[friendly]';
+睨>'[look askance at]';
+睫>'[eyelashes]';
+睹>'[look at]';
+睾>'[testicle]';
+睿>'[shrewd]';
+瞋>'[glare with anger]';
+瞎>'[blind]';
+瞑>'[close eyes]';
+瞞>'[deceive]';
+瞠>'[look at]';
+瞥>'[take fleeting glance at]';
+瞬>'[wink]';
+瞭>'[bright]';
+瞰>'[watch]';
+瞳>'[pupil of eye]';
+瞶>'[dim]';
+#"瞶>'[dim]'",
+瞻>'[look]';
+瞼>'[eyelid]';
+瞽>'[blind]';
+瞿>'[surname]';
+矇>'[stupid]';
+矍>'[look about in fright or alarm]';
+矗>'[straight]';
+矚>'[watch carefully]';
+矛>'[spear]';
+矜>'[pity]';
+矢>'[arrow]';
+矣>'[particle of completed action]';
+知>'[know]';
+矧>'[much more]';
+矩>'[carpenter''s square]';
+短>'[short]';
+矮>'[short]';
+矯>'[correct]';
+石>'[stone]';
+矼>'[stone bridge]';
+砂>'[sand]';
+砌>'[stone steps]';
+砒>'[arsenic]';
+研>'[grind]';
+砕>'[break]';
+砠>'[rocky]';
+砥>'[whetstone]';
+砦>'[stockade]';
+砧>'[anvil]';
+砲>'[gun]';
+破>'[break]';
+砺>'[whetstone]';
+砿>'[mine]';
+硅>'[silicon]';
+硝>'[saltpeter]';
+硫>'[sulfur]';
+硬>'[hard]';
+硯>'[inkstone]';
+#"硲>'[................................]'",
+#"硴>'[................................]'",
+硼>'[borax]';
+碁>'[chess]';
+碆>'[arrow-tip]';
+碇>'[anchor]';
+碌>'[rough]';
+碍>'[obstruct]';
+碎>'[break]';
+碑>'[stone tablet]';
+碓>'[pestle]';
+#"碕>'[................................]'",
+碗>'[bowl]';
+碚>'[suburb]';
+碣>'[stone tablet]';
+碧>'[jade]';
+碩>'[great]';
+碪>'[stone slab used for washing clot]';
+碯>'[agate]';
+#"碵>'[................................]'",
+確>'[sure]';
+碼>'[number]';
+碾>'[roller]';
+磁>'[magnetic]';
+磅>'[pound]';
+#"磆>'[................................]'",
+磊>'[pile of rocks or stones]';
+磋>'[polish]';
+磐>'[large rock]';
+磑>'[stone mill]';
+磔>'[downward stroke slanting righ]';
+磚>'[tile]';
+磧>'[sand and gravel]';
+磨>'[grind]';
+磬>'[musical instrument]';
+磯>'[jetty]';
+磴>'[steps on ledge]';
+磽>'[barren land]';
+礁>'[reef]';
+#"礇>'[................................]'",
+礎>'[foundation stone]';
+#"礎>'[foundation stone]'",
+#"礒>'[................................]'",
+礙>'[obstruct]';
+礦>'[mine]';
+礪>'[whetstone]';
+礫>'[gravel]';
+礬>'[alum]';
+示>'[show]';
+礼>'[social custom]';
+社>'[god of the soil and altars to him]';
+祀>'[to sacrifice]';
+祁>'[pray]';
+祇>'[only]';
+祈>'[pray]';
+祉>'[happiness]';
+祐>'[divine intervention]';
+祓>'[exorcise]';
+祕>'[mysterious]';
+祖>'[ancestor]';
+祗>'[respect]';
+祚>'[throne]';
+祝>'[pray for happiness or blessings]';
+神>'[spirit]';
+祟>'[evil spirit]';
+祠>'[ancestral temple]';
+祢>'[one''s deceased father]';
+祥>'[good luck]';
+票>'[slip of paper or bamboo]';
+祭>'[sacrifice to]';
+祷>'[pray]';
+祺>'[good luck]';
+祿>'[blessing]';
+禀>'[report to]';
+禁>'[restrict]';
+禄>'[blessing]';
+禅>'[meditation]';
+禊>'[semi-annual ceremony of purifica]';
+禍>'[misfortune]';
+禎>'[lucky]';
+福>'[happiness]';
+禝>'[[not found in dictionary]]';
+禦>'[defend]';
+禧>'[happiness]';
+禪>'[meditation]';
+禮>'[social custom]';
+禰>'[one''s deceased father]';
+禳>'[pray or sacrifice]';
+禹>'[legendary hsia dynasty founder]';
+禺>'[district]';
+禽>'[birds]';
+禾>'[grain still on stalk]';
+禿>'[bald]';
+秀>'[ear of grain]';
+私>'[private]';
+秉>'[grasp]';
+秋>'[autumn]';
+科>'[section]';
+秒>'[beard of grain or corn]';
+秕>'[empty grain or rice husk]';
+秘>'[secret]';
+租>'[rent]';
+#"秡>'[................................]'",
+秣>'[fodder]';
+秤>'[balance]';
+秦>'[feudal state of qin]';
+秧>'[rice seedlings]';
+秩>'[order]';
+秬>'[black millet]';
+称>'[call]';
+移>'[change place]';
+稀>'[rare]';
+稈>'[stalk of grain]';
+程>'[journey]';
+稍>'[little]';
+税>'[taxes]';
+稔>'[ripe grain]';
+稗>'[darnels]';
+#"稘>'[................................]'",
+稙>'[grain ready for grinding]';
+稚>'[young]';
+稜>'[corner]';
+稟>'[report to]';
+稠>'[dense]';
+種>'[seed]';
+稱>'[call]';
+稲>'[rice growing in field]';
+稷>'[god of cereals]';
+稻>'[rice growing in field]';
+稼>'[sow grain]';
+稽>'[examine]';
+稾>'[draft]';
+稿>'[draft]';
+穀>'[corn]';
+穂>'[ear of grain]';
+#"穃>'[................................]'",
+穆>'[majestic]';
+穉>'[young grain]';
+積>'[accumulate]';
+穎>'[rice tassel]';
+#"穎>'[rice tassel]'",
+穐>'[fall]';
+穗>'[ear of grain]';
+穡>'[farm]';
+穢>'[dirty]';
+穣>'[stalks of grain]';
+穩>'[stable]';
+穫>'[harvest]';
+穰>'[stalks of grain]';
+穴>'[cave]';
+究>'[examine]';
+穹>'[high and vast]';
+空>'[empty]';
+穽>'[hole]';
+穿>'[penetrate]';
+突>'[suddenly]';
+窃>'[secretly]';
+窄>'[narrow]';
+窈>'[obscure]';
+窒>'[stop up]';
+窓>'[window]';
+窕>'[slender]';
+窖>'[pit]';
+窗>'[window]';
+窘>'[embarrassed]';
+窟>'[hole]';
+窩>'[nest]';
+窪>'[hollow]';
+窮>'[poor]';
+窯>'[kiln]';
+窰>'[kiln]';
+窶>'[poor]';
+窺>'[peep]';
+窿>'[mine shaft]';
+竃>'[furnace]';
+竄>'[run away]';
+竅>'[hole]';
+竇>'[surname]';
+竈>'[furnace]';
+竊>'[secretly]';
+立>'[stand]';
+竍>'[decaliter]';
+竏>'[kiloliter]';
+竒>'[strange]';
+竓>'[milliliter]';
+竕>'[deciliter]';
+站>'[stand up]';
+竚>'[stand and wait for long time]';
+竜>'[dragon]';
+竝>'[combine]';
+竟>'[finally]';
+章>'[composition]';
+竡>'[hectoliter]';
+竢>'[wait for]';
+竣>'[terminate]';
+童>'[child]';
+竦>'[revere]';
+竪>'[perpendicular]';
+竭>'[put forth great effort]';
+端>'[end]';
+竰>'[centiliter]';
+競>'[contend]';
+竸>'[contend]';
+竹>'[bamboo]';
+竺>'[india]';
+竿>'[bamboo pole]';
+#"笂>'[................................]'",
+笄>'[hairpin]';
+笆>'[bamboo fence]';
+笈>'[bamboo box used carry books]';
+笊>'[ladle]';
+笋>'[bamboo shoots]';
+笏>'[tablet held by someone having au]';
+笑>'[smile]';
+#"笘>'[................................]'",
+笙>'[small gourd-shaped musical instrument]';
+笛>'[bamboo flute]';
+笞>'[bamboo rod used for beatings]';
+笠>'[bamboo hat]';
+笥>'[a hamper]';
+符>'[i.d. tag]';
+笨>'[foolish]';
+第>'[sequence]';
+笳>'[a reed leaf whistle]';
+笵>'[a bamboo form]';
+#"笶>'[................................]'",
+笹>'[small bamboo]';
+筅>'[bamboo brush]';
+筆>'[writing brush]';
+筈>'[arrow end]';
+等>'[rank]';
+筋>'[muscles]';
+筌>'[bamboo fish trap]';
+筍>'[bamboo shoot]';
+筏>'[raft]';
+筐>'[bamboo basket or chest]';
+筑>'[ancient lute]';
+筒>'[thick piece of bamboo]';
+答>'[answer]';
+策>'[scheme]';
+筝>'[stringed musical instrument]';
+筥>'[round-shaped bamboo basket for]';
+筧>'[bamboo water pipe]';
+筬>'[reed of a loom]';
+筮>'[divination with stalks of plants]';
+筰>'[cable]';
+筱>'[dwarf bamboo]';
+筴>'[type of grass used in divination]';
+筵>'[bamboo mat]';
+筺>'[bamboo basket or chest]';
+箆>'[fine-toothed comb]';
+箇>'[numerary adjunct]';
+箋>'[note]';
+箍>'[hoop]';
+箏>'[stringed musical instrument]';
+箒>'[broom]';
+箔>'[reed screen]';
+箕>'[sieve]';
+算>'[count]';
+箘>'[fine bamboo]';
+箙>'[quiver]';
+箚>'[brief note]';
+箜>'[ancient string music instrument]';
+箝>'[tweezers]';
+#"箟>'[................................]'",
+管>'[pipe]';
+箪>'[small bamboo basket for holding]';
+箭>'[arrow]';
+箱>'[case]';
+箴>'[needle]';
+箸>'[chopsticks]';
+節>'[knot]';
+篁>'[bamboo grove]';
+範>'[pattern]';
+篆>'[seal script]';
+篇>'[chapter]';
+築>'[build]';
+篋>'[rattan box]';
+篌>'[ancient music instrument]';
+篏>'[inlay]';
+篝>'[bamboo basket]';
+篠>'[dwarf bamboo]';
+篤>'[deep]';
+篥>'[bugle]';
+篦>'[fine-toothed comb]';
+篩>'[sieve]';
+篭>'[cage]';
+篳>'[wicker]';
+#"篶>'[................................]'",
+篷>'[awning]';
+簀>'[bed mat]';
+簇>'[swarm]';
+簍>'[bamboo basket]';
+簑>'[a coir raincoat]';
+簒>'[usurp]';
+#"簓>'[................................]'",
+簔>'[a coir raincoat]';
+#"簗>'[................................]'",
+簟>'[bamboo mat]';
+簡>'[simple]';
+簣>'[bamboo basket for carrying earth]';
+簧>'[reed of woodwind instrument]';
+簪>'[hairpin]';
+簫>'[musical instrument like pan-pipes]';
+簷>'[eaves of house]';
+簸>'[winnower]';
+簽>'[sign]';
+簾>'[a blind]';
+簿>'[register]';
+籀>'[recite]';
+籃>'[basket]';
+籌>'[chip]';
+籍>'[record]';
+籏>'[flag]';
+籐>'[climbing plants]';
+籔>'[bamboo basket]';
+籖>'[tally]';
+籘>'[climbing plants]';
+籟>'[bamboo flute]';
+籠>'[cage]';
+籤>'[tally]';
+籥>'[key]';
+籬>'[bamboo or wooden fence]';
+米>'[hulled or husked uncooked rice]';
+籵>'[dm]';
+籾>'[unhulled rice]';
+粁>'[km]';
+粂>'[surname]';
+粃>'[empty husks of grain]';
+粉>'[powder]';
+粋>'[pure]';
+粍>'[mm]';
+#"粐>'[................................]'",
+粒>'[grain]';
+粕>'[lees]';
+粗>'[rough]';
+粘>'[viscous]';
+粛>'[pay respects]';
+粟>'[unhusked millet]';
+#"粡>'[................................]'",
+粢>'[grain offered in ritual sacrific]';
+粤>'[Guangdong and Guangxi provinces]';
+粥>'[rice gruel]';
+粧>'[toilet]';
+粨>'[hm]';
+#"粫>'[................................]'",
+#"粭>'[................................]'",
+粮>'[food]';
+粱>'[better varieties of millet]';
+粲>'[polish]';
+粳>'[non-glutinous rice]';
+粹>'[pure]';
+粽>'[dumpling made of glutinous rice]';
+精>'[essence]';
+#"糀>'[................................]'",
+#"糂>'[................................]'",
+糅>'[blend]';
+糊>'[paste]';
+糎>'[mm]';
+糒>'[food for a journey]';
+糖>'[sugar]';
+#"糘>'[................................]'",
+糜>'[rice gruel]';
+糞>'[manure]';
+糟>'[sediment]';
+糠>'[chaff]';
+糢>'[rice snacks]';
+糧>'[food]';
+糯>'[glutinous rice]';
+糲>'[unpolished rice]';
+糴>'[purchase grains]';
+糶>'[sell grains]';
+糸>'[silk]';
+糺>'[to collaborate]';
+系>'[system]';
+糾>'[investigate]';
+紀>'[record]';
+紂>'[name of an emperor]';
+約>'[treaty]';
+紅>'[red]';
+紆>'[bend]';
+紊>'[confused]';
+紋>'[line]';
+納>'[admit]';
+紐>'[knot]';
+純>'[pure]';
+紕>'[spoiled silk]';
+紗>'[gauze]';
+紘>'[string]';
+紙>'[paper]';
+級>'[level]';
+紛>'[in disorder]';
+紜>'[confused]';
+素>'[white (silk)]';
+紡>'[spin]';
+索>'[large rope]';
+紫>'[purple]';
+紬>'[kind of thin silk]';
+紮>'[tie]';
+累>'[tired]';
+細>'[fine]';
+紲>'[bridle]';
+紳>'[girdle]';
+紵>'[ramie]';
+紹>'[continue]';
+紺>'[dark blue color]';
+紿>'[cheat]';
+終>'[end]';
+絃>'[string on musical instrument]';
+組>'[class]';
+絅>'[unlined garment]';
+絆>'[loop]';
+#"絋>'[................................]'",
+経>'[classic works]';
+絎>'[baste]';
+絏>'[rope]';
+結>'[knot]';
+絖>'[fine silks]';
+絛>'[silk braid]';
+絞>'[twist]';
+絡>'[enmesh]';
+絢>'[variegated]';
+絣>'[to baste for sewing]';
+給>'[give]';
+絨>'[silk]';
+絮>'[waste cotton]';
+統>'[govern]';
+絲>'[silk]';
+絳>'[deep red]';
+絵>'[draw]';
+絶>'[cut]';
+絹>'[kind of thick stiff silk]';
+#"絽>'[................................]'",
+綉>'[embroider]';
+綏>'[soothe]';
+經>'[classic works]';
+継>'[continue]';
+続>'[continue]';
+#"綛>'[................................]'",
+綜>'[arrange threads for weaving]';
+#"綟>'[................................]'",
+綢>'[silk cloth]';
+綣>'[affectionate]';
+綫>'[line]';
+綬>'[silk ribbon attached as a seal]';
+維>'[maintain]';
+綮>'[embroidered banner]';
+綯>'[braid]';
+綰>'[to string together]';
+綱>'[heavy rope]';
+網>'[net]';
+綴>'[patch together]';
+綵>'[varicolored silk]';
+綸>'[green silk thread or tassel]';
+綺>'[fine thin silk]';
+綻>'[ripped seam]';
+綽>'[graceful]';
+綾>'[thin silk]';
+綿>'[cotton wad]';
+緇>'[black silk]';
+緊>'[tense]';
+緋>'[scarlet]';
+総>'[collect]';
+緑>'[green]';
+緒>'[end of thread]';
+#"緕>'[................................]'",
+緘>'[seal]';
+線>'[thread]';
+緜>'[cotton wad]';
+緝>'[to sew in close stitches]';
+緞>'[satin]';
+締>'[tie]';
+緡>'[fishing-line]';
+緤>'[cord]';
+編>'[knit]';
+緩>'[slow]';
+緬>'[distant]';
+緯>'[woof]';
+緲>'[indistinct]';
+練>'[practice]';
+緻>'[delicate]';
+縁>'[hem]';
+縄>'[rope]';
+#"縄>'[rope]'",
+縉>'[red silk]';
+縊>'[hang]';
+縋>'[climb down rope]';
+#"縑>'[fine silk]'",
+縛>'[to tie]';
+縞>'[white raw silk]';
+縟>'[decorative]';
+縡>'[matter]';
+縢>'[bind]';
+縣>'[county]';
+縦>'[indulge in]';
+縫>'[sew]';
+縮>'[contract]';
+縱>'[indulge in]';
+縲>'[chain or rope used bind criminal]';
+縵>'[plain silk]';
+縷>'[thread]';
+縹>'[light blue silk]';
+#"縺>'[................................]'",
+縻>'[halter for ox]';
+總>'[collect]';
+績>'[spin]';
+繁>'[complicated]';
+繃>'[bind]';
+繆>'[wind around]';
+繊>'[fine]';
+繋>'[attach]';
+繍>'[embroider]';
+織>'[weave]';
+繕>'[repair]';
+繖>'[umbrella]';
+繙>'[interpret]';
+繚>'[wind round]';
+#"繝>'[................................]'",
+繞>'[entwine]';
+繦>'[string of copper coins]';
+#"繧>'[................................]'",
+繩>'[rope]';
+繪>'[draw]';
+繭>'[cocoon]';
+繰>'[to reel silk from cocoons]';
+繹>'[unravel or unreel silk]';
+繻>'[fine silk gauze]';
+繼>'[continue]';
+繽>'[flourishing]';
+#"繿>'[................................]'",
+纂>'[edit]';
+#"纃>'[................................]'",
+纈>'[patterned silk]';
+#"纉>'[................................]'",
+續>'[continue]';
+纎>'[fine]';
+纏>'[wrap]';
+#"纐>'[................................]'",
+纒>'[wrap]';
+纓>'[chin strap]';
+纔>'[talent]';
+纖>'[fine]';
+纛>'[a banner]';
+纜>'[hawser]';
+缶>'[earthen crock or jar]';
+缸>'[earthen jug]';
+缺>'[be short of]';
+罅>'[crack]';
+罌>'[long necked jar or bottle]';
+罍>'[large earthenware wine jar]';
+罎>'[an earthenware jar]';
+罐>'[jar]';
+网>'[net]';
+罔>'[net]';
+罕>'[rare]';
+罘>'[screen used in ancient times]';
+罟>'[net]';
+罠>'[animal trap]';
+#"罧>'[................................]'",
+罨>'[medical compress]';
+罩>'[basket for catching fish]';
+罪>'[crime]';
+罫>'[hinder]';
+置>'[place]';
+罰>'[penalty]';
+署>'[public office]';
+罵>'[accuse]';
+罷>'[cease]';
+罸>'[penalty]';
+罹>'[sorrow]';
+#"羂>'[................................]'",
+羃>'[cover-cloth]';
+羅>'[net for catching birds]';
+羆>'[brown bear]';
+羇>'[inn]';
+羈>'[halter]';
+羊>'[sheep]';
+羌>'[qiang nationality]';
+美>'[beautiful]';
+羔>'[lamb]';
+羚>'[species of antelope]';
+羝>'[ram]';
+羞>'[disgrace]';
+羣>'[group]';
+群>'[group]';
+羨>'[envy]';
+義>'[right conduct]';
+羮>'[soup]';
+羯>'[wether]';
+羲>'[ancient emperor]';
+羶>'[rank odor]';
+羸>'[weak]';
+羹>'[soup]';
+羽>'[feather]';
+翁>'[old man]';
+翅>'[wings]';
+翆>'[color green]';
+翊>'[flying]';
+翌>'[bright]';
+習>'[practice]';
+翔>'[soar]';
+翕>'[agree]';
+翠>'[color green]';
+翡>'[kingfisher]';
+翦>'[scissors]';
+翩>'[fly]';
+翫>'[careless]';
+翰>'[writing brush]';
+翳>'[shade]';
+翹>'[turn up]';
+翻>'[flip over]';
+翼>'[wings]';
+耀>'[shine]';
+老>'[old]';
+考>'[examine]';
+耄>'[elderly person]';
+者>'[that which]';
+耆>'[man of sixty]';
+耋>'[aged]';
+而>'[and]';
+耐>'[endure]';
+耒>'[handle of plow]';
+耕>'[plow]';
+耗>'[consume]';
+耘>'[weed]';
+耙>'[rake]';
+耜>'[spade-shaped tool]';
+耡>'[hoe]';
+耨>'[hoe]';
+耳>'[ear]';
+耶>'[used in transliteration]';
+耻>'[shame]';
+耽>'[indulge in]';
+耿>'[bright]';
+聆>'[listen]';
+聊>'[somewhat]';
+聒>'[clamor]';
+聖>'[holy]';
+聘>'[engage]';
+聚>'[assemble]';
+聞>'[hear]';
+聟>'[son-in-law]';
+聡>'[intelligent]';
+#"聢>'[................................]'",
+聨>'[connect]';
+聯>'[connect]';
+聰>'[intelligent]';
+聲>'[sound]';
+聳>'[urge on]';
+聴>'[hear]';
+聶>'[whisper]';
+職>'[duty]';
+聹>'[earwax]';
+聽>'[hear]';
+聾>'[deaf]';
+聿>'[writing brush]';
+肄>'[learn]';
+肅>'[pay respects]';
+肆>'[indulge]';
+肇>'[begin]';
+肉>'[flesh]';
+肋>'[ribs]';
+肌>'[muscle tissue]';
+肓>'[region between heart and diaphragm]';
+肖>'[look like]';
+肘>'[elbow]';
+肚>'[belly]';
+肛>'[anus]';
+肝>'[liver]';
+股>'[thighs]';
+肢>'[human limbs]';
+肥>'[fat]';
+肩>'[shoulders]';
+肪>'[animal fat]';
+肬>'[wart]';
+肭>'[fat]';
+肯>'[willing]';
+肱>'[forearm]';
+育>'[produce]';
+肴>'[cooked or prepared meat]';
+肺>'[lungs]';
+胃>'[stomach]';
+胄>'[helmet]';
+胆>'[gall bladder]';
+背>'[back]';
+胎>'[unborn child]';
+胖>'[fat]';
+胙>'[food offered in sacrificial serv]';
+胚>'[embryo]';
+胛>'[the shoulder]';
+胝>'[callous]';
+胞>'[womb]';
+胡>'[recklessly]';
+胤>'[heir]';
+胥>'[all]';
+胯>'[pelvis]';
+胱>'[bladder]';
+胴>'[the large intestine]';
+胸>'[breast]';
+胼>'[callus]';
+能>'[be able]';
+脂>'[fat]';
+脅>'[ribs]';
+脆>'[crisp]';
+脇>'[ribs]';
+脈>'[blood vessels]';
+脉>'[blood vessels]';
+脊>'[spine]';
+脚>'[leg]';
+脛>'[shinbone]';
+脣>'[lips]';
+脩>'[dried meat (used as teachers payment in ancient times)]';
+脯>'[dried meat]';
+脱>'[take off]';
+脳>'[brain]';
+脹>'[swell]';
+脾>'[spleen]';
+腆>'[prosperous]';
+腋>'[armpit]';
+腎>'[kidneys]';
+腐>'[rot]';
+腑>'[bowels]';
+腓>'[calf]';
+腔>'[chest cavity]';
+腕>'[wrist]';
+腟>'[vagina]';
+腥>'[raw meat]';
+腦>'[brain]';
+腫>'[swell]';
+腮>'[lower part of face]';
+腰>'[waist]';
+腱>'[tendons]';
+腴>'[fat]';
+腸>'[intestines]';
+腹>'[stomach]';
+腺>'[gland]';
+腿>'[legs]';
+膀>'[upper arm]';
+膂>'[backbone]';
+膃>'[fat]';
+膈>'[diaphragm]';
+膊>'[shoulders]';
+膏>'[grease]';
+膓>'[intestines]';
+膕>'[hollow]';
+膚>'[skin]';
+膜>'[membrane]';
+膝>'[knee]';
+膠>'[glue]';
+膣>'[vagina]';
+#"膤>'[................................]'",
+膨>'[swell]';
+膩>'[greasy]';
+膰>'[cook meat for sacrifice or offer]';
+膳>'[meals]';
+膵>'[pancreas]';
+#"膸>'[................................]'",
+膺>'[breast]';
+膽>'[gall bladder]';
+膾>'[minced meat or fish]';
+膿>'[pus]';
+臀>'[buttocks]';
+臂>'[arm]';
+臆>'[chest]';
+臈>'[year end sacrifice]';
+臉>'[face]';
+臍>'[abdominal area of crab]';
+臑>'[soft]';
+臓>'[internal organs]';
+臘>'[year end sacrifice]';
+臙>'[rouge]';
+臚>'[arrange in order]';
+臟>'[internal organs]';
+臠>'[small lump of meat]';
+臣>'[minister]';
+臥>'[lie down]';
+臧>'[good]';
+臨>'[draw near]';
+自>'[self]';
+臭>'[smell]';
+至>'[reach]';
+致>'[send]';
+臺>'[tower]';
+臻>'[reach]';
+臼>'[mortar]';
+臾>'[moment]';
+舁>'[carry on one''s shoulder]';
+舂>'[grind in mortar]';
+舅>'[mother''s brother]';
+與>'[and]';
+興>'[thrive]';
+舉>'[raise]';
+舊>'[old]';
+舌>'[tongue]';
+舍>'[house]';
+舎>'[house]';
+舐>'[lick with tongue]';
+舒>'[open up]';
+舖>'[store]';
+舗>'[store]';
+舘>'[a mansion]';
+舛>'[oppose]';
+舜>'[legendary ruler]';
+舞>'[dance]';
+舟>'[boat]';
+舩>'[boat]';
+航>'[sail]';
+舫>'[fancy boat]';
+般>'[sort]';
+舮>'[bow or prow of boat]';
+舳>'[stern of ship]';
+舵>'[rudder]';
+舶>'[large]';
+舷>'[sides of boat]';
+舸>'[large boat]';
+船>'[ship]';
+艀>'[[not found in dictionary]]';
+艇>'[small boat]';
+艘>'[counter for ships]';
+艙>'[hold of ship]';
+艚>'[ship]';
+#"艝>'[................................]'",
+艟>'[ancient warship]';
+艢>'[a mast]';
+艤>'[to moor a boat to the bank]';
+艦>'[warship]';
+艨>'[long and narrow war-boat]';
+艪>'[oar]';
+艫>'[bow or prow of boat]';
+艮>'[seventh of eight diagrams]';
+良>'[good]';
+艱>'[difficult]';
+色>'[color]';
+艶>'[beautiful]';
+艷>'[beautiful]';
+艸>'[grass]';
+艾>'[artemisia]';
+芋>'[taro]';
+芍>'[peony]';
+芒>'[miscanthus sinensis]';
+芙>'[hibiscus]';
+芝>'[purplish mushroom thought promot]';
+芟>'[mow]';
+芥>'[mustard plant]';
+芦>'[rushes]';
+芫>'[daphne genkwa]';
+芬>'[fragrance]';
+芭>'[plantain or banana palm]';
+芯>'[pith from rush (juncus effusus)]';
+花>'[flower]';
+芳>'[fragrant]';
+芸>'[rue]';
+芹>'[celery]';
+芻>'[mow]';
+芽>'[bud]';
+苅>'[cut off]';
+苑>'[pasture]';
+苒>'[lush]';
+苓>'[fungus]';
+苔>'[moss]';
+苗>'[sprouts]';
+苙>'[pigsty]';
+苛>'[small]';
+苜>'[clover]';
+苞>'[variety of rush]';
+苟>'[careless]';
+苡>'[barley]';
+苣>'[kind of lettuce]';
+若>'[if]';
+苦>'[bitter]';
+苧>'[china grass]';
+苫>'[rush or straw matting]';
+英>'[petal]';
+#"苳>'[................................]'",
+苴>'[sackcloth]';
+苹>'[artemisia]';
+苺>'[berries]';
+苻>'[kind of herb]';
+茂>'[thick]';
+范>'[surname]';
+茄>'[eggplant]';
+茅>'[reeds]';
+茆>'[species of grass]';
+茉>'[white jasmine]';
+茎>'[stem]';
+茖>'[allium victorialis]';
+茗>'[tea]';
+#"茗>'[tea]'",
+茜>'[madder]';
+#"茣>'[................................]'",
+茨>'[caltrop]';
+茫>'[vast]';
+茯>'[china root]';
+茱>'[dogwood]';
+茲>'[now]';
+茴>'[fennel]';
+茵>'[cushion]';
+茶>'[tea]';
+茸>'[soft]';
+茹>'[roots]';
+荀>'[surname]';
+荅>'[answer: small bean]';
+草>'[grass]';
+荊>'[thorns]';
+荏>'[beans]';
+荐>'[repeat]';
+荒>'[wasteland]';
+荘>'[village]';
+荳>'[beans]';
+#"荵>'[................................]'",
+荷>'[lotus]';
+荻>'[reed]';
+荼>'[bitter vegetable]';
+莅>'[attend]';
+#"莇>'[................................]'",
+莉>'[white jasmine]';
+莊>'[village]';
+莎>'[kind of sedge grass]';
+莓>'[moss]';
+莖>'[stem]';
+莚>'[bamboo mat]';
+莞>'[smiling]';
+#"莟>'[................................]'",
+莠>'[weeds]';
+莢>'[pods of leguminous plants]';
+莨>'[herb]';
+莪>'[artemisia]';
+莫>'[do not]';
+莱>'[goosefoot]';
+莵>'[dodder]';
+莽>'[thicket]';
+菁>'[flower of leek family]';
+菅>'[coarse grass]';
+菊>'[chrysanthemum]';
+菌>'[mushroom]';
+菎>'[beautiful jade]';
+菓>'[fruits]';
+菖>'[iris]';
+菘>'[celery]';
+菜>'[vegetables]';
+菟>'[dodder]';
+菠>'[spinach and similar greens]';
+菩>'[herb]';
+菫>'[celery]';
+華>'[flowery]';
+菰>'[wild rice]';
+菱>'[water-chestnut]';
+菲>'[fragrant]';
+菴>'[small buddhist monastery]';
+#"菷>'[................................]'",
+菻>'[artemisia]';
+菽>'[beans and peas]';
+萃>'[dense]';
+萄>'[grapes]';
+萇>'[averrhoa carambola]';
+萋>'[luxuriant foliage]';
+萌>'[bud]';
+萍>'[duckweed]';
+萎>'[wither]';
+#"萓>'[................................]'",
+萠>'[bud]';
+#"萢>'[................................]'",
+萩>'[scandent hop]';
+#"萪>'[................................]'",
+萬>'[ten thousand]';
+萱>'[day-lily]';
+萵>'[lettuce]';
+萸>'[dogwood]';
+萼>'[calyx of flower]';
+落>'[fall]';
+葆>'[reserve]';
+葉>'[leaf]';
+#"葎>'[................................]'",
+著>'[manifest]';
+葛>'[edible bean]';
+葡>'[grapes]';
+葢>'[cover]';
+董>'[direct]';
+葦>'[reed]';
+葩>'[flowers]';
+葫>'[bottle-gourd]';
+葬>'[bury]';
+葭>'[bulrush]';
+#"葭>'[bulrush]'",
+葯>'[leaf of angelica plant]';
+葱>'[scallions]';
+葵>'[sunflower]';
+葷>'[meat diet]';
+葹>'[kind of chrysanthemum]';
+葺>'[thatch]';
+蒂>'[peduncle or stem of plants]';
+#"蒄>'[................................]'",
+蒋>'[surname]';
+蒐>'[collect]';
+蒔>'[transplant]';
+蒙>'[cover]';
+蒜>'[garlic]';
+蒟>'[betel pepper]';
+蒡>'[burdock]';
+蒭>'[to cut grass]';
+蒲>'[type of rush]';
+蒸>'[steam]';
+蒹>'[reed]';
+蒻>'[rushes]';
+蒼>'[blue]';
+蒿>'[mugwort]';
+蓁>'[abundant]';
+蓄>'[store]';
+蓆>'[straw mat]';
+蓉>'[hibiscus]';
+蓊>'[luxuriant vegetation]';
+蓋>'[cover]';
+蓍>'[milfoil]';
+蓐>'[straw bed mat]';
+蓑>'[rain coat made of straw]';
+蓖>'[castor-oil plant]';
+#"蓙>'[................................]'",
+蓚>'[oxalic (used in compounds)]';
+蓬>'[type of raspberry]';
+蓮>'[lotus]';
+蓴>'[edible water plant]';
+蓼>'[smartweed]';
+蓿>'[clover]';
+蔀>'[screen]';
+蔆>'[water-chestnut]';
+蔑>'[disdain]';
+蔓>'[creeping plants]';
+蔔>'[radish]';
+蔕>'[peduncle or stem of plants]';
+蔗>'[sugar cane]';
+蔘>'[ginseng]';
+蔚>'[luxuriant]';
+蔟>'[frame on which silkworms spin]';
+蔡>'[surname]';
+蔦>'[parasitic plants]';
+蔬>'[vegetables]';
+蔭>'[shade]';
+蔵>'[hide]';
+蔽>'[cover]';
+#"蕀>'[................................]'",
+蕁>'[nettle]';
+蕃>'[foreign things]';
+蕈>'[mushrooms]';
+蕉>'[banana]';
+蕊>'[unopened flowers]';
+蕋>'[unopened flowers]';
+蕎>'[buckwheat]';
+蕕>'[caryopteris divaricata]';
+#"蕗>'[................................]'",
+蕘>'[fuel]';
+蕚>'[calyx of flower]';
+蕣>'[hibiscus]';
+蕨>'[pteris aquilina]';
+蕩>'[pond]';
+蕪>'[luxurious growth of weeds]';
+蕭>'[common artemisia]';
+蕷>'[yam]';
+蕾>'[buds]';
+薀>'[the hippuris or mare''s tail plant]';
+薄>'[thin]';
+薇>'[osmunda regalis]';
+薈>'[luxuriant]';
+薊>'[cirsium]';
+薐>'[spinach]';
+薑>'[ginger]';
+薔>'[rose]';
+薗>'[garden]';
+薙>'[weed]';
+薛>'[kind of marsh grass]';
+薜>'[evergreen shrubs]';
+薤>'[allium bakeri]';
+薦>'[offer]';
+薨>'[death of prince]';
+薩>'[transliteration of \"sat\" of bodhisattva etc.]';
+薪>'[fuel]';
+薫>'[a medicinal herb]';
+薬>'[drugs]';
+薮>'[marsh]';
+薯>'[yam]';
+薹>'[cyperus rotundus]';
+薺>'[water-chestnuts]';
+藁>'[straw]';
+藉>'[mat]';
+藍>'[blue]';
+藏>'[hide]';
+藐>'[disregard]';
+藕>'[lotus root]';
+藜>'[chenopodium album]';
+藝>'[art]';
+藤>'[rattan]';
+藥>'[drugs]';
+藩>'[fence]';
+藪>'[marsh]';
+藷>'[yam]';
+藹>'[lush]';
+藺>'[rush used in making mats]';
+藻>'[splendid]';
+藾>'[shade]';
+蘂>'[stamen or pistil]';
+蘆>'[rushes]';
+蘇>'[thyme]';
+蘊>'[collect]';
+蘋>'[apple]';
+蘓>'[thyme]';
+蘖>'[stump]';
+蘗>'[stump]';
+蘚>'[moss]';
+蘢>'[tall grass]';
+蘭>'[orchid]';
+蘯>'[to toss about]';
+#"蘯>'[to toss about]'",
+蘿>'[type of creeping plant]';
+虍>'[tiger]';
+虎>'[tiger]';
+虐>'[cruel]';
+虔>'[act with reverence]';
+處>'[place]';
+虚>'[false]';
+虜>'[capture]';
+虞>'[concerned about]';
+號>'[mark]';
+虧>'[lose]';
+虫>'[insects]';
+虱>'[louse]';
+虹>'[rainbow]';
+虻>'[horsefly]';
+蚊>'[mosquito]';
+蚋>'[gnat]';
+蚌>'[oysters]';
+蚓>'[earthworm]';
+蚕>'[silkworms]';
+蚣>'[centipede]';
+蚤>'[flea]';
+蚩>'[worm]';
+蚪>'[tadpole]';
+蚫>'[abalone]';
+蚯>'[earthworm]';
+蚰>'[millipede]';
+蚶>'[kind of clam]';
+蛄>'[mole cricket]';
+蛆>'[maggots]';
+蛇>'[snake]';
+蛉>'[dragonfly]';
+蛋>'[eggs]';
+蛍>'[glow-worm]';
+蛎>'[oyster]';
+蛔>'[tapeworm]';
+蛙>'[frog]';
+蛛>'[spider]';
+蛞>'[snail]';
+蛟>'[scaly dragon with four legs]';
+蛤>'[clam]';
+蛩>'[cricket]';
+#"蛩>'[cricket]'",
+蛭>'[leech]';
+蛮>'[barbarians]';
+蛯>'[shrimp]';
+蛸>'[long legged spider]';
+蛹>'[chrysalis]';
+蛻>'[molt]';
+蛾>'[moth]';
+蜀>'[name of an ancient state]';
+蜂>'[bee]';
+蜃>'[marine monster which can change its shape]';
+蜆>'[a variety of bivalves]';
+蜈>'[centipede]';
+蜉>'[mayfly]';
+蜊>'[clam]';
+蜍>'[toad]';
+蜑>'[egg]';
+蜒>'[millipede]';
+蜘>'[spider]';
+蜚>'[cockroach]';
+蜜>'[honey]';
+蜥>'[lizard]';
+蜩>'[cicada]';
+蜴>'[lizard]';
+蜷>'[creep like worm]';
+蜻>'[dragonfly]';
+蜿>'[creep]';
+蝉>'[cicada]';
+蝋>'[wax]';
+蝌>'[tadpole]';
+蝎>'[scorpion]';
+蝓>'[snail]';
+蝕>'[nibble away]';
+蝗>'[kind of locust]';
+蝙>'[bat]';
+蝟>'[hedgehog]';
+蝠>'[kind of bat]';
+蝣>'[mayfly]';
+蝦>'[shrimp]';
+蝨>'[louse]';
+#"蝨>'[louse]'",
+蝮>'[venomous snake]';
+蝴>'[butterfly]';
+蝶>'[butterfly]';
+蝸>'[snail]';
+蝿>'[flies]';
+螂>'[mantis]';
+融>'[melt]';
+螟>'[kind of caterpillar]';
+螢>'[glow-worm]';
+螫>'[poison]';
+螯>'[nippers]';
+螳>'[mantis]';
+螺>'[spiral shell]';
+螻>'[gryllotalpa africana]';
+螽>'[katydid]';
+蟀>'[cricket]';
+蟄>'[to hibernate]';
+蟆>'[frog]';
+蟇>'[frog]';
+蟋>'[cricket]';
+#"蟋>'[cricket]'",
+蟒>'[python]';
+蟠>'[coil]';
+蟯>'[worms]';
+蟲>'[worms]';
+蟶>'[razor clam]';
+蟷>'[mantis]';
+蟹>'[crab]';
+蟻>'[ants]';
+蟾>'[toad]';
+蠅>'[flies]';
+蠍>'[scorpion]';
+蠎>'[python]';
+蠏>'[crab]';
+蠑>'[lizard]';
+蠕>'[eumenes polifomis]';
+蠖>'[measuring worm]';
+蠡>'[wood-boring insect]';
+蠢>'[wriggle]';
+蠣>'[oyster]';
+蠧>'[moth]';
+蠱>'[poison]';
+蠶>'[silkworms]';
+蠹>'[moth]';
+蠻>'[barbarians]';
+血>'[blood]';
+衂>'[to be defeated]';
+衄>'[epistaxis]';
+衆>'[multitude]';
+行>'[go]';
+衍>'[overflow]';
+衒>'[brag]';
+術>'[art]';
+街>'[street]';
+衙>'[public office]';
+衛>'[guard]';
+衝>'[rush against]';
+衞>'[guard]';
+衡>'[measure]';
+衢>'[highway]';
+衣>'[clothes]';
+表>'[show]';
+衫>'[shirt]';
+衰>'[decline]';
+衲>'[mend]';
+衵>'[chemise]';
+衷>'[heart]';
+衽>'[lapel]';
+衾>'[coverlet]';
+衿>'[collar or lapel of garment]';
+袁>'[robe]';
+袂>'[sleeves]';
+袈>'[buddhist cassock]';
+袋>'[pocket]';
+袍>'[long gown]';
+袒>'[strip]';
+袖>'[sleeve]';
+袗>'[unlined garments]';
+#"袙>'[................................]'",
+袞>'[ceremonial dress worn by emperor]';
+袢>'[robe]';
+袤>'[longitude]';
+被>'[passive indicator \"by\"]';
+#"袮>'[................................]'",
+#"袰>'[................................]'",
+袱>'[piece of cloth used wrap bundles]';
+袴>'[pants]';
+袵>'[lapel]';
+袷>'[lined garment]';
+袿>'[gown]';
+裁>'[cut out]';
+裂>'[split]';
+#"裂>'[split]'",
+#"裂>'[split]'",
+装>'[dress]';
+裏>'[inside]';
+裔>'[progeny]';
+裕>'[abundant]';
+裘>'[fur garments]';
+裙>'[skirt]';
+補>'[mend]';
+裝>'[dress]';
+裟>'[a cassock or robe of a monk]';
+裡>'[inside]';
+裨>'[aid]';
+裲>'[waistcoat]';
+裳>'[clothes]';
+裴>'[surname]';
+裸>'[bare]';
+裹>'[wrap]';
+裼>'[to take off one''s top]';
+製>'[make]';
+裾>'[lapel]';
+褂>'[jacket]';
+#"褃>'[a seam in a garment]'",
+複>'[repeat]';
+褊>'[cramped]';
+褌>'[trousers]';
+褐>'[coarse woolen cloth]';
+褒>'[praise]';
+褓>'[swaddling cloth]';
+褝>'[unlined garment]';
+#"褞>'[................................]'",
+褥>'[mattress]';
+褪>'[strip]';
+褫>'[strip]';
+褶>'[pleat]';
+褸>'[lapel]';
+褻>'[dirty]';
+襁>'[swaddling clothes]';
+襃>'[commend]';
+襄>'[aid]';
+襌>'[unlined garment]';
+襍>'[mixed]';
+襖>'[outer garments]';
+襞>'[fold]';
+襟>'[lapel]';
+襠>'[crotch or seat of pants]';
+襤>'[ragged]';
+襦>'[short coat]';
+襪>'[socks]';
+襭>'[tuck up hem of garment and wrap]';
+襯>'[underwear]';
+襲>'[raid]';
+襴>'[a one piece garment]';
+#"襶>'[ignorant]'",
+襾>'[cover]';
+西>'[west(ern)]';
+要>'[necessary]';
+覃>'[reach to]';
+覆>'[cover]';
+覇>'[rule by might rather than right]';
+覈>'[investigate]';
+覊>'[halter]';
+見>'[see]';
+規>'[rules]';
+覓>'[seek]';
+視>'[look at]';
+覗>'[peek]';
+覘>'[peek]';
+覚>'[wake up from sleep]';
+覡>'[wizard]';
+覦>'[desire strongly]';
+覧>'[look at]';
+覩>'[see]';
+親>'[relatives]';
+覬>'[covet]';
+覯>'[meet or see unexpectedly]';
+覲>'[have imperial audience]';
+観>'[see]';
+覺>'[wake up from sleep]';
+覽>'[look at]';
+覿>'[see]';
+觀>'[see]';
+角>'[horn]';
+觚>'[jug]';
+觜>'[beak]';
+觝>'[gore]';
+解>'[loosen]';
+触>'[butt]';
+觧>'[loosen]';
+觴>'[wine vessel]';
+觸>'[touch]';
+言>'[words]';
+訂>'[draw up agreement]';
+訃>'[obituary]';
+計>'[plan]';
+訊>'[inquire]';
+訌>'[confusion]';
+討>'[ask for]';
+訐>'[expose other''s secrets]';
+訓>'[teach]';
+訖>'[finish]';
+託>'[entrust]';
+記>'[record]';
+訛>'[swindle]';
+訝>'[express surprise]';
+訟>'[accuse]';
+訣>'[take leave of]';
+訥>'[slow of speech]';
+訪>'[visit]';
+設>'[build]';
+許>'[allow]';
+訳>'[translate]';
+訴>'[accuse]';
+訶>'[scold loudly]';
+診>'[examine patient]';
+註>'[explain]';
+証>'[prove]';
+詁>'[exegesis]';
+詆>'[slander]';
+詈>'[scold]';
+詐>'[cheat]';
+詑>'[cheat]';
+詒>'[bequeath]';
+詔>'[decree]';
+評>'[appraise]';
+詛>'[curse]';
+詞>'[words]';
+詠>'[sing]';
+詢>'[inquire into]';
+詣>'[reach]';
+試>'[test]';
+詩>'[poetry]';
+詫>'[be surprised]';
+詬>'[abuse]';
+詭>'[deceive]';
+詮>'[explain]';
+詰>'[question]';
+話>'[speech]';
+該>'[should]';
+詳>'[detailed]';
+詼>'[tease]';
+誂>'[tempt]';
+誄>'[eulogize]';
+誅>'[execute]';
+誇>'[exaggerate]';
+誉>'[fame]';
+誌>'[write down]';
+認>'[recognize]';
+誑>'[deceive]';
+誓>'[swear]';
+誕>'[bear children]';
+誘>'[persuade]';
+誚>'[criticize]';
+語>'[language]';
+誠>'[sincere]';
+誡>'[warn]';
+誣>'[make false accusation]';
+誤>'[err]';
+誥>'[inform]';
+誦>'[recite]';
+誨>'[teach]';
+説>'[speak]';
+読>'[read]';
+誰>'[who? whom? whose? anyone?]';
+課>'[lesson]';
+誹>'[slander]';
+誼>'[friendship]';
+調>'[transfer]';
+諂>'[flatter]';
+諄>'[patient]';
+談>'[talk]';
+請>'[ask]';
+諌>'[remonstrate]';
+諍>'[to expostulate]';
+諏>'[consult]';
+諒>'[excuse]';
+論>'[debate]';
+諚>'[(kokuji) command]';
+諛>'[flatter]';
+諜>'[intelligence report]';
+諞>'[brag]';
+諠>'[noisy]';
+諡>'[posthumous name]';
+諢>'[jokes]';
+諤>'[honest speech]';
+諦>'[careful]';
+諧>'[harmonize]';
+諫>'[remonstrate]';
+諭>'[proclaim]';
+諮>'[consult]';
+諱>'[conceal]';
+諳>'[versed in]';
+諷>'[recite]';
+諸>'[several]';
+諺>'[proverb]';
+諾>'[promise]';
+謀>'[plan]';
+謁>'[visit]';
+謂>'[say]';
+謄>'[copy]';
+謇>'[stutter]';
+謌>'[slander]';
+謎>'[riddle]';
+謐>'[calm]';
+謔>'[jeer]';
+謖>'[rise up]';
+謗>'[slander]';
+謙>'[humble]';
+謚>'[to confer posthumous titles]';
+講>'[explain]';
+謝>'[thank]';
+謠>'[sing]';
+謡>'[sing]';
+謦>'[to speak softly]';
+謨>'[scheme]';
+謫>'[charge]';
+謬>'[error]';
+謳>'[sing]';
+謹>'[prudent]';
+謾>'[deceive]';
+譁>'[noise]';
+證>'[proof]';
+譌>'[false]';
+譎>'[cunning]';
+譏>'[ridicule]';
+譖>'[to slander]';
+識>'[recognize]';
+譚>'[surname]';
+譛>'[to slander]';
+譜>'[register]';
+譟>'[clamor]';
+警>'[guard]';
+譫>'[talkative]';
+譬>'[metaphor]';
+譯>'[translate]';
+議>'[consult]';
+#"譱>'[................................]'",
+譲>'[allow]';
+譴>'[reprimand]';
+護>'[protect]';
+譽>'[fame]';
+讀>'[read]';
+讃>'[praise]';
+變>'[change]';
+讌>'[feast]';
+讎>'[enemy]';
+讐>'[enemy]';
+讒>'[slander]';
+讓>'[allow]';
+讖>'[prophecy]';
+讙>'[cheer]';
+讚>'[praise]';
+谷>'[valley]';
+谺>'[the mouth of a valley]';
+谿>'[valley]';
+豁>'[open up]';
+豆>'[beans]';
+豈>'[how? what?]';
+豊>'[abundant]';
+豌>'[peas]';
+豎>'[vertical]';
+豐>'[abundant]';
+豕>'[pigs]';
+豚>'[small pig]';
+象>'[elephant]';
+豢>'[domestic animals]';
+豪>'[brave]';
+豫>'[relaxed]';
+豬>'[pig]';
+豸>'[radical 153]';
+豹>'[leopard]';
+豺>'[wolf]';
+豼>'[fox]';
+貂>'[marten]';
+貅>'[brave]';
+貉>'[badger]';
+貊>'[leopard]';
+貌>'[countenance]';
+貍>'[a fox-like animal]';
+貎>'[lion]';
+貔>'[fox]';
+貘>'[the panther]';
+貝>'[sea shell]';
+貞>'[virtuous]';
+負>'[load]';
+財>'[wealth]';
+貢>'[offer tribute]';
+貧>'[poor]';
+貨>'[goods]';
+販>'[peddler]';
+貪>'[greedy]';
+貫>'[string of 1000 coins]';
+責>'[one''s responsibility]';
+貭>'[matter]';
+貮>'[number two]';
+貯>'[store]';
+貰>'[borrow]';
+貲>'[property]';
+貳>'[number two]';
+貴>'[expensive]';
+貶>'[decrease]';
+買>'[buy]';
+貸>'[lend]';
+費>'[expenses]';
+貼>'[paste to]';
+貽>'[give to]';
+貿>'[trade]';
+賀>'[congratulate]';
+賁>'[forge ahead]';
+賂>'[bribe]';
+賃>'[rent]';
+賄>'[bribe]';
+資>'[property]';
+賈>'[surname]';
+賊>'[thief]';
+賍>'[booty]';
+賎>'[mean]';
+賑>'[relieve]';
+賓>'[guest]';
+賚>'[give]';
+賛>'[help]';
+賜>'[give]';
+賞>'[reward]';
+賠>'[indemnify]';
+賢>'[virtuous]';
+賣>'[sell]';
+賤>'[mean]';
+賦>'[tax]';
+質>'[matter]';
+賭>'[bet]';
+賺>'[make money]';
+賻>'[gift of money help pay funeral]';
+購>'[buy]';
+賽>'[compete]';
+贄>'[gift superior]';
+贅>'[unnecessary]';
+贇>'[affable]';
+贈>'[give present]';
+贊>'[help]';
+贋>'[false]';
+贍>'[support]';
+贏>'[win]';
+贐>'[farewell present]';
+贓>'[booty]';
+贔>'[strong]';
+贖>'[buy]';
+赤>'[red]';
+赦>'[forgive]';
+赧>'[blush]';
+赫>'[bright]';
+赭>'[reddish brown]';
+走>'[walk]';
+赱>'[walk]';
+赳>'[grand]';
+赴>'[go to]';
+起>'[rise]';
+趁>'[take advantage of]';
+超>'[jump over]';
+越>'[exceed]';
+趙>'[surname]';
+趣>'[what attracts one''s attention]';
+趨>'[hasten]';
+足>'[foot]';
+趺>'[sit cross-legged]';
+趾>'[toe]';
+跂>'[creeping]';
+跋>'[go by foot]';
+跌>'[stumble]';
+跏>'[sit cross-legged]';
+跖>'[sole (of the foot)]';
+跚>'[stagger]';
+跛>'[lame]';
+距>'[distance]';
+跟>'[heel]';
+跡>'[search]';
+跣>'[bare footed]';
+跨>'[straddle]';
+跪>'[kneel]';
+跫>'[sound of footsteps]';
+路>'[road]';
+跳>'[jump]';
+践>'[trample]';
+跼>'[bent]';
+#"跿>'[................................]'",
+踈>'[neglect]';
+踉>'[hop]';
+踊>'[leap]';
+踏>'[step on]';
+踐>'[trample]';
+踝>'[ankle]';
+踞>'[crouch]';
+踟>'[hesitate]';
+踪>'[footprints]';
+踰>'[exceed]';
+踴>'[leap]';
+踵>'[heel]';
+蹂>'[trample under foot]';
+蹄>'[hoof]';
+蹇>'[lame]';
+蹈>'[stamp feet]';
+蹉>'[error]';
+蹊>'[footpath]';
+蹌>'[walk rapidly]';
+蹐>'[take short steps]';
+蹕>'[clear way]';
+蹙>'[urgent]';
+蹟>'[trace]';
+蹠>'[step on]';
+蹣>'[to jump over]';
+蹤>'[footprints]';
+蹲>'[squat]';
+蹴>'[kick]';
+蹶>'[stumble]';
+蹼>'[webbed feet of waterfowl]';
+躁>'[tense]';
+躄>'[cripple]';
+躅>'[walk carefully]';
+躇>'[hesitate]';
+躊>'[hesitate]';
+躋>'[ascend]';
+躍>'[skip]';
+躑>'[waver]';
+躓>'[stumble]';
+躔>'[follow in]';
+躙>'[trample down]';
+躡>'[tread]';
+躪>'[trample down]';
+身>'[body]';
+躬>'[body]';
+躯>'[body]';
+躰>'[body]';
+躱>'[hide]';
+#"躺>'[lie down]'",
+#"軄>'[to govern]'",
+軆>'[body]';
+#"軆>'[body]'",
+車>'[cart]';
+軋>'[crush by weight]';
+軌>'[track]';
+軍>'[army]';
+軒>'[carriage]';
+軛>'[yoke]';
+軟>'[soft]';
+転>'[shift]';
+軣>'[rumble]';
+軫>'[cross board at rear of carriage]';
+軸>'[axle]';
+軻>'[axle]';
+軼>'[rush forth]';
+軽>'[light]';
+軾>'[horizontal wooden bar in front of a sedan chair]';
+較>'[compare]';
+輅>'[chariot]';
+載>'[load]';
+輊>'[low rear portion of cart]';
+輌>'[numerary adjunct for vehicles]';
+輒>'[sides of chariot where weapons]';
+輓>'[mourn]';
+輔>'[cheek bone]';
+輕>'[light]';
+輙>'[sides of chariot where weapons]';
+輛>'[numerary adjunct for vehicles]';
+輜>'[supply cart]';
+輝>'[brightness]';
+輟>'[stop]';
+輦>'[hand-cart]';
+輩>'[generation]';
+輪>'[wheel]';
+輯>'[gather up]';
+輳>'[hubs of wheel]';
+輸>'[transport]';
+輹>'[two pieces of wood underneath ca]';
+輻>'[spokes of wheel]';
+輾>'[turn over]';
+輿>'[cart]';
+轂>'[hub of wheel]';
+轄>'[linchpin of wheel]';
+轅>'[axle]';
+轆>'[windlass]';
+轉>'[shift]';
+轌>'[sled]';
+轍>'[wagon ruts]';
+轎>'[sedan-chair]';
+轗>'[fail]';
+轜>'[hearse]';
+轟>'[rumble]';
+轡>'[bridle of horse]';
+轢>'[run over something with vehicle]';
+#"轣>'[................................]'",
+轤>'[pulley]';
+辛>'[bitter]';
+辜>'[crime]';
+辞>'[words]';
+辟>'[law]';
+辣>'[peppery]';
+辧>'[manage]';
+辨>'[distinguish]';
+辭>'[words]';
+辮>'[braid]';
+辯>'[dispute]';
+辰>'[early morning]';
+辱>'[humiliate]';
+農>'[agriculture]';
+辷>'[smooth]';
+辺>'[edge]';
+辻>'[crossroads]';
+込>'[crowd into]';
+辿>'[follow]';
+迂>'[doctrinaire]';
+迄>'[extend]';
+迅>'[quick]';
+迎>'[receive]';
+近>'[near]';
+返>'[return]';
+迚>'[very]';
+迢>'[far]';
+迥>'[distant]';
+迦>'[character for transliteration]';
+迩>'[be near]';
+迪>'[enlighten]';
+迫>'[coerce]';
+迭>'[repeatedly]';
+迯>'[escape]';
+述>'[narrate]';
+迴>'[revolve]';
+迷>'[bewitch]';
+迸>'[gush out]';
+迹>'[traces]';
+迺>'[then]';
+追>'[pursue]';
+退>'[step back]';
+送>'[see off]';
+逃>'[escape]';
+逅>'[meet unexpectedly]';
+逆>'[disobey]';
+逋>'[flee]';
+逍>'[ramble]';
+逎>'[strong]';
+透>'[penetrate]';
+逐>'[chase]';
+逑>'[collect]';
+逓>'[hand over]';
+途>'[way]';
+逕>'[pass by]';
+逖>'[far]';
+逗>'[tempt]';
+這>'[this]';
+通>'[pass through]';
+逝>'[pass away]';
+逞>'[indulge oneself]';
+速>'[quick]';
+造>'[construct]';
+逡>'[retreat]';
+逢>'[come upon]';
+連>'[join]';
+#"逦>'[meandering]'",
+逮>'[seize]';
+週>'[week]';
+進>'[advance]';
+逵>'[thoroughfare]';
+逶>'[winding]';
+逸>'[flee]';
+逹>'[arrive at]';
+逼>'[compel]';
+逾>'[go over]';
+遁>'[hide]';
+遂>'[comply with]';
+遅>'[late]';
+遇>'[meet]';
+遉>'[spy]';
+遊>'[wander]';
+運>'[luck]';
+遍>'[everywhere]';
+過>'[pass]';
+遏>'[stop]';
+遐>'[afar]';
+遑>'[leisure]';
+遒>'[strong]';
+道>'[path]';
+達>'[arrive at]';
+違>'[disobey]';
+#"違>'[disobey]'",
+遘>'[to meet]';
+遙>'[far away]';
+遜>'[humble]';
+遞>'[hand over]';
+遠>'[distant]';
+遡>'[go upstream]';
+遣>'[send]';
+遥>'[far away]';
+遨>'[ramble]';
+適>'[match]';
+遭>'[come across]';
+遮>'[cover]';
+遯>'[deceive]';
+遲>'[late]';
+遵>'[obey]';
+遶>'[entwine]';
+遷>'[move]';
+選>'[choose]';
+遺>'[lose]';
+遼>'[distant]';
+遽>'[suddenly]';
+避>'[avoid]';
+邀>'[invite]';
+邁>'[take a big stride]';
+邂>'[unexpected meeting]';
+邃>'[profound]';
+還>'[still]';
+邇>'[be near]';
+邉>'[edge]';
+邊>'[edge]';
+邏>'[patrol]';
+邑>'[area]';
+那>'[that]';
+邦>'[nation]';
+邨>'[village]';
+邪>'[wrong]';
+邯>'[city in hebei province]';
+邱>'[surname]';
+邵>'[surname]';
+邸>'[official residence]';
+郁>'[sweet smelling]';
+郊>'[suburbs]';
+郎>'[gentleman]';
+郛>'[outer walls of city]';
+郡>'[administrative division]';
+郢>'[state in today''s hubei province]';
+郤>'[crack]';
+部>'[part]';
+郭>'[outer part (of a city)]';
+郵>'[postal]';
+郷>'[country]';
+都>'[metropolis]';
+鄂>'[hubei province]';
+鄒>'[name of an ancient state]';
+鄙>'[mean]';
+鄭>'[state in today''s henan]';
+鄰>'[neighbor]';
+鄲>'[county in hebei province]';
+酉>'[tenth in series of twelve cyclic]';
+酊>'[drunk]';
+酋>'[chief of tribe]';
+酌>'[serve wine]';
+配>'[match]';
+酎>'[double-fermented wine]';
+酒>'[wine]';
+酔>'[intoxicated]';
+酖>'[wine with bird poison added]';
+#"酘>'[................................]'",
+酢>'[toast one''s host with wine]';
+酣>'[enjoy intoxicants]';
+酥>'[butter]';
+酩>'[drunk]';
+酪>'[cream]';
+酬>'[toast]';
+酲>'[hangover]';
+#"酲>'[hangover]'",
+酵>'[yeast]';
+酷>'[strong]';
+酸>'[tart]';
+#"醁>'[a kind of green-colored wine]'",
+醇>'[rich]';
+醉>'[intoxicated]';
+醋>'[vinegar]';
+醍>'[essential oil of butter]';
+醐>'[purest cream]';
+醒>'[wake up]';
+醗>'[to brew for the second time]';
+醜>'[ugly looking]';
+醢>'[minced pickled meat]';
+醤>'[any jam-like or paste-like food]';
+醪>'[unclear wine]';
+醫>'[cure]';
+醯>'[vinegar]';
+醴>'[sweet wine]';
+醵>'[contribute for drinks]';
+醸>'[brew]';
+醺>'[get drunk]';
+釀>'[brew]';
+釁>'[smear with blood in sacrifice]';
+釆>'[distinguish]';
+采>'[collect]';
+釈>'[interpret]';
+釉>'[glaze]';
+釋>'[interpret]';
+里>'[unit of distance]';
+重>'[heavy]';
+野>'[open country]';
+量>'[measure]';
+釐>'[manage]';
+金>'[gold]';
+釖>'[knife]';
+釘>'[nail]';
+#"釛>'[................................]'",
+釜>'[cauldron]';
+針>'[needle]';
+#"釟>'[................................]'",
+釡>'[cauldron]';
+釣>'[fish]';
+釦>'[button]';
+釧>'[bracelet]';
+釵>'[ornamental hairpin]';
+#"釶>'[................................]'",
+釼>'[sword]';
+#"釿>'[................................]'",
+鈍>'[blunt]';
+鈎>'[hook]';
+鈑>'[plate]';
+鈔>'[paper money]';
+鈕>'[button]';
+鈞>'[unit of measure equivalent thirt]';
+鈩>'[fireplace]';
+鈬>'[bell]';
+鈴>'[bell]';
+鈷>'[cobalt]';
+鈿>'[hairpin]';
+鉄>'[iron]';
+鉅>'[steel]';
+鉈>'[thallium]';
+鉉>'[device for carrying a tripod]';
+鉋>'[carpenter''s plane]';
+#"鉐>'[................................]'",
+鉗>'[pincers]';
+鉚>'[rivet]';
+鉛>'[lead plumbum]';
+鉞>'[broad-axe]';
+鉢>'[earthenware basin]';
+鉤>'[hook]';
+鉦>'[kind of gong used in ancient tim]';
+鉱>'[mine]';
+鉾>'[spear]';
+銀>'[silver]';
+銃>'[ancient weapon]';
+銅>'[copper]';
+銑>'[mill]';
+銓>'[weigh]';
+銕>'[iron]';
+銖>'[unit of weight]';
+銘>'[inscribe]';
+銚>'[large hoe]';
+銛>'[sharp]';
+銜>'[bit]';
+銭>'[money]';
+銷>'[melt]';
+銹>'[rust]';
+鋏>'[tongs]';
+鋒>'[point of spear]';
+鋤>'[hoe]';
+鋩>'[point of sword]';
+鋪>'[spread out]';
+鋭>'[sharp]';
+鋲>'[rivet]';
+鋳>'[melt]';
+鋸>'[a saw]';
+#"鋺>'[................................]'",
+鋼>'[steel]';
+錆>'[the color of a mineral]';
+錏>'[soft steel]';
+錐>'[gimlet]';
+錘>'[balance weight on scale]';
+錙>'[8 oz]';
+錚>'[clanging sound]';
+錠>'[spindle]';
+錢>'[money]';
+#"錣>'[................................]'",
+錦>'[brocade]';
+錨>'[anchor]';
+錫>'[tin]';
+錬>'[smelt metals]';
+錮>'[run metal into cracks]';
+錯>'[error]';
+録>'[copy]';
+#"錵>'[................................]'",
+#"錺>'[................................]'",
+#"錻>'[................................]'",
+#"鍄>'[................................]'",
+鍋>'[cooking-pot]';
+鍍>'[plate]';
+鍔>'[high]';
+#"鍖>'[................................]'",
+鍛>'[forge metal]';
+#"鍜>'[................................]'",
+鍠>'[weapon]';
+鍬>'[shovel]';
+鍮>'[brass]';
+鍵>'[door bolt]';
+鍼>'[needle]';
+鍾>'[cup]';
+鎌>'[sickle]';
+鎔>'[fuse]';
+鎖>'[lock]';
+鎗>'[rifle]';
+鎚>'[hammer]';
+鎧>'[armor]';
+鎬>'[stove]';
+鎭>'[town]';
+鎮>'[town]';
+鎰>'[measure of weight for gold]';
+#"鎹>'[................................]'",
+鏃>'[arrowhead]';
+鏈>'[chain]';
+鏐>'[pure gold]';
+鏑>'[dysprosium the barb of an arrow]';
+鏖>'[fight end]';
+鏗>'[strike]';
+鏘>'[tinkle]';
+鏝>'[trowel]';
+鏡>'[mirror]';
+鏤>'[carve]';
+鏥>'[rust]';
+鏨>'[engraving tool]';
+鐃>'[cymbals]';
+鐇>'[vanadium]';
+鐐>'[fetters]';
+鐓>'[ferrule]';
+鐔>'[dagger]';
+鐘>'[clock]';
+鐙>'[lamp]';
+鐚>'[soft steel]';
+鐡>'[iron]';
+鐫>'[engraving tool]';
+鐵>'[iron]';
+鐶>'[metal ring]';
+鐸>'[bell]';
+鐺>'[frying pan]';
+#"鑁>'[................................]'",
+鑄>'[melt]';
+鑑>'[mirror]';
+鑒>'[mirror]';
+鑓>'[spear]';
+鑚>'[drill]';
+鑛>'[mine]';
+鑞>'[solder]';
+鑠>'[melt]';
+鑢>'[file]';
+鑪>'[fireplace]';
+鑰>'[key]';
+鑵>'[jar]';
+鑷>'[tweezers]';
+鑼>'[gong]';
+鑽>'[drill]';
+鑾>'[bells hung on horse]';
+鑿>'[chisel]';
+钁>'[a mattock]';
+長>'[long]';
+門>'[gate]';
+閂>'[bolt]';
+閃>'[flash]';
+閇>'[shut]';
+閉>'[shut]';
+#"閉>'[shut]'",
+開>'[open]';
+閏>'[intercalary]';
+閑>'[fence]';
+間>'[interval]';
+閔>'[mourn]';
+#"閔>'[mourn]'",
+閘>'[sluice]';
+閙>'[quarrel]';
+閠>'[intercalary]';
+関>'[frontier pass]';
+閣>'[chamber]';
+閤>'[small side door]';
+閥>'[powerful and influential group]';
+閧>'[boisterous]';
+閨>'[small entrance]';
+閭>'[village of twenty-five families]';
+閲>'[examine]';
+閹>'[castrate]';
+閻>'[village gate]';
+閼>'[block]';
+閾>'[threshold]';
+闃>'[alone]';
+闇>'[close]';
+闊>'[broad]';
+闌>'[door screen]';
+闍>'[tower over city gate]';
+闔>'[close]';
+闕>'[watch tower]';
+闖>'[rush in]';
+闘>'[struggle]';
+關>'[frontier pass]';
+闡>'[explain]';
+闢>'[open]';
+闥>'[door]';
+阜>'[mound]';
+阡>'[footpaths between fields]';
+阨>'[in distress]';
+阪>'[hillside farmland]';
+阮>'[ancient musical instrument: surname]';
+阯>'[foundation]';
+防>'[defend]';
+阻>'[impede]';
+阿>'[prefix to name]';
+陀>'[steep bank]';
+陂>'[dam]';
+附>'[adhere to]';
+陋>'[narrow]';
+陌>'[foot path between rice fields]';
+降>'[descend]';
+#"降>'[descend]'",
+限>'[boundary]';
+陛>'[steps leading throne]';
+陜>'[narrow]';
+陝>'[mountain pass]';
+陞>'[promote]';
+陟>'[climb]';
+院>'[courtyard]';
+陣>'[column]';
+除>'[eliminate]';
+陥>'[submerge]';
+#"陥>'[submerge]'",
+陪>'[accompany]';
+陬>'[corner]';
+陰>'[\"female\" principle]';
+陲>'[frontier]';
+陳>'[exhibit]';
+陵>'[hill]';
+陶>'[pottery]';
+陷>'[submerge]';
+陸>'[land]';
+険>'[narrow pass]';
+陽>'[\"male\" principle]';
+隅>'[corner]';
+隆>'[prosperous]';
+隈>'[cove]';
+隊>'[team]';
+隋>'[Sui dynasty]';
+隍>'[dry ditch]';
+階>'[stairs]';
+随>'[follow]';
+隔>'[separate]';
+隕>'[fall]';
+隗>'[high]';
+隘>'[narrow]';
+隙>'[crack]';
+際>'[border]';
+障>'[separate]';
+#"障>'[separate]'",
+隣>'[neighbor]';
+隧>'[tunnel]';
+隨>'[follow]';
+險>'[narrow pass]';
+隰>'[low]';
+隱>'[hide]';
+隲>'[stallion]';
+隴>'[mountain located between shaanxi]';
+隶>'[subservient]';
+隷>'[be subservient to]';
+隸>'[be subservient to]';
+隹>'[bird]';
+隻>'[single]';
+隼>'[aquiline (nose)]';
+雀>'[sparrow]';
+雁>'[wild goose]';
+雄>'[male of species]';
+雅>'[elegant]';
+集>'[assemble]';
+雇>'[employ]';
+雉>'[pheasant]';
+雋>'[superior]';
+雌>'[female]';
+雍>'[harmony]';
+雎>'[osprey]';
+雑>'[mixed]';
+雕>'[engrave]';
+雖>'[although]';
+雙>'[set of two]';
+雛>'[chick]';
+雜>'[mixed]';
+離>'[leave]';
+難>'[difficult]';
+雨>'[rain]';
+雪>'[snow]';
+#"雫>'[................................]'",
+雰>'[atmosphere]';
+雲>'[clouds]';
+零>'[zero]';
+雷>'[thunder]';
+雹>'[hail]';
+電>'[electricity]';
+需>'[need]';
+霄>'[sky]';
+霆>'[a sudden peal of thunder]';
+震>'[shake]';
+霈>'[torrential rains]';
+霊>'[spirit]';
+霍>'[quickly]';
+霎>'[light rain]';
+霏>'[falling of snow and rain]';
+霑>'[be moistened]';
+霓>'[rainbow]';
+霖>'[long spell of rain]';
+霙>'[sleet]';
+霜>'[frost]';
+霞>'[rosy clouds]';
+霤>'[drip]';
+霧>'[fog]';
+霪>'[long and heavy rain]';
+霰>'[hail]';
+露>'[dew]';
+霸>'[rule by might rather than right]';
+霹>'[thunder]';
+霽>'[clear up after rain cease be ang]';
+霾>'[misty]';
+靂>'[thunderclap]';
+靄>'[cloudy sky]';
+靆>'[cloudy sky]';
+靈>'[spirit]';
+靉>'[cloudy sky]';
+青>'[blue]';
+靖>'[pacify]';
+静>'[quiet]';
+靜>'[quiet]';
+非>'[not]';
+靠>'[lean on]';
+靡>'[divide]';
+面>'[face]';
+#"靤>'[................................]'",
+靦>'[timid]';
+靨>'[dimples]';
+革>'[leather]';
+靫>'[strong and pliable]';
+靭>'[strong and pliable]';
+靱>'[strong and pliable]';
+靴>'[boots]';
+#"靹>'[................................]'",
+靺>'[stocking]';
+靼>'[tartars]';
+#"鞁>'[................................]'",
+鞄>'[to work hides]';
+鞅>'[leather strap over horse''s neck]';
+#"鞅>'[leather strap over horse''s neck]'",
+鞋>'[shoes]';
+鞍>'[saddle]';
+鞏>'[bind]';
+#"鞏>'[bind]'",
+鞘>'[scabbard]';
+#"鞜>'[................................]'",
+鞠>'[bow]';
+鞣>'[tan]';
+鞦>'[leather strap]';
+鞨>'[tribe]';
+鞫>'[interrogate]';
+鞭>'[whip]';
+#"鞳>'[................................]'",
+鞴>'[saddle up horse]';
+韃>'[tatars]';
+韆>'[swing]';
+韈>'[socks]';
+韋>'[tanned leather]';
+韓>'[fence]';
+韜>'[sheath]';
+韭>'[scallion]';
+韮>'[scallion]';
+韲>'[break or smash into pieces]';
+音>'[sound]';
+韵>'[rhyme]';
+韶>'[music of emperor Shun]';
+韻>'[rhyme]';
+響>'[make sound]';
+頁>'[page]';
+頂>'[top]';
+頃>'[moment]';
+項>'[neck]';
+順>'[obey]';
+須>'[must]';
+頌>'[laud]';
+頏>'[fly down or downward]';
+預>'[prepare]';
+頑>'[obstinate]';
+頒>'[confer]';
+頓>'[pause]';
+頗>'[lean one side]';
+領>'[neck]';
+頚>'[neck]';
+頡>'[fly upward]';
+頤>'[cheeks]';
+頬>'[cheeks]';
+頭>'[head]';
+頴>'[rice tassel]';
+頷>'[chin]';
+頸>'[neck]';
+頻>'[frequently]';
+頼>'[rely]';
+頽>'[ruined]';
+顆>'[grain]';
+顋>'[lower part of face]';
+題>'[forehead]';
+額>'[forehead]';
+顎>'[jaw]';
+顏>'[face]';
+顔>'[face]';
+顕>'[manifest]';
+願>'[desire]';
+顛>'[top]';
+類>'[class]';
+顧>'[look back]';
+顫>'[shiver]';
+顯>'[manifest]';
+顰>'[frown]';
+顱>'[skull]';
+顳>'[the temporal bone]';
+顴>'[cheek bones]';
+風>'[wind]';
+#"風>'[wind]'",
+颯>'[sound of wind]';
+颱>'[typhoon]';
+颶>'[cyclone]';
+飃>'[whirlwind]';
+飄>'[whirlwind]';
+飆>'[whirlwind]';
+飛>'[fly]';
+飜>'[flip over]';
+食>'[eat]';
+飢>'[hunger]';
+飩>'[stuffed dumplings]';
+飫>'[surfeited]';
+飭>'[order]';
+飮>'[drink]';
+飯>'[cooked rice]';
+飲>'[drink]';
+飴>'[sweet-meats]';
+飼>'[raise animals]';
+飽>'[eat heartily]';
+飾>'[decorate]';
+餃>'[stuffed dumplings]';
+餅>'[rice-cakes]';
+餉>'[rations and pay for soldiers]';
+養>'[raise]';
+餌>'[bait]';
+餐>'[eat]';
+餒>'[hungry]';
+餓>'[be hungry]';
+餔>'[eat]';
+餘>'[surplus]';
+#"餝>'[................................]'",
+餞>'[farewell party]';
+餠>'[rice-cakes]';
+餡>'[pastry filling]';
+餤>'[incite]';
+館>'[public building]';
+餬>'[porridge]';
+餮>'[a legendary animal]';
+餽>'[make present of food]';
+餾>'[distill]';
+#"饁>'[carry meal to workers in field]'",
+饅>'[steamed bread]';
+饉>'[time of famine or crop failure]';
+饋>'[offer food superior]';
+饌>'[feed]';
+饐>'[spoiled]';
+饑>'[starve]';
+饒>'[bountiful]';
+饕>'[gluttonous]';
+饗>'[host banquet]';
+首>'[head]';
+馗>'[cheekbone]';
+馘>'[cut off left ear]';
+香>'[fragrant]';
+馥>'[fragrance]';
+馨>'[fragrant]';
+馬>'[horse]';
+馭>'[drive]';
+馮>'[surname]';
+馳>'[go quickly or swiftly]';
+馴>'[tame]';
+#"馼>'[................................]'",
+駁>'[varicolored]';
+#"駃>'[gallop]'",
+駅>'[relay station]';
+駆>'[spur horse on]';
+駈>'[spur horse on]';
+駐>'[to be stationed at]';
+駑>'[tired]';
+駒>'[colt]';
+駕>'[drive]';
+駘>'[tired]';
+駛>'[sail]';
+駝>'[camel]';
+駟>'[team of four horses]';
+駢>'[team of horses]';
+駭>'[terrify]';
+駮>'[a kind of fierce animal]';
+駱>'[white horse w. black mane]';
+#"駱>'[white horse w. black mane]'",
+駸>'[galloping]';
+#"駻>'[................................]'",
+駿>'[excellent horse]';
+騁>'[gallop horse]';
+騅>'[piebald horse]';
+騎>'[ride horseback]';
+騏>'[piebald horse]';
+騒>'[harass]';
+験>'[test]';
+騙>'[swindle]';
+騨>'[dappled]';
+騫>'[raise]';
+騰>'[fly]';
+騷>'[harass]';
+騾>'[mule]';
+驀>'[suddenly]';
+驂>'[two outside ones in three horse]';
+驃>'[charger]';
+驅>'[spur horse on]';
+驍>'[excellent horse]';
+驕>'[spirited horse]';
+驗>'[test]';
+驚>'[frighten]';
+驛>'[relay station]';
+驟>'[procedure]';
+驢>'[donkey]';
+驤>'[gallop about with head uplifted]';
+驥>'[thoroughbred horse]';
+驩>'[happy]';
+驪>'[pure black horse]';
+驫>'[horses]';
+骨>'[bone]';
+骭>'[shin bone]';
+骰>'[die]';
+骸>'[skeleton]';
+骼>'[bone]';
+髀>'[buttocks]';
+髄>'[bone marrow]';
+髏>'[skull]';
+髑>'[skull]';
+髓>'[bone marrow]';
+體>'[body]';
+高>'[high]';
+髞>'[high]';
+髟>'[hair]';
+髢>'[wig]';
+髣>'[similar to]';
+髦>'[flowing hair of young child]';
+髪>'[hair]';
+髫>'[children''s hair style]';
+髭>'[mustache]';
+髮>'[hair]';
+髯>'[beard]';
+#"髱>'[................................]'",
+髴>'[disheveled hair]';
+#"髷>'[................................]'",
+髻>'[hair rolled up in a bun]';
+鬆>'[lax]';
+鬘>'[beautiful hair]';
+鬚>'[beard]';
+鬟>'[dress hair in coiled knot]';
+鬢>'[hair on temples]';
+鬣>'[horse''s mane]';
+鬥>'[struggle]';
+鬧>'[quarrel]';
+鬨>'[boisterous]';
+鬩>'[feud]';
+鬪>'[struggle]';
+鬮>'[lots (to be drawn)]';
+鬯>'[sacrificial wine]';
+鬱>'[luxuriant]';
+鬲>'[type of caldron]';
+鬻>'[sell]';
+鬼>'[ghost]';
+魁>'[chief]';
+魂>'[soul]';
+魃>'[drought demon]';
+魄>'[vigor]';
+魅>'[kind of forest demon]';
+魍>'[demons]';
+魎>'[a kind of monster]';
+魏>'[kingdom of wei]';
+魑>'[a mountain demon resembling tiger]';
+魔>'[demon]';
+魘>'[nightmare]';
+魚>'[fish]';
+魯>'[foolish]';
+魴>'[bream]';
+#"鮁>'[the bonito]'",
+鮎>'[sheatfish]';
+鮑>'[abalone]';
+鮒>'[carp]';
+鮓>'[minced and salted fish]';
+#"鮓>'[minced and salted fish]'",
+#"鮓>'[minced and salted fish]'",
+鮟>'[anglerfish]';
+鮠>'[a kind of shad with a head like a sturgeon]';
+#"鮨>'[................................]'",
+鮪>'[kind of sturgeon]';
+鮫>'[shark]';
+鮭>'[salmon]';
+鮮>'[fresh]';
+#"鮴>'[................................]'",
+#"鮹>'[................................]'",
+鯀>'[giant fish]';
+鯆>'[the skate or ray]';
+鯉>'[carp]';
+鯊>'[shark]';
+鯏>'[a dialect........ name of fish]';
+#"鯑>'[................................]'",
+#"鯒>'[................................]'",
+鯔>'[mullet]';
+鯖>'[mackerel]';
+鯛>'[pagrosomus major]';
+鯡>'[herring]';
+鯢>'[cryptobranchus japonicus]';
+#"鯣>'[................................]'",
+鯤>'[spawn]';
+鯨>'[whale]';
+鯰>'[sheat]';
+#"鯱>'[................................]'",
+#"鯱>'[................................]'",
+#"鯱>'[................................]'",
+#"鰄>'[................................]'",
+#"鰆>'[................................]'",
+鰈>'[flatfish]';
+鰉>'[sturgeon]';
+#"鰊>'[................................]'",
+鰌>'[loach]';
+鰍>'[loach]';
+鰐>'[alligator]';
+鰒>'[abalone]';
+鰓>'[fish gills]';
+#"鰔>'[................................]'",
+鰕>'[shrimp]';
+鰛>'[sardine]';
+#"鰡>'[................................]'",
+#"鰤>'[................................]'",
+鰥>'[huge fish]';
+鰭>'[fin]';
+鰮>'[sardine]';
+#"鰯>'[................................]'",
+#"鰯>'[................................]'",
+鰲>'[huge sea turtle]';
+鰹>'[skipjack]';
+#"鰺>'[................................]'",
+鰻>'[eel]';
+鰾>'[swimming bladder of fish]';
+#"鱆>'[................................]'",
+鱇>'[anglerfish]';
+鱈>'[codfish]';
+鱒>'[barbel]';
+鱗>'[fish scales]';
+#"鱘>'[sturgeon]'",
+鱠>'[minced fish]';
+鱧>'[snakehead]';
+#"鱶>'[................................]'",
+鱸>'[sea perch]';
+鳥>'[bird]';
+鳧>'[wild duck]';
+鳩>'[pigeon]';
+鳫>'[wild goose]';
+鳬>'[wild duck]';
+#"鳰>'[................................]'",
+鳳>'[male phoenix]';
+鳴>'[cry of bird or animal]';
+鳶>'[kite]';
+鴃>'[shrike]';
+鴆>'[bird resembling secretary falcon]';
+鴇>'[bustard]';
+鴈>'[wild goose]';
+鴉>'[crow]';
+鴎>'[seagull]';
+鴒>'[species of lark]';
+鴕>'[ostrich]';
+鴛>'[male mandarin duck]';
+鴟>'[kite]';
+鴣>'[species of taiwan pigeon]';
+鴦>'[female mandarin duck]';
+鴨>'[duck]';
+鴪>'[swoop]';
+鴫>'[a snipe]';
+鴬>'[oriole]';
+鴻>'[species of wild swan]';
+#"鴾>'[................................]'",
+鴿>'[pigeon]';
+#"鵁>'[................................]'",
+鵄>'[kite]';
+#"鵄>'[kite]'",
+#"鵄>'[kite]'",
+#"鵐>'[................................]'",
+鵑>'[cuckoo]';
+鵙>'[a shrike]';
+鵜>'[pelican]';
+鵝>'[goose]';
+鵞>'[goose]';
+鵠>'[target]';
+鵡>'[species of parrot]';
+#"鵡>'[species of parrot]'",
+鵬>'[fabulous bird of enormous size]';
+鵯>'[bird]';
+鵲>'[magpie]';
+#"鵺>'[................................]'",
+鶇>'[thrush]';
+鶉>'[quail]';
+鶏>'[chicken]';
+鶚>'[osprey]';
+#"鶤>'[................................]'",
+鶩>'[duck]';
+鶫>'[thrush]';
+鶯>'[oriole]';
+#"鶱>'[soar]'",
+鶴>'[crane]';
+鶸>'[[not found in any dictionary]]';
+鶺>'[wagtail]';
+鶻>'[a kind of pigeon]';
+鷁>'[fishhawk bow or prow]';
+鷂>'[sparrow hawk]';
+鷄>'[chicken]';
+鷆>'[bird name]';
+鷏>'[bird name]';
+鷓>'[partridge]';
+鷙>'[hawk]';
+鷦>'[wren]';
+#"鷫>'[turquoise kingfisher]'",
+鷯>'[wren]';
+鷲>'[condor]';
+鷸>'[snipe]';
+鷹>'[eagle]';
+鷺>'[heron]';
+鷽>'[oriental bullfinch]';
+鸚>'[species of parrot]';
+鸛>'[crane]';
+鸞>'[fabulous bird]';
+鹵>'[saline soil]';
+鹸>'[alkaline]';
+鹹>'[salty]';
+鹽>'[salt]';
+鹿>'[deer]';
+麁>'[rough]';
+麈>'[species of deer]';
+麋>'[elk]';
+麌>'[stag]';
+麑>'[fawn]';
+麒>'[legendary auspicious animal]';
+麓>'[foot of hill]';
+麕>'[muntjac deer]';
+麗>'[beautiful]';
+麝>'[musk deer]';
+麟>'[female of chinese unicorn]';
+麥>'[wheat]';
+麦>'[wheat]';
+麩>'[bran]';
+麪>'[flour]';
+#"麫>'[flour]'",
+麸>'[bran]';
+麹>'[yeast]';
+麺>'[flour]';
+麻>'[hemp]';
+麼>'[interrogative final particle]';
+麾>'[pennant]';
+麿>'[I]';
+黄>'[yellow]';
+黌>'[school]';
+黍>'[glutinous millet]';
+黎>'[surname]';
+黏>'[stick to]';
+黐>'[stick]';
+黒>'[black]';
+黔>'[black]';
+默>'[silent]';
+黙>'[silent]';
+黛>'[blacken eyebrows]';
+黜>'[dismiss]';
+黝>'[black]';
+點>'[dot]';
+黠>'[sly]';
+黥>'[tattooing face]';
+黨>'[political party]';
+黯>'[dark]';
+黴>'[mold]';
+黶>'[mole]';
+黷>'[dishonor]';
+黹>'[embroidery]';
+黻>'[special pattern of embroidery]';
+黼>'[embroidered official or sacrific]';
+黽>'[to strive]';
+鼇>'[huge sea turtle]';
+鼈>'[fresh water turtle]';
+鼎>'[large]';
+鼓>'[drum]';
+鼕>'[rattle of drums]';
+鼠>'[rat]';
+鼡>'[rat]';
+鼬>'[weasel]';
+鼻>'[nose]';
+鼾>'[snore loudly]';
+齊>'[even]';
+齋>'[vegetarian diet]';
+齎>'[take in both hands and offer to]';
+齏>'[break or smash into pieces]';
+齒>'[teeth]';
+齔>'[lose baby teeth and get adult te]';
+齟>'[irregular teeth]';
+齠>'[lose baby teeth and get adult teeth]';
+齡>'[age]';
+齢>'[age]';
+齣>'[act]';
+齦>'[gums]';
+齧>'[bite]';
+齪>'[narrow]';
+齬>'[uneven teeth]';
+齲>'[tooth decay]';
+齶>'[palate]';
+齷>'[narrow]';
+龍>'[dragon]';
+龕>'[niche]';
+龜>'[turtle or tortoise]';
+龝>'[autumn]';
+龠>'[flute]';
+
+# eof
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt
new file mode 100644
index 00000000000..7435913f8ff
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/resources/Transliterator_Kanji_OnRomaji.txt
@@ -0,0 +1,6216 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2001, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+# Date: Tue Jan 23 12:42:03 2001
+#--------------------------------------------------------------------
+
+# Kanji-OnRomaji
+
+丁>Tei;
+七>Shichi;
+万>Man;
+丈>Jou;
+三>San;
+上>Jou;
+下>Ka;
+不>Fu;
+与>Yo;
+丐>Kai;
+丑>Chuu;
+且>Sha;
+丕>Hi;
+世>Se;
+丗>Sei;
+丘>Kyuu;
+丙>Hei;
+丞>Shou;
+両>Ryou;
+並>Hei;
+个>Ka;
+中>Chuu;
+丱>Kan;
+串>Sen;
+丶>Chu;
+丸>Gan;
+丹>Tan;
+主>Shu;
+丼>Sei;
+丿>Hetsu;
+乂>Gai;
+乃>Dai;
+久>Kyuu;
+之>Shi;
+乍>Saku;
+乎>Ko;
+乏>Bou;
+乕>Ko;
+乖>Kai;
+乗>Jou;
+乘>Jou;
+乙>Otsu;
+九>Kyuu;
+乞>Kotsu;
+也>Ya;
+乢>Gai;
+乱>Ran;
+乳>Nyuu;
+乾>Kan;
+亀>Ki;
+亂>Ran;
+亅>Ketsu;
+了>Ryou;
+予>Yo;
+争>Sou;
+亊>Ji;
+事>Ji;
+二>Ni;
+于>U;
+云>Un;
+互>Go;
+五>Go;
+井>Sei;
+亘>Sen;
+亙>Kou;
+些>Sa;
+亜>A;
+亞>A;
+亟>Kyoku;
+亠>Tou;
+亡>Bou;
+亢>Kou;
+交>Kou;
+亥>Gai;
+亦>Eki;
+亨>Kyou;
+享>Kyou;
+京>Kyou;
+亭>Tei;
+亮>Ryou;
+亰>Kei;
+亳>Haku;
+亶>Tan;
+人>Jin;
+什>Shuu;
+仁>Jin;
+仂>Roku;
+仄>Soku;
+仆>Fu;
+仇>Kyuu;
+今>Kon;
+介>Kai;
+仍>Jou;
+从>Juu;
+仏>Butsu;
+仔>Shi;
+仕>Shi;
+他>Ta;
+仗>Jou;
+付>Fu;
+仙>Sen;
+仝>Dou;
+仞>Jin;
+仟>Sen;
+代>Dai;
+令>Rei;
+以>I;
+仭>Jin;
+仮>Ka;
+仰>Gyou;
+仲>Chuu;
+件>Ken;
+价>Kai;
+任>Nin;
+企>Ki;
+伉>Kou;
+伊>I;
+伍>Go;
+伎>Ki;
+伏>Fuku;
+伐>Batsu;
+休>Kyuu;
+会>Kai;
+伜>Sai;
+伝>Den;
+伯>Haku;
+估>Ko;
+伴>Han;
+伶>Rei;
+伸>Shin;
+伺>Shi;
+似>Ji;
+伽>Kya;
+佃>Ten;
+但>Tan;
+佇>Cho;
+位>I;
+低>Tei;
+住>Juu;
+佐>Sa;
+佑>Yuu;
+体>Tei;
+何>Ka;
+佗>Ta;
+余>Yo;
+佚>Itsu;
+佛>Butsu;
+作>Saku;
+佝>Kou;
+佞>Nei;
+佩>Hai;
+佯>You;
+佰>Haku;
+佳>Ka;
+併>Hei;
+佶>Kitsu;
+佻>Chou;
+佼>Kou;
+使>Shi;
+侃>Kan;
+來>Rai;
+侈>Shi;
+例>Rei;
+侍>Ji;
+侏>Shu;
+侑>Yuu;
+侖>Ron;
+侘>Ta;
+供>Kyou;
+依>I;
+侠>Kyou;
+価>Ka;
+侫>Nei;
+侭>Jin;
+侮>Bu;
+侯>Kou;
+侵>Shin;
+侶>Ryo;
+便>Ben;
+係>Kei;
+促>Soku;
+俄>Ga;
+俊>Shun;
+俎>Sho;
+俐>Ri;
+俑>You;
+俔>Ken;
+俗>Zoku;
+俘>Fu;
+俚>Ri;
+俛>Ben;
+保>Ho;
+俟>Shi;
+信>Shin;
+修>Shuu;
+俯>Fu;
+俳>Hai;
+俵>Hyou;
+俶>Shuku;
+俸>Hou;
+俺>En;
+俾>Hi;
+倅>Sai;
+倆>Ryou;
+倉>Sou;
+個>Ko;
+倍>Bai;
+倏>Shuku;
+們>Mon;
+倒>Tou;
+倔>Kutsu;
+倖>Kou;
+候>Kou;
+倚>I;
+借>Shaku;
+倡>Shou;
+倣>Hou;
+値>Chi;
+倥>Kou;
+倦>Ken;
+倨>Kyo;
+倩>Sen;
+倪>Gei;
+倫>Rin;
+倬>Taku;
+倭>I;
+倶>Ku;
+倹>Ken;
+偃>En;
+假>Ka;
+偈>Ketsu;
+偉>I;
+偏>Hen;
+偐>Gan;
+偕>Kai;
+偖>Sha;
+做>Saku;
+停>Tei;
+健>Ken;
+偬>Sou;
+偲>Shi;
+側>Soku;
+偵>Tei;
+偶>Guu;
+偸>Chuu;
+偽>Gi;
+傀>Kai;
+傅>Fu;
+傍>Bou;
+傑>Ketsu;
+傘>San;
+備>Bi;
+傚>Kou;
+催>Sai;
+傭>You;
+傲>Gou;
+傳>Den;
+傴>U;
+債>Sai;
+傷>Shou;
+傾>Kei;
+僂>Rou;
+僅>Kin;
+僉>Sen;
+僊>Sen;
+働>Dou;
+像>Zou;
+僑>Kyou;
+僕>Boku;
+僖>Ki;
+僚>Ryou;
+僞>Gi;
+僣>Sen;
+僥>Kyou;
+僧>Sou;
+僭>Sen;
+僮>Dou;
+僵>Kyou;
+價>Ka;
+僻>Heki;
+儀>Gi;
+儁>Shun;
+儂>Dou;
+億>Oku;
+儉>Ken;
+儒>Ju;
+儔>Chuu;
+儕>Sei;
+儖>Ran;
+儘>Jin;
+儚>Bou;
+償>Shou;
+儡>Rai;
+優>Yuu;
+儲>Cho;
+儷>Rei;
+儺>Da;
+儻>Tou;
+儼>Gen;
+儿>Jin;
+兀>Kotsu;
+允>In;
+元>Gen;
+兄>Kei;
+充>Juu;
+兆>Chou;
+兇>Kyou;
+先>Sen;
+光>Kou;
+克>Koku;
+兌>Da;
+免>Men;
+兎>To;
+児>Ji;
+兒>Ji;
+兔>To;
+党>Tou;
+兜>Tou;
+兢>Kyou;
+入>Nyuu;
+全>Zen;
+兩>Ryou;
+兪>Yu;
+八>Hachi;
+公>Kou;
+六>Roku;
+兮>Kei;
+共>Kyou;
+兵>Hei;
+其>Ki;
+具>Gu;
+典>Ten;
+兼>Ken;
+冀>Ki;
+冂>Kei;
+内>Dai;
+円>En;
+冉>Nen;
+冊>Satsu;
+册>Satsu;
+再>Sai;
+冏>Kei;
+冐>Bou;
+冑>Chuu;
+冒>Bou;
+冓>Kou;
+冕>Ben;
+冖>Beki;
+冗>Jou;
+写>Sha;
+冠>Kan;
+冢>Chou;
+冤>En;
+冥>Mei;
+冦>Kou;
+冨>Fu;
+冩>Sha;
+冪>Beki;
+冫>Hyou;
+冬>Tou;
+冰>Hyou;
+冱>Go;
+冲>Chuu;
+决>Ketsu;
+冴>Go;
+况>Kyou;
+冶>Ya;
+冷>Rei;
+冽>Retsu;
+凄>Sei;
+凅>Ko;
+准>Jun;
+凉>Ryou;
+凋>Chou;
+凌>Ryou;
+凍>Tou;
+凖>Jun;
+凛>Rin;
+凜>Rin;
+凝>Gyou;
+几>Ki;
+凡>Bon;
+処>Sho;
+凭>Hyou;
+凰>Kou;
+凱>Gai;
+凵>Kan;
+凶>Kyou;
+凸>Totsu;
+凹>Ou;
+出>Shutsu;
+函>Kan;
+凾>Kan;
+刀>Tou;
+刃>Jin;
+刄>Jin;
+分>Bun;
+切>Setsu;
+刈>Gai;
+刊>Kan;
+刋>Sen;
+刎>Fun;
+刑>Kei;
+刔>Ketsu;
+列>Retsu;
+初>Sho;
+判>Han;
+別>Betsu;
+刧>Gou;
+利>Ri;
+刪>San;
+刮>Katsu;
+到>Tou;
+刳>Ko;
+制>Sei;
+刷>Satsu;
+券>Ken;
+刹>Satsu;
+刺>Shi;
+刻>Koku;
+剃>Tei;
+剄>Kei;
+則>Soku;
+削>Saku;
+剋>Koku;
+剌>Ratsu;
+前>Zen;
+剏>Sou;
+剔>Teki;
+剖>Bou;
+剛>Gou;
+剞>Ki;
+剣>Ken;
+剤>Zai;
+剥>Haku;
+剩>Jou;
+剪>Sen;
+副>Fuku;
+剰>Jou;
+剱>Ken;
+割>Katsu;
+剳>Tou;
+剴>Gai;
+創>Sou;
+剽>Hyou;
+剿>Shou;
+劃>Kaku;
+劇>Geki;
+劈>Heki;
+劉>Ru;
+劍>Ken;
+劑>Zai;
+劒>Ken;
+劔>Ken;
+力>Ryoku;
+功>Kou;
+加>Ka;
+劣>Retsu;
+助>Jo;
+努>Do;
+劫>Gou;
+劬>Ku;
+劭>Shou;
+励>Rei;
+労>Rou;
+劵>Ken;
+効>Kou;
+劼>Katsu;
+劾>Gai;
+勁>Kei;
+勃>Botsu;
+勅>Choku;
+勇>Yuu;
+勉>Ben;
+勍>Kei;
+勒>Roku;
+動>Dou;
+勗>Bou;
+勘>Kan;
+務>Mu;
+勝>Shou;
+勞>Rou;
+募>Bo;
+勠>Riku;
+勢>Sei;
+勣>Seki;
+勤>Kin;
+勦>Sou;
+勧>Kan;
+勲>Kun;
+勳>Kun;
+勵>Rei;
+勸>Kan;
+勹>Hou;
+勺>Shaku;
+勾>Kou;
+勿>Butsu;
+包>Hou;
+匆>Sou;
+匈>Kyou;
+匍>Ho;
+匏>Hou;
+匐>Hoku;
+匕>Hi;
+化>Ka;
+北>Hoku;
+匙>Shi;
+匚>Hou;
+匝>Sou;
+匠>Shou;
+匡>Kyou;
+匣>Kou;
+匪>Hi;
+匯>Wai;
+匱>Ki;
+匳>Ren;
+匸>Kei;
+匹>Hitsu;
+区>Ku;
+医>I;
+匿>Toku;
+區>Ku;
+十>Juu;
+千>Sen;
+卅>Sou;
+卆>Sotsu;
+升>Shou;
+午>Go;
+卉>Ki;
+半>Han;
+卍>Ban;
+卑>Hi;
+卒>Sotsu;
+卓>Taku;
+協>Kyou;
+南>Nan;
+単>Tan;
+博>Haku;
+卜>Boku;
+卞>Hen;
+占>Sen;
+卦>Ka;
+卩>Setsu;
+卮>Shi;
+卯>Bou;
+印>In;
+危>Ki;
+即>Soku;
+却>Kyaku;
+卵>Ran;
+卷>Kan;
+卸>Sha;
+卻>Kyaku;
+卿>Kyou;
+厂>Kan;
+厄>Yaku;
+厖>Bou;
+厘>Rin;
+厚>Kou;
+原>Gen;
+厠>Shi;
+厥>Ketsu;
+厦>Ka;
+厨>Zu;
+厩>Kyuu;
+厭>En;
+厮>Shi;
+厰>Shou;
+厳>Gen;
+厶>Shi;
+去>Kyo;
+参>San;
+參>San;
+又>Yuu;
+叉>Sha;
+及>Kyuu;
+友>Yuu;
+双>Sou;
+反>Han;
+収>Shuu;
+叔>Shuku;
+取>Shu;
+受>Ju;
+叙>Jo;
+叛>Han;
+叟>Sou;
+叡>Ei;
+叢>Sou;
+口>Kou;
+古>Ko;
+句>Ku;
+叨>Tou;
+叩>Kou;
+只>Shi;
+叫>Kyou;
+召>Shou;
+叭>Hatsu;
+叮>Tei;
+可>Ka;
+台>Dai;
+叱>Shitsu;
+史>Shi;
+右>U;
+叶>Kyou;
+号>Gou;
+司>Shi;
+吁>Ku;
+吃>Kitsu;
+各>Kaku;
+合>Gou;
+吉>Kichi;
+吊>Chou;
+吋>Sun;
+同>Dou;
+名>Mei;
+后>Kou;
+吏>Ri;
+吐>To;
+向>Kou;
+君>Kun;
+吝>Rin;
+吟>Gin;
+吠>Hai;
+否>Hi;
+吩>Fun;
+含>Gan;
+听>Kin;
+吭>Kou;
+吮>Sen;
+吶>Totsu;
+吸>Kyuu;
+吹>Sui;
+吻>Fun;
+吼>Kou;
+吽>In;
+吾>Go;
+呀>Ga;
+呂>Ryo;
+呆>Bou;
+呈>Tei;
+呉>Go;
+告>Koku;
+呎>Seki;
+呑>Don;
+呟>Gen;
+周>Shuu;
+呪>Ju;
+呰>Shi;
+呱>Ko;
+味>Mi;
+呵>Ka;
+呶>Do;
+呷>Kou;
+呻>Shin;
+呼>Ko;
+命>Mei;
+咀>So;
+咄>Totsu;
+咆>Hou;
+咋>Saku;
+和>Wa;
+咎>Kyuu;
+咏>Ei;
+咐>Ho;
+咒>Shu;
+咢>Gaku;
+咤>Ta;
+咥>Ki;
+咨>Shi;
+咫>Shi;
+咬>Kou;
+咯>Kaku;
+咲>Shou;
+咳>Gai;
+咸>Kan;
+咼>Ka;
+咽>In;
+咾>Rou;
+哀>Ai;
+品>Hin;
+哂>Shin;
+哄>Kou;
+哇>Ai;
+哈>Gou;
+哉>Sai;
+員>In;
+哢>Rou;
+哥>Ka;
+哦>Ga;
+哨>Shou;
+哩>Ri;
+哭>Koku;
+哮>Kou;
+哲>Tetsu;
+哺>Ho;
+哽>Kou;
+唄>Bai;
+唆>Sa;
+唇>Shin;
+唏>Ki;
+唐>Tou;
+唔>Go;
+唖>A;
+售>Shuu;
+唯>Yui;
+唱>Shou;
+唳>Rei;
+唸>Ten;
+唹>Yo;
+唾>Da;
+啀>Gai;
+啄>Taku;
+啅>Taku;
+商>Shou;
+啌>Kou;
+問>Mon;
+啓>Kei;
+啖>Tan;
+啗>Tan;
+啜>Setsu;
+啝>Ka;
+啣>Kan;
+啻>Shi;
+啼>Tei;
+啾>Shuu;
+喀>Kaku;
+喃>Nan;
+善>Zen;
+喇>Ratsu;
+喉>Kou;
+喊>Kan;
+喋>Chou;
+喘>Zen;
+喙>Kai;
+喚>Kan;
+喜>Ki;
+喝>Katsu;
+喞>Soku;
+喟>Ki;
+喧>Ken;
+喨>Ryou;
+喩>Yu;
+喪>Sou;
+喫>Kitsu;
+喬>Kyou;
+單>Tan;
+営>Ei;
+嗄>Sa;
+嗅>Kyuu;
+嗇>Shoku;
+嗔>Shin;
+嗚>O;
+嗜>Shi;
+嗟>Sa;
+嗣>Shi;
+嗤>Shi;
+嗷>Gou;
+嗹>Ren;
+嗽>Soku;
+嗾>Sou;
+嘆>Tan;
+嘉>Ka;
+嘔>Ou;
+嘖>Saku;
+嘗>Shou;
+嘘>Kyo;
+嘛>Ma;
+嘩>Ka;
+嘯>Shou;
+嘱>Shoku;
+嘲>Tou;
+嘴>Shi;
+嘶>Sei;
+嘸>Bu;
+噂>Son;
+噌>Sou;
+噎>Itsu;
+噐>Ki;
+噛>Gou;
+噤>Kin;
+器>Ki;
+噪>Sou;
+噫>I;
+噬>Zei;
+噴>Fun;
+噸>Ton;
+嚀>Dei;
+嚆>Kou;
+嚇>Kaku;
+嚊>Hi;
+嚏>Tei;
+嚔>Tei;
+嚠>Ryuu;
+嚢>Nou;
+嚥>En;
+嚮>Kou;
+嚴>Gen;
+嚶>Ou;
+嚼>Shaku;
+囀>Ten;
+囁>Shou;
+囂>Gou;
+囃>Sou;
+囈>Gei;
+囎>So;
+囑>Shoku;
+囓>Ketsu;
+囗>I;
+囘>Kai;
+囚>Shuu;
+四>Shi;
+回>Kai;
+因>In;
+団>Dan;
+囮>Ka;
+困>Kon;
+囲>I;
+図>To;
+囹>Rei;
+固>Ko;
+国>Koku;
+囿>Yuu;
+圀>Koku;
+圃>Ho;
+圄>Gyo;
+圈>Ken;
+圉>Gyo;
+國>Koku;
+圍>I;
+圏>Ken;
+園>En;
+圓>En;
+圖>To;
+團>Dan;
+圜>Kan;
+土>Do;
+圧>Atsu;
+在>Zai;
+圭>Kei;
+地>Chi;
+圻>Ki;
+址>Shi;
+坂>Han;
+均>Kin;
+坊>Bou;
+坎>Kan;
+坏>Hai;
+坐>Za;
+坑>Kou;
+坡>Ha;
+坤>Kon;
+坦>Tan;
+坩>Kan;
+坪>Hei;
+坿>Fu;
+垂>Sui;
+垈>Tai;
+垉>Hou;
+型>Kei;
+垓>Gai;
+垠>Gin;
+垢>Kou;
+垣>En;
+垤>Tetsu;
+埀>Sui;
+埃>Ai;
+埆>Kaku;
+埋>Mai;
+城>Jou;
+埒>Rachi;
+埓>Rachi;
+埔>Ho;
+埜>Sho;
+域>Iki;
+埠>Fu;
+埣>Sai;
+埴>Shoku;
+執>Shitsu;
+培>Bai;
+基>Ki;
+埼>Ki;
+堀>Kutsu;
+堂>Dou;
+堅>Ken;
+堆>Tai;
+堊>A;
+堋>Hou;
+堕>Da;
+堙>In;
+堝>Ka;
+堡>Hou;
+堤>Tei;
+堪>Kan;
+堯>Gyou;
+堰>En;
+報>Hou;
+場>Jou;
+堵>To;
+堺>Kai;
+堽>Kou;
+塁>Rui;
+塊>Kai;
+塋>Ei;
+塑>So;
+塒>Shi;
+塔>Tou;
+塗>To;
+塘>Tou;
+塙>Kaku;
+塚>Chou;
+塞>Soku;
+塢>O;
+塩>En;
+填>Ten;
+塲>Jou;
+塵>Jin;
+塹>Zan;
+塾>Juku;
+境>Kyou;
+墅>Sho;
+墓>Bo;
+増>Zou;
+墜>Tsui;
+墟>Kyo;
+墨>Boku;
+墫>Shun;
+墮>Da;
+墳>Fun;
+墸>Sho;
+墺>Ou;
+墻>Shou;
+墾>Kon;
+壁>Heki;
+壅>You;
+壇>Dan;
+壊>Kai;
+壌>Jou;
+壑>Gaku;
+壓>Atsu;
+壕>Gou;
+壘>Rui;
+壙>Kou;
+壜>Tan;
+壞>Kai;
+壟>Ryou;
+壤>Jou;
+壥>Ten;
+士>Shi;
+壬>Jin;
+壮>Sou;
+壯>Sou;
+声>Sei;
+壱>Ichi;
+売>Bai;
+壷>Ko;
+壹>Ichi;
+壺>Ko;
+壻>Sei;
+壼>Kon;
+壽>Ju;
+夂>Chi;
+変>Hen;
+夊>Sui;
+夏>Ka;
+夐>Kei;
+夕>Seki;
+外>Gai;
+夘>Bou;
+夙>Shuku;
+多>Ta;
+夛>Ta;
+夜>Ya;
+夢>Mu;
+夥>Ka;
+大>Tai;
+天>Ten;
+太>Ta;
+夫>Fu;
+夬>Ketsu;
+夭>You;
+央>Ou;
+失>Shitsu;
+夲>Tou;
+夷>I;
+夸>Ko;
+夾>Kou;
+奄>En;
+奇>Ki;
+奈>Na;
+奉>Hou;
+奎>Kei;
+奏>Sou;
+奐>Kan;
+契>Setsu;
+奔>Hon;
+奕>Eki;
+套>Tou;
+奘>Jou;
+奚>Kei;
+奠>Ten;
+奢>Sha;
+奥>Ou;
+奧>Ou;
+奨>Shou;
+奩>Ren;
+奪>Datsu;
+奬>Shou;
+奮>Fun;
+女>Jo;
+奴>Do;
+奸>Kan;
+好>Kou;
+妁>Shaku;
+如>Jo;
+妃>Hi;
+妄>Bou;
+妊>Nin;
+妍>Ken;
+妓>Ki;
+妖>You;
+妙>Myou;
+妛>Shi;
+妝>Sou;
+妣>Hi;
+妥>Da;
+妨>Bou;
+妬>To;
+妲>Datsu;
+妹>Mai;
+妻>Sai;
+妾>Shou;
+姆>Bo;
+姉>Shi;
+始>Shi;
+姐>So;
+姑>Ko;
+姓>Sei;
+委>I;
+姙>Nin;
+姚>You;
+姜>Kyou;
+姥>Bo;
+姦>Kan;
+姨>I;
+姪>Tetsu;
+姫>Ki;
+姶>Ou;
+姻>In;
+姿>Shi;
+威>I;
+娃>A;
+娉>Hei;
+娑>Sha;
+娘>Jou;
+娚>Nan;
+娜>Da;
+娟>Ken;
+娠>Shin;
+娥>Ga;
+娩>Ben;
+娯>Go;
+娵>Shu;
+娶>Shu;
+娼>Shou;
+婀>A;
+婁>Rou;
+婆>Ba;
+婉>En;
+婚>Kon;
+婢>Hi;
+婦>Fu;
+婪>Ran;
+婬>In;
+婿>Sei;
+媒>Bai;
+媚>Bi;
+媛>En;
+媼>Ou;
+媽>Bo;
+媾>Kou;
+嫁>Ka;
+嫂>Sou;
+嫉>Shitsu;
+嫋>Jou;
+嫌>Ken;
+嫐>Jou;
+嫖>Hyou;
+嫗>Ou;
+嫡>Chaku;
+嫣>En;
+嫦>Kou;
+嫩>Don;
+嫺>Kan;
+嫻>Kan;
+嬉>Ki;
+嬋>Sen;
+嬌>Kyou;
+嬖>Hei;
+嬢>Jou;
+嬪>Hin;
+嬬>Shu;
+嬰>Ei;
+嬲>Jou;
+嬾>Ran;
+孀>Sou;
+孃>Jou;
+孅>Sen;
+子>Shi;
+孑>Getsu;
+孔>Kou;
+孕>You;
+字>Ji;
+存>Son;
+孚>Fu;
+孛>Botsu;
+孜>Shi;
+孝>Kou;
+孟>Mou;
+季>Ki;
+孤>Ko;
+孥>Do;
+学>Gaku;
+孩>Kai;
+孫>Son;
+孰>Juku;
+孱>Sen;
+孳>Shi;
+孵>Fu;
+學>Gaku;
+孺>Ju;
+宀>Ben;
+它>Ta;
+宅>Taku;
+宇>U;
+守>Shu;
+安>An;
+宋>Sou;
+完>Kan;
+宍>Niku;
+宏>Kou;
+宕>Tou;
+宗>Shuu;
+官>Kan;
+宙>Chuu;
+定>Tei;
+宛>En;
+宜>Gi;
+宝>Hou;
+実>Jitsu;
+客>Kyaku;
+宣>Sen;
+室>Shitsu;
+宥>Yuu;
+宦>Kan;
+宮>Kyuu;
+宰>Sai;
+害>Gai;
+宴>En;
+宵>Shou;
+家>Ka;
+宸>Shin;
+容>You;
+宿>Shuku;
+寂>Jaku;
+寃>En;
+寄>Ki;
+寅>In;
+密>Mitsu;
+寇>Kou;
+寉>Kaku;
+富>Fu;
+寐>Bi;
+寒>Kan;
+寓>Guu;
+寔>Shoku;
+寛>Kan;
+寝>Shin;
+寞>Baku;
+察>Satsu;
+寡>Ka;
+寢>Shin;
+寤>Go;
+寥>Ryou;
+實>Jitsu;
+寧>Nei;
+寨>Sai;
+審>Shin;
+寫>Sha;
+寮>Ryou;
+寰>Kan;
+寳>Hou;
+寵>Chou;
+寶>Hou;
+寸>Sun;
+寺>Ji;
+対>Tai;
+寿>Ju;
+封>Fuu;
+専>Sen;
+射>Sha;
+尅>Koku;
+将>Shou;
+將>Shou;
+專>Sen;
+尉>I;
+尊>Son;
+尋>Jin;
+對>Tai;
+導>Dou;
+小>Shou;
+少>Shou;
+尓>Ji;
+尖>Sen;
+尚>Shou;
+尠>Sen;
+尢>Ou;
+尤>Yuu;
+尨>Bou;
+尭>Gyou;
+就>Shuu;
+尸>Shi;
+尹>In;
+尺>Shaku;
+尻>Kou;
+尼>Ni;
+尽>Jin;
+尾>Bi;
+尿>Nyou;
+局>Kyoku;
+屁>Hi;
+居>Kyo;
+屆>Kai;
+屈>Kutsu;
+届>Kai;
+屋>Oku;
+屍>Shi;
+屎>Shi;
+屏>Hei;
+屐>Geki;
+屑>Setsu;
+屓>Ki;
+展>Ten;
+属>Zoku;
+屠>To;
+屡>Ru;
+層>Sou;
+履>Ri;
+屬>Zoku;
+屮>Sa;
+屯>Ton;
+山>San;
+屹>Kitsu;
+岌>Kyuu;
+岐>Ki;
+岑>Shin;
+岔>Sa;
+岡>Kou;
+岨>So;
+岩>Gan;
+岫>Shuu;
+岬>Kou;
+岱>Tai;
+岳>Gaku;
+岶>Haku;
+岷>Bin;
+岸>Gan;
+岻>Ji;
+岾>Sen;
+峇>Kou;
+峙>Ji;
+峡>Kyou;
+峨>Ga;
+峩>Ga;
+峪>Yoku;
+峭>Shou;
+峯>Hou;
+峰>Hou;
+島>Tou;
+峺>Kou;
+峻>Shun;
+峽>Kyou;
+崇>Suu;
+崋>Ka;
+崎>Ki;
+崑>Kon;
+崔>Sai;
+崕>Gai;
+崖>Gai;
+崗>Kou;
+崘>Ron;
+崙>Ron;
+崚>Ryou;
+崛>Kutsu;
+崟>Gin;
+崢>Sou;
+崩>Hou;
+嵋>Bi;
+嵌>Kan;
+嵎>Guu;
+嵐>Ran;
+嵒>Gan;
+嵜>Ki;
+嵩>Suu;
+嵬>Kai;
+嵯>Sa;
+嵳>Sa;
+嶂>Shou;
+嶄>San;
+嶇>Ku;
+嶋>Tou;
+嶌>Tou;
+嶐>Ryuu;
+嶝>Tou;
+嶢>Gyou;
+嶬>Gi;
+嶮>Ken;
+嶷>Gi;
+嶺>Rei;
+嶼>Sho;
+嶽>Gaku;
+巉>San;
+巌>Gan;
+巍>Gi;
+巒>Ran;
+巓>Ten;
+巖>Gan;
+巛>Sen;
+川>Sen;
+州>Shuu;
+巡>Jun;
+巣>Sou;
+工>Kou;
+左>Sa;
+巧>Kou;
+巨>Kyo;
+巫>Fu;
+差>Shi;
+己>Ki;
+已>I;
+巳>Shi;
+巴>Ha;
+巵>Shi;
+巷>Kou;
+巻>Kan;
+巽>Son;
+巾>Kin;
+市>Shi;
+布>Fu;
+帆>Han;
+帋>Shi;
+希>Ki;
+帑>Do;
+帖>Jou;
+帙>Chitsu;
+帚>Sou;
+帛>Haku;
+帝>Tei;
+帥>Sotsu;
+師>Shi;
+席>Seki;
+帯>Tai;
+帰>Ki;
+帳>Chou;
+帶>Tai;
+帷>I;
+常>Jou;
+帽>Bou;
+幀>Tei;
+幃>I;
+幄>Aku;
+幅>Fuku;
+幇>Hou;
+幌>Kou;
+幎>Beki;
+幔>Ban;
+幕>Maku;
+幗>Kaku;
+幟>Shi;
+幡>Han;
+幢>Tou;
+幣>Hei;
+幤>Hei;
+干>Kan;
+平>Hei;
+年>Nen;
+幵>Ken;
+并>Hei;
+幸>Kou;
+幹>Kan;
+幺>You;
+幻>Gen;
+幼>You;
+幽>Yuu;
+幾>Ki;
+广>Gen;
+庁>Chou;
+広>Kou;
+庄>Sou;
+庇>Hi;
+床>Shou;
+序>Jo;
+底>Tei;
+庖>Hou;
+店>Ten;
+庚>Kou;
+府>Fu;
+庠>Shou;
+度>Taku;
+座>Za;
+庫>Ko;
+庭>Tei;
+庵>An;
+庶>Sho;
+康>Kou;
+庸>You;
+廁>Shi;
+廂>Shou;
+廃>Hai;
+廈>Ka;
+廉>Ren;
+廊>Rou;
+廐>Kyuu;
+廓>Kaku;
+廖>Ryou;
+廚>Zu;
+廛>Ten;
+廝>Shi;
+廟>Byou;
+廠>Shou;
+廡>Bu;
+廢>Hai;
+廣>Kou;
+廨>Kai;
+廩>Rin;
+廬>Ryo;
+廰>Chou;
+廱>You;
+廳>Chou;
+廴>In;
+延>En;
+廷>Tei;
+廸>Teki;
+建>Ken;
+廻>Kai;
+廼>Dai;
+廾>Kyou;
+廿>Juu;
+弁>Ben;
+弃>Ki;
+弄>Rou;
+弉>Jou;
+弊>Hei;
+弋>Yoku;
+弌>Itsu;
+弍>Ni;
+式>Shiki;
+弐>Ni;
+弑>Shi;
+弓>Kyuu;
+弔>Chou;
+引>In;
+弗>Futsu;
+弘>Kou;
+弛>Shi;
+弟>Tei;
+弥>Mi;
+弦>Gen;
+弧>Ko;
+弩>Do;
+弭>Bi;
+弯>Wan;
+弱>Jaku;
+張>Chou;
+強>Kyou;
+弸>Hou;
+弼>Hitsu;
+弾>Dan;
+彁>Sei;
+彈>Dan;
+彊>Kyou;
+彌>Bi;
+彎>Wan;
+彑>Kei;
+当>Tou;
+彖>Tan;
+彗>Sui;
+彙>I;
+彜>I;
+彝>I;
+彡>San;
+形>Kei;
+彦>Gen;
+彩>Sai;
+彪>Hyou;
+彫>Chou;
+彬>Hin;
+彭>Hou;
+彰>Shou;
+影>Ei;
+彳>Teki;
+彷>Hou;
+役>Yaku;
+彼>Hi;
+彿>Futsu;
+往>Ou;
+征>Sei;
+徂>So;
+徃>Ou;
+径>Kei;
+待>Tai;
+徇>Shun;
+很>Kon;
+徊>Kai;
+律>Ritsu;
+後>Go;
+徐>Jo;
+徑>Kei;
+徒>To;
+従>Juu;
+得>Toku;
+徘>Hai;
+徙>Shi;
+從>Juu;
+徠>Rai;
+御>Gyo;
+徨>Kou;
+復>Fuku;
+循>Jun;
+徭>You;
+微>Bi;
+徳>Toku;
+徴>Chou;
+徹>Tetsu;
+徼>Kyou;
+徽>Ki;
+心>Shin;
+必>Hitsu;
+忌>Ki;
+忍>Nin;
+忖>Son;
+志>Shi;
+忘>Bou;
+忙>Bou;
+応>Ou;
+忝>Ten;
+忠>Chuu;
+忤>Go;
+快>Kai;
+忰>Sui;
+忱>Shin;
+念>Nen;
+忸>Jiku;
+忻>Kin;
+忽>Kotsu;
+忿>Fun;
+怎>Shin;
+怏>You;
+怐>Kou;
+怒>Do;
+怕>Ha;
+怖>Fu;
+怙>Ko;
+怛>Datsu;
+怜>Rei;
+思>Shi;
+怠>Tai;
+怡>I;
+急>Kyuu;
+怦>Hou;
+性>Sei;
+怨>En;
+怩>Ji;
+怪>Kai;
+怫>Futsu;
+怯>Kyou;
+怱>Sou;
+恁>In;
+恂>Jun;
+恃>Ji;
+恆>Kou;
+恊>Kyou;
+恋>Ren;
+恍>Kou;
+恐>Kyou;
+恒>Kou;
+恕>Jo;
+恙>You;
+恚>I;
+恟>Kyou;
+恠>Kai;
+恢>Kai;
+恣>Shi;
+恤>Jutsu;
+恥>Chi;
+恨>Kon;
+恩>On;
+恪>Kaku;
+恫>Dou;
+恬>Ten;
+恭>Kyou;
+息>Soku;
+恰>Kou;
+恵>Kei;
+恷>Kyuu;
+悁>En;
+悃>Kon;
+悄>Shou;
+悉>Shitsu;
+悋>Rin;
+悌>Tei;
+悍>Kan;
+悒>Yuu;
+悔>Kai;
+悖>Hai;
+悗>Ban;
+悚>Shou;
+悛>Sen;
+悟>Go;
+悠>Yuu;
+患>Kan;
+悦>Etsu;
+悧>Ri;
+悩>Nou;
+悪>Aku;
+悲>Hi;
+悳>Toku;
+悴>Sui;
+悵>Chou;
+悶>Mon;
+悸>Ki;
+悼>Tou;
+悽>Sei;
+情>Jou;
+惆>Chuu;
+惇>Ton;
+惑>Waku;
+惓>Ken;
+惘>Bou;
+惚>Kotsu;
+惜>Seki;
+惟>I;
+惠>Kei;
+惡>O;
+惣>Sou;
+惧>Ku;
+惨>San;
+惰>Da;
+惱>Nou;
+想>Sou;
+惴>Zui;
+惶>Kou;
+惷>Shun;
+惹>Ja;
+惺>Sei;
+惻>Soku;
+愀>Shou;
+愁>Shuu;
+愃>Ken;
+愆>Ken;
+愈>Yu;
+愉>Yu;
+愍>Bin;
+愎>Hyoku;
+意>I;
+愕>Gaku;
+愚>Gu;
+愛>Ai;
+感>Kan;
+愡>Sou;
+愧>Ki;
+愨>Kaku;
+愬>So;
+愴>Sou;
+愼>Shin;
+愽>Haku;
+愾>Ki;
+愿>Gen;
+慂>You;
+慄>Ritsu;
+慇>In;
+慈>Ji;
+慊>Ken;
+態>Tai;
+慌>Kou;
+慍>Un;
+慎>Shin;
+慓>Hyou;
+慕>Bo;
+慘>San;
+慙>Zan;
+慚>Zan;
+慝>Toku;
+慟>Dou;
+慢>Man;
+慣>Kan;
+慥>Zou;
+慧>Kei;
+慨>Gai;
+慫>Shou;
+慮>Ryo;
+慯>Shou;
+慰>I;
+慱>Tan;
+慳>Kan;
+慴>Shou;
+慵>You;
+慶>Kei;
+慷>Kou;
+慾>Yoku;
+憂>Yuu;
+憇>Kei;
+憊>Hai;
+憎>Zou;
+憐>Ren;
+憑>Hyou;
+憔>Shou;
+憖>Gin;
+憙>Ki;
+憚>Tan;
+憤>Fun;
+憧>Dou;
+憩>Kei;
+憫>Bin;
+憬>Kei;
+憮>Bu;
+憲>Ken;
+憶>Oku;
+憺>Tan;
+憾>Kan;
+懃>Kin;
+懆>Sou;
+懇>Kon;
+懈>Kai;
+應>You;
+懊>Ou;
+懋>Bou;
+懌>Eki;
+懍>Rin;
+懐>Kai;
+懣>Mon;
+懦>Da;
+懲>Chou;
+懴>Zan;
+懶>Ran;
+懷>Kai;
+懸>Ken;
+懺>Zan;
+懼>Ku;
+懽>Kan;
+懾>Shou;
+懿>I;
+戀>Ren;
+戈>Ka;
+戉>Etsu;
+戊>Bo;
+戌>Jutsu;
+戍>Ju;
+戎>Juu;
+成>Sei;
+我>Ga;
+戒>Kai;
+戔>San;
+或>Waku;
+戚>Seki;
+戛>Katsu;
+戝>Zoku;
+戞>Katsu;
+戟>Geki;
+戡>Kan;
+戦>Sen;
+截>Setsu;
+戮>Riku;
+戯>Gi;
+戰>Sen;
+戲>Gi;
+戳>Taku;
+戴>Tai;
+戸>Ko;
+戻>Rei;
+房>Bou;
+所>Sho;
+扁>Hen;
+扇>Sen;
+扈>Ko;
+扉>Hi;
+手>Shu;
+才>Sai;
+扎>Satsu;
+打>Da;
+払>Futsu;
+托>Taku;
+扛>Kou;
+扞>Kan;
+扠>Sa;
+扣>Kou;
+扮>Fun;
+扱>Sou;
+扶>Fu;
+批>Hi;
+扼>Aku;
+找>Ka;
+承>Shou;
+技>Gi;
+抂>Kyou;
+抃>Ben;
+抄>Shou;
+抉>Ketsu;
+把>Ha;
+抑>Yoku;
+抒>Jo;
+抓>Sou;
+抔>Hou;
+投>Tou;
+抖>Tou;
+抗>Kou;
+折>Setsu;
+抛>Hou;
+抜>Batsu;
+択>Taku;
+披>Hi;
+抬>Tai;
+抱>Hou;
+抵>Tei;
+抹>Matsu;
+抻>Shin;
+押>Ou;
+抽>Chuu;
+拂>Futsu;
+担>Tan;
+拆>Taku;
+拇>Bo;
+拈>Den;
+拉>Ratsu;
+拊>Fu;
+拌>Han;
+拍>Haku;
+拏>Da;
+拐>Kai;
+拑>Kan;
+拒>Kyo;
+拓>Taku;
+拔>Batsu;
+拗>Ou;
+拘>Kou;
+拙>Setsu;
+招>Shou;
+拜>Hai;
+拝>Hai;
+拠>Kyo;
+拡>Kaku;
+括>Katsu;
+拭>Shiki;
+拮>Kitsu;
+拯>Jou;
+拱>Kyou;
+拳>Ken;
+拵>Son;
+拶>Satsu;
+拷>Gou;
+拾>Shuu;
+拿>Da;
+持>Ji;
+挂>Kei;
+指>Shi;
+挈>Ketsu;
+按>An;
+挌>Kaku;
+挑>Chou;
+挙>Kyo;
+挟>Kyou;
+挧>Ku;
+挨>Ai;
+挫>Za;
+振>Shin;
+挺>Tei;
+挽>Ban;
+挾>Kyou;
+挿>Sou;
+捉>Soku;
+捌>Hatsu;
+捍>Kan;
+捏>Detsu;
+捐>En;
+捕>Ho;
+捗>Ho;
+捜>Sou;
+捧>Hou;
+捨>Sha;
+捩>Retsu;
+捫>Mon;
+据>Kyo;
+捲>Ken;
+捶>Sui;
+捷>Shou;
+捺>Natsu;
+捻>Nen;
+掀>Kin;
+掃>Sou;
+授>Ju;
+掉>Tou;
+掌>Shou;
+掎>Ki;
+掏>Tou;
+排>Hai;
+掖>Eki;
+掘>Kutsu;
+掛>Ka;
+掟>Tou;
+掠>Ryaku;
+採>Sai;
+探>Tan;
+掣>Sei;
+接>Setsu;
+控>Kou;
+推>Sui;
+掩>En;
+措>So;
+掫>Sou;
+掬>Kiku;
+掲>Kei;
+掴>Kaku;
+掻>Sou;
+掾>En;
+揀>Kan;
+揃>Sen;
+揄>Yu;
+揆>Ki;
+揉>Juu;
+描>Byou;
+提>Tei;
+插>Sou;
+揖>Yuu;
+揚>You;
+換>Kan;
+握>Aku;
+揣>Shi;
+揩>Kai;
+揮>Ki;
+援>En;
+揶>Ya;
+揺>You;
+搆>Kou;
+損>Son;
+搏>Haku;
+搓>Sa;
+搖>You;
+搗>Tou;
+搜>Sou;
+搦>Jaku;
+搨>Tou;
+搬>Han;
+搭>Tou;
+搴>Ken;
+搶>Shou;
+携>Kei;
+搾>Saku;
+摂>Setsu;
+摎>Kyuu;
+摘>Teki;
+摧>Sai;
+摩>Ma;
+摯>Shi;
+摶>Tan;
+摸>Mo;
+摺>Shou;
+撃>Geki;
+撈>Rou;
+撒>San;
+撓>Dou;
+撕>Sei;
+撚>Nen;
+撞>Shu;
+撤>Tetsu;
+撥>Hatsu;
+撩>Ryou;
+撫>Bu;
+播>Ha;
+撮>Satsu;
+撰>San;
+撲>Boku;
+撹>Kaku;
+撻>Tachi;
+撼>Kan;
+擁>You;
+擂>Rai;
+擅>Sen;
+擇>Taku;
+操>Sou;
+擒>Kin;
+擔>Tan;
+擘>Haku;
+據>Kyo;
+擠>Sei;
+擡>Tai;
+擢>Teki;
+擣>Tou;
+擦>Satsu;
+擧>Kyo;
+擬>Gi;
+擯>Hin;
+擱>Kaku;
+擲>Teki;
+擴>Kaku;
+擶>Sen;
+擺>Hai;
+擽>Ryaku;
+擾>Jou;
+攀>Han;
+攅>San;
+攘>Jou;
+攜>Kei;
+攝>Setsu;
+攣>Ren;
+攤>Tan;
+攪>Kaku;
+攫>Kaku;
+攬>Ran;
+支>Shi;
+攴>Hoku;
+攵>Boku;
+收>Shuu;
+攷>Kou;
+攸>Yuu;
+改>Kai;
+攻>Kou;
+放>Hou;
+政>Sei;
+故>Ko;
+效>Kou;
+敍>Jo;
+敏>Bin;
+救>Kyuu;
+敕>Choku;
+敖>Gou;
+敗>Hai;
+敘>Jo;
+教>Kyou;
+敝>Hei;
+敞>Shou;
+敢>Kan;
+散>San;
+敦>Ton;
+敬>Kei;
+数>Suu;
+敲>Kou;
+整>Sei;
+敵>Teki;
+敷>Fu;
+數>Suu;
+斂>Ren;
+斃>Hei;
+文>Bun;
+斈>Gaku;
+斉>Sei;
+斌>Hin;
+斎>Sai;
+斐>Hi;
+斑>Han;
+斗>To;
+料>Ryou;
+斛>Koku;
+斜>Sha;
+斟>Shin;
+斡>Atsu;
+斤>Kin;
+斥>Seki;
+斧>Fu;
+斫>Shaku;
+斬>Zan;
+断>Dan;
+斯>Shi;
+新>Shin;
+斷>Dan;
+方>Hou;
+於>O;
+施>Shi;
+旁>Hou;
+旃>Sen;
+旄>Bou;
+旅>Ryo;
+旆>Hai;
+旋>Sen;
+旌>Sei;
+族>Zoku;
+旒>Ryuu;
+旗>Ki;
+旙>Han;
+旛>Han;
+无>Bu;
+旡>Ki;
+既>Ki;
+日>Nichi;
+旦>Tan;
+旧>Kyuu;
+旨>Shi;
+早>Sou;
+旬>Jun;
+旭>Kyoku;
+旱>Kan;
+旺>Ou;
+旻>Bin;
+昂>Kou;
+昃>Soku;
+昆>Kon;
+昇>Shou;
+昊>Kou;
+昌>Shou;
+明>Mei;
+昏>Kon;
+易>Eki;
+昔>Seki;
+昜>You;
+星>Sei;
+映>Ei;
+春>Shun;
+昧>Mai;
+昨>Saku;
+昭>Shou;
+是>Ze;
+昴>Bou;
+昵>Jitsu;
+昶>Chou;
+昼>Chuu;
+昿>Kou;
+晁>Chou;
+時>Ji;
+晃>Kou;
+晄>Kou;
+晉>Shin;
+晋>Shin;
+晏>An;
+晒>Sai;
+晝>Chuu;
+晞>Ki;
+晟>Sei;
+晢>Setsu;
+晤>Go;
+晦>Kai;
+晧>Kou;
+晨>Shin;
+晩>Ban;
+普>Fu;
+景>Kei;
+晰>Seki;
+晴>Sei;
+晶>Shou;
+智>Chi;
+暁>Gyou;
+暃>Hi;
+暄>Ken;
+暇>Ka;
+暈>Un;
+暉>Ki;
+暎>Ei;
+暑>Sho;
+暖>Dan;
+暗>An;
+暘>You;
+暝>Mei;
+暢>Chou;
+暦>Reki;
+暫>Zan;
+暮>Bo;
+暴>Bou;
+暸>Ryou;
+暹>Sen;
+暼>Hetsu;
+暾>Ton;
+曁>Ki;
+曄>You;
+曇>Don;
+曉>Gyou;
+曖>Ai;
+曙>Sho;
+曚>Bou;
+曜>You;
+曝>Baku;
+曠>Kou;
+曦>Gi;
+曩>Dou;
+曰>Etsu;
+曲>Kyoku;
+曳>Ei;
+更>Kou;
+曵>Ei;
+曷>Katsu;
+書>Sho;
+曹>Sou;
+曼>Ban;
+曽>Zo;
+曾>So;
+替>Tai;
+最>Sai;
+會>Kai;
+月>Getsu;
+有>Yuu;
+朋>Hou;
+服>Fuku;
+朏>Hi;
+朔>Saku;
+朕>Chin;
+朖>Rou;
+朗>Rou;
+望>Bou;
+朝>Chou;
+朞>Ki;
+期>Ki;
+朦>Bou;
+朧>Rou;
+木>Boku;
+未>Mi;
+末>Matsu;
+本>Hon;
+札>Satsu;
+朮>Jutsu;
+朱>Shu;
+朴>Boku;
+朶>Da;
+朷>Tou;
+朸>Ryoku;
+机>Ki;
+朽>Kyuu;
+朿>Shi;
+杆>Kan;
+杉>San;
+李>Ri;
+杏>Kyou;
+材>Zai;
+村>Son;
+杓>Hyou;
+杖>Jou;
+杙>Yoku;
+杜>Do;
+杞>Ko;
+束>Soku;
+杠>Kou;
+条>Jou;
+来>Rai;
+杪>Byou;
+杭>Kou;
+杯>Hai;
+杰>Ketsu;
+東>Tou;
+杲>Kou;
+杳>You;
+杵>Sho;
+杷>Ha;
+杼>Cho;
+松>Shou;
+板>Han;
+枅>Kei;
+枇>Hi;
+枉>Ou;
+枋>Hou;
+枌>Fun;
+析>Seki;
+枕>Chin;
+林>Rin;
+枚>Mai;
+果>Ka;
+枝>Shi;
+枢>Suu;
+枦>Ro;
+枩>Shou;
+枯>Ko;
+枳>Ki;
+枴>Kai;
+架>Ka;
+枷>Ka;
+枸>Ku;
+枹>Hou;
+柁>Ta;
+柄>Hei;
+柆>Rou;
+柊>Shuu;
+柎>Fu;
+柏>Haku;
+某>Bou;
+柑>Kan;
+染>Sen;
+柔>Nyuu;
+柘>Sha;
+柚>Yuu;
+柝>Taku;
+柞>Saku;
+柢>Tei;
+柤>Sa;
+柧>Ko;
+柩>Kyuu;
+柬>Kan;
+柮>Totsu;
+柯>Ka;
+柱>Chuu;
+柳>Ryuu;
+柴>Sai;
+柵>Saku;
+査>Sa;
+柾>Kyuu;
+柿>Shi;
+栄>Ei;
+栓>Sen;
+栖>Sei;
+栗>Ritsu;
+栞>Kan;
+校>Kou;
+栢>Haku;
+栩>Ku;
+株>Chu;
+栫>Son;
+栲>Gou;
+栴>Sen;
+核>Kaku;
+根>Kon;
+格>Kou;
+栽>Sai;
+桀>Ketsu;
+桁>Kou;
+桂>Kei;
+桃>Tou;
+框>Kyou;
+案>An;
+桍>Ko;
+桎>Shitsu;
+桐>Dou;
+桑>Sou;
+桓>Kan;
+桔>Kitsu;
+桙>U;
+桜>Ou;
+桟>San;
+档>Tou;
+桧>Kai;
+桴>Fu;
+桶>Tou;
+桷>Kaku;
+桾>Kun;
+桿>Kan;
+梁>Ryou;
+梃>Tei;
+梅>Bai;
+梍>Sou;
+梏>Koku;
+梓>Shi;
+梔>Shi;
+梗>Kou;
+梛>Da;
+條>Jou;
+梟>Kyou;
+梠>Ryo;
+梢>Shou;
+梦>Mu;
+梧>Go;
+梨>Ri;
+梭>Sa;
+梯>Tei;
+械>Kai;
+梱>Kon;
+梳>So;
+梵>Fuu;
+梶>Bi;
+梹>Bin;
+梼>Tou;
+棄>Ki;
+棆>Rin;
+棉>Men;
+棊>Ki;
+棋>Ki;
+棍>Kon;
+棒>Bou;
+棔>Kon;
+棕>Shu;
+棗>Sou;
+棘>Kyoku;
+棚>Hou;
+棟>Tou;
+棠>Tou;
+棡>Kou;
+棣>Tei;
+棧>San;
+森>Shin;
+棯>Jin;
+棲>Sei;
+棹>Tou;
+棺>Kan;
+椀>Wan;
+椁>Kaku;
+椄>Setsu;
+椅>I;
+椈>Kiku;
+椋>Ryou;
+椌>Kou;
+植>Shoku;
+椎>Tsui;
+椏>A;
+椒>Shou;
+検>Ken;
+椢>Kai;
+椦>Ken;
+椰>Ya;
+椴>Tan;
+椶>Shu;
+椹>Chin;
+椽>Ten;
+椿>Chin;
+楊>You;
+楓>Fuu;
+楔>Setsu;
+楕>Da;
+楙>Bou;
+楚>So;
+楜>Ko;
+楝>Ren;
+楞>Rou;
+楠>Nan;
+楡>Yu;
+楢>Shuu;
+楪>You;
+楫>Shuu;
+業>Gyou;
+楮>Cho;
+楯>Jun;
+楳>Bai;
+楴>Tei;
+極>Goku;
+楷>Kai;
+楸>Shuu;
+楹>Ei;
+楼>Rou;
+楽>Gaku;
+概>Gai;
+榎>Ka;
+榑>Fu;
+榔>Rou;
+榕>You;
+榛>Shin;
+榜>Bou;
+榠>Bei;
+榧>Hi;
+榮>Ei;
+榱>Sui;
+榲>Otsu;
+榴>Ryuu;
+榻>Tou;
+榾>Kotsu;
+榿>Ki;
+槁>Kou;
+槃>Han;
+槇>Ten;
+槊>Saku;
+構>Kou;
+槌>Tsui;
+槍>Sou;
+槎>Sa;
+槐>Kai;
+槓>Kou;
+様>You;
+槙>Ten;
+槝>Tou;
+槞>Rou;
+槧>San;
+槨>Kaku;
+槫>Tan;
+槭>Shuku;
+槲>Koku;
+槹>Kou;
+槻>Ki;
+槽>Sou;
+槿>Kin;
+樂>Gaku;
+樅>Shou;
+樊>Han;
+樋>Tou;
+樌>Kan;
+樒>Mitsu;
+樓>Rou;
+樔>Sou;
+樗>Cho;
+標>Hyou;
+樛>Kyuu;
+樞>Suu;
+樟>Shou;
+模>Mo;
+樢>Boku;
+樣>You;
+権>Ken;
+横>Ou;
+樵>Shou;
+樶>Sai;
+樸>Boku;
+樹>Ju;
+樺>Ka;
+樽>Son;
+橄>Kan;
+橇>Zei;
+橈>Dou;
+橋>Kyou;
+橘>Kitsu;
+橙>Tou;
+機>Ki;
+橡>Shou;
+橢>Da;
+橦>Tou;
+橸>Shou;
+橿>Kyou;
+檀>Tan;
+檄>Geki;
+檍>Yoku;
+檎>Go;
+檐>En;
+檗>Haku;
+檜>Kai;
+檠>Kei;
+檢>Ken;
+檣>Shou;
+檪>Reki;
+檬>Bou;
+檮>Tou;
+檳>Bin;
+檸>Nei;
+檻>Kan;
+櫁>Mitsu;
+櫂>Tou;
+櫃>Ki;
+櫑>Rai;
+櫓>Ro;
+櫚>Ryo;
+櫛>Shitsu;
+櫞>En;
+櫟>Reki;
+櫨>Ro;
+櫪>Reki;
+櫺>Rei;
+櫻>Ou;
+欄>Ran;
+欅>Kyo;
+權>Ken;
+欒>Ran;
+欖>Ran;
+欝>Utsu;
+欠>Ketsu;
+次>Ji;
+欣>Gon;
+欧>Ou;
+欲>Yoku;
+欷>Ki;
+欸>Ai;
+欹>I;
+欺>Gi;
+欽>Kin;
+款>Kan;
+歃>Sou;
+歇>Ketsu;
+歉>Ken;
+歌>Ka;
+歎>Tan;
+歐>Ou;
+歓>Kan;
+歔>Kyo;
+歙>Kyuu;
+歛>Kan;
+歟>Yo;
+歡>Kan;
+止>Shi;
+正>Sei;
+此>Shi;
+武>Bu;
+歩>Ho;
+歪>Wai;
+歯>Shi;
+歳>Sei;
+歴>Reki;
+歸>Ki;
+歹>Gatsu;
+死>Shi;
+歿>Botsu;
+殀>You;
+殃>You;
+殄>Ten;
+殆>Tai;
+殉>Jun;
+殊>Shu;
+残>Zan;
+殍>Hyou;
+殕>Fuu;
+殖>Shoku;
+殘>Zan;
+殞>In;
+殤>Shou;
+殪>Ei;
+殫>Tan;
+殯>Hin;
+殱>Sen;
+殲>Sen;
+殳>Shu;
+殴>Ou;
+段>Dan;
+殷>In;
+殺>Satsu;
+殻>Kaku;
+殼>Kaku;
+殿>Ten;
+毀>Ki;
+毅>Ki;
+毆>Ou;
+毋>Bu;
+母>Bo;
+毎>Mai;
+毒>Doku;
+毓>Iku;
+比>Hi;
+毘>Hi;
+毛>Mou;
+毫>Kou;
+毬>Kyuu;
+毯>Tan;
+毳>Zei;
+氈>Sen;
+氏>Shi;
+民>Min;
+氓>Bou;
+气>Ki;
+気>Ki;
+氛>Fun;
+氣>Ki;
+氤>In;
+水>Sui;
+氷>Hyou;
+永>Ei;
+氾>Han;
+汀>Tei;
+汁>Juu;
+求>Kyuu;
+汎>Han;
+汐>Seki;
+汕>San;
+汗>Kan;
+汚>O;
+汝>Jo;
+汞>Kou;
+江>Kou;
+池>Chi;
+汨>Beki;
+汪>Ou;
+汰>Ta;
+汲>Kyuu;
+汳>Hen;
+決>Ketsu;
+汽>Ki;
+汾>Fun;
+沁>Shin;
+沂>Ki;
+沃>Yoku;
+沈>Chin;
+沌>Ton;
+沍>Go;
+沐>Boku;
+沒>Botsu;
+沓>Tou;
+沖>Chuu;
+沙>Sa;
+沚>Shi;
+沛>Hai;
+没>Botsu;
+沢>Taku;
+沫>Matsu;
+沮>Sho;
+沱>Ta;
+河>Ka;
+沸>Futsu;
+油>Yu;
+沺>Ten;
+治>Ji;
+沼>Shou;
+沽>Ko;
+沾>Ten;
+沿>En;
+況>Kyou;
+泄>Ei;
+泅>Shuu;
+泉>Sen;
+泊>Haku;
+泌>Hitsu;
+泓>Ou;
+法>Hou;
+泗>Shi;
+泙>Hou;
+泛>Han;
+泝>So;
+泡>Hou;
+波>Ha;
+泣>Kyuu;
+泥>Dei;
+注>Chuu;
+泪>Rui;
+泯>Bin;
+泰>Tai;
+泱>Ou;
+泳>Ei;
+洋>You;
+洌>Retsu;
+洒>Sei;
+洗>Sen;
+洙>Shu;
+洛>Raku;
+洞>Dou;
+洟>I;
+津>Shin;
+洩>Ei;
+洪>Kou;
+洫>Kyoku;
+洲>Shuu;
+洳>Jo;
+洵>Shun;
+洶>Kyou;
+洸>Kou;
+活>Katsu;
+洽>Kou;
+派>Ha;
+流>Ryuu;
+浄>Jou;
+浅>Sen;
+浙>Setsu;
+浚>Shun;
+浜>Hin;
+浣>Kan;
+浤>Kou;
+浦>Ho;
+浩>Kou;
+浪>Rou;
+浬>Ri;
+浮>Fu;
+浴>Yoku;
+海>Kai;
+浸>Shin;
+浹>Shou;
+涅>Netsu;
+消>Shou;
+涌>Yuu;
+涎>Sen;
+涓>Ken;
+涕>Tei;
+涙>Rui;
+涛>Tou;
+涜>Toku;
+涯>Gai;
+液>Eki;
+涵>Kan;
+涸>Ko;
+涼>Ryou;
+淀>Ten;
+淅>Seki;
+淆>Kou;
+淇>Ki;
+淋>Rin;
+淌>Shou;
+淑>Shuku;
+淒>Sei;
+淕>Riku;
+淘>Tou;
+淙>Sou;
+淞>Shou;
+淡>Tan;
+淤>Yo;
+淦>Kan;
+淨>Jou;
+淪>Rin;
+淫>In;
+淬>Sai;
+淮>Wai;
+深>Shin;
+淳>Shun;
+淵>En;
+混>Kon;
+淹>En;
+淺>Sen;
+添>Ten;
+清>Sei;
+渇>Katsu;
+済>Sai;
+渉>Shou;
+渊>En;
+渋>Juu;
+渓>Kei;
+渕>En;
+渙>Kan;
+渚>Sho;
+減>Gen;
+渝>Yu;
+渟>Tei;
+渠>Kyo;
+渡>To;
+渣>Sa;
+渤>Botsu;
+渥>Aku;
+渦>Ka;
+温>On;
+渫>Setsu;
+測>Soku;
+渭>I;
+渮>Ka;
+港>Kou;
+游>Yuu;
+渺>Byou;
+渾>Kon;
+湃>Hai;
+湊>Sou;
+湍>Tan;
+湎>Ben;
+湖>Ko;
+湘>Shou;
+湛>Tan;
+湟>Kou;
+湧>Yuu;
+湫>Shou;
+湮>In;
+湯>Tou;
+湲>Kan;
+湶>Sen;
+湾>Wan;
+湿>Shitsu;
+満>Man;
+溂>Ratsu;
+溌>Hatsu;
+溏>Tou;
+源>Gen;
+準>Jun;
+溘>Kou;
+溜>Ryuu;
+溝>Kou;
+溟>Mei;
+溢>Itsu;
+溥>Ho;
+溪>Kei;
+溯>So;
+溲>Sou;
+溶>You;
+溷>Kon;
+溺>Deki;
+溽>Joku;
+滂>Bou;
+滄>Sou;
+滅>Metsu;
+滉>Kou;
+滋>Ji;
+滌>Deki;
+滑>Katsu;
+滓>Sai;
+滔>Tou;
+滕>Tou;
+滝>Sou;
+滞>Tai;
+滬>Ko;
+滯>Tai;
+滲>Shin;
+滴>Teki;
+滷>Ro;
+滸>Ko;
+滾>Kon;
+滿>Man;
+漁>Ryou;
+漂>Hyou;
+漆>Shitsu;
+漉>Roku;
+漏>Rou;
+漑>Gai;
+漓>Ri;
+演>En;
+漕>Sou;
+漠>Baku;
+漢>Kan;
+漣>Ran;
+漫>Man;
+漬>Shi;
+漱>Sou;
+漲>Chou;
+漸>Zen;
+漾>You;
+漿>Shou;
+潁>Ei;
+潅>Kan;
+潔>Ketsu;
+潘>Han;
+潛>Sen;
+潜>Sen;
+潟>Seki;
+潤>Jun;
+潦>Rou;
+潭>Tan;
+潮>Chou;
+潯>Jin;
+潰>Kai;
+潴>Cho;
+潸>San;
+潺>San;
+潼>Dou;
+澀>Juu;
+澁>Juu;
+澂>Chou;
+澄>Chou;
+澆>Gyou;
+澎>Hou;
+澑>Ryuu;
+澗>Kan;
+澡>Sou;
+澣>Kan;
+澤>Taku;
+澪>Rei;
+澱>Ten;
+澳>Iku;
+澹>Tan;
+激>Geki;
+濁>Daku;
+濂>Ren;
+濃>Nou;
+濆>Fun;
+濔>Dei;
+濕>Shitsu;
+濘>Nei;
+濟>Sai;
+濠>Gou;
+濡>Ju;
+濤>Tou;
+濫>Ran;
+濬>Shun;
+濮>Hoku;
+濯>Taku;
+濱>Hin;
+濳>Sen;
+濶>Katsu;
+濺>Sen;
+濾>Ryo;
+瀁>You;
+瀉>Sha;
+瀋>Shin;
+瀏>Ryuu;
+瀑>Baku;
+瀕>Hin;
+瀘>Ro;
+瀚>Kan;
+瀛>Ei;
+瀝>Reki;
+瀞>Jou;
+瀟>Shou;
+瀦>Cho;
+瀧>Sou;
+瀬>Rai;
+瀰>Bi;
+瀲>Ren;
+瀾>Ran;
+灌>Kan;
+灑>Sai;
+灘>Dan;
+灣>Wan;
+火>Ka;
+灯>Tou;
+灰>Kai;
+灸>Kyuu;
+灼>Shaku;
+災>Sai;
+炉>Ro;
+炊>Sui;
+炎>En;
+炒>Sou;
+炙>Sha;
+炬>Ko;
+炭>Tan;
+炮>Hou;
+炯>Kei;
+炳>Hei;
+炸>Saku;
+点>Ten;
+為>I;
+烈>Retsu;
+烋>Kou;
+烏>U;
+烙>Raku;
+烝>Jou;
+烟>En;
+烱>Kei;
+烹>Hou;
+烽>Hou;
+焉>En;
+焔>En;
+焙>Hou;
+焚>Fun;
+焜>Kon;
+無>Mu;
+焦>Shou;
+然>Zen;
+焼>Shou;
+煉>Ren;
+煌>Kou;
+煎>Sen;
+煕>Ki;
+煖>Dan;
+煙>En;
+煢>Kei;
+煤>Bai;
+煥>Kan;
+煦>Ku;
+照>Shou;
+煩>Han;
+煬>You;
+煮>Sha;
+煽>Sen;
+熄>Soku;
+熈>Ki;
+熊>Yuu;
+熏>Kun;
+熔>You;
+熕>Kou;
+熙>Ki;
+熟>Juku;
+熨>I;
+熬>Gou;
+熱>Netsu;
+熹>Ki;
+熾>Shi;
+燃>Nen;
+燈>Tou;
+燉>Ton;
+燎>Ryou;
+燐>Rin;
+燒>Shou;
+燔>Han;
+燕>En;
+燗>Ran;
+營>Ei;
+燠>Iku;
+燥>Sou;
+燦>San;
+燧>Sui;
+燬>Ki;
+燭>Shoku;
+燮>Shou;
+燵>Tatsu;
+燹>Sen;
+燻>Kun;
+燼>Jin;
+燿>You;
+爆>Baku;
+爍>Shaku;
+爐>Ro;
+爛>Ran;
+爨>San;
+爪>Sou;
+爬>Ha;
+爭>Sou;
+爰>En;
+爲>I;
+爵>Shaku;
+父>Fu;
+爺>Ya;
+爻>Kou;
+爼>Sho;
+爽>Sou;
+爾>Ji;
+爿>Shou;
+牀>Sou;
+牆>Shou;
+片>Hen;
+版>Han;
+牋>Sen;
+牌>Hai;
+牒>Chou;
+牘>Toku;
+牙>Ga;
+牛>Gyuu;
+牝>Hin;
+牟>Bou;
+牡>Bo;
+牢>Rou;
+牧>Boku;
+物>Motsu;
+牲>Sei;
+牴>Tei;
+特>Toku;
+牽>Ken;
+牾>Go;
+犀>Sei;
+犁>Ri;
+犂>Ri;
+犇>Hon;
+犒>Kou;
+犖>Raku;
+犠>Gi;
+犢>Toku;
+犧>Gi;
+犬>Ken;
+犯>Han;
+犲>Sai;
+状>Jou;
+犹>Yuu;
+狂>Kyou;
+狃>Juu;
+狄>Teki;
+狆>Chuu;
+狎>Kou;
+狐>Ko;
+狒>Hi;
+狗>Kou;
+狙>Sho;
+狛>Haku;
+狠>Gan;
+狡>Kou;
+狢>Kaku;
+狩>Shu;
+独>Doku;
+狭>Kyou;
+狷>Ken;
+狸>Ri;
+狹>Kyou;
+狼>Rou;
+狽>Hai;
+猊>Gei;
+猖>Shou;
+猗>I;
+猛>Mou;
+猜>Sai;
+猝>Sotsu;
+猟>Ryou;
+猥>Wai;
+猩>Sei;
+猪>Cho;
+猫>Byou;
+献>Ken;
+猯>Tan;
+猴>Kou;
+猶>Yuu;
+猷>Yuu;
+猾>Katsu;
+猿>En;
+獄>Goku;
+獅>Shi;
+獎>Shou;
+獏>Baku;
+獗>Ketsu;
+獣>Juu;
+獨>Doku;
+獪>Kai;
+獰>Dou;
+獲>Kaku;
+獵>Ryou;
+獸>Juu;
+獺>Datsu;
+獻>Ken;
+玄>Gen;
+率>Ritsu;
+玉>Gyoku;
+王>Ou;
+玖>Kyuu;
+玩>Gan;
+玲>Rei;
+玳>Tai;
+玻>Ha;
+珀>Haku;
+珂>Ka;
+珈>Ka;
+珊>San;
+珍>Chin;
+珎>Chin;
+珞>Raku;
+珠>Shu;
+珥>Ji;
+珪>Kei;
+班>Han;
+珮>Hai;
+珱>Ei;
+珸>Go;
+現>Gen;
+球>Kyuu;
+琅>Rou;
+理>Ri;
+琉>Ryuu;
+琢>Taku;
+琥>Ko;
+琲>Hai;
+琳>Rin;
+琴>Kin;
+琵>Bi;
+琶>Ha;
+琺>Hou;
+琿>Kon;
+瑁>Bou;
+瑕>Ka;
+瑙>Dou;
+瑚>Ko;
+瑛>Ei;
+瑜>Yu;
+瑞>Zui;
+瑟>Shitsu;
+瑠>Ryuu;
+瑣>Sa;
+瑤>You;
+瑩>Ei;
+瑪>Ba;
+瑯>Rou;
+瑰>Kai;
+瑳>Sa;
+瑶>You;
+瑾>Kin;
+璃>Ri;
+璋>Shou;
+璞>Haku;
+璢>Ryuu;
+璧>Heki;
+環>Kan;
+璽>Ji;
+瓊>Kei;
+瓏>Rou;
+瓔>Ei;
+瓜>Ka;
+瓠>Ko;
+瓢>Hyou;
+瓣>Ben;
+瓦>Ga;
+瓮>Ou;
+瓶>Hei;
+瓷>Shi;
+甃>Shuu;
+甄>Ken;
+甌>Ou;
+甍>Bou;
+甎>Sen;
+甑>Sou;
+甓>Heki;
+甕>Ou;
+甘>Kan;
+甚>Shin;
+甜>Ten;
+甞>Shou;
+生>Sei;
+産>San;
+甥>Sei;
+甦>So;
+用>You;
+甫>Ho;
+甬>You;
+田>Den;
+由>Yuu;
+甲>Kou;
+申>Shin;
+男>Dan;
+甸>Ten;
+町>Chou;
+画>Ga;
+甼>Chou;
+畄>Ryuu;
+畆>Ho;
+畉>Fu;
+畊>Kou;
+畋>Ten;
+界>Kai;
+畍>Kai;
+畏>I;
+畔>Han;
+留>Ryuu;
+畚>Hon;
+畛>Shin;
+畜>Chiku;
+畝>Ho;
+畢>Hitsu;
+畤>Shi;
+略>Ryaku;
+畦>Kei;
+畧>Ryaku;
+番>Ban;
+畫>Kaku;
+畭>Yo;
+異>I;
+畳>Jou;
+畴>Chuu;
+當>Tou;
+畷>Tetsu;
+畸>Ki;
+畿>Ki;
+疂>Jou;
+疆>Kyou;
+疇>Chuu;
+疉>Jou;
+疊>Jou;
+疋>So;
+疎>So;
+疏>So;
+疑>Gi;
+疔>Chou;
+疚>Kyuu;
+疝>San;
+疣>Yuu;
+疥>Kai;
+疫>Eki;
+疱>Hou;
+疲>Hi;
+疳>Kan;
+疵>Shi;
+疸>Tan;
+疹>Shin;
+疼>Tou;
+疽>Sho;
+疾>Shitsu;
+痂>Ka;
+痃>Ken;
+病>Byou;
+症>Shou;
+痊>Sen;
+痍>I;
+痒>You;
+痔>Ji;
+痕>Kon;
+痘>Tou;
+痙>Kei;
+痛>Tsuu;
+痞>Hi;
+痢>Ri;
+痣>Shi;
+痩>Sou;
+痰>Tan;
+痲>Ma;
+痳>Rin;
+痴>Chi;
+痺>Hi;
+痼>Ko;
+痾>A;
+痿>I;
+瘁>Sui;
+瘉>Yu;
+瘋>Fuu;
+瘍>You;
+瘟>On;
+瘠>Seki;
+瘡>Sou;
+瘢>Han;
+瘤>Ryuu;
+瘧>Gyaku;
+瘰>Rui;
+瘴>Shou;
+瘻>Rou;
+療>Ryou;
+癆>Rou;
+癇>Kan;
+癈>Hai;
+癌>Gan;
+癒>Yu;
+癖>Heki;
+癘>Rei;
+癜>Den;
+癡>Chi;
+癢>You;
+癧>Reki;
+癨>Kaku;
+癩>Rai;
+癪>Shaku;
+癬>Sen;
+癰>You;
+癲>Ten;
+癶>Hatsu;
+癸>Ki;
+発>Hotsu;
+登>Tou;
+發>Hotsu;
+白>Haku;
+百>Hyaku;
+皀>Hyuu;
+皃>Bou;
+的>Teki;
+皆>Kai;
+皇>Kou;
+皈>Ki;
+皋>Kou;
+皎>Kou;
+皐>Kou;
+皓>Kou;
+皖>Kan;
+皙>Seki;
+皚>Gai;
+皮>Hi;
+皰>Hou;
+皴>Shun;
+皷>Ko;
+皸>Kun;
+皹>Kun;
+皺>Suu;
+皿>Bai;
+盂>U;
+盃>Hai;
+盆>Bon;
+盈>Ei;
+益>Eki;
+盍>Kou;
+盒>Kou;
+盖>Gai;
+盗>Tou;
+盛>Sei;
+盜>Tou;
+盞>San;
+盟>Mei;
+盡>Jin;
+監>Kan;
+盤>Ban;
+盥>Kan;
+盧>Ro;
+盪>Tou;
+目>Moku;
+盲>Mou;
+直>Choku;
+相>Shou;
+盻>Kei;
+盾>Jun;
+省>Sei;
+眄>Ben;
+眇>Byou;
+眈>Tan;
+眉>Bi;
+看>Kan;
+県>Ken;
+眛>Mai;
+眞>Shin;
+真>Shin;
+眠>Min;
+眤>Tei;
+眥>Sei;
+眦>Sei;
+眩>Gen;
+眷>Ken;
+眸>Bou;
+眺>Chou;
+眼>Gan;
+着>Chaku;
+睇>Tei;
+睚>Gai;
+睛>Sei;
+睡>Sui;
+督>Toku;
+睥>Hei;
+睦>Boku;
+睨>Gei;
+睫>Shou;
+睹>To;
+睾>Kou;
+睿>Ei;
+瞋>Shin;
+瞎>Katsu;
+瞑>Mei;
+瞞>Ban;
+瞠>Dou;
+瞥>Betsu;
+瞬>Shun;
+瞭>Ryou;
+瞰>Kan;
+瞳>Dou;
+瞶>Ki;
+瞹>Ai;
+瞻>Sen;
+瞼>Ken;
+瞽>Ko;
+瞿>Ku;
+矇>Mou;
+矍>Kaku;
+矗>Chiku;
+矚>Shoku;
+矛>Mu;
+矜>Kin;
+矢>Shi;
+矣>I;
+知>Chi;
+矧>Shin;
+矩>Ku;
+短>Tan;
+矮>Wai;
+矯>Kyou;
+石>Shaku;
+矼>Kou;
+砂>Sa;
+砌>Sei;
+砒>Hi;
+研>Ken;
+砕>Sai;
+砠>Sho;
+砥>Shi;
+砦>Sai;
+砧>Chin;
+砲>Hou;
+破>Ha;
+砺>Rei;
+砿>Kou;
+硅>Kei;
+硝>Shou;
+硫>Ryuu;
+硬>Kou;
+硯>Ken;
+硼>Hou;
+碁>Go;
+碆>Ha;
+碇>Tei;
+碌>Roku;
+碍>Gai;
+碎>Sai;
+碑>Hi;
+碓>Tai;
+碕>Ki;
+碗>Wan;
+碚>Hai;
+碣>Ketsu;
+碧>Heki;
+碩>Seki;
+碪>Chin;
+碯>Dou;
+碵>Seki;
+確>Kaku;
+碼>Ba;
+碾>Ten;
+磁>Ji;
+磅>Hou;
+磆>Katsu;
+磊>Rai;
+磋>Sa;
+磐>Han;
+磑>Gai;
+磔>Taku;
+磚>Sen;
+磧>Seki;
+磨>Ma;
+磬>Kei;
+磯>Ki;
+磴>Tou;
+磽>Kou;
+礁>Shou;
+礇>Iku;
+礎>So;
+礑>Tou;
+礒>Gi;
+礙>Gai;
+礦>Kou;
+礪>Rei;
+礫>Reki;
+礬>Ban;
+示>Shi;
+礼>Rei;
+社>Sha;
+祀>Shi;
+祁>Ki;
+祇>Ki;
+祈>Ki;
+祉>Shi;
+祐>Yuu;
+祓>Futsu;
+祕>Hi;
+祖>So;
+祗>Shi;
+祚>So;
+祝>Shuku;
+神>Shin;
+祟>Sui;
+祠>Shi;
+祢>Dei;
+祥>Shou;
+票>Hyou;
+祭>Sai;
+祷>Tou;
+祺>Ki;
+祿>Roku;
+禀>Rin;
+禁>Kin;
+禄>Roku;
+禅>Zen;
+禊>Kei;
+禍>Ka;
+禎>Tei;
+福>Fuku;
+禝>Shoku;
+禦>Gyo;
+禧>Ki;
+禪>Zen;
+禮>Rei;
+禰>Dei;
+禳>Jou;
+禹>U;
+禺>Guu;
+禽>Kin;
+禾>Ka;
+禿>Toku;
+秀>Shuu;
+私>Shi;
+秉>Hei;
+秋>Shuu;
+科>Ka;
+秒>Byou;
+秕>Hi;
+秘>Hi;
+租>So;
+秡>Hatsu;
+秣>Matsu;
+秤>Shou;
+秦>Shin;
+秧>Ou;
+秩>Chitsu;
+秬>Kyo;
+称>Shou;
+移>I;
+稀>Ki;
+稈>Kan;
+程>Tei;
+稍>Sou;
+税>Zei;
+稔>Jin;
+稗>Hai;
+稘>Ki;
+稙>Choku;
+稚>Chi;
+稜>Ryou;
+稟>Rin;
+稠>Chuu;
+種>Shu;
+稱>Shou;
+稲>Tou;
+稷>Shoku;
+稻>Tou;
+稼>Ka;
+稽>Kei;
+稾>Kou;
+稿>Kou;
+穀>Koku;
+穂>Sui;
+穃>You;
+穆>Boku;
+穉>Chi;
+積>Seki;
+穎>Ei;
+穏>On;
+穐>Shuu;
+穗>Sui;
+穡>Shoku;
+穢>Ai;
+穣>Jou;
+穩>On;
+穫>Kaku;
+穰>Jou;
+穴>Ketsu;
+究>Kyuu;
+穹>Kyuu;
+空>Kuu;
+穽>Sei;
+穿>Sen;
+突>Totsu;
+窃>Setsu;
+窄>Saku;
+窈>You;
+窒>Chitsu;
+窓>Sou;
+窕>Chou;
+窖>Kou;
+窗>Sou;
+窘>Kin;
+窟>Kutsu;
+窩>Ka;
+窪>Wa;
+窮>Kyuu;
+窯>You;
+窰>You;
+窶>Ku;
+窺>Ki;
+窿>Ryuu;
+竃>Sou;
+竄>Zan;
+竅>Kyou;
+竇>Tou;
+竈>Sou;
+竊>Setsu;
+立>Ritsu;
+竒>Ki;
+站>Tan;
+竚>Cho;
+竜>Ryuu;
+竝>Hei;
+竟>Kei;
+章>Shou;
+竢>Shi;
+竣>Shun;
+童>Dou;
+竦>Shou;
+竪>Ju;
+竭>Ketsu;
+端>Tan;
+競>Kyou;
+竸>Kei;
+竹>Chiku;
+竺>Toku;
+竿>Kan;
+笄>Kei;
+笆>Ha;
+笈>Kyuu;
+笊>Sou;
+笋>Jun;
+笏>Kotsu;
+笑>Shou;
+笘>Sen;
+笙>Sou;
+笛>Teki;
+笞>Chi;
+笠>Ryuu;
+笥>Shi;
+符>Fu;
+笨>Hon;
+第>Dai;
+笳>Ka;
+笵>Han;
+笶>Shi;
+筅>Sen;
+筆>Hitsu;
+筈>Katsu;
+等>Tou;
+筋>Kin;
+筌>Sen;
+筍>Jun;
+筏>Batsu;
+筐>Kyou;
+筑>Chiku;
+筒>Tou;
+答>Tou;
+策>Saku;
+筝>Sou;
+筥>Kyo;
+筧>Ken;
+筬>Sei;
+筮>Sei;
+筰>Saku;
+筱>Shou;
+筴>Kyou;
+筵>En;
+筺>Kyou;
+箆>Hei;
+箇>Ka;
+箋>Sen;
+箍>Ko;
+箏>Sou;
+箒>Sou;
+箔>Haku;
+箕>Ki;
+算>San;
+箘>Kin;
+箙>Fuku;
+箚>Satsu;
+箜>Kou;
+箝>Kan;
+箟>Kin;
+管>Kan;
+箪>Tan;
+箭>Sen;
+箱>Shou;
+箴>Shin;
+箸>Cho;
+節>Setsu;
+篁>Kou;
+範>Han;
+篆>Ten;
+篇>Hen;
+築>Chiku;
+篋>Kyou;
+篌>Kou;
+篏>Kan;
+篝>Kou;
+篠>Shou;
+篤>Toku;
+篥>Ritsu;
+篦>Hei;
+篩>Shi;
+篭>Rou;
+篳>Hitsu;
+篶>En;
+篷>Hou;
+簀>Saku;
+簇>Sou;
+簍>Rou;
+簑>Sa;
+簒>San;
+簓>Sen;
+簔>Sa;
+簟>Ten;
+簡>Kan;
+簣>Ki;
+簧>Kou;
+簪>Shin;
+簫>Shou;
+簷>En;
+簸>Ha;
+簽>Sen;
+簾>Ren;
+簿>Bo;
+籀>Chuu;
+籃>Ran;
+籌>Chuu;
+籍>Seki;
+籐>Tou;
+籔>Su;
+籖>Sen;
+籘>Tou;
+籟>Rai;
+籠>Rou;
+籤>Sen;
+籥>Yaku;
+籬>Ri;
+米>Bei;
+粃>Hi;
+粉>Fun;
+粋>Sui;
+粐>Ro;
+粒>Ryuu;
+粕>Haku;
+粗>So;
+粘>Nen;
+粛>Shuku;
+粟>Zoku;
+粡>Tou;
+粢>Shi;
+粤>Etsu;
+粥>Shuku;
+粧>Shou;
+粫>Ji;
+粮>Ryou;
+粱>Ryou;
+粲>San;
+粳>Kou;
+粹>Sui;
+粽>Sou;
+精>Sei;
+糂>San;
+糅>Juu;
+糊>Ko;
+糒>Hi;
+糖>Tou;
+糜>Bi;
+糞>Fun;
+糟>Sou;
+糠>Kou;
+糢>Bo;
+糧>Ryou;
+糯>Da;
+糲>Rei;
+糴>Teki;
+糶>Chou;
+糸>Shi;
+糺>Kyuu;
+系>Kei;
+糾>Kyuu;
+紀>Ki;
+紂>Chuu;
+約>Yaku;
+紅>Ku;
+紆>U;
+紊>Bin;
+紋>Mon;
+納>Tou;
+紐>Chuu;
+純>Shun;
+紕>Hi;
+紗>Sa;
+紘>Kou;
+紙>Shi;
+級>Kyuu;
+紛>Fun;
+紜>Un;
+素>So;
+紡>Bou;
+索>Saku;
+紫>Shi;
+紬>Chuu;
+紮>Satsu;
+累>Rui;
+細>Sei;
+紲>Setsu;
+紳>Shin;
+紵>Cho;
+紹>Shou;
+紺>Kon;
+紿>Tai;
+終>Shuu;
+絃>Gen;
+組>So;
+絅>Kei;
+絆>Ban;
+絋>Kou;
+経>Kei;
+絎>Kou;
+絏>Setsu;
+結>Ketsu;
+絖>Kou;
+絛>Jou;
+絞>Kou;
+絡>Raku;
+絢>Ken;
+絣>Hou;
+給>Kyuu;
+絨>Juu;
+絮>Jo;
+統>Tou;
+絲>Shi;
+絳>Kou;
+絵>Kai;
+絶>Zetsu;
+絹>Ken;
+絽>Ryo;
+綉>Tou;
+綏>Sui;
+經>Kei;
+継>Kei;
+続>Zoku;
+綜>Sou;
+綟>Rei;
+綢>Chuu;
+綣>Ken;
+綫>Sen;
+綬>Ju;
+維>I;
+綮>Kei;
+綯>Tou;
+綰>Wan;
+綱>Kou;
+網>Mou;
+綴>Tei;
+綵>Sai;
+綸>Rin;
+綺>Ki;
+綻>Tan;
+綽>Shaku;
+綾>Ryou;
+綿>Men;
+緇>Shi;
+緊>Kin;
+緋>Hi;
+総>Sou;
+緑>Ryoku;
+緒>Sho;
+緘>Kan;
+線>Sen;
+緜>Ben;
+緝>Shuu;
+緞>Tan;
+締>Tei;
+緡>Bin;
+緤>Setsu;
+編>Hen;
+緩>Kan;
+緬>Men;
+緯>I;
+緲>Byou;
+練>Ren;
+緻>Chi;
+縁>En;
+縄>Jou;
+縉>Shin;
+縊>Ei;
+縋>Tsui;
+縒>Shi;
+縛>Baku;
+縞>Kou;
+縟>Joku;
+縡>Sai;
+縢>Tou;
+縣>Ken;
+縦>Juu;
+縫>Hou;
+縮>Shuku;
+縱>Juu;
+縲>Rui;
+縵>Ban;
+縷>Ru;
+縹>Hyou;
+縺>Ren;
+縻>Bi;
+總>Sou;
+績>Seki;
+繁>Han;
+繃>Hou;
+繆>Kyuu;
+繊>Sen;
+繋>Kei;
+繍>Shuu;
+織>Shoku;
+繕>Zen;
+繖>San;
+繙>Han;
+繚>Ryou;
+繝>Kan;
+繞>Jou;
+繦>Kyou;
+繧>Un;
+繩>Jou;
+繪>Kai;
+繭>Ken;
+繰>Sou;
+繹>Eki;
+繻>Ju;
+繼>Kei;
+繽>Hin;
+繿>Ran;
+纂>San;
+纈>Ketsu;
+纉>San;
+續>Zoku;
+纎>Sen;
+纏>Ten;
+纒>Ten;
+纓>Ei;
+纔>San;
+纖>Sen;
+纛>Tou;
+纜>Ran;
+缶>Kan;
+缸>Kou;
+缺>Ketsu;
+罅>Ka;
+罌>Ou;
+罍>Rai;
+罎>Tan;
+罐>Kan;
+网>Bou;
+罔>Bou;
+罕>Kan;
+罘>Fu;
+罟>Ko;
+罠>Bin;
+罧>Shin;
+罨>An;
+罩>Tou;
+罪>Zai;
+罫>Kei;
+置>Chi;
+罰>Batsu;
+署>Sho;
+罵>Ba;
+罷>Hi;
+罸>Batsu;
+罹>Ri;
+羂>Ken;
+羃>Beki;
+羅>Ra;
+羆>Hi;
+羇>Ki;
+羈>Ki;
+羊>You;
+羌>Kyou;
+美>Bi;
+羔>Kou;
+羚>Rei;
+羝>Tei;
+羞>Shuu;
+羣>Gun;
+群>Gun;
+羨>Sen;
+義>Gi;
+羮>Kou;
+羯>Katsu;
+羲>Gi;
+羶>Sen;
+羸>Rui;
+羹>Kou;
+羽>U;
+翁>Ou;
+翅>Shi;
+翆>Sui;
+翊>Yoku;
+翌>Yoku;
+習>Shuu;
+翔>Shou;
+翕>Kyuu;
+翠>Sui;
+翡>Hi;
+翦>Sen;
+翩>Hen;
+翫>Gan;
+翰>Kan;
+翳>Ei;
+翹>Gyou;
+翻>Hon;
+翼>Yoku;
+耀>You;
+老>Rou;
+考>Kou;
+耄>Mou;
+者>Sha;
+耆>Ki;
+耋>Tetsu;
+而>Ji;
+耐>Tai;
+耒>Rai;
+耕>Kou;
+耗>Mou;
+耘>Un;
+耙>Ha;
+耜>Shi;
+耡>Jo;
+耨>Dou;
+耳>Ji;
+耶>Ya;
+耻>Chi;
+耽>Tan;
+耿>Kou;
+聆>Rei;
+聊>Ryou;
+聒>Katsu;
+聖>Sei;
+聘>Hei;
+聚>Shuu;
+聞>Bun;
+聟>Sei;
+聡>Sou;
+聨>Ren;
+聯>Ren;
+聰>Sou;
+聲>Sei;
+聳>Shou;
+聴>Chou;
+聶>Jou;
+職>Shoku;
+聹>Dei;
+聽>Chou;
+聾>Rou;
+聿>Itsu;
+肄>I;
+肅>Shuku;
+肆>Shi;
+肇>Chou;
+肉>Niku;
+肋>Roku;
+肌>Ki;
+肓>Kou;
+肖>Shou;
+肘>Chuu;
+肚>To;
+肛>Kou;
+肝>Kan;
+股>Ko;
+肢>Shi;
+肥>Hi;
+肩>Ken;
+肪>Bou;
+肬>Yuu;
+肭>Dotsu;
+肯>Kou;
+肱>Kou;
+育>Iku;
+肴>Kou;
+肺>Hai;
+胃>I;
+胄>Chuu;
+胆>Tan;
+背>Hai;
+胎>Tai;
+胖>Han;
+胙>So;
+胚>Hai;
+胛>Kou;
+胝>Chi;
+胞>Hou;
+胡>Ko;
+胤>In;
+胥>Sho;
+胯>Ko;
+胱>Kou;
+胴>Dou;
+胸>Kyou;
+胼>Hen;
+能>Nou;
+脂>Shi;
+脅>Kyou;
+脆>Zei;
+脇>Kyou;
+脈>Myaku;
+脉>Myaku;
+脊>Seki;
+脚>Kyaku;
+脛>Kei;
+脣>Shin;
+脩>Shuu;
+脯>Ho;
+脱>Datsu;
+脳>Nou;
+脹>Chou;
+脾>Hi;
+腆>Ten;
+腋>Eki;
+腎>Jin;
+腐>Fu;
+腑>Fu;
+腓>Hi;
+腔>Kou;
+腕>Wan;
+腟>Chitsu;
+腥>Sei;
+腦>Nou;
+腫>Shou;
+腮>Sai;
+腰>You;
+腱>Ken;
+腴>Yu;
+腸>Chou;
+腹>Fuku;
+腺>Sen;
+腿>Tai;
+膀>Hou;
+膂>Ryo;
+膃>Otsu;
+膈>Kaku;
+膊>Haku;
+膏>Kou;
+膓>Chou;
+膕>Kaku;
+膚>Fu;
+膜>Maku;
+膝>Shitsu;
+膠>Kou;
+膣>Chitsu;
+膨>Bou;
+膩>Ji;
+膰>Han;
+膳>Sen;
+膵>Sui;
+膸>Zui;
+膺>You;
+膽>Tan;
+膾>Kai;
+膿>Dou;
+臀>Den;
+臂>Hi;
+臆>Oku;
+臈>Rou;
+臉>Ren;
+臍>Sei;
+臑>Dau;
+臓>Zou;
+臘>Rou;
+臙>En;
+臚>Ryo;
+臟>Zou;
+臠>Ren;
+臣>Shin;
+臥>Ga;
+臧>Zou;
+臨>Rin;
+自>Shi;
+臭>Shuu;
+至>Shi;
+致>Chi;
+臺>Tai;
+臻>Shin;
+臼>Kyuu;
+臾>Yu;
+舁>Yo;
+舂>Shou;
+舅>Kyuu;
+與>Yo;
+興>Kyou;
+舉>Kyo;
+舊>Kyuu;
+舌>Zetsu;
+舍>Sha;
+舎>Sha;
+舐>Shi;
+舒>Jo;
+舖>Ho;
+舗>Ho;
+舘>Kan;
+舛>Sen;
+舜>Shun;
+舞>Bu;
+舟>Shuu;
+舩>Sen;
+航>Kou;
+舫>Hou;
+般>Han;
+舮>Ro;
+舳>Chiku;
+舵>Ta;
+舶>Haku;
+舷>Ken;
+舸>Ka;
+船>Sen;
+艀>Fu;
+艇>Tei;
+艘>Sou;
+艙>Sou;
+艚>Sou;
+艟>Dou;
+艢>Shou;
+艤>Gi;
+艦>Kan;
+艨>Mou;
+艪>Ro;
+艫>Ro;
+艮>Kon;
+良>Ryou;
+艱>Kan;
+色>Shoku;
+艶>En;
+艷>En;
+艸>Sou;
+艾>Gai;
+芋>U;
+芍>Shaku;
+芒>Bou;
+芙>Fu;
+芝>Shi;
+芟>San;
+芥>Kai;
+芦>Ro;
+芫>Gen;
+芬>Fun;
+芭>Ba;
+芯>Shin;
+花>Ka;
+芳>Hou;
+芸>Gei;
+芹>Kin;
+芻>Suu;
+芽>Ga;
+苅>Gai;
+苑>En;
+苒>Zen;
+苓>Rei;
+苔>Tai;
+苗>Byou;
+苙>Ryuu;
+苛>Ka;
+苜>Boku;
+苞>Hou;
+苟>Kou;
+苡>I;
+苣>Kyo;
+若>Jaku;
+苦>Ku;
+苧>Cho;
+苫>Sen;
+英>Ei;
+苳>Tou;
+苴>So;
+苹>Hei;
+苺>Mai;
+苻>Fu;
+茂>Mo;
+范>Han;
+茄>Ka;
+茅>Bou;
+茆>Bou;
+茉>Matsu;
+茎>Kei;
+茖>Kaku;
+茗>Mei;
+茘>Ri;
+茜>Sen;
+茣>Go;
+茨>Shi;
+茫>Bou;
+茯>Fuku;
+茱>Shu;
+茲>Ji;
+茴>Kai;
+茵>In;
+茶>Cha;
+茸>Jou;
+茹>Jo;
+荀>Jun;
+荅>Tou;
+草>Sou;
+荊>Kei;
+荏>Jin;
+荐>Sen;
+荒>Kou;
+荘>Sou;
+荳>Tou;
+荵>Jin;
+荷>Ka;
+荻>Teki;
+荼>To;
+莅>Ri;
+莇>Cho;
+莉>Chi;
+莊>Sou;
+莎>Sa;
+莓>Mai;
+莖>Kei;
+莚>En;
+莞>Kan;
+莟>Kan;
+莠>Yuu;
+莢>Kyou;
+莨>Rou;
+莪>Ga;
+莫>Bo;
+莱>Rai;
+莵>To;
+莽>Bou;
+菁>Sei;
+菅>Kan;
+菊>Kiku;
+菌>Kin;
+菎>Kon;
+菓>Ka;
+菖>Shou;
+菘>Suu;
+菜>Sai;
+菟>To;
+菠>Ha;
+菩>Hai;
+菫>Kin;
+華>Ka;
+菰>Ko;
+菱>Ryou;
+菲>Hi;
+菴>An;
+菷>Sou;
+菻>Rin;
+菽>Shuku;
+萃>Sui;
+萄>Tou;
+萇>Chou;
+萋>Sei;
+萌>Hou;
+萍>Hei;
+萎>I;
+萓>Gi;
+萠>Hou;
+萩>Shuu;
+萪>Kuwa;
+萬>Man;
+萱>Ken;
+萵>Wa;
+萸>Yu;
+萼>Gaku;
+落>Raku;
+葆>Ho;
+葉>You;
+葎>Ritsu;
+著>Cho;
+葛>Katsu;
+葡>Ho;
+葢>Gai;
+董>Tou;
+葦>I;
+葩>Ha;
+葫>Ko;
+葬>Sou;
+葭>Ka;
+葮>Tan;
+葯>Yaku;
+葱>Sou;
+葵>Ki;
+葷>Gun;
+葹>Shi;
+葺>Shuu;
+蒂>Tei;
+蒄>Kan;
+蒋>Shou;
+蒐>Shuu;
+蒔>Shi;
+蒙>Bou;
+蒜>San;
+蒟>Kon;
+蒡>Hou;
+蒭>Suu;
+蒲>Ho;
+蒸>Jou;
+蒹>Ken;
+蒻>Jaku;
+蒼>Sou;
+蒿>Kou;
+蓁>Shin;
+蓄>Chiku;
+蓆>Seki;
+蓉>You;
+蓊>Ou;
+蓋>Gai;
+蓍>Shi;
+蓐>Joku;
+蓑>Sa;
+蓖>Hi;
+蓚>Chou;
+蓬>Hou;
+蓮>Ren;
+蓴>Shun;
+蓼>Ryou;
+蓿>Shuku;
+蔀>Hou;
+蔆>Ryou;
+蔑>Betsu;
+蔓>Ban;
+蔔>Fuku;
+蔕>Tei;
+蔗>Sho;
+蔘>Shin;
+蔚>Utsu;
+蔟>Zoku;
+蔡>Sai;
+蔦>Chou;
+蔬>So;
+蔭>In;
+蔵>Zou;
+蔽>Hei;
+蕀>Kyoku;
+蕁>Jin;
+蕃>Ban;
+蕈>Shin;
+蕉>Shou;
+蕊>Zui;
+蕋>Zui;
+蕎>Kyou;
+蕕>Yuu;
+蕗>Ro;
+蕘>Jou;
+蕚>Gaku;
+蕣>Shun;
+蕨>Ketsu;
+蕩>Tou;
+蕪>Bu;
+蕭>Shou;
+蕷>Yo;
+蕾>Rai;
+薀>Un;
+薄>Haku;
+薇>Bi;
+薈>Kai;
+薊>Kei;
+薐>Rou;
+薑>Kyou;
+薔>Shoku;
+薗>En;
+薙>Tei;
+薛>Setsu;
+薜>Heki;
+薤>Kai;
+薦>Sen;
+薨>Kou;
+薩>Satsu;
+薪>Shin;
+薫>Kun;
+薬>Yaku;
+薮>Sou;
+薯>Sho;
+薹>Tai;
+薺>Sei;
+藁>Kou;
+藉>Sha;
+藍>Ran;
+藏>Zou;
+藐>Baku;
+藕>Guu;
+藜>Rei;
+藝>Gei;
+藤>Tou;
+藥>Yaku;
+藩>Han;
+藪>Sou;
+藷>Sho;
+藹>Ai;
+藺>Rin;
+藻>Sou;
+藾>Rai;
+蘂>Zui;
+蘆>Ro;
+蘇>So;
+蘊>Un;
+蘋>Hin;
+蘓>So;
+蘖>Getsu;
+蘗>Haku;
+蘚>Sen;
+蘢>Rou;
+蘭>Ran;
+蘯>Tou;
+蘿>Ra;
+虍>Ko;
+虎>Ko;
+虐>Gyaku;
+虔>Ken;
+處>Sho;
+虚>Kyo;
+虜>Ryo;
+虞>Gu;
+號>Gou;
+虧>Ki;
+虫>Chuu;
+虱>Shitsu;
+虹>Kou;
+虻>Bou;
+蚊>Bun;
+蚋>Zei;
+蚌>Hou;
+蚓>In;
+蚕>San;
+蚣>Kou;
+蚤>Sou;
+蚩>Shi;
+蚪>Tou;
+蚫>Hou;
+蚯>Kyuu;
+蚰>Yuu;
+蚶>Kan;
+蛄>Ko;
+蛆>Sho;
+蛇>Ja;
+蛉>Rei;
+蛋>Tan;
+蛍>Kei;
+蛎>Rei;
+蛔>Kai;
+蛙>A;
+蛛>Shu;
+蛞>Katsu;
+蛟>Kou;
+蛤>Kou;
+蛩>Kyou;
+蛬>Kyou;
+蛭>Shitsu;
+蛮>Ban;
+蛸>Sou;
+蛹>You;
+蛻>Zei;
+蛾>Ga;
+蜀>Shoku;
+蜂>Hou;
+蜃>Shin;
+蜆>Ken;
+蜈>Go;
+蜉>Fu;
+蜊>Ri;
+蜍>Sho;
+蜑>Tan;
+蜒>En;
+蜘>Chi;
+蜚>Hi;
+蜜>Mitsu;
+蜥>Seki;
+蜩>Chou;
+蜴>Eki;
+蜷>Ken;
+蜻>Sei;
+蜿>En;
+蝉>Sen;
+蝋>Rou;
+蝌>Ka;
+蝎>Katsu;
+蝓>Yu;
+蝕>Shoku;
+蝗>Kou;
+蝙>Hen;
+蝟>I;
+蝠>Fuku;
+蝣>Yuu;
+蝦>Ka;
+蝨>Shitsu;
+蝪>Tou;
+蝮>Fuku;
+蝴>Ko;
+蝶>Chou;
+蝸>Ka;
+蝿>You;
+螂>Rou;
+融>Yuu;
+螟>Mei;
+螢>Kei;
+螫>Seki;
+螯>Gou;
+螳>Tou;
+螺>Ra;
+螻>Rou;
+螽>Shuu;
+蟀>Shutsu;
+蟄>Chitsu;
+蟆>Ba;
+蟇>Ba;
+蟋>Shitsu;
+蟐>Tou;
+蟒>Bou;
+蟠>Han;
+蟯>Gyou;
+蟲>Ki;
+蟶>Tei;
+蟷>Tou;
+蟹>Kai;
+蟻>Gi;
+蟾>Sen;
+蠅>You;
+蠍>Katsu;
+蠎>Bou;
+蠏>Kai;
+蠑>Ei;
+蠕>Da;
+蠖>Kaku;
+蠡>Rei;
+蠢>Shun;
+蠣>Rei;
+蠧>To;
+蠱>Ko;
+蠶>San;
+蠹>To;
+蠻>Ban;
+血>Ketsu;
+衂>Jiku;
+衄>Jiku;
+衆>Shuu;
+行>Kou;
+衍>En;
+衒>Ken;
+術>Jutsu;
+街>Gai;
+衙>Gyo;
+衛>Ei;
+衝>Shou;
+衞>Ei;
+衡>Kou;
+衢>Ku;
+衣>I;
+表>Hyou;
+衫>San;
+衰>Sa;
+衲>Dou;
+衵>Jitsu;
+衷>Chuu;
+衽>Jin;
+衾>Kin;
+衿>Kin;
+袁>En;
+袂>Bei;
+袈>Ka;
+袋>Tai;
+袍>Hou;
+袒>Tan;
+袖>Shuu;
+袗>Shin;
+袙>Ha;
+袞>Kon;
+袢>Han;
+袤>Bou;
+被>Hi;
+袮>Ne;
+袱>Fuku;
+袴>Ko;
+袵>Jin;
+袷>Kou;
+袿>Kei;
+裁>Sai;
+裂>Retsu;
+装>Sou;
+裏>Ri;
+裔>Ei;
+裕>Yuu;
+裘>Kyuu;
+裙>Kun;
+補>Ho;
+裝>Sou;
+裟>Sa;
+裡>Ri;
+裨>Hi;
+裲>Ryou;
+裳>Shou;
+裴>Hai;
+裸>Ra;
+裹>Ka;
+裼>Seki;
+製>Sei;
+裾>Kyo;
+褂>Kai;
+複>Fuku;
+褊>Hen;
+褌>Kon;
+褐>Katsu;
+褒>Hou;
+褓>Ho;
+褝>Tan;
+褞>On;
+褥>Joku;
+褪>Tai;
+褫>Chi;
+褶>Chou;
+褸>Rou;
+褻>Setsu;
+襁>Kyou;
+襃>Hou;
+襄>Jou;
+襌>Tan;
+襍>Zatsu;
+襖>Ou;
+襞>Heki;
+襟>Kin;
+襠>Tou;
+襤>Ran;
+襦>Ju;
+襪>Betsu;
+襭>Ketsu;
+襯>Shin;
+襲>Shuu;
+襴>Ran;
+襾>Aka;
+西>Sei;
+要>You;
+覃>Tan;
+覆>Fuku;
+覇>Ha;
+覈>Kaku;
+覊>Ki;
+見>Ken;
+規>Ki;
+覓>Beki;
+視>Shi;
+覗>Shi;
+覘>Ten;
+覚>Kaku;
+覡>Geki;
+覦>Yu;
+覧>Ran;
+覩>To;
+親>Shin;
+覬>Ki;
+覯>Kou;
+覲>Kin;
+観>Kan;
+覺>Kaku;
+覽>Ran;
+覿>Teki;
+觀>Kan;
+角>Kaku;
+觚>Ko;
+觜>Shi;
+觝>Tei;
+解>Kai;
+触>Shoku;
+觧>Kai;
+觴>Shou;
+觸>Shoku;
+言>Gen;
+訂>Tei;
+訃>Fu;
+計>Kei;
+訊>Jin;
+訌>Kou;
+討>Tou;
+訐>Ketsu;
+訓>Kun;
+訖>Kitsu;
+託>Taku;
+記>Ki;
+訛>Ka;
+訝>Ga;
+訟>Shou;
+訣>Ketsu;
+訥>Totsu;
+訪>Hou;
+設>Setsu;
+許>Kyo;
+訳>Yaku;
+訴>So;
+訶>Ka;
+診>Shin;
+註>Chuu;
+証>Shou;
+詁>Ko;
+詆>Tei;
+詈>Ri;
+詐>Sa;
+詑>Ta;
+詒>Tai;
+詔>Shou;
+評>Hyou;
+詛>So;
+詞>Shi;
+詠>Ei;
+詢>Jun;
+詣>Kei;
+試>Shi;
+詩>Shi;
+詫>Ta;
+詬>Kou;
+詭>Ki;
+詮>Sen;
+詰>Kitsu;
+話>Wa;
+該>Gai;
+詳>Shou;
+詼>Kai;
+誂>Chou;
+誄>Rui;
+誅>Chuu;
+誇>Ko;
+誉>Yo;
+誌>Shi;
+認>Nin;
+誑>Kyou;
+誓>Sei;
+誕>Tan;
+誘>Yuu;
+誚>Shou;
+語>Go;
+誠>Sei;
+誡>Kai;
+誣>Fu;
+誤>Go;
+誥>Kou;
+誦>Shou;
+誨>Kai;
+説>Setsu;
+読>Toku;
+誰>Sui;
+課>Ka;
+誹>Hi;
+誼>Gi;
+調>Chou;
+諂>Ten;
+諄>Jun;
+談>Dan;
+請>Sei;
+諌>Kan;
+諍>Sou;
+諏>Shu;
+諒>Ryou;
+論>Ron;
+諚>Jou;
+諛>Yu;
+諜>Chou;
+諞>Hen;
+諠>Ken;
+諡>Shi;
+諢>Kon;
+諤>Gaku;
+諦>Tei;
+諧>Kai;
+諫>Kan;
+諭>Yu;
+諮>Shi;
+諱>Ki;
+諳>An;
+諷>Fuu;
+諸>Sho;
+諺>Gen;
+諾>Daku;
+謀>Bou;
+謁>Etsu;
+謂>I;
+謄>Tou;
+謇>Ken;
+謌>Ka;
+謎>Mei;
+謐>Hitsu;
+謔>Gyaku;
+謖>Shoku;
+謗>Bou;
+謙>Ken;
+謚>Shi;
+講>Kou;
+謝>Sha;
+謠>You;
+謡>You;
+謦>Kei;
+謨>Bo;
+謫>Taku;
+謬>Byou;
+謳>Ou;
+謹>Kin;
+謾>Ban;
+譁>Ka;
+證>Shou;
+譌>Ka;
+譎>Kitsu;
+譏>Ki;
+譖>Shin;
+識>Shiki;
+譚>Tan;
+譛>Shin;
+譜>Fu;
+譟>Sou;
+警>Kei;
+譫>Sen;
+譬>Hi;
+譯>Yaku;
+議>Gi;
+譱>Zen;
+譲>Jou;
+譴>Ken;
+護>Go;
+譽>Yo;
+讀>Toku;
+讃>San;
+變>Hen;
+讌>En;
+讎>Shuu;
+讐>Shuu;
+讒>San;
+讓>Jou;
+讖>Shin;
+讙>Kan;
+讚>San;
+谷>Koku;
+谺>Ka;
+谿>Kei;
+豁>Katsu;
+豆>Tou;
+豈>Ki;
+豊>Hou;
+豌>En;
+豎>Ju;
+豐>Hou;
+豕>Shi;
+豚>Ton;
+象>Shou;
+豢>Ken;
+豪>Gou;
+豫>Yo;
+豬>Cho;
+豸>Chi;
+豹>Hou;
+豺>Sai;
+豼>Hi;
+貂>Chou;
+貅>Kyuu;
+貉>Kaku;
+貊>Haku;
+貌>Bou;
+貍>Ri;
+貎>Gei;
+貔>Hi;
+貘>Baku;
+貝>Bai;
+貞>Tei;
+負>Fu;
+財>Zai;
+貢>Kou;
+貧>Hin;
+貨>Ka;
+販>Han;
+貪>Don;
+貫>Kan;
+責>Seki;
+貭>Shitsu;
+貮>Ni;
+貯>Cho;
+貰>Sei;
+貲>Shi;
+貳>Ni;
+貴>Ki;
+貶>Hen;
+買>Bai;
+貸>Tai;
+費>Hi;
+貼>Ten;
+貽>I;
+貿>Bou;
+賀>Ga;
+賁>Hi;
+賂>Ro;
+賃>Chin;
+賄>Wai;
+資>Shi;
+賈>Ko;
+賊>Zoku;
+賍>Sou;
+賎>Sen;
+賑>Shin;
+賓>Hin;
+賚>Rai;
+賛>San;
+賜>Shi;
+賞>Shou;
+賠>Bai;
+賢>Ken;
+賣>Bai;
+賤>Sen;
+賦>Fu;
+質>Shitsu;
+賭>To;
+賺>Tan;
+賻>Fu;
+購>Kou;
+賽>Sai;
+贄>Shi;
+贅>Zei;
+贇>In;
+贈>Zou;
+贊>San;
+贋>Gan;
+贍>Sen;
+贏>Ei;
+贐>Shin;
+贓>Zou;
+贔>Hi;
+贖>Shoku;
+赤>Seki;
+赦>Sha;
+赧>Tan;
+赫>Kaku;
+赭>Sha;
+走>Sou;
+赱>Sou;
+赳>Kyuu;
+赴>Fu;
+起>Ki;
+趁>Chin;
+超>Chou;
+越>Etsu;
+趙>Chou;
+趣>Shu;
+趨>Suu;
+足>Soku;
+趺>Fu;
+趾>Shi;
+跂>Ki;
+跋>Batsu;
+跌>Tetsu;
+跏>Ka;
+跖>Seki;
+跚>San;
+跛>Ha;
+距>Kyo;
+跟>Kon;
+跡>Seki;
+跣>Sen;
+跨>Ko;
+跪>Ki;
+跫>Kyou;
+路>Ro;
+跳>Chou;
+践>Sen;
+跼>Kyoku;
+跿>To;
+踈>Sho;
+踉>Ryou;
+踊>You;
+踏>Tou;
+踐>Sen;
+踝>Ka;
+踞>Kyo;
+踟>Chi;
+踪>Shou;
+踰>Yu;
+踴>You;
+踵>Shou;
+蹂>Juu;
+蹄>Tei;
+蹇>Ken;
+蹈>Tou;
+蹉>Sa;
+蹊>Kei;
+蹌>Shou;
+蹐>Seki;
+蹕>Hitsu;
+蹙>Shuku;
+蹟>Seki;
+蹠>Seki;
+蹣>Man;
+蹤>Shou;
+蹲>Son;
+蹴>Shuu;
+蹶>Ketsu;
+蹼>Boku;
+躁>Sou;
+躄>Heki;
+躅>Choku;
+躇>Cho;
+躊>Chuu;
+躋>Sei;
+躍>Yaku;
+躑>Teki;
+躓>Chi;
+躔>Ten;
+躙>Rin;
+躡>Jou;
+躪>Rin;
+身>Shin;
+躬>Kyuu;
+躯>Ku;
+躰>Tei;
+躱>Ta;
+軆>Tei;
+車>Sha;
+軋>Atsu;
+軌>Ki;
+軍>Gun;
+軒>Ken;
+軛>Aku;
+軟>Nan;
+転>Ten;
+軣>Gou;
+軫>Shin;
+軸>Jiku;
+軻>Ka;
+軼>Itsu;
+軽>Kei;
+軾>Shoku;
+較>Kaku;
+輅>Ro;
+載>Sai;
+輊>Chi;
+輌>Ryou;
+輒>Chou;
+輓>Ban;
+輔>Fu;
+輕>Kei;
+輙>Chou;
+輛>Ryou;
+輜>Shi;
+輝>Ki;
+輟>Tetsu;
+輦>Ren;
+輩>Hai;
+輪>Rin;
+輯>Shuu;
+輳>Sou;
+輸>Yu;
+輹>Fuku;
+輻>Fuku;
+輾>Ten;
+輿>Yo;
+轂>Koku;
+轄>Katsu;
+轅>En;
+轆>Roku;
+轉>Ten;
+轍>Tetsu;
+轎>Kyou;
+轗>Kan;
+轜>Ji;
+轟>Gou;
+轡>Hi;
+轢>Reki;
+轣>Reki;
+轤>Ro;
+辛>Shin;
+辜>Ko;
+辞>Ji;
+辟>Heki;
+辣>Ratsu;
+辧>Ben;
+辨>Ben;
+辭>Ji;
+辮>Hen;
+辯>Ben;
+辰>Shin;
+辱>Joku;
+農>Nou;
+辺>Hen;
+辿>Ten;
+迂>U;
+迄>Kitsu;
+迅>Jin;
+迎>Gei;
+近>Kin;
+返>Hen;
+迢>Chou;
+迥>Kei;
+迦>Ka;
+迩>Ji;
+迪>Teki;
+迫>Haku;
+迭>Tetsu;
+迯>Tou;
+述>Jutsu;
+迴>Kai;
+迷>Mei;
+迸>Hou;
+迹>Seki;
+迺>Dai;
+追>Tsui;
+退>Tai;
+送>Sou;
+逃>Tou;
+逅>Kou;
+逆>Gyaku;
+逋>Ho;
+逍>Shou;
+逎>Shuu;
+透>Tou;
+逐>Chiku;
+逑>Kyuu;
+逓>Tei;
+途>To;
+逕>Kei;
+逖>Teki;
+逗>Tou;
+這>Gen;
+通>Tsuu;
+逝>Sei;
+逞>Tei;
+速>Soku;
+造>Zou;
+逡>Shun;
+逢>Hou;
+連>Ren;
+逮>Tai;
+週>Shuu;
+進>Shin;
+逵>Ki;
+逶>I;
+逸>Itsu;
+逹>Tatsu;
+逼>Hitsu;
+逾>Yu;
+遁>Ton;
+遂>Sui;
+遅>Chi;
+遇>Guu;
+遉>Tei;
+遊>Yuu;
+運>Un;
+遍>Hen;
+過>Ka;
+遏>Atsu;
+遐>Ka;
+遑>Kou;
+遒>Shuu;
+道>Dou;
+達>Tatsu;
+違>I;
+遘>Kou;
+遙>You;
+遜>Son;
+遞>Tei;
+遠>En;
+遡>So;
+遣>Ken;
+遥>You;
+遨>Gou;
+適>Teki;
+遭>Sou;
+遮>Sha;
+遯>Ton;
+遲>Chi;
+遵>Jun;
+遶>Jou;
+遷>Sen;
+選>Sen;
+遺>I;
+遼>Ryou;
+遽>Kyo;
+避>Hi;
+邀>You;
+邁>Bai;
+邂>Kai;
+邃>Sui;
+還>Kan;
+邇>Ji;
+邉>Hen;
+邊>Hen;
+邏>Ra;
+邑>Yuu;
+那>Da;
+邦>Hou;
+邨>Son;
+邪>Ya;
+邯>Kan;
+邱>Kyuu;
+邵>Shou;
+邸>Tei;
+郁>Iku;
+郊>Kou;
+郎>Rou;
+郛>Fu;
+郡>Gun;
+郢>Ei;
+郤>Geki;
+部>Bu;
+郭>Kaku;
+郵>Yuu;
+郷>Kyou;
+都>To;
+鄂>Gaku;
+鄒>Suu;
+鄙>Hi;
+鄭>Tei;
+鄰>Rin;
+鄲>Tan;
+酉>Yuu;
+酊>Tei;
+酋>Shuu;
+酌>Shaku;
+配>Hai;
+酎>Chuu;
+酒>Shu;
+酔>Sui;
+酖>Tan;
+酘>Tou;
+酢>Saku;
+酣>Kan;
+酥>So;
+酩>Mei;
+酪>Raku;
+酬>Shuu;
+酲>Tei;
+酳>In;
+酵>Kou;
+酷>Koku;
+酸>San;
+醂>Rin;
+醇>Shun;
+醉>Sui;
+醋>Saku;
+醍>Tei;
+醐>Ko;
+醒>Sei;
+醗>Hatsu;
+醜>Shuu;
+醢>Kai;
+醤>Shou;
+醪>Rou;
+醫>I;
+醯>Kei;
+醴>Rei;
+醵>Kyo;
+醸>Jou;
+醺>Kun;
+釀>Jou;
+釁>Kin;
+釆>Han;
+采>Sai;
+釈>Shaku;
+釉>Yuu;
+釋>Shaku;
+里>Ri;
+重>Chou;
+野>Ya;
+量>Ryou;
+釐>Ri;
+金>Kin;
+釖>Tou;
+釘>Tei;
+釛>Koku;
+釜>Fu;
+針>Shin;
+釟>Hatsu;
+釡>Fu;
+釣>Chou;
+釦>Kou;
+釧>Sen;
+釵>Sa;
+釶>Shi;
+釼>Ken;
+釿>Kin;
+鈍>Don;
+鈎>Kou;
+鈑>Han;
+鈔>Shou;
+鈕>Chuu;
+鈞>Kin;
+鈩>Ro;
+鈬>Taku;
+鈴>Rei;
+鈷>Ko;
+鈿>Ten;
+鉄>Tetsu;
+鉅>Kyo;
+鉈>Sha;
+鉉>Ken;
+鉋>Hou;
+鉐>Seki;
+鉗>Kan;
+鉚>Ryuu;
+鉛>En;
+鉞>Etsu;
+鉢>Hachi;
+鉤>Kou;
+鉦>Sei;
+鉱>Kou;
+鉾>Bou;
+銀>Gin;
+銃>Juu;
+銅>Dou;
+銑>Sen;
+銓>Sen;
+銕>Tetsu;
+銖>Shu;
+銘>Mei;
+銚>You;
+銛>Sen;
+銜>Kan;
+銭>Sen;
+銷>Shou;
+銹>Shuu;
+鋏>Kyou;
+鋒>Hou;
+鋤>Jo;
+鋩>Bou;
+鋪>Ho;
+鋭>Ei;
+鋳>Chuu;
+鋸>Kyo;
+鋺>En;
+鋼>Kou;
+錆>Sei;
+錏>A;
+錐>Sui;
+錘>Tsui;
+錙>Shi;
+錚>Sou;
+錠>Jou;
+錢>Sen;
+錣>Tei;
+錦>Kin;
+錨>Byou;
+錫>Seki;
+錬>Ren;
+錮>Ko;
+錯>Saku;
+録>Roku;
+錻>Bu;
+鍄>Kei;
+鍋>Ka;
+鍍>To;
+鍔>Gaku;
+鍖>Chin;
+鍛>Tan;
+鍜>Ka;
+鍠>Kou;
+鍬>Shuu;
+鍮>Chuu;
+鍵>Ken;
+鍼>Shin;
+鍾>Shou;
+鎌>Ren;
+鎔>You;
+鎖>Sa;
+鎗>Sou;
+鎚>Tsui;
+鎧>Gai;
+鎬>Kou;
+鎭>Chin;
+鎮>Chin;
+鎰>Itsu;
+鏃>Zoku;
+鏈>Ren;
+鏐>Ryuu;
+鏑>Teki;
+鏖>Ou;
+鏗>Kou;
+鏘>Shou;
+鏝>Man;
+鏡>Kyou;
+鏤>Rou;
+鏥>Shuu;
+鏨>San;
+鐃>Dou;
+鐇>Han;
+鐐>Ryou;
+鐓>Tai;
+鐔>Shin;
+鐘>Shou;
+鐙>Tou;
+鐚>A;
+鐡>Tetsu;
+鐫>Sen;
+鐵>Tetsu;
+鐶>Kan;
+鐸>Taku;
+鐺>Tou;
+鑁>Ban;
+鑄>Chuu;
+鑑>Kan;
+鑒>Kan;
+鑚>San;
+鑛>Kou;
+鑞>Rou;
+鑠>Shaku;
+鑢>Ryo;
+鑪>Ro;
+鑰>Yaku;
+鑵>Kan;
+鑷>Jou;
+鑼>Ra;
+鑽>San;
+鑾>Ran;
+鑿>Saku;
+钁>Kaku;
+長>Chou;
+門>Mon;
+閂>San;
+閃>Sen;
+閇>Hei;
+閉>Hei;
+開>Kai;
+閏>Jun;
+閑>Kan;
+間>Kan;
+閔>Bin;
+閘>Kou;
+閙>Tou;
+関>Kan;
+閣>Kaku;
+閤>Kou;
+閥>Batsu;
+閧>Kou;
+閨>Kei;
+閭>Ro;
+閲>Etsu;
+閹>En;
+閻>En;
+閼>A;
+閾>Yoku;
+闃>Geki;
+闇>An;
+闊>Katsu;
+闌>Ran;
+闍>To;
+闔>Kou;
+闕>Ketsu;
+闖>Chin;
+闘>Tou;
+關>Kan;
+闡>Sen;
+闢>Heki;
+闥>Tatsu;
+阜>Fu;
+阡>Sen;
+阨>Aku;
+阪>Han;
+阮>Gen;
+阯>Shi;
+防>Bou;
+阻>So;
+阿>A;
+陀>Da;
+陂>Ha;
+附>Fu;
+陋>Rou;
+陌>Haku;
+降>Kou;
+陏>Ta;
+限>Gen;
+陛>Hei;
+陜>Kou;
+陝>Sen;
+陞>Shou;
+陟>Choku;
+院>In;
+陣>Jin;
+除>Jo;
+陥>Kan;
+陦>Tou;
+陪>Bai;
+陬>Suu;
+陰>In;
+陲>Sui;
+陳>Chin;
+陵>Ryou;
+陶>Tou;
+陷>Kan;
+陸>Riku;
+険>Ken;
+陽>You;
+隅>Guu;
+隆>Ryuu;
+隈>Wai;
+隊>Tai;
+隋>Ta;
+隍>Kou;
+階>Kai;
+随>Zui;
+隔>Kaku;
+隕>In;
+隗>Kai;
+隘>Ai;
+隙>Geki;
+際>Sai;
+障>Shou;
+隠>In;
+隣>Rin;
+隧>Sui;
+隨>Zui;
+險>Ken;
+隰>Shitsu;
+隱>In;
+隲>Shitsu;
+隴>Rou;
+隶>Tai;
+隷>Rei;
+隸>Rei;
+隹>Sui;
+隻>Seki;
+隼>Jun;
+雀>Jaku;
+雁>Gan;
+雄>Yuu;
+雅>Ga;
+集>Shuu;
+雇>Ko;
+雉>Chi;
+雋>Sen;
+雌>Shi;
+雍>You;
+雎>Sho;
+雑>Zatsu;
+雕>Chou;
+雖>Sui;
+雙>Sou;
+雛>Suu;
+雜>Zatsu;
+離>Ri;
+難>Nan;
+雨>U;
+雪>Setsu;
+雫>Da;
+雰>Fun;
+雲>Un;
+零>Rei;
+雷>Rai;
+雹>Haku;
+電>Den;
+需>Ju;
+霄>Shou;
+霆>Tei;
+震>Shin;
+霈>Hai;
+霊>Rei;
+霍>Kaku;
+霎>Sou;
+霏>Hi;
+霑>Ten;
+霓>Gei;
+霖>Rin;
+霙>Ei;
+霜>Sou;
+霞>Ka;
+霤>Ryuu;
+霧>Mu;
+霪>In;
+霰>San;
+露>Ro;
+霸>Haku;
+霹>Heki;
+霽>Sei;
+霾>Bai;
+靂>Reki;
+靄>Ai;
+靆>Tai;
+靈>Rei;
+靉>Ai;
+青>Sei;
+靖>Sei;
+静>Sei;
+靜>Sei;
+非>Hi;
+靠>Kou;
+靡>Hi;
+面>Men;
+靤>Hou;
+靦>Ten;
+靨>You;
+革>Kaku;
+靫>Sai;
+靭>Jin;
+靱>Jin;
+靴>Ka;
+靹>Ketsu;
+靺>Matsu;
+靼>Tan;
+鞁>Hi;
+鞄>Hou;
+鞅>Ou;
+鞋>Ai;
+鞍>An;
+鞏>Kyou;
+鞘>Sou;
+鞜>Tou;
+鞠>Kiku;
+鞣>Juu;
+鞦>Shuu;
+鞨>Katsu;
+鞫>Kiku;
+鞭>Hen;
+鞳>Tou;
+鞴>Fuku;
+韃>Datsu;
+韆>Sen;
+韈>Betsu;
+韋>I;
+韓>Kan;
+韜>Tou;
+韭>Kyuu;
+韮>Kyou;
+韲>Sei;
+音>On;
+韵>In;
+韶>Shou;
+韻>In;
+響>Kyou;
+頁>Ketsu;
+頂>Chou;
+頃>Kei;
+項>Kou;
+順>Jun;
+須>Shu;
+頌>Shou;
+頏>Kou;
+預>Yo;
+頑>Gan;
+頒>Han;
+頓>Ton;
+頗>Ha;
+領>Ryou;
+頚>Kei;
+頡>Kitsu;
+頤>I;
+頬>Kyou;
+頭>Tou;
+頴>Ei;
+頷>Kan;
+頸>Kei;
+頻>Hin;
+頼>Rai;
+頽>Tai;
+顆>Ka;
+顋>Sai;
+題>Dai;
+額>Gaku;
+顎>Gaku;
+顏>Gan;
+顔>Gan;
+顕>Ken;
+願>Gan;
+顛>Ten;
+類>Rui;
+顧>Ko;
+顫>Sen;
+顯>Ken;
+顰>Hin;
+顱>Ro;
+顳>Shou;
+顴>Kan;
+風>Fuu;
+颯>Satsu;
+颱>Tai;
+颶>Ku;
+飃>Hyou;
+飄>Hyou;
+飆>Hyou;
+飛>Hi;
+飜>Hon;
+食>Shoku;
+飢>Ki;
+飩>Ton;
+飫>Yo;
+飭>Choku;
+飮>In;
+飯>Han;
+飲>In;
+飴>I;
+飼>Shi;
+飽>Hou;
+飾>Shoku;
+餃>Kou;
+餅>Hei;
+餉>Shou;
+養>You;
+餌>Ji;
+餐>San;
+餒>Dai;
+餓>Ga;
+餔>Ho;
+餘>Yo;
+餝>Shoku;
+餞>Sen;
+餠>Hei;
+餡>Kan;
+餤>Tan;
+館>Kan;
+餬>Ko;
+餮>Tetsu;
+餽>Ki;
+餾>Ryuu;
+饂>Un;
+饅>Man;
+饉>Kin;
+饋>Ki;
+饌>Sen;
+饐>I;
+饑>Ki;
+饒>Jou;
+饕>Tou;
+饗>Kyou;
+首>Shu;
+馗>Ki;
+馘>Kaku;
+香>Kou;
+馥>Fuku;
+馨>Kei;
+馬>Ba;
+馭>Gyo;
+馮>Hyou;
+馳>Chi;
+馴>Shun;
+馼>Bun;
+駁>Baku;
+駄>Ta;
+駅>Eki;
+駆>Ku;
+駈>Ku;
+駐>Chuu;
+駑>Do;
+駒>Ku;
+駕>Ga;
+駘>Tai;
+駛>Shi;
+駝>Ta;
+駟>Shi;
+駢>Hen;
+駭>Kai;
+駮>Haku;
+駱>Raku;
+駸>Shin;
+駻>Kan;
+駿>Shun;
+騁>Tei;
+騅>Sui;
+騎>Ki;
+騏>Ki;
+騒>Sou;
+験>Ken;
+騙>Hen;
+騨>Tan;
+騫>Ken;
+騰>Tou;
+騷>Sou;
+騾>Ra;
+驀>Baku;
+驂>San;
+驃>Hyou;
+驅>Ku;
+驍>Gyou;
+驕>Kyou;
+驗>Ken;
+驚>Kyou;
+驛>Eki;
+驟>Shuu;
+驢>Ryo;
+驤>Jou;
+驥>Ki;
+驩>Kan;
+驪>Ri;
+驫>Hyuu;
+骨>Kotsu;
+骭>Kan;
+骰>Tou;
+骸>Kai;
+骼>Kaku;
+髀>Hi;
+髄>Zui;
+髏>Rou;
+髑>Toku;
+髓>Zui;
+體>Tei;
+高>Kou;
+髞>Sou;
+髟>Hyou;
+髢>Tei;
+髣>Hou;
+髦>Bou;
+髪>Hatsu;
+髫>Chou;
+髭>Shi;
+髮>Hatsu;
+髯>Zen;
+髱>Hou;
+髴>Futsu;
+髷>Kyoku;
+髻>Kei;
+鬆>Shou;
+鬘>Ban;
+鬚>Shu;
+鬟>Kan;
+鬢>Hin;
+鬣>Ryou;
+鬥>Tou;
+鬧>Tou;
+鬨>Kou;
+鬩>Geki;
+鬪>Tou;
+鬮>Kyuu;
+鬯>Chou;
+鬱>Utsu;
+鬲>Reki;
+鬻>Shuku;
+鬼>Ki;
+魁>Kai;
+魂>Kon;
+魃>Batsu;
+魄>Haku;
+魅>Mi;
+魍>Bou;
+魎>Ryou;
+魏>Gi;
+魑>Chi;
+魔>Ma;
+魘>En;
+魚>Gyo;
+魯>Ro;
+魴>Hou;
+鮃>Hyou;
+鮎>Nen;
+鮑>Hou;
+鮒>Fu;
+鮓>Sa;
+鮟>An;
+鮠>Gai;
+鮨>Shi;
+鮪>I;
+鮫>Kou;
+鮭>Kei;
+鮮>Sen;
+鮹>Sou;
+鯀>Kon;
+鯆>Ho;
+鯉>Ri;
+鯊>Sa;
+鯔>Shi;
+鯖>Sei;
+鯛>Chou;
+鯡>Hi;
+鯢>Gei;
+鯣>Eki;
+鯤>Kon;
+鯨>Gei;
+鯰>Nen;
+鯵>Sou;
+鰄>I;
+鰆>Shun;
+鰈>Chou;
+鰉>Kou;
+鰊>Ren;
+鰌>Shuu;
+鰍>Shuu;
+鰐>Gaku;
+鰒>Fuku;
+鰓>Sai;
+鰔>Kan;
+鰕>Ka;
+鰛>On;
+鰡>Ryuu;
+鰤>Shi;
+鰥>Kan;
+鰭>Ki;
+鰮>On;
+鰲>Gou;
+鰹>Ken;
+鰺>Sou;
+鰻>Ban;
+鰾>Hyou;
+鱆>Shou;
+鱇>Kou;
+鱈>Setsu;
+鱒>Son;
+鱗>Rin;
+鱠>Kai;
+鱧>Rei;
+鱶>Shou;
+鱸>Ro;
+鳥>Chou;
+鳧>Fu;
+鳩>Kyuu;
+鳫>Gan;
+鳬>Fu;
+鳳>Hou;
+鳴>Mei;
+鳶>En;
+鴃>Ketsu;
+鴆>Chin;
+鴇>Hou;
+鴈>Gan;
+鴉>A;
+鴎>Ou;
+鴒>Rei;
+鴕>Ta;
+鴛>En;
+鴟>Shi;
+鴣>Ko;
+鴦>You;
+鴨>Ou;
+鴪>Itsu;
+鴬>Ou;
+鴻>Kou;
+鴾>Bou;
+鴿>Kou;
+鵁>Kou;
+鵄>Shi;
+鵐>Bu;
+鵑>Ken;
+鵙>Geki;
+鵜>Tei;
+鵝>Ga;
+鵞>Ga;
+鵠>Koku;
+鵡>Bu;
+鵬>Hou;
+鵯>Hi;
+鵲>Jaku;
+鵺>Ya;
+鶇>Tou;
+鶉>Shun;
+鶏>Kei;
+鶚>Gaku;
+鶤>Kon;
+鶩>Boku;
+鶯>Ou;
+鶲>Ou;
+鶴>Kaku;
+鶸>Jaku;
+鶺>Seki;
+鶻>Kotsu;
+鷁>Geki;
+鷂>You;
+鷄>Kei;
+鷆>Ten;
+鷏>Ten;
+鷓>Sha;
+鷙>Shi;
+鷦>Shou;
+鷭>Ban;
+鷯>Ryou;
+鷲>Shuu;
+鷸>Itsu;
+鷹>You;
+鷺>Ro;
+鷽>Kaku;
+鸚>Ou;
+鸛>Kan;
+鸞>Ran;
+鹵>Ro;
+鹸>Ken;
+鹹>Kan;
+鹽>En;
+鹿>Roku;
+麁>So;
+麈>Shu;
+麋>Bi;
+麌>Gu;
+麑>Gei;
+麒>Ki;
+麓>Roku;
+麕>Kin;
+麗>Rei;
+麝>Sha;
+麟>Rin;
+麥>Baku;
+麦>Baku;
+麩>Fu;
+麪>Men;
+麭>Hou;
+麸>Fu;
+麹>Kiku;
+麺>Men;
+麻>Ma;
+麼>Ma;
+麾>Ki;
+麿>Ro;
+黄>Kou;
+黌>Kou;
+黍>Sho;
+黎>Rei;
+黏>Nen;
+黐>Chi;
+黒>Koku;
+黔>Ken;
+默>Moku;
+黙>Moku;
+黛>Tai;
+黜>Chutsu;
+黝>Yuu;
+點>Ten;
+黠>Katsu;
+黥>Gei;
+黨>Tou;
+黯>An;
+黴>Bai;
+黶>En;
+黷>Toku;
+黹>Chi;
+黻>Futsu;
+黼>Ho;
+黽>Bou;
+鼇>Gou;
+鼈>Betsu;
+鼎>Tei;
+鼓>Ko;
+鼕>Tou;
+鼠>So;
+鼡>Sho;
+鼬>Yuu;
+鼻>Bi;
+鼾>Kan;
+齊>Sei;
+齋>Sai;
+齎>Sei;
+齏>Sei;
+齒>Shi;
+齔>Shin;
+齟>So;
+齠>Chou;
+齡>Rei;
+齢>Rei;
+齣>Shutsu;
+齦>Gin;
+齧>Ketsu;
+齪>Soku;
+齬>Gyo;
+齲>Ku;
+齶>Gaku;
+齷>Aku;
+龍>Ryuu;
+龕>Gan;
+龜>Ki;
+龝>Shuu;
+龠>Yaku;
+
+# eof
diff --git a/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt b/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt
new file mode 100644
index 00000000000..ef5f90c6840
--- /dev/null
+++ b/demos/src/com/ibm/icu/dev/demo/translit/thai_test.txt
@@ -0,0 +1,55 @@
+#--------------------------------------------------------------------
+# Copyright (c) 1999-2004, International Business Machines
+# Corporation and others. All Rights Reserved.
+#--------------------------------------------------------------------
+@UPPERFILTER@
+Unicode คืออะไร?
+Unicode กำหนดหมายเลขเฉพาะสำหรับทุกอักขระ
+โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด
+ไม่ขึ้นกับว่าจะเป็นโปรแกรมใด
+และไม่ว่าจะเป็นภาษาใด
+
+โดยพื้นฐานแล้ว, คอมพิวเตอร์จะเกี่ยวข้องกับเรื่องของตัวเลข. คอมพิวเตอร์จัดเก็บตัวอักษรและอักขระอื่นๆ โดยการกำหนดหมายเลขให้สำหรับแต่ละตัว. ก่อนหน้าที่๊ Unicode จะถูกสร้างขึ้น, ได้มีระบบ encoding อยู่หลายร้อยระบบสำหรับการกำหนดหมายเลขเหล่านี้. ไม่มี encoding ใดที่มีจำนวนตัวอักขระมากเพียงพอ: ยกตัวอย่างเช่น, เฉพาะในกลุ่มสหภาพยุโรปเพียงแห่งเดียว ก็ต้องการหลาย encoding ในการครอบคลุมทุกภาษาในกลุ่ม. หรือแม้แต่ในภาษาเดี่ยว เช่น ภาษาอังกฤษ ก็ไม่มี encoding ใดที่เพียงพอสำหรับทุกตัวอักษร, เครื่องหมายวรรคตอน และสัญลักษณ์ทางเทคนิคที่ใช้กันอยู่ทั่วไป.
+
+ระบบ encoding เหล่านี้ยังขัดแย้งซึ่งกันและกัน. นั่นก็คือ, ในสอง encoding สามารถใช้หมายเลขเดียวกันสำหรับตัวอักขระสองตัวที่แตกต่างกัน,หรือใช้หมายเลขต่างกันสำหรับอักขระตัวเดียวกัน. ในระบบคอมพิวเตอร์ (โดยเฉพาะเซิร์ฟเวอร์) ต้องมีการสนับสนุนหลาย encoding; และเมื่อข้อมูลที่ผ่านไปมาระหว่างการเข้ารหัสหรือแพล็ตฟอร์มที่ต่างกัน, ข้อมูลนั้นจะเสี่ยงต่อการผิดพลาดเสียหาย.
+
+Unicode จะเปลี่ยนแปลงสิ่งเหล่านั้นทั้งหมด!
+Unicode กำหนดหมายเลขเฉพาะสำหรับแต่ละอักขระ, โดยไม่สนใจว่าเป็นแพล็ตฟอร์มใด, ไม่ขึ้นกับว่าจะเป็นโปรแกรมใดและไม่ว่าจะเป็นภาษาใด. มาตรฐาน Unicode ได้ถูกนำไปใช้โดยผู้นำในอุตสาหกรรม เช่น Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, Sun, Sybase, Unisys และอื่นๆ อีกมาก. Unicode เป็นสิ่งที่จำเป็นสำหรับมาตรฐานใหม่ๆ เช่น XML, Java, ECMAScript (JavaScript), LDAP, CORBA 3.0, WML ฯลฯ., และเป็นแนวทางอย่างเป็นทางการในการทำ ISO/IEC 10646. Unicode ได้รับการสนับสนุนในระบบปฏิบัติการจำนวนมาก, บราวเซอร์ใหม่ๆ ทกตัว, และผลิตภัณฑ์อื่นๆ อีกมาก. การเกิดขึ้นของ Unicode Standard และทูลส์ต่างๆ ที่มีในการสนับสนุน Unicode, เป็นหนึ่งในแนวโน้มทางเทคโนโลยีซอฟต์แวร์ระดับโลกที่มีความสำคัญที่สุด.
+
+การรวม Unicode เข้าไปในระบบไคลเอ็นต์-เซิร์ฟเวอร์ หรือแอ็พพลิเคชันแบบ multi-tiered และเว็บไซต์ จะทำให้เกิดการประหยัดค่าใช้จ่ายมากกว่าการใช้ชุดอักขระแบบเดิม. Unicode ทำให้ผลิตภัณฑ์ซอฟต์แวร์หนึ่งเดียว หรือเว็บไซต์แห่งเดียว รองรับได้หลายแพล็ตฟอร์ม, หลายภาษาและหลายประเทศโดยไม่ต้องทำการรื้อปรับระบบ. Unicode ยังทำให้ข้อมูลสามารถเคลื่อนย้ายไปมาในหลายๆ ระบบโดยไม่เกิดความผิดพลาดเสียหาย.
+
+เกี่ยวกับ Unicode Consortium
+Unicode Consortium เป็นองค์กรไม่แสวงหากำไรที่ก่อตั้งขึ้นเพื่อพัฒนา, ขยายและส่งเสริมการใช้ Unicode Standard, ซึ่งกำหนดรูปแบบการแทนค่าของข้อความในผลิตภัณฑ์ซอฟต์แวร์และมาตรฐานใหม่ๆ. สมาชิกของสมาคมเป็นตัวแทนจากบริษัทและองค์กรในอุตสาหกรรมคอมพิวเตอร์และการประมวลผลสารสนเทศ. สมาคมได้รับการสนับสนุนทางการเงินผ่านทางค่าธรรมเนียมของการเป็นสมาชิกเท่านั้น. สมาชิกภาพของ Unicode Consortium เปิดกว้างสำหรับองค์กรหรือบุคคลใดๆ ในโลกที่ต้องการสนับสนุน Unicode Standard และช่วยเหลือการขยายตัวและการนำ Unicode ไปใช้งาน.
+
+สำหรับข้อมูลเพิ่มเติม, ให้ดูที่ Glossary, Sample Unicode-Enabled Products, Technical Introduction และ Useful Resources.
+@TITLECASE@
+ก๊กเฮง แซ่แต้
+กชกร ศราทธทัต
+กติกา อังคสุภณ
+กนก ธรรมประทีป
+กนก วงศ์ทองศรี
+กนกกร ช้างเย็นฉ่ำ
+กนกฉัตร์ ถาวรนันท์
+กนกนวล โปษยะนันทน์
+กนกพร คมคาย
+กนกพร ตีรเลิศพานิช
+กนกพร พันทร
+กนกพร ศรีบัณฑิต
+กนกพร อติวรรณาพัฒน์
+กนกพรรณ ศรีวนาภิรมย์
+กนกรัตน์ เกียรติยิ่งอังศุลี
+กนกรัตน์ สุธรรมพิทักษ์
+กนกวรรณ คงคาประเสริฐ
+กนกวรรณ แซ่เตียว
+กนกวรรณ บุญประเสริฐ
+กนกวรรณ รักทรัพย์
+กนกวรรณ สัจจพงษ์
+กนกวรรณ อุ้ยวงศ์ไพศาล
+กนกศักดิ์ ยิ่งยง
+กนกแก้ว กรสมิต
+กนิษฐา ทนุถนอมราษฎร์
+กนิษฐา หวังวิบูลย์กิจ
+กมล กาญจนโรจน์
+กมล คัมภีร์
+กมล เจตน์มงคลรัตน์
+กมล ชูตระกูลธรรม
\ No newline at end of file
diff --git a/eclipse-build/build.properties b/eclipse-build/build.properties
new file mode 100644
index 00000000000..642e3895d52
--- /dev/null
+++ b/eclipse-build/build.properties
@@ -0,0 +1,7 @@
+#*******************************************************************************
+#* Copyright (C) 2010-2011, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+icu4j.plugin.impl.version.string=4.8.0
+copyright.eclipse=Licensed Materials - Property of IBM \n (C) Copyright IBM Corp. 2000, 2011. All Rights Reserved. \n IBM is a registered trademark of IBM Corp.
+icu4j.data.version.number=48
diff --git a/eclipse-build/build.xml b/eclipse-build/build.xml
new file mode 100644
index 00000000000..282243fd826
--- /dev/null
+++ b/eclipse-build/build.xml
@@ -0,0 +1,391 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/eclipse-build/eclipse_mod_classes.txt b/eclipse-build/eclipse_mod_classes.txt
new file mode 100644
index 00000000000..d279e2e7897
--- /dev/null
+++ b/eclipse-build/eclipse_mod_classes.txt
@@ -0,0 +1,5 @@
+# Copyright (C) 2011, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+com/ibm/icu/lang/UCharacter.java
+com/ibm/icu/text/DecimalFormat.java
diff --git a/eclipse-build/eclipse_mod_test_classes.txt b/eclipse-build/eclipse_mod_test_classes.txt
new file mode 100644
index 00000000000..7059aeb914a
--- /dev/null
+++ b/eclipse-build/eclipse_mod_test_classes.txt
@@ -0,0 +1,7 @@
+# Copyright (C) 2011, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+com/ibm/icu/dev/test/format/IntlTestDecimalFormatAPI.java
+com/ibm/icu/dev/test/format/NumberFormatTest.java
+com/ibm/icu/dev/test/serializable/CompatibilityTest.java
+com/ibm/icu/dev/test/serializable/CoverageTest.java
diff --git a/eclipse-build/features.template/com.ibm.icu.base/.project b/eclipse-build/features.template/com.ibm.icu.base/.project
new file mode 100644
index 00000000000..db8f9304a47
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu.base/.project
@@ -0,0 +1,17 @@
+
+
+ com.ibm.icu.base-feature
+
+
+
+
+
+ org.eclipse.pde.FeatureBuilder
+
+
+
+
+
+ org.eclipse.pde.FeatureNature
+
+
diff --git a/eclipse-build/features.template/com.ibm.icu.base/build.properties b/eclipse-build/features.template/com.ibm.icu.base/build.properties
new file mode 100644
index 00000000000..9d55f55c5aa
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu.base/build.properties
@@ -0,0 +1,20 @@
+###############################################################################
+# Copyright (c) 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+bin.includes =\
+epl-v10.html,\
+eclipse_update_120.jpg,\
+feature.xml,\
+feature.properties,\
+license.html
+outputUpdateJars = true
+
+generate.plugin@com.ibm.icu.base.source=com.ibm.icu.base
+
diff --git a/eclipse-build/features.template/com.ibm.icu.base/feature.xml b/eclipse-build/features.template/com.ibm.icu.base/feature.xml
new file mode 100644
index 00000000000..5102e29769f
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu.base/feature.xml
@@ -0,0 +1,33 @@
+
+
+
+
+ [Enter Feature Description here.]
+
+
+
+ [Enter Copyright Description here.]
+
+
+
+ [Enter License Description here.]
+
+
+
+
+
+
+
diff --git a/eclipse-build/features.template/com.ibm.icu/.project b/eclipse-build/features.template/com.ibm.icu/.project
new file mode 100644
index 00000000000..d407fa0f079
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu/.project
@@ -0,0 +1,17 @@
+
+
+ com.ibm.icu-feature
+
+
+
+
+
+ org.eclipse.pde.FeatureBuilder
+
+
+
+
+
+ org.eclipse.pde.FeatureNature
+
+
diff --git a/eclipse-build/features.template/com.ibm.icu/build.properties b/eclipse-build/features.template/com.ibm.icu/build.properties
new file mode 100644
index 00000000000..e9000a47e50
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu/build.properties
@@ -0,0 +1,20 @@
+###############################################################################
+# Copyright (c) 2000, 2008 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+bin.includes =\
+epl-v10.html,\
+eclipse_update_120.jpg,\
+feature.xml,\
+feature.properties,\
+license.html
+outputUpdateJars = true
+
+generate.plugin@com.ibm.icu.source=com.ibm.icu
+
diff --git a/eclipse-build/features.template/com.ibm.icu/feature.xml b/eclipse-build/features.template/com.ibm.icu/feature.xml
new file mode 100644
index 00000000000..417e65b570b
--- /dev/null
+++ b/eclipse-build/features.template/com.ibm.icu/feature.xml
@@ -0,0 +1,33 @@
+
+
+
+
+ [Enter Feature Description here.]
+
+
+
+ [Enter Copyright Description here.]
+
+
+
+ [Enter License Description here.]
+
+
+
+
+
+
+
diff --git a/eclipse-build/misc/ICUConfig.properties b/eclipse-build/misc/ICUConfig.properties
new file mode 100644
index 00000000000..83f650315b3
--- /dev/null
+++ b/eclipse-build/misc/ICUConfig.properties
@@ -0,0 +1,31 @@
+#*
+#*******************************************************************************
+#* Copyright (C) 2008-2011, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+#* This properties file contains the ICU runtime configuration.
+#*
+
+#
+# The default TimeZone implementation type used by the ICU TimeZone
+# factory method. [ ICU | JDK ]
+#
+com.ibm.icu.util.TimeZone.DefaultTimeZoneType = JDK
+
+#
+# By default, DecimalFormat uses some internal equivalent character
+# data in addition to ones in DecimalFormatSymbols for parsing
+# decimal/grouping separators. When this property is true,
+# DecimalFormat uses separators configured by DecimalFormatSymbols only
+# and does not try to find a match in the internal equivalent character
+# data.
+#
+com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
+
+
+#
+# [Internal Use Only]
+# Disable resource path scan for building full locale name list
+# at run time.
+#
+com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan = false
diff --git a/eclipse-build/misc/about_icu.html b/eclipse-build/misc/about_icu.html
new file mode 100644
index 00000000000..d1cbf11e9ea
--- /dev/null
+++ b/eclipse-build/misc/about_icu.html
@@ -0,0 +1,54 @@
+
+
+
+
+About
+
+
+About This Content
+
+XXXX XX, 2011
+License
+
+The Eclipse Foundation makes available all content in this plug-in ("Content"). Unless otherwise
+indicated below, the Content is provided to you under the terms and conditions of the
+Eclipse Public License Version 1.0 ("EPL"). A copy of the EPL is available
+at http://www.eclipse.org/legal/epl-v10.html .
+For purposes of the EPL, "Program" will mean the Content.
+
+If you did not receive this Content directly from the Eclipse Foundation, the Content is
+being redistributed by another party ("Redistributor") and different terms and conditions may
+apply to your use of any object code in the Content. Check the Redistributor's license that was
+provided with the Content. If no such license exists, contact the Redistributor. Unless otherwise
+indicated below, the terms and conditions of the EPL still apply to any source code in the Content
+and such source code may be obtained at http://www.eclipse.org .
+
+
+ Third Party Content
+ The Content includes items that have been sourced from third parties as set out below. If you
+ did not receive this Content directly from the Eclipse Foundation, the following is provided
+ for informational purposes only, and you should look to the Redistributor's license for
+ terms and conditions of use.
+
+ ICU4J 4.8.X.v2011XXXX plug-in
+ The plug-in includes software ("ICU4J") developed by International Business Machines
+ Corporation and others.
+
+ ICU4J is:
+
+ Copyright (c) 1995-2011 International Business Machines Corporation and others
+ All rights reserved.
+
+
+ Your use of ICU4J is subject to the terms and conditions of the ICU4J license. A copy of the
+ license is contained in the file about_files/license.html .
+
+ ICU4J bundles data files imported from the Unicode Character Database and the Locale Data. A copy of the
+ Unicode Data and Software license is contained in the file about_files/ucdterms.txt.
+
+ The project information including source code, documentations and demo programs are available on
+ the ICU public web site .
+
+
+
diff --git a/eclipse-build/misc/about_icu_base.html b/eclipse-build/misc/about_icu_base.html
new file mode 100644
index 00000000000..271380756e3
--- /dev/null
+++ b/eclipse-build/misc/about_icu_base.html
@@ -0,0 +1,51 @@
+
+
+
+
+About
+
+
+About This Content
+
+XXXX XX, 2011
+License
+
+The Eclipse Foundation makes available all content in this plug-in ("Content"). Unless otherwise
+indicated below, the Content is provided to you under the terms and conditions of the
+Eclipse Public License Version 1.0 ("EPL"). A copy of the EPL is available
+at http://www.eclipse.org/legal/epl-v10.html .
+For purposes of the EPL, "Program" will mean the Content.
+
+If you did not receive this Content directly from the Eclipse Foundation, the Content is
+being redistributed by another party ("Redistributor") and different terms and conditions may
+apply to your use of any object code in the Content. Check the Redistributor's license that was
+provided with the Content. If no such license exists, contact the Redistributor. Unless otherwise
+indicated below, the terms and conditions of the EPL still apply to any source code in the Content
+and such source code may be obtained at http://www.eclipse.org .
+
+
+ Third Party Content
+ The Content includes items that have been sourced from third parties as set out below. If you
+ did not receive this Content directly from the Eclipse Foundation, the following is provided
+ for informational purposes only, and you should look to the Redistributor's license for
+ terms and conditions of use.
+
+ ICU4J 4.8.X.v2011XXXX base plug-in
+ The plug-in includes software ("ICU4J") developed by International Business Machines
+ Corporation and others.
+
+ ICU4J is:
+
+ Copyright (c) 1995-2011 International Business Machines Corporation and others
+ All rights reserved.
+
+
+ Your use of ICU4J is subject to the terms and conditions of the ICU4J license. A copy of the
+ license is contained in the file about_files/license.html .
+
+ The project information including source code, documentations and demo programs are available on
+ the ICU public web site .
+
+
+
diff --git a/eclipse-build/pdebuild/allElements.xml b/eclipse-build/pdebuild/allElements.xml
new file mode 100644
index 00000000000..e4d661303b0
--- /dev/null
+++ b/eclipse-build/pdebuild/allElements.xml
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/eclipse-build/pdebuild/build.properties b/eclipse-build/pdebuild/build.properties
new file mode 100644
index 00000000000..86f6cf4d7e9
--- /dev/null
+++ b/eclipse-build/pdebuild/build.properties
@@ -0,0 +1,220 @@
+###############################################################################
+# Copyright (c) 2003-2008 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+#####################
+# Parameters describing how and where to execute the build.
+# Typical users need only update the following properties:
+# baseLocation - where things you are building against are installed
+# bootclasspath - The base jars to compile against (typically rt.jar)
+# configs - the list of {os, ws, arch} configurations to build.
+#
+# Of course any of the settings here can be overridden by spec'ing
+# them on the command line (e.g., -DbaseLocation=d:/eclipse).
+
+############# PRODUCT/PACKAGING CONTROL #############
+product=/plugin or feature id/path/to/.product
+runPackager=true
+
+#Set the name of the archive that will result from the product build.
+#archiveNamePrefix=
+
+# The prefix that will be used in the generated archive.
+#archivePrefix=
+archivePrefix=
+
+# The location under which all of the build output will be collected.
+collectingFolder=${archivePrefix}
+
+# The list of {os, ws, arch} configurations to build. This
+# value is a '&' separated list of ',' separated triples. For example,
+# configs=win32,win32,x86 & linux,motif,x86
+# By default the value is *,*,*
+#configs=win32, win32, x86 & \
+# linux, gtk, ppc &\
+# linux, gtk, x86 & \
+# linux, gtk, x86_64 & \
+# linux, motif, x86 & \
+# solaris, motif, sparc & \
+# solaris, gtk, sparc & \
+# aix, motif, ppc & \
+# hpux, motif, PA_RISC & \
+# macosx, carbon, ppc
+
+# By default PDE creates one archive (result) per entry listed in the configs property.
+# Setting this value to true will cause PDE to only create one output containing all
+# artifacts for all the platforms listed in the configs property.
+#groupConfigurations=true
+
+#The format of the archive. By default a zip is created using antZip.
+#The list can only contain the configuration for which the desired format is different than zip.
+#archivesFormat=win32, win32, x86 - antZip& \
+# linux, gtk, ppc - antZip &\
+# linux, gtk, x86 - antZip& \
+# linux, gtk, x86_64 - antZip& \
+# linux, motif, x86 - antZip& \
+# solaris, motif, sparc - antZip& \
+# solaris, gtk, sparc - antZip& \
+# aix, motif, ppc - antZip& \
+# hpux, motif, PA_RISC - antZip& \
+# macosx, carbon, ppc - antZip
+
+#Set to true if you want the output to be ready for an update jar (no site.xml generated)
+#outputUpdateJars = false
+
+#Set to true for Jnlp generation
+#codebase should be a URL that will be used as the root of all relative URLs in the output.
+#generateJnlp=false
+#jnlp.codebase=
+#jnlp.j2se=
+#jnlp.locale=
+
+#Set to true if you want to sign jars
+#signJars=false
+#sign.alias=
+#sign.keystore=
+#sign.storepass=
+
+#Arguments to send to the zip executable
+zipargs=
+
+#Arguments to send to the tar executable
+tarargs=
+
+#Control the creation of a file containing the version included in each configuration - on by default
+#generateVersionsLists=false
+
+############## BUILD NAMING CONTROL ################
+# The directory into which the build elements are fetched and where
+# the build takes place.
+buildDirectory=@BUILD_DIR@
+
+# Type of build. Used in naming the build output. Typically this value is
+# one of I, N, M, S, ...
+buildType=@BUILD_TYPE@
+
+# ID of the build. Used in naming the build output.
+buildId=@BUILD_ID@
+
+# Label for the build. Used in naming the build output
+buildLabel=${buildType}.${buildId}
+
+# Timestamp for the build. Used in naming the build output
+timestamp=007
+
+#The value to be used for the qualifier of a plugin or feature when you want to override the value computed by pde.
+#The value will only be applied to plugin or features indicating build.properties, qualifier = context
+#forceContextQualifier=
+
+#Enable / disable the generation of a suffix for the features that use .qualifier.
+#The generated suffix is computed according to the content of the feature
+#generateFeatureVersionSuffix=true
+
+############# BASE CONTROL #############
+# Settings for the base Eclipse components and Java class libraries
+# against which you are building.
+# Base location for anything the build needs to compile against. For example,
+# in most RCP app or a plug-in, the baseLocation should be the location of a previously
+# installed Eclipse against which the application or plug-in code will be compiled and the RCP delta pack.
+
+base=
+#baseLocation=${base}/eclipse
+baseLocation=@BASE_LOCATION@
+#Os/Ws/Arch/nl of the eclipse specified by baseLocation
+baseos=@BASE_OS@
+basews=@BASE_WS@
+basearch=@BASE_ARCH@
+
+#this property indicates whether you want the set of plug-ins and features to be considered during the build to be limited to the ones reachable from the features / plugins being built
+filteredDependencyCheck=false
+
+#pluginPath is a list of locations in which to find plugins and features. This list is separated by the platform file separator (; or :)
+#a location is one of:
+#- the location of the jar or folder that is the plugin or feature : /path/to/foo.jar or /path/to/foo
+#- a directory that contains a /plugins or /features subdirectory
+#- the location of a feature.xml, or for 2.1 style plugins, the plugin.xml or fragment.xml
+#pluginPath=
+
+skipBase=true
+eclipseURL=
+eclipseBuildId=
+eclipseBaseURL=${eclipseURL}/eclipse-platform-${eclipseBuildId}-win32.zip
+
+
+############# MAP FILE CONTROL ################
+# This section defines CVS tags to use when fetching the map files from the repository.
+# If you want to fetch the map file from repository / location, change the getMapFiles target in the customTargets.xml
+
+skipMaps=true
+mapsRepo=:pserver:anonymous@example.com/path/to/repo
+mapsRoot=path/to/maps
+mapsCheckoutTag=HEAD
+
+#tagMaps=true
+mapsTagTag=v${buildId}
+
+
+############ REPOSITORY CONTROL ###############
+# This section defines properties parameterizing the repositories where plugins, fragments
+# bundles and features are being obtained from.
+
+# The tags to use when fetching elements to build.
+# By default the builder will use whatever is in the maps.
+# This value takes the form of a comma separated list of repository identifier (like used in the map files) and the
+# overriding value
+# For example fetchTag=CVS=HEAD, SVN=v20050101
+# fetchTag=HEAD
+skipFetch=true
+
+
+############# JAVA COMPILER OPTIONS ##############
+# The location of the Java jars to compile against. Typically the rt.jar for your JDK/JRE
+#bootclasspath=${java.home}/lib/rt.jar
+
+# specific JRE locations to compile against. These values are used to compile bundles specifying a
+# Bundle-RequiredExecutionEnvironment. Uncomment and set values for environments that you support
+#CDC-1.0/Foundation-1.0= /path/to/rt.jar
+#CDC-1.1/Foundation-1.1=
+#OSGi/Minimum-1.0=
+#OSGi/Minimum-1.1=
+#JRE-1.1=
+#J2SE-1.2=
+#J2SE-1.3=
+#J2SE-1.4=
+#J2SE-1.5=
+#JavaSE-1.6=
+#PersonalJava-1.1=
+#PersonalJava-1.2=
+#CDC-1.0/PersonalBasis-1.0=
+#CDC-1.0/PersonalJava-1.0=
+#CDC-1.1/PersonalBasis-1.1=
+#CDC-1.1/PersonalJava-1.1=
+
+# Specify the output format of the compiler log when eclipse jdt is used
+logExtension=.log
+
+# Whether or not to include debug info in the output jars
+javacDebugInfo=true
+
+# Whether or not to fail the build if there are compiler errors
+javacFailOnError=true
+
+# Enable or disable verbose mode of the compiler
+javacVerbose=true
+
+# Extra arguments for the compiler. These are specific to the java compiler being used.
+compilerArg=-inlineJSR -enableJavadoc -encoding ISO-8859-1
+
+# Default value for the version of the source code. This value is used when compiling plug-ins that do not set the Bundle-RequiredExecutionEnvironment or set javacSource in build.properties
+javacSource=1.5
+
+# Default value for the version of the byte code targeted. This value is used when compiling plug-ins that do not set the Bundle-RequiredExecutionEnvironment or set javacTarget in build.properties.
+javacTarget=1.5
+
+individualSourceBundles=true
diff --git a/eclipse-build/pdebuild/customTargets.xml b/eclipse-build/pdebuild/customTargets.xml
new file mode 100644
index 00000000000..ae4266d683c
--- /dev/null
+++ b/eclipse-build/pdebuild/customTargets.xml
@@ -0,0 +1,161 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/.classpath b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.classpath
new file mode 100644
index 00000000000..45f024e850e
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/.project b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.project
new file mode 100644
index 00000000000..3beefc76a2c
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.project
@@ -0,0 +1,28 @@
+
+
+ com.ibm.icu.base.tests
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.pde.ManifestBuilder
+
+
+
+
+ org.eclipse.pde.SchemaBuilder
+
+
+
+
+
+ org.eclipse.pde.PluginNature
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.core.prefs b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..9e89b12a05c
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,78 @@
+#Thu Jan 13 17:45:06 EST 2011
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.deadCode=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.ui.prefs b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..d57be9d85cd
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,3 @@
+#Thu Dec 14 11:51:01 EST 2006
+eclipse.preferences.version=1
+internal.default.compliance=default
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/META-INF/MANIFEST.MF b/eclipse-build/plugins.template/com.ibm.icu.base.tests/META-INF/MANIFEST.MF
new file mode 100644
index 00000000000..fccd74066e3
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/META-INF/MANIFEST.MF
@@ -0,0 +1,10 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: %pluginName
+Bundle-SymbolicName: com.ibm.icu.base.tests
+Bundle-Version: @BUILD_VERSION@
+Bundle-Vendor: %providerName
+Fragment-Host: com.ibm.icu.base
+Bundle-Copyright: @COPYRIGHT@
+Require-Bundle: org.junit
+Bundle-RequiredExecutionEnvironment: J2SE-1.5
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/build.properties b/eclipse-build/plugins.template/com.ibm.icu.base.tests/build.properties
new file mode 100644
index 00000000000..b6bb464ebf8
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/build.properties
@@ -0,0 +1,17 @@
+###############################################################################
+# Copyright (c) 2000, 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+source.. = src/
+output.. = bin/
+bin.includes = .,\
+ about.html,\
+ about_files/,\
+ plugin.properties,\
+ META-INF/
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/plugin.properties b/eclipse-build/plugins.template/com.ibm.icu.base.tests/plugin.properties
new file mode 100644
index 00000000000..a991df0374d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/plugin.properties
@@ -0,0 +1,12 @@
+###############################################################################
+# Copyright (c) 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+pluginName = International Components for Unicode for Java (ICU4J) Replacement plug-in Tests
+providerName = IBM Corporation
\ No newline at end of file
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/BreakIteratorTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/BreakIteratorTest.java
new file mode 100644
index 00000000000..3db154648c4
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/BreakIteratorTest.java
@@ -0,0 +1,359 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Locale;
+
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.util.ULocale;
+
+public class BreakIteratorTest extends ICUTestCase {
+ // Exercises the com.ibm.icu.text.BreakIterator API against one shared sample text. ICU behaves a bit differently
+ // with this text, but the tested values aren't affected. If Java changes behavior they might need to change.
+ private static final String text = "Mr. and Mrs. Mumblety-Peg paid $35.97 for a new 12\" cockatoo. " +
+ "When they got home they both cooed \"Isn't it lovely?\" and sighed softly. " +
+ "\"Let's name it u\u0308\u5098!\" they said with glee.";
+ private static final int pos = text.indexOf("sn't"); // probe offset: just after the 'I' of "Isn't"
+ private static BreakIterator cbr;
+ private static BreakIterator wbr;
+ private static BreakIterator lbr;
+ private static BreakIterator sbr;
+
+ static {
+ cbr = BreakIterator.getCharacterInstance();
+ cbr.setText(text);
+ wbr = BreakIterator.getWordInstance();
+ wbr.setText(text);
+ lbr = BreakIterator.getLineInstance();
+ lbr.setText(text);
+ sbr = BreakIterator.getSentenceInstance();
+ sbr.setText(text);
+
+ // diagnostic: uncomment (together with dump() below) to print the segments each iterator produces
+ // dump(cbr);
+ // dump(wbr);
+ // dump(lbr);
+ // dump(sbr);
+ }
+
+ // private static void dump(BreakIterator bi) {
+ // for (int ix = bi.first(), lim = text.length(); ix != lim;) {
+ // int nx = bi.next();
+ // if (nx < 0) nx = lim;
+ // System.out.println(Integer.toString(ix) + ": " + text.substring(ix, nx));
+ // ix = nx;
+ // }
+ // }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.hashCode()'
+ */
+ public void testHashCode() {
+ BreakIterator br = BreakIterator.getWordInstance();
+ br.setText(text);
+ BreakIterator brne = BreakIterator.getWordInstance();
+ brne.setText(text + "X");
+ wbr.first();
+ testEHCS(br, wbr, brne);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.BreakIterator(BreakIterator)'
+ */
+ public void testBreakIterator() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.first()'
+ */
+ public void testFirst() {
+ assertEquals(0, cbr.first());
+ assertEquals(0, wbr.first());
+ assertEquals(0, lbr.first());
+ assertEquals(0, sbr.first());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.last()'
+ */
+ public void testLast() {
+ assertEquals(text.length(), cbr.last());
+ assertEquals(text.length(), wbr.last());
+ assertEquals(text.length(), lbr.last());
+ assertEquals(text.length(), sbr.last());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.next(int)'
+ */
+ public void testNextInt() {
+ cbr.first();
+ wbr.first();
+ lbr.first();
+ sbr.first();
+ assertEquals(2, cbr.next(2));
+ assertEquals(3, wbr.next(2));
+ assertEquals(8, lbr.next(2));
+ assertEquals(62, sbr.next(2));
+
+ cbr.last();
+ wbr.last();
+ lbr.last();
+ sbr.last();
+ assertEquals(174, cbr.next(-2));
+ assertEquals(171, wbr.next(-2));
+ assertEquals(166, lbr.next(-2));
+ assertEquals(135, sbr.next(-2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.next()'
+ */
+ public void testNext() {
+ cbr.first();
+ wbr.first();
+ lbr.first();
+ sbr.first();
+ assertEquals(1, cbr.next());
+ assertEquals(2, wbr.next());
+ assertEquals(4, lbr.next());
+ assertEquals(13, sbr.next());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.previous()'
+ */
+ public void testPrevious() {
+ cbr.last();
+ wbr.last();
+ lbr.last();
+ sbr.last();
+ assertEquals(175, cbr.previous());
+ assertEquals(175, wbr.previous());
+ assertEquals(171, lbr.previous());
+ assertEquals(156, sbr.previous());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.following(int)'
+ */
+ public void testFollowing() {
+ assertEquals(100, cbr.following(pos));
+ assertEquals(103, wbr.following(pos));
+ assertEquals(104, lbr.following(pos));
+ assertEquals(116, sbr.following(pos));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.preceding(int)'
+ */
+ public void testPreceding() {
+ assertEquals(98, cbr.preceding(pos));
+ assertEquals(98, wbr.preceding(pos));
+ assertEquals(97, lbr.preceding(pos));
+ assertEquals(62, sbr.preceding(pos));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.isBoundary(int)'
+ */
+ public void testIsBoundary() {
+ assertTrue(cbr.isBoundary(pos));
+ assertFalse(wbr.isBoundary(pos));
+ assertFalse(lbr.isBoundary(pos));
+ assertFalse(sbr.isBoundary(pos));
+
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.current()'
+ */
+ public void testCurrent() {
+ cbr.following(pos);
+ wbr.following(pos);
+ lbr.following(pos);
+ sbr.following(pos);
+ assertEquals(100, cbr.current());
+ assertEquals(103, wbr.current());
+ assertEquals(104, lbr.current());
+ assertEquals(116, sbr.current());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getText()'
+ */
+ public void testGetText() {
+ CharacterIterator ci = cbr.getText();
+ StringBuilder buf = new StringBuilder(ci.getEndIndex() - ci.getBeginIndex());
+ for (char c = ci.first(); c != CharacterIterator.DONE; c = ci.next()) {
+ buf.append(c);
+ }
+ String result = buf.toString();
+ assertEquals(text, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.setText(String)'
+ */
+ public void testSetTextString() {
+ // implicitly tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.setText(CharacterIterator)'
+ */
+ public void testSetTextCharacterIterator() {
+ CharacterIterator ci = new StringCharacterIterator(text, pos);
+ BreakIterator bi = BreakIterator.getWordInstance();
+ bi.setText(ci);
+ assertEquals(2, bi.next());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getWordInstance()'
+ */
+ public void testGetWordInstance() {
+ // implicitly tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getWordInstance(Locale)'
+ */
+ public void testGetWordInstanceLocale() {
+ assertNotNull(BreakIterator.getWordInstance(Locale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getWordInstance(ULocale)'
+ */
+ public void testGetWordInstanceULocale() {
+ assertNotNull(BreakIterator.getWordInstance(ULocale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getLineInstance()'
+ */
+ public void testGetLineInstance() {
+ // implicitly tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getLineInstance(Locale)'
+ */
+ public void testGetLineInstanceLocale() {
+ assertNotNull(BreakIterator.getLineInstance(Locale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getLineInstance(ULocale)'
+ */
+ public void testGetLineInstanceULocale() {
+ assertNotNull(BreakIterator.getLineInstance(ULocale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getCharacterInstance()'
+ */
+ public void testGetCharacterInstance() {
+ // implicitly tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getCharacterInstance(Locale)'
+ */
+ public void testGetCharacterInstanceLocale() {
+ assertNotNull(BreakIterator.getCharacterInstance(Locale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getCharacterInstance(ULocale)'
+ */
+ public void testGetCharacterInstanceULocale() {
+ assertNotNull(BreakIterator.getCharacterInstance(ULocale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getSentenceInstance()'
+ */
+ public void testGetSentenceInstance() {
+ // implicitly tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getSentenceInstance(Locale)'
+ */
+ public void testGetSentenceInstanceLocale() {
+ assertNotNull(BreakIterator.getSentenceInstance(Locale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getSentenceInstance(ULocale)'
+ */
+ public void testGetSentenceInstanceULocale() {
+ assertNotNull(BreakIterator.getSentenceInstance(ULocale.JAPAN));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getTitleInstance()'
+ */
+ public void testGetTitleInstance() {
+ // not implemented
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getTitleInstance(Locale)'
+ */
+ public void testGetTitleInstanceLocale() {
+ // not implemented
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getTitleInstance(ULocale)'
+ */
+ public void testGetTitleInstanceULocale() {
+ // not implemented
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getAvailableLocales()'
+ */
+ public void testGetAvailableLocales() {
+ assertNotNull(BreakIterator.getAvailableLocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.getAvailableULocales()'
+ */
+ public void testGetAvailableULocales() {
+ assertNotNull(BreakIterator.getAvailableULocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.toString()'
+ */
+ public void testToString() {
+ assertNotNull(cbr.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.clone()'
+ */
+ public void testClone() {
+ // see testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.BreakIterator.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // see testHashCode
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CalendarTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CalendarTest.java
new file mode 100644
index 00000000000..3237a940363
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CalendarTest.java
@@ -0,0 +1,565 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+public class CalendarTest extends ICUTestCase {
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.hashCode()'
+ */
+ public void testHashCode() {
+ Calendar cal1 = Calendar.getInstance();
+ Calendar cal2 = Calendar.getInstance();
+ Calendar cal3 = Calendar.getInstance();
+
+ long t = System.currentTimeMillis();
+ cal1.setTimeInMillis(t);
+ cal2.setTimeInMillis(t);
+ cal3.setTimeInMillis(t);
+
+ cal3.setMinimalDaysInFirstWeek(cal3.getMinimalDaysInFirstWeek()+1);
+ testEHCS(cal1, cal2, cal3);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.Calendar(Calendar)'
+ */
+ public void testCalendar() {
+ // tested implicitly everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance()'
+ */
+ public void testGetInstance() {
+ // tested by testEHCS
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance(TimeZone)'
+ */
+ public void testGetInstanceTimeZone() {
+ TimeZone tz = TimeZone.getTimeZone("America/Los_Angeles");
+ Calendar cal = Calendar.getInstance(tz);
+ assertNotNull(cal);
+ assertNotNull(cal.getTime());
+ assertEquals(tz, cal.getTimeZone());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance(Locale)'
+ */
+ public void testGetInstanceLocale() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ assertNotNull(cal);
+ assertNotNull(cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance(ULocale)'
+ */
+ public void testGetInstanceULocale() {
+ Calendar cal = Calendar.getInstance(ULocale.US);
+ assertNotNull(cal);
+ assertNotNull(cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance(TimeZone, Locale)'
+ */
+ public void testGetInstanceTimeZoneLocale() {
+ TimeZone tz = TimeZone.getTimeZone("America/New_York");
+ Calendar cal = Calendar.getInstance(tz, Locale.US);
+ assertNotNull(cal);
+ assertNotNull(cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getInstance(TimeZone, ULocale)'
+ */
+ public void testGetInstanceTimeZoneULocale() {
+ TimeZone tz = TimeZone.getTimeZone("America/New_York");
+ Calendar cal = Calendar.getInstance(tz, ULocale.US);
+ assertNotNull(cal);
+ assertNotNull(cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getAvailableLocales()'
+ */
+ public void testGetAvailableLocales() {
+ assertNotNull(Calendar.getAvailableLocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getAvailableULocales()'
+ */
+ public void testGetAvailableULocales() {
+ assertNotNull(Calendar.getAvailableULocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getTime()'
+ */
+ public void testGetTime() {
+ Calendar cal = Calendar.getInstance();
+ assertNotNull(cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setTime(Date)'
+ */
+ public void testSetTime() {
+ Calendar cal = Calendar.getInstance();
+ cal.clear();
+ cal.set(2006, 0, 20, 9, 30, 0);
+ Date date = cal.getTime();
+ cal = Calendar.getInstance();
+ cal.setTime(date);
+ assertEquals(date, cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getTimeInMillis()'
+ */
+ public void testGetTimeInMillis() {
+ Calendar cal = Calendar.getInstance();
+ assertTrue(0 != cal.getTimeInMillis());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setTimeInMillis(long)'
+ */
+ public void testSetTimeInMillis() {
+ Calendar cal = Calendar.getInstance();
+ cal.clear();
+ cal.set(2006, 0, 20, 9, 30, 0);
+ long millis = cal.getTimeInMillis();
+ Date date = cal.getTime();
+
+ cal = Calendar.getInstance();
+ cal.setTimeInMillis(millis);
+
+ assertEquals(date, cal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.get(int)'
+ */
+ public void testGet() {
+ Calendar cal = Calendar.getInstance();
+ cal.clear();
+ cal.set(2006, 0, 20, 9, 30, 0);
+ assertEquals(0, cal.get(Calendar.MONTH));
+ assertEquals(20, cal.get(Calendar.DAY_OF_MONTH));
+ assertEquals(30, cal.get(Calendar.MINUTE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.set(int, int)'
+ */
+ public void testSetIntInt() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1977);
+ assertEquals(1977, cal.get(Calendar.YEAR));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.set(int, int, int)'
+ */
+ public void testSetIntIntInt() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1997, 9, 15);
+ assertEquals(15, cal.get(Calendar.DATE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.set(int, int, int, int, int)'
+ */
+ public void testSetIntIntIntIntInt() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1997, 9, 15, 14, 25);
+ assertEquals(25, cal.get(Calendar.MINUTE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.set(int, int, int, int, int, int)'
+ */
+ public void testSetIntIntIntIntIntInt() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1997, 9, 15, 14, 25, 51);
+ assertEquals(51, cal.get(Calendar.SECOND));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.clear()'
+ */
+ public void testClear() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1997, 9, 15, 14, 25, 51);
+ cal.clear();
+ assertEquals(0, cal.get(Calendar.MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.clear(int)'
+ */
+ public void testClearInt() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1997, 9, 15, 14, 25, 51);
+ assertTrue(cal.isSet(Calendar.DAY_OF_MONTH));
+ cal.clear(Calendar.DAY_OF_MONTH);
+ assertFalse(cal.isSet(Calendar.DAY_OF_MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.isSet(int)'
+ */
+ public void testIsSet() {
+ // see testClearInt
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // tested by testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.isEquivalentTo(Calendar)'
+ */
+ public void testIsEquivalentTo() {
+ Calendar cal = Calendar.getInstance();
+ Calendar cal2 = Calendar.getInstance();
+ cal2.set(1994, 6, 21, 8, 7);
+ assertTrue(cal.isEquivalentTo(cal2)); // a different time alone must not break equivalence
+ cal.setTimeZone(TimeZone.getTimeZone("America/Chicago"));
+ cal2.setTimeZone(TimeZone.getTimeZone("America/Los_Angeles")); // real zone ID; "PDT" is not one
+ assertFalse(cal.isEquivalentTo(cal2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.before(Object)'
+ */
+ public void testBefore() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1990);
+ assertTrue(cal.before(new Date()));
+ assertTrue(cal.before(Calendar.getInstance()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.after(Object)'
+ */
+ public void testAfter() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 3058);
+ assertTrue(cal.after(new Date()));
+ assertTrue(cal.after(Calendar.getInstance()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getActualMaximum(int)'
+ */
+ public void testGetActualMaximum() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ assertEquals(11, cal.getActualMaximum(Calendar.MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getActualMinimum(int)'
+ */
+ public void testGetActualMinimum() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ assertEquals(0, cal.getActualMinimum(Calendar.MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.roll(int, boolean)'
+ */
+ public void testRollIntBoolean() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ cal.set(1997, 1, 27);
+ cal.roll(Calendar.DATE, true);
+ assertEquals(28, cal.get(Calendar.DATE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.roll(int, int)'
+ */
+ public void testRollIntInt() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ cal.set(1997, 1, 27);
+ cal.roll(Calendar.DATE, 3);
+ assertEquals(2, cal.get(Calendar.DATE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.add(int, int)'
+ */
+ public void testAdd() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ cal.set(1997, 1, 27);
+ cal.add(Calendar.DATE, 3);
+ assertEquals(2, cal.get(Calendar.DATE));
+ assertEquals(2, cal.get(Calendar.MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getDisplayName(Locale)'
+ */
+ public void testGetDisplayNameLocale() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals("Gregorian Calendar", cal.getDisplayName(Locale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getDisplayName(ULocale)'
+ */
+ public void testGetDisplayNameULocale() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals("Gregorian Calendar", cal.getDisplayName(ULocale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.compareTo(Calendar)'
+ */
+ public void testCompareToCalendar() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1990);
+ assertTrue(0 > cal.compareTo(Calendar.getInstance()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.compareTo(Object)'
+ */
+ public void testCompareToObject() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1990);
+ assertTrue(0 > cal.compareTo(Calendar.getInstance()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getDateTimeFormat(int, int, Locale)'
+ */
+ public void testGetDateTimeFormatIntIntLocale() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1990, 8, 16, 20, 3);
+ DateFormat df = cal.getDateTimeFormat(DateFormat.LONG, DateFormat.SHORT, Locale.US);
+ assertEquals("September 16, 1990 8:03 PM", df.format(cal));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getDateTimeFormat(int, int, ULocale)'
+ */
+ public void testGetDateTimeFormatIntIntULocale() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(1990, 8, 16, 20, 3);
+ DateFormat df = cal.getDateTimeFormat(DateFormat.LONG, DateFormat.SHORT, ULocale.US);
+ assertEquals("September 16, 1990 8:03 PM", df.format(cal));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.fieldDifference(Date, int)'
+ */
+ public void testFieldDifference() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.DAY_OF_MONTH, 0);
+ Date date = cal.getTime();
+ cal.add(Calendar.DAY_OF_MONTH, 5);
+ assertEquals(-5, cal.fieldDifference(date, Calendar.DAY_OF_MONTH));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getTimeZone()'
+ */
+ public void testGetTimeZone() {
+ Calendar cal = Calendar.getInstance();
+ assertNotNull(cal.getTimeZone());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setTimeZone(TimeZone)'. Picks a real zone ID that differs from
+ * the calendar's current zone ID ("PDT" is not a zone ID, so it would resolve to a fallback zone). */
+ public void testSetTimeZone() {
+ Calendar cal = Calendar.getInstance();
+ TimeZone value1 = cal.getTimeZone();
+ String tzn = "America/Los_Angeles".equals(value1.getID()) ? "America/Chicago" : "America/Los_Angeles";
+ TimeZone value2 = TimeZone.getTimeZone(tzn);
+ cal.setTimeZone(value2);
+ TimeZone result = cal.getTimeZone();
+ assertNotEqual(value1, result);
+ assertEquals(value2, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setLenient(boolean)'
+ */
+ public void testSetLenient() {
+ Calendar cal = Calendar.getInstance();
+ boolean lenient = cal.isLenient();
+ cal.setLenient(!lenient);
+ assertFalse(lenient == cal.isLenient());
+
+ // not testing if it has the expected effect
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.isLenient()'
+ */
+ public void testIsLenient() {
+ // tested by testSetLenient
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setFirstDayOfWeek(int)'
+ */
+ public void testSetFirstDayOfWeek() {
+ Calendar cal = Calendar.getInstance();
+ // next day of week, wrapping 7 (Saturday) back to 1 (Sunday) so the new value is always a valid day
+ int nextDay = cal.getFirstDayOfWeek() % 7 + 1;
+ cal.setFirstDayOfWeek(nextDay);
+ assertEquals(nextDay, cal.getFirstDayOfWeek());
+ // only the getter/setter round trip is checked; effects on week computations are not tested
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getFirstDayOfWeek()'
+ */
+ public void testGetFirstDayOfWeek() {
+ // tested by testSetFirstDayOfWeek
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.setMinimalDaysInFirstWeek(int)'
+ */
+ public void testSetMinimalDaysInFirstWeek() {
+     Calendar cal = Calendar.getInstance();
+     int newMinDays = cal.getMinimalDaysInFirstWeek() % 7 + 1; // a different value that still lies within 1..7
+     cal.setMinimalDaysInFirstWeek(newMinDays);
+     assertEquals(newMinDays, cal.getMinimalDaysInFirstWeek());
+
+     // only the setter/getter round trip is checked, not the effect on week computations
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getMinimalDaysInFirstWeek()'
+ */
+ public void testGetMinimalDaysInFirstWeek() {
+ // tested by testSetMinimalDaysInFirstWeek
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getMinimum(int)'
+ */
+ public void testGetMinimum() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals(1, cal.getMinimum(Calendar.DAY_OF_WEEK));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getMaximum(int)'
+ */
+ public void testGetMaximum() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals(7, cal.getMaximum(Calendar.DAY_OF_WEEK));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getGreatestMinimum(int)'
+ */
+ public void testGetGreatestMinimum() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals(1, cal.getGreatestMinimum(Calendar.DATE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getLeastMaximum(int)'
+ */
+ public void testGetLeastMaximum() {
+ Calendar cal = Calendar.getInstance();
+ assertEquals(28, cal.getLeastMaximum(Calendar.DATE));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getDayOfWeekType(int)'
+ */
+ public void testGetDayOfWeekType() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ assertEquals(Calendar.WEEKDAY, cal.getDayOfWeekType(Calendar.FRIDAY));
+ assertEquals(Calendar.WEEKEND, cal.getDayOfWeekType(Calendar.SATURDAY));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getWeekendTransition(int)'
+ */
+ public void testGetWeekendTransition() {
+     Calendar cal = Calendar.getInstance(Locale.US);
+     try {
+         cal.getWeekendTransition(Calendar.WEEKEND_ONSET);
+         fail("expected UnsupportedOperationException from getWeekendTransition");
+     }
+     catch (UnsupportedOperationException e) {
+         // expected: the call must be rejected as unsupported for this test to pass
+     }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.isWeekend(Date)'
+ */
+ public void testIsWeekendDate() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ cal.set(Calendar.DAY_OF_WEEK, Calendar.SATURDAY);
+ assertTrue(cal.isWeekend(cal.getTime()));
+ cal.set(Calendar.DAY_OF_WEEK, Calendar.WEDNESDAY);
+ assertFalse(cal.isWeekend(cal.getTime()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.isWeekend()'
+ */
+ public void testIsWeekend() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ cal.set(Calendar.DAY_OF_WEEK, Calendar.SATURDAY);
+ assertTrue(cal.isWeekend());
+ cal.set(Calendar.DAY_OF_WEEK, Calendar.WEDNESDAY);
+ assertFalse(cal.isWeekend());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.clone()'
+ */
+ public void testClone() {
+ // tested by testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.toString()'
+ */
+ public void testToString() {
+ Calendar cal = Calendar.getInstance();
+ assertNotNull(cal.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.Calendar.getType()'
+ */
+ public void testGetType() {
+ Calendar cal = Calendar.getInstance(Locale.US);
+ assertEquals("gregorian", cal.getType());
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollationKeyTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollationKeyTest.java
new file mode 100644
index 00000000000..a1afd34a424
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollationKeyTest.java
@@ -0,0 +1,102 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import com.ibm.icu.text.CollationKey;
+import com.ibm.icu.text.Collator;
+
+public class CollationKeyTest extends ICUTestCase {
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.hashCode()'
+ */
+ public void testHashCode() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ CollationKey k2 = c.getCollationKey("this");
+ c.setStrength(Collator.TERTIARY);
+ CollationKey kn = c.getCollationKey("this");
+ testEHCS(k1, k2, kn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.CollationKey(CollationKey)'
+ */
+ public void testCollationKey() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.compareTo(CollationKey)'
+ */
+ public void testCompareToCollationKey() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ CollationKey k2 = c.getCollationKey("this");
+ c.setStrength(Collator.TERTIARY);
+ CollationKey k3 = c.getCollationKey("this");
+ assertTrue(0 == k1.compareTo(k2));
+ assertFalse(0 == k1.compareTo(k3));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.compareTo(Object)'
+ */
+ public void testCompareToObject() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ CollationKey k2 = c.getCollationKey("this");
+ assertTrue(0 == k1.compareTo(k2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.equals(Object)'
+ */
+ public void testEqualsObject() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ CollationKey k2 = c.getCollationKey("this");
+ assertTrue(k1.equals((Object)k2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.toString()'
+ */
+ public void testToString() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ assertNotNull(k1.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.getSourceString()'
+ */
+ public void testGetSourceString() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ assertEquals("This", k1.getSourceString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.CollationKey.toByteArray()'
+ */
+ public void testToByteArray() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey("This");
+ byte[] key = k1.toByteArray();
+ assertNotNull(key);
+ assertTrue(0 < key.length);
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollatorTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollatorTest.java
new file mode 100644
index 00000000000..28f003d7cc6
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollatorTest.java
@@ -0,0 +1,205 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.CollationKey;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.util.ULocale;
+
+public class CollatorTest extends ICUTestCase {
+ private static final String s1 = "Fu\u0308nf"; // capital F + u + diaresis
+ private static final String s2 = "fu\u0308nf"; // u + diaresis
+ private static final String s3 = "f\u00fcnf"; // u-umlaut
+ private static final String s4 = "fu\u0308\u0316nf"; // u + diaresis above + grave below
+ private static final String s5 = "fu\u0316\u0308nf"; // u + grave below + diaresis above
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.hashCode()'
+ */
+ public void testHashCode() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.SECONDARY);
+ Collator c2 = Collator.getInstance();
+ c2.setStrength(Collator.SECONDARY);
+ Collator cn = Collator.getInstance();
+ cn.setStrength(Collator.TERTIARY);
+ testEHCS(c, c2, cn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.Collator(Collator)'
+ */
+ public void testCollator() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.setStrength(int)'
+ */
+ public void testSetStrength() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ assertTrue(0 == c.compare(s1, s2));
+ c.setStrength(Collator.SECONDARY);
+ assertTrue(0 == c.compare(s1, s2));
+ c.setStrength(Collator.TERTIARY);
+ assertTrue(0 < c.compare(s1, s2));
+ assertTrue(0 == c.compare(s2, s3));
+ c.setStrength(Collator.QUATERNARY);
+ assertTrue(0 > c.compare(s2, s3));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.setDecomposition(int)'
+ */
+ public void testSetDecomposition() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.TERTIARY);
+ assertTrue(0 != c.compare(s4, s5));
+ c.setDecomposition(Collator.IDENTICAL);
+ assertTrue(0 == c.compare(s4, s5));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getInstance()'
+ */
+ public void testGetInstance() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getInstance(ULocale)'
+ */
+ public void testGetInstanceULocale() {
+ Collator c = Collator.getInstance(ULocale.GERMANY);
+ assertNotNull(c);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getInstance(Locale)'
+ */
+ public void testGetInstanceLocale() {
+ Collator c = Collator.getInstance(Locale.GERMANY);
+ assertNotNull(c);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getAvailableLocales()'
+ */
+ public void testGetAvailableLocales() {
+ assertNotNull(Collator.getAvailableLocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getAvailableULocales()'
+ */
+ public void testGetAvailableULocales() {
+ assertNotNull(Collator.getAvailableULocales());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getKeywords()'
+ */
+ public void testGetKeywords() {
+ assertEquals(0, Collator.getKeywords().length);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getKeywordValues(String)'
+ */
+ public void testGetKeywordValues() {
+ assertEquals(0, Collator.getKeywordValues("").length);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getStrength()'
+ */
+ public void testGetStrength() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ assertEquals(Collator.PRIMARY, c.getStrength());
+ c.setStrength(Collator.SECONDARY);
+ assertEquals(Collator.SECONDARY, c.getStrength());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getDecomposition()'
+ */
+ public void testGetDecomposition() {
+ Collator c = Collator.getInstance();
+ c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
+ assertEquals(Collator.CANONICAL_DECOMPOSITION, c.getDecomposition());
+ c.setDecomposition(Collator.NO_DECOMPOSITION);
+ assertEquals(Collator.NO_DECOMPOSITION, c.getDecomposition());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.compare(Object, Object)'
+ */
+ public void testCompareObjectObject() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ assertTrue(0 == c.compare((Object)s1, (Object)s2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.equals(String, String)'
+ */
+ public void testEqualsStringString() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ assertTrue(c.equals(s1, s2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.compare(String, String)'
+ */
+ public void testCompareStringString() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ assertTrue(0 == c.compare(s1, s2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.getCollationKey(String)'
+ */
+ public void testGetCollationKey() {
+ Collator c = Collator.getInstance();
+ c.setStrength(Collator.PRIMARY);
+ CollationKey k1 = c.getCollationKey(s1);
+ CollationKey k2 = c.getCollationKey(s2);
+ assertTrue(k1.equals(k2));
+ c.setStrength(Collator.TERTIARY);
+ k1 = c.getCollationKey(s1);
+ k2 = c.getCollationKey(s2);
+ assertFalse(k1.equals(k2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.toString()'
+ */
+ public void testToString() {
+ assertNotNull(Collator.getInstance().toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.clone()'
+ */
+ public void testClone() {
+ // tested above
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.Collator.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // tested above
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatSymbolsTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatSymbolsTest.java
new file mode 100644
index 00000000000..4636e696dcb
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatSymbolsTest.java
@@ -0,0 +1,257 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormatSymbols;
+import com.ibm.icu.util.ULocale;
+
+public class DateFormatSymbolsTest extends ICUTestCase {
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.hashCode()'
+ */
+ public void testHashCode() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ DateFormatSymbols dfs2 = new DateFormatSymbols(ULocale.US);
+ DateFormatSymbols dfsn = new DateFormatSymbols(Locale.US);
+ dfsn.setAmPmStrings(new String[] { "sw", "xw" });
+ testEHCS(dfs, dfs2, dfsn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.DateFormatSymbols(DateFormatSymbols)'
+ */
+ public void testDateFormatSymbolsDateFormatSymbols() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.DateFormatSymbols()'
+ */
+ public void testDateFormatSymbols() {
+ DateFormatSymbols dfs = new DateFormatSymbols();
+ assertNotNull(dfs.getWeekdays());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.DateFormatSymbols(Locale)'
+ */
+ public void testDateFormatSymbolsLocale() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getWeekdays());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.DateFormatSymbols(ULocale)'
+ */
+ public void testDateFormatSymbolsULocale() {
+ DateFormatSymbols dfs = new DateFormatSymbols(ULocale.US);
+ assertNotNull(dfs.getWeekdays());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getEras()'
+ */
+ public void testGetEras() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getEras());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setEras(String[])'
+ */
+ public void testSetEras() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getEras();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setEras(newvalue);
+ String[] result = dfs.getEras();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getMonths()'
+ */
+ public void testGetMonths() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getMonths());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setMonths(String[])'
+ */
+ public void testSetMonths() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getMonths();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setMonths(newvalue);
+ String[] result = dfs.getMonths();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getShortMonths()'
+ */
+ public void testGetShortMonths() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getShortMonths());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setShortMonths(String[])'
+ */
+ public void testSetShortMonths() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getShortMonths();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setShortMonths(newvalue);
+ String[] result = dfs.getShortMonths();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getWeekdays()'
+ */
+ public void testGetWeekdays() {
+     DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+     assertNotNull(dfs.getWeekdays()); // exercise the weekday getter this test is named for
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setWeekdays(String[])'
+ */
+ public void testSetWeekdays() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getWeekdays();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setWeekdays(newvalue);
+ String[] result = dfs.getWeekdays();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getShortWeekdays()'
+ */
+ public void testGetShortWeekdays() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getShortWeekdays());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setShortWeekdays(String[])'
+ */
+ public void testSetShortWeekdays() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getShortWeekdays();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setShortWeekdays(newvalue);
+ String[] result = dfs.getShortWeekdays();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getAmPmStrings()'
+ */
+ public void testGetAmPmStrings() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getAmPmStrings());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setAmPmStrings(String[])'
+ */
+ public void testSetAmPmStrings() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[] oldvalue = dfs.getAmPmStrings();
+ String[] newvalue = (String[])oldvalue.clone();
+ newvalue[0] = newvalue[0] + "!";
+ dfs.setAmPmStrings(newvalue);
+ String[] result = dfs.getAmPmStrings();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getZoneStrings()'
+ */
+ public void testGetZoneStrings() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getZoneStrings());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setZoneStrings(String[][])'
+ */
+ public void testSetZoneStrings() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String[][] oldvalue = dfs.getZoneStrings();
+ String[][] newvalue = (String[][])cloneComplex(oldvalue);
+ newvalue[0][0] = newvalue[0][0] + "!";
+ dfs.setZoneStrings(newvalue);
+ String[][] result = dfs.getZoneStrings();
+ assertArraysNotEqual(oldvalue, result);
+ assertArraysEqual(newvalue, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.getLocalPatternChars()'
+ */
+ public void testGetLocalPatternChars() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.getLocalPatternChars());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.setLocalPatternChars(String)'
+ */
+ public void testSetLocalPatternChars() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ String pat = dfs.getLocalPatternChars();
+ StringBuffer buf = new StringBuffer(pat);
+ buf.setCharAt(0, (char)(pat.charAt(0) + 1));
+ String pat2 = buf.toString();
+ dfs.setLocalPatternChars(pat2);
+ String pat3 = dfs.getLocalPatternChars();
+ assertNotEqual(pat, pat2);
+ assertEquals(pat2, pat3);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.toString()'
+ */
+ public void testToString() {
+ DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+ assertNotNull(dfs.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.clone()'
+ */
+ public void testClone() {
+ // tested by testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormatSymbols.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // tested by testHashCode
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatTest.java
new file mode 100644
index 00000000000..1e1e7e9815b
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatTest.java
@@ -0,0 +1,454 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.text.FieldPosition;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormat;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+public class DateFormatTest extends ICUTestCase {
+ private Calendar aCal;
+ private Calendar anESTCal;
+ private Date aDate;
+ private String aDateString;
+ private String aTimeString;
+ private String anESTTimeString;
+ private String aDateTimeString;
+ private String aShortDateTimeString;
+ private String aDefaultESTDateTimeString;
+ private DateFormat aDF;
+ private StringBuffer aBuf;
+ private FieldPosition anFP;
+ private FieldPosition anFPField;
+
+ private static int YEAR_POS_START = 8;
+ private static int YEAR_POS_END = 12;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+
+ java.util.GregorianCalendar gcal = new java.util.GregorianCalendar();
+ gcal.clear();
+ gcal.set(java.util.GregorianCalendar.YEAR, 1990);
+ gcal.set(java.util.GregorianCalendar.MONTH, java.util.GregorianCalendar.DECEMBER);
+ gcal.set(java.util.GregorianCalendar.DATE, 17);
+ gcal.set(java.util.GregorianCalendar.HOUR, 5);
+ gcal.set(java.util.GregorianCalendar.MINUTE, 17);
+ aCal = new Calendar(gcal);
+ anESTCal = Calendar.getInstance();
+ anESTCal.setTimeZone(TimeZone.getTimeZone("EST"));
+ aDate = gcal.getTime();
+ aDateString = "Dec 17, 1990"; // medium -- the default
+ aTimeString = "5:17:00 AM"; // medium
+ anESTTimeString = "8:17:00 AM";
+ aDateTimeString = "Dec 17, 1990 5:17:00 AM"; // medium, medium
+ aDefaultESTDateTimeString = "Dec 17, 1990 8:17 AM"; // medium, short -- the default
+ aShortDateTimeString = "12/17/90 5:17 AM"; // short, short
+ aDF = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.US);
+ aBuf = new StringBuffer();
+ anFP = new FieldPosition(DateFormat.YEAR_FIELD);
+ anFPField = new FieldPosition(DateFormat.Field.YEAR);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.hashCode()'
+ */
+ public final void testHashCode() {
+ DateFormat df = DateFormat.getInstance();
+ DateFormat eq = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);
+ testEHCS(df, eq, aDF);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.DateFormat(DateFormat)'
+ */
+ public final void testDateFormat() {
+ DateFormat df = new DateFormat(java.text.DateFormat.getInstance());
+ assertEquals(DateFormat.getInstance(), df);
+ }
+
+ private void assertEqualDateString(StringBuffer buf) {
+ assertEquals(aDateTimeString, buf.toString());
+ }
+
+ private void assertEqualDateString(String str) {
+ assertEquals(aDateTimeString, str);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.format(Object, StringBuffer, FieldPosition)'
+ */
+ public final void testFormatObjectStringBufferFieldPosition() {
+     assertEqualDateString(aDF.format((Object)aDate, aBuf, anFP)); // cast selects the Object overload under test
+     assertEquals(YEAR_POS_START, anFP.getBeginIndex());
+     assertEquals(YEAR_POS_END, anFP.getEndIndex());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.format(Calendar, StringBuffer, FieldPosition)'
+ */
+ public final void testFormatCalendarStringBufferFieldPosition() {
+ assertEqualDateString(aDF.format(aCal, aBuf, anFP));
+ assertEquals(YEAR_POS_START, anFP.getBeginIndex());
+ assertEquals(YEAR_POS_END, anFP.getEndIndex());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.format(Date, StringBuffer, FieldPosition)'
+ */
+ public final void testFormatDateStringBufferFieldPosition() {
+ assertEqualDateString(aDF.format(aDate, aBuf, anFPField));
+ assertEquals(YEAR_POS_START, anFPField.getBeginIndex());
+ assertEquals(YEAR_POS_END, anFPField.getEndIndex());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.format(Date)'
+ */
+ public final void testFormatDate() {
+ assertEqualDateString(aDF.format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.parse(String)'
+ */
+ public final void testParseString() throws Exception {
+ assertEquals(aDate, aDF.parse(aDateTimeString));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.parse(String, Calendar, ParsePosition)'
+ */
+ public final void testParseStringCalendarParsePosition() {
+ aDF.parse(aDateTimeString, aCal, new ParsePosition(0));
+ assertEquals(aDate, aCal.getTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.parse(String, ParsePosition)'
+ */
+ public final void testParseStringParsePosition() {
+ assertEquals(aDate, aDF.parse(aDateTimeString, new ParsePosition(0)));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.parseObject(String, ParsePosition)'
+ */
+ public final void testParseObjectStringParsePosition() {
+ assertEquals(aDate, aDF.parseObject(aDateTimeString, new ParsePosition(0)));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance()'
+ */
+ public final void testGetTimeInstance() {
+ assertEquals(aTimeString, DateFormat.getTimeInstance().format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(int)'
+ */
+ public final void testGetTimeInstanceInt() {
+ assertEquals(aTimeString, DateFormat.getTimeInstance(DateFormat.MEDIUM).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(int, Locale)'
+ */
+ public final void testGetTimeInstanceIntLocale() {
+ assertEquals(aTimeString, DateFormat.getTimeInstance(DateFormat.MEDIUM, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(int, ULocale)'
+ */
+ public final void testGetTimeInstanceIntULocale() {
+ assertEquals(aTimeString, DateFormat.getTimeInstance(DateFormat.MEDIUM, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance()'
+ */
+ public final void testGetDateInstance() {
+ assertEquals(aDateString, DateFormat.getDateInstance().format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(int)'
+ */
+ public final void testGetDateInstanceInt() {
+ assertEquals(aDateString, DateFormat.getDateInstance(DateFormat.MEDIUM).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(int, Locale)'
+ */
+ public final void testGetDateInstanceIntLocale() {
+ assertEquals(aDateString, DateFormat.getDateInstance(DateFormat.MEDIUM, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(int, ULocale)'
+ */
+ public final void testGetDateInstanceIntULocale() {
+ assertEquals(aDateString, DateFormat.getDateInstance(DateFormat.MEDIUM, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance()'
+ */
+ public final void testGetDateTimeInstance() {
+ assertEquals(aDateTimeString, DateFormat.getDateTimeInstance().format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(int, int)'
+ */
+ public final void testGetDateTimeInstanceIntInt() {
+ assertEquals(aDateTimeString,
+ DateFormat.getDateTimeInstance(
+ DateFormat.MEDIUM, DateFormat.MEDIUM).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(int, int, Locale)'
+ */
+ public final void testGetDateTimeInstanceIntIntLocale() {
+ assertEquals(aDateTimeString,
+ DateFormat.getDateTimeInstance(
+ DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(int, int, ULocale)'
+ */
+ public final void testGetDateTimeInstanceIntIntULocale() {
+ assertEquals(aDateTimeString,
+ DateFormat.getDateTimeInstance(
+ DateFormat.MEDIUM, DateFormat.MEDIUM, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getInstance()'
+ */
+ public final void testGetInstance() {
+ assertEquals(aShortDateTimeString, DateFormat.getInstance().format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getAvailableLocales()'
+ */
+ public final void testGetAvailableLocales() {
+ Locale[] locales = DateFormat.getAvailableLocales();
+ if (ICUTestCase.testingWrapper) {
+ ICUTestCase.assertArraysEqual(java.text.DateFormat.getAvailableLocales(), locales);
+ } else {
+ assertNotNull(locales);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.setCalendar(Calendar)'
+ */
+ public final void testSetCalendar() {
+ Calendar cal = Calendar.getInstance();
+ cal.setTimeZone(TimeZone.getTimeZone("EST"));
+ DateFormat df = DateFormat.getTimeInstance(DateFormat.SHORT);
+ df.setCalendar(cal);
+ assertEquals("8:17 AM", df.format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getCalendar()'
+ */
+ public final void testGetCalendar() {
+ Calendar cal = Calendar.getInstance();
+ cal.setTimeZone(TimeZone.getTimeZone("EST"));
+ DateFormat df = DateFormat.getTimeInstance(DateFormat.SHORT);
+ df.setCalendar(cal);
+ assertEquals(cal, df.getCalendar());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.setNumberFormat(NumberFormat)'
+ */
+ public final void testSetNumberFormat() {
+ // no easy way to test effect of setting the number format
+ NumberFormat nf = NumberFormat.getInstance();
+ DateFormat df = DateFormat.getTimeInstance(DateFormat.SHORT);
+ df.setNumberFormat(nf);
+ // note, can't actually USE the dateformat since it changes the calendar
+ assertEquals(nf, df.getNumberFormat());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getNumberFormat()'
+ */
+ public final void testGetNumberFormat() {
+ // see testSetNumberFormat
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.setTimeZone(TimeZone)'
+ */
+ public final void testSetTimeZone() {
+ DateFormat df = DateFormat.getTimeInstance(DateFormat.SHORT);
+ TimeZone tz = TimeZone.getTimeZone("EST");
+ df.setTimeZone(tz);
+ assertEquals("8:17 AM", df.format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeZone()'
+ */
+ public final void testGetTimeZone() {
+ DateFormat df = DateFormat.getTimeInstance(DateFormat.SHORT);
+ TimeZone tz = TimeZone.getTimeZone("EST");
+ df.setTimeZone(tz);
+ assertEquals(tz, df.getTimeZone());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.setLenient(boolean)'
+ */
+ public final void testSetLenient() throws Exception {
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ df.parse("2/31/90"); // succeeds, default is lenient
+ df.setLenient(false);
+ try {
+ df.parse("2/31/90");
+ throw new Exception("strict parse should have failed");
+ }
+ catch (ParseException e) {
+ // ok, this is what we expect
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.isLenient()'
+ */
+ public final void testIsLenient() {
+ DateFormat df = DateFormat.getInstance();
+ assertTrue(df.isLenient());
+ df.setLenient(false);
+ assertFalse(df.isLenient());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(Calendar, int, Locale)'
+ */
+ public final void testGetDateInstanceCalendarIntLocale() {
+ assertEquals(aDateString, DateFormat.getDateInstance(aCal, DateFormat.MEDIUM, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(Calendar, int, ULocale)'
+ */
+ public final void testGetDateInstanceCalendarIntULocale() {
+ assertEquals(aDateString, DateFormat.getDateInstance(aCal, DateFormat.MEDIUM, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(Calendar, int, Locale)'
+ */
+ public final void testGetTimeInstanceCalendarIntLocale() {
+ assertEquals(anESTTimeString, DateFormat.getTimeInstance(anESTCal, DateFormat.MEDIUM, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(Calendar, int, ULocale)'
+ */
+ public final void testGetTimeInstanceCalendarIntULocale() {
+ assertEquals(anESTTimeString, DateFormat.getTimeInstance(anESTCal, DateFormat.MEDIUM, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(Calendar, int, int, Locale)'
+ */
+ public final void testGetDateTimeInstanceCalendarIntIntLocale() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getDateTimeInstance(anESTCal, DateFormat.MEDIUM, DateFormat.SHORT, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(Calendar, int, int, ULocale)'
+ */
+ public final void testGetDateTimeInstanceCalendarIntIntULocale() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getDateTimeInstance(anESTCal, DateFormat.MEDIUM, DateFormat.SHORT, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getInstance(Calendar, Locale)'
+ */
+ public final void testGetInstanceCalendarLocale() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getInstance(anESTCal, Locale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getInstance(Calendar, ULocale)'
+ */
+ public final void testGetInstanceCalendarULocale() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getInstance(anESTCal, ULocale.US).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getInstance(Calendar)'
+ */
+ public final void testGetInstanceCalendar() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getInstance(anESTCal).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateInstance(Calendar, int)'
+ */
+ public final void testGetDateInstanceCalendarInt() {
+ assertEquals(aDateString, DateFormat.getDateInstance(aCal, DateFormat.MEDIUM).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getTimeInstance(Calendar, int)'
+ */
+ public final void testGetTimeInstanceCalendarInt() {
+ assertEquals(anESTTimeString, DateFormat.getTimeInstance(anESTCal, DateFormat.MEDIUM).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.getDateTimeInstance(Calendar, int, int)'
+ */
+ public final void testGetDateTimeInstanceCalendarIntInt() {
+ assertEquals(aDefaultESTDateTimeString, DateFormat.getDateTimeInstance(anESTCal, DateFormat.MEDIUM, DateFormat.SHORT).format(aDate));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.toString()'
+ */
+ public final void testToString() {
+ assertNotNull(aDF.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.clone()'
+ */
+ public final void testClone() {
+ // see testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DateFormat.equals(Object)'
+ */
+ public final void testEqualsObject() {
+ // see testHashCode
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatSymbolsTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatSymbolsTest.java
new file mode 100644
index 00000000000..22779a5b2b0
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatSymbolsTest.java
@@ -0,0 +1,344 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.util.ULocale;
+
+public class DecimalFormatSymbolsTest extends ICUTestCase {
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.hashCode()'
+ */
+ public void testHashCode() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(Locale.US);
+ DecimalFormatSymbols dfs2 = new DecimalFormatSymbols(ULocale.US);
+ DecimalFormatSymbols dfsn = new DecimalFormatSymbols(Locale.FRANCE);
+ testEHCS(dfs, dfs2, dfsn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.DecimalFormatSymbols(DecimalFormatSymbols)'
+ */
+ public void testDecimalFormatSymbolsDecimalFormatSymbols() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.DecimalFormatSymbols()'
+ */
+ public void testDecimalFormatSymbols() {
+     DecimalFormatSymbols dfs = new DecimalFormatSymbols(); // ICUTestCase.setUp makes US the default locale
+     assertEquals('.', dfs.getDecimalSeparator()); // comparing a char against -1 can never fail
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.DecimalFormatSymbols(Locale)'
+ */
+ public void testDecimalFormatSymbolsLocale() {
+     DecimalFormatSymbols dfs = new DecimalFormatSymbols(Locale.US);
+     assertEquals('.', dfs.getDecimalSeparator()); // comparing a char against -1 can never fail
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.DecimalFormatSymbols(ULocale)'
+ */
+ public void testDecimalFormatSymbolsULocale() {
+     DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+     assertEquals('.', dfs.getDecimalSeparator()); // comparing a char against -1 can never fail
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getZeroDigit()'
+ */
+ public void testGetZeroDigit() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('0', dfs.getZeroDigit());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setZeroDigit(char)'
+ */
+ public void testSetZeroDigit() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getZeroDigit();
+ char value1 = (char)(value + 1);
+ dfs.setZeroDigit(value1);
+ char result = dfs.getZeroDigit();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getGroupingSeparator()'
+ */
+ public void testGetGroupingSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals(',', dfs.getGroupingSeparator());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setGroupingSeparator(char)'
+ */
+ public void testSetGroupingSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getGroupingSeparator();
+ char value1 = (char)(value + 1);
+ dfs.setGroupingSeparator(value1);
+ char result = dfs.getGroupingSeparator();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getDecimalSeparator()'
+ */
+ public void testGetDecimalSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('.', dfs.getDecimalSeparator());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setDecimalSeparator(char)'
+ */
+ public void testSetDecimalSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getDecimalSeparator();
+ char value1 = (char)(value + 1);
+ dfs.setDecimalSeparator(value1);
+ char result = dfs.getDecimalSeparator();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getPerMill()'
+ */
+ public void testGetPerMill() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('\u2030', dfs.getPerMill());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setPerMill(char)'
+ */
+ public void testSetPerMill() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getPerMill();
+ char value1 = (char)(value + 1);
+ dfs.setPerMill(value1);
+ char result = dfs.getPerMill();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getPercent()'
+ */
+ public void testGetPercent() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('%', dfs.getPercent());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setPercent(char)'
+ */
+ public void testSetPercent() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getPercent();
+ char value1 = (char)(value + 1);
+ dfs.setPercent(value1);
+ char result = dfs.getPercent();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getDigit()'
+ */
+ public void testGetDigit() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('#', dfs.getDigit());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setDigit(char)'
+ */
+ public void testSetDigit() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getDigit();
+ char value1 = (char)(value + 1);
+ dfs.setDigit(value1);
+ char result = dfs.getDigit();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getPatternSeparator()'
+ */
+ public void testGetPatternSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals(';', dfs.getPatternSeparator());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setPatternSeparator(char)'
+ */
+ public void testSetPatternSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getPatternSeparator();
+ char value1 = (char)(value + 1);
+ dfs.setPatternSeparator(value1);
+ char result = dfs.getPatternSeparator();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getInfinity()'
+ */
+ public void testGetInfinity() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals("\u221e", dfs.getInfinity());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setInfinity(String)'
+ */
+ public void testSetInfinity() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ String value = dfs.getInfinity();
+ String value1 = value + "!";
+ dfs.setInfinity(value1);
+ String result = dfs.getInfinity();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getNaN()'
+ */
+ public void testGetNaN() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertNotNull(dfs.getNaN()); // java returns missing character???
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setNaN(String)'
+ */
+ public void testSetNaN() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ String value = dfs.getNaN();
+ String value1 = value + "!";
+ dfs.setNaN(value1);
+ String result = dfs.getNaN();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getMinusSign()'
+ */
+ public void testGetMinusSign() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('-', dfs.getMinusSign());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setMinusSign(char)'
+ */
+ public void testSetMinusSign() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getMinusSign();
+ char value1 = (char)(value + 1);
+ dfs.setMinusSign(value1);
+ char result = dfs.getMinusSign();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getCurrencySymbol()'
+ */
+ public void testGetCurrencySymbol() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals("$", dfs.getCurrencySymbol());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setCurrencySymbol(String)'
+ */
+ public void testSetCurrencySymbol() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ String value = dfs.getCurrencySymbol();
+ String value1 = value + "!";
+ dfs.setCurrencySymbol(value1);
+ String result = dfs.getCurrencySymbol();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getInternationalCurrencySymbol()'
+ */
+ public void testGetInternationalCurrencySymbol() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals("USD", dfs.getInternationalCurrencySymbol());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setInternationalCurrencySymbol(String)'
+ */
+ public void testSetInternationalCurrencySymbol() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ String value = dfs.getInternationalCurrencySymbol();
+ String value1 = value + "!";
+ dfs.setInternationalCurrencySymbol(value1);
+ String result = dfs.getInternationalCurrencySymbol();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.getMonetaryDecimalSeparator()'
+ */
+ public void testGetMonetaryDecimalSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ assertEquals('.', dfs.getMonetaryDecimalSeparator());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.setMonetaryDecimalSeparator(char)'
+ */
+ public void testSetMonetaryDecimalSeparator() {
+ DecimalFormatSymbols dfs = new DecimalFormatSymbols(ULocale.US);
+ char value = dfs.getMonetaryDecimalSeparator();
+ char value1 = (char)(value + 1);
+ dfs.setMonetaryDecimalSeparator(value1);
+ char result = dfs.getMonetaryDecimalSeparator();
+ assertNotEqual(value, result);
+ assertEquals(value1, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.clone()'
+ */
+ public void testClone() {
+ // tested in testHashcode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormatSymbols.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // tested in testHashcode
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatTest.java
new file mode 100644
index 00000000000..b438e2fb95f
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatTest.java
@@ -0,0 +1,242 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Locale;
+
+import com.ibm.icu.text.DecimalFormat;
+import com.ibm.icu.text.DecimalFormatSymbols;
+
+public class DecimalFormatTest extends ICUTestCase {
+ private static final long lmax = Long.MAX_VALUE;
+ private static final double dsmall = 23.33;
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.clone()'
+ */
+ public void testClone() {
+ DecimalFormat df = new DecimalFormat("#,#0.00");
+ DecimalFormat df2 = new DecimalFormat("#,#0.00");
+ DecimalFormat dfn = new DecimalFormat("#,#0.00");
+ dfn.setNegativePrefix(dfn.getNegativePrefix() + '!');
+ testEHCS(df, df2, dfn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.DecimalFormat(DecimalFormat)'
+ */
+ public void testDecimalFormatDecimalFormat() {
+ // tested implicitly
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.DecimalFormat()'
+ */
+ public void testDecimalFormat() {
+ DecimalFormat df = new DecimalFormat();
+ assertEquals("9,223,372,036,854,775,807", df.format(lmax));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.DecimalFormat(String)'
+ */
+ public void testDecimalFormatString() {
+ DecimalFormat df = new DecimalFormat("#,##0.000");
+ assertEquals("23.330", df.format(dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.DecimalFormat(String, DecimalFormatSymbols)'
+ */
+ public void testDecimalFormatStringDecimalFormatSymbols() {
+ DecimalFormatSymbols sym = new DecimalFormatSymbols(Locale.FRANCE);
+ DecimalFormat df = new DecimalFormat("#,##0.000", sym);
+ assertEquals("23,330", df.format(dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getDecimalFormatSymbols()'
+ */
+ public void testGetDecimalFormatSymbols() {
+ DecimalFormatSymbols sym = new DecimalFormatSymbols(Locale.FRANCE);
+ DecimalFormat df = new DecimalFormat("#,##0.000", sym);
+ assertEquals(sym, df.getDecimalFormatSymbols());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setDecimalFormatSymbols(DecimalFormatSymbols)'
+ */
+ public void testSetDecimalFormatSymbols() {
+ DecimalFormat df = new DecimalFormat();
+ df.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.FRANCE));
+ assertEquals("23,33", df.format(dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getPositivePrefix()'
+ */
+ public void testGetPositivePrefix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ assertEquals("+", df.getPositivePrefix());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setPositivePrefix(String)'
+ */
+ public void testSetPositivePrefix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ df.setPositivePrefix("?");
+ assertEquals("?23.3", df.format(dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getNegativePrefix()'
+ */
+ public void testGetNegativePrefix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ assertEquals("-", df.getNegativePrefix());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setNegativePrefix(String)'
+ */
+ public void testSetNegativePrefix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ df.setNegativePrefix("~");
+ assertEquals("~23.3", df.format(-dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getPositiveSuffix()'
+ */
+ public void testGetPositiveSuffix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#**;-#,##0.#~~");
+ assertEquals("**", df.getPositiveSuffix());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setPositiveSuffix(String)'
+ */
+ public void testSetPositiveSuffix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ df.setPositiveSuffix("**");
+ assertEquals("+23.3**", df.format(dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getNegativeSuffix()'
+ */
+ public void testGetNegativeSuffix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#**;-#,##0.#~~");
+ assertEquals("~~", df.getNegativeSuffix());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setNegativeSuffix(String)'
+ */
+ public void testSetNegativeSuffix() {
+ DecimalFormat df = new DecimalFormat("+#,##0.#;-#,##0.#");
+ df.setNegativeSuffix("~~");
+ assertEquals("-23.3~~", df.format(-dsmall));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getMultiplier()'
+ */
+ public void testGetMultiplier() {
+ DecimalFormat df = new DecimalFormat("%000");
+ df.setMultiplier(1000);
+ assertEquals(1000, df.getMultiplier());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setMultiplier(int)'
+ */
+ public void testSetMultiplier() {
+ DecimalFormat df = new DecimalFormat("%000");
+ assertEquals("%012", df.format(.123));
+ df.setMultiplier(1000);
+ assertEquals("%123", df.format(.123));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.getGroupingSize()'
+ */
+ public void testGetGroupingSize() {
+ DecimalFormat df = new DecimalFormat("#,#0.#");
+ assertEquals(2, df.getGroupingSize());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setGroupingSize(int)'
+ */
+ public void testSetGroupingSize() {
+ DecimalFormat df = new DecimalFormat("#,##0.##");
+ assertEquals("1,234,567.89", df.format(1234567.89));
+ df.setGroupingSize(2);
+ assertEquals("1,23,45,67.89", df.format(1234567.89));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.isDecimalSeparatorAlwaysShown()'
+ */
+ public void testIsDecimalSeparatorAlwaysShown() {
+ DecimalFormat df = new DecimalFormat("#.#");
+ df.setDecimalSeparatorAlwaysShown(false);
+ assertEquals("1", df.format(1));
+ assertEquals("1.2", df.format(1.2));
+ df.setDecimalSeparatorAlwaysShown(true);
+ assertEquals("1.", df.format(1));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.setDecimalSeparatorAlwaysShown(boolean)'
+ */
+ public void testSetDecimalSeparatorAlwaysShown() {
+ DecimalFormat df = new DecimalFormat("#.#");
+ df.setDecimalSeparatorAlwaysShown(false);
+ assertFalse(df.isDecimalSeparatorAlwaysShown());
+ df.setDecimalSeparatorAlwaysShown(true);
+ assertTrue(df.isDecimalSeparatorAlwaysShown());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.toPattern()'
+ */
+ public void testToPattern() {
+ DecimalFormat df = new DecimalFormat("#,##0.##");
+ assertEquals("#,##0.##", df.toPattern());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.toLocalizedPattern()'
+ */
+ public void testToLocalizedPattern() {
+ DecimalFormat df = new DecimalFormat("#,##0.##", new DecimalFormatSymbols(Locale.FRANCE));
+ assertEquals("#,##0.##", df.toPattern());
+ assertEquals("#\u00a0##0,##", df.toLocalizedPattern());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.applyPattern(String)'
+ */
+ public void testApplyPattern() {
+ DecimalFormat df = new DecimalFormat("#,##0.##");
+ df.applyPattern("#,0.#");
+ assertEquals("1,2,3.4", df.format(123.4));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.text.DecimalFormat.applyLocalizedPattern(String)'
+ */
+ public void testApplyLocalizedPattern() {
+ DecimalFormat df = new DecimalFormat("#,##0.##", new DecimalFormatSymbols(Locale.FRANCE));
+ df.applyLocalizedPattern("#\u00a00,#");
+ assertEquals("1\u00a02\u00a03,4", df.format(123.4));
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ICUTestCase.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ICUTestCase.java
new file mode 100644
index 00000000000..617fc14709c
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ICUTestCase.java
@@ -0,0 +1,286 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Array;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Locale;
+
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+import junit.framework.TestCase;
+
+/**
+ * Implement boilerplate tests and shared assertion helpers.
+ * testEHCS tests equals, hashCode, clone, and serialization; assertNotEqual and
+ * assertArraysEqual/assertArraysNotEqual cover inequality and array comparison.
+ */
+public abstract class ICUTestCase extends TestCase {
+ private static final Object[] EMPTY_ARGS = {}; // argument list for reflective calls to no-arg methods
+ private static final Class<?>[] EMPTY_CLASSES = {}; // parameter signature for looking up no-arg methods
+
+ private static final Locale oldLocale = Locale.getDefault();
+ private static final ULocale oldULocale = ULocale.getDefault();
+ private static final java.util.TimeZone oldJTimeZone = java.util.TimeZone.getDefault();
+ private static final TimeZone oldITimeZone = TimeZone.getDefault();
+
+ // TODO: what's the best way to check this?
+ public static final boolean testingWrapper = true;
+
+ protected void setUp() throws Exception { // runs before each test: pins the JDK and ICU default locale and time zone
+ super.setUp();
+ Locale.setDefault(Locale.US); // JDK default locale
+ ULocale.setDefault(ULocale.US); // ICU default locale
+ java.util.TimeZone.setDefault(java.util.TimeZone.getTimeZone("PST")); // JDK default time zone
+ TimeZone.setDefault(TimeZone.getTimeZone("PST")); // ICU default time zone (com.ibm.icu.util.TimeZone)
+ }
+
+ protected void tearDown() throws Exception { // runs after each test: puts back the defaults captured in the old* constants
+ ULocale.setDefault(oldULocale); // ICU default locale
+ Locale.setDefault(oldLocale); // JDK default locale
+ TimeZone.setDefault(oldITimeZone); // ICU default time zone
+ java.util.TimeZone.setDefault(oldJTimeZone); // JDK default time zone
+ super.tearDown();
+ }
+
+ private static final Object test = new Object();
+
+ /**
+ * Assert that two objects are _not_ equal. Curiously missing from Assert.
+ * @param lhs an object to test, may be null
+ * @param rhs an object to test, may be null
+ */
+ public static void assertNotEqual(Object lhs, Object rhs) {
+     if (lhs != null) {
+         if (lhs.equals(rhs)) {
+             fail(lhs.toString() + " equals " + rhs);
+         }
+     } else if (rhs == null) {
+         fail("null equals null"); // both sides null counts as equal
+     }
+ }
+
+ public static void assertNotEqual(long lhs, long rhs) {
+     // primitive overload: fails the test when both values are numerically identical
+     final boolean same = (lhs == rhs);
+     if (same) fail("values are equal: " + lhs);
+ }
+
+ /**
+ * Test whether equality, hashCode, clone, and serialization work as expected.
+ * Equals(Object) is assumed to return false (not throw an exception) if passed
+ * null or an object of an incompatible class.
+ * Hashcodes must be equal if the two objects compare equal. No attempt is made to
+ * evaluate the quality of the hashcode distribution, so (in particular) degenerate
+ * hashcode implementations will pass this test.
+ * Clone will be tested if the method "clone" is public on the class of obj.
+ * It is assumed to return an object that compares equal to obj.
+ * Serialization will be tested if object implements Serializable or Externalizable.
+ * It is assumed the serialized/deserialized object compares equal to obj.
+ * @param obj the object to test
+ * @param eq an object that should compare equal to, but is not the same as, obj.
+ * it should be assignable to the class of obj.
+ * @param neq a non-null object that should not compare equal to obj.
+ * it should be assignable to the class of obj.
+ */
+ public static void testEHCS(Object obj, Object eq, Object neq) {
+     if (obj == null || eq == null || neq == null) {
+         throw new NullPointerException();
+     }
+     Class<? extends Object> cls = obj.getClass();
+     if (!(cls.isAssignableFrom(eq.getClass()) && cls.isAssignableFrom(neq.getClass()))) {
+         throw new IllegalArgumentException("unassignable classes");
+     }
+
+     // reflexive
+     assertEquals(obj, obj);
+
+     // should return false, not throw exception
+     assertNotEqual(obj, test);
+     assertNotEqual(obj, null);
+
+     // commutative
+     assertEquals(obj, eq);
+     assertEquals(eq, obj);
+
+     assertNotEqual(obj, neq);
+     assertNotEqual(neq, obj);
+
+     // equal objects MUST have equal hashes, unequal objects MAY have equal hashes
+     assertEquals(obj.hashCode(), eq.hashCode());
+
+     Object clone = null;
+     try {
+         // look for public clone method and call it if available
+         Method method_clone = cls.getMethod("clone", EMPTY_CLASSES);
+         clone = method_clone.invoke(obj, EMPTY_ARGS);
+         assertNotNull(clone);
+     }
+     catch(NoSuchMethodException e) {
+         // ok: no public clone(), so the clone checks below are skipped
+     }
+     catch(InvocationTargetException e) {
+         // ok: clone() itself threw; clone stays null and the checks are skipped
+     }
+     catch(IllegalAccessException e) {
+         // ok: clone() is not callable from here; the checks are skipped
+     }
+
+     if (clone != null) {
+         assertEquals(obj, clone);
+         assertEquals(clone, obj);
+     }
+
+     if (obj instanceof Serializable || obj instanceof Externalizable) {
+         Object ser = null;
+         try {
+             ByteArrayOutputStream bos = new ByteArrayOutputStream();
+             ObjectOutputStream oos = new ObjectOutputStream(bos);
+             oos.writeObject(obj); // round-trip obj itself so classes without a public clone() are covered too
+             oos.close();
+
+             ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+             ObjectInputStream ois = new ObjectInputStream(bis);
+             ser = ois.readObject();
+             ois.close();
+         }
+         catch(IOException e) {
+             System.err.println(e.getMessage());
+             throw new RuntimeException(e);
+         }
+         catch(ClassNotFoundException e) {
+             System.err.println(e.getMessage());
+             throw new RuntimeException(e);
+         }
+
+         // obj is non-null, so a successful round trip must produce an object
+         assertNotNull(ser);
+         assertEquals(obj, ser);
+         assertEquals(ser, obj);
+         assertEquals(obj.hashCode(), ser.hashCode());
+     }
+ }
+
+ /**
+ * Fail if the arrays are not equal. To be equal, the arrays must
+ * be the same length, and each element in the left array must compare
+ * equal to the corresponding element of the right array.
+ * Also fails if one of the objects is not an array.
+ * @param lhs the left array
+ * @param rhs the right array
+ */
+ public static void assertArraysEqual(Object lhs, Object rhs) {
+     Class<? extends Object> lcls = lhs.getClass();
+     Class<? extends Object> rcls = rhs.getClass();
+     if (!(lcls.isArray() && rcls.isArray())) {
+         fail("objects are not arrays");
+     }
+     String result = arraysAreEqual(lhs, rhs); // null means equal, otherwise a description of the difference
+     if (result != null) {
+         fail(result);
+     }
+ }
+
+ /**
+ * Fail if the arrays are equal. Also fails if one or the other
+ * argument is not an array.
+ * @param lhs the left array
+ * @param rhs the right array
+ */
+ public static void assertArraysNotEqual(Object lhs, Object rhs) {
+     Class<? extends Object> lcls = lhs.getClass();
+     Class<? extends Object> rcls = rhs.getClass();
+     if (!(lcls.isArray() && rcls.isArray())) {
+         fail("objects are not arrays");
+     }
+     String result = arraysAreEqual(lhs, rhs); // null means the arrays matched element for element
+     if (result == null) {
+         fail("arrays are equal");
+     }
+ }
+
+ // slow but general: returns null when the arrays match, else a message describing the first difference
+ private static String arraysAreEqual(Object lhsa, Object rhsa) {
+     int lhsl = Array.getLength(lhsa);
+     int rhsl = Array.getLength(rhsa);
+     if (lhsl != rhsl) {
+         return "length " + lhsl + " != " + rhsl;
+     }
+     boolean lhsaA = lhsa.getClass().getComponentType().isArray();
+     boolean rhsaA = rhsa.getClass().getComponentType().isArray();
+     if (lhsaA != rhsaA) {
+         return (lhsaA ? "" : "non-") + "array != " + (rhsaA ? "" : "non-") + "array";
+     }
+     for (int i = 0; i < lhsl; ++i) {
+         Object lhse = Array.get(lhsa, i);
+         Object rhse = Array.get(rhsa, i);
+         if (lhse == null || rhse == null) { // a null on either side is never dereferenced or recursed into
+             if (lhse != rhse) {
+                 return lhse + " != " + rhse;
+             }
+         } else {
+             if (lhsaA) {
+                 String result = arraysAreEqual(lhse, rhse);
+                 if (result != null) {
+                     if (result.charAt(0) != '[') { // keep nested index prefixes adjacent: [i][j] message
+                         result = " " + result;
+                     }
+                     return "[" + i + "]" + result;
+                 }
+             } else {
+                 if (!lhse.equals(rhse)) {
+                     return lhse.toString() + " != " + rhse;
+                 }
+             }
+         }
+     }
+     return null;
+ }
+
+ // much more painful and slow than it should be... partly because of the
+ // oddness of clone, partly because arrays don't provide a Method for
+ // 'clone' despite the fact that they implement it and make it public.
+ public static Object cloneComplex(Object obj) {
+     Object result = null; // a null input yields a null result
+     if (obj != null) {
+         Class<? extends Object> cls = obj.getClass();
+         if (cls.isArray()) {
+             int len = Array.getLength(obj);
+             Class<?> typ = cls.getComponentType();
+             result = Array.newInstance(typ, len);
+             boolean prim = typ.isPrimitive();
+             for (int i = 0; i < len; ++i) {
+                 Object elem = Array.get(obj, i);
+                 Array.set(result, i, prim ? elem : cloneComplex(elem)); // primitives are copied, objects cloned recursively
+             }
+         } else {
+             result = obj; // default
+             try {
+                 Method cloneM = cls.getMethod("clone", EMPTY_CLASSES);
+                 result = cloneM.invoke(obj, EMPTY_ARGS);
+             }
+             catch (NoSuchMethodException e) { // no public clone(): keep returning obj itself
+             }
+             catch (IllegalAccessException e) { // clone() not callable: keep returning obj itself
+             }
+             catch (InvocationTargetException e) { // clone() threw: keep returning obj itself
+             }
+         }
+     }
+     return result;
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/MessageFormatTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/MessageFormatTest.java
new file mode 100644
index 00000000000..9947214313e
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/MessageFormatTest.java
@@ -0,0 +1,307 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.tests;
+
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormat;
+import com.ibm.icu.text.MessageFormat;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.ULocale;
+
+ public class MessageFormatTest extends ICUTestCase {
+     private final String pattern = "Deleted {0,number} files at {1,time,short} on {1,date}.";
+     private final String altPattern = "Deleted {0, number } files at {1, time, short} on {1, date}.";
+     private final Date date = new Date(716698890835L);
+     private final Number num = Long.valueOf(3456);
+     private final Object[] args = { num, date };
+     private final Date dateOnly = new Date(716626800000L);
+     private final String englishTarget = "Deleted 3,456 files at 8:01 PM on Sep 16, 1992.";
+     private final String germanTarget = "Deleted 3.456 files at 20:01 on 16.09.1992.";
+     private final String modifiedTarget = "Deleted 3,456 files at 8:01:30 PM PDT on Sep 16, 1992.";
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.hashCode()' (testClone and testEqualsObject defer to this test)
+      */
+     public void testHashCode() {
+         MessageFormat mf = new MessageFormat(pattern);
+         MessageFormat eq = new MessageFormat(altPattern);
+         MessageFormat ne = new MessageFormat("Deleted {0, number, currency} files at {1, time} on {1, date}.");
+         testEHCS(mf, eq, ne);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.MessageFormat(MessageFormat)'
+      */
+     public void testMessageFormatMessageFormat() {
+         // implicitly tested everywhere
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.MessageFormat(String)'
+      */
+     public void testMessageFormatString() {
+         MessageFormat mf = new MessageFormat(pattern);
+         assertEquals(englishTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.MessageFormat(String, Locale)'
+      */
+     public void testMessageFormatStringLocale() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         assertEquals(englishTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.MessageFormat(String, ULocale)'
+      */
+     public void testMessageFormatStringULocale() {
+         MessageFormat mf = new MessageFormat(pattern, ULocale.US);
+         assertEquals(englishTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setLocale(Locale)'
+      */
+     public void testSetLocaleLocale() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setLocale(Locale.GERMANY);
+         mf.applyPattern(pattern); // the pattern is re-applied after the locale change before formatting
+         assertEquals(germanTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setLocale(ULocale)'
+      */
+     public void testSetLocaleULocale() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setLocale(ULocale.GERMANY);
+         mf.applyPattern(pattern); // the pattern is re-applied after the locale change before formatting
+         assertEquals(germanTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.getLocale()'
+      */
+     public void testGetLocale() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setLocale(Locale.GERMANY);
+         assertEquals(Locale.GERMANY, mf.getLocale());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.getULocale()'
+      */
+     public void testGetULocale() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setLocale(ULocale.GERMANY);
+         assertEquals(ULocale.GERMANY, mf.getULocale());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.applyPattern(String)'
+      */
+     public void testApplyPattern() {
+         MessageFormat mf = new MessageFormat("foo");
+         mf.applyPattern(pattern);
+         assertEquals(englishTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.toPattern()'
+      */
+     public void testToPattern() {
+         MessageFormat mf = new MessageFormat(altPattern);
+         assertEquals(pattern, mf.toPattern());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setFormatsByArgumentIndex(Format[])' (disabled: the method sits inside this comment)
+     public void testSetFormatsByArgumentIndex() {
+         // this api is broken. if the same argument is used twice with two different
+         // formats, this can't be used, since it sets only one format per argument.
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         Format[] formats = {
+             NumberFormat.getIntegerInstance(),
+             DateFormat.getTimeInstance(DateFormat.SHORT),
+             DateFormat.getDateInstance(),
+         };
+         mf.setFormatsByArgumentIndex(formats);
+         assertEquals(brokenButConformantTarget, mf.format(args));
+     }
+      */
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setFormats(Format[])'
+      */
+     public void testSetFormats() {
+         // this api, while it has the problem that the order of formats depends
+         // on the order in the string, at least lets you set all the formats.
+
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         Format[] formats = {
+             NumberFormat.getIntegerInstance(),
+             DateFormat.getTimeInstance(DateFormat.SHORT),
+             DateFormat.getDateInstance(),
+         };
+         mf.setFormats(formats);
+         assertEquals(englishTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setFormatByArgumentIndex(int, Format)' (disabled: the method sits inside this comment)
+     public void testSetFormatByArgumentIndex() {
+         // same problem, once you set a format for an argument, you've set all of them
+
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setFormatByArgumentIndex(1, DateFormat.getTimeInstance(DateFormat.SHORT));
+         assertEquals(brokenButConformantTarget, mf.format(args));
+
+     }
+      */
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.setFormat(int, Format)'
+      */
+     public void testSetFormat() {
+         // and ok again: index 1 replaces only the time format, as modifiedTarget shows
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         mf.setFormat(1, DateFormat.getTimeInstance(DateFormat.LONG));
+         assertEquals(modifiedTarget, mf.format(args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.getFormatsByArgumentIndex()' (disabled: the method sits inside this comment)
+     public void testGetFormatsByArgumentIndex() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         Format[] formats = mf.getFormatsByArgumentIndex();
+         NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
+         assertEquals(formats[0], nf);
+         DateFormat df = DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.US);
+         assertEquals(formats[1], df);
+     }
+      */
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.getFormats()'
+      */
+     public void testGetFormats() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         Format[] formats = mf.getFormats();
+         NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
+         assertEquals(nf, formats[0]);
+         DateFormat tf = DateFormat.getTimeInstance(DateFormat.SHORT, Locale.US);
+         assertEquals(tf, formats[1]);
+         DateFormat df = DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.US);
+         assertEquals(df, formats[2]);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.format(Object[], StringBuffer, FieldPosition)'
+      */
+     public void testFormatObjectArrayStringBufferFieldPosition() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(0);
+         mf.format(args, buf, fp);
+         assertEquals(englishTarget, buf.toString());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.format(String, Object[])'
+      */
+     public void testFormatStringObjectArray() {
+         assertEquals(englishTarget, MessageFormat.format(pattern, args));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.format(Object, StringBuffer, FieldPosition)'
+      */
+     public void testFormatObjectStringBufferFieldPosition() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(0);
+         mf.format((Object)args, buf, fp); // the cast selects the Object overload instead of Object[]
+         assertEquals(englishTarget, buf.toString());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.parse(String, ParsePosition)'
+      */
+     public void testParseStringParsePosition() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         ParsePosition pp = new ParsePosition(1); // start after the leading "!"
+         Object[] result = mf.parse("!" + englishTarget, pp);
+         assertEquals(num, result[0]);
+         assertEquals(dateOnly, result[1]);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.parse(String)'
+      */
+     public void testParseString() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         try {
+             Object[] result = mf.parse(englishTarget);
+             assertEquals(num, result[0]);
+             assertEquals(dateOnly, result[1]);
+         }
+         catch (ParseException e) {
+             fail(e.getMessage());
+         }
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.parseObject(String, ParsePosition)'
+      */
+     public void testParseObjectStringParsePosition() {
+         MessageFormat mf = new MessageFormat(pattern, Locale.US);
+         ParsePosition pp = new ParsePosition(0);
+         Object result = mf.parseObject(englishTarget, pp);
+         assertEquals(num, ((Object[])result)[0]);
+         assertEquals(dateOnly, ((Object[])result)[1]);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.autoQuoteApostrophe(String)'
+      */
+     public void testAutoQuoteApostrophe() {
+         String str = "Let's meet at {1,time,h 'o'' clock'} at l'Orange Bleue";
+         String pat = MessageFormat.autoQuoteApostrophe(str);
+         MessageFormat mf = new MessageFormat(pat, Locale.US);
+         String result = mf.format(args);
+         assertEquals("Let's meet at 8 o' clock at l'Orange Bleue", result);
+         assertEquals("Let''s meet at {1,time,h 'o'' clock'} at l''Orange Bleue", pat);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.clone()'
+      */
+     public void testClone() {
+         // tested already in testHashCode
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.equals(Object)'
+      */
+     public void testEqualsObject() {
+         // tested already in testHashCode
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.MessageFormat.toString()'
+      */
+     public void testToString() {
+         // no need to test
+     }
+ }
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/NumberFormatTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/NumberFormatTest.java
new file mode 100644
index 00000000000..ec7188ff5b0
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/NumberFormatTest.java
@@ -0,0 +1,447 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2007-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.math.BigInteger;
+import java.text.FieldPosition;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Locale;
+
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.ULocale;
+
+ public class NumberFormatTest extends ICUTestCase {
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.NumberFormat(NumberFormat)'
+      */
+     public void testNumberFormat() {
+         NumberFormat nf = new NumberFormat(java.text.NumberFormat.getInstance());
+         assertEquals(nf, NumberFormat.getInstance());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(Object, StringBuffer, FieldPosition)'
+      */
+     public void testFormatObjectStringBufferFieldPosition() {
+         Number num = Long.valueOf(1234L);
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(NumberFormat.INTEGER_FIELD);
+         NumberFormat.getInstance().format(num, buf, fp);
+         assertEquals("1,234", buf.toString());
+         assertEquals(0, fp.getBeginIndex());
+         assertEquals(5, fp.getEndIndex());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.parseObject(String, ParsePosition)' (note: the body calls parse(String, ParsePosition))
+      */
+     public void testParseObjectStringParsePosition() {
+         ParsePosition pp = new ParsePosition(0);
+         Object result = NumberFormat.getInstance().parse("1,234", pp);
+         assertEquals(Long.valueOf(1234), result);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(double)'
+      */
+     public void testFormatDouble() {
+         assertEquals("1,234.567", NumberFormat.getInstance().format(1234.567));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(long)'
+      */
+     public void testFormatLong() {
+         assertEquals("1,234", NumberFormat.getInstance().format(1234L));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(BigInteger)'
+      */
+     public void testFormatBigInteger() {
+         // note, java doesn't handle biginteger with full precision. NOTE(review): this calls java.text.NumberFormat, not the ICU class - confirm that is intended.
+         BigInteger bi = new BigInteger("123456");
+         assertEquals("123,456", java.text.NumberFormat.getInstance().format(bi));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(double, StringBuffer, FieldPosition)'
+      */
+     public void testFormatDoubleStringBufferFieldPosition() {
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(NumberFormat.FRACTION_FIELD);
+         assertEquals("123,456.789", NumberFormat.getInstance().format(123456.789, buf, fp).toString());
+         assertEquals(8, fp.getBeginIndex());
+         assertEquals(11, fp.getEndIndex());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(long, StringBuffer, FieldPosition)'
+      */
+     public void testFormatLongStringBufferFieldPosition() {
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(NumberFormat.Field.GROUPING_SEPARATOR);
+         assertEquals("123,456", NumberFormat.getInstance().format(123456L, buf, fp).toString());
+         assertEquals(3, fp.getBeginIndex());
+         assertEquals(4, fp.getEndIndex());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.format(BigInteger, StringBuffer, FieldPosition)'
+      */
+     public void testFormatBigIntegerStringBufferFieldPosition() {
+         // note, java doesn't handle biginteger with full precision. NOTE(review): this calls java.text.NumberFormat, not the ICU class - confirm that is intended.
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(0);
+         BigInteger bi = new BigInteger("123456");
+         assertEquals("123,456", java.text.NumberFormat.getInstance().format(bi, buf, fp).toString());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.parse(String, ParsePosition)'
+      */
+     public void testParseStringParsePosition() {
+         ParsePosition pp = new ParsePosition(3); // skip the leading "xxx"
+         assertEquals(Long.valueOf(123456), NumberFormat.getInstance().parse("xxx123,456yyy", pp));
+         assertEquals(10, pp.getIndex());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.parse(String)'
+      */
+     public void testParseString() throws ParseException {
+         Number result = NumberFormat.getInstance().parse("123,456,yyy");
+         assertEquals(Long.valueOf(123456), result);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.isParseIntegerOnly()'
+      */
+     public void testIsParseIntegerOnly() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setParseIntegerOnly(true);
+         assertTrue(nf.isParseIntegerOnly());
+         nf.setParseIntegerOnly(false);
+         assertFalse(nf.isParseIntegerOnly());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setParseIntegerOnly(boolean)'
+      */
+     public void testSetParseIntegerOnly() throws ParseException {
+         String str = "123.456,yyy";
+         NumberFormat nf = NumberFormat.getInstance();
+         assertEquals(Double.valueOf(123.456), nf.parse(str));
+         nf.setParseIntegerOnly(true);
+         assertEquals(Long.valueOf(123), nf.parse(str));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getInstance()'
+      */
+     public void testGetInstance() {
+         // used everywhere, no need to test
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getInstance(Locale)'
+      */
+     public void testGetInstanceLocale() {
+         NumberFormat nf = NumberFormat.getInstance(Locale.GERMANY);
+         assertEquals("123,456", nf.format(123.456));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getInstance(ULocale)'
+      */
+     public void testGetInstanceULocale() {
+         NumberFormat nf = NumberFormat.getInstance(ULocale.GERMANY);
+         assertEquals("123,456", nf.format(123.456));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getNumberInstance()'
+      */
+     public void testGetNumberInstance() {
+         NumberFormat nf = NumberFormat.getNumberInstance();
+         assertEquals("123,456.789", nf.format(123456.789));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getNumberInstance(Locale)'
+      */
+     public void testGetNumberInstanceLocale() {
+         NumberFormat nf = NumberFormat.getNumberInstance(Locale.GERMANY);
+         assertEquals("123.456,789", nf.format(123456.789));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getNumberInstance(ULocale)'
+      */
+     public void testGetNumberInstanceULocale() {
+         NumberFormat nf = NumberFormat.getNumberInstance(ULocale.GERMANY);
+         assertEquals("123.456,789", nf.format(123456.789));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getIntegerInstance()'
+      */
+     public void testGetIntegerInstance() {
+         NumberFormat nf = NumberFormat.getIntegerInstance();
+         assertEquals("123,457", nf.format(123456.789)); // rounds
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getIntegerInstance(Locale)'
+      */
+     public void testGetIntegerInstanceLocale() {
+         NumberFormat nf = NumberFormat.getIntegerInstance(Locale.GERMANY);
+         assertEquals("123.457", nf.format(123456.789)); // rounds
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getIntegerInstance(ULocale)'
+      */
+     public void testGetIntegerInstanceULocale() {
+         NumberFormat nf = NumberFormat.getIntegerInstance(ULocale.GERMANY);
+         assertEquals("123.457", nf.format(123456.789)); // rounds
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getCurrencyInstance()'
+      */
+     public void testGetCurrencyInstance() {
+         NumberFormat nf = NumberFormat.getCurrencyInstance();
+         assertEquals("$123,456.99", nf.format(123456.99));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getCurrencyInstance(Locale)'
+      */
+     public void testGetCurrencyInstanceLocale() {
+         NumberFormat nf = NumberFormat.getCurrencyInstance(Locale.GERMANY);
+         assertEquals("123.456,99 \u20AC", nf.format(123456.99));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getCurrencyInstance(ULocale)'
+      */
+     public void testGetCurrencyInstanceULocale() {
+         NumberFormat nf = NumberFormat.getCurrencyInstance(ULocale.GERMANY);
+         assertEquals("123.456,99 \u20AC", nf.format(123456.99));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getPercentInstance()'
+      */
+     public void testGetPercentInstance() {
+         NumberFormat nf = NumberFormat.getPercentInstance();
+         assertEquals("123,456%", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getPercentInstance(Locale)'
+      */
+     public void testGetPercentInstanceLocale() {
+         NumberFormat nf = NumberFormat.getPercentInstance(Locale.GERMANY);
+         assertEquals("123.456%", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getPercentInstance(ULocale)'
+      */
+     public void testGetPercentInstanceULocale() {
+         NumberFormat nf = NumberFormat.getPercentInstance(ULocale.GERMANY);
+         assertEquals("123.456%", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getScientificInstance()'
+      */
+     public void testGetScientificInstance() {
+         NumberFormat nf = NumberFormat.getScientificInstance();
+         assertEquals(".123456E4", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getScientificInstance(Locale)'
+      */
+     public void testGetScientificInstanceLocale() {
+         NumberFormat nf = NumberFormat.getScientificInstance(Locale.GERMANY);
+         assertEquals(",123456E4", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getScientificInstance(ULocale)'
+      */
+     public void testGetScientificInstanceULocale() {
+         NumberFormat nf = NumberFormat.getScientificInstance(ULocale.GERMANY);
+         assertEquals(",123456E4", nf.format(1234.56));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getAvailableLocales()' (only compared against the JDK when testingWrapper is set)
+      */
+     public void testGetAvailableLocales() {
+         Locale[] ilocales = NumberFormat.getAvailableLocales();
+         if (ICUTestCase.testingWrapper) {
+             Locale[] jlocales = java.text.NumberFormat.getAvailableLocales();
+             for (int i = 0; i < ilocales.length; ++i) {
+                 assertEquals(jlocales[i], ilocales[i]);
+             }
+         }
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getAvailableULocales()' (only compared against the JDK when testingWrapper is set)
+      */
+     public void testGetAvailableULocales() {
+         ULocale[] ulocales = NumberFormat.getAvailableULocales();
+         if (ICUTestCase.testingWrapper) {
+             Locale[] jlocales = java.text.NumberFormat.getAvailableLocales();
+             for (int i = 0; i < ulocales.length; ++i) {
+                 assertEquals(jlocales[i], ulocales[i].toLocale());
+             }
+         }
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.isGroupingUsed()'
+      */
+     public void testIsGroupingUsed() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setGroupingUsed(true);
+         assertTrue(nf.isGroupingUsed());
+         nf.setGroupingUsed(false);
+         assertFalse(nf.isGroupingUsed());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setGroupingUsed(boolean)'
+      */
+     public void testSetGroupingUsed() {
+         NumberFormat nf = NumberFormat.getInstance();
+         assertEquals("123,456,789", nf.format(123456789));
+         nf.setGroupingUsed(false);
+         assertEquals("123456789", nf.format(123456789));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getMaximumIntegerDigits()'
+      */
+     public void testGetMaximumIntegerDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMaximumIntegerDigits(4);
+         assertEquals(4, nf.getMaximumIntegerDigits());
+         nf.setMaximumIntegerDigits(6);
+         assertEquals(6, nf.getMaximumIntegerDigits());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setMaximumIntegerDigits(int)'
+      */
+     public void testSetMaximumIntegerDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMaximumIntegerDigits(4);
+         assertEquals("3,456", nf.format(123456)); // high digits truncated
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getMinimumIntegerDigits()'
+      */
+     public void testGetMinimumIntegerDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMinimumIntegerDigits(4);
+         assertEquals(4, nf.getMinimumIntegerDigits());
+         nf.setMinimumIntegerDigits(6);
+         assertEquals(6, nf.getMinimumIntegerDigits());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setMinimumIntegerDigits(int)'
+      */
+     public void testSetMinimumIntegerDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMinimumIntegerDigits(4);
+         assertEquals("0,012", nf.format(12)); // pad out with zero, grouping still used
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getMaximumFractionDigits()'
+      */
+     public void testGetMaximumFractionDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMaximumFractionDigits(4);
+         assertEquals(4, nf.getMaximumFractionDigits());
+         nf.setMaximumFractionDigits(6);
+         assertEquals(6, nf.getMaximumFractionDigits());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setMaximumFractionDigits(int)'
+      */
+     public void testSetMaximumFractionDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMaximumFractionDigits(4);
+         assertEquals("1.2346", nf.format(1.2345678)); // low digits rounded
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.getMinimumFractionDigits()'
+      */
+     public void testGetMinimumFractionDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMinimumFractionDigits(4);
+         assertEquals(4, nf.getMinimumFractionDigits());
+         nf.setMinimumFractionDigits(6);
+         assertEquals(6, nf.getMinimumFractionDigits());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.setMinimumFractionDigits(int)'
+      */
+     public void testSetMinimumFractionDigits() {
+         NumberFormat nf = NumberFormat.getInstance();
+         nf.setMinimumFractionDigits(4);
+         assertEquals("1.2000", nf.format(1.2));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.toString()'
+      */
+     public void testToString() {
+         assertNotNull(NumberFormat.getInstance().toString());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.hashCode()' (testClone and testEqualsObject defer to this test)
+      */
+     public void testHashCode() {
+         NumberFormat nf = NumberFormat.getInstance();
+         NumberFormat eq = NumberFormat.getInstance(Locale.US); // NOTE(review): presumably relies on a US default locale - confirm
+         NumberFormat neq = NumberFormat.getInstance(Locale.GERMANY);
+
+         ICUTestCase.testEHCS(nf, eq, neq);
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.clone()'
+      */
+     public void testClone() {
+         // see testHashCode
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.NumberFormat.equals(Object)'
+      */
+     public void testEqualsObject() {
+         // see testHashCode
+     }
+ }
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/SimpleDateFormatTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/SimpleDateFormatTest.java
new file mode 100644
index 00000000000..0136a82ab9e
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/SimpleDateFormatTest.java
@@ -0,0 +1,202 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.text.FieldPosition;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormatSymbols;
+import com.ibm.icu.text.SimpleDateFormat;
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+ public class SimpleDateFormatTest extends ICUTestCase {
+     private static final String mdy = "MMM dd yyyy";
+     private static final String md2 = "MMM dd yy";
+     private static final String hmz = "'The time is' HH:mm:ss zzz";
+     private static final String hmzmdy = hmz + " 'on' " + mdy;
+     private static final String hmzmdyStr = "The time is 15:05:20 CST on Jan 10 2006";
+
+     private static final TimeZone tzc = TimeZone.getTimeZone("CST");
+     private static final TimeZone tzp = TimeZone.getTimeZone("PST");
+     private static final Calendar cal = Calendar.getInstance(tzc);
+     private static final Date date;
+     static {
+         cal.clear();
+         cal.set(2006, 0, 10, 15, 5, 20); // arrgh, doesn't clear millis
+         date = cal.getTime();
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.format(Calendar, StringBuffer, FieldPosition)'
+      */
+     public void testFormatCalendarStringBufferFieldPosition() {
+         StringBuffer buf = new StringBuffer();
+         FieldPosition fp = new FieldPosition(0);
+         SimpleDateFormat sdf = new SimpleDateFormat(hmzmdy);
+         sdf.format(cal, buf, fp);
+         assertEquals(hmzmdyStr, buf.toString());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.parse(String, Calendar, ParsePosition)'
+      */
+     public void testParseStringCalendarParsePosition() {
+         Calendar parsed = Calendar.getInstance(tzp); // separate calendar, so the shared static 'cal' is not touched
+         parsed.clear();
+         ParsePosition pp = new ParsePosition(0);
+         SimpleDateFormat sdf = new SimpleDateFormat(hmzmdy);
+         sdf.parse(hmzmdyStr, parsed, pp);
+         assertEquals(date, parsed.getTime());
+         // note: java doesn't return the parsed time zone
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.clone()'
+      */
+     public void testClone() {
+         // TODO: no clone() check has been written yet; this test asserts nothing
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.SimpleDateFormat()'
+      */
+     public void testSimpleDateFormat() {
+         SimpleDateFormat sdf = new SimpleDateFormat();
+         java.text.SimpleDateFormat jsdf = new java.text.SimpleDateFormat();
+         assertEquals(jsdf.format(date), sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.SimpleDateFormat(String)'
+      */
+     public void testSimpleDateFormatString() {
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy);
+         java.text.SimpleDateFormat jsdf = new java.text.SimpleDateFormat(mdy);
+         assertEquals(jsdf.format(date), sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.SimpleDateFormat(String, Locale)'
+      */
+     public void testSimpleDateFormatStringLocale() {
+         Locale l = Locale.JAPAN;
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy, l);
+         java.text.SimpleDateFormat jsdf = new java.text.SimpleDateFormat(mdy, l);
+         assertEquals(jsdf.format(date), sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.SimpleDateFormat(String, ULocale)'
+      */
+     public void testSimpleDateFormatStringULocale() {
+         ULocale l = ULocale.JAPAN;
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy, l);
+         java.text.SimpleDateFormat jsdf = new java.text.SimpleDateFormat(mdy, l.toLocale());
+         assertEquals(jsdf.format(date), sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.SimpleDateFormat(String, DateFormatSymbols)'
+      */
+     public void testSimpleDateFormatStringDateFormatSymbols() {
+         Locale l = Locale.US;
+         DateFormatSymbols dfs = new DateFormatSymbols(l);
+         java.text.DateFormatSymbols jdfs = new java.text.DateFormatSymbols(l);
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy, dfs);
+         java.text.SimpleDateFormat jsdf = new java.text.SimpleDateFormat(mdy, jdfs);
+         assertEquals(jsdf.format(date), sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.set2DigitYearStart(Date)'
+      */
+     public void testSet2DigitYearStart() {
+         SimpleDateFormat sdf = new SimpleDateFormat(md2);
+         sdf.set2DigitYearStart(date);
+         try {
+             Date d = sdf.parse("Jan 15 04");
+             assertNotEqual(-1, d.toString().indexOf("2104")); // "04" must land in the century after the 2006 start date
+         }
+         catch (ParseException pe) {
+             fail(pe.getMessage());
+         }
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.get2DigitYearStart()'
+      */
+     public void testGet2DigitYearStart() {
+         SimpleDateFormat sdf = new SimpleDateFormat(md2);
+         sdf.set2DigitYearStart(date);
+         assertEquals(date, sdf.get2DigitYearStart());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.toPattern()'
+      */
+     public void testToPattern() {
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy);
+         assertEquals(mdy, sdf.toPattern());
+     }
+
+     /* Test method for 'com.ibm.icu.text.SimpleDateFormat.toLocalizedPattern()' (runs with the default locale pinned to US) */
+     public void testToLocalizedPattern() {
+         Locale saved = Locale.getDefault();
+         Locale.setDefault(Locale.US);
+         try {
+             assertEquals(mdy, new SimpleDateFormat(mdy).toLocalizedPattern());
+         } finally {
+             Locale.setDefault(saved); // restore even when the assertion fails, so later tests are unaffected
+         }
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.applyPattern(String)'
+      */
+     public void testApplyPattern() {
+         SimpleDateFormat sdf = new SimpleDateFormat();
+         sdf.setTimeZone(tzc);
+         sdf.applyPattern(hmzmdy);
+         assertEquals(hmzmdyStr, sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.applyLocalizedPattern(String)'
+      */
+     public void testApplyLocalizedPattern() {
+         SimpleDateFormat sdf = new SimpleDateFormat();
+         sdf.setTimeZone(tzc);
+         sdf.applyLocalizedPattern(hmzmdy);
+         assertEquals(hmzmdyStr, sdf.format(date));
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.getDateFormatSymbols()'
+      */
+     public void testGetDateFormatSymbols() {
+         DateFormatSymbols dfs = new DateFormatSymbols(Locale.US);
+         SimpleDateFormat sdf = new SimpleDateFormat(mdy, dfs);
+         assertEquals(dfs, sdf.getDateFormatSymbols());
+     }
+
+     /*
+      * Test method for 'com.ibm.icu.text.SimpleDateFormat.setDateFormatSymbols(DateFormatSymbols)'
+      */
+     public void testSetDateFormatSymbols() {
+         DateFormatSymbols dfs = new DateFormatSymbols(Locale.JAPAN);
+         SimpleDateFormat sdf = new SimpleDateFormat(hmzmdy);
+         sdf.setDateFormatSymbols(dfs);
+         // assumes Japanese symbols do not have gregorian month names
+         assertEquals(-1, sdf.format(date).indexOf("Jan"));
+     }
+ }
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/TimeZoneTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/TimeZoneTest.java
new file mode 100644
index 00000000000..f8a2fe16ce1
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/TimeZoneTest.java
@@ -0,0 +1,235 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Date;
+import java.util.Locale;
+
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+public class TimeZoneTest extends ICUTestCase {
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.hashCode()'
+ */
+ public void testHashCode() {
+ TimeZone tz1 = TimeZone.getTimeZone("PST");
+ TimeZone tz2 = TimeZone.getTimeZone("PST");
+ TimeZone tzn = TimeZone.getTimeZone("CST");
+ testEHCS(tz1, tz2, tzn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.TimeZone(TimeZone)'
+ */
+ public void testTimeZone() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getOffset(int, int, int, int, int, int)'
+ */
+ public void testGetOffset() {
+     TimeZone tz = TimeZone.getTimeZone("PST");
+     // Arguments: era (1, i.e. AD), year, month, day of month, day of week,
+     // milliseconds in day. 1 January 2004 fell on a Thursday, so the
+     // day-of-week argument is kept consistent with the date.
+     int offset = tz.getOffset(1, 2004, Calendar.JANUARY, 1, Calendar.THURSDAY, 0);
+     // Expected value is -8 hours expressed in milliseconds.
+     assertEquals(-28800000, offset);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.setRawOffset(int)'
+ */
+ public void testSetRawOffset() {
+     TimeZone zone = TimeZone.getTimeZone("PST");
+     final int before = zone.getRawOffset();
+     final int shifted = before + 100000;
+     zone.setRawOffset(shifted);
+     final int after = zone.getRawOffset();
+     // The raw offset must have moved off its old value and onto the new one.
+     assertNotEqual(before, after);
+     assertEquals(shifted, after);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getRawOffset()'
+ */
+ public void testGetRawOffset() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ int offset = tz.getRawOffset();
+ assertEquals(-28800000, offset);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getID()'
+ */
+ public void testGetID() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals("PST", tz.getID());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.setID(String)'
+ */
+ public void testSetID() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ String value1 = tz.getID();
+ String value2 = value1 + "!";
+ tz.setID(value2);
+ String result = tz.getID();
+ assertNotEqual(value1, result);
+ assertEquals(value2, result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName()'
+ */
+ public void testGetDisplayName() {
+     TimeZone tz = TimeZone.getTimeZone("PST");
+     // This overload takes no locale, so the English expectation only holds
+     // under an English default locale. Pin the default for the assertion
+     // and always put the original back.
+     Locale saved = Locale.getDefault();
+     Locale.setDefault(Locale.US);
+     try {
+         assertEquals("Pacific Standard Time", tz.getDisplayName());
+     } finally {
+         Locale.setDefault(saved);
+     }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName(Locale)'
+ */
+ public void testGetDisplayNameLocale() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals("Pacific Standard Time", tz.getDisplayName(Locale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName(ULocale)'
+ */
+ public void testGetDisplayNameULocale() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals("Pacific Standard Time", tz.getDisplayName(ULocale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName(boolean, int)'
+ */
+ public void testGetDisplayNameBooleanInt() {
+     TimeZone tz = TimeZone.getTimeZone("PST");
+     // This overload takes no locale, so the English expectations only hold
+     // under an English default locale. Pin the default for the assertions
+     // and always put the original back.
+     Locale saved = Locale.getDefault();
+     Locale.setDefault(Locale.US);
+     try {
+         assertEquals("PDT", tz.getDisplayName(true, TimeZone.SHORT));
+         assertEquals("Pacific Daylight Time", tz.getDisplayName(true, TimeZone.LONG));
+     } finally {
+         Locale.setDefault(saved);
+     }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName(boolean, int, Locale)'
+ */
+ public void testGetDisplayNameBooleanIntLocale() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals("PDT", tz.getDisplayName(true, TimeZone.SHORT, Locale.US));
+ assertEquals("Pacific Daylight Time", tz.getDisplayName(true, TimeZone.LONG, Locale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDisplayName(boolean, int, ULocale)'
+ */
+ public void testGetDisplayNameBooleanIntULocale() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals("PDT", tz.getDisplayName(true, TimeZone.SHORT, ULocale.US));
+ assertEquals("Pacific Daylight Time", tz.getDisplayName(true, TimeZone.LONG, ULocale.US));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDSTSavings()'
+ */
+ public void testGetDSTSavings() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertEquals(3600000, tz.getDSTSavings());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.useDaylightTime()'
+ */
+ public void testUseDaylightTime() {
+ TimeZone tz = TimeZone.getTimeZone("PST");
+ assertTrue(tz.useDaylightTime());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.inDaylightTime(Date)'
+ */
+ public void testInDaylightTime() {
+     TimeZone pacific = TimeZone.getTimeZone("PST");
+     Calendar calendar = Calendar.getInstance();
+
+     // 17 January 2005: the zone is expected to be on standard time.
+     calendar.set(2005, 0, 17);
+     Date winter = calendar.getTime();
+     assertFalse(pacific.inDaylightTime(winter));
+
+     // 17 July 2005: the zone is expected to be on daylight time.
+     calendar.set(2005, 6, 17);
+     Date summer = calendar.getTime();
+     assertTrue(pacific.inDaylightTime(summer));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getTimeZone(String)'
+ */
+ public void testGetTimeZone() {
+ // implicitly tested everywhere
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getAvailableIDs(int)'
+ */
+ public void testGetAvailableIDsInt() {
+ String[] ids = TimeZone.getAvailableIDs(-28800000);
+ assertNotNull(ids);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getAvailableIDs()'
+ */
+ public void testGetAvailableIDs() {
+ String[] ids = TimeZone.getAvailableIDs();
+ assertNotNull(ids);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.getDefault()'
+ */
+ public void testGetDefault() {
+ TimeZone tz = TimeZone.getDefault();
+ assertNotNull(tz);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.setDefault(TimeZone)'
+ */
+ public void testSetDefault() {
+     TimeZone tz1 = TimeZone.getDefault();
+     // Pick an ID that differs from the current default's ID.
+     // NOTE(review): "PDT" may not be a recognized zone ID - confirm what
+     // getTimeZone returns for it.
+     String newCode = "PDT".equals(tz1.getID()) ? "CST" : "PDT";
+     TimeZone tz2 = TimeZone.getTimeZone(newCode);
+     try {
+         TimeZone.setDefault(tz2);
+         TimeZone result = TimeZone.getDefault();
+         assertNotEqual(tz1, result);
+         assertEquals(tz2, result);
+     } finally {
+         // The default zone is shared by every test in the run; put the
+         // original back so later tests do not execute under tz2.
+         TimeZone.setDefault(tz1);
+     }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.hasSameRules(TimeZone)'
+ */
+ public void testHasSameRules() {
+ TimeZone tz1 = TimeZone.getTimeZone("PST");
+ TimeZone tz2 = TimeZone.getTimeZone("America/Los_Angeles");
+ assertTrue(tz1.hasSameRules(tz2));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.clone()'
+ */
+ public void testClone() {
+ // tested by testHashCode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.util.TimeZone.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // tested by testHashCode
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java
new file mode 100644
index 00000000000..d8a2b85d80d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java
@@ -0,0 +1,748 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.tests;
+
+import java.util.Iterator;
+import java.util.Locale;
+
+import com.ibm.icu.util.ULocale;
+
+public class ULocaleTest extends ICUTestCase {
+ private String sampleName;
+ private String longULocaleName;
+ private String longULocaleBasename;
+ private String nonNormalizedName;
+ private ULocale longULocale;
+ private Locale sampleLocale;
+
+ /**
+ * @Override
+ */
+ protected void setUp() throws Exception {
+ super.setUp();
+
+ // Fixture identifiers; the subtags (ll, Ssss, CC, VVVV) appear to be
+ // placeholders rather than real locale data.
+ sampleName = "ll_CC_VVVV";
+ longULocaleName = "ll_Ssss_CC_VVVV@collation=phonebook;key=value";
+ // Base name = everything before the '@' that starts the keyword section.
+ longULocaleBasename = longULocaleName.substring(0, longULocaleName.indexOf('@'));
+ // Same subtags and keywords as longULocaleName, but with different case,
+ // extra whitespace, reordered keywords and a trailing ';'.
+ nonNormalizedName = "LL_ssss_cc_VVVV@ Key = value ; Collation = phonebook ; ";
+ longULocale = new ULocale(longULocaleName);
+ // java.util.Locale view of sampleName, obtained through ULocale.toLocale().
+ sampleLocale = new ULocale(sampleName).toLocale();
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.hashCode()'
+ */
+ public void testHashCode() {
+ ULocale obj = ULocale.GERMANY;
+ ULocale eq = new ULocale("de_DE");
+ ULocale neq = new ULocale("de_DE_FRENCH");
+
+ ICUTestCase.testEHCS(obj, eq, neq);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.forLocale(Locale)'
+ */
+ public void testForLocale() {
+ assertEquals(ULocale.GERMANY, ULocale.forLocale(Locale.GERMANY));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.ULocale(String)'
+ */
+ public void testULocaleString() {
+ assertEquals(ULocale.GERMAN, new ULocale("de"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.ULocale(String, String)'
+ */
+ public void testULocaleStringString() {
+ assertEquals(ULocale.GERMANY, new ULocale("de", "DE"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.ULocale(String, String, String)'
+ */
+ public void testULocaleStringStringString() {
+ assertEquals(sampleLocale, new ULocale("ll", "cc", "VVVV").toLocale());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.createCanonical(String)'
+ */
+ public void testCreateCanonical() {
+ ULocale result = ULocale.createCanonical("de__PHONEBOOK");
+ assertEquals(new ULocale("de@collation=phonebook"), result);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.toLocale()'
+ */
+ public void testToLocale() {
+ assertEquals(sampleLocale, new ULocale("ll", "cc", "VVVV").toLocale());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDefault()'
+ */
+ public void testGetDefault() {
+ assertEquals(Locale.getDefault(), ULocale.getDefault().toLocale());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.setDefault(ULocale)'
+ */
+ public void testSetDefault() {
+ // Capture both defaults up front so the finally block can put them back.
+ Locale oldLocale = Locale.getDefault();
+ ULocale oldULocale = ULocale.getDefault();
+ try {
+ ULocale.setDefault(longULocale);
+ // Both the java.util and the ICU default are expected to have changed.
+ ICUTestCase.assertNotEqual(Locale.getDefault(), oldLocale);
+ ICUTestCase.assertNotEqual(ULocale.getDefault(), oldULocale);
+ // The ICU default must be the full ULocale; the java.util default is
+ // asserted to equal sampleLocale (built in setUp from "ll_CC_VVVV").
+ assertEquals(longULocale, ULocale.getDefault());
+ assertEquals(sampleLocale, Locale.getDefault());
+ }
+ finally {
+ // Restore both defaults regardless of assertion outcome.
+ ULocale.setDefault(oldULocale);
+ Locale.setDefault(oldLocale); // in case of some error
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.clone()'
+ */
+ public void testClone() {
+ // see testHashcode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.equals(Object)'
+ */
+ public void testEqualsObject() {
+ // see testHashcode
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getAvailableLocales()'
+ */
+ public void testGetAvailableLocales() {
+     ULocale[] ulocales = ULocale.getAvailableLocales();
+     if (ICUTestCase.testingWrapper) {
+         Locale[] locales = Locale.getAvailableLocales();
+         // Check the sizes first: otherwise a longer ULocale array fails with
+         // ArrayIndexOutOfBoundsException and a shorter one leaves the tail of
+         // the JDK array unchecked.
+         // NOTE(review): assumes the wrapper maps JDK locales one-to-one - confirm.
+         assertEquals(locales.length, ulocales.length);
+         for (int i = 0; i < ulocales.length; ++i) {
+             // JUnit order is (expected, actual): the JDK locale is the reference.
+             assertEquals(locales[i], ulocales[i].toLocale());
+         }
+     }
+     // else nothing to test except that the function returned.
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISOCountries()'
+ */
+ public void testGetISOCountries() {
+ String[] ucountries = ULocale.getISOCountries();
+ assertNotNull(ucountries);
+ if (ICUTestCase.testingWrapper) {
+ // keep our own data for now
+ // our data doesn't match java's so this test would fail
+ // TODO: enable if we decide to use java's data
+ // String[] countries = Locale.getISOCountries();
+ // TestBoilerplate.assertArraysEqual(ucountries, countries);
+ }
+ // else nothing to test except that the function returned.
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISOLanguages()'
+ */
+ public void testGetISOLanguages() {
+ String[] ulanguages = ULocale.getISOLanguages();
+ assertNotNull(ulanguages);
+ if (ICUTestCase.testingWrapper) {
+ // keep our own data for now
+ // our data doesn't match java's so this test would fail
+ // TODO: enable if we decide to use java's data
+ // String[] languages = Locale.getISOLanguages();
+ // TestBoilerplate.assertArraysEqual(ulanguages, languages);
+ }
+ // else nothing to test except that the function returned.
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getLanguage()'
+ */
+ public void testGetLanguage() {
+ assertEquals("ll", longULocale.getLanguage());
+ assertEquals("ll", longULocale.toLocale().getLanguage());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getLanguage(String)'
+ */
+ public void testGetLanguageString() {
+ assertEquals("ll", ULocale.getLanguage(longULocale.getName()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getScript()'
+ */
+ public void testGetScript() {
+ assertEquals("Ssss", longULocale.getScript());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getScript(String)'
+ */
+ public void testGetScriptString() {
+ assertEquals("Ssss", ULocale.getScript(longULocale.getName()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getCountry()'
+ */
+ public void testGetCountry() {
+ assertEquals("CC", longULocale.getCountry());
+ assertEquals("CC", longULocale.toLocale().getCountry());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getCountry(String)'
+ */
+ public void testGetCountryString() {
+ assertEquals("CC", ULocale.getCountry(longULocale.getName()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getVariant()'
+ */
+ public void testGetVariant() {
+ assertEquals("VVVV", longULocale.getVariant());
+ assertEquals("VVVV", longULocale.toLocale().getVariant());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getVariant(String)'
+ */
+ public void testGetVariantString() {
+ assertEquals("VVVV", ULocale.getVariant(longULocale.getName()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getFallback(String)'
+ */
+ public void testGetFallbackString() {
+ assertEquals(ULocale.GERMAN, ULocale.getFallback(ULocale.GERMANY.getName()));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getFallback()'
+ */
+ public void testGetFallback() {
+ assertEquals(ULocale.GERMAN, ULocale.GERMANY.getFallback());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getBaseName()'
+ */
+ public void testGetBaseName() {
+ assertEquals(longULocaleBasename, longULocale.getBaseName());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getBaseName(String)'
+ */
+ public void testGetBaseNameString() {
+     // Exercise the static String overload named in this test's header;
+     // the instance method is already covered by testGetBaseName.
+     assertEquals(longULocaleBasename, ULocale.getBaseName(longULocaleName));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getName()'
+ */
+ public void testGetName() {
+ assertEquals(longULocaleName, longULocale.getName());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getName(String)'
+ */
+ public void testGetNameString() {
+ assertEquals(longULocaleName, ULocale.getName(nonNormalizedName));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.toString()'
+ */
+ public void testToString() {
+ assertEquals(longULocaleName, longULocale.toString());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getKeywords()'
+ */
+ public void testGetKeywords() {
+     Iterator iter = longULocale.getKeywords();
+     // JUnit order is (expected, actual), so failure messages label the
+     // values correctly. The keywords are asserted in this exact order.
+     assertEquals("collation", iter.next());
+     assertEquals("key", iter.next());
+     // No keywords beyond the two present in longULocaleName.
+     assertFalse(iter.hasNext());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getKeywords(String)'
+ */
+ public void testGetKeywordsString() {
+     // The input has mixed-case keys, extra whitespace and reversed keyword
+     // order; the lower-case names are asserted in this exact order.
+     Iterator iter = ULocale.getKeywords(nonNormalizedName);
+     // JUnit order is (expected, actual), so failure messages label the
+     // values correctly.
+     assertEquals("collation", iter.next());
+     assertEquals("key", iter.next());
+     assertFalse(iter.hasNext());
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getKeywordValue(String)'
+ */
+ public void testGetKeywordValueString() {
+ assertEquals("value", longULocale.getKeywordValue("key"));
+ assertEquals("phonebook", longULocale.getKeywordValue("collation"));
+ assertNull(longULocale.getKeywordValue("zzyzx"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getKeywordValue(String, String)'
+ */
+ public void testGetKeywordValueStringString() {
+ assertEquals("value", ULocale.getKeywordValue(longULocaleName, "key"));
+ assertEquals("phonebook", ULocale.getKeywordValue(longULocaleName, "collation"));
+ assertNull(ULocale.getKeywordValue(longULocaleName, "zzyzx"));
+
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.canonicalize(String)'
+ */
+ public void testCanonicalize() {
+ assertEquals("de@collation=phonebook", ULocale.canonicalize("de__PHONEBOOK"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.setKeywordValue(String, String)'
+ */
+ public void testSetKeywordValueStringString() {
+ ULocale munged = longULocale.setKeywordValue("key", "C#");
+ assertEquals("C#", munged.getKeywordValue("key"));
+ munged = munged.setKeywordValue("zzyzx", "grue");
+ assertEquals("grue", munged.getKeywordValue("zzyzx"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.setKeywordValue(String, String, String)'
+ */
+ public void testSetKeywordValueStringStringString() {
+ String munged = ULocale.setKeywordValue(longULocaleName, "key", "C#");
+ assertEquals("C#", ULocale.getKeywordValue(munged, "key"));
+ munged = ULocale.setKeywordValue(munged, "zzyzx", "grue");
+ assertEquals("grue", ULocale.getKeywordValue(munged, "zzyzx"));
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISO3Language()'
+ */
+ public void testGetISO3Language() {
+ String il = ULocale.GERMANY.getISO3Language();
+ String jl = Locale.GERMANY.getISO3Language();
+ assertEquals(il, jl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISO3Language(String)'
+ */
+ public void testGetISO3LanguageString() {
+ String il = ULocale.getISO3Language(ULocale.GERMANY.getName());
+ String jl = Locale.GERMANY.getISO3Language();
+ assertEquals(il, jl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISO3Country()'
+ */
+ public void testGetISO3Country() {
+ String ic = ULocale.GERMANY.getISO3Country();
+ String jc = Locale.GERMANY.getISO3Country();
+ assertEquals(ic, jc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getISO3Country(String)'
+ */
+ public void testGetISO3CountryString() {
+ String ic = ULocale.getISO3Country(ULocale.GERMANY.getName());
+ String jc = Locale.GERMANY.getISO3Country();
+ assertEquals(ic, jc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayLanguage()'
+ */
+ public void testGetDisplayLanguage() {
+ String idl = ULocale.GERMANY.getDisplayLanguage();
+ String jdl = Locale.GERMANY.getDisplayLanguage();
+ assertEquals(idl, jdl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayLanguage(ULocale)'
+ */
+ public void testGetDisplayLanguageULocale() {
+ String idl = ULocale.GERMANY.getDisplayLanguage(ULocale.GERMANY);
+ String jdl = Locale.GERMANY.getDisplayLanguage(Locale.GERMANY);
+ assertEquals(idl, jdl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayLanguage(String, String)'
+ */
+ public void testGetDisplayLanguageStringString() {
+ String idl = ULocale.getDisplayLanguage(ULocale.GERMANY.getName(), "de_DE");
+ String jdl = Locale.GERMANY.getDisplayLanguage(Locale.GERMANY);
+ assertEquals(idl, jdl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayLanguage(String, ULocale)'
+ */
+ public void testGetDisplayLanguageStringULocale() {
+ String idl = ULocale.getDisplayLanguage(ULocale.GERMANY.getName(), ULocale.GERMANY);
+ String jdl = Locale.GERMANY.getDisplayLanguage(Locale.GERMANY);
+ assertEquals(idl, jdl);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayScript()'
+ */
+ public void testGetDisplayScript() {
+ String is = ULocale.TRADITIONAL_CHINESE.getDisplayScript();
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("Hant", is);
+ } else {
+ assertEquals("Traditional Chinese", is);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayScript(ULocale)'
+ */
+ public void testGetDisplayScriptULocale() {
+ String is = ULocale.TRADITIONAL_CHINESE.getDisplayScript(ULocale.GERMANY);
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("Hant", is);
+ } else {
+ // TODO: look up expected value
+ assertEquals("Hant", is);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayScript(String, String)'
+ */
+ public void testGetDisplayScriptStringString() {
+ String is = ULocale.getDisplayScript("zh_Hant", "de_DE");
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("Hant", is);
+ } else {
+ // TODO: look up expected value
+ assertEquals("Hant", is);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayScript(String, ULocale)'
+ */
+ public void testGetDisplayScriptStringULocale() {
+ String is = ULocale.getDisplayScript("zh_Hant", ULocale.GERMANY);
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("Hant", is);
+ } else {
+ // TODO: look up expected value
+ assertEquals("Hant", is);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayCountry()'
+ */
+ public void testGetDisplayCountry() {
+ String idc = ULocale.GERMANY.getDisplayCountry();
+ String jdc = Locale.GERMANY.getDisplayCountry();
+ assertEquals(idc, jdc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayCountry(ULocale)'
+ */
+ public void testGetDisplayCountryULocale() {
+ String idc = ULocale.GERMANY.getDisplayCountry(ULocale.GERMANY);
+ String jdc = Locale.GERMANY.getDisplayCountry(Locale.GERMANY);
+ assertEquals(idc, jdc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayCountry(String, String)'
+ */
+ public void testGetDisplayCountryStringString() {
+ String idc = ULocale.getDisplayCountry("de_DE", "de_DE");
+ String jdc = Locale.GERMANY.getDisplayCountry(Locale.GERMANY);
+ assertEquals(idc, jdc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayCountry(String, ULocale)'
+ */
+ public void testGetDisplayCountryStringULocale() {
+ String idc = ULocale.getDisplayCountry("de_DE", ULocale.GERMANY);
+ String jdc = Locale.GERMANY.getDisplayCountry(Locale.GERMANY);
+ assertEquals(idc, jdc);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayVariant()'
+ */
+ public void testGetDisplayVariant() {
+ String idv = new ULocale("de_DE_PHONEBOOK").getDisplayVariant();
+ String jdv = new Locale("de", "DE", "PHONEBOOK").getDisplayVariant();
+ assertEquals(jdv, idv);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayVariant(ULocale)'
+ */
+ public void testGetDisplayVariantULocale() {
+ String idv = new ULocale("de_DE_PHONEBOOK").getDisplayVariant(ULocale.GERMANY);
+ String jdv = new Locale("de", "DE", "PHONEBOOK").getDisplayVariant(Locale.GERMANY);
+ assertEquals(jdv, idv);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayVariant(String, String)'
+ */
+ public void testGetDisplayVariantStringString() {
+ String idv = ULocale.getDisplayVariant("de_DE_PHONEBOOK", "de_DE");
+ String jdv = new Locale("de", "DE", "PHONEBOOK").getDisplayVariant(Locale.GERMANY);
+ assertEquals(jdv, idv);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayVariant(String, ULocale)'
+ */
+ public void testGetDisplayVariantStringULocale() {
+ String idv = ULocale.getDisplayVariant("de_DE_PHONEBOOK", ULocale.GERMANY);
+ String jdv = new Locale("de", "DE", "PHONEBOOK").getDisplayVariant(Locale.GERMANY);
+ assertEquals(jdv, idv);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeyword(String)'
+ */
+ public void testGetDisplayKeywordString() {
+ String idk = ULocale.getDisplayKeyword("collation");
+ assertEquals("collation", idk);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeyword(String, String)'
+ */
+ public void testGetDisplayKeywordStringString() {
+ String idk = ULocale.getDisplayKeyword("collation", "de_DE");
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("collation", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("collation", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeyword(String, ULocale)'
+ */
+ public void testGetDisplayKeywordStringULocale() {
+ String idk = ULocale.getDisplayKeyword("collation", ULocale.GERMANY);
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("collation", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("collation", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeywordValue(String)'
+ */
+ public void testGetDisplayKeywordValueString() {
+ ULocale ul = new ULocale("de_DE@collation=phonebook");
+ String idk = ul.getDisplayKeywordValue("collation");
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("phonebook", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("phonebook", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeywordValue(String, ULocale)'
+ */
+ public void testGetDisplayKeywordValueStringULocale() {
+ ULocale ul = new ULocale("de_DE@collation=phonebook");
+ String idk = ul.getDisplayKeywordValue("collation", ULocale.GERMANY);
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("phonebook", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("phonebook", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeywordValue(String, String, String)'
+ */
+ public void testGetDisplayKeywordValueStringStringString() {
+ String idk = ULocale.getDisplayKeywordValue("de_DE@collation=phonebook", "collation", "de_DE");
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("phonebook", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("phonebook", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayKeywordValue(String, String, ULocale)'
+ */
+ public void testGetDisplayKeywordValueStringStringULocale() {
+ String idk = ULocale.getDisplayKeywordValue("de_DE@collation=phonebook", "collation", ULocale.GERMANY);
+ if (ICUTestCase.testingWrapper) {
+ assertEquals("phonebook", idk);
+ } else {
+ // TODO: find real value
+ assertEquals("phonebook", idk);
+ }
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayName()'
+ */
+ public void testGetDisplayName() {
+ String idn = ULocale.GERMANY.getDisplayName();
+ String jdn = Locale.GERMANY.getDisplayName();
+ assertEquals(idn, jdn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayName(ULocale)'
+ */
+ public void testGetDisplayNameULocale() {
+ String idn = ULocale.GERMANY.getDisplayName(ULocale.GERMANY);
+ String jdn = Locale.GERMANY.getDisplayName(Locale.GERMANY);
+ assertEquals(idn, jdn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayName(String, String)'
+ */
+ public void testGetDisplayNameStringString() {
+ String idn = ULocale.getDisplayName("de_DE", "de_DE");
+ String jdn = Locale.GERMANY.getDisplayName(Locale.GERMANY);
+ assertEquals(idn, jdn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.getDisplayName(String, ULocale)'
+ */
+ public void testGetDisplayNameStringULocale() {
+ String idn = ULocale.getDisplayName("de_DE", ULocale.GERMANY);
+ String jdn = Locale.GERMANY.getDisplayName(Locale.GERMANY);
+ assertEquals(idn, jdn);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.acceptLanguage(String, ULocale[], boolean[])'
+ */
+ public void testAcceptLanguageStringULocaleArrayBooleanArray() {
+ boolean[] fallback = new boolean[1];
+ ULocale[] locales = {
+ new ULocale("en_CA"),
+ new ULocale("es_US"),
+ };
+ ULocale result = ULocale.acceptLanguage("en-US, en-GB, en-CA, es-US", locales, fallback);
+ assertEquals(new ULocale("en_CA"), result);
+ assertFalse(fallback[0]);
+ result = ULocale.acceptLanguage("en-US, en-GB, es-US-NEWMEXICO", locales, fallback);
+ assertEquals(new ULocale("es_US"), result);
+ assertTrue(fallback[0]);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.acceptLanguage(ULocale[], ULocale[], boolean[])'
+ */
+ public void testAcceptLanguageULocaleArrayULocaleArrayBooleanArray() {
+ boolean[] fallback = new boolean[1];
+ ULocale[] locales = {
+ new ULocale("en_CA"),
+ new ULocale("es_US"),
+ };
+ ULocale[] accept_locales = {
+ new ULocale("en_US"),
+ new ULocale("en_GB"),
+ new ULocale("en_CA"),
+ new ULocale("es_US"),
+ };
+ ULocale[] accept_locales2 = {
+ new ULocale("en_US"),
+ new ULocale("en_GB"),
+ new ULocale("es_US_NEWMEXICO"),
+ };
+ ULocale result = ULocale.acceptLanguage(accept_locales, locales, fallback);
+ assertEquals(new ULocale("en_CA"), result);
+ assertFalse(fallback[0]);
+ result = ULocale.acceptLanguage(accept_locales2, locales, fallback);
+ assertEquals(new ULocale("es_US"), result);
+ assertTrue(fallback[0]);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.acceptLanguage(String, boolean[])'
+ */
+ public void testAcceptLanguageStringBooleanArray() {
+ boolean[] fallback = new boolean[1];
+ ULocale result = ULocale.acceptLanguage("en-CA, en-GB, es-US", fallback);
+ assertEquals(new ULocale("en_CA"), result);
+ assertFalse(fallback[0]);
+ result = ULocale.acceptLanguage("es-US-NEWMEXICO", fallback);
+ assertNotNull(result); // actual result depends on jdk
+ assertTrue(fallback[0]);
+ }
+
+ /*
+ * Test method for 'com.ibm.icu.x.util.ULocale.acceptLanguage(ULocale[], boolean[])'
+ */
+ public void testAcceptLanguageULocaleArrayBooleanArray() {
+ boolean[] fallback = new boolean[1];
+ ULocale[] accept_locales = {
+ new ULocale("en_CA"),
+ new ULocale("en_GB"),
+ new ULocale("es_US"),
+ };
+ ULocale[] accept_locales2 = {
+ new ULocale("es_US_NEWMEXICO"),
+ };
+ ULocale result = ULocale.acceptLanguage(accept_locales, fallback);
+ assertEquals(new ULocale("en_CA"), result);
+ assertFalse(fallback[0]);
+ result = ULocale.acceptLanguage(accept_locales2, fallback);
+ assertNotNull(result); // actual result depends on jdk
+ assertTrue(fallback[0]);
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/.classpath b/eclipse-build/plugins.template/com.ibm.icu.base/.classpath
new file mode 100644
index 00000000000..02159672985
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/.project b/eclipse-build/plugins.template/com.ibm.icu.base/.project
new file mode 100644
index 00000000000..5e6c7c3bced
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/.project
@@ -0,0 +1,28 @@
+
+
+ com.ibm.icu.base
+
+
+
+
+
+ org.eclipse.pde.ManifestBuilder
+
+
+
+
+ org.eclipse.pde.SchemaBuilder
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+ org.eclipse.pde.PluginNature
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.core.prefs b/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..852d94d7dfb
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,77 @@
+#Mon Aug 30 14:05:56 EDT 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=ignore
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=warning
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=ignore
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.ui.prefs b/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..22f322c51ad
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,3 @@
+#Thu Dec 14 11:50:17 EST 2006
+eclipse.preferences.version=1
+internal.default.compliance=default
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/META-INF/MANIFEST.MF b/eclipse-build/plugins.template/com.ibm.icu.base/META-INF/MANIFEST.MF
new file mode 100644
index 00000000000..fb58cf711ea
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/META-INF/MANIFEST.MF
@@ -0,0 +1,15 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: %pluginName
+Bundle-SymbolicName: com.ibm.icu.base; singleton:=true
+Bundle-Version: @BUILD_VERSION@
+Bundle-Vendor: %providerName
+Bundle-Localization: plugin
+Bundle-Copyright: @COPYRIGHT@
+Export-Package: com.ibm.icu.text;base=true;version="@IMPL_VERSION@",
+ com.ibm.icu.util;base=true;version="@IMPL_VERSION@",
+ com.ibm.icu.math;base=true;version="@IMPL_VERSION@",
+ com.ibm.icu.impl;x-internal:=true,
+ com.ibm.icu.impl.locale;x-internal:=true
+Eclipse-LazyStart: true
+Bundle-RequiredExecutionEnvironment: J2SE-1.5
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/build.properties b/eclipse-build/plugins.template/com.ibm.icu.base/build.properties
new file mode 100644
index 00000000000..8b3ddc31ddb
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/build.properties
@@ -0,0 +1,19 @@
+###############################################################################
+# Copyright (c) 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+source.. = src/
+output.. = bin/
+src.includes = about.html,\
+ about_files/
+bin.includes = .,\
+ about.html,\
+ about_files/,\
+ plugin.properties,\
+ META-INF/
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/plugin.properties b/eclipse-build/plugins.template/com.ibm.icu.base/plugin.properties
new file mode 100644
index 00000000000..05f97ff88b9
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/plugin.properties
@@ -0,0 +1,12 @@
+###############################################################################
+# Copyright (c) 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+pluginName = International Components for Unicode for Java (ICU4J) Replacement plug-in
+providerName = IBM Corporation
\ No newline at end of file
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/ICUCache.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/ICUCache.java
new file mode 100644
index 00000000000..17828f7763d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/ICUCache.java
@@ -0,0 +1,21 @@
+/*
+ ***************************************************************************
+ * Copyright (c) 2007-2011 International Business Machines Corporation and *
+ * others. All rights reserved. *
+ ***************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+public interface ICUCache {
+ // Type of reference holding the Map instance
+ public static final int SOFT = 0;
+ public static final int WEAK = 1;
+
+ // NULL object, which may be used for a cache key
+ public static final Object NULL = new Object();
+
+ public void clear();
+ public void put(K key, V value);
+ public V get(Object key);
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDParser.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDParser.java
new file mode 100644
index 00000000000..67690da5796
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDParser.java
@@ -0,0 +1,741 @@
+/*
+******************************************************************************
+* Copyright (C) 2003-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.locale.AsciiUtil;
+
+/**
+ * Utility class to parse and normalize locale ids (including POSIX style)
+ */
+public final class LocaleIDParser {
+ private char[] id;
+ private int index;
+ private char[] buffer;
+ private int blen;
+ // um, don't handle POSIX ids unless we request it. why not? well... because.
+ private boolean canonicalize;
+ private boolean hadCountry;
+
+ // used when canonicalizing
+ Map keywords;
+ String baseName;
+
+ /**
+ * Parsing constants.
+ */
+ private static final char KEYWORD_SEPARATOR = '@';
+ private static final char HYPHEN = '-';
+ private static final char KEYWORD_ASSIGN = '=';
+ private static final char COMMA = ',';
+ private static final char ITEM_SEPARATOR = ';';
+ private static final char DOT = '.';
+ private static final char UNDERSCORE = '_';
+
+ public LocaleIDParser(String localeID) {
+ this(localeID, false);
+ }
+
+ public LocaleIDParser(String localeID, boolean canonicalize) {
+ id = localeID.toCharArray();
+ index = 0;
+ buffer = new char[id.length + 5];
+ blen = 0;
+ this.canonicalize = canonicalize;
+ }
+
+ private void reset() {
+ index = blen = 0;
+ }
+
+ // utilities for working on text in the buffer
+
+ /**
+ * Append c to the buffer.
+ */
+ private void append(char c) {
+ try {
+ buffer[blen] = c;
+ }
+ catch (IndexOutOfBoundsException e) {
+ if (buffer.length > 512) {
+ // something is seriously wrong, let this go
+ throw e;
+ }
+ char[] nbuffer = new char[buffer.length * 2];
+ System.arraycopy(buffer, 0, nbuffer, 0, buffer.length);
+ nbuffer[blen] = c;
+ buffer = nbuffer;
+ }
+ ++blen;
+ }
+
+ private void addSeparator() {
+ append(UNDERSCORE);
+ }
+
+ /**
+ * Returns the text in the buffer from start to blen as a String.
+ */
+ private String getString(int start) {
+ if (start == blen) {
+ return "";
+ }
+ return new String(buffer, start, blen-start);
+ }
+
+ /**
+ * Set the length of the buffer to pos, then append the string.
+ */
+ private void set(int pos, String s) {
+ this.blen = pos; // no safety
+ append(s);
+ }
+
+ /**
+ * Append the string to the buffer.
+ */
+ private void append(String s) {
+ for (int i = 0; i < s.length(); ++i) {
+ append(s.charAt(i));
+ }
+ }
+
+ // utilities for parsing text out of the id
+
+ /**
+ * Character to indicate no more text is available in the id.
+ */
+ private static final char DONE = '\uffff';
+
+ /**
+ * Returns the character at index in the id, and advances index. The returned character
+ * is DONE if index was at the end of the id. The index is advanced regardless
+ * so that decrementing the index will always 'unget' the last character returned.
+ */
+ private char next() {
+ if (index == id.length) {
+ index++;
+ return DONE;
+ }
+
+ return id[index++];
+ }
+
+ /**
+ * Advance index until the next terminator or id separator, and leave it there.
+ */
+ private void skipUntilTerminatorOrIDSeparator() {
+ while (!isTerminatorOrIDSeparator(next())) {
+ }
+ --index;
+ }
+
+ /**
+ * Returns true if the character at index in the id is a terminator.
+ */
+ private boolean atTerminator() {
+ return index >= id.length || isTerminator(id[index]);
+ }
+
+ /*
+ * Returns true if the character is an id separator (underscore or hyphen).
+ */
+ /* private boolean isIDSeparator(char c) {
+ return c == UNDERSCORE || c == HYPHEN;
+ }*/
+
+ /**
+ * Returns true if the character is a terminator (keyword separator, dot, or DONE).
+ * Dot is a terminator because of the POSIX form, where dot precedes the codepage.
+ */
+ private boolean isTerminator(char c) {
+ // always terminate at DOT, even if not handling POSIX. It's an error...
+ return c == KEYWORD_SEPARATOR || c == DONE || c == DOT;
+ }
+
+ /**
+ * Returns true if the character is a terminator or id separator.
+ */
+ private boolean isTerminatorOrIDSeparator(char c) {
+ return c == KEYWORD_SEPARATOR || c == UNDERSCORE || c == HYPHEN ||
+ c == DONE || c == DOT;
+ }
+
+ /**
+ * Returns true if the start of the id has an experimental or private language
+ * prefix, the pattern '[ixIX][-_].' shows the syntax checked.
+ */
+ private boolean haveExperimentalLanguagePrefix() {
+ if (id.length > 2) {
+ char c = id[1];
+ if (c == HYPHEN || c == UNDERSCORE) {
+ c = id[0];
+ return c == 'x' || c == 'X' || c == 'i' || c == 'I';
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if a value separator occurs at or after index.
+ */
+ private boolean haveKeywordAssign() {
+ // assume it is safe to start from index
+ for (int i = index; i < id.length; ++i) {
+ if (id[i] == KEYWORD_ASSIGN) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Advance index past language, and accumulate normalized language code in buffer.
+ * Index must be at 0 when this is called. Index is left at a terminator or id
+ * separator. Returns the start of the language code in the buffer.
+ */
+ private int parseLanguage() {
+ if (haveExperimentalLanguagePrefix()) {
+ append(Character.toLowerCase(id[0]));
+ append(HYPHEN);
+ index = 2;
+ }
+
+ char c;
+ while(!isTerminatorOrIDSeparator(c = next())) {
+ append(Character.toLowerCase(c));
+ }
+ --index; // unget
+
+ if (blen == 3) {
+ String lang = LocaleIDs.threeToTwoLetterLanguage(getString(0));
+ if (lang != null) {
+ set(0, lang);
+ }
+ }
+
+ return 0;
+ }
+
+ /**
+ * Advance index past language. Index must be at 0 when this is called. Index
+ * is left at a terminator or id separator.
+ */
+ private void skipLanguage() {
+ if (haveExperimentalLanguagePrefix()) {
+ index = 2;
+ }
+ skipUntilTerminatorOrIDSeparator();
+ }
+
+ /**
+ * Advance index past script, and accumulate normalized script in buffer.
+ * Index must be immediately after the language.
+ * If the item at this position is not a script (is not four characters
+ * long) leave index and buffer unchanged. Otherwise index is left at
+ * a terminator or id separator. Returns the start of the script code
+ * in the buffer (this may be equal to the buffer length, if there is no
+ * script).
+ */
+ private int parseScript() {
+ if (!atTerminator()) {
+ int oldIndex = index; // save original index
+ ++index;
+
+ int oldBlen = blen; // get before append hyphen, if we truncate everything is undone
+ char c;
+ while(!isTerminatorOrIDSeparator(c = next())) {
+ if (blen == oldBlen) { // first pass
+ addSeparator();
+ append(Character.toUpperCase(c));
+ } else {
+ append(Character.toLowerCase(c));
+ }
+ }
+ --index; // unget
+
+ /* If it's not exactly 4 characters long, then it's not a script. */
+ if (index - oldIndex != 5) { // +1 to account for separator
+ index = oldIndex;
+ blen = oldBlen;
+ } else {
+ oldBlen++; // index past hyphen, for clients who want to extract just the script
+ }
+
+ return oldBlen;
+ }
+ return blen;
+ }
+
+ /**
+ * Advance index past script.
+ * Index must be immediately after the language, at the id separator (if any).
+ * If the item at this position is not a script (is not four characters
+ * long) leave index unchanged. Otherwise index is left at a terminator or
+ * id separator.
+ */
+ private void skipScript() {
+ if (!atTerminator()) {
+ int oldIndex = index;
+ ++index;
+
+ skipUntilTerminatorOrIDSeparator();
+ if (index - oldIndex != 5) { // +1 to account for separator
+ index = oldIndex;
+ }
+ }
+ }
+
+ /**
+ * Advance index past country, and accumulate normalized country in buffer.
+ * Index must be immediately after the script (if there is one, else language),
+ * at the id separator (if any). Returns the start of the country code in the buffer.
+ */
+ private int parseCountry() {
+ if (!atTerminator()) {
+ int oldIndex = index;
+ ++index;
+
+ int oldBlen = blen;
+ char c;
+ while (!isTerminatorOrIDSeparator(c = next())) {
+ if (oldBlen == blen) { // first, add hyphen
+ hadCountry = true; // we have a country, let variant parsing know
+ addSeparator();
+ ++oldBlen; // increment past hyphen
+ }
+ append(Character.toUpperCase(c));
+ }
+ --index; // unget
+
+ int charsAppended = blen - oldBlen;
+
+ if (charsAppended == 0) {
+ // Do nothing.
+ }
+ else if (charsAppended < 2 || charsAppended > 3) {
+ // It's not a country, so return index and blen to
+ // their previous values.
+ index = oldIndex;
+ --oldBlen;
+ blen = oldBlen;
+ hadCountry = false;
+ }
+ else if (charsAppended == 3) {
+ String region = LocaleIDs.threeToTwoLetterRegion(getString(oldBlen));
+ if (region != null) {
+ set(oldBlen, region);
+ }
+ }
+
+ return oldBlen;
+ }
+
+ return blen;
+ }
+
+ /**
+ * Advance index past country.
+ * Index must be immediately after the script (if there is one, else language),
+ * at the id separator (if any).
+ */
+ private void skipCountry() {
+ if (!atTerminator()) {
+ ++index;
+ /*
+ * Save the index point after the separator, since the format
+ * requires two separators if the country is not present.
+ */
+ int oldIndex = index;
+
+ skipUntilTerminatorOrIDSeparator();
+ int charsSkipped = index - oldIndex;
+ if (charsSkipped < 2 || charsSkipped > 3) {
+ index = oldIndex;
+ }
+ }
+ }
+
+ /**
+ * Advance index past variant, and accumulate normalized variant in buffer. This ignores
+ * the codepage information from POSIX ids. Index must be immediately after the country
+ * or script. Index is left at the keyword separator or at the end of the text. Return
+ * the start of the variant code in the buffer.
+ *
+ * In standard form, we can have the following forms:
+ * ll__VVVV
+ * ll_CC_VVVV
+ * ll_Ssss_VVVV
+ * ll_Ssss_CC_VVVV
+ *
+ * This also handles POSIX ids, which can have the following forms (pppp is code page id):
+ * ll_CC.pppp --> ll_CC
+ * ll_CC.pppp@VVVV --> ll_CC_VVVV
+ * ll_CC@VVVV --> ll_CC_VVVV
+ *
+ * We identify this use of '@' in POSIX ids by looking for an '=' following
+ * the '@'. If there is one, we consider '@' to start a keyword list, instead of
+ * being part of a POSIX id.
+ *
+ * Note: since it was decided that we want an option to not handle POSIX ids, this
+ * becomes a bit more complex.
+ */
+ private int parseVariant() {
+ int oldBlen = blen;
+
+ boolean start = true;
+ boolean needSeparator = true;
+ boolean skipping = false;
+ char c;
+ while ((c = next()) != DONE) {
+ if (c == DOT) {
+ start = false;
+ skipping = true;
+ } else if (c == KEYWORD_SEPARATOR) {
+ if (haveKeywordAssign()) {
+ break;
+ }
+ skipping = false;
+ start = false;
+ needSeparator = true; // add another underscore if we have more text
+ } else if (start) {
+ start = false;
+ } else if (!skipping) {
+ if (needSeparator) {
+ boolean incOldBlen = blen == oldBlen; // need to skip separators
+ needSeparator = false;
+ if (incOldBlen && !hadCountry) { // no country, we'll need two
+ addSeparator();
+ ++oldBlen; // for sure
+ }
+ addSeparator();
+ if (incOldBlen) { // only for the first separator
+ ++oldBlen;
+ }
+ }
+ c = Character.toUpperCase(c);
+ if (c == HYPHEN || c == COMMA) {
+ c = UNDERSCORE;
+ }
+ append(c);
+ }
+ }
+ --index; // unget
+
+ return oldBlen;
+ }
+
+ // no need for skipvariant, to get the keywords we'll just scan directly for
+ // the keyword separator
+
+ /**
+ * Returns the normalized language id, or the empty string.
+ */
+ public String getLanguage() {
+ reset();
+ return getString(parseLanguage());
+ }
+
+ /**
+ * Returns the normalized script id, or the empty string.
+ */
+ public String getScript() {
+ reset();
+ skipLanguage();
+ return getString(parseScript());
+ }
+
+ /**
+ * Returns the normalized country id, or the empty string.
+ */
+ public String getCountry() {
+ reset();
+ skipLanguage();
+ skipScript();
+ return getString(parseCountry());
+ }
+
+ /**
+ * Returns the normalized variant id, or the empty string.
+ */
+ public String getVariant() {
+ reset();
+ skipLanguage();
+ skipScript();
+ skipCountry();
+ return getString(parseVariant());
+ }
+
+ /**
+ * Returns the language, script, country, and variant as separate strings.
+ */
+ public String[] getLanguageScriptCountryVariant() {
+ reset();
+ return new String[] {
+ getString(parseLanguage()),
+ getString(parseScript()),
+ getString(parseCountry()),
+ getString(parseVariant())
+ };
+ }
+
+ public void setBaseName(String baseName) {
+ this.baseName = baseName;
+ }
+
+ public void parseBaseName() {
+ if (baseName != null) {
+ set(0, baseName);
+ } else {
+ reset();
+ parseLanguage();
+ parseScript();
+ parseCountry();
+ parseVariant();
+
+ // catch unwanted trailing underscore after country if there was no variant
+ if (blen > 1 && buffer[blen-1] == UNDERSCORE) {
+ --blen;
+ }
+ }
+ }
+
+ /**
+ * Returns the normalized base form of the locale id. The base
+ * form does not include keywords.
+ */
+ public String getBaseName() {
+ if (baseName != null) {
+ return baseName;
+ }
+ parseBaseName();
+ return getString(0);
+ }
+
+ /**
+ * Returns the normalized full form of the locale id. The full
+ * form includes keywords if they are present.
+ */
+ public String getName() {
+ parseBaseName();
+ parseKeywords();
+ return getString(0);
+ }
+
+ // keyword utilities
+
+ /**
+ * If we have keywords, advance index to the start of the keywords and return true,
+ * otherwise return false.
+ */
+ private boolean setToKeywordStart() {
+ for (int i = index; i < id.length; ++i) {
+ if (id[i] == KEYWORD_SEPARATOR) {
+ if (canonicalize) {
+ for (int j = ++i; j < id.length; ++j) { // increment i past separator for return
+ if (id[j] == KEYWORD_ASSIGN) {
+ index = i;
+ return true;
+ }
+ }
+ } else {
+ if (++i < id.length) {
+ index = i;
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+ }
+
+ private static boolean isDoneOrKeywordAssign(char c) {
+ return c == DONE || c == KEYWORD_ASSIGN;
+ }
+
+ private static boolean isDoneOrItemSeparator(char c) {
+ return c == DONE || c == ITEM_SEPARATOR;
+ }
+
+ private String getKeyword() {
+ int start = index;
+ while (!isDoneOrKeywordAssign(next())) {
+ }
+ --index;
+ return AsciiUtil.toLowerString(new String(id, start, index-start).trim());
+ }
+
+ private String getValue() {
+ int start = index;
+ while (!isDoneOrItemSeparator(next())) {
+ }
+ --index;
+ return new String(id, start, index-start).trim(); // leave case alone
+ }
+
+ private Comparator getKeyComparator() {
+ final Comparator comp = new Comparator() {
+ public int compare(String lhs, String rhs) {
+ return lhs.compareTo(rhs);
+ }
+ };
+ return comp;
+ }
+
+ /**
+ * Returns a map of the keywords and values, or an empty map if there are none.
+ */
+ public Map getKeywordMap() {
+ if (keywords == null) {
+ TreeMap m = null;
+ if (setToKeywordStart()) {
+ // trim spaces from both keywords and values; only keywords are converted to lower case.
+ do {
+ String key = getKeyword();
+ if (key.length() == 0) {
+ break;
+ }
+ char c = next();
+ if (c != KEYWORD_ASSIGN) {
+ // throw new IllegalArgumentException("key '" + key + "' missing a value.");
+ if (c == DONE) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ String value = getValue();
+ if (value.length() == 0) {
+ // throw new IllegalArgumentException("key '" + key + "' missing a value.");
+ continue;
+ }
+ if (m == null) {
+ m = new TreeMap(getKeyComparator());
+ } else if (m.containsKey(key)) {
+ // throw new IllegalArgumentException("key '" + key + "' already has a value.");
+ continue;
+ }
+ m.put(key, value);
+ } while (next() == ITEM_SEPARATOR);
+ }
+ keywords = m != null ? m : Collections.emptyMap();
+ }
+
+ return keywords;
+ }
+
+
+ /**
+ * Parse the keywords and return start of the string in the buffer.
+ */
+ private int parseKeywords() {
+ int oldBlen = blen;
+ Map m = getKeywordMap();
+ if (!m.isEmpty()) {
+ boolean first = true;
+ for (Map.Entry e : m.entrySet()) {
+ append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR);
+ first = false;
+ append(e.getKey());
+ append(KEYWORD_ASSIGN);
+ append(e.getValue());
+ }
+ if (blen != oldBlen) {
+ ++oldBlen;
+ }
+ }
+ return oldBlen;
+ }
+
+ /**
+ * Returns an iterator over the keywords, or null if we have an empty map.
+ */
+ public Iterator getKeywords() {
+ Map m = getKeywordMap();
+ return m.isEmpty() ? null : m.keySet().iterator();
+ }
+
+ /**
+ * Returns the value for the named keyword, or null if the keyword is not
+ * present.
+ */
+ public String getKeywordValue(String keywordName) {
+ Map m = getKeywordMap();
+ return m.isEmpty() ? null : m.get(AsciiUtil.toLowerString(keywordName.trim()));
+ }
+
+ /**
+ * Set the keyword value only if it is not already set to something else.
+ */
+ public void defaultKeywordValue(String keywordName, String value) {
+ setKeywordValue(keywordName, value, false);
+ }
+
+ /**
+ * Set the value for the named keyword, or unset it if value is null. If
+ * keywordName itself is null, unset all keywords. If keywordName is not null,
+ * value must not be null.
+ */
+ public void setKeywordValue(String keywordName, String value) {
+ setKeywordValue(keywordName, value, true);
+ }
+
+ /**
+ * Set the value for the named keyword, or unset it if value is null. If
+ * keywordName itself is null, unset all keywords. If keywordName is not null,
+ * value must not be null. If reset is true, ignore any previous value for
+ * the keyword, otherwise do not change the keyword (including removal of
+ * one or all keywords).
+ */
+ private void setKeywordValue(String keywordName, String value, boolean reset) {
+ if (keywordName == null) {
+ if (reset) {
+ // force new map, ignore value
+ keywords = Collections.emptyMap();
+ }
+ } else {
+ keywordName = AsciiUtil.toLowerString(keywordName.trim());
+ if (keywordName.length() == 0) {
+ throw new IllegalArgumentException("keyword must not be empty");
+ }
+ if (value != null) {
+ value = value.trim();
+ if (value.length() == 0) {
+ throw new IllegalArgumentException("value must not be empty");
+ }
+ }
+ Map m = getKeywordMap();
+ if (m.isEmpty()) { // it is EMPTY_MAP
+ if (value != null) {
+ // force new map
+ keywords = new TreeMap(getKeyComparator());
+ keywords.put(keywordName, value.trim());
+ }
+ } else {
+ if (reset || !m.containsKey(keywordName)) {
+ if (value != null) {
+ m.put(keywordName, value);
+ } else {
+ m.remove(keywordName);
+ if (m.isEmpty()) {
+ // force new map
+ keywords = Collections.emptyMap();
+ }
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDs.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDs.java
new file mode 100644
index 00000000000..34c0a5b26d6
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleIDs.java
@@ -0,0 +1,536 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.MissingResourceException;
+
+import com.ibm.icu.util.ULocale;
+
+
+/**
+ * Utilities for mapping between old and new language, country, and other
+ * locale ID related names.
+ */
+public class LocaleIDs {
+
+ /**
+ * Returns the 2-letter ISO 3166 country codes held in the current country table.
+ * The array is a fresh copy on every call. Can be used to create Locales.
+ * @stable ICU 3.0
+ */
+ public static String[] getISOCountries() {
+ initCountryTables();
+ return _countries.clone();
+ }
+
+ /**
+ * Returns a copy of the language code table: the 2-letter ISO 639 code for each
+ * language that has one, otherwise its 3-letter code. Can be used to create Locales.
+ * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
+ * The withdrawn codes "in", "iw", "ji", "jw" and "sh" are kept in a separate
+ * obsolete table and are not part of the returned array.]
+ * @stable ICU 3.0
+ */
+ public static String[] getISOLanguages() {
+ initLanguageTables();
+ return _languages.clone();
+ }
+
+ /**
+  * Maps a two-letter country code to its uppercase ISO 3166 three-letter code.
+  * Matching is exact; the current table is searched before the obsolete one.
+  * @param country two-letter country code; must not be null
+  * @return the three-letter code, or the empty string if there is no mapping
+  *         (this includes an empty country)
+  * @stable ICU 3.0
+  */
+ public static String getISO3Country(String country){
+     initCountryTables();
+
+     // current codes win over withdrawn ones
+     int current = findIndex(_countries, country);
+     if (current >= 0) {
+         return _countries3[current];
+     }
+     int obsolete = findIndex(_obsoleteCountries, country);
+     if (obsolete >= 0) {
+         return _obsoleteCountries3[obsolete];
+     }
+     return "";
+ }
+ /**
+  * Maps a language code to its lowercase ISO 639-2/T three-letter code.
+  * Matching is exact; the current table is searched before the obsolete one.
+  * The ISO 639-2 language codes can be found on-line at
+  * ftp://dkuug.dk/i18n/iso-639-2.txt
+  * @param language language code as stored in the tables; must not be null
+  * @return the three-letter code, or the empty string if there is no mapping
+  *         (this includes an empty language)
+  * @stable ICU 3.0
+  */
+ public static String getISO3Language(String language) {
+     initLanguageTables();
+
+     // current codes win over withdrawn ones
+     int current = findIndex(_languages, language);
+     if (current >= 0) {
+         return _languages3[current];
+     }
+     int obsolete = findIndex(_obsoleteLanguages, language);
+     if (obsolete >= 0) {
+         return _obsoleteLanguages3[obsolete];
+     }
+     return "";
+ }
+
+ /**
+  * Maps a three-letter language code to the entry at the same position of the
+  * two-letter table (which is itself a three-letter code for languages without a
+  * two-letter one). Current codes are tried first, then the obsolete ones.
+  * @return the mapped code, or null if lang is in neither table
+  */
+ public static String threeToTwoLetterLanguage(String lang) {
+     initLanguageTables();
+     int hit = findIndex(_languages3, lang);
+     if (hit >= 0) {
+         return _languages[hit];
+     }
+     // not a current code: try the withdrawn ones before giving up
+     hit = findIndex(_obsoleteLanguages3, lang);
+     return (hit >= 0) ? _obsoleteLanguages[hit] : null;
+ }
+
+ /**
+  * Maps a three-letter country code to the two-letter code stored at the same
+  * position. Current codes are tried first, then the obsolete ones.
+  * @param region three-letter country code; must not be null
+  * @return the two-letter code, or null if region is in neither table
+  */
+ public static String threeToTwoLetterRegion(String region) {
+     initCountryTables();
+     int hit = findIndex(_countries3, region);
+     if (hit >= 0) {
+         return _countries[hit];
+     }
+     // not a current code: try the withdrawn ones before giving up
+     hit = findIndex(_obsoleteCountries3, region);
+     return (hit >= 0) ? _obsoleteCountries[hit] : null;
+ }
+
+ /**
+  * Linear search: returns the index of the first element equal to target, or -1.
+  * (The arrays are ordered by the two-letter code, not by the three-letter one.)
+  */
+ private static int findIndex(String[] array, String target){
+     int pos = 0;
+     while (pos < array.length && !target.equals(array[pos])) {
+         pos++;
+     }
+     // pos == array.length means the scan ran off the end without a match
+     return (pos < array.length) ? pos : -1;
+ }
+
+
+ /**
+ * Tables used in normalizing portions of the id.
+ */
+ /* tables updated per http://lcweb.loc.gov/standards/iso639-2/
+ to include the revisions up to 2001/7/27 *CWB*/
+ /* The 3 character codes are the terminology codes like RFC 3066.
+ This is compatible with prior ICU codes */
+ /* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in
+ the table but now at the end of the table because
+ 3 character codes are duplicates. This avoids bad searches
+ going from 3 to 2 character codes.*/
+ /* The range qaa-qtz is reserved for local use. */
+
+ private static volatile String[] _languages; // init guard: assigned last, tested first
+ private static String[] _replacementLanguages;
+ private static String[] _obsoleteLanguages;
+ private static String[] _languages3;
+ private static String[] _obsoleteLanguages3;
+
+ /** Builds the language tables on first use; once _languages is set, calls return at once. */
+ private static void initLanguageTables() {
+     if (_languages == null) {
+         // the tables are built in locals and only published under the lock below
+         /* This list MUST be in sorted order, and MUST contain the two-letter codes
+            if one exists otherwise use the three letter code */
+         String[] tempLanguages = {
+             "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
+             "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa",
+             "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
+             "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
+             "bai", "bal", "ban", "bas", "bat", "be", "bej",
+             "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
+             "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
+             "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
+             "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
+             "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
+             "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
+             "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
+             "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
+             "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
+             "enm", "eo", "es", "et", "eu", "ewo", "fa",
+             "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon",
+             "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay",
+             "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn",
+             "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv",
+             "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him",
+             "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
+             "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
+             "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
+             "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
+             "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi",
+             "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
+             "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks",
+             "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
+             "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
+             "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
+             "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
+             "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min",
+             "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
+             "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
+             "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
+             "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
+             "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub",
+             "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
+             "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
+             "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
+             "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
+             "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
+             "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
+             "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem",
+             "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
+             "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
+             "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
+             "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
+             "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter",
+             "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
+             "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr",
+             "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
+             "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
+             "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
+             "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
+             "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd",
+             "zu", "zun",
+         };
+
+         String[] tempReplacementLanguages = {
+             "id", "he", "yi", "jv", "sr", "nb",/* replacement language codes */
+         };
+
+         String[] tempObsoleteLanguages = {
+             "in", "iw", "ji", "jw", "sh", "no", /* obsolete language codes */
+         };
+
+         /* This list MUST contain a three-letter code for every two-letter code in the
+            list above, and they MUST be in the same order (i.e., the same language must
+            be in the same place in both lists)! */
+         String[] tempLanguages3 = {
+             /*"aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
+             "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
+             /*"afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", */
+             "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa",
+             /*"ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
+             "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
+             /*"ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
+             "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
+             /*"bai", "bal", "ban", "bas", "bat", "be", "bej", */
+             "bai", "bal", "ban", "bas", "bat", "bel", "bej",
+             /*"bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
+             "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
+             /*"bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
+             "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
+             /*"btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
+             "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
+             /*"ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
+             "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
+             /*"chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
+             "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
+             /*"cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
+             "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
+             /*"cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
+             "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
+             /*"dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
+             "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
+             /*"dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
+             "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
+             /*"enm", "eo", "es", "et", "eu", "ewo", "fa", */
+             "enm", "epo", "spa", "est", "eus", "ewo", "fas",
+             /*"fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", */
+             "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon",
+             /*"fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", */
+             "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay",
+             /*"gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
+             "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
+             /*"goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", */
+             "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv",
+             /*"gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
+             "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
+             /*"hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
+             "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
+             /*"ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
+             "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
+             /*"ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
+             "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
+             /*"iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
+             "iku", "jpn", "jbo", "jpr", "jrb", "jaw", "kat", "kaa", "kab",
+             /*"kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", */
+             "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi",
+             /*"kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
+             "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
+             /*"ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", */
+             "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas",
+             /*"ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
+             "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
+             /*"lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
+             "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
+             /*"loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
+             "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
+             /*"lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
+             "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
+             /*"mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", */
+             "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
+             /*"mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
+             "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
+             /*"mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
+             "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
+             /*"mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
+             "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
+             /*"nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
+             "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
+             /*"niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", */
+             "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
+             /*"nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
+             "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
+             /*"om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
+             "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
+             /*"pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
+             "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
+             /*"pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
+             "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
+             /*"raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
+             "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
+             /*"ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
+             "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
+             /*"sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", */
+             "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem",
+             /*"sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
+             "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
+             /*"sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
+             "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
+             /*"sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
+             "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
+             /*"srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
+             "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
+             /*"sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", */
+             "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
+             /*"tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
+             "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
+             /*"tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", */
+             "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
+             /*"ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
+             "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
+             /*"ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
+             "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
+             /*"uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
+             "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
+             /*"wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
+             "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
+             /*"yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd", */
+             "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
+             /*"zu", "zun", */
+             "zul", "zun",
+         };
+
+         String[] tempObsoleteLanguages3 = {
+             /* "in", "iw", "ji", "jw", "sh", */
+             "ind", "heb", "yid", "jaw", "srp",
+         };
+         // Publish the guard field last: a thread that sees it non-null must see every table.
+         synchronized (ULocale.class) {
+             if (_languages == null) {
+                 _replacementLanguages = tempReplacementLanguages;
+                 _obsoleteLanguages = tempObsoleteLanguages;
+                 _languages3 = tempLanguages3;
+                 _obsoleteLanguages3 = tempObsoleteLanguages3;
+                 _languages = tempLanguages;
+             }
+         }
+     }
+ }
+
+ private static volatile String[] _countries; // init guard: assigned last, tested first
+ private static String[] _deprecatedCountries;
+ private static String[] _replacementCountries;
+ private static String[] _obsoleteCountries;
+ private static String[] _countries3;
+ private static String[] _obsoleteCountries3;
+
+ /** Builds the country tables on first use; once _countries is set, calls return at once. */
+ private static void initCountryTables() {
+     if (_countries == null) {
+         /* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
+            http://www.evertype.com/standards/iso3166/iso3166-1-en.html
+            added new codes keeping the old ones for compatibility
+            updated to include 1999/12/03 revisions *CWB*/
+
+         /* RO(ROM) is now RO(ROU) according to
+            http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
+         */
+
+         /* This list MUST be in sorted order, and MUST contain only two-letter codes! */
+         String[] tempCountries = {
+             "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
+             "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
+             "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
+             "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
+             "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
+             "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
+             "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
+             "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
+             "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
+             "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
+             "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
+             "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
+             "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
+             "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
+             "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
+             "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
+             "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
+             "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
+             "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
+             "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
+             "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
+             "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
+             "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
+             "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
+             "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
+             "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
+             "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
+             "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
+             "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
+             "WS", "YE", "YT", "ZA", "ZM", "ZW",
+         };
+
+         /* this table is used for 3 letter codes */
+         String[] tempObsoleteCountries = {
+             "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
+         };
+
+         String[] tempDeprecatedCountries = {
+             "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" /* deprecated country list */
+         };
+         String[] tempReplacementCountries = {
+             /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
+             "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", /* replacement country codes */
+         };
+
+         /* This list MUST contain a three-letter code for every two-letter code in
+            the above list, and they MUST be listed in the same order! */
+         String[] tempCountries3 = {
+             /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
+             "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
+             /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
+             "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
+             /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
+             "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
+             /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
+             "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
+             /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
+             "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
+             /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
+             "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
+             /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
+             "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
+             /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
+             "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
+             /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
+             "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
+             /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
+             "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
+             /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
+             "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
+             /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
+             "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
+             /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
+             "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
+             /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
+             "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
+             /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
+             "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
+             /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
+             "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
+             /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
+             "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
+             /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
+             "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
+             /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
+             "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
+             /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
+             "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
+             /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
+             "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
+             /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
+             "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
+             /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
+             "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
+             /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
+             "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
+             /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
+             "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
+             /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
+             "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
+             /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
+             "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
+             /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
+             "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
+             /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
+             "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
+             /* "WS", "YE", "YT", "ZA", "ZM", "ZW" */
+             "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
+         };
+
+         String[] tempObsoleteCountries3 = {
+             /*"FX", "CS", "RO", "TP", "YU", "ZR", */
+             "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
+         };
+         // Publish the guard field last: a thread that sees it non-null must see every table.
+         synchronized (ULocale.class) {
+             if (_countries == null) {
+                 _deprecatedCountries = tempDeprecatedCountries;
+                 _replacementCountries = tempReplacementCountries;
+                 _obsoleteCountries = tempObsoleteCountries;
+                 _countries3 = tempCountries3;
+                 _obsoleteCountries3 = tempObsoleteCountries3;
+                 _countries = tempCountries;
+             }
+         }
+     }
+ }
+
+ /**
+  * Maps a deprecated country code to its replacement; any other ID is returned as is.
+  */
+ public static String getCurrentCountryID(String oldID){
+     initCountryTables();
+     int hit = findIndex(_deprecatedCountries, oldID);
+     return (hit < 0) ? oldID : _replacementCountries[hit];
+ }
+
+ /**
+  * Maps an obsolete language code to its replacement; any other ID is returned as is.
+  */
+ public static String getCurrentLanguageID(String oldID){
+     initLanguageTables();
+     int hit = findIndex(_obsoleteLanguages, oldID);
+     return (hit < 0) ? oldID : _replacementLanguages[hit];
+ }
+
+
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleUtility.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleUtility.java
new file mode 100644
index 00000000000..143ac9d2d29
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/LocaleUtility.java
@@ -0,0 +1,132 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ ******************************************************************************
+ *
+ ******************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.util.Locale;
+
+/**
+ * Static helpers missing from java.util.Locale: name parsing and fallback logic.
+ */
+public class LocaleUtility {
+
+    /**
+     * Builds a Locale from a name of the form aa, aa_BB or aa_BB_CC. The text
+     * before the first '_' is the language, the text between the first and the
+     * second '_' is the country, and everything after the second '_' (further
+     * underscores included) is the variant. Missing parts are empty strings.
+     *
+     * @param name the locale name; must not be null
+     * @return the corresponding Locale
+     */
+    public static Locale getLocaleFromName(String name) {
+        int firstSep = name.indexOf('_');
+        if (firstSep < 0) {
+            return new Locale(name, "", "");
+        }
+        String language = name.substring(0, firstSep);
+        int countryStart = firstSep + 1;
+        int secondSep = name.indexOf('_', countryStart);
+        if (secondSep < 0) {
+            return new Locale(language, name.substring(countryStart), "");
+        }
+        String country = name.substring(countryStart, secondSep);
+        String variant = name.substring(secondSep + 1);
+        return new Locale(language, country, variant);
+    }
+
+    /**
+     * Compares two locale strings of the form aa_BB_CC and reports whether
+     * parent is a fallback of child, that is, if child =~ "^parent(_.+)*"
+     * (roughly): child must start with parent, and parent must end either at
+     * the end of child or right before a '_'. Equal strings yield true.
+     *
+     * @param parent the candidate fallback name; must not be null
+     * @param child the more specific name; must not be null
+     * @return true if parent is a fallback of (or equal to) child
+     */
+    public static boolean isFallbackOf(String parent, String child) {
+        if (child.startsWith(parent)) {
+            int boundary = parent.length();
+            return boundary == child.length() || child.charAt(boundary) == '_';
+        }
+        return false;
+    }
+
+    /**
+     * Locale flavour of the String-based isFallbackOf: the comparison is done on
+     * the toString() forms of the two locales.
+     */
+    public static boolean isFallbackOf(Locale parent, Locale child) {
+        String parentName = parent.toString();
+        String childName = child.toString();
+        return isFallbackOf(parentName, childName);
+    }
+
+    /*
+     * Convenience method that calls canonicalLocaleString(String) with
+     * locale.toString();
+     */
+    /*public static String canonicalLocaleString(Locale locale) {
+        return canonicalLocaleString(locale.toString());
+    }*/
+
+    /*
+     * You'd think that Locale canonicalizes, since it munges the
+     * renamed languages, but it doesn't quite. It forces the region
+     * to be upper case but doesn't do anything about the language or
+     * variant. Our canonical form is 'lower_UPPER_UPPER'.
+     */
+    /*public static String canonicalLocaleString(String id) {
+        if (id != null) {
+            int x = id.indexOf("_");
+            if (x == -1) {
+                id = id.toLowerCase(Locale.ENGLISH);
+            } else {
+                StringBuffer buf = new StringBuffer();
+                buf.append(id.substring(0, x).toLowerCase(Locale.ENGLISH));
+                buf.append(id.substring(x).toUpperCase(Locale.ENGLISH));
+
+                int len = buf.length();
+                int n = len;
+                while (--n >= 0 && buf.charAt(n) == '_') {
+                }
+                if (++n != len) {
+                    buf.delete(n, len);
+                }
+                id = buf.toString();
+            }
+        }
+        return id;
+    }*/
+
+    /**
+     * Falls back from the given locale by blanking its rightmost non-empty part
+     * (variant, then country, then language). A locale with only a language
+     * therefore falls back to the root locale ("", "", ""), and the root locale
+     * itself yields null. NOTE: The string "root" is not recognized; do not use it.
+     *
+     * @return a new Locale that is a fallback from the given locale, or null.
+     */
+    public static Locale fallback(Locale loc) {
+        String language = loc.getLanguage();
+        String country = loc.getCountry();
+        String variant = loc.getVariant();
+        if (variant.length() != 0) {
+            return new Locale(language, country, "");
+        }
+        if (country.length() != 0) {
+            return new Locale(language, "", "");
+        }
+        if (language.length() != 0) {
+            return new Locale("", "", "");
+        }
+        return null; // All parts were empty
+    }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/SimpleCache.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/SimpleCache.java
new file mode 100644
index 00000000000..7ee2dc50dff
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/SimpleCache.java
@@ -0,0 +1,73 @@
+/*
+ ****************************************************************************
+ * Copyright (c) 2007-2011 International Business Machines Corporation and *
+ * others. All rights reserved. *
+ ****************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.lang.ref.Reference;
+import java.lang.ref.SoftReference;
+import java.lang.ref.WeakReference;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/** ICUCache backed by a synchronized HashMap that is held through a soft (default) or weak reference. */
+public class SimpleCache<K, V> implements ICUCache<K, V> {
+    private static final int DEFAULT_CAPACITY = 16;
+    private Reference<Map<K, V>> cacheRef = null; // null until the first put() and after clear()
+    private int type = ICUCache.SOFT;
+    private int capacity = DEFAULT_CAPACITY;
+
+    /** Creates a soft-referenced cache with the default initial capacity. */
+    public SimpleCache() {
+    }
+
+    /** Creates a cache of the given type with the default initial capacity. */
+    public SimpleCache(int cacheType) {
+        this(cacheType, DEFAULT_CAPACITY);
+    }
+
+    /**
+     * Creates a cache; ICUCache.WEAK selects weak references, any other type keeps the soft default.
+     * A non-positive initialCapacity falls back to the default capacity.
+     */
+    public SimpleCache(int cacheType, int initialCapacity) {
+        if (cacheType == ICUCache.WEAK) {
+            type = cacheType;
+        }
+        if (initialCapacity > 0) {
+            capacity = initialCapacity;
+        }
+    }
+
+    /** Returns the cached value for key, or null if it is absent or there is no backing map. */
+    public V get(Object key) {
+        Reference<Map<K, V>> ref = cacheRef;
+        Map<K, V> map = (ref == null) ? null : ref.get();
+        return (map == null) ? null : map.get(key);
+    }
+
+    /** Stores value under key, first creating a new backing map if none is reachable. */
+    public void put(K key, V value) {
+        Reference<Map<K, V>> ref = cacheRef;
+        Map<K, V> map = (ref == null) ? null : ref.get();
+        if (map == null) {
+            map = Collections.synchronizedMap(new HashMap<K, V>(capacity));
+            if (type == ICUCache.WEAK) {
+                ref = new WeakReference<Map<K, V>>(map);
+            } else {
+                ref = new SoftReference<Map<K, V>>(map);
+            }
+            cacheRef = ref;
+        }
+        map.put(key, value);
+    }
+
+    /** Drops the reference to the backing map and with it every cached entry. */
+    public void clear() {
+        cacheRef = null;
+    }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java
new file mode 100644
index 00000000000..7600914c5f3
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/AsciiUtil.java
@@ -0,0 +1,180 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl.locale;
+
+/** ASCII-only case-mapping and classification helpers; no locale-sensitive APIs are used. */
+public final class AsciiUtil {
+    /** Returns true if s1 and s2 are equal, treating ASCII letters case-insensitively. */
+    public static boolean caseIgnoreMatch(String s1, String s2) {
+        if (s1 == s2) {
+            return true;
+        }
+        int len = s1.length();
+        if (len != s2.length()) {
+            return false;
+        }
+        for (int i = 0; i < len; i++) {
+            char c1 = s1.charAt(i);
+            char c2 = s2.charAt(i);
+            if (c1 != c2 && toLower(c1) != toLower(c2)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Orders s1 and s2 by comparing their ASCII-lowercased forms with String.compareTo. */
+    public static int caseIgnoreCompare(String s1, String s2) {
+        if (s1 == s2) {
+            return 0;
+        }
+        return AsciiUtil.toLowerString(s1).compareTo(AsciiUtil.toLowerString(s2));
+    }
+
+    /** Maps a-z to A-Z; any other character is returned unchanged. */
+    public static char toUpper(char c) {
+        return (c >= 'a' && c <= 'z') ? (char) (c - 0x20) : c;
+    }
+
+    /** Maps A-Z to a-z; any other character is returned unchanged. */
+    public static char toLower(char c) {
+        return (c >= 'A' && c <= 'Z') ? (char) (c + 0x20) : c;
+    }
+
+    /** Lower-cases the ASCII letters of s; returns s itself when it contains no A-Z. */
+    public static String toLowerString(String s) {
+        int idx = 0;
+        while (idx < s.length() && toLower(s.charAt(idx)) == s.charAt(idx)) {
+            idx++;
+        }
+        if (idx == s.length()) {
+            return s;
+        }
+        StringBuilder buf = new StringBuilder(s.substring(0, idx));
+        for (; idx < s.length(); idx++) {
+            buf.append(toLower(s.charAt(idx)));
+        }
+        return buf.toString();
+    }
+
+    /** Upper-cases the ASCII letters of s; returns s itself when it contains no a-z. */
+    public static String toUpperString(String s) {
+        int idx = 0;
+        while (idx < s.length() && toUpper(s.charAt(idx)) == s.charAt(idx)) {
+            idx++;
+        }
+        if (idx == s.length()) {
+            return s;
+        }
+        StringBuilder buf = new StringBuilder(s.substring(0, idx));
+        for (; idx < s.length(); idx++) {
+            buf.append(toUpper(s.charAt(idx)));
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Returns s with its first character upper-cased and every later character
+     * lower-cased (ASCII letters only); s itself is returned if nothing changes.
+     */
+    public static String toTitleString(String s) {
+        if (s.length() == 0) {
+            return s;
+        }
+        // idx: position of the first character that has to change
+        int idx = 0;
+        char first = s.charAt(idx);
+        if (!(first >= 'a' && first <= 'z')) {
+            for (idx = 1; idx < s.length(); idx++) {
+                // inspect the character at idx (not the first one) for upper case
+                char c = s.charAt(idx);
+                if (c >= 'A' && c <= 'Z') {
+                    break;
+                }
+            }
+        }
+        if (idx == s.length()) {
+            return s;
+        }
+        StringBuilder buf = new StringBuilder(s.substring(0, idx));
+        if (idx == 0) {
+            buf.append(toUpper(s.charAt(idx)));
+            idx++;
+        }
+        for (; idx < s.length(); idx++) {
+            buf.append(toLower(s.charAt(idx)));
+        }
+        return buf.toString();
+    }
+
+    /** Returns true if c is an ASCII letter (A-Z or a-z). */
+    public static boolean isAlpha(char c) {
+        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    }
+
+    /** Returns true if every character of s is an ASCII letter; true for the empty string. */
+    public static boolean isAlphaString(String s) {
+        for (int i = 0; i < s.length(); i++) {
+            if (!isAlpha(s.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Returns true if c is an ASCII digit (0-9). */
+    public static boolean isNumeric(char c) {
+        return (c >= '0' && c <= '9');
+    }
+
+    /** Returns true if every character of s is an ASCII digit; true for the empty string. */
+    public static boolean isNumericString(String s) {
+        for (int i = 0; i < s.length(); i++) {
+            if (!isNumeric(s.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Returns true if c is an ASCII letter or digit. */
+    public static boolean isAlphaNumeric(char c) {
+        return isAlpha(c) || isNumeric(c);
+    }
+
+    /** Returns true if every character of s is an ASCII letter or digit; true for the empty string. */
+    public static boolean isAlphaNumericString(String s) {
+        for (int i = 0; i < s.length(); i++) {
+            if (!isAlphaNumeric(s.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Key wrapper whose equals/hashCode ignore the ASCII case of the wrapped string. */
+    public static class CaseInsensitiveKey {
+        private String _key;
+        private int _hash;
+
+        public CaseInsensitiveKey(String key) {
+            _key = key;
+            _hash = AsciiUtil.toLowerString(key).hashCode();
+        }
+
+        public boolean equals(Object o) {
+            if (o instanceof CaseInsensitiveKey) {
+                return AsciiUtil.caseIgnoreMatch(_key, ((CaseInsensitiveKey)o)._key);
+            }
+            return false;
+        }
+
+        public int hashCode() {
+            return _hash;
+        }
+    }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/BigDecimal.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/BigDecimal.java
new file mode 100644
index 00000000000..a90150384f7
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/BigDecimal.java
@@ -0,0 +1,3880 @@
+/* Generated from 'BigDecimal.nrx' 8 Sep 2000 11:10:50 [v2.00] */
+/* Options: Binary Comments Crossref Format Java Logo Strictargs Strictcase Trace2 Verbose3 */
+package com.ibm.icu.math;
+
+import java.math.BigInteger;
+
+/* ------------------------------------------------------------------ */
+/* BigDecimal -- Decimal arithmetic for Java */
+/* ------------------------------------------------------------------ */
+/* Copyright IBM Corporation, 1996-2011. All Rights Reserved. */
+/* */
+/* The BigDecimal class provides immutable arbitrary-precision */
+/* floating point (including integer) decimal numbers. */
+/* */
+/* As the numbers are decimal, there is an exact correspondence */
+/* between an instance of a BigDecimal object and its String */
+/* representation; the BigDecimal class provides direct conversions */
+/* to and from String and character array objects, as well as */
+/* conversions to and from the Java primitive types (which may not */
+/* be exact). */
+/* ------------------------------------------------------------------ */
+/* Notes: */
+/* */
+/* 1. A BigDecimal object is never changed in value once constructed; */
+/* this avoids the need for locking. Note in particular that the */
+/* mantissa array may be shared between many BigDecimal objects, */
+/* so that once exposed it must not be altered. */
+/* */
+/* 2. This class looks at MathContext class fields directly (for */
+/* performance). It must not and does not change them. */
+/* */
+/* 3. Exponent checking is delayed until finish(), as we know */
+/* intermediate calculations cannot cause 31-bit overflow. */
+/* [This assertion depends on MAX_DIGITS in MathContext.] */
+/* */
+/* 4. Comments for the public API now follow the javadoc conventions. */
+/* The NetRexx -comments option is used to pass these comments */
+/* through to the generated Java code (with -format, if desired). */
+/* */
+/* 5. System.arraycopy is faster than explicit loop as follows */
+/* Mean length 4: equal */
+/* Mean length 8: x2 */
+/* Mean length 16: x3 */
+/* Mean length 24: x4 */
+/* From prior experience, we expect mean length a little below 8, */
+/* but arraycopy is still the one to use, in general, until later */
+/* measurements suggest otherwise. */
+/* */
+/* 6. 'DMSRCN' referred to below is the original (1981) IBM S/370 */
+/* assembler code implementation of the algorithms below; it is */
+/* now called IXXRCN and is available with the OS/390 and VM/ESA */
+/* operating systems. */
+/* ------------------------------------------------------------------ */
+/* Change History: */
+/* 1997.09.02 Initial version (derived from netrexx.lang classes) */
+/* 1997.09.12 Add lostDigits checking */
+/* 1997.10.06 Change mantissa to a byte array */
+/* 1997.11.22 Rework power [did not prepare arguments, etc.] */
+/* 1997.12.13 multiply did not prepare arguments */
+/* 1997.12.14 add did not prepare and align arguments correctly */
+/* 1998.05.02 0.07 packaging changes suggested by Sun and Oracle */
+/* 1998.05.21 adjust remainder operator finalization */
+/* 1998.06.04 rework to pass MathContext to finish() and round() */
+/* 1998.06.06 change format to use round(); support rounding modes */
+/* 1998.06.25 rename to BigDecimal and begin merge */
+/* zero can now have trailing zeros (i.e., exp\=0) */
+/* 1998.06.28 new methods: movePointXxxx, scale, toBigInteger */
+/* unscaledValue, valueof */
+/* 1998.07.01 improve byteaddsub to allow array reuse, etc. */
+/* 1998.07.01 make null testing explicit to avoid JIT bug [Win32] */
+/* 1998.07.07 scaled division [divide(BigDecimal, int, int)] */
+/* 1998.07.08 setScale, faster equals */
+/* 1998.07.11 allow 1E6 (no sign) ; new double/float conversion */
+/* 1998.10.12 change package to com.ibm.icu.math */
+/* 1998.12.14 power operator no longer rounds RHS [to match ANSI] */
+/* add toBigDecimal() and BigDecimal(java.math.BigDecimal) */
+/* 1998.12.29 improve byteaddsub by using table lookup */
+/* 1999.02.04 lostdigits=0 behaviour rounds instead of digits+1 guard */
+/* 1999.02.05 cleaner code for BigDecimal(char[]) */
+/* 1999.02.06 add javadoc comments */
+/* 1999.02.11 format() changed from 7 to 2 method form */
+/* 1999.03.05 null pointer checking is no longer explicit */
+/* 1999.03.05 simplify; changes from discussion with J. Bloch: */
+/* null no longer permitted for MathContext; drop boolean, */
+/* byte, char, float, short constructor, deprecate double */
+/* constructor, no blanks in string constructor, add */
+/* offset and length version of char[] constructor; */
+/* add valueOf(double); drop booleanValue, charValue; */
+/* add ...Exact versions of remaining convertors */
+/* 1999.03.13 add toBigIntegerExact */
+/* 1999.03.13 1.00 release to IBM Centre for Java Technology */
+/* 1999.05.27 1.01 correct 0-0.2 bug under scaled arithmetic */
+/* 1999.06.29 1.02 constructors should not allow exponent > 9 digits */
+/* 1999.07.03 1.03 lost digits should not be checked if digits=0 */
+/* 1999.07.06 lost digits Exception message changed */
+/* 1999.07.10 1.04 more work on 0-0.2 (scaled arithmetic) */
+/* 1999.07.17 improve messages from pow method */
+/* 1999.08.08 performance tweaks */
+/* 1999.08.15 fastpath in multiply */
+/* 1999.11.05 1.05 fix problem in intValueExact [e.g., 5555555555] */
+/* 1999.12.22 1.06 remove multiply fastpath, and improve performance */
+/* 2000.01.01 copyright update [Y2K has arrived] */
+/* 2000.06.18 1.08 no longer deprecate BigDecimal(double) */
+/* ------------------------------------------------------------------ */
+
+/**
+ * The <code>BigDecimal</code> class implements immutable arbitrary-precision decimal numbers. The methods of the
+ * <code>BigDecimal</code> class provide operations for fixed and floating point arithmetic, comparison, format
+ * conversions, and hashing.
+ *
+ * As the numbers are decimal, there is an exact correspondence between an instance of a <code>BigDecimal</code> object
+ * and its <code>String</code> representation; the <code>BigDecimal</code> class provides direct conversions to and from
+ * <code>String</code> and character array (<code>char[]</code>) objects, as well as conversions to and from the Java
+ * primitive types (which may not be exact) and <code>BigInteger</code>.
+ *
+ * In the descriptions of constructors and methods in this documentation, the value of a BigDecimal
number
+ * object is shown as the result of invoking the toString()
method on the object. The internal
+ * representation of a decimal number is neither defined nor exposed, and is not permitted to affect the result of any
+ * operation.
+ *
+ * The floating point arithmetic provided by this class is defined by the ANSI X3.274-1996 standard, and is also
+ * documented at http://www2.hursley.ibm.com/decimal
+ * [This URL will change.]
+ *
+ *
Operator methods
+ *
+ * Operations on BigDecimal
numbers are controlled by a {@link MathContext} object, which provides the
+ * context (precision and other information) for the operation. Methods that can take a MathContext
+ * parameter implement the standard arithmetic operators for BigDecimal
objects and are known as
+ * operator methods . The default settings provided by the constant {@link MathContext#DEFAULT} (digits=9,
+ * form=SCIENTIFIC, lostDigits=false, roundingMode=ROUND_HALF_UP
) perform general-purpose floating point
+ * arithmetic to nine digits of precision. The MathContext
parameter must not be null
.
+ *
+ * Each operator method also has a version provided which does not take a MathContext
parameter. For this
+ * version of each method, the context settings used are digits=0,
+ * form=PLAIN, lostDigits=false, roundingMode=ROUND_HALF_UP
; these settings perform fixed point arithmetic with
+ * unlimited precision, as defined for the original BigDecimal class in Java 1.1 and Java 1.2.
+ *
+ * For monadic operators, only the optional MathContext
parameter is present; the operation acts upon the
+ * current object.
+ *
+ * For dyadic operators, a BigDecimal
parameter is always present; it must not be null
. The
+ * operation acts with the current object being the left-hand operand and the BigDecimal
parameter being
+ * the right-hand operand.
+ *
+ * For example, adding two BigDecimal
objects referred to by the names award
and
+ * extra
could be written as any of:
+ *
+ *
+ * award.add(extra)
+ * award.add(extra, MathContext.DEFAULT)
+ * award.add(extra, acontext)
+ *
+ *
+ * (where acontext
is a MathContext
object), which would return a BigDecimal
+ * object whose value is the result of adding award
and extra
under the appropriate context
+ * settings.
+ *
+ * When a BigDecimal
operator method is used, a set of rules define what the result will be (and, by
+ * implication, how the result would be represented as a character string). These rules are defined in the BigDecimal
+ * arithmetic documentation (see the URL above), but in summary:
+ *
+ * Results are normally calculated with up to some maximum number of significant digits. For example, if the
+ * MathContext
parameter for an operation were MathContext.DEFAULT
then the result would be
+ * rounded to 9 digits; the division of 2 by 3 would then result in 0.666666667.
+ * You can change the default of 9 significant digits by providing the method with a suitable MathContext
+ * object. This lets you calculate using as many digits as you need -- thousands, if necessary. Fixed point (scaled)
+ * arithmetic is indicated by using a digits
setting of 0 (or omitting the MathContext
+ * parameter).
+ * Similarly, you can change the algorithm used for rounding from the default "classic" algorithm.
+ *
+ * In standard arithmetic (that is, when the form
setting is not PLAIN
), a zero result is
+ * always expressed as the single digit '0'
(that is, with no sign, decimal point, or exponent part).
+ *
+ * Except for the division and power operators in standard arithmetic, trailing zeros are preserved (this is in contrast
+ * to binary floating point operations and most electronic calculators, which lose the information about trailing zeros
+ * in the fractional part of results).
+ * So, for example:
+ *
+ *
+ * new BigDecimal("2.40").add( new BigDecimal("2")) => "4.40"
+ * new BigDecimal("2.40").subtract(new BigDecimal("2")) => "0.40"
+ * new BigDecimal("2.40").multiply(new BigDecimal("2")) => "4.80"
+ * new BigDecimal("2.40").divide( new BigDecimal("2"), def) => "1.2"
+ *
+ *
+ * where the value on the right of the =>
would be the result of the operation, expressed as a
+ * String
, and def
(in this and following examples) refers to MathContext.DEFAULT
+ * ). This preservation of trailing zeros is desirable for most calculations (including financial calculations). If
+ * necessary, trailing zeros may be easily removed using division by 1.
+ *
+ * In standard arithmetic, exponential form is used for a result depending on its value and the current setting of
+ * digits
(the default is 9 digits). If the number of places needed before the decimal point exceeds the
+ * digits
setting, or the absolute value of the number is less than 0.000001
, then the number
+ * will be expressed in exponential notation; thus
+ *
+ *
+ * new BigDecimal("1e+6").multiply(new BigDecimal("1e+6"), def)
+ *
+ *
+ * results in 1E+12
instead of 1000000000000
, and
+ *
+ *
+ * new BigDecimal("1").divide(new BigDecimal("3E+10"), def)
+ *
+ *
+ * results in 3.33333333E-11
instead of 0.0000000000333333333
.
+ *
+ * The form of the exponential notation (scientific or engineering) is determined by the form
setting.
+ *
+ *
+ * The names of methods in this class follow the conventions established by java.lang.Number
,
+ * java.math.BigInteger
, and java.math.BigDecimal
in Java 1.1 and Java 1.2.
+ *
+ * @see MathContext
+ * @author Mike Cowlishaw
+ * @stable ICU 2.0
+ */
+
+public class BigDecimal extends java.lang.Number implements java.io.Serializable, java.lang.Comparable {
+ // private static final java.lang.String $0="BigDecimal.nrx";
+
+ /* ----- Constants ----- */
+ /* properties constant public */// useful to others
+ /**
+ * The <code>BigDecimal</code> constant "0".
+ *
+ * @see #ONE
+ * @see #TEN
+ * @stable ICU 2.0
+ */
+ public static final com.ibm.icu.math.BigDecimal ZERO = new com.ibm.icu.math.BigDecimal((long) 0); // use long as we
+ // want the int constructor to be able to use this, for speed: its single-digit
+ // fast path reuses ZERO.mant, so ZERO itself must be built by the long constructor
+
+ /**
+ * The <code>BigDecimal</code> constant "1".
+ *
+ * @see #TEN
+ * @see #ZERO
+ * @stable ICU 2.0
+ */
+ public static final com.ibm.icu.math.BigDecimal ONE = new com.ibm.icu.math.BigDecimal((long) 1); // use long as we
+ // want the int constructor to be able to use this, for speed: its single-digit
+ // fast path reuses ONE.mant for both 1 and -1
+
+ /**
+ * The <code>BigDecimal</code> constant "10".
+ *
+ * @see #ONE
+ * @see #ZERO
+ * @stable ICU 2.0
+ */
+ // 10 is outside the int constructor's single-digit fast path, so the int constructor is used here
+ public static final com.ibm.icu.math.BigDecimal TEN = new com.ibm.icu.math.BigDecimal(10);
+
+ // The rounding modes. Each constant below simply re-exports the MathContext constant of the
+ // same name (copied here for upwards compatibility).
+ /**
+ * Rounding mode to round to a more positive number.
+ *
+ * @see MathContext#ROUND_CEILING
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_CEILING = com.ibm.icu.math.MathContext.ROUND_CEILING;
+
+ /**
+ * Rounding mode to round towards zero.
+ *
+ * @see MathContext#ROUND_DOWN
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_DOWN = com.ibm.icu.math.MathContext.ROUND_DOWN;
+
+ /**
+ * Rounding mode to round to a more negative number.
+ *
+ * @see MathContext#ROUND_FLOOR
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_FLOOR = com.ibm.icu.math.MathContext.ROUND_FLOOR;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant value is rounded down.
+ *
+ * @see MathContext#ROUND_HALF_DOWN
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_DOWN = com.ibm.icu.math.MathContext.ROUND_HALF_DOWN;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant value is rounded to the nearest even neighbor.
+ *
+ * @see MathContext#ROUND_HALF_EVEN
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_EVEN = com.ibm.icu.math.MathContext.ROUND_HALF_EVEN;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant value is rounded up.
+ *
+ * @see MathContext#ROUND_HALF_UP
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_UP = com.ibm.icu.math.MathContext.ROUND_HALF_UP;
+
+ /**
+ * Rounding mode to assert that no rounding is necessary.
+ *
+ * @see MathContext#ROUND_UNNECESSARY
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_UNNECESSARY = com.ibm.icu.math.MathContext.ROUND_UNNECESSARY;
+
+ /**
+ * Rounding mode to round away from zero.
+ *
+ * @see MathContext#ROUND_UP
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_UP = com.ibm.icu.math.MathContext.ROUND_UP;
+
+ /* properties constant private */// locals
+ // Values stored in the 'ind' field; they double as the arithmetic sign of the number.
+ private static final byte ispos = 1; // ind: indicates positive (must be 1)
+ private static final byte iszero = 0; // ind: indicates zero (must be 0)
+ private static final byte isneg = -1; // ind: indicates negative (must be -1)
+ // [later could add NaN, +/- infinity, here]
+
+ // Exponent and integer-argument limits: nine decimal digits in either direction.
+ private static final int MinExp = -999999999; // minimum exponent allowed
+ private static final int MaxExp = 999999999; // maximum exponent allowed
+ private static final int MinArg = -999999999; // minimum argument integer
+ private static final int MaxArg = 999999999; // maximum argument integer
+
+ // MathContext built with digits=0 and form=PLAIN
+ private static final com.ibm.icu.math.MathContext plainMC = new com.ibm.icu.math.MathContext(0,
+ com.ibm.icu.math.MathContext.PLAIN); // context for plain unlimited math
+
+ /* properties constant private unused */// present but not referenced
+ // Serialization version
+ private static final long serialVersionUID = 8245355804974198832L;
+
+ // private static final java.lang.String
+ // copyright=" Copyright (c) IBM Corporation 1996, 2000. All rights reserved. ";
+
+ /* properties static private */
+ // Precalculated constant arrays (used by byteaddsub)
+ // bytecar has 190 entries ((90 + 99) + 1). It is only allocated here.
+ // NOTE(review): presumably diginit() fills bytecar as a side effect while building bytedig,
+ // which would make the declaration order of these two fields significant -- confirm in diginit().
+ private static byte bytecar[] = new byte[(90 + 99) + 1]; // carry/borrow array
+ private static byte bytedig[] = diginit(); // next digit array
+
+ /* ----- Instance properties [all private and immutable] ----- */
+ /* properties private */
+
+ /**
+ * The indicator. This may take the values:
+ * <ul>
+ * <li>ispos -- the number is positive
+ * <li>iszero -- the number is zero
+ * <li>isneg -- the number is negative
+ * </ul>
+ *
+ * @serial
+ */
+ private byte ind; // assumed undefined
+ // Note: some code below assumes IND = Sign [-1, 0, 1], at present.
+ // We only need two bits for this, but use a byte [also permits
+ // smooth future extension].
+
+ /**
+ * The formatting style. This may take the values:
+ * <ul>
+ * <li>MathContext.PLAIN -- no exponent needed
+ * <li>MathContext.SCIENTIFIC -- scientific notation required
+ * <li>MathContext.ENGINEERING -- engineering notation required
+ * </ul>
+ * <p>
+ * This property is an optimization; it allows us to defer number layout until it is actually needed as a string,
+ * hence avoiding unnecessary formatting.
+ *
+ * @serial
+ */
+ private byte form = (byte) com.ibm.icu.math.MathContext.PLAIN; // assumed PLAIN
+ // We only need two bits for this, at present, but use a byte
+ // [again, to allow for smooth future extension]
+
+ /**
+ * The value of the mantissa.
+ * <p>
+ * Once constructed, this may become shared between several BigDecimal objects, so must not be altered.
+ * <p>
+ * For efficiency (speed), this is a byte array, with each byte taking a value of 0 -> 9.
+ * <p>
+ * If the first byte is 0 then the value of the number is zero (and mant.length=1, except when constructed from a
+ * plain number, for example, 0.000).
+ *
+ * @serial
+ */
+ private byte mant[]; // assumed null
+
+ /**
+ * The exponent.
+ * <p>
+ * For fixed point arithmetic, scale is <code>-exp</code>, and can apply to zero.
+ * <p>
+ * Note that this property can have a value less than MinExp when the mantissa has more than one digit.
+ *
+ * @serial
+ */
+ private int exp;
+
+ // assumed 0 (the constructors rely on exp starting at the Java default for int fields)
+
+
+ /* ---------------------------------------------------------------- */
+ /* Constructors */
+ /* ---------------------------------------------------------------- */
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from a <code>java.math.BigDecimal</code>.
+ * <p>
+ * The result is the same as passing the parameter's <code>toString()</code> value to the
+ * {@link #BigDecimal(java.lang.String)} constructor. The parameter must not be <code>null</code>.
+ * <p>
+ * (Note: this constructor is provided only in the <code>com.ibm.icu.math</code> version of the BigDecimal class.
+ * It would not be present in a <code>java.math</code> version.)
+ *
+ * @param bd The <code>BigDecimal</code> to be translated.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(java.math.BigDecimal bd) {
+     // All parsing is delegated to the String constructor.
+     this(bd.toString());
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from a <code>BigInteger</code>, with scale 0.
+ * <p>
+ * The new object is the exact decimal representation of the <code>BigInteger</code>: its value is identical to
+ * the value of the parameter and its scale is zero. The parameter must not be <code>null</code>.
+ * <p>
+ * The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus sign (hyphen) if the
+ * <code>BigInteger</code> is negative. A leading zero will be present only if the <code>BigInteger</code> is zero.
+ *
+ * @param bi The <code>BigInteger</code> to be converted.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(java.math.BigInteger bi) {
+     // The base-10 string form is handed to the String constructor.
+     this(bi.toString(10));
+ }
+
+ // exp remains 0
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from a <code>BigInteger</code> and a scale.
+ * <p>
+ * The new object is the exact decimal representation of the <code>BigInteger</code>, scaled by the second
+ * parameter, which may not be negative. Its value is the <code>BigInteger</code> divided by ten to the power of
+ * the scale. The <code>BigInteger</code> parameter must not be <code>null</code>.
+ * <p>
+ * The <code>BigDecimal</code> will contain only decimal digits, (with an embedded decimal point followed by
+ * <code>scale</code> decimal digits if the scale is positive), prefixed with a leading minus sign (hyphen) if the
+ * <code>BigInteger</code> is negative. A leading zero will be present only if the <code>BigInteger</code> is zero.
+ *
+ * @param bi The <code>BigInteger</code> to be converted.
+ * @param scale The <code>int</code> specifying the scale.
+ * @throws NumberFormatException If the scale is negative.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(java.math.BigInteger bi, int scale) {
+     // The digits are parsed first (a constructor call must come first); the scale is validated afterwards.
+     this(bi.toString(10));
+     if (scale < 0) {
+         throw new java.lang.NumberFormatException("Negative scale:" + " " + scale);
+     }
+     // A scale of n places is stored as an exponent of -n.
+     exp = -scale;
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from an array of characters.
+ * <p>
+ * The result is the same as if a <code>String</code> had been built from the character array and passed to the
+ * {@link #BigDecimal(java.lang.String)} constructor. The parameter must not be <code>null</code>.
+ * <p>
+ * Using this constructor is faster than using the <code>BigDecimal(String)</code> constructor if the string is
+ * already available in character array form.
+ *
+ * @param inchars The <code>char[]</code> array containing the number to be converted.
+ * @throws NumberFormatException If the parameter is not a valid number.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(char inchars[]) {
+     // Parse the whole array: offset 0, every character.
+     this(inchars, 0, inchars.length);
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from an array of characters.
+ * <p>
+ * Constructs a <code>BigDecimal</code> as though a <code>String</code> had been constructed from the character
+ * array (or a subarray of that array) and the {@link #BigDecimal(java.lang.String)} constructor had then been used.
+ * The first parameter must not be <code>null</code>, and the subarray must be wholly contained within it.
+ * <p>
+ * Using this constructor is faster than using the <code>BigDecimal(String)</code> constructor if the string is
+ * already available within a character array.
+ *
+ * @param inchars The <code>char[]</code> array containing the number to be converted.
+ * @param offset The <code>int</code> offset into the array of the start of the number to be converted.
+ * @param length The <code>int</code> length of the number.
+ * @throws NumberFormatException If the parameter is not a valid number for any reason.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(char inchars[], int offset, int length) {
+ super();
+ boolean exotic;
+ boolean hadexp;
+ int d;
+ int dotoff;
+ int last;
+ int i = 0;
+ char si = 0;
+ boolean eneg = false;
+ int k = 0;
+ int elen = 0;
+ int j = 0;
+ char sj = 0;
+ int dvalue = 0;
+ int mag = 0;
+ // This is the primary constructor; all incoming strings end up
+ // here; it uses explicit (inline) parsing for speed and to avoid
+ // generating intermediate (temporary) objects of any kind.
+ // 1998.06.25: exponent form built only if E/e in string
+ // 1998.06.25: trailing zeros not removed for zero
+ // 1999.03.06: no embedded blanks; allow offset and length
+ // NOTE(review): bad() is defined elsewhere in this class; the code below relies on it not
+ // returning normally (presumably it throws NumberFormatException) -- confirm.
+ if (length <= 0)
+ bad(inchars); // bad conversion (empty string)
+ // [bad offset will raise array bounds exception]
+
+ /* Handle and step past sign */
+ // offset and length are adjusted together so they keep describing the unsigned remainder
+ ind = ispos; // assume positive
+ if (inchars[offset] == ('-')) {
+ length--;
+ if (length == 0)
+ bad(inchars); // nothing after sign
+ ind = isneg;
+ offset++;
+ } else if (inchars[offset] == ('+')) {
+ length--;
+ if (length == 0)
+ bad(inchars); // nothing after sign
+ offset++;
+ }
+
+ /* We're at the start of the number */
+ exotic = false; // have extra digits
+ hadexp = false; // had explicit exponent
+ d = 0; // count of digits found
+ dotoff = -1; // offset where dot was found
+ last = -1; // last character of mantissa
+ // First pass: count mantissa digits, note the dot, and parse an exponent if one is present.
+ // $1 counts the characters still to inspect; i is the index of the current one.
+ {
+ int $1 = length;
+ i = offset;
+ i: for (; $1 > 0; $1--, i++) {
+ si = inchars[i];
+ if (si >= '0') // test for Arabic digit
+ if (si <= '9') {
+ last = i;
+ d++; // still in mantissa
+ continue i;
+ }
+ if (si == '.') { // record and ignore
+ if (dotoff >= 0)
+ bad(inchars); // two dots
+ dotoff = i - offset; // offset into mantissa
+ continue i;
+ }
+ if (si != 'e')
+ if (si != 'E') { // expect an extra digit
+ if ((!(Character.isDigit(si))))
+ bad(inchars); // not a number
+ // defer the base 10 check until later to avoid extra method call
+ exotic = true; // will need conversion later
+ last = i;
+ d++; // still in mantissa
+ continue i;
+ }
+ /* Found 'e' or 'E' -- now process explicit exponent */
+ // 1998.07.11: sign no longer required
+ if ((i - offset) > (length - 2))
+ bad(inchars); // no room for even one digit
+ eneg = false;
+ if ((inchars[i + 1]) == ('-')) {
+ eneg = true;
+ k = i + 2;
+ } else if ((inchars[i + 1]) == ('+'))
+ k = i + 2;
+ else
+ k = i + 1;
+ // k is offset of first expected digit
+ elen = length - ((k - offset)); // possible number of digits
+ if ((elen == 0) | (elen > 9))
+ bad(inchars); // 0 or more than 9 digits
+ // Accumulate the exponent digits into the exp field (which starts at 0).
+ {
+ int $2 = elen;
+ j = k;
+ for (; $2 > 0; $2--, j++) {
+ sj = inchars[j];
+ if (sj < '0')
+ bad(inchars); // always bad
+ if (sj > '9') { // maybe an exotic digit
+ if ((!(Character.isDigit(sj))))
+ bad(inchars); // not a number
+ dvalue = Character.digit(sj, 10); // check base
+ if (dvalue < 0)
+ bad(inchars); // not base 10
+ } else
+ dvalue = ((int) (sj)) - ((int) ('0'));
+ exp = (exp * 10) + dvalue;
+ }
+ }/* j */
+ if (eneg)
+ exp = -exp; // was negative
+ hadexp = true; // remember we had one
+ break i; // we are done
+ }
+ }/* i */
+
+ /* Here when all inspected */
+ if (d == 0)
+ bad(inchars); // no mantissa digits
+ // Digits after the dot number (d - dotoff); each one lowers the exponent by one.
+ if (dotoff >= 0)
+ exp = (exp + dotoff) - d; // adjust exponent if had dot
+
+ /* strip leading zeros/dot (leave final if all 0's) */
+ // The scan stops one short of 'last', so at least one mantissa digit always remains.
+ // offset, dotoff and d are moved in step so they stay consistent for the copy below.
+ {
+ int $3 = last - 1;
+ i = offset;
+ i: for (; i <= $3; i++) {
+ si = inchars[i];
+ if (si == '0') {
+ offset++;
+ dotoff--;
+ d--;
+ } else if (si == '.') {
+ offset++; // step past dot
+ dotoff--;
+ } else if (si <= '9')
+ break i;/* non-0 */
+ else {/* exotic */
+ if ((Character.digit(si, 10)) != 0)
+ break i; // non-0 or bad
+ // is 0 .. strip like '0'
+ offset++;
+ dotoff--;
+ d--;
+ }
+ }
+ }/* i */
+
+ /* Create the mantissa array */
+ mant = new byte[d]; // we know the length
+ j = offset; // input offset
+ // Second pass: copy digit values into mant, stepping over the dot when i reaches dotoff.
+ if (exotic) {
+ do { // slow: check for exotica
+ {
+ int $4 = d;
+ i = 0;
+ for (; $4 > 0; $4--, i++) {
+ if (i == dotoff)
+ j++; // at dot
+ sj = inchars[j];
+ if (sj <= '9')
+ mant[i] = (byte) (((int) (sj)) - ((int) ('0')));/* easy */
+ else {
+ // this is the base-10 check that the first pass deferred
+ dvalue = Character.digit(sj, 10);
+ if (dvalue < 0)
+ bad(inchars); // not a number after all
+ mant[i] = (byte) dvalue;
+ }
+ j++;
+ }
+ }/* i */
+ } while (false);
+ }/* exotica */
+ else {
+ do {
+ {
+ int $5 = d;
+ i = 0;
+ for (; $5 > 0; $5--, i++) {
+ if (i == dotoff)
+ j++;
+ mant[i] = (byte) (((int) (inchars[j])) - ((int) ('0')));
+ j++;
+ }
+ }/* i */
+ } while (false);
+ }/* simple */
+
+ /* Looks good. Set the sign indicator and form, as needed. */
+ // Trailing zeros are preserved
+ // The rule here for form is:
+ // If no E-notation, then request plain notation
+ // Otherwise act as though add(0,DEFAULT) and request scientific notation
+ // [form is already PLAIN]
+ // Leading zeros were stripped above, so a zero first digit means the whole value is zero.
+ if (mant[0] == 0) {
+ ind = iszero; // force to show zero
+ // negative exponent is significant (e.g., -3 for 0.000) if plain
+ if (exp > 0)
+ exp = 0; // positive exponent can be ignored
+ if (hadexp) { // zero becomes single digit from add
+ mant = ZERO.mant;
+ exp = 0;
+ }
+ } else { // non-zero
+ // [ind was set earlier]
+ // now determine form
+ if (hadexp) {
+ form = (byte) com.ibm.icu.math.MathContext.SCIENTIFIC;
+ // 1999.06.29 check for overflow
+ mag = (exp + mant.length) - 1; // true exponent in scientific notation
+ if ((mag < MinExp) | (mag > MaxExp))
+ bad(inchars);
+ }
+ }
+ // say 'BD(c[]): mant[0] mantlen exp ind form:' mant[0] mant.length exp ind form
+ return;
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object directly from a <code>double</code>.
+ * <p>
+ * Constructs a <code>BigDecimal</code> which is the exact decimal representation of the 64-bit signed binary
+ * floating point parameter.
+ * <p>
+ * Note that this constructor is an exact conversion; it does not give the same result as converting
+ * <code>num</code> to a <code>String</code> using the <code>Double.toString()</code> method and then using the
+ * {@link #BigDecimal(java.lang.String)} constructor. To get that result, use the static {@link #valueOf(double)}
+ * method to construct a <code>BigDecimal</code> from a <code>double</code>.
+ *
+ * @param num The <code>double</code> to be converted.
+ * @throws NumberFormatException If the parameter is infinite or not a number.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(double num) {
+     // The exact expansion is obtained from java.math.BigDecimal(double); its string form is then
+     // parsed by the String constructor.
+     // History: 1999.03.06 use exactly the old algorithm; 2000.01.01 noted that the result is exact,
+     // so perhaps it should not be deprecated; 2000.06.18 no longer deprecated.
+     this(new java.math.BigDecimal(num).toString());
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object directly from an <code>int</code>.
+ * <p>
+ * Constructs a <code>BigDecimal</code> which is the exact decimal representation of the 32-bit signed binary
+ * integer parameter. The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus
+ * sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero.
+ *
+ * @param num The <code>int</code> to be converted.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(int num) {
+     super();
+     // Fast path: single-digit values, sharing the mantissa arrays of ZERO and ONE where possible.
+     if (num >= -9 && num <= 9) {
+         switch (num) {
+         case 0:
+             mant = ZERO.mant;
+             ind = iszero;
+             break;
+         case 1:
+             mant = ONE.mant;
+             ind = ispos;
+             break;
+         case -1:
+             mant = ONE.mant;
+             ind = isneg;
+             break;
+         default:
+             mant = new byte[1];
+             if (num > 0) {
+                 mant[0] = (byte) num;
+                 ind = ispos;
+             } else {
+                 mant[0] = (byte) -num;
+                 ind = isneg;
+             }
+             break;
+         }
+         return;
+     }
+
+     // General case: work on the negative of the magnitude so that the most negative int is handled.
+     int negated;
+     if (num > 0) {
+         ind = ispos;
+         negated = -num;
+     } else {
+         ind = isneg; // zero was dealt with by the fast path
+         negated = num;
+     }
+
+     // Count the decimal digits first so the mantissa can be allocated at exactly the right length.
+     int digits = 1;
+     for (int rest = negated / 10; rest != 0; rest /= 10) {
+         digits++;
+     }
+     mant = new byte[digits];
+
+     // Fill from the least significant digit; each remainder is in -9..0 and is negated into 0..9.
+     int pos = digits - 1;
+     do {
+         mant[pos--] = (byte) -(negated % 10);
+         negated /= 10;
+     } while (negated != 0);
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object directly from a <code>long</code>.
+ * <p>
+ * Constructs a <code>BigDecimal</code> which is the exact decimal representation of the 64-bit signed binary
+ * integer parameter. The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus
+ * sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero.
+ *
+ * @param num The <code>long</code> to be converted.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(long num) {
+     super();
+     // No single-digit fast path here: this constructor builds the static constants ZERO and ONE.
+     // This is much faster than: this(String.valueOf(num).toCharArray())
+
+     // Work on the negative of the magnitude so that the most negative long is handled.
+     long negated;
+     if (num > 0) {
+         ind = ispos;
+         negated = -num;
+     } else {
+         ind = (num == 0) ? iszero : isneg;
+         negated = num;
+     }
+
+     // Count the decimal digits first so the mantissa can be allocated at exactly the right length.
+     int digits = 1;
+     for (long rest = negated / 10; rest != 0; rest /= 10) {
+         digits++;
+     }
+     mant = new byte[digits];
+
+     // Fill from the least significant digit; each remainder is in -9..0 and is negated into 0..9.
+     int pos = digits - 1;
+     do {
+         mant[pos--] = (byte) -(negated % 10);
+         negated /= 10;
+     } while (negated != 0);
+ }
+
+ /**
+ * Constructs a <code>BigDecimal</code> object from a <code>String</code>.
+ * <p>
+ * Constructs a <code>BigDecimal</code> from the parameter, which must not be <code>null</code> and must represent a
+ * valid number, as described formally in the documentation referred to {@link BigDecimal above}.
+ * <p>
+ * In summary, numbers in <code>String</code> form must have at least one digit, may have a leading sign, may have a
+ * decimal point, and exponential notation may be used. They follow conventional syntax, and may not contain blanks.
+ * <p>
+ * Some valid strings from which a <code>BigDecimal</code> might be constructed are:
+ *
+ * <pre>
+ *       "0"      -- Zero
+ *      "12"      -- A whole number
+ *     "-76"      -- A signed whole number
+ *   "12.70"      -- Some decimal places
+ *  "+0.003"      -- Plus sign is allowed
+ *     "17."      -- The same as 17
+ *      ".5"      -- The same as 0.5
+ *    "4E+9"      -- Exponential notation
+ * "0.73e-7"      -- Exponential notation
+ * </pre>
+ *
+ * <p>
+ * (Exponential notation means that the number includes an optional sign and a power of ten following an
+ * '<code>E</code>' that indicates how the decimal point will be shifted. Thus the <code>"4E+9"</code> above is
+ * just a short way of writing <code>4000000000</code>, and the <code>"0.73e-7"</code> is short for
+ * <code>0.000000073</code>.)
+ * <p>
+ * The <code>BigDecimal</code> constructed from the String is in a standard form, with no blanks, as though the
+ * {@link #add(BigDecimal)} method had been used to add zero to the number with unlimited precision. If the string
+ * uses exponential notation (that is, includes an <code>e</code> or an <code>E</code>), then the
+ * <code>BigDecimal</code> number will be expressed in scientific notation (where the power of ten is adjusted so
+ * there is a single non-zero digit to the left of the decimal point); in this case if the number is zero then it
+ * will be expressed as the single digit 0, and if non-zero it will have an exponent unless that exponent would be 0.
+ * The exponent must fit in nine digits both before and after it is expressed in scientific notation.
+ * <p>
+ * Any digits in the parameter must be decimal; that is, <code>Character.digit(c, 10)</code> (where <code>c</code>
+ * is the character in question) would not return -1.
+ *
+ * @param string The <code>String</code> to be converted.
+ * @throws NumberFormatException If the parameter is not a valid number.
+ * @stable ICU 2.0
+ */
+
+ public BigDecimal(java.lang.String string) {
+     // Hand the whole string, as characters, to the (char[], offset, length) constructor.
+     // A null argument fails right here with a NullPointerException.
+     this(string.toCharArray(), 0, string.length());
+ }
+
+ /* Make a default BigDecimal object for local use. */
+
+ private BigDecimal() {
+     // Blank object for internal use: the code that creates it assigns the fields it
+     // needs afterwards.
+     super();
+ }
+
+ /* ---------------------------------------------------------------- */
+ /* Operator methods [methods which take a context parameter] */
+ /* ---------------------------------------------------------------- */
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
+ * <p>
+ * The same as {@link #abs(MathContext)}, where the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be <code>this.scale()</code>.
+ *
+ * @return A <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal abs() {
+     // Fixed-point form: same operation, evaluated under the plain context.
+     final com.ibm.icu.math.BigDecimal magnitude = abs(plainMC);
+     return magnitude;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
+ * <p>
+ * If the current object is zero or positive, then the same result as invoking the {@link #plus(MathContext)} method
+ * with the same parameter is returned. Otherwise, the same result as invoking the {@link #negate(MathContext)}
+ * method with the same parameter is returned.
+ *
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal abs(com.ibm.icu.math.MathContext set) {
+     // Negative values are negated; zero and positive values just have the context applied.
+     return (ind == isneg) ? negate(set) : plus(set);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this+rhs</code>, using fixed point arithmetic.
+ * <p>
+ * The same as {@link #add(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the addition.
+ * @return A <code>BigDecimal</code> whose value is <code>this+rhs</code>, using fixed point arithmetic.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal add(com.ibm.icu.math.BigDecimal rhs) {
+     // Fixed-point addition is the general addition evaluated under the plain context.
+     final com.ibm.icu.math.BigDecimal sum = add(rhs, plainMC);
+     return sum;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>this+rhs</code>.
+ * <p>
+ * Implements the addition (<code>+</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the addition.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>this+rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal add(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+ com.ibm.icu.math.BigDecimal lhs; // alias for this; replaced by a rounded clone when rounding is needed
+ int reqdig; // local copy of set.digits
+ com.ibm.icu.math.BigDecimal res; // result object being built
+ byte usel[]; // digit array given to byteaddsub as first operand (starts as lhs.mant, may be swapped)
+ int usellen; // logical length of usel after alignment; may differ from usel.length
+ byte user[]; // digit array given to byteaddsub as second operand (starts as rhs.mant, may be swapped)
+ int userlen; // logical length of user after alignment; may differ from user.length
+ int newlen = 0; // length the operand with the larger exponent would have once padded to the smaller one
+ int tlen = 0; // truncation length; also scratch while swapping usellen/userlen
+ int mult = 0; // multiplier passed to byteaddsub: 1 adds, -1 subtracts
+ byte t[] = null; // scratch while swapping usel/user
+ int ia = 0; // cursor into usel during the digit-by-digit comparison
+ int ib = 0; // cursor into user during the digit-by-digit comparison
+ int ea = 0; // last valid index of usel
+ int eb = 0; // last valid index of user
+ byte ca = 0; // current digit of usel, or 0 once past its end
+ byte cb = 0; // current digit of user, or 0 once past its end
+ /*
+  * Outline, in execution order:
+  *  1. determine requested digits and form; if set.lostDigits is on, checkdigits() is applied to rhs;
+  *  2. quick exits when an operand has ind == 0 and the form is not PLAIN;
+  *  3. when set.digits > 0, any operand with more digits than that is rounded (on a clone);
+  *  4. the operands are aligned on a common exponent by adjusting their logical lengths,
+  *     with early returns when one operand cannot affect the result;
+  *  5. the likely sign is set and, for operands of different sign, the operands are swapped
+  *     if necessary so that the subtraction cannot produce a negative result;
+  *  6. byteaddsub() performs the digit arithmetic and finish() applies the context.
+  */
+ if (set.lostDigits)
+ checkdigits(rhs, set.digits);
+ lhs = this; // name for clarity and proxy
+
+ /* Quick exit for add floating 0: the answer is the other operand with the context applied */
+ // plus() will optimize to return same object if possible
+ if (lhs.ind == 0)
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN)
+ return rhs.plus(set);
+ if (rhs.ind == 0)
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN)
+ return lhs.plus(set);
+
+ /* Prepare numbers (round, unless unlimited precision) */
+ reqdig = set.digits; // local copy (heavily used)
+ if (reqdig > 0) {
+ if (lhs.mant.length > reqdig)
+ lhs = clone(lhs).round(set); // round a copy; the receiver itself is left untouched
+ if (rhs.mant.length > reqdig)
+ rhs = clone(rhs).round(set); // round a copy; the caller's object is left untouched
+ // [we could reuse the new LHS for result in this case]
+ }
+
+ res = new com.ibm.icu.math.BigDecimal(); // build result here
+
+ /*
+ * Now see how much we have to pad or truncate lhs or rhs in order to align the numbers. If one number is much
+ * larger than the other, then the smaller cannot affect the answer [but we may still need to pad with up to
+ * DIGITS trailing zeros].
+ */
+ // Note sign may be 0 if digits (reqdig) is 0
+ // usel and user will be the byte arrays passed to the adder; we'll
+ // use them on all paths except quick exits
+ usel = lhs.mant;
+ usellen = lhs.mant.length;
+ user = rhs.mant;
+ userlen = rhs.mant.length;
+ {
+ do {/* select */
+ if (lhs.exp == rhs.exp) {/* no padding needed */
+ // This is the most common, and fastest, path
+ res.exp = lhs.exp;
+ } else if (lhs.exp > rhs.exp) { // need to pad lhs and/or truncate rhs
+ newlen = (usellen + lhs.exp) - rhs.exp;
+ /*
+ * If, after pad, lhs would be longer than rhs by digits+1 or more (and digits>0) then rhs cannot
+ * affect answer, so we only need to pad up to a length of DIGITS+1.
+ */
+ if (newlen >= ((userlen + reqdig) + 1))
+ if (reqdig > 0) { // both conditions must hold for this early return
+ // LHS is sufficient
+ res.mant = usel;
+ res.exp = lhs.exp;
+ res.ind = lhs.ind;
+ if (usellen < reqdig) { // need 0 padding
+ res.mant = extend(lhs.mant, reqdig);
+ res.exp = res.exp - ((reqdig - usellen)); // exponent drops by the number of zeros added
+ }
+ return res.finish(set, false);
+ }
+ // RHS may affect result
+ res.exp = rhs.exp; // expected final exponent
+ if (newlen > (reqdig + 1))
+ if (reqdig > 0) {
+ // LHS will be max; RHS truncated
+ tlen = (newlen - reqdig) - 1; // truncation length
+ userlen = userlen - tlen;
+ res.exp = res.exp + tlen; // dropped digits raise the exponent by the same amount
+ newlen = reqdig + 1;
+ }
+ if (newlen > usellen)
+ usellen = newlen; // need to pad LHS
+ } else { // need to pad rhs and/or truncate lhs (mirror image of the branch above)
+ newlen = (userlen + rhs.exp) - lhs.exp;
+ if (newlen >= ((usellen + reqdig) + 1))
+ if (reqdig > 0) { // both conditions must hold for this early return
+ // RHS is sufficient
+ res.mant = user;
+ res.exp = rhs.exp;
+ res.ind = rhs.ind;
+ if (userlen < reqdig) { // need 0 padding
+ res.mant = extend(rhs.mant, reqdig);
+ res.exp = res.exp - ((reqdig - userlen)); // exponent drops by the number of zeros added
+ }
+ return res.finish(set, false);
+ }
+ // LHS may affect result
+ res.exp = lhs.exp; // expected final exponent
+ if (newlen > (reqdig + 1))
+ if (reqdig > 0) {
+ // RHS will be max; LHS truncated
+ tlen = (newlen - reqdig) - 1; // truncation length
+ usellen = usellen - tlen;
+ res.exp = res.exp + tlen; // dropped digits raise the exponent by the same amount
+ newlen = reqdig + 1;
+ }
+ if (newlen > userlen)
+ userlen = newlen; // need to pad RHS
+ }
+ } while (false);
+ }/* padder */
+
+ /* OK, we have aligned mantissas. Now add or subtract. */
+ // 1998.06.27 Sign may now be 0 [e.g., 0.000] .. treat as positive
+ // 1999.05.27 Allow for 00 on lhs [is not larger than 2 on rhs]
+ // 1999.07.10 Allow for 00 on rhs [is not larger than 2 on rhs]
+ if (lhs.ind == iszero)
+ res.ind = ispos;
+ else
+ res.ind = lhs.ind; // likely sign, all paths
+ if (((lhs.ind == isneg) ? 1 : 0) == ((rhs.ind == isneg) ? 1 : 0)) // same sign, 0 non-negative
+ mult = 1;
+ else {
+ do { // different signs, so subtraction is needed
+ mult = -1; // will cause subtract
+ /*
+ * Before we can subtract we must determine which is the larger, as our add/subtract routine only
+ * handles non-negative results so we may need to swap the operands.
+ */
+ {
+ do {/* select */
+ if (rhs.ind == iszero) {
+ // original A bigger
+ } else if ((usellen < userlen) | (lhs.ind == iszero)) { // original B bigger
+ t = usel;
+ usel = user;
+ user = t; // swap
+ tlen = usellen;
+ usellen = userlen;
+ userlen = tlen; // ..
+ res.ind = (byte) -res.ind; // and set sign
+ } else if (usellen > userlen) {
+ // original A bigger
+ } else {
+ {/* logical lengths the same */// need compare
+ /* may still need to swap: compare the strings, treating positions past an array's end as 0 */
+ ia = 0;
+ ib = 0;
+ ea = usel.length - 1;
+ eb = user.length - 1;
+ {
+ compare: for (;;) {
+ if (ia <= ea)
+ ca = usel[ia];
+ else {
+ if (ib > eb) {/* identical: both arrays are exhausted without a difference */
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN)
+ return ZERO;
+ // [if PLAIN we must do the subtract, in case of 0.000 results]
+ break compare;
+ }
+ ca = (byte) 0;
+ }
+ if (ib <= eb)
+ cb = user[ib];
+ else
+ cb = (byte) 0;
+ if (ca != cb) {
+ if (ca < cb) {/* swap needed */
+ t = usel;
+ usel = user;
+ user = t; // swap
+ tlen = usellen;
+ usellen = userlen;
+ userlen = tlen; // ..
+ res.ind = (byte) -res.ind; // result takes the sign of the larger operand
+ }
+ break compare;
+ }
+ /* mantissas the same, so far */
+ ia++;
+ ib++;
+ }
+ }/* compare */
+ } // lengths the same
+ }
+ } while (false);
+ }/* swaptest */
+ } while (false);
+ }/* signdiff */
+
+ /* here, A is > B if subtracting */
+ // add [A+B*1] or subtract [A+(B*-1)]
+ res.mant = byteaddsub(usel, usellen, user, userlen, mult, false);
+ // [reuse possible only after chop; accounting makes not worthwhile]
+
+ // Finish() rounds before stripping leading 0's, then sets form, etc.
+ return res.finish(set, false);
+ }
+
+ /**
+ * Compares this <code>BigDecimal</code> to another, using unlimited precision.
+ * <p>
+ * The same as {@link #compareTo(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>,
+ * and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @return An <code>int</code> whose value is -1, 0, or 1 as <code>this</code> is numerically less than, equal to,
+ * or greater than <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public int compareTo(com.ibm.icu.math.BigDecimal rhs) {
+     // Unlimited-precision comparison: defer to the general form with the plain context.
+     final int order = compareTo(rhs, plainMC);
+     return order;
+ }
+
+ /**
+ * Compares this <code>BigDecimal</code> to another.
+ * <p>
+ * Implements numeric comparison, (as defined in the decimal documentation, see {@link BigDecimal class header}),
+ * and returns a result of type <code>int</code>.
+ * <p>
+ * The result will be:
+ * <ul>
+ * <li><b>-1</b> if the current object is less than the first parameter</li>
+ * <li><b>0</b> if the current object is equal to the first parameter</li>
+ * <li><b>1</b> if the current object is greater than the first parameter.</li>
+ * </ul>
+ * <p>
+ * A {@link #compareTo(BigDecimal)} method is also provided.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return An <code>int</code> whose value is -1, 0, or 1 as <code>this</code> is numerically less than, equal to,
+ * or greater than <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public int compareTo(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     // A null rhs raises NullPointerException, as the Comparable contract expects.
+     if (set.lostDigits)
+         checkdigits(rhs, set.digits);
+     // [add would re-check in the slow path, but would then report the negated rhs]
+
+     final boolean sameSignAndExponent = (ind == rhs.ind) && (exp == rhs.exp);
+     if (!sameSignAndExponent) {
+         // Differing signs decide the answer at once; equal signs with differing
+         // exponents need the full comparison below.
+         if (ind < rhs.ind)
+             return -1;
+         if (ind > rhs.ind)
+             return 1;
+     } else {
+         // Very common case: with equal sign and exponent, a longer mantissa means a
+         // larger magnitude, so the answer is the sign itself or its negation.
+         final int digitCount = mant.length;
+         if (digitCount < rhs.mant.length)
+             return (byte) -ind;
+         if (digitCount > rhs.mant.length)
+             return ind;
+
+         // Equal lengths: compare digit by digit unless rounding could be involved
+         // (more digits than the context allows), which is left to the full comparison.
+         final boolean noRounding = (digitCount <= set.digits) || (set.digits == 0);
+         if (noRounding) {
+             for (int pos = 0; pos < digitCount; pos++) {
+                 if (mant[pos] < rhs.mant[pos])
+                     return (byte) -ind;
+                 if (mant[pos] > rhs.mant[pos])
+                     return ind;
+             }
+             return 0; // every digit matches
+         }
+     }
+
+     // Full comparison: subtract by adding a sign-flipped copy of rhs, then report the
+     // sign of the difference.
+     final com.ibm.icu.math.BigDecimal negatedRhs = clone(rhs); // never modify the caller's object
+     negatedRhs.ind = (byte) -negatedRhs.ind;
+     return add(negatedRhs, set).ind;
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic.
+ * <p>
+ * The same as {@link #divide(BigDecimal, int)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * rounding mode is {@link MathContext#ROUND_HALF_UP}.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be the same as the scale of the current object, if
+ * the latter were formatted without exponential notation.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the division.
+ * @return A plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic.
+ * @throws ArithmeticException If <code>rhs</code> is zero.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divide(com.ibm.icu.math.BigDecimal rhs) {
+     // 'D' selects plain division; a scale of -1 takes the scale from this object.
+     final com.ibm.icu.math.BigDecimal quotient = dodivide('D', rhs, plainMC, -1);
+     return quotient;
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic and a
+ * rounding mode.
+ * <p>
+ * The same as {@link #divide(BigDecimal, int, int)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * second parameter is <code>this.scale()</code>, and the third is <code>round</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will therefore be the same as the scale of the current
+ * object, if the latter were formatted without exponential notation.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the division.
+ * @param round The <code>int</code> rounding mode to be used for the division (see the {@link MathContext} class).
+ * @return A plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic and
+ * the specified rounding mode.
+ * @throws IllegalArgumentException if <code>round</code> is not a valid rounding mode.
+ * @throws ArithmeticException if <code>rhs</code> is zero.
+ * @throws ArithmeticException if <code>round</code> is {@link MathContext#ROUND_UNNECESSARY} and
+ * <code>this.scale()</code> is insufficient to represent the result exactly.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divide(com.ibm.icu.math.BigDecimal rhs, int round) {
+     // Building the context also validates the rounding mode. A scale of -1 tells
+     // dodivide to take the scale from the left-hand side.
+     return dodivide('D', rhs,
+             new com.ibm.icu.math.MathContext(0, com.ibm.icu.math.MathContext.PLAIN, false, round), -1);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic and a
+ * given scale and rounding mode.
+ * <p>
+ * The same as {@link #divide(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>,
+ * and the context is <code>new MathContext(0, MathContext.PLAIN, false, round)</code>, except that the length of
+ * the decimal part (the scale) to be used for the result is explicit rather than being taken from
+ * <code>this</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be the value given by the <code>scale</code>
+ * parameter.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the division.
+ * @param scale The <code>int</code> scale to be used for the result.
+ * @param round The <code>int</code> rounding mode to be used for the division (see the {@link MathContext} class).
+ * @return A plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic and
+ * the specified rounding mode.
+ * @throws IllegalArgumentException if <code>round</code> is not a valid rounding mode.
+ * @throws ArithmeticException if <code>rhs</code> is zero.
+ * @throws ArithmeticException if <code>scale</code> is negative.
+ * @throws ArithmeticException if <code>round</code> is {@link MathContext#ROUND_UNNECESSARY} and
+ * <code>scale</code> is insufficient to represent the result exactly.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divide(com.ibm.icu.math.BigDecimal rhs, int scale, int round) {
+     // Reject a negative scale before anything else is looked at.
+     if (scale < 0) {
+         throw new java.lang.ArithmeticException("Negative scale:" + " " + scale);
+     }
+     // Building the context also validates the rounding mode.
+     final com.ibm.icu.math.MathContext plainWithRounding =
+             new com.ibm.icu.math.MathContext(0, com.ibm.icu.math.MathContext.PLAIN, false, round);
+     return dodivide('D', rhs, plainWithRounding, scale);
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>this/rhs</code>.
+ * <p>
+ * Implements the division (<code>/</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the division.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>this/rhs</code>.
+ * @throws ArithmeticException if <code>rhs</code> is zero.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divide(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     // General division under the caller's context; -1 means no explicit scale was given.
+     final com.ibm.icu.math.BigDecimal quotient = dodivide('D', rhs, set, -1);
+     return quotient;
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
+ * <p>
+ * The same as {@link #divideInteger(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is
+ * <code>rhs</code>, and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the integer division.
+ * @return A <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
+ * @throws ArithmeticException if <code>rhs</code> is zero.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divideInteger(com.ibm.icu.math.BigDecimal rhs) {
+     // 'I' selects integer division; a scale of 0 drops any .000 from a plain result.
+     final com.ibm.icu.math.BigDecimal integerPart = dodivide('I', rhs, plainMC, 0);
+     return integerPart;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
+ * <p>
+ * Implements the integer division operator (as defined in the decimal documentation, see {@link BigDecimal class
+ * header}), and returns the result as a <code>BigDecimal</code> object.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the integer division.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
+ * @throws ArithmeticException if <code>rhs</code> is zero.
+ * @throws ArithmeticException if the result will not fit in the number of digits specified for the context.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal divideInteger(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     // 'I' selects integer division; a scale of 0 drops any .000 when the form is plain.
+     final com.ibm.icu.math.BigDecimal integerPart = dodivide('I', rhs, set, 0);
+     return integerPart;
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
+ * <p>
+ * The same as {@link #max(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @return A <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal max(com.ibm.icu.math.BigDecimal rhs) {
+     // Plain-context shorthand for the general max.
+     final com.ibm.icu.math.BigDecimal larger = max(rhs, plainMC);
+     return larger;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
+ * <p>
+ * Returns the larger of the current object and the first parameter.
+ * <p>
+ * If calling the {@link #compareTo(BigDecimal, MathContext)} method with the same parameters would return
+ * <code>1</code> or <code>0</code>, then the result of calling the {@link #plus(MathContext)} method on the current
+ * object (using the same <code>MathContext</code> parameter) is returned. Otherwise, the result of calling the
+ * {@link #plus(MathContext)} method on the first parameter object (using the same <code>MathContext</code>
+ * parameter) is returned.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal max(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+ if ((this.compareTo(rhs, set)) >= 0)
+ return this.plus(set);
+ else
+ return rhs.plus(set);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
+ * <p>
+ * The same as {@link #min(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @return A <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal min(com.ibm.icu.math.BigDecimal rhs) {
+     // Plain-context shorthand for the general min.
+     final com.ibm.icu.math.BigDecimal smaller = min(rhs, plainMC);
+     return smaller;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
+ * <p>
+ * Returns the smaller of the current object and the first parameter.
+ * <p>
+ * If calling the {@link #compareTo(BigDecimal, MathContext)} method with the same parameters would return
+ * <code>-1</code> or <code>0</code>, then the result of calling the {@link #plus(MathContext)} method on the
+ * current object (using the same <code>MathContext</code> parameter) is returned. Otherwise, the result of calling
+ * the {@link #plus(MathContext)} method on the first parameter object (using the same <code>MathContext</code>
+ * parameter) is returned.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal min(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+ if ((this.compareTo(rhs, set)) <= 0)
+ return this.plus(set);
+ else
+ return rhs.plus(set);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this*rhs</code>, using fixed point arithmetic.
+ * <p>
+ * The same as {@link #multiply(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>,
+ * and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be the sum of the scales of the operands, if they
+ * were formatted without exponential notation.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the multiplication.
+ * @return A <code>BigDecimal</code> whose value is <code>this*rhs</code>, using fixed point arithmetic.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal multiply(com.ibm.icu.math.BigDecimal rhs) {
+     // Fixed-point multiplication is the general multiplication under the plain context.
+     final com.ibm.icu.math.BigDecimal product = multiply(rhs, plainMC);
+     return product;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>this*rhs</code>.
+ * <p>
+ * Implements the multiplication (<code>*</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the multiplication.
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>this*rhs</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal multiply(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+ com.ibm.icu.math.BigDecimal lhs;
+ int padding;
+ int reqdig;
+ byte multer[] = null;
+ byte multand[] = null;
+ int multandlen;
+ int acclen = 0;
+ com.ibm.icu.math.BigDecimal res;
+ byte acc[];
+ int n = 0;
+ byte mult = 0;
+ if (set.lostDigits)
+ checkdigits(rhs, set.digits);
+ lhs = this; // name for clarity and proxy
+
+ /* Prepare numbers (truncate, unless unlimited precision) */
+ padding = 0; // trailing 0's to add
+ reqdig = set.digits; // local copy
+ if (reqdig > 0) {
+ if (lhs.mant.length > reqdig)
+ lhs = clone(lhs).round(set);
+ if (rhs.mant.length > reqdig)
+ rhs = clone(rhs).round(set);
+ // [we could reuse the new LHS for result in this case]
+ } else {/* unlimited */
+ // fixed point arithmetic will want every trailing 0; we add these
+ // after the calculation rather than before, for speed.
+ if (lhs.exp > 0)
+ padding = padding + lhs.exp;
+ if (rhs.exp > 0)
+ padding = padding + rhs.exp;
+ }
+
+ // For best speed, as in DMSRCN, we use the shorter number as the
+ // multiplier and the longer as the multiplicand.
+ // 1999.12.22: We used to special case when the result would fit in
+ // a long, but with Java 1.3 this gave no advantage.
+ if (lhs.mant.length < rhs.mant.length) {
+ multer = lhs.mant;
+ multand = rhs.mant;
+ } else {
+ multer = rhs.mant;
+ multand = lhs.mant;
+ }
+
+ /* Calculate how long result byte array will be */
+ multandlen = (multer.length + multand.length) - 1; // effective length
+ // optimize for 75% of the cases where a carry is expected...
+ if ((multer[0] * multand[0]) > 9)
+ acclen = multandlen + 1;
+ else
+ acclen = multandlen;
+
+ /* Now the main long multiplication loop */
+ res = new com.ibm.icu.math.BigDecimal(); // where we'll build result
+ acc = new byte[acclen]; // accumulator, all zeros
+ // 1998.07.01: calculate from left to right so that accumulator goes
+ // to likely final length on first addition; this avoids a one-digit
+ // extension (and object allocation) each time around the loop.
+ // Initial number therefore has virtual zeros added to right.
+ {
+ int $7 = multer.length;
+ n = 0;
+ for (; $7 > 0; $7--, n++) {
+ mult = multer[n];
+ if (mult != 0) { // [optimization]
+ // accumulate [accumulator is reusable array]
+ acc = byteaddsub(acc, acc.length, multand, multandlen, mult, true);
+ }
+ // divide multiplicand by 10 for next digit to right
+ multandlen--; // 'virtual length'
+ }
+ }/* n */
+
+ res.ind = (byte) (lhs.ind * rhs.ind); // final sign
+ res.exp = (lhs.exp + rhs.exp) - padding; // final exponent
+ // [overflow is checked by finish]
+
+ /* add trailing zeros to the result, if necessary */
+ if (padding == 0)
+ res.mant = acc;
+ else
+ res.mant = extend(acc, acc.length + padding); // add trailing 0s
+ return res.finish(set, false);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>-this</code>.
+ * <p>
+ * The same as {@link #negate(MathContext)}, where the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be <code>this.scale()</code>.
+ *
+ * @return A <code>BigDecimal</code> whose value is <code>-this</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal negate() {
+     // Plain-context shorthand for the general negate.
+     final com.ibm.icu.math.BigDecimal negated = negate(plainMC);
+     return negated;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>-this</code>.
+ * <p>
+ * Implements the negation (Prefix <code>-</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ *
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>-this</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal negate(com.ibm.icu.math.MathContext set) {
+     // [Originally named minus(); renamed to follow the Java precedents.]
+     if (set.lostDigits) {
+         checkdigits((com.ibm.icu.math.BigDecimal) null, set.digits);
+     }
+     // Work on a copy so this object is never modified: flip the sign, then let finish()
+     // apply the context (which may round).
+     final com.ibm.icu.math.BigDecimal flipped = clone(this);
+     flipped.ind = (byte) -flipped.ind;
+     return flipped.finish(set, false);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>+this</code>. Note that <code>this</code> is not
+ * necessarily a plain <code>BigDecimal</code>, but the result will always be.
+ * <p>
+ * The same as {@link #plus(MathContext)}, where the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The length of the decimal part (the scale) of the result will be <code>this.scale()</code>.
+ *
+ * @return A <code>BigDecimal</code> whose value is <code>+this</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal plus() {
+     // Plain-context shorthand for the general plus.
+     final com.ibm.icu.math.BigDecimal same = plus(plainMC);
+     return same;
+ }
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>+this</code>.
+ * <p>
+ * Implements the plus (Prefix <code>+</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ * <p>
+ * This method is useful for rounding or otherwise applying a context to a decimal value.
+ *
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>+this</code>.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal plus(com.ibm.icu.math.MathContext set) {
+     if (set.lostDigits) {
+         checkdigits((com.ibm.icu.math.BigDecimal) null, set.digits);
+     }
+     // Common case: a plain number under a plain context that needs no rounding (it fits
+     // in set.digits, or digits is 0) is returned as the same object.
+     final boolean bothPlain = (set.form == com.ibm.icu.math.MathContext.PLAIN)
+             && (this.form == com.ibm.icu.math.MathContext.PLAIN);
+     if (bothPlain && ((this.mant.length <= set.digits) || (set.digits == 0))) {
+         return this;
+     }
+     // Otherwise apply the new settings to a copy.
+     return clone(this).finish(set, false);
+ }
+
+ /**
+ * Returns a plain <code>BigDecimal</code> whose value is <code>this**rhs</code>, using fixed point arithmetic.
+ * <p>
+ * The same as {@link #pow(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
+ * context is <code>new MathContext(0, MathContext.PLAIN)</code>.
+ * <p>
+ * The parameter is the power to which <code>this</code> will be raised; it must be in the range 0 through
+ * 999999999, and must have a decimal part of zero. Note that these restrictions may be removed in the future, so
+ * they should not be used as a test for a whole number.
+ * <p>
+ * In addition, the power must not be negative, as no <code>MathContext</code> is used and so the result would then
+ * always be 0.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the operation (the power).
+ * @return A <code>BigDecimal</code> whose value is <code>this**rhs</code>, using fixed point arithmetic.
+ * @throws ArithmeticException if <code>rhs</code> is out of range or is not a whole number.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal pow(com.ibm.icu.math.BigDecimal rhs) {
+     // Fixed-point power is the general power evaluated under the plain context.
+     final com.ibm.icu.math.BigDecimal raised = pow(rhs, plainMC);
+     return raised;
+ }
+
+ // The name for this method is inherited from the precedent set by the
+ // BigInteger and Math classes.
+
+ /**
+ * Returns a <code>BigDecimal</code> whose value is <code>this**rhs</code>.
+ * <p>
+ * Implements the power (<code>**</code>) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
+ * <p>
+ * The first parameter is the power to which <code>this</code> will be raised; it must be in the range
+ * -999999999 through 999999999, and must have a decimal part of zero. Note that these restrictions may be removed
+ * in the future, so they should not be used as a test for a whole number.
+ * <p>
+ * If the <code>digits</code> setting of the <code>MathContext</code> parameter is 0, the power must be zero or
+ * positive.
+ *
+ * @param rhs The <code>BigDecimal</code> for the right hand side of the operation (the power).
+ * @param set The <code>MathContext</code> arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is <code>this**rhs</code>.
+ * @throws ArithmeticException if <code>rhs</code> is out of range or is not a whole number.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal pow(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     if (set.lostDigits)
+         checkdigits(rhs, set.digits);
+     // intcheck checks rhs by the rules and yields the power as an int.
+     int power = rhs.intcheck(MinArg, MaxArg);
+
+     com.ibm.icu.math.BigDecimal base = this;
+     final int precision = set.digits;
+     final int workingDigits;
+     if (precision == 0) {
+         // Unlimited precision cannot represent a reciprocal, so a negative power is refused.
+         if (rhs.ind == isneg)
+             throw new java.lang.ArithmeticException("Negative power:" + " " + rhs.toString());
+         workingDigits = 0;
+     } else {
+         // Length of the power without decimal zeros/exponent [see ANSI X3.274-1996].
+         final int powerLength = rhs.mant.length + rhs.exp;
+         if (powerLength > precision)
+             throw new java.lang.ArithmeticException("Too many digits:" + " " + rhs.toString());
+
+         // Round the base to the requested digits if it is longer than that.
+         if (base.mant.length > precision)
+             base = clone(base).round(set);
+
+         workingDigits = (precision + powerLength) + 1;
+     }
+
+     // Working settings for the intermediate steps: lostDigits checking is off because it
+     // was already done above. This construction must come after the checks on rhs.
+     final com.ibm.icu.math.MathContext workset = new com.ibm.icu.math.MathContext(workingDigits, set.form, false,
+             set.roundingMode);
+
+     com.ibm.icu.math.BigDecimal accumulator = ONE;
+     if (power == 0)
+         return accumulator; // x**0 == 1
+     if (power < 0)
+         power = -power; // [rhs.ind still records the sign]
+
+     // Square-and-multiply over the 31 value bits, highest first: each step shifts the
+     // next bit into the sign position, where a negative value means the bit is set.
+     boolean seenSetBit = false;
+     for (int bit = 1; bit <= 31; bit++) {
+         power = power << 1;
+         if (power < 0) {
+             seenSetBit = true;
+             accumulator = accumulator.multiply(base, workset); // acc = acc * x
+         }
+         // Square between bits, but not after the last one and not while acc is still 1.
+         if (bit < 31 && seenSetBit) {
+             accumulator = accumulator.multiply(accumulator, workset); // acc = acc * acc
+         }
+     }
+
+     if (rhs.ind < 0) // negative power [so digits > 0]: take the reciprocal
+         accumulator = ONE.divide(accumulator, workset);
+     return accumulator.finish(set, true); // round and strip, using the original settings
+ }
+
+ /**
+ * Returns a plain BigDecimal
whose value is the remainder of this/rhs
, using fixed point
+ * arithmetic.
+ *
+ * The same as {@link #remainder(BigDecimal, MathContext)}, where the BigDecimal
is rhs
,
+ * and the context is new MathContext(0, MathContext.PLAIN)
.
+ *
+ * This is not the modulo operator -- the result may be negative.
+ *
+ * @param rhs The BigDecimal
for the right hand side of the remainder operation.
+ * @return A BigDecimal
whose value is the remainder of this/rhs
, using fixed point
+ * arithmetic.
+ * @throws ArithmeticException if rhs
is zero.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal remainder(com.ibm.icu.math.BigDecimal rhs) {
+     // 'R' asks dodivide for the remainder; plainMC gives fixed-point working and the
+     // final -1 is passed through unchanged as dodivide's fourth argument.
+     return dodivide('R', rhs, plainMC, -1);
+ }
+
+ /**
+ * Returns a BigDecimal
whose value is the remainder of this/rhs
.
+ *
+ * Implements the remainder operator (as defined in the decimal documentation, see {@link BigDecimal class header}),
+ * and returns the result as a BigDecimal
object.
+ *
+ * This is not the modulo operator -- the result may be negative.
+ *
+ * @param rhs The BigDecimal
for the right hand side of the remainder operation.
+ * @param set The MathContext
arithmetic settings.
+ * @return A <code>BigDecimal</code> whose value is the remainder of <code>this/rhs</code>.
+ * @throws ArithmeticException if rhs
is zero.
+ * @throws ArithmeticException if the integer part of the result will not fit in the number of digits specified for the context.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal remainder(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     // 'R' asks dodivide for the remainder, computed under the caller's context.
+     return dodivide('R', rhs, set, -1);
+ }
+
+ /**
+ * Returns a plain BigDecimal
whose value is this-rhs
, using fixed point arithmetic.
+ *
+ * The same as {@link #subtract(BigDecimal, MathContext)}, where the BigDecimal
is rhs
,
+ * and the context is new MathContext(0, MathContext.PLAIN)
.
+ *
+ * The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands.
+ *
+ * @param rhs The BigDecimal
for the right hand side of the subtraction.
+ * @return A BigDecimal
whose value is this-rhs
, using fixed point arithmetic.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal subtract(com.ibm.icu.math.BigDecimal rhs) {
+     // Fixed-point variant: reuse the context-taking form with plainMC.
+     return subtract(rhs, plainMC);
+ }
+
+ /**
+ * Returns a BigDecimal
whose value is this-rhs
.
+ *
+ * Implements the subtraction (-
) operator (as defined in the decimal documentation, see
+ * {@link BigDecimal class header}), and returns the result as a BigDecimal
object.
+ *
+ * @param rhs The BigDecimal
for the right hand side of the subtraction.
+ * @param set The MathContext
arithmetic settings.
+ * @return A BigDecimal
whose value is this-rhs
.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal subtract(com.ibm.icu.math.BigDecimal rhs, com.ibm.icu.math.MathContext set) {
+     // Check here so that any lost-digits report names rhs rather than its negation
+     // (add would recheck, but against the negated copy).
+     if (set.lostDigits)
+         checkdigits(rhs, set.digits);
+
+     // Subtraction is addition of the negated operand; work on a copy so the
+     // caller's object is left untouched.
+     final com.ibm.icu.math.BigDecimal negated = clone(rhs);
+     negated.ind = (byte) -negated.ind;
+     return add(negated, set);
+ }
+
+ /* ---------------------------------------------------------------- */
+ /* Other methods */
+ /* ---------------------------------------------------------------- */
+
+ /**
+ * Converts this BigDecimal
to a byte
. If the BigDecimal
has a non-zero
+ * decimal part or is out of the possible range for a byte
(8-bit signed integer) result then an
+ * ArithmeticException
is thrown.
+ *
+ * @return A byte
equal in value to this
.
+ * @throws ArithmeticException if this
has a non-zero decimal part, or will not fit in a byte
.
+ * @stable ICU 2.0
+ */
+
+ public byte byteValueExact() {
+     // intValueExact also rejects a non-zero decimal part, so only the range is checked here
+     final int value = intValueExact();
+     if (value < java.lang.Byte.MIN_VALUE || value > java.lang.Byte.MAX_VALUE)
+         throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+     return (byte) value;
+ }
+
+ /**
+ * Converts this BigDecimal
to a double
. If the BigDecimal
is out of the
+ * possible range for a double
(64-bit signed floating point) result then an ArithmeticException
+ *
is thrown.
+ *
+ * The double produced is identical to result of expressing the BigDecimal
as a String
and
+ * then converting it using the Double(String)
constructor; this can result in values of
+ * Double.NEGATIVE_INFINITY
or Double.POSITIVE_INFINITY
.
+ *
+ * @return A double
corresponding to this
.
+ * @stable ICU 2.0
+ */
+
+ public double doubleValue() {
+     // Conversion goes through the string form of this number; parsing that text
+     // could in principle raise NumberFormatException.
+     final java.lang.String text = toString();
+     return java.lang.Double.parseDouble(text);
+ }
+
+ /**
+ * Compares this BigDecimal
with rhs
for equality.
+ *
+ * If the parameter is null
, or is not an instance of the BigDecimal type, or is not exactly equal to
+ * the current BigDecimal
object, then false is returned. Otherwise, true is returned.
+ *
+ * "Exactly equal", here, means that the String
representations of the BigDecimal
numbers
+ * are identical (they have the same characters in the same sequence).
+ *
+ * The {@link #compareTo(BigDecimal, MathContext)} method should be used for more general comparisons.
+ *
+ * @param obj The Object
for the right hand side of the comparison.
+ * @return A <code>boolean</code> whose value is <i>true</i> if and only if the operands have identical string
+ * representations.
+ * @throws ClassCastException if rhs
cannot be cast to a BigDecimal
object.
+ * @stable ICU 2.0
+ * @see #compareTo(BigDecimal)
+ * @see #compareTo(BigDecimal, MathContext)
+ */
+
+ public boolean equals(java.lang.Object obj) {
+     // Equal iff both numbers have exactly the same string form.
+     // instanceof is false for null, so this single test rejects null and foreign types
+     // alike; no exception is raised for an argument of another class.
+     if (!(obj instanceof com.ibm.icu.math.BigDecimal))
+         return false;
+     final com.ibm.icu.math.BigDecimal other = (com.ibm.icu.math.BigDecimal) obj;
+
+     if (ind != other.ind)
+         return false; // different signs never match
+
+     final boolean sameShape = mant.length == other.mant.length && exp == other.exp && form == other.form;
+     if (sameShape) {
+         // same length, exponent and form: the mantissa digits decide
+         return java.util.Arrays.equals(mant, other.mant);
+     }
+
+     // otherwise compare the laid-out character forms (length first, then content)
+     final char[] mine = layout();
+     final char[] theirs = other.layout();
+     return java.util.Arrays.equals(mine, theirs);
+ }
+
+ /**
+ * Converts this BigDecimal
to a float
. If the BigDecimal
is out of the
+ * possible range for a float
(32-bit signed floating point) result then an ArithmeticException
+ *
is thrown.
+ *
+ * The float produced is identical to result of expressing the BigDecimal
as a String
and
+ * then converting it using the Float(String)
constructor; this can result in values of
+ * Float.NEGATIVE_INFINITY
or Float.POSITIVE_INFINITY
.
+ *
+ * @return A float
corresponding to this
.
+ * @stable ICU 2.0
+ */
+
+ public float floatValue() {
+     // Same route as doubleValue: parse the string form of this number.
+     final java.lang.String text = toString();
+     return java.lang.Float.parseFloat(text);
+ }
+
+ /**
+ * Returns the String
representation of this BigDecimal
, modified by layout parameters.
+ *
+ * This method is provided as a primitive for use by more sophisticated classes, such as DecimalFormat
+ *
, that can apply locale-sensitive editing of the result. The level of formatting that it provides is a
+ * necessary part of the BigDecimal class as it is sensitive to and must follow the calculation and rounding rules
+ * for BigDecimal arithmetic. However, if the function is provided elsewhere, it may be removed from this class.
+ *
+ *
+ * The parameters, for both forms of the format
method are all of type int
. A value of -1
+ * for any parameter indicates that the default action or value for that parameter should be used.
+ *
+ * The parameters, before
and after
, specify the number of characters to be used for the
+ * integer part and decimal part of the result respectively. Exponential notation is not used. If either parameter
+ * is -1 (which indicates the default action), the number of characters used will be exactly as many as are needed
+ * for that part.
+ *
+ * before
must be a positive number; if it is larger than is needed to contain the integer part, that
+ * part is padded on the left with blanks to the requested length. If before
is not large enough to
+ * contain the integer part of the number (including the sign, for negative numbers) an exception is thrown.
+ *
+ * after
must be a non-negative number; if it is not the same size as the decimal part of the number,
+ * the number will be rounded (or extended with zeros) to fit. Specifying 0 for after
will cause the
+ * number to be rounded to an integer (that is, it will have no decimal part or decimal point). The rounding method
+ * will be the default, MathContext.ROUND_HALF_UP
.
+ *
+ * Other rounding methods, and the use of exponential notation, can be selected by using
+ * {@link #format(int,int,int,int,int,int)}. Using the two-parameter form of the method has exactly the same effect
+ * as using the six-parameter form with the final four parameters all being -1.
+ *
+ * @param before The int
specifying the number of places before the decimal point. Use -1 for 'as many as are needed'.
+ * @param after The int
specifying the number of places after the decimal point. Use -1 for 'as many as are needed'.
+ * @return A String
representing this BigDecimal
, laid out according to the specified parameters
+ * @throws ArithmeticException if the number cannot be laid out as requested.
+ * @throws IllegalArgumentException if a parameter is out of range.
+ * @stable ICU 2.0
+ * @see #toString
+ * @see #toCharArray
+ */
+
+ public java.lang.String format(int before, int after) {
+     // -1 is passed for explaces and exdigits; the form and rounding arguments are given explicitly.
+     final int unspecified = -1;
+     return format(before, after, unspecified, unspecified, com.ibm.icu.math.MathContext.SCIENTIFIC,
+             ROUND_HALF_UP);
+ }
+
+ /**
+ * Returns the String
representation of this BigDecimal
, modified by layout parameters and
+ * allowing exponential notation.
+ *
+ * This method is provided as a primitive for use by more sophisticated classes, such as DecimalFormat
+ *
, that can apply locale-sensitive editing of the result. The level of formatting that it provides is a
+ * necessary part of the BigDecimal class as it is sensitive to and must follow the calculation and rounding rules
+ * for BigDecimal arithmetic. However, if the function is provided elsewhere, it may be removed from this class.
+ *
+ *
+ * The parameters are all of type int
. A value of -1 for any parameter indicates that the default
+ * action or value for that parameter should be used.
+ *
+ * The first two parameters (before
and after
) specify the number of characters to be used
+ * for the integer part and decimal part of the result respectively, as defined for {@link #format(int,int)}. If
+ * either of these is -1 (which indicates the default action), the number of characters used will be exactly as many
+ * as are needed for that part.
+ *
+ * The remaining parameters control the use of exponential notation and rounding. Three (explaces
,
+ * exdigits
, and exform
) control the exponent part of the result. As before, the default
+ * action for any of these parameters may be selected by using the value -1.
+ *
+ * explaces
must be a positive number; it sets the number of places (digits after the sign of the
+ * exponent) to be used for any exponent part, the default (when explaces
is -1) being to use as many
+ * as are needed. If explaces
is not -1, space is always reserved for an exponent; if one is not needed
+ * (for example, if the exponent will be 0) then explaces
+2 blanks are appended to the result. If explaces
+ *
is not -1 and is not large enough to contain the exponent, an exception is thrown.
+ *
+ * exdigits
sets the trigger point for use of exponential notation. If, before any rounding, the number
+ * of places needed before the decimal point exceeds exdigits
, or if the absolute value of the result
+ * is less than 0.000001
, then exponential form will be used, provided that exdigits
was
+ * specified. When exdigits
is -1, exponential notation will never be used. If 0 is specified for
+ * exdigits
, exponential notation is always used unless the exponent would be 0.
+ *
+ * <code>exform</code> sets the form for exponential notation (if needed). It may be either
+ * {@link MathContext#SCIENTIFIC} or {@link MathContext#ENGINEERING}. If the latter, engineering, form is requested,
+ * up to three digits (plus sign, if negative) may be needed for the integer part of the result
+ * (<code>before</code>). Otherwise, only one digit (plus sign, if negative) is needed.
+ *
+ * Finally, the sixth argument, exround
, selects the rounding algorithm to be used, and must be one of
+ * the values indicated by a public constant in the {@link MathContext} class whose name starts with ROUND_
+ *
. The default (ROUND_HALF_UP
) may also be selected by using the value -1, as before.
+ *
+ * The special value <code>MathContext.ROUND_UNNECESSARY</code> may be used to detect whether non-zero digits are
+ * discarded -- if <code>exround</code> has this value then if non-zero digits would be discarded (rounded) during
+ * formatting then an <code>ArithmeticException</code> is thrown.
+ *
+ * @param before The int
specifying the number of places before the decimal point. Use -1 for 'as many as
+ * are needed'.
+ * @param after The int
specifying the number of places after the decimal point. Use -1 for 'as many as
+ * are needed'.
+ * @param explaces The int
specifying the number of places to be used for any exponent. Use -1 for 'as many
+ * as are needed'.
+ * @param exdigits The int
specifying the trigger (digits before the decimal point) which if exceeded causes
+ * exponential notation to be used. Use 0 to force exponential notation. Use -1 to force plain notation
+ * (no exponential notation).
+ * @param exformint The int
specifying the form of exponential notation to be used (
+ * {@link MathContext#SCIENTIFIC} or {@link MathContext#ENGINEERING}).
+ * @param exround The int
specifying the rounding mode to use. Use -1 for the default,
+ * {@link MathContext#ROUND_HALF_UP}.
+ * @return A String
representing this BigDecimal
, laid out according to the specified
+ * parameters
+ * @throws ArithmeticException if the number cannot be laid out as requested.
+ * @throws IllegalArgumentException if a parameter is out of range.
+ * @see #toString
+ * @see #toCharArray
+ * @stable ICU 2.0
+ */
+
+ public java.lang.String format(int before, int after, int explaces, int exdigits, int exformint, int exround) {
+     com.ibm.icu.math.BigDecimal num;
+     int mag = 0;
+     int thisafter = 0;
+     int lead = 0;
+     byte newmant[] = null;
+     int chop = 0;
+     int need = 0;
+     int oldexp = 0;
+     char a[];
+     int p = 0;
+     char newa[] = null;
+     int i = 0;
+     int places = 0;
+
+     /* Check arguments; -1 always means "use the default for this parameter" */
+     if ((before < (-1)) | (before == 0))
+         badarg("format", 1, java.lang.String.valueOf(before));
+     if (after < (-1))
+         badarg("format", 2, java.lang.String.valueOf(after));
+     if ((explaces < (-1)) | (explaces == 0))
+         badarg("format", 3, java.lang.String.valueOf(explaces));
+     if (exdigits < (-1))
+         badarg("format", 4, java.lang.String.valueOf(exdigits)); // report the bad argument's own value
+
+     // exponent form: default is SCIENTIFIC; only SCIENTIFIC and ENGINEERING are accepted
+     // [note PLAIN isn't allowed]
+     if (exformint == (-1))
+         exformint = com.ibm.icu.math.MathContext.SCIENTIFIC;
+     else if ((exformint != com.ibm.icu.math.MathContext.SCIENTIFIC)
+             && (exformint != com.ibm.icu.math.MathContext.ENGINEERING))
+         badarg("format", 5, java.lang.String.valueOf(exformint));
+
+     // checking the rounding mode is done by trying to construct a
+     // MathContext object with that mode; it will fail if bad
+     if (exround != ROUND_HALF_UP) {
+         try { // if non-default...
+             if (exround == (-1))
+                 exround = ROUND_HALF_UP;
+             else
+                 new com.ibm.icu.math.MathContext(9, com.ibm.icu.math.MathContext.SCIENTIFIC, false, exround);
+         } catch (java.lang.IllegalArgumentException badMode) {
+             badarg("format", 6, java.lang.String.valueOf(exround));
+         }
+     }
+
+     num = clone(this); // make private copy; everything below mutates only the copy
+
+     /*
+      * Here: num is the BigDecimal to format; before is places before point [>0]; after is places after
+      * point [>=0]; explaces is exponent places [>0]; exdigits is exponent digits [>=0]; exformint is exponent
+      * form [one of two]; exround is rounding mode. 'before' through 'exdigits' are -1 if not specified.
+      */
+
+     /* determine form: plain unless an exponent trigger was given and is tripped */
+     if ((exdigits == (-1)) || (num.ind == iszero)) {
+         num.form = (byte) com.ibm.icu.math.MathContext.PLAIN;
+     } else {
+         // determine whether triggers
+         mag = num.exp + num.mant.length;
+         if ((mag > exdigits) || (mag < (-5)))
+             num.form = (byte) exformint;
+         else
+             num.form = (byte) com.ibm.icu.math.MathContext.PLAIN;
+     }
+
+     /*
+      * If 'after' was specified then we may need to adjust the mantissa. This is a little tricky, as we must
+      * conform to the rules of exponential layout if necessary (e.g., we cannot end up with 10.0 if scientific).
+      */
+     if (after >= 0) {
+         setafter: for (;;) {
+             // calculate the current after-length
+             if (num.form == com.ibm.icu.math.MathContext.PLAIN)
+                 thisafter = -num.exp; // has decimal part
+             else if (num.form == com.ibm.icu.math.MathContext.SCIENTIFIC)
+                 thisafter = num.mant.length - 1;
+             else { // engineering
+                 lead = (((num.exp + num.mant.length) - 1)) % 3; // exponent to use
+                 if (lead < 0)
+                     lead = 3 + lead; // negative exponent case
+                 lead++; // number of leading digits
+                 if (lead >= num.mant.length)
+                     thisafter = 0;
+                 else
+                     thisafter = num.mant.length - lead;
+             }
+             if (thisafter == after)
+                 break setafter; // we're in luck
+             if (thisafter < after) { // need added trailing zeros
+                 // [thisafter can be negative]
+                 newmant = extend(num.mant, (num.mant.length + after) - thisafter);
+                 num.mant = newmant;
+                 num.exp = num.exp - ((after - thisafter)); // adjust exponent
+                 if (num.exp < MinExp)
+                     throw new java.lang.ArithmeticException("Exponent Overflow:" + " " + num.exp);
+                 break setafter;
+             }
+             // We have too many digits after the decimal point; this could
+             // cause a carry, which could change the mantissa...
+             // Watch out for implied leading zeros in PLAIN case
+             chop = thisafter - after; // digits to lop [is >0]
+             if (chop > num.mant.length) { // all digits go, no chance of carry
+                 // carry on with zero
+                 num.mant = ZERO.mant;
+                 num.ind = iszero;
+                 num.exp = 0;
+                 continue setafter; // recheck: we may need trailing zeros
+             }
+             // we have a digit to inspect from existing mantissa
+             // round the number as required
+             need = num.mant.length - chop; // digits to end up with [may be 0]
+             oldexp = num.exp; // save old exponent
+             num.round(need, exround);
+             // if the exponent grew by more than the digits we chopped, then
+             // we must have had a carry, so will need to recheck the layout
+             if ((num.exp - oldexp) == chop)
+                 break setafter; // number did not have carry
+             // mantissa got extended .. so go around and check again
+         }
+     }
+
+     a = num.layout(); // lay out, with exponent if required, etc.
+
+     /* Here we have laid-out number in 'a' */
+     // now apply 'before' and 'explaces' as needed
+     if (before > 0) {
+         // look for '.' or 'E'
+         for (p = 0; p < a.length; p++) {
+             if ((a[p] == '.') || (a[p] == 'E'))
+                 break;
+         }
+         // p is now offset of '.', 'E', or character after end of array
+         // that is, the current length of before part
+         if (p > before)
+             badarg("format", 1, java.lang.String.valueOf(before)); // won't fit
+         if (p < before) { // need leading blanks
+             newa = new char[(a.length + before) - p];
+             for (i = 0; i < (before - p); i++) {
+                 newa[i] = ' ';
+             }
+             java.lang.System.arraycopy(a, 0, newa, i, a.length);
+             a = newa;
+         }
+         // [if p=before then it's just the right length]
+     }
+
+     if (explaces > 0) {
+         // look for 'E', scanning backwards [cannot be at offset 0]
+         for (p = a.length - 1; p > 0; p--) {
+             if (a[p] == 'E')
+                 break;
+         }
+         // p is now offset of 'E', or 0
+         if (p == 0) { // no E part; add trailing blanks (room for 'E', sign and digits)
+             newa = new char[(a.length + explaces) + 2];
+             java.lang.System.arraycopy(a, 0, newa, 0, a.length);
+             for (i = a.length; i < newa.length; i++) {
+                 newa[i] = ' ';
+             }
+             a = newa;
+         } else { /* found E */// may need to insert zeros
+             places = (a.length - p) - 2; // number so far
+             if (places > explaces)
+                 badarg("format", 3, java.lang.String.valueOf(explaces));
+             if (places < explaces) { // need to insert zeros
+                 newa = new char[(a.length + explaces) - places];
+                 // copy everything through the E and its sign
+                 java.lang.System.arraycopy(a, 0, newa, 0, p + 2);
+                 final int zeroEnd = (p + 2) + (explaces - places);
+                 for (i = p + 2; i < zeroEnd; i++) {
+                     newa[i] = '0';
+                 }
+                 // then the remainder of the exponent
+                 java.lang.System.arraycopy(a, p + 2, newa, i, places);
+                 a = newa;
+             }
+             // [if places=explaces then it's just the right length]
+         }
+     }
+     return new java.lang.String(a);
+ }
+
+ /**
+ * Returns the hashcode for this BigDecimal
. This hashcode is suitable for use by the
+ * java.util.Hashtable
class.
+ *
+ * Note that two BigDecimal
objects are only guaranteed to produce the same hashcode if they are
+ * exactly equal (that is, the String
representations of the BigDecimal
numbers are
+ * identical -- they have the same characters in the same sequence).
+ *
+ * @return An int
that is the hashcode for this
.
+ * @stable ICU 2.0
+ */
+
+ public int hashCode() {
+     // Hash the string form. A hand-rolled hash over the fields would have to cope with
+     // the fact that several internal representations can share one toString() result.
+     final java.lang.String text = toString();
+     return text.hashCode();
+ }
+
+ /**
+ * Converts this <code>BigDecimal</code> to an <code>int</code>. If the <code>BigDecimal</code> has a non-zero
+ * decimal part it is discarded. If the <code>BigDecimal</code> is out of the possible range for an <code>int</code>
+ * (32-bit signed integer) result then only the low-order 32 bits are used. (That is, the number may be
+ * <i>decapitated</i>.) To avoid unexpected errors when these conditions occur, use the {@link #intValueExact}
+ * method.
+ *
+ * @return An int
converted from this
, truncated and decapitated if necessary.
+ * @stable ICU 2.0
+ */
+
+ public int intValue() {
+     // Convert via toBigInteger, then let BigInteger narrow the value to an int.
+     final java.math.BigInteger whole = toBigInteger();
+     return whole.intValue();
+ }
+
+ /**
+ * Converts this BigDecimal
to an int
. If the BigDecimal
has a non-zero
+ * decimal part or is out of the possible range for an int
(32-bit signed integer) result then an
+ * ArithmeticException
is thrown.
+ *
+ * @return An int
equal in value to this
.
+ * @throws ArithmeticException if this
has a non-zero decimal part, or will not fit in an int
.
+ * @stable ICU 2.0
+ */
+
+ public int intValueExact() {
+     int lodigit;
+     int useexp = 0;
+     int result;
+     int i = 0;
+     int topdig = 0;
+     // This does not use longValueExact() as the latter can be much
+     // slower.
+     // intcheck (from pow) relies on this to check decimal part
+     if (ind == iszero)
+         return 0; // easy, and quite common
+     /* test and drop any trailing decimal part */
+     lodigit = mant.length - 1; // index of the last mantissa digit that belongs to the integer part
+     if (exp < 0) {
+         lodigit = lodigit + exp; // reduces by -(-exp)
+         /* all decimal places must be 0 */
+         if ((!(allzero(mant, lodigit + 1))))
+             throw new java.lang.ArithmeticException("Decimal part non-zero:" + " " + this.toString());
+         if (lodigit < 0)
+             return 0; // magnitude is below 1, so the integer value is 0
+         useexp = 0; // decimal digits dropped; no implied trailing zeros
+     } else { /* exp is 0 or positive */
+         if ((exp + lodigit) > 9) // early exit: more than 10 digits can never fit
+             throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+         useexp = exp; // a positive exponent contributes that many trailing zeros
+     }
+     /* convert the mantissa to binary, inline for speed */
+     result = 0;
+     {
+         int $16 = lodigit + useexp;
+         i = 0;
+         for (; i <= $16; i++) {
+             result = result * 10;
+             if (i <= lodigit)
+                 result = result + mant[i]; // beyond lodigit only the implied zeros remain
+         }
+     }/* i */
+
+     /* Now, if the risky length (10 digits), check for overflow */
+     if ((lodigit + useexp) == 9) {
+         // note we cannot just test for -ve result, as overflow can move a
+         // zero into the top bit [consider 5555555555]
+         topdig = result / 1000000000; // get top digit, preserving sign
+         if (topdig != mant[0]) { // digit must match and be positive
+             // except in the special case ...
+             if (result == java.lang.Integer.MIN_VALUE) // looks like the special
+                 if (ind == isneg) // really was negative
+                     if (mant[0] == 2)
+                         return result; // really had top digit 2
+             throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+         }
+     }
+
+     /* Looks good */
+     if (ind == ispos)
+         return result;
+     return -result;
+ }
+
+ /**
+ * Converts this <code>BigDecimal</code> to a <code>long</code>. If the <code>BigDecimal</code> has a non-zero
+ * decimal part it is discarded. If the <code>BigDecimal</code> is out of the possible range for a <code>long</code>
+ * (64-bit signed integer) result then only the low-order 64 bits are used. (That is, the number may be
+ * <i>decapitated</i>.) To avoid unexpected errors when these conditions occur, use the {@link #longValueExact}
+ * method.
+ *
+ * @return A long
converted from this
, truncated and decapitated if necessary.
+ * @stable ICU 2.0
+ */
+
+ public long longValue() {
+     // Convert via toBigInteger, then let BigInteger narrow the value to a long.
+     final java.math.BigInteger whole = toBigInteger();
+     return whole.longValue();
+ }
+
+ /**
+ * Converts this BigDecimal
to a long
. If the BigDecimal
has a non-zero
+ * decimal part or is out of the possible range for a long
(64-bit signed integer) result then an
+ * ArithmeticException
is thrown.
+ *
+ * @return A long
equal in value to this
.
+ * @throws ArithmeticException if this
has a non-zero decimal part, or will not fit in a long
.
+ * @stable ICU 2.0
+ */
+
+ public long longValueExact() {
+     int lodigit;
+     int cstart = 0;
+     int useexp = 0;
+     long result;
+     int i = 0;
+     long topdig = 0;
+     // Parallels intValueExact, with a long result and limits of 19 digits
+     if (ind == 0)
+         return 0; // easy, and quite common
+     lodigit = mant.length - 1; // last included digit
+     if (exp < 0) {
+         lodigit = lodigit + exp; // -(-exp)
+         /* all decimal places must be 0 */
+         if (lodigit < 0)
+             cstart = 0;
+         else
+             cstart = lodigit + 1;
+         if ((!(allzero(mant, cstart))))
+             throw new java.lang.ArithmeticException("Decimal part non-zero:" + " " + this.toString());
+         if (lodigit < 0)
+             return 0; // magnitude is below 1, so the integer value is 0
+         useexp = 0; // decimal digits dropped; no implied trailing zeros
+     } else { /* exp is 0 or positive */
+         // early exit: 20 or more digits can never fit; 19-digit values are
+         // left for the exact overflow check below
+         if ((exp + lodigit) > 18)
+             throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+         useexp = exp; // a positive exponent contributes that many trailing zeros
+     }
+
+     /* convert the mantissa to binary, inline for speed */
+     // note that we could safely use the 'test for wrap to negative'
+     // algorithm here, but instead we parallel the intValueExact
+     // algorithm for ease of checking and maintenance.
+     result = (long) 0;
+     {
+         int $17 = lodigit + useexp;
+         i = 0;
+         for (; i <= $17; i++) {
+             result = result * 10;
+             if (i <= lodigit)
+                 result = result + mant[i]; // beyond lodigit only the implied zeros remain
+         }
+     }/* i */
+
+     /* Now, if the risky length (19 digits), check for overflow */
+     if ((lodigit + useexp) == 18) {
+         topdig = result / 1000000000000000000L; // get top digit, preserving sign
+         if (topdig != mant[0]) { // digit must match and be positive
+             // except in the special case ...
+             if (result == java.lang.Long.MIN_VALUE) // looks like the special
+                 if (ind == isneg) // really was negative
+                     if (mant[0] == 9)
+                         return result; // really had top digit 9
+             throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+         }
+     }
+
+     /* Looks good */
+     if (ind == ispos)
+         return result;
+     return -result;
+ }
+
+ /**
+ * Returns a plain BigDecimal
whose decimal point has been moved to the left by a specified number of
+ * positions. The parameter, n
, specifies the number of positions to move the decimal point. That is,
+ * if n
is 0 or positive, the number returned is given by:
+ *
+ * this.multiply(TEN.pow(new BigDecimal(-n)))
+ *
+ * n
may be negative, in which case the method returns the same result as movePointRight(-n)
+ *
.
+ *
+ * @param n The int
specifying the number of places to move the decimal point leftwards.
+ * @return A BigDecimal
derived from this
, with the decimal point moved n
+ * places to the left.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal movePointLeft(int n) {
+     // A shift of 0 is not special-cased; work on a copy and lower its exponent by n.
+     final com.ibm.icu.math.BigDecimal shifted = clone(this);
+     shifted.exp -= n;
+     // finish sets the form and checks the exponent
+     return shifted.finish(plainMC, false);
+ }
+
+ /**
+ * Returns a plain BigDecimal
whose decimal point has been moved to the right by a specified number of
+ * positions. The parameter, n
, specifies the number of positions to move the decimal point. That is,
+ * if n
is 0 or positive, the number returned is given by:
+ *
+ * this.multiply(TEN.pow(new BigDecimal(n)))
+ *
+ * n
may be negative, in which case the method returns the same result as movePointLeft(-n)
+ *
.
+ *
+ * @param n The int
specifying the number of places to move the decimal point rightwards.
+ * @return A BigDecimal
derived from this
, with the decimal point moved n
+ * places to the right.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal movePointRight(int n) {
+     // Mirror of movePointLeft: work on a copy and raise its exponent by n.
+     final com.ibm.icu.math.BigDecimal shifted = clone(this);
+     shifted.exp += n;
+     return shifted.finish(plainMC, false);
+ }
+
+ /**
+ * Returns the scale of this BigDecimal
. Returns a non-negative int
which is the scale of
+ * the number. The scale is the number of digits in the decimal part of the number if the number were formatted
+ * without exponential notation.
+ *
+ * @return An int
whose value is the scale of this BigDecimal
.
+ * @stable ICU 2.0
+ */
+
+ public int scale() {
+     // The scale is never negative: a zero or positive exponent reports 0,
+     // otherwise the scale is the negated exponent.
+     return (exp >= 0) ? 0 : -exp;
+ }
+
+ /**
+  * Returns a plain <code>BigDecimal</code> with a given scale.
+  * <p>
+  * If the given scale (which must be zero or positive) is the same as or greater than the length of the decimal part
+  * (the scale) of this <code>BigDecimal</code> then trailing zeros will be added to the decimal part as necessary.
+  * <p>
+  * If the given scale is less than the length of the decimal part (the scale) of this <code>BigDecimal</code> then
+  * trailing digits will be removed, and in this case an <code>ArithmeticException</code> is thrown if any discarded
+  * digits are non-zero.
+  * <p>
+  * The same as {@link #setScale(int, int)}, where the first parameter is the scale, and the second is
+  * <code>MathContext.ROUND_UNNECESSARY</code>.
+  *
+  * @param scale The <code>int</code> specifying the scale of the resulting <code>BigDecimal</code>.
+  * @return A plain <code>BigDecimal</code> with the given scale.
+  * @throws ArithmeticException if <code>scale</code> is negative.
+  * @throws ArithmeticException if reducing scale would discard non-zero digits.
+  * @stable ICU 2.0
+  */
+
+ public com.ibm.icu.math.BigDecimal setScale(int scale) {
+     // Delegate to the two-argument form, forbidding any rounding.
+     final int noRounding = ROUND_UNNECESSARY;
+     return this.setScale(scale, noRounding);
+ }
+
+ /**
+ * Returns a plain BigDecimal
with a given scale.
+ *
+ * If the given scale (which must be zero or positive) is the same as or greater than the length of the decimal part
+ * (the scale) of this BigDecimal
then trailing zeros will be added to the decimal part as necessary.
+ *
+ * If the given scale is less than the length of the decimal part (the scale) of this BigDecimal
then
+ * trailing digits will be removed, and the rounding mode given by the second parameter is used to determine if the
+ * remaining digits are affected by a carry. In this case, an IllegalArgumentException
is thrown if
+ * round
is not a valid rounding mode.
+ *
+ * If round
is MathContext.ROUND_UNNECESSARY
, an ArithmeticException
is
+ * thrown if any discarded digits are non-zero.
+ *
+ * @param scale The int
specifying the scale of the resulting BigDecimal
.
+ * @param round The int
rounding mode to be used for the division (see the {@link MathContext} class).
+ * @return A plain BigDecimal
with the given scale.
+ * @throws IllegalArgumentException if round
is not a valid rounding mode.
+ * @throws ArithmeticException if scale
is negative.
+ * @throws ArithmeticException if round
is MathContext.ROUND_UNNECESSARY
, and reducing scale would discard
+ * non-zero digits.
+ * @stable ICU 2.0
+ */
+
+ public com.ibm.icu.math.BigDecimal setScale(int scale, int round) {
+ int ourscale;
+ com.ibm.icu.math.BigDecimal res;
+ int padding = 0;
+ int newlen = 0;
+ // at present this naughtily only checks the round value if it is
+ // needed (used), for speed
+ ourscale = this.scale();
+ if (ourscale == scale) // already correct scale
+ if (this.form == com.ibm.icu.math.MathContext.PLAIN) // .. and form
+ return this;
+ res = clone(this); // need copy
+ if (ourscale <= scale) { // simply zero-padding/changing form
+ // if ourscale is 0 we may have lots of 0s to add
+ if (ourscale == 0)
+ padding = res.exp + scale;
+ else
+ padding = scale - ourscale;
+ res.mant = extend(res.mant, res.mant.length + padding);
+ res.exp = -scale; // as requested
+ } else {/* ourscale>scale: shortening, probably */
+ if (scale < 0)
+ throw new java.lang.ArithmeticException("Negative scale:" + " " + scale);
+ // [round() will raise exception if invalid round]
+ newlen = res.mant.length - ((ourscale - scale)); // [<=0 is OK]
+ res = res.round(newlen, round); // round to required length
+ // This could have shifted left if round (say) 0.9->1[.0]
+ // Repair if so by adding a zero and reducing exponent
+ if (res.exp != -scale) {
+ res.mant = extend(res.mant, res.mant.length + 1);
+ res.exp = res.exp - 1;
+ }
+ }
+ res.form = (byte) com.ibm.icu.math.MathContext.PLAIN; // by definition
+ return res;
+ }
+
+ /**
+  * Converts this <code>BigDecimal</code> to a <code>short</code>. If the <code>BigDecimal</code> has a non-zero
+  * decimal part or is out of the possible range for a <code>short</code> (16-bit signed integer) result then an
+  * <code>ArithmeticException</code> is thrown.
+  *
+  * @return A <code>short</code> equal in value to <code>this</code>.
+  * @throws ArithmeticException if <code>this</code> has a non-zero decimal part, or will not fit in a
+  *             <code>short</code>.
+  * @stable ICU 2.0
+  */
+
+ public short shortValueExact() {
+     final int value = this.intValueExact(); // checks the decimal part as well
+     if (value < java.lang.Short.MIN_VALUE || value > java.lang.Short.MAX_VALUE) {
+         throw new java.lang.ArithmeticException("Conversion overflow:" + " " + this.toString());
+     }
+     return (short) value;
+ }
+
+ /**
+  * Returns the sign of this <code>BigDecimal</code>, as an <code>int</code>. This is the signum function value
+  * for this <code>BigDecimal</code>: -1 if it is negative, 0 if it is numerically equal to zero, or 1 if it is
+  * positive.
+  *
+  * @return An <code>int</code> which is -1 if the <code>BigDecimal</code> is negative, 0 if it is numerically equal
+  *         to zero, or 1 if it is positive.
+  * @stable ICU 2.0
+  */
+
+ public int signum() {
+     // Relies on the indicator field already holding exactly the signum values.
+     final int sign = ind;
+     return sign;
+ }
+
+ /**
+  * Converts this <code>BigDecimal</code> to a <code>java.math.BigDecimal</code>.
+  * <p>
+  * This is an exact conversion; the result is the same as if the <code>BigDecimal</code> were formatted as a plain
+  * number without any rounding or exponent and then the <code>java.math.BigDecimal(java.lang.String)</code>
+  * constructor were used to construct the result.
+  * <p>
+  * <i>(Note: this method is provided only in the <code>com.ibm.icu.math</code> version of the BigDecimal class. It
+  * would not be present in a <code>java.math</code> version.)</i>
+  *
+  * @return The <code>java.math.BigDecimal</code> equal in value to this <code>BigDecimal</code>.
+  * @stable ICU 2.0
+  */
+
+ public java.math.BigDecimal toBigDecimal() {
+     final java.math.BigInteger unscaled = this.unscaledValue();
+     final int decimalPlaces = this.scale();
+     return new java.math.BigDecimal(unscaled, decimalPlaces);
+ }
+
+ /**
+ * Converts this BigDecimal
to a java.math.BigInteger
.
+ *
+ * Any decimal part is truncated (discarded). If an exception is desired should the decimal part be non-zero, use
+ * {@link #toBigIntegerExact()}.
+ *
+ * @return The java.math.BigInteger
equal in value to the integer part of this BigDecimal
.
+ * @stable ICU 2.0
+ */
+
+ public java.math.BigInteger toBigInteger() {
+ com.ibm.icu.math.BigDecimal res = null;
+ int newlen = 0;
+ byte newmant[] = null;
+ {/* select */
+ if ((exp >= 0) & (form == com.ibm.icu.math.MathContext.PLAIN))
+ res = this; // can layout simply
+ else if (exp >= 0) {
+ res = clone(this); // safe copy
+ res.form = (byte) com.ibm.icu.math.MathContext.PLAIN; // .. and request PLAIN
+ } else {
+ { // exp<0; scale to be truncated
+ // we could use divideInteger, but we may as well be quicker
+ if (-this.exp >= this.mant.length)
+ res = ZERO; // all blows away
+ else {
+ res = clone(this); // safe copy
+ newlen = res.mant.length + res.exp;
+ newmant = new byte[newlen]; // [shorter]
+ java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0,
+ newlen);
+ res.mant = newmant;
+ res.form = (byte) com.ibm.icu.math.MathContext.PLAIN;
+ res.exp = 0;
+ }
+ }
+ }
+ }
+ return new BigInteger(new java.lang.String(res.layout()));
+ }
+
+ /**
+  * Converts this <code>BigDecimal</code> to a <code>java.math.BigInteger</code>.
+  * <p>
+  * An exception is thrown if the decimal part (if any) is non-zero.
+  *
+  * @return The <code>java.math.BigInteger</code> equal in value to the integer part of this
+  *         <code>BigDecimal</code>.
+  * @throws ArithmeticException if <code>this</code> has a non-zero decimal part.
+  * @stable ICU 2.0
+  */
+
+ public java.math.BigInteger toBigIntegerExact() {
+     // A negative exponent means there may be a decimal part; all of its digits must be 0.
+     if (exp < 0 && !allzero(mant, mant.length + exp)) {
+         throw new java.lang.ArithmeticException("Decimal part non-zero:" + " " + this.toString());
+     }
+     return this.toBigInteger();
+ }
+
+ /**
+  * Returns the <code>BigDecimal</code> as a character array. The result of this method is the same as using the
+  * sequence <code>toString().toCharArray()</code>, but avoids creating the intermediate <code>String</code> and
+  * <code>char[]</code> objects.
+  *
+  * @return The <code>char[]</code> array corresponding to this <code>BigDecimal</code>.
+  * @stable ICU 2.0
+  */
+
+ public char[] toCharArray() {
+     final char[] laidOut = this.layout();
+     return laidOut;
+ }
+
+ /**
+  * Returns the <code>BigDecimal</code> as a <code>String</code>. This returns a <code>String</code> that exactly
+  * represents this <code>BigDecimal</code>, as defined in the decimal documentation (see {@link BigDecimal class
+  * header}).
+  * <p>
+  * By definition, using the {@link #BigDecimal(String)} constructor on the result <code>String</code> will create a
+  * <code>BigDecimal</code> that is exactly equal to the original <code>BigDecimal</code>.
+  *
+  * @return The <code>String</code> exactly corresponding to this <code>BigDecimal</code>.
+  * @see #format(int, int)
+  * @see #format(int, int, int, int, int, int)
+  * @see #toCharArray()
+  * @stable ICU 2.0
+  */
+
+ public java.lang.String toString() {
+     final char[] laidOut = layout();
+     return java.lang.String.valueOf(laidOut);
+ }
+
+ /**
+ * Returns the number as a BigInteger
after removing the scale. That is, the number is expressed as a
+ * plain number, any decimal point is then removed (retaining the digits of any decimal part), and the result is
+ * then converted to a BigInteger
.
+ *
+ * @return The java.math.BigInteger
equal in value to this BigDecimal
multiplied by ten to
+ * the power of this.scale()
.
+ * @stable ICU 2.0
+ */
+
+ public java.math.BigInteger unscaledValue() {
+ com.ibm.icu.math.BigDecimal res = null;
+ if (exp >= 0)
+ res = this;
+ else {
+ res = clone(this); // safe copy
+ res.exp = 0; // drop scale
+ }
+ return res.toBigInteger();
+ }
+
+ /**
+  * Translates a <code>double</code> to a <code>BigDecimal</code>.
+  * <p>
+  * Returns a <code>BigDecimal</code> which is the decimal representation of the 64-bit signed binary floating point
+  * parameter. If the parameter is infinite, or is not a number (NaN), a <code>NumberFormatException</code> is
+  * thrown.
+  * <p>
+  * The number is constructed as though <code>num</code> had been converted to a <code>String</code> using the
+  * <code>Double.toString()</code> method and the {@link #BigDecimal(java.lang.String)} constructor had then been
+  * used. This is typically not an exact conversion.
+  *
+  * @param dub The <code>double</code> to be translated.
+  * @return The <code>BigDecimal</code> equal in value to <code>dub</code>.
+  * @throws NumberFormatException if the parameter is infinite or not a number.
+  * @stable ICU 2.0
+  */
+
+ public static com.ibm.icu.math.BigDecimal valueOf(double dub) {
+     // Reminder: a zero double returns '0.0', so we cannot fastpath to
+     // use the constant ZERO. This might be important enough to justify
+     // a factory approach, a cache, or a few private constants, later.
+     //
+     // Use the static Double.toString(double) rather than boxing the value just to call
+     // toString() on it: the text produced is the same, without the throwaway Double object.
+     return new com.ibm.icu.math.BigDecimal(java.lang.Double.toString(dub));
+ }
+
+ /**
+  * Translates a <code>long</code> to a <code>BigDecimal</code>. That is, returns a plain <code>BigDecimal</code>
+  * whose value is equal to the given <code>long</code>.
+  *
+  * @param lint The <code>long</code> to be translated.
+  * @return The <code>BigDecimal</code> equal in value to <code>lint</code>.
+  * @stable ICU 2.0
+  */
+
+ public static com.ibm.icu.math.BigDecimal valueOf(long lint) {
+     // Same as the scaled form with a scale of zero.
+     final int noScale = 0;
+     return valueOf(lint, noScale);
+ }
+
+ /**
+ * Translates a long
to a BigDecimal
with a given scale. That is, returns a plain
+ * BigDecimal
whose unscaled value is equal to the given long
, adjusted by the second parameter,
+ * scale
.
+ *
+ * The result is given by:
+ *
+ * (new BigDecimal(lint)).divide(TEN.pow(new BigDecimal(scale)))
+ *
+ * A NumberFormatException
is thrown if scale
is negative.
+ *
+ * @param lint The long
to be translated.
+ * @param scale The int
scale to be applied.
+ * @return The BigDecimal
equal in value to lint
.
+ * @throws NumberFormatException if the scale is negative.
+ * @stable ICU 2.0
+ */
+
+ public static com.ibm.icu.math.BigDecimal valueOf(long lint, int scale) {
+ com.ibm.icu.math.BigDecimal res = null;
+ {/* select */
+ if (lint == 0)
+ res = ZERO;
+ else if (lint == 1)
+ res = ONE;
+ else if (lint == 10)
+ res = TEN;
+ else {
+ res = new com.ibm.icu.math.BigDecimal(lint);
+ }
+ }
+ if (scale == 0)
+ return res;
+ if (scale < 0)
+ throw new java.lang.NumberFormatException("Negative scale:" + " " + scale);
+ res = clone(res); // safe copy [do not mutate]
+ res.exp = -scale; // exponent is -scale
+ return res;
+ }
+
+ /* ---------------------------------------------------------------- */
+ /* Private methods */
+ /* ---------------------------------------------------------------- */
+
+ /*
+ * Return char array value of a BigDecimal (conversion from BigDecimal to laid-out canonical char array).
+ * The mantissa will either already have been rounded (following an operation) or will be of length appropriate
+ * (in the case of construction from an int, for example).
We must not alter the mantissa, here.
'form'
+ * describes whether we are to use exponential notation (and if so, which), or if we are to lay out as a plain/pure
+ * numeric.
+ */
+
+ private char[] layout() {
+ char cmant[];
+ int i = 0;
+ StringBuilder sb = null;
+ int euse = 0;
+ int sig = 0;
+ char csign = 0;
+ char rec[] = null;
+ int needsign;
+ int mag;
+ int len = 0;
+ cmant = new char[mant.length]; // copy byte[] to a char[]
+ {
+ int $18 = mant.length;
+ i = 0;
+ for (; $18 > 0; $18--, i++) {
+ cmant[i] = (char) (mant[i] + ((int) ('0')));
+ }
+ }/* i */
+
+ if (form != com.ibm.icu.math.MathContext.PLAIN) {/* exponential notation needed */
+ sb = new StringBuilder(cmant.length + 15); // -x.xxxE+999999999
+ if (ind == isneg)
+ sb.append('-');
+ euse = (exp + cmant.length) - 1; // exponent to use
+ /* setup sig=significant digits and copy to result */
+ if (form == com.ibm.icu.math.MathContext.SCIENTIFIC) { // [default]
+ sb.append(cmant[0]); // significant character
+ if (cmant.length > 1) // have decimal part
+ sb.append('.').append(cmant, 1, cmant.length - 1);
+ } else {
+ do {
+ sig = euse % 3; // common
+ if (sig < 0)
+ sig = 3 + sig; // negative exponent
+ euse = euse - sig;
+ sig++;
+ if (sig >= cmant.length) { // zero padding may be needed
+ sb.append(cmant, 0, cmant.length);
+ {
+ int $19 = sig - cmant.length;
+ for (; $19 > 0; $19--) {
+ sb.append('0');
+ }
+ }
+ } else { // decimal point needed
+ sb.append(cmant, 0, sig).append('.').append(cmant, sig, cmant.length - sig);
+ }
+ } while (false);
+ }/* engineering */
+ if (euse != 0) {
+ if (euse < 0) {
+ csign = '-';
+ euse = -euse;
+ } else
+ csign = '+';
+ sb.append('E').append(csign).append(euse);
+ }
+ rec = new char[sb.length()];
+ int srcEnd = sb.length();
+ if (0 != srcEnd) {
+ sb.getChars(0, srcEnd, rec, 0);
+ }
+ return rec;
+ }
+
+ /* Here for non-exponential (plain) notation */
+ if (exp == 0) {/* easy */
+ if (ind >= 0)
+ return cmant; // non-negative integer
+ rec = new char[cmant.length + 1];
+ rec[0] = '-';
+ java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, 1, cmant.length);
+ return rec;
+ }
+
+ /* Need a '.' and/or some zeros */
+ needsign = (ind == isneg) ? 1 : 0; // space for sign? 0 or 1
+
+ /*
+ * MAG is the position of the point in the mantissa (index of the character it follows)
+ */
+ mag = exp + cmant.length;
+
+ if (mag < 1) {/* 0.00xxxx form */
+ len = (needsign + 2) - exp; // needsign+2+(-mag)+cmant.length
+ rec = new char[len];
+ if (needsign != 0)
+ rec[0] = '-';
+ rec[needsign] = '0';
+ rec[needsign + 1] = '.';
+ {
+ int $20 = -mag;
+ i = needsign + 2;
+ for (; $20 > 0; $20--, i++) { // maybe none
+ rec[i] = '0';
+ }
+ }/* i */
+ java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, (needsign + 2) - mag,
+ cmant.length);
+ return rec;
+ }
+
+ if (mag > cmant.length) {/* xxxx0000 form */
+ len = needsign + mag;
+ rec = new char[len];
+ if (needsign != 0)
+ rec[0] = '-';
+ java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, cmant.length);
+ {
+ int $21 = mag - cmant.length;
+ i = needsign + cmant.length;
+ for (; $21 > 0; $21--, i++) { // never 0
+ rec[i] = '0';
+ }
+ }/* i */
+ return rec;
+ }
+
+ /* decimal point is in the middle of the mantissa */
+ len = (needsign + 1) + cmant.length;
+ rec = new char[len];
+ if (needsign != 0)
+ rec[0] = '-';
+ java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, mag);
+ rec[needsign + mag] = '.';
+ java.lang.System.arraycopy((java.lang.Object) cmant, mag, (java.lang.Object) rec, (needsign + mag) + 1,
+ cmant.length - mag);
+ return rec;
+ }
+
+ /*
+ * Checks a BigDecimal argument to ensure it's a true integer in a given range. If OK, returns it as an
+ * int.
+ */
+ // [currently only used by pow]
+ private int intcheck(int min, int max) {
+ int i;
+ i = this.intValueExact(); // [checks for non-0 decimal part]
+ // Use same message as though intValueExact failed due to size
+ if ((i < min) | (i > max))
+ throw new java.lang.ArithmeticException("Conversion overflow:" + " " + i);
+ return i;
+ }
+
+ /* Carry out division operations. */
+ /*
+ * Shared implementation of divide, integer divide and remainder, with this as the left-hand side.
+ *
+ * Arg1 (code) is the operation code: D=divide, I=integer divide, R=remainder
+ * Arg2 (rhs) is the right-hand side (the divisor).
+ * Arg3 (set) is the context; its digits, form, lostDigits and roundingMode settings are used.
+ * Arg4 (scale) is the explicit scale iff code='D' or 'I' (-1 if none).
+ *
+ * Returns the quotient (for 'D' and 'I') or the remainder (for 'R').
+ * Throws ArithmeticException for a zero divisor ("Divide by 0", which includes 0/0), or for 'I' and 'R' if the
+ * integer part of the quotient needs more digits than are available ("Integer overflow"). checkdigits() may also
+ * throw when set.lostDigits is set.
+ *
+ * Underlying algorithm (complications for Remainder function and scaled division are omitted for clarity):
+ *
+ *   Test for x/0 and then 0/x
+ *   Exp =Exp1 - Exp2
+ *   Exp =Exp +len(var1) -len(var2)
+ *   Sign=Sign1 * Sign2
+ *   Pad accumulator (Var1) to double-length with 0's (pad1)
+ *   Pad Var2 to same length as Var1
+ *   B2B=1st two digits of var2, +1 to allow for roundup
+ *   have=0
+ *   Do until (have=digits+1 OR residue=0)
+ *     if exp<0 then if integer divide/residue then leave
+ *     this_digit=0
+ *     Do forever
+ *       compare numbers
+ *       if <0 then leave inner_loop
+ *       if =0 then (- quick exit without subtract -) do
+ *         this_digit=this_digit+1; output this_digit
+ *         leave outer_loop; end
+ *       Compare lengths of numbers (mantissae):
+ *       If same then CA=first_digit_of_Var1
+ *               else CA=first_two_digits_of_Var1
+ *       mult=ca*10/b2b -- Good and safe guess at divisor
+ *       if mult=0 then mult=1
+ *       this_digit=this_digit+mult
+ *       subtract
+ *       end inner_loop
+ *     if have\=0 | this_digit\=0 then do
+ *       output this_digit
+ *       have=have+1; end
+ *     var2=var2/10
+ *     exp=exp-1
+ *     end outer_loop
+ *   exp=exp+1 -- set the proper exponent
+ *   if have=0 then generate answer=0
+ *   Return to FINISHED
+ *   Result defined by MATHV1
+ *
+ * For extended commentary, see DMSRCN.
+ */
+
+ private com.ibm.icu.math.BigDecimal dodivide(char code, com.ibm.icu.math.BigDecimal rhs,
+ com.ibm.icu.math.MathContext set, int scale) {
+ com.ibm.icu.math.BigDecimal lhs;
+ int reqdig; // number of digits to work to (set.digits, or computed for a scaled divide)
+ int newexp;
+ com.ibm.icu.math.BigDecimal res;
+ int newlen;
+ byte var1[]; // working copy of the lhs mantissa; holds the residue as division proceeds
+ int var1len; // virtual (in-use) length of var1
+ byte var2[]; // the rhs mantissa (never altered)
+ int var2len; // virtual length of var2; reduced by one to 'divide var2 by 10'
+ int b2b; // first two digits of var2, plus 1
+ int have; // number of result digits collected so far
+ int thisdigit = 0;
+ int i = 0;
+ byte v2 = 0;
+ int ba = 0; // leading digit(s) of var1 used to estimate the multiplier
+ int mult = 0; // estimated multiple of var2 to subtract
+ int start = 0;
+ int padding = 0;
+ int d = 0;
+ byte newvar1[] = null;
+ byte lasthave = 0;
+ int actdig = 0;
+ byte newmant[] = null;
+
+ if (set.lostDigits)
+ checkdigits(rhs, set.digits); // throws if either operand has non-zero digits beyond set.digits
+ lhs = this; // name for clarity
+
+ // [note we must have checked lostDigits before the following checks]
+ if (rhs.ind == 0)
+ throw new java.lang.ArithmeticException("Divide by 0"); // includes 0/0
+ if (lhs.ind == 0) { // 0/x => 0 [possibly with .0s]
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN)
+ return ZERO;
+ if (scale == (-1))
+ return lhs;
+ return lhs.setScale(scale);
+ }
+
+ /* Prepare numbers according to BigDecimal rules */
+ reqdig = set.digits; // local copy (heavily used)
+ if (reqdig > 0) {
+ if (lhs.mant.length > reqdig)
+ lhs = clone(lhs).round(set); // round a copy, leaving this untouched
+ if (rhs.mant.length > reqdig)
+ rhs = clone(rhs).round(set); // round a copy, leaving the caller's rhs untouched
+ } else {/* scaled divide */
+ if (scale == (-1))
+ scale = lhs.scale();
+ // set reqdig to be at least large enough for the computation
+ reqdig = lhs.mant.length; // base length
+ // next line handles both positive lhs.exp and also scale mismatch
+ if (scale != -lhs.exp)
+ reqdig = (reqdig + scale) + lhs.exp;
+ reqdig = (reqdig - ((rhs.mant.length - 1))) - rhs.exp; // reduce by RHS effect
+ if (reqdig < lhs.mant.length)
+ reqdig = lhs.mant.length; // clamp
+ if (reqdig < rhs.mant.length)
+ reqdig = rhs.mant.length; // ..
+ }
+
+ /* precalculate exponent */
+ newexp = ((lhs.exp - rhs.exp) + lhs.mant.length) - rhs.mant.length;
+ /* If new exponent -ve, then some quick exits are possible */
+ if (newexp < 0)
+ if (code != 'D') {
+ if (code == 'I')
+ return ZERO; // easy - no integer part
+ /* Must be 'R'; remainder is [finished clone of] input value */
+ return clone(lhs).finish(set, false);
+ }
+
+ /* We need slow division */
+ res = new com.ibm.icu.math.BigDecimal(); // where we'll build result
+ res.ind = (byte) (lhs.ind * rhs.ind); // final sign (for D/I)
+ res.exp = newexp; // initial exponent (for D/I)
+ res.mant = new byte[reqdig + 1]; // where build the result
+
+ /* Now [virtually] pad the mantissae with trailing zeros */
+ // Also copy the LHS, which will be our working array
+ newlen = (reqdig + reqdig) + 1;
+ var1 = extend(lhs.mant, newlen); // always makes longer, so new safe array
+ var1len = newlen; // [remaining digits are 0]
+
+ var2 = rhs.mant;
+ var2len = newlen; // virtual length only; var2 itself is not extended
+
+ /* Calculate first two digits of rhs (var2), +1 for later estimations */
+ b2b = (var2[0] * 10) + 1;
+ if (var2.length > 1)
+ b2b = b2b + var2[1];
+
+ /* start the long-division loops */
+ have = 0;
+ {
+ outer: for (;;) {
+ thisdigit = 0;
+ /* find the next digit */
+ {
+ inner: for (;;) {
+ if (var1len < var2len)
+ break inner; // V1 too low
+ if (var1len == var2len) { // compare needed
+ {
+ compare: do { // comparison
+ {
+ int $22 = var1len;
+ i = 0;
+ for (; $22 > 0; $22--, i++) {
+ // var1len is always <= var1.length
+ if (i < var2.length)
+ v2 = var2[i];
+ else
+ v2 = (byte) 0; // beyond the physical end of var2: a virtual padding zero
+ if (var1[i] < v2)
+ break inner; // V1 too low
+ if (var1[i] > v2)
+ break compare; // OK to subtract
+ }
+ }/* i */
+ /*
+ * reach here if lhs and rhs are identical; subtraction will increase digit by one,
+ * and the residue will be 0 so we are done; leave the loop with residue set to 0
+ * (in case code is 'R' or ROUND_UNNECESSARY or a ROUND_HALF_xxxx is being checked)
+ */
+ thisdigit++;
+ res.mant[have] = (byte) thisdigit;
+ have++;
+ var1[0] = (byte) 0; // residue to 0 [this is all we'll test]
+ // var1len=1 -- [optimized out]
+ break outer;
+ } while (false);
+ }/* compare */
+ /* prepare for subtraction. Estimate BA (lengths the same) */
+ ba = (int) var1[0]; // use only first digit
+ } // lengths the same
+ else {/* lhs longer than rhs */
+ /* use first two digits for estimate */
+ ba = var1[0] * 10;
+ if (var1len > 1)
+ ba = ba + var1[1];
+ }
+ /* subtraction needed; V1>=V2 */
+ mult = (ba * 10) / b2b;
+ if (mult == 0)
+ mult = 1;
+ thisdigit = thisdigit + mult;
+ // subtract; var1 reusable
+ var1 = byteaddsub(var1, var1len, var2, var2len, -mult, true);
+ if (var1[0] != 0)
+ continue inner; // maybe another subtract needed
+ /*
+ * V1 now probably has leading zeros, remove leading 0's and try again. (It could be longer than
+ * V2)
+ */
+ {
+ int $23 = var1len - 2;
+ start = 0;
+ start: for (; start <= $23; start++) {
+ if (var1[start] != 0)
+ break start;
+ var1len--;
+ }
+ }/* start */
+ if (start == 0)
+ continue inner;
+ // shift left
+ java.lang.System.arraycopy((java.lang.Object) var1, start, (java.lang.Object) var1, 0, var1len);
+ }
+ }/* inner */
+
+ /* We have the next digit */
+ if ((have != 0) | (thisdigit != 0)) { // put the digit we got
+ res.mant[have] = (byte) thisdigit;
+ have++;
+ if (have == (reqdig + 1))
+ break outer; // we have all we need
+ if (var1[0] == 0)
+ break outer; // residue now 0
+ }
+ /* can leave now if a scaled divide and exponent is small enough */
+ if (scale >= 0)
+ if (-res.exp > scale)
+ break outer;
+ /* can leave now if not Divide and no integer part left */
+ if (code != 'D')
+ if (res.exp <= 0)
+ break outer;
+ res.exp = res.exp - 1; // reduce the exponent
+ /*
+ * to get here, V1 is less than V2, so divide V2 by 10 and go for the next digit
+ */
+ var2len--;
+ }
+ }/* outer */
+
+ /* here when we have finished dividing, for some reason */
+ // have is the number of digits we collected in res.mant
+ if (have == 0)
+ have = 1; // res.mant[0] is 0; we always want a digit
+
+ if ((code == 'I') | (code == 'R')) {/* check for integer overflow needed */
+ if ((have + res.exp) > reqdig)
+ throw new java.lang.ArithmeticException("Integer overflow");
+
+ if (code == 'R') {
+ do {
+ /* We were doing Remainder -- return the residue */
+ if (res.mant[0] == 0) // no integer part was found
+ return clone(lhs).finish(set, false); // .. so return lhs, canonical
+ if (var1[0] == 0)
+ return ZERO; // simple 0 residue
+ res.ind = lhs.ind; // sign is always as LHS
+ /*
+ * Calculate the exponent by subtracting the number of padding zeros we added and adding the
+ * original exponent
+ */
+ padding = ((reqdig + reqdig) + 1) - lhs.mant.length;
+ res.exp = (res.exp - padding) + lhs.exp;
+
+ /*
+ * strip insignificant padding zeros from residue, and create/copy the resulting mantissa if need be
+ */
+ d = var1len;
+ {
+ i = d - 1;
+ i: for (; i >= 1; i--) {
+ if (!((res.exp < lhs.exp) & (res.exp < rhs.exp)))
+ break; // exponent has reached that of an operand: stop stripping
+ if (var1[i] != 0)
+ break i; // a significant digit: stop stripping
+ d--;
+ res.exp = res.exp + 1;
+ }
+ }/* i */
+ if (d < var1.length) {/* need to reduce */
+ newvar1 = new byte[d];
+ java.lang.System.arraycopy((java.lang.Object) var1, 0, (java.lang.Object) newvar1, 0, d); // shorten
+ var1 = newvar1;
+ }
+ res.mant = var1;
+ return res.finish(set, false);
+ } while (false);
+ }/* remainder */
+ }
+
+ else {/* 'D' -- no overflow check needed */
+ // If there was a residue then bump the final digit (iff 0 or 5)
+ // so that the residue is visible for ROUND_UP, ROUND_HALF_xxx and
+ // ROUND_UNNECESSARY checks (etc.) later.
+ // [if we finished early, the residue will be 0]
+ if (var1[0] != 0) { // residue not 0
+ lasthave = res.mant[have - 1];
+ if (((lasthave % 5)) == 0)
+ res.mant[have - 1] = (byte) (lasthave + 1);
+ }
+ }
+
+ /* Here for Divide or Integer Divide */
+ // handle scaled results first ['I' always scale 0, optional for 'D']
+ if (scale >= 0) {
+ do {
+ // say 'scale have res.exp len' scale have res.exp res.mant.length
+ if (have != res.mant.length)
+ // already padded with 0's, so just adjust exponent
+ res.exp = res.exp - ((res.mant.length - have));
+ // calculate number of digits we really want [may be 0]
+ actdig = res.mant.length - (-res.exp - scale);
+ res.round(actdig, set.roundingMode); // round to desired length
+ // This could have shifted left if round (say) 0.9->1[.0]
+ // Repair if so by adding a zero and reducing exponent
+ if (res.exp != -scale) {
+ res.mant = extend(res.mant, res.mant.length + 1);
+ res.exp = res.exp - 1;
+ }
+ return res.finish(set, true); // [strip if not PLAIN]
+ } while (false);
+ }/* scaled */
+
+ // reach here only if a non-scaled
+ if (have == res.mant.length) { // got digits+1 digits
+ res.round(set);
+ have = reqdig;
+ } else {/* have<=reqdig */
+ if (res.mant[0] == 0)
+ return ZERO; // fastpath
+ // make the mantissa truly just 'have' long
+ // [we could let finish do this, during strip, if we adjusted
+ // the exponent; however, truncation avoids the strip loop]
+ newmant = new byte[have]; // shorten
+ java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0, have);
+ res.mant = newmant;
+ }
+ return res.finish(set, true);
+ }
+
+ /* Report a conversion exception. */
+
+ private void bad(char s[]) {
+ throw new java.lang.NumberFormatException("Not a number:" + " " + java.lang.String.valueOf(s));
+ }
+
+ /*
+ * Report a bad argument to a method. Arg1 is method name Arg2 is argument position Arg3 is what was
+ * found
+ */
+
+ private void badarg(java.lang.String name, int pos, java.lang.String value) {
+ throw new java.lang.IllegalArgumentException("Bad argument" + " " + pos + " " + "to" + " " + name + ":" + " "
+ + value);
+ }
+
+ /*
+  * Extend byte array to given length, padding with 0s. If no extension is required then return the same
+  * array.
+  *
+  * Arg1 is the source byte array
+  * Arg2 is the new length (longer)
+  */
+
+ private static final byte[] extend(byte inarr[], int newlen) {
+     final int oldlen = inarr.length;
+     if (oldlen == newlen) {
+         return inarr; // already the right length: hand back the same array
+     }
+     // A freshly allocated array is zero-filled by the JVM, which supplies the padding.
+     final byte[] padded = new byte[newlen];
+     java.lang.System.arraycopy((java.lang.Object) inarr, 0, (java.lang.Object) padded, 0, oldlen);
+     return padded;
+ }
+
+ /*
+ * Add or subtract two >=0 integers in byte arrays (one decimal digit per byte, most significant first).
+ *
+ * This routine performs the calculation: C=A+(B*M)
+ * Where M is in the range -9 through +9.
+ * If M<0 then A must be at least as large as the amount subtracted, so the result is always non-negative.
+ *
+ * Leading zeros are not removed after a subtraction. The result is either the same length as the longer of A and B,
+ * or 1 longer than that (if a carry occurred).
+ *
+ * A is not altered unless Arg6 (reuse) is true. B is never altered.
+ *
+ * Arg1 is A
+ * Arg2 is A length to use (if longer than A, pad with 0's)
+ * Arg3 is B
+ * Arg4 is B length to use (if longer than B, pad with 0's)
+ * Arg5 is M, the multiplier
+ * Arg6 is true if A can be used to build the result (if it fits)
+ *
+ * Returns the result array C, which is A itself when reuse was requested and A has exactly the required length.
+ *
+ * This routine is severely performance-critical; any change here must be measured (timed) to assure no performance
+ * degradation.
+ */
+ // 1996.02.20 -- enhanced version of DMSRCN algorithm (1981)
+ // 1997.10.05 -- changed to byte arrays (from char arrays)
+ // 1998.07.01 -- changed to allow destructive reuse of LHS
+ // 1998.07.01 -- changed to allow virtual lengths for the arrays
+ // 1998.12.29 -- use lookaside for digit/carry calculation
+ // 1999.08.07 -- avoid multiply when mult=1, and make db an int
+ // 1999.12.22 -- special case m=-1, also drop 0 special case
+ private static final byte[] byteaddsub(byte a[], int avlen, byte b[], int bvlen, int m, boolean reuse) {
+ int alength;
+ int blength;
+ int ap; // index into A of the digit being processed (moves right to left)
+ int bp; // index into B of the digit being processed (moves right to left)
+ int maxarr; // index of the final digit of the longer (virtual) operand
+ byte reb[];
+ boolean quickm;
+ int digit;
+ int op = 0; // index into the result of the digit being produced
+ int dp90 = 0;
+ byte newarr[];
+ int i = 0;
+
+ // We'll usually be right if we assume no carry
+ alength = a.length; // physical lengths
+ blength = b.length; // ..
+ ap = avlen - 1; // -> final (rightmost) digit
+ bp = bvlen - 1; // ..
+ maxarr = bp;
+ if (maxarr < ap)
+ maxarr = ap;
+ reb = (byte[]) null; // result byte array
+ if (reuse)
+ if ((maxarr + 1) == alength)
+ reb = a; // OK to reuse A
+ if (reb == null)
+ reb = new byte[maxarr + 1]; // need new array
+
+ quickm = false; // 1 if no multiply needed
+ if (m == 1)
+ quickm = true; // most common
+ else if (m == (-1))
+ quickm = true; // also common
+
+ digit = 0; // digit, with carry or borrow
+ {
+ op = maxarr;
+ op: for (; op >= 0; op--) {
+ if (ap >= 0) {
+ if (ap < alength)
+ digit = digit + a[ap]; // within A
+ ap--;
+ }
+ if (bp >= 0) {
+ if (bp < blength) { // within B
+ if (quickm) {
+ if (m > 0)
+ digit = digit + b[bp]; // most common
+ else
+ digit = digit - b[bp]; // also common
+ } else
+ digit = digit + (b[bp] * m);
+ }
+ bp--;
+ }
+ /* result so far (digit) could be -90 through 99 */
+ if (digit < 10)
+ if (digit >= 0) {
+ do { // 0-9
+ reb[op] = (byte) digit;
+ digit = 0; // no carry
+ continue op;
+ } while (false);
+ }/* quick */
+ dp90 = digit + 90; // offset so that -90 maps to index 0 of the lookaside tables
+ reb[op] = bytedig[dp90]; // this digit
+ digit = bytecar[dp90]; // carry or borrow
+ }
+ }/* op */
+
+ if (digit == 0)
+ return reb; // no carry
+ // following line will become an Assert, later
+ // if digit<0 then signal ArithmeticException("internal.error ["digit"]")
+
+ /* We have carry -- need to make space for the extra digit */
+ newarr = (byte[]) null;
+ if (reuse)
+ if ((maxarr + 2) == a.length)
+ newarr = a; // OK to reuse A
+ if (newarr == null)
+ newarr = new byte[maxarr + 2];
+ newarr[0] = (byte) digit; // the carried digit ..
+ // .. and all the rest [use local loop for short numbers]
+ if (maxarr < 10) {
+ int $24 = maxarr + 1;
+ i = 0;
+ for (; $24 > 0; $24--, i++) {
+ newarr[i + 1] = reb[i];
+ }
+ }/* i */
+ else
+ java.lang.System.arraycopy((java.lang.Object) reb, 0, (java.lang.Object) newarr, 1, maxarr + 1);
+ return newarr;
+ }
+
+ /*
+  * Initializer for digit array properties (lookaside). Returns the digit array, and initializes the
+  * carry array.
+  *
+  * Slot k of each table describes the value k-90, so the tables cover -90 through +99.
+  */
+
+ private static final byte[] diginit() {
+     final int last = 90 + 99; // index of the final slot
+     final byte[] digits = new byte[last + 1];
+     for (int slot = 0; slot <= last; slot++) {
+         final int value = slot - 90;
+         if (value < 0) {
+             // borrowing...
+             final int lifted = value + 100; // yes, this is right [consider -50]
+             digits[slot] = (byte) (lifted % 10);
+             bytecar[slot] = (byte) ((lifted / 10) - 10); // calculate borrow
+         } else {
+             digits[slot] = (byte) (value % 10);
+             bytecar[slot] = (byte) (value / 10); // calculate carry
+         }
+     }
+     return digits;
+ }
+
+ /*
+ * Create a copy of BigDecimal object for local use. This does NOT make a copy of the mantissa array.
+ *
Arg1 is the BigDecimal to clone (non-null)
+ */
+
+ private static final com.ibm.icu.math.BigDecimal clone(com.ibm.icu.math.BigDecimal dec) {
+     // Field-by-field copy; the mantissa array reference is shared with dec, not duplicated.
+     final com.ibm.icu.math.BigDecimal duplicate = new com.ibm.icu.math.BigDecimal();
+     duplicate.mant = dec.mant;
+     duplicate.form = dec.form;
+     duplicate.exp = dec.exp;
+     duplicate.ind = dec.ind;
+     return duplicate;
+ }
+
+ /*
+ * Check one or two numbers for lost digits. Arg1 is RHS (or null, if none) Arg2 is current DIGITS
+ * setting returns quietly or throws an exception
+ */
+
+ private void checkdigits(com.ibm.icu.math.BigDecimal rhs, int dig) {
+     // digits=0 disables the check entirely
+     if (dig != 0) {
+         // left-hand side (this): any non-zero digit beyond position dig would be lost
+         if (this.mant.length > dig && !allzero(this.mant, dig)) {
+             throw new java.lang.ArithmeticException("Too many digits:" + " " + this.toString());
+         }
+         // right-hand side is absent (null) for monadic operations
+         if (rhs != null && rhs.mant.length > dig && !allzero(rhs.mant, dig)) {
+             throw new java.lang.ArithmeticException("Too many digits:" + " " + rhs.toString());
+         }
+     }
+ }
+
+ /*
+ * Round to specified digits, if necessary. Arg1 is requested MathContext [with length and rounding
+ * mode] returns this, for convenience
+ */
+
+ private com.ibm.icu.math.BigDecimal round(com.ibm.icu.math.MathContext set) {
+ return round(set.digits, set.roundingMode);
+ }
+
+ /*
+ * Round to specified digits, if necessary. Arg1 is requested length (digits to round to) [may be <=0 when
+ * called from format, dodivide, etc.] Arg2 is rounding mode returns this, for convenience
+ *
+ * ind and exp are adjusted, but not cleared for a mantissa of zero
+ *
+ * The length of the mantissa returned will be Arg1, except when Arg1 is 0, in which case the returned mantissa
+ * length will be 1.
+ */
+
+ private com.ibm.icu.math.BigDecimal round(int len, int mode) {
+ // Rounds this object IN PLACE to at most len mantissa digits using rounding mode `mode`
+ // (one of the ROUND_* values tested below) and returns this.
+ // Throws ArithmeticException if mode is ROUND_UNNECESSARY and non-zero digits would be
+ // discarded, or if the resulting exponent exceeds MaxExp; throws IllegalArgumentException
+ // for an unrecognised mode.
+ // The `do { ... } while (false)` wrappers execute exactly once; they only delimit a block.
+ int adjust;
+ int sign;
+ byte oldmant[];
+ boolean reuse = false;
+ byte first = 0;
+ int increment;
+ byte newmant[] = null;
+ // number of digits that must be dropped from the right
+ adjust = mant.length - len;
+ if (adjust <= 0)
+ return this; // nowt to do
+
+ exp = exp + adjust; // exponent of result
+ sign = (int) ind; // save [assumes -1, 0, 1]
+ oldmant = mant; // save
+ if (len > 0) {
+ // remove the unwanted digits
+ mant = new byte[len];
+ java.lang.System.arraycopy((java.lang.Object) oldmant, 0, (java.lang.Object) mant, 0, len);
+ reuse = true; // can reuse mantissa
+ first = oldmant[len]; // first of discarded digits
+ } else {/* len<=0 */
+ // every digit is discarded: the truncated value is zero, represented by the shared ZERO mantissa
+ mant = ZERO.mant;
+ ind = iszero;
+ reuse = false; // cannot reuse mantissa
+ if (len == 0)
+ first = oldmant[0];
+ else
+ first = (byte) 0; // [virtual digit]
+ }
+
+ // decide rounding adjustment depending on mode, sign, and discarded digits
+ // (increment is either 0 or the saved sign, so a zero value is never bumped)
+ increment = 0; // bumper
+ {
+ do {/* select */
+ if (mode == ROUND_HALF_UP) { // default first [most common]
+ if (first >= 5)
+ increment = sign;
+ } else if (mode == ROUND_UNNECESSARY) { // default for setScale()
+ // discarding any non-zero digits is an error
+ if ((!(allzero(oldmant, len))))
+ throw new java.lang.ArithmeticException("Rounding necessary");
+ } else if (mode == ROUND_HALF_DOWN) { // 0.5000 goes down
+ if (first > 5)
+ increment = sign;
+ else if (first == 5)
+ // exactly 5 followed by any non-zero digit is more than half
+ if ((!(allzero(oldmant, len + 1))))
+ increment = sign;
+ } else if (mode == ROUND_HALF_EVEN) { // 0.5000 goes down if left digit even
+ if (first > 5)
+ increment = sign;
+ else if (first == 5) {
+ if ((!(allzero(oldmant, len + 1))))
+ increment = sign;
+ else /* 0.5000 */
+ // exact tie: bump only if the last retained digit is odd
+ if ((((mant[mant.length - 1]) % 2)) == 1)
+ increment = sign;
+ }
+ } else if (mode == ROUND_DOWN) {
+ // never increment
+ } else if (mode == ROUND_UP) { // increment if discarded non-zero
+ if ((!(allzero(oldmant, len))))
+ increment = sign;
+ } else if (mode == ROUND_CEILING) { // more positive
+ if (sign > 0)
+ if ((!(allzero(oldmant, len))))
+ increment = sign;
+ } else if (mode == ROUND_FLOOR) { // more negative
+ if (sign < 0)
+ if ((!(allzero(oldmant, len))))
+ increment = sign;
+ } else {
+ throw new java.lang.IllegalArgumentException("Bad round value:" + " " + mode);
+ }
+ } while (false);
+ }/* modes */
+
+ if (increment != 0) {
+ do {
+ if (ind == iszero) {
+ // we must not subtract from 0, but result is trivial anyway
+ mant = ONE.mant;
+ ind = (byte) increment;
+ } else {
+ // mantissa is non-0; we can safely add or subtract 1
+ // increment equals the saved sign here, so negating it for a negative number
+ // makes it +1: the mantissa (a magnitude) grows by one
+ if (ind == isneg)
+ increment = -increment;
+ newmant = byteaddsub(mant, mant.length, ONE.mant, 1, increment, reuse);
+ if (newmant.length > mant.length) { // had a carry
+ // drop rightmost digit and raise exponent
+ exp++;
+ // mant is already the correct length
+ java.lang.System.arraycopy((java.lang.Object) newmant, 0, (java.lang.Object) mant, 0,
+ mant.length);
+ } else
+ mant = newmant;
+ }
+ } while (false);
+ }/* bump */
+ // rounding can increase exponent significantly
+ if (exp > MaxExp)
+ throw new java.lang.ArithmeticException("Exponent Overflow:" + " " + exp);
+ return this;
+ }
+
+ /*
+ * Test if rightmost digits are all 0. Arg1 is a mantissa array to test Arg2 is the offset of first digit to
+ * check [may be negative; if so, digits to left are 0's] returns 1 if all the digits starting at Arg2 are 0
+ *
+ * Arg2 may be beyond array bounds, in which case 1 is returned
+ */
+
+ private static final boolean allzero(byte array[], int start) {
+     // A negative offset is clamped to 0; an offset at or past the end examines nothing and yields true.
+     int pos = (start < 0) ? 0 : start;
+     while (pos < array.length) {
+         if (array[pos] != 0) {
+             return false;
+         }
+         pos++;
+     }
+     return true;
+ }
+
+ /*
+ * Carry out final checks and canonicalization This finishes off the current number by: 1. Rounding if
+ * necessary (NB: length includes leading zeros) 2. Stripping trailing zeros (if requested and \PLAIN) 3. Stripping
+ * leading zeros (always) 4. Selecting exponential notation (if required) 5. Converting a zero result to just '0'
+ * (if \PLAIN) In practice, these operations overlap and share code. It always sets form.
+ * Arg1 is requested
+ * MathContext (length to round to, trigger, and FORM) Arg2 is 1 if trailing insignificant zeros should be removed
+ * after round (for division, etc.), provided that set.form isn't PLAIN. returns this, for convenience
+ */
+
+ private com.ibm.icu.math.BigDecimal finish(com.ibm.icu.math.MathContext set, boolean strip) {
+ // Final clean-up of this number, performed IN PLACE; returns this.
+ // Steps: round to set.digits, optionally strip trailing zeros, strip leading zeros,
+ // choose the form (PLAIN or set.form), range-check the exponent, and canonicalize
+ // an all-zero mantissa. Throws ArithmeticException on exponent overflow.
+ int d = 0;
+ int i = 0;
+ byte newmant[] = null;
+ int mag = 0;
+ int sig = 0;
+ /* Round if mantissa too long and digits requested */
+ if (set.digits != 0)
+ if (this.mant.length > set.digits)
+ this.round(set);
+
+ /*
+ * If strip requested (and standard formatting), remove insignificant trailing zeros.
+ */
+ if (strip)
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN) {
+ d = this.mant.length;
+ /* see if we need to drop any trailing zeros */
+ {
+ // scan from the last digit down to index 1, so at least one digit is always kept;
+ // each dropped zero is compensated by raising the exponent
+ i = d - 1;
+ i: for (; i >= 1; i--) {
+ if (this.mant[i] != 0)
+ break i;
+ d--;
+ exp++;
+ }
+ }/* i */
+ if (d < this.mant.length) {/* need to reduce */
+ newmant = new byte[d];
+ java.lang.System.arraycopy((java.lang.Object) this.mant, 0, (java.lang.Object) newmant, 0, d);
+ this.mant = newmant;
+ }
+ }
+
+ form = (byte) com.ibm.icu.math.MathContext.PLAIN; // preset
+
+ /* Now check for leading- and all- zeros in mantissa */
+ {
+ // the loop returns from inside at the first non-zero digit; it only runs to
+ // completion when every digit is zero
+ int $26 = this.mant.length;
+ i = 0;
+ for (; $26 > 0; $26--, i++) {
+ if (this.mant[i] != 0) {
+ // non-0 result; ind will be correct
+ // remove leading zeros [e.g., after subtract]
+ if (i > 0) {
+ do {
+ newmant = new byte[this.mant.length - i];
+ java.lang.System.arraycopy((java.lang.Object) this.mant, i, (java.lang.Object) newmant, 0,
+ this.mant.length - i);
+ this.mant = newmant;
+ } while (false);
+ }/* delead */
+ // now determine form if not PLAIN
+ // mag is the exponent plus the digit count; after the decrement below it is the
+ // exponent value that is range-checked
+ mag = exp + mant.length;
+ if (mag > 0) { // most common path
+ if (mag > set.digits)
+ if (set.digits != 0)
+ form = (byte) set.form;
+ if ((mag - 1) <= MaxExp)
+ return this; // no overflow; quick return
+ } else if (mag < (-5))
+ form = (byte) set.form;
+ /* check for overflow */
+ mag--;
+ // note: non-short-circuit '|' is used; both comparisons are always evaluated
+ if ((mag < MinExp) | (mag > MaxExp)) {
+ overflow: do {
+ // possible reprieve if form is engineering
+ if (form == com.ibm.icu.math.MathContext.ENGINEERING) {
+ // round the exponent down to a multiple of 3 and re-test the range
+ sig = mag % 3; // leftover
+ if (sig < 0)
+ sig = 3 + sig; // negative exponent
+ mag = mag - sig; // exponent to use
+ // 1999.06.29: second test here must be MaxExp
+ if (mag >= MinExp)
+ if (mag <= MaxExp)
+ break overflow;
+ }
+ throw new java.lang.ArithmeticException("Exponent Overflow:" + " " + mag);
+ } while (false);
+ }/* overflow */
+ return this;
+ }
+ }
+ }/* i */
+
+ // Drop through to here only if mantissa is all zeros
+ ind = iszero;
+ {/* select */
+ if (set.form != com.ibm.icu.math.MathContext.PLAIN)
+ exp = 0; // standard result; go to '0'
+ else if (exp > 0)
+ exp = 0; // +ve exponent also goes to '0'
+ else {
+ // a plain number with -ve exponent; preserve and check exponent
+ if (exp < MinExp)
+ throw new java.lang.ArithmeticException("Exponent Overflow:" + " " + exp);
+ }
+ }
+ mant = ZERO.mant; // canonical mantissa
+ return this;
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/MathContext.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/MathContext.java
new file mode 100644
index 00000000000..010c083596b
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/math/MathContext.java
@@ -0,0 +1,601 @@
+/* Generated from 'MathContext.nrx' 8 Sep 2000 11:07:48 [v2.00] */
+/* Options: Binary Comments Crossref Format Java Logo Strictargs Strictcase Trace2 Verbose3 */
+package com.ibm.icu.math;
+
+/* ------------------------------------------------------------------ */
+/* MathContext -- Math context settings */
+/* ------------------------------------------------------------------ */
+/* Copyright IBM Corporation, 1997-2011. All Rights Reserved. */
+/* */
+/* The MathContext object encapsulates the settings used by the */
+/* BigDecimal class; it could also be used by other arithmetics. */
+/* ------------------------------------------------------------------ */
+/* Notes: */
+/* */
+/* 1. The properties are checked for validity on construction, so */
+/* the BigDecimal class may assume that they are correct. */
+/* ------------------------------------------------------------------ */
+/* Author: Mike Cowlishaw */
+/* 1997.09.03 Initial version (edited from netrexx.lang.RexxSet) */
+/* 1997.09.12 Add lostDigits property */
+/* 1998.05.02 Make the class immutable and final; drop set methods */
+/* 1998.06.05 Add Round (rounding modes) property */
+/* 1998.06.25 Rename from DecimalContext; allow digits=0 */
+/* 1998.10.12 change to com.ibm.icu.math package */
+/* 1999.02.06 add javadoc comments */
+/* 1999.03.05 simplify; changes from discussion with J. Bloch */
+/* 1999.03.13 1.00 release to IBM Centre for Java Technology */
+/* 1999.07.10 1.04 flag serialization unused */
+/* 2000.01.01 1.06 copyright update */
+/* ------------------------------------------------------------------ */
+
+
+
+
+/**
+ * The {@code MathContext} immutable class encapsulates the
+ * settings understood by the operator methods of the {@link BigDecimal}
+ * class (and potentially other classes). Operator methods are those
+ * that effect an operation on a number or a pair of numbers.
+ * <p>
+ * The settings, which are not base-dependent, comprise:
+ * <ol>
+ * <li>{@code digits}:
+ * the number of digits (precision) to be used for an operation
+ * <li>{@code form}:
+ * the form of any exponent that results from the operation
+ * <li>{@code lostDigits}:
+ * whether checking for lost digits is enabled
+ * <li>{@code roundingMode}:
+ * the algorithm to be used for rounding.
+ * </ol>
+ * <p>
+ * When provided, a {@code MathContext} object supplies the
+ * settings for an operation directly.
+ * <p>
+ * When {@code MathContext.DEFAULT} is provided for a
+ * {@code MathContext} parameter then the default settings are used
+ * ({@code 9, SCIENTIFIC, false, ROUND_HALF_UP}).
+ * <p>
+ * In the {@code BigDecimal} class, all methods which accept a
+ * {@code MathContext} object (or its defaults) also have a version of the
+ * method which does not accept a MathContext parameter. These versions
+ * carry out unlimited precision fixed point arithmetic (as though the
+ * settings were {@code 0, PLAIN, false, ROUND_HALF_UP}).
+ * <p>
+ * The instance variables are shared with default access (so they are
+ * directly accessible to the {@code BigDecimal} class), but must
+ * never be changed.
+ * <p>
+ * The rounding mode constants have the same names and values as the
+ * constants of the same name in {@code java.math.BigDecimal}, to
+ * maintain compatibility with earlier versions of
+ * {@code BigDecimal}.
+ *
+ * @see BigDecimal
+ * @author Mike Cowlishaw
+ * @stable ICU 2.0
+ */
+
+public final class MathContext implements java.io.Serializable{
+ //private static final java.lang.String $0="MathContext.nrx";
+
+ /* ----- Properties ----- */
+ /* properties public constant */
+ /**
+ * Plain (fixed point) notation, without any exponent.
+ * Used as a setting to control the form of the result of a
+ * {@code BigDecimal} operation.
+ * A zero result in plain form may have a decimal part of one or
+ * more zeros.
+ *
+ * @see #ENGINEERING
+ * @see #SCIENTIFIC
+ * @stable ICU 2.0
+ */
+ public static final int PLAIN=0; // [no exponent]
+
+ /**
+ * Standard floating point notation (with scientific exponential
+ * format, where there is one digit before any decimal point).
+ * Used as a setting to control the form of the result of a
+ * {@code BigDecimal} operation.
+ * A zero result in plain form may have a decimal part of one or
+ * more zeros.
+ *
+ * @see #ENGINEERING
+ * @see #PLAIN
+ * @stable ICU 2.0
+ */
+ public static final int SCIENTIFIC=1; // 1 digit before .
+
+ /**
+ * Standard floating point notation (with engineering exponential
+ * format, where the power of ten is a multiple of 3).
+ * Used as a setting to control the form of the result of a
+ * {@code BigDecimal} operation.
+ * A zero result in plain form may have a decimal part of one or
+ * more zeros.
+ *
+ * @see #PLAIN
+ * @see #SCIENTIFIC
+ * @stable ICU 2.0
+ */
+ public static final int ENGINEERING=2; // 1-3 digits before .
+
+ // The rounding modes match the original BigDecimal class values
+ /**
+ * Rounding mode to round to a more positive number.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If any of the discarded digits are non-zero then the result
+ * should be rounded towards the next more positive digit.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_CEILING=2;
+
+ /**
+ * Rounding mode to round towards zero.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * All discarded digits are ignored (truncated). The result is
+ * neither incremented nor decremented.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_DOWN=1;
+
+ /**
+ * Rounding mode to round to a more negative number.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If any of the discarded digits are non-zero then the result
+ * should be rounded towards the next more negative digit.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_FLOOR=3;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant
+ * value is rounded down.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If the discarded digits represent greater than half (0.5 times)
+ * the value of a one in the next position then the result should be
+ * rounded up (away from zero). Otherwise the discarded digits are
+ * ignored.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_DOWN=5;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant
+ * value is rounded to the nearest even neighbor.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If the discarded digits represent greater than half (0.5 times)
+ * the value of a one in the next position then the result should be
+ * rounded up (away from zero). If they represent less than half,
+ * then the result should be rounded down.
+ *
+ * Otherwise (they represent exactly half) the result is rounded
+ * down if its rightmost digit is even, or rounded up if its
+ * rightmost digit is odd (to make an even digit).
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_EVEN=6;
+
+ /**
+ * Rounding mode to round to nearest neighbor, where an equidistant
+ * value is rounded up.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If the discarded digits represent greater than or equal to half
+ * (0.5 times) the value of a one in the next position then the result
+ * should be rounded up (away from zero). Otherwise the discarded
+ * digits are ignored.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_HALF_UP=4;
+
+ /**
+ * Rounding mode to assert that no rounding is necessary.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * Rounding (potential loss of information) is not permitted.
+ * If any of the discarded digits are non-zero then an
+ * {@code ArithmeticException} should be thrown.
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_UNNECESSARY=7;
+
+ /**
+ * Rounding mode to round away from zero.
+ * Used as a setting to control the rounding mode used during a
+ * {@code BigDecimal} operation.
+ *
+ * If any of the discarded digits are non-zero then the result will
+ * be rounded up (away from zero).
+ * @stable ICU 2.0
+ */
+ public static final int ROUND_UP=0;
+
+
+ /* properties shared */
+ /**
+ * The number of digits (precision) to be used for an operation.
+ * A value of 0 indicates that unlimited precision (as many digits
+ * as are required) will be used.
+ *
+ * The {@link BigDecimal} operator methods use this value to
+ * determine the precision of results.
+ * Note that leading zeros (in the integer part of a number) are
+ * never significant.
+ *
+ * {@code digits} will always be non-negative.
+ *
+ * @serial
+ */
+ int digits;
+
+ /**
+ * The form of results from an operation.
+ *
+ * The {@link BigDecimal} operator methods use this value to
+ * determine the form of results, in particular whether and how
+ * exponential notation should be used.
+ *
+ * @see #ENGINEERING
+ * @see #PLAIN
+ * @see #SCIENTIFIC
+ * @serial
+ */
+ int form; // values for this must fit in a byte
+
+ /**
+ * Controls whether lost digits checking is enabled for an
+ * operation.
+ * Set to {@code true} to enable checking, or
+ * to {@code false} to disable checking.
+ *
+ * When enabled, the {@link BigDecimal} operator methods check
+ * the precision of their operand or operands, and throw an
+ * {@code ArithmeticException} if an operand is more precise
+ * than the digits setting (that is, digits would be lost).
+ * When disabled, operands are rounded to the specified digits.
+ *
+ * @serial
+ */
+ boolean lostDigits;
+
+ /**
+ * The rounding algorithm to be used for an operation.
+ *
+ * The {@link BigDecimal} operator methods use this value to
+ * determine the algorithm to be used when non-zero digits have to
+ * be discarded in order to reduce the precision of a result.
+ * The value must be one of the public constants whose name starts
+ * with {@code ROUND_}.
+ *
+ * @see #ROUND_CEILING
+ * @see #ROUND_DOWN
+ * @see #ROUND_FLOOR
+ * @see #ROUND_HALF_DOWN
+ * @see #ROUND_HALF_EVEN
+ * @see #ROUND_HALF_UP
+ * @see #ROUND_UNNECESSARY
+ * @see #ROUND_UP
+ * @serial
+ */
+ int roundingMode;
+
+ /* properties private constant */
+ // default settings
+ private static final int DEFAULT_FORM=SCIENTIFIC;
+ private static final int DEFAULT_DIGITS=9;
+ private static final boolean DEFAULT_LOSTDIGITS=false;
+ private static final int DEFAULT_ROUNDINGMODE=ROUND_HALF_UP;
+
+ /* properties private constant */
+
+ private static final int MIN_DIGITS=0; // smallest value for DIGITS.
+ private static final int MAX_DIGITS=999999999; // largest value for DIGITS. If increased,
+ // the BigDecimal class may need update.
+ // list of valid rounding mode values, most common two first
+ private static final int ROUNDS[]=new int[]{ROUND_HALF_UP,ROUND_UNNECESSARY,ROUND_CEILING,ROUND_DOWN,ROUND_FLOOR,ROUND_HALF_DOWN,ROUND_HALF_EVEN,ROUND_UP};
+
+
+ private static final java.lang.String ROUNDWORDS[]=new java.lang.String[]{"ROUND_HALF_UP","ROUND_UNNECESSARY","ROUND_CEILING","ROUND_DOWN","ROUND_FLOOR","ROUND_HALF_DOWN","ROUND_HALF_EVEN","ROUND_UP"}; // matching names of the ROUNDS values
+
+
+
+
+ /* properties private constant unused */
+
+ // Serialization version
+ private static final long serialVersionUID=7163376998892515376L;
+
+ /* properties public constant */
+ /**
+ * A {@code MathContext} object initialized to the default
+ * settings for general-purpose arithmetic. That is,
+ * {@code digits=9 form=SCIENTIFIC lostDigits=false
+ * roundingMode=ROUND_HALF_UP}.
+ *
+ * @see #SCIENTIFIC
+ * @see #ROUND_HALF_UP
+ * @stable ICU 2.0
+ */
+ public static final com.ibm.icu.math.MathContext DEFAULT=new com.ibm.icu.math.MathContext(DEFAULT_DIGITS,DEFAULT_FORM,DEFAULT_LOSTDIGITS,DEFAULT_ROUNDINGMODE);
+
+
+
+
+ /* ----- Constructors ----- */
+
+ /**
+ * Constructs a new {@code MathContext} with a specified
+ * precision.
+ * The other settings are set to the default values
+ * (see {@link #DEFAULT}).
+ *
+ * An {@code IllegalArgumentException} is thrown if the
+ * {@code setdigits} parameter is out of range
+ * (<0 or >999999999).
+ *
+ * @param setdigits The {@code int} digits setting
+ * for this {@code MathContext}.
+ * @throws IllegalArgumentException parameter out of range.
+ * @stable ICU 2.0
+ */
+
+ public MathContext(int setdigits) {
+     // Only the precision is caller-supplied; form, lostDigits and roundingMode take the class defaults.
+     this(setdigits, DEFAULT_FORM, DEFAULT_LOSTDIGITS, DEFAULT_ROUNDINGMODE);
+ }
+
+
+ /**
+ * Constructs a new {@code MathContext} with a specified
+ * precision and form.
+ * The other settings are set to the default values
+ * (see {@link #DEFAULT}).
+ *
+ * An {@code IllegalArgumentException} is thrown if the
+ * {@code setdigits} parameter is out of range
+ * (<0 or >999999999), or if the value given for the
+ * {@code setform} parameter is not one of the appropriate
+ * constants.
+ *
+ * @param setdigits The {@code int} digits setting
+ * for this {@code MathContext}.
+ * @param setform The {@code int} form setting
+ * for this {@code MathContext}.
+ * @throws IllegalArgumentException parameter out of range.
+ * @stable ICU 2.0
+ */
+
+ public MathContext(int setdigits, int setform) {
+     // Precision and form are caller-supplied; lostDigits and roundingMode take the class defaults.
+     this(setdigits, setform, DEFAULT_LOSTDIGITS, DEFAULT_ROUNDINGMODE);
+ }
+
+ /**
+ * Constructs a new {@code MathContext} with a specified
+ * precision, form, and lostDigits setting.
+ * The roundingMode setting is set to its default value
+ * (see {@link #DEFAULT}).
+ *
+ * An {@code IllegalArgumentException} is thrown if the
+ * {@code setdigits} parameter is out of range
+ * (<0 or >999999999), or if the value given for the
+ * {@code setform} parameter is not one of the appropriate
+ * constants.
+ *
+ * @param setdigits The {@code int} digits setting
+ * for this {@code MathContext}.
+ * @param setform The {@code int} form setting
+ * for this {@code MathContext}.
+ * @param setlostdigits The {@code boolean} lostDigits
+ * setting for this {@code MathContext}.
+ * @throws IllegalArgumentException parameter out of range.
+ * @stable ICU 2.0
+ */
+
+ public MathContext(int setdigits, int setform, boolean setlostdigits) {
+     // Everything except roundingMode is caller-supplied; roundingMode takes the class default.
+     this(setdigits, setform, setlostdigits, DEFAULT_ROUNDINGMODE);
+ }
+
+ /**
+ * Constructs a new {@code MathContext} with a specified
+ * precision, form, lostDigits, and roundingMode setting.
+ *
+ * An {@code IllegalArgumentException} is thrown if the
+ * {@code setdigits} parameter is out of range
+ * (<0 or >999999999), or if the value given for the
+ * {@code setform} or {@code setroundingmode} parameters is
+ * not one of the appropriate constants.
+ *
+ * @param setdigits The {@code int} digits setting
+ * for this {@code MathContext}.
+ * @param setform The {@code int} form setting
+ * for this {@code MathContext}.
+ * @param setlostdigits The {@code boolean} lostDigits
+ * setting for this {@code MathContext}.
+ * @param setroundingmode The {@code int} roundingMode setting
+ * for this {@code MathContext}.
+ * @throws IllegalArgumentException parameter out of range.
+ * @stable ICU 2.0
+ */
+
+ public MathContext(int setdigits,int setform,boolean setlostdigits,int setroundingmode){
+     super();
+     // All settings are validated before any field is assigned, so a failed
+     // construction never leaves a partially-initialized object visible.
+
+     // Range-check the precision. The test is skipped for DEFAULT_DIGITS (the common case).
+     // Both bounds are checked: MIN_DIGITS (0) and MAX_DIGITS (999999999).
+     if (setdigits != DEFAULT_DIGITS) {
+         if (setdigits < MIN_DIGITS)
+             throw new java.lang.IllegalArgumentException("Digits too small:"+" "+setdigits);
+         if (setdigits > MAX_DIGITS)
+             throw new java.lang.IllegalArgumentException("Digits too large:"+" "+setdigits);
+     }
+
+     // The form must be one of the three notation constants.
+     if (setform != SCIENTIFIC && setform != ENGINEERING && setform != PLAIN)
+         throw new java.lang.IllegalArgumentException("Bad form value:"+" "+setform);
+
+     // The rounding mode must be one of the ROUND_* constants listed in ROUNDS.
+     if (!isValidRound(setroundingmode))
+         throw new java.lang.IllegalArgumentException("Bad roundingMode value:"+" "+setroundingmode);
+
+     digits=setdigits;
+     form=setform;
+     lostDigits=setlostdigits; // [no bad value possible]
+     roundingMode=setroundingmode;
+ }
+
+ /**
+ * Returns the digits setting.
+ * This value is always non-negative.
+ *
+ * @return an {@code int} which is the value of the digits
+ * setting
+ * @stable ICU 2.0
+ */
+
+ public int getDigits() {
+     // plain accessor for the digits field
+     return this.digits;
+ }
+
+ /**
+ * Returns the form setting.
+ * This will be one of
+ * {@link #ENGINEERING},
+ * {@link #PLAIN}, or
+ * {@link #SCIENTIFIC}.
+ *
+ * @return an {@code int} which is the value of the form setting
+ * @stable ICU 2.0
+ */
+
+ public int getForm() {
+     // plain accessor for the form field
+     return this.form;
+ }
+
+ /**
+ * Returns the lostDigits setting.
+ * This will be either {@code true} (enabled) or
+ * {@code false} (disabled).
+ *
+ * @return a {@code boolean} which is the value of the lostDigits
+ * setting
+ * @stable ICU 2.0
+ */
+
+ public boolean getLostDigits() {
+     // plain accessor for the lostDigits field
+     return this.lostDigits;
+ }
+
+ /**
+ * Returns the roundingMode setting.
+ * This will be one of
+ * {@link #ROUND_CEILING},
+ * {@link #ROUND_DOWN},
+ * {@link #ROUND_FLOOR},
+ * {@link #ROUND_HALF_DOWN},
+ * {@link #ROUND_HALF_EVEN},
+ * {@link #ROUND_HALF_UP},
+ * {@link #ROUND_UNNECESSARY}, or
+ * {@link #ROUND_UP}.
+ *
+ * @return an {@code int} which is the value of the roundingMode
+ * setting
+ * @stable ICU 2.0
+ */
+
+ public int getRoundingMode() {
+     // plain accessor for the roundingMode field
+     return this.roundingMode;
+ }
+
+ /** Returns the {@code MathContext} as a readable string.
+ * The {@code String} returned represents the settings of the
+ * {@code MathContext} object as four blank-delimited words
+ * separated by a single blank and with no leading or trailing blanks,
+ * as follows:
+ * <ol>
+ * <li>
+ * {@code digits=}, immediately followed by
+ * the value of the digits setting as a numeric word.
+ * <li>
+ * {@code form=}, immediately followed by
+ * the value of the form setting as an uppercase word
+ * (one of {@code SCIENTIFIC}, {@code PLAIN}, or
+ * {@code ENGINEERING}).
+ * <li>
+ * {@code lostDigits=}, immediately followed by
+ * the value of the lostDigits setting
+ * ({@code 1} if enabled, {@code 0} if disabled).
+ * <li>
+ * {@code roundingMode=}, immediately followed by
+ * the value of the roundingMode setting as a word.
+ * This word will be the same as the name of the corresponding public
+ * constant.
+ * </ol>
+ * <p>
+ * For example:
+ * <br>
+ * {@code digits=9 form=SCIENTIFIC lostDigits=0 roundingMode=ROUND_HALF_UP}
+ * <p>
+ * Additional words may be appended to the result of
+ * {@code toString} in the future if more properties are added
+ * to the class.
+ *
+ * @return a {@code String} representing the context settings.
+ * @stable ICU 2.0
+ */
+
+ public java.lang.String toString(){
+ java.lang.String formstr=null;
+ int r=0;
+ java.lang.String roundword=null;
+ {/*select*/
+ if (form==SCIENTIFIC)
+ formstr="SCIENTIFIC";
+ else if (form==ENGINEERING)
+ formstr="ENGINEERING";
+ else{
+ formstr="PLAIN";/* form=PLAIN */
+ }
+ }
+ {int $1=ROUNDS.length;r=0;r:for(;$1>0;$1--,r++){
+ if (roundingMode==ROUNDS[r])
+ {
+ roundword=ROUNDWORDS[r];
+ break r;
+ }
+ }
+ }/*r*/
+ return "digits="+digits+" "+"form="+formstr+" "+"lostDigits="+(lostDigits?"1":"0")+" "+"roundingMode="+roundword;
+ }
+
+
+ /* Test whether round is valid. */
+ // This could be made shared for use by BigDecimal for setScale.
+
+ private static boolean isValidRound(int testround) {
+     // Linear search of ROUNDS, stopping at the first match.
+     boolean found = false;
+     int idx = 0;
+     while (!found && idx < ROUNDS.length) {
+         found = (ROUNDS[idx] == testround);
+         idx++;
+     }
+     return found;
+ }
+ }
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Bidi.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Bidi.java
new file mode 100644
index 00000000000..c37d38500c8
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Bidi.java
@@ -0,0 +1,2586 @@
+/*
+*******************************************************************************
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+
+/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
+ * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
+ * concept of RUNS_ONLY which is a double operation.
+ * It could be advantageous to divide this into 3 concepts:
+ * a) Operation: direct / inverse / RUNS_ONLY
+ * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
+ * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
+ * This would allow combinations not possible today like RUNS_ONLY with
+ * NUMBERS_SPECIAL.
+ * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
+ * REMOVE_CONTROLS for the inverse step.
+ * Not all combinations would be supported, and probably not all do make sense.
+ * This would need to document which ones are supported and what are the
+ * fallbacks for unsupported combinations.
+ */
+
+//TODO: make sample program do something simple but real and complete
+
+package com.ibm.icu.text;
+
+import java.text.AttributedCharacterIterator;
+
+/**
+ *
+ *
Bidi algorithm for ICU
+ *
+ * This is an implementation of the Unicode Bidirectional algorithm. The
+ * algorithm is defined in the Unicode Standard Annex #9 ,
+ * version 13, also described in The Unicode Standard, Version 4.0 .
+ *
+ *
+ * Note: Libraries that perform a bidirectional algorithm and reorder strings
+ * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
+ * shaping (ArabicShaping) classes can be used at the core of such "Storage
+ * Layout Engines".
+ *
+ *
General remarks about the API:
+ *
+ * The "limit" of a sequence of characters is the position just after
+ * their last character, i.e., one more than that position.
+ *
+ *
+ * Some of the API methods provide access to "runs". Such a
+ * "run" is defined as a sequence of characters that are at the same
+ * embedding level after performing the Bidi algorithm.
+ *
+ *
+ *
Basic concept: paragraph
+ * A piece of text can be divided into several paragraphs by characters
+ * with the Bidi class Block Separator
. For handling of
+ * paragraphs, see:
+ *
+ * {@link #countParagraphs}
+ * {@link #getParaLevel}
+ * {@link #getParagraph}
+ * {@link #getParagraphByIndex}
+ *
+ *
+ * Basic concept: text direction
+ * The direction of a piece of text may be:
+ *
+ * {@link #LTR}
+ * {@link #RTL}
+ * {@link #MIXED}
+ *
+ *
+ * Basic concept: levels
+ *
+ * Levels in this API represent embedding levels according to the Unicode
+ * Bidirectional Algorithm.
+ * Their low-order bit (even/odd value) indicates the visual direction.
+ *
+ * Levels can be abstract values when used for the
+ * paraLevel
and embeddingLevels
+ * arguments of setPara()
; there:
+ *
+ * the high-order bit of an embeddingLevels[]
+ * value indicates whether the using application is
+ * specifying the level of a character to override whatever the
+ * Bidi implementation would resolve it to.
+ * paraLevel
can be set to the
+ * pseudo-level values LEVEL_DEFAULT_LTR
+ * and LEVEL_DEFAULT_RTL
.
+ *
+ *
+ * The related constants are not real, valid level values.
+ * DEFAULT_XXX
can be used to specify
+ * a default for the paragraph level for
+ * when the setPara()
method
+ * shall determine it but there is no
+ * strongly typed character in the input.
+ *
+ * Note that the value for LEVEL_DEFAULT_LTR
is even
+ * and the one for LEVEL_DEFAULT_RTL
is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that MAX_EXPLICIT_LEVEL is odd.
+ *
+ *
See Also:
+ * {@link #LEVEL_DEFAULT_LTR}
+ * {@link #LEVEL_DEFAULT_RTL}
+ * {@link #LEVEL_OVERRIDE}
+ * {@link #MAX_EXPLICIT_LEVEL}
+ * {@link #setPara}
+ *
+ *
+ * Basic concept: Reordering Mode
+ * Reordering mode values indicate which variant of the Bidi algorithm to
+ * use.
+ *
+ * See Also:
+ * {@link #setReorderingMode}
+ * {@link #REORDER_DEFAULT}
+ * {@link #REORDER_NUMBERS_SPECIAL}
+ * {@link #REORDER_GROUP_NUMBERS_WITH_R}
+ * {@link #REORDER_RUNS_ONLY}
+ * {@link #REORDER_INVERSE_NUMBERS_AS_L}
+ * {@link #REORDER_INVERSE_LIKE_DIRECT}
+ * {@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
+ *
+ *
+ * Basic concept: Reordering Options
+ * Reordering options can be applied during Bidi text transformations.
+ * See Also:
+ * {@link #setReorderingOptions}
+ * {@link #OPTION_DEFAULT}
+ * {@link #OPTION_INSERT_MARKS}
+ * {@link #OPTION_REMOVE_CONTROLS}
+ * {@link #OPTION_STREAMING}
+ *
+ *
+ *
+ * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
+ * @stable ICU 3.8
+ *
+ *
+ * Sample code for the ICU Bidi API
+ *
+ * Rendering a paragraph with the ICU Bidi API
+ *
+ * This is (hypothetical) sample code that illustrates how the ICU Bidi API
+ * could be used to render a paragraph of text. Rendering code depends highly on
+ * the graphics system, therefore this sample code must make a lot of
+ * assumptions, which may or may not match any existing graphics system's
+ * properties.
+ *
+ *
+ * The basic assumptions are:
+ *
+ *
+ * Rendering is done from left to right on a horizontal line.
+ * A run of single-style, unidirectional text can be rendered at once.
+ *
+ * Such a run of text is passed to the graphics system with characters
+ * (code units) in logical order.
+ * The line-breaking algorithm is very complicated and Locale-dependent -
+ * and therefore its implementation is omitted from this sample code.
+ *
+ *
+ *
+ *
+ * package com.ibm.icu.dev.test.bidi;
+ *
+ * import com.ibm.icu.text.Bidi;
+ * import com.ibm.icu.text.BidiRun;
+ *
+ * public class Sample {
+ *
+ * static final int styleNormal = 0;
+ * static final int styleSelected = 1;
+ * static final int styleBold = 2;
+ * static final int styleItalics = 4;
+ * static final int styleSuper=8;
+ * static final int styleSub = 16;
+ *
+ * static class StyleRun {
+ * int limit;
+ * int style;
+ *
+ * public StyleRun(int limit, int style) {
+ * this.limit = limit;
+ * this.style = style;
+ * }
+ * }
+ *
+ * static class Bounds {
+ * int start;
+ * int limit;
+ *
+ * public Bounds(int start, int limit) {
+ * this.start = start;
+ * this.limit = limit;
+ * }
+ * }
+ *
+ * static int getTextWidth(String text, int start, int limit,
+ * StyleRun[] styleRuns, int styleRunCount) {
+ * // simplistic way to compute the width
+ * return limit - start;
+ * }
+ *
+ * // set limit and StyleRun limit for a line
+ * // from text[start] and from styleRuns[styleRunStart]
+ * // using Bidi.getLogicalRun(...)
+ * // returns line width
+ * static int getLineBreak(String text, Bounds line, Bidi para,
+ * StyleRun styleRuns[], Bounds styleRun) {
+ * // dummy return
+ * return 0;
+ * }
+ *
+ * // render runs on a line sequentially, always from left to right
+ *
+ * // prepare rendering a new line
+ * static void startLine(byte textDirection, int lineWidth) {
+ * System.out.println();
+ * }
+ *
+ * // render a run of text and advance to the right by the run width
+ * // the text[start..limit-1] is always in logical order
+ * static void renderRun(String text, int start, int limit,
+ * byte textDirection, int style) {
+ * }
+ *
+ * // We could compute a cross-product
+ * // from the style runs with the directional runs
+ * // and then reorder it.
+ * // Instead, here we iterate over each run type
+ * // and render the intersections -
+ * // with shortcuts in simple (and common) cases.
+ * // renderParagraph() is the main function.
+ *
+ * // render a directional run with
+ * // (possibly) multiple style runs intersecting with it
+ * static void renderDirectionalRun(String text, int start, int limit,
+ * byte direction, StyleRun styleRuns[],
+ * int styleRunCount) {
+ * int i;
+ *
+ * // iterate over style runs
+ * if (direction == Bidi.LTR) {
+ * int styleLimit;
+ * for (i = 0; i < styleRunCount; ++i) {
+ * styleLimit = styleRuns[i].limit;
+ * if (start < styleLimit) {
+ * if (styleLimit > limit) {
+ * styleLimit = limit;
+ * }
+ * renderRun(text, start, styleLimit,
+ * direction, styleRuns[i].style);
+ * if (styleLimit == limit) {
+ * break;
+ * }
+ * start = styleLimit;
+ * }
+ * }
+ * } else {
+ * int styleStart;
+ *
+ * for (i = styleRunCount-1; i >= 0; --i) {
+ * if (i > 0) {
+ * styleStart = styleRuns[i-1].limit;
+ * } else {
+ * styleStart = 0;
+ * }
+ * if (limit >= styleStart) {
+ * if (styleStart < start) {
+ * styleStart = start;
+ * }
+ * renderRun(text, styleStart, limit, direction,
+ * styleRuns[i].style);
+ * if (styleStart == start) {
+ * break;
+ * }
+ * limit = styleStart;
+ * }
+ * }
+ * }
+ * }
+ *
+ * // the line object represents text[start..limit-1]
+ * static void renderLine(Bidi line, String text, int start, int limit,
+ * StyleRun styleRuns[], int styleRunCount) {
+ * byte direction = line.getDirection();
+ * if (direction != Bidi.MIXED) {
+ * // unidirectional
+ * if (styleRunCount <= 1) {
+ * renderRun(text, start, limit, direction, styleRuns[0].style);
+ * } else {
+ * renderDirectionalRun(text, start, limit, direction,
+ * styleRuns, styleRunCount);
+ * }
+ * } else {
+ * // mixed-directional
+ * int count, i;
+ * BidiRun run;
+ *
+ * try {
+ * count = line.countRuns();
+ * } catch (IllegalStateException e) {
+ * e.printStackTrace();
+ * return;
+ * }
+ * if (styleRunCount <= 1) {
+ * int style = styleRuns[0].style;
+ *
+ * // iterate over directional runs
+ * for (i = 0; i < count; ++i) {
+ * run = line.getVisualRun(i);
+ * renderRun(text, run.getStart(), run.getLimit(),
+ * run.getDirection(), style);
+ * }
+ * } else {
+ * // iterate over both directional and style runs
+ * for (i = 0; i < count; ++i) {
+ * run = line.getVisualRun(i);
+ * renderDirectionalRun(text, run.getStart(),
+ * run.getLimit(), run.getDirection(),
+ * styleRuns, styleRunCount);
+ * }
+ * }
+ * }
+ * }
+ *
+ * static void renderParagraph(String text, byte textDirection,
+ * StyleRun styleRuns[], int styleRunCount,
+ * int lineWidth) {
+ * int length = text.length();
+ * Bidi para = new Bidi();
+ * try {
+ * para.setPara(text,
+ * textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
+ * : Bidi.LEVEL_DEFAULT_LTR,
+ * null);
+ * } catch (Exception e) {
+ * e.printStackTrace();
+ * return;
+ * }
+ * byte paraLevel = (byte)(1 & para.getParaLevel());
+ * StyleRun styleRun = new StyleRun(length, styleNormal);
+ *
+ * if (styleRuns == null || styleRunCount <= 0) {
+ * styleRuns = new StyleRun[1];
+ * styleRunCount = 1;
+ * styleRuns[0] = styleRun;
+ * }
+ * // assume styleRuns[styleRunCount-1].limit>=length
+ *
+ * int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
+ * if (width <= lineWidth) {
+ * // everything fits onto one line
+ *
+ * // prepare rendering a new line from either left or right
+ * startLine(paraLevel, width);
+ *
+ * renderLine(para, text, 0, length, styleRuns, styleRunCount);
+ * } else {
+ * // we need to render several lines
+ * Bidi line = new Bidi(length, 0);
+ * int start = 0, limit;
+ * int styleRunStart = 0, styleRunLimit;
+ *
+ * for (;;) {
+ * limit = length;
+ * styleRunLimit = styleRunCount;
+ * width = getLineBreak(text, new Bounds(start, limit),
+ * para, styleRuns,
+ * new Bounds(styleRunStart, styleRunLimit));
+ * try {
+ * line = para.setLine(start, limit);
+ * } catch (Exception e) {
+ * e.printStackTrace();
+ * return;
+ * }
+ * // prepare rendering a new line
+ * // from either left or right
+ * startLine(paraLevel, width);
+ *
+ * if (styleRunStart > 0) {
+ * int newRunCount = styleRuns.length - styleRunStart;
+ * StyleRun[] newRuns = new StyleRun[newRunCount];
+ * System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
+ * newRunCount);
+ * renderLine(line, text, start, limit, newRuns,
+ * styleRunLimit - styleRunStart);
+ * } else {
+ * renderLine(line, text, start, limit, styleRuns,
+ * styleRunLimit - styleRunStart);
+ * }
+ * if (limit == length) {
+ * break;
+ * }
+ * start = limit;
+ * styleRunStart = styleRunLimit - 1;
+ * if (start >= styleRuns[styleRunStart].limit) {
+ * ++styleRunStart;
+ * }
+ * }
+ * }
+ * }
+ *
+ * public static void main(String[] args)
+ * {
+ * renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
+ * renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
+ * }
+ * }
+ *
+ *
+ */
+
+public class Bidi {
+
+ private java.text.Bidi bidi;
+
+ /* Stores the given java.text.Bidi in the bidi field of the new object. */
+ private Bidi(java.text.Bidi delegate) {
+  bidi = delegate;
+ }
+
+ /** Paragraph level setting
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).
+ *
+ * If this value is used in conjunction with reordering modes
+ * REORDER_INVERSE_LIKE_DIRECT
or
+ * REORDER_INVERSE_FOR_NUMBERS_SPECIAL
, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, the direction will be LTR otherwise.
+ *
+ * If reordering option OPTION_INSERT_MARKS
is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see #REORDER_INVERSE_LIKE_DIRECT
+ * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 3.8
+ */
+ public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
+
+ /** Paragraph level setting
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).
+ *
+ * If this value is used in conjunction with reordering modes
+ * REORDER_INVERSE_LIKE_DIRECT
or
+ * REORDER_INVERSE_FOR_NUMBERS_SPECIAL
, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise.
+ *
+ * If reordering option OPTION_INSERT_MARKS
is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see #REORDER_INVERSE_LIKE_DIRECT
+ * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 3.8
+ */
+ public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
+
+ /**
+ * Maximum explicit embedding level.
+ * (The maximum resolved level can be up to MAX_EXPLICIT_LEVEL+1
).
+ * @stable ICU 3.8
+ */
+ public static final byte MAX_EXPLICIT_LEVEL = 61;
+
+ /**
+ * Bit flag for level input.
+ * Overrides directional properties.
+ * @stable ICU 3.8
+ */
+ public static final byte LEVEL_OVERRIDE = (byte)0x80;
+
+ /**
+ * Special value which can be returned by the mapping methods when a
+ * logical index has no corresponding visual index or vice-versa. This may
+ * happen for the logical-to-visual mapping of a Bidi control when option
+ * OPTION_REMOVE_CONTROLS
is
+ * specified. This can also happen for the visual-to-logical mapping of a
+ * Bidi mark (LRM or RLM) inserted by option
+ * OPTION_INSERT_MARKS
.
+ * @see #getVisualIndex
+ * @see #getVisualMap
+ * @see #getLogicalIndex
+ * @see #getLogicalMap
+ * @see #OPTION_INSERT_MARKS
+ * @see #OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.8
+ */
+ public static final int MAP_NOWHERE = -1;
+
+ /**
+ * All left-to-right text.
+ * @stable ICU 3.8
+ */
+ public static final byte LTR = 0;
+
+ /**
+ * All right-to-left text.
+ * @stable ICU 3.8
+ */
+ public static final byte RTL = 1;
+
+ /**
+ * Mixed-directional text.
+ * @stable ICU 3.8
+ */
+ public static final byte MIXED = 2;
+
+ /**
+ * option bit for writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short KEEP_BASE_COMBINING = 1;
+
+ /**
+ * option bit for writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short DO_MIRRORING = 2;
+
+ /**
+ * option bit for writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ *
This option does not imply corresponding adjustment of the index
+ * mappings.
+ *
+ * @see #setInverse
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short INSERT_LRM_FOR_NUMERIC = 4;
+
+ /**
+ * option bit for writeReordered():
+ * remove Bidi control characters
+ * (this does not affect INSERT_LRM_FOR_NUMERIC)
+ *
+ * This option does not imply corresponding adjustment of the index
+ * mappings.
+ *
+ * @see #writeReordered
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @stable ICU 3.8
+ */
+ public static final short REMOVE_BIDI_CONTROLS = 8;
+
+ /**
+ * option bit for writeReordered():
+ * write the output in reverse order
+ *
+ * This has the same effect as calling writeReordered()
+ * first without this option, and then calling
+ * writeReverse()
without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.
+ *
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public static final short OUTPUT_REVERSE = 16;
+
+ /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_DEFAULT = 0;
+
+ /** Reordering mode: Logical to Visual algorithm which handles numbers in
+ * a way which mimics the behavior of Windows XP.
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_NUMBERS_SPECIAL = 1;
+
+ /** Reordering mode: Logical to Visual algorithm grouping numbers with
+ * adjacent R characters (reversible algorithm).
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
+
+ /** Reordering mode: Reorder runs only to transform a Logical LTR string
+ * to the logical RTL string with the same display, or vice-versa.
+ * If this mode is set together with option
+ * OPTION_INSERT_MARKS
, some Bidi controls in the source
+ * text may be removed and other controls may be added to produce the
+ * minimum combination which has the required display.
+ * @see #OPTION_INSERT_MARKS
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_RUNS_ONLY = 3;
+
+ /** Reordering mode: Visual to Logical algorithm which handles numbers
+ * like L (same algorithm as selected by setInverse(true)
).
+ * @see #setInverse
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
+
+ /** Reordering mode: Visual to Logical algorithm equivalent to the regular
+ * Logical to Visual algorithm.
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_INVERSE_LIKE_DIRECT = 5;
+
+ /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
+ * REORDER_NUMBERS_SPECIAL
Bidi algorithm.
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
+
+ /**
+ * Option value for setReorderingOptions
:
+ * disable all the options which can be set with this method
+ * @see #setReorderingOptions
+ * @stable ICU 3.8
+ */
+ public static final int OPTION_DEFAULT = 0;
+
+ /**
+ * Option bit for setReorderingOptions
:
+ * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+ * a reordering to a Logical order
+ *
+ * This option must be set or reset before calling
+ * setPara
.
+ *
+ * This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically:
+ *
+ * REORDER_RUNS_ONLY
+ * REORDER_INVERSE_NUMBERS_AS_L
+ * REORDER_INVERSE_LIKE_DIRECT
+ * REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ *
+ *
+ * If this option is set in conjunction with reordering mode
+ * REORDER_INVERSE_NUMBERS_AS_L
or with calling
+ * setInverse(true)
, it implies option
+ * INSERT_LRM_FOR_NUMERIC
in calls to method
+ * writeReordered()
.
+ *
+ * For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option OPTION_REMOVE_CONTROLS
set before calling
+ * setPara()
or option
+ * REMOVE_BIDI_CONTROLS
in
+ * writeReordered
), the result will be identical to the
+ * source text in the first transformation.
+ *
+ *
This option will be ignored if specified together with option
+ * OPTION_REMOVE_CONTROLS
. It inhibits option
+ * REMOVE_BIDI_CONTROLS
in calls to method
+ * writeReordered()
and it implies option
+ * INSERT_LRM_FOR_NUMERIC
in calls to method
+ * writeReordered()
if the reordering mode is
+ * REORDER_INVERSE_NUMBERS_AS_L
.
+ *
+ * @see #setReorderingMode
+ * @see #setReorderingOptions
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @see #REMOVE_BIDI_CONTROLS
+ * @see #OPTION_REMOVE_CONTROLS
+ * @see #REORDER_RUNS_ONLY
+ * @see #REORDER_INVERSE_NUMBERS_AS_L
+ * @see #REORDER_INVERSE_LIKE_DIRECT
+ * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 3.8
+ */
+ public static final int OPTION_INSERT_MARKS = 1;
+
+ /**
+ * Option bit for setReorderingOptions
:
+ * remove Bidi control characters
+ *
+ * This option must be set or reset before calling
+ * setPara
.
+ *
+ * This option nullifies option
+ * OPTION_INSERT_MARKS
. It inhibits option
+ * INSERT_LRM_FOR_NUMERIC
in calls to method
+ * writeReordered()
and it implies option
+ * REMOVE_BIDI_CONTROLS
in calls to that method.
+ *
+ * @see #setReorderingMode
+ * @see #setReorderingOptions
+ * @see #OPTION_INSERT_MARKS
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @see #REMOVE_BIDI_CONTROLS
+ * @stable ICU 3.8
+ */
+ public static final int OPTION_REMOVE_CONTROLS = 2;
+
+ /**
+ * Option bit for setReorderingOptions
:
+ * process the output as part of a stream to be continued
+ *
+ * This option must be set or reset before calling
+ * setPara
.
+ *
+ * This option specifies that the caller is interested in processing
+ * a large text object in parts. The results of the successive calls are
+ * expected to be concatenated by the caller. Only the call for the last
+ * part will have this option bit off.
+ *
+ * When this option bit is on, setPara()
may process
+ * less than the full source text in order to truncate the text at a
+ * meaningful boundary. The caller should call
+ * getProcessedLength()
immediately after calling
+ * setPara()
in order to determine how much of the source
+ * text has been processed. Source text beyond that length should be
+ * resubmitted in following calls to setPara
. The
+ * processed length may be less than the length of the source text if a
+ * character preceding the last character of the source text constitutes a
+ * reasonable boundary (like a block separator) for text to be continued.
+ * If the last character of the source text constitutes a reasonable
+ * boundary, the whole text will be processed at once.
+ * If nowhere in the source text there exists
+ * such a reasonable boundary, the processed length will be zero.
+ * The caller should check for such an occurrence and do one of the following:
+ *
submit a larger amount of text with a better chance to include
+ * a reasonable boundary.
+ * resubmit the same text after turning off option
+ * OPTION_STREAMING
.
+ * In all cases, this option should be turned off before processing the last
+ * part of the text.
+ *
+ * When the OPTION_STREAMING
option is used, it is
+ * recommended to call orderParagraphsLTR(true)
before calling
+ * setPara()
so that later paragraphs may be concatenated to
+ * previous paragraphs on the right.
+ *
+ *
+ * @see #setReorderingMode
+ * @see #setReorderingOptions
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public static final int OPTION_STREAMING = 4;
+
+ /**
+ * Value returned by BidiClassifier
when there is no need to
+ * override the standard Bidi class for a given code point.
+ * @see BidiClassifier
+ * @stable ICU 3.8
+ */
+ public static final int CLASS_DEFAULT = 19; //UCharacterDirection.CHAR_DIRECTION_COUNT;
+
+ /**
+ * Allocate a Bidi
object.
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by setPara()
+ * or the Bidi properties of a line within a paragraph by
+ * setLine()
.
+ * This object can be reused.
+ * setPara()
and setLine()
will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @stable ICU 3.8
+ */
+ public Bidi()
+ {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Allocate a Bidi
object with preallocated memory
+ * for internal structures.
+ * This method provides a Bidi
object like the default constructor
+ * but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller.
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ * maxRunCount
cannot be reasonably predetermined and should not
+ * be set to maxLength
(the only failproof value) to avoid
+ * wasting memory, then maxRunCount
could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with the default constructor.
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ * will be preallocated for. An attempt to associate this object with a
+ * longer text will fail, unless this value is 0, which leaves the allocation
+ * up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ * that internal memory will be preallocated for. An attempt to access
+ * visual runs on an object that was not preallocated for as many runs
+ * as the text was actually resolved to will fail,
+ * unless this value is 0, which leaves the allocation up to the implementation.
+ * The number of runs depends on the actual text and may be anywhere between
+ * 1 and maxLength
. It is typically small.
+ *
+ * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
+ * @stable ICU 3.8
+ */
+ public Bidi(int maxLength, int maxRunCount)
+ {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This method
+ * must be called before setPara()
.
+ *
+ *
The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi".
+ *
+ * With isInverse
set to true
,
+ * this method changes the behavior of some of the subsequent methods
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence.
+ *
+ * Output runs should be retrieved using getVisualRun()
.
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * getVisualRun()
gets the reordered runs, these are actually
+ * the runs of the logically ordered output.
+ *
+ * Calling this method with argument isInverse
set to
+ * true
is equivalent to calling setReorderingMode
+ * with argument reorderingMode
+ * set to REORDER_INVERSE_NUMBERS_AS_L
.
+ * Calling this method with argument isInverse
set to
+ * false
is equivalent to calling setReorderingMode
+ * with argument reorderingMode
+ * set to REORDER_DEFAULT
.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see #setPara
+ * @see #writeReordered
+ * @see #setReorderingMode
+ * @see #REORDER_INVERSE_NUMBERS_AS_L
+ * @see #REORDER_DEFAULT
+ * @stable ICU 3.8
+ */
+ public void setInverse(boolean isInverse) {
+  // The argument is not read: this method unconditionally raises
+  // UnsupportedOperationException.
+  final String reason = "Method not supported by com.ibm.icu.base";
+  throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Is this Bidi
object set to perform the inverse Bidi
+ * algorithm?
+ *
Note: calling this method after setting the reordering mode with
+ * setReorderingMode
will return true
if the
+ * reordering mode was set to
+ * REORDER_INVERSE_NUMBERS_AS_L, false
+ * for all other values.
+ *
+ * @return true
if the Bidi
object is set to
+ * perform the inverse Bidi algorithm by handling numbers as L.
+ *
+ * @see #setInverse
+ * @see #setReorderingMode
+ * @see #REORDER_INVERSE_NUMBERS_AS_L
+ * @stable ICU 3.8
+ */
+ public boolean isInverse() {
+  // No value is ever returned: this method unconditionally raises
+  // UnsupportedOperationException.
+  final String reason = "Method not supported by com.ibm.icu.base";
+  throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This method must be called before setPara()
, and stays in
+ * effect until called again with a different argument.
+ *
+ * The normal operation of the Bidi algorithm as described in the Unicode
+ * Standard Annex #9 is to take text stored in logical (keyboard, typing)
+ * order and to determine how to reorder it for visual rendering.
+ *
+ * With the reordering mode set to a value other than
+ * REORDER_DEFAULT
, this method changes the behavior of some of
+ * the subsequent methods in a way such that they implement an inverse Bidi
+ * algorithm or some other algorithm variants.
+ *
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here.
+ *
+ * In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation.
+ *
+ *
+ * When the Reordering Mode is set to
+ * REORDER_DEFAULT
,
+ * the standard Bidi Logical to Visual algorithm is applied.
+ *
+ * When the reordering mode is set to
+ * REORDER_NUMBERS_SPECIAL
,
+ * the algorithm used to perform Bidi transformations when calling
+ * setPara
should approximate the algorithm used in Microsoft
+ * Windows XP rather than strictly conform to the Unicode Bidi algorithm.
+ *
+ * The differences between the basic algorithm and the algorithm addressed
+ * by this option are as follows:
+ *
+ * Within text at an even embedding level, the sequence "123AB"
+ * (where AB represent R or AL letters) is transformed to "123BA" by the
+ * Unicode algorithm and to "BA123" by the Windows algorithm.
+ *
+ * Arabic-Indic numbers (AN) are handled by the Windows algorithm just
+ * like regular numbers (EN).
+ *
+ *
+ * When the reordering mode is set to
+ * <code>REORDER_GROUP_NUMBERS_WITH_R</code>,
+ * numbers located between LTR text and RTL text are associated with the RTL
+ * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ * upper case letters represent RTL characters) will be transformed to
+ * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ * This makes the algorithm reversible and makes it useful when round trip
+ * (from visual to logical and back to visual) must be achieved without
+ * adding LRM characters. However, this is a variation from the standard
+ * Unicode Bidi algorithm.
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.
+ *
+ * When the reordering mode is set to
+ * <code>REORDER_RUNS_ONLY</code>,
+ * a "Logical to Logical" transformation must be performed:
+ *
+ * If the default text level of the source text (argument
+ * <code>paraLevel</code> in <code>setPara</code>) is even, the source text
+ * will be handled as LTR logical text and will be transformed to the RTL
+ * logical text which has the same LTR visual display.
+ * If the default level of the source text is odd, the source text
+ * will be handled as RTL logical text and will be transformed to the
+ * LTR logical text which has the same LTR visual display.
+ *
+ * This mode may be needed when logical text which is basically Arabic or
+ * Hebrew, with possible included numbers or phrases in English, has to be
+ * displayed as if it had an even embedding level (this can happen if the
+ * displaying application treats all text as if it was basically LTR).
+ *
+ * This mode may also be needed in the reverse case, when logical text which
+ * is basically English, with possible included phrases in Arabic or Hebrew,
+ * has to be displayed as if it had an odd embedding level.
+ *
+ * Both cases could be handled by adding LRE or RLE at the head of the
+ * text, if the display subsystem supports these formatting controls. If it
+ * does not, the problem may be handled by transforming the source text in
+ * this mode before displaying it, so that it will be displayed properly.
+ *
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.
+ *
+ * When the reordering mode is set to
+ * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi"
+ * algorithm is applied.
+ * Runs of text with numeric characters will be treated like LTR letters and
+ * may need to be surrounded with LRM characters when they are written in
+ * reordered sequence (the option <code>INSERT_LRM_FOR_NUMERIC</code> can
+ * be used with method <code>writeReordered</code> to this end). This mode
+ * is equivalent to calling <code>setInverse()</code> with
+ * argument <code>isInverse</code> set to <code>true</code>.
+ *
+ * When the reordering mode is set to
+ * <code>REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to
+ * Visual Bidi algorithm is used as an approximation of an "inverse Bidi"
+ * algorithm. This mode is similar to mode
+ * <code>REORDER_INVERSE_NUMBERS_AS_L</code> but is closer to the
+ * regular Bidi algorithm.
+ *
+ * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ * upper case represents RTL characters) will be transformed to
+ * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ * with mode <code>REORDER_INVERSE_NUMBERS_AS_L</code>.
+ * When used in conjunction with option
+ * <code>OPTION_INSERT_MARKS</code>, this mode generally
+ * adds Bidi marks to the output significantly more sparingly than mode
+ * <code>REORDER_INVERSE_NUMBERS_AS_L</code> with option
+ * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to
+ * <code>writeReordered</code>.
+ *
+ * When the reordering mode is set to
+ * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
+ * Bidi algorithm used in Windows XP is used as an approximation of an "inverse
+ * Bidi" algorithm.
+ *
+ * For example, an LTR paragraph with the content "abc FED123" (where
+ * upper case represents RTL characters) will be transformed to
+ * "abc 123DEF".
+ *
+ *
+ * In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with <code>REORDER_INVERSE</code>),
+ * output runs should be retrieved using <code>getVisualRun()</code>, and
+ * the output text with <code>writeReordered()</code>. The caller should
+ * keep in mind that in "inverse Bidi" modes the input is actually visually
+ * ordered text and reordered output returned by <code>getVisualRun()</code>
+ * or <code>writeReordered()</code> are actually runs or character string
+ * of logically ordered output.
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM.
+ *
+ * Note that option <code>OUTPUT_REVERSE</code> of
+ * <code>writeReordered</code> has no useful meaning and should not be used
+ * in conjunction with any value of the reordering mode specifying "inverse
+ * Bidi" or with value <code>REORDER_RUNS_ONLY</code>.
+ *
+ * @param reorderingMode specifies the required variant of the Bidi
+ * algorithm.
+ *
+ * @see #setInverse
+ * @see #setPara
+ * @see #writeReordered
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @see #OUTPUT_REVERSE
+ * @see #REORDER_DEFAULT
+ * @see #REORDER_NUMBERS_SPECIAL
+ * @see #REORDER_GROUP_NUMBERS_WITH_R
+ * @see #REORDER_RUNS_ONLY
+ * @see #REORDER_INVERSE_NUMBERS_AS_L
+ * @see #REORDER_INVERSE_LIKE_DIRECT
+ * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 3.8
+ */
+ public void setReorderingMode(int reorderingMode) { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @return the current reordering mode of the Bidi object
+ *
+ * @see #setReorderingMode
+ * @stable ICU 3.8
+ */
+ public int getReorderingMode() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Specify which of the reordering options should be applied during Bidi
+ * transformations.
+ *
+ * @param options A combination of zero or more of the following
+ * reordering options:
+ * <code>OPTION_DEFAULT</code>, <code>OPTION_INSERT_MARKS</code>,
+ * <code>OPTION_REMOVE_CONTROLS</code>, <code>OPTION_STREAMING</code>.
+ *
+ * @see #getReorderingOptions
+ * @see #OPTION_DEFAULT
+ * @see #OPTION_INSERT_MARKS
+ * @see #OPTION_REMOVE_CONTROLS
+ * @see #OPTION_STREAMING
+ * @stable ICU 3.8
+ */
+ public void setReorderingOptions(int options) { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @return the current reordering options of the Bidi object
+ *
+ * @see #setReorderingOptions
+ * @stable ICU 3.8
+ */
+ public int getReorderingOptions() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * Unicode Standard Annex #9 ,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .
+ *
+ * This method takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from styled
+ * text and computes the left-right-directionality of each character.
+ *
+ * If the entire text is all of the same directionality, then
+ * the method may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * Although the text is passed here as a <code>String</code>, it is
+ * stored internally as an array of characters. Therefore the
+ * documentation will refer to indexes of the characters in the text.
+ *
+ * @param text contains the text that the Bidi algorithm will be performed
+ * on. This text can be retrieved with <code>getText()</code> or
+ * <code>getTextAsString</code>.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the method shall determine the paragraph level from the text,
+ * then <code>paraLevel</code> can be set to
+ * either <code>LEVEL_DEFAULT_LTR</code>
+ * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ * ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * <code>LEVEL_OVERRIDE</code> bit set.
+ * Except for that bit, it must be
+ * <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
+ * with one exception: a level of zero may be specified for a
+ * paragraph separator even if <code>paraLevel&gt;0</code> when multiple
+ * paragraphs are submitted in the same call to <code>setPara()</code>.
+ * Caution: A reference to this array, not a copy
+ * of the levels, will be stored in the <code>Bidi</code> object;
+ * the <code>embeddingLevels</code>
+ * should not be modified to avoid unexpected results on subsequent
+ * Bidi operations. However, the <code>setPara()</code> and
+ * <code>setLine()</code> methods may modify some or all of the
+ * levels.
+ * Note: the <code>embeddingLevels</code> array must
+ * have one entry for each character in <code>text</code>.
+ *
+ * @throws IllegalArgumentException if the values in embeddingLevels are
+ * not within the allowed range
+ *
+ * @see #LEVEL_DEFAULT_LTR
+ * @see #LEVEL_DEFAULT_RTL
+ * @see #LEVEL_OVERRIDE
+ * @see #MAX_EXPLICIT_LEVEL
+ * @stable ICU 3.8
+ */
+ public void setPara(String text, byte paraLevel, byte[] embeddingLevels)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * Unicode Standard Annex #9 ,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .
+ *
+ * This method takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from styled
+ * text and computes the left-right-directionality of each character.
+ *
+ * If the entire text is all of the same directionality, then
+ * the method may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * The text is stored internally as an array of characters. Therefore the
+ * documentation will refer to indexes of the characters in the text.
+ *
+ * @param chars contains the text that the Bidi algorithm will be performed
+ * on. This text can be retrieved with <code>getText()</code> or
+ * <code>getTextAsString</code>.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the method shall determine the paragraph level from the text,
+ * then <code>paraLevel</code> can be set to
+ * either <code>LEVEL_DEFAULT_LTR</code>
+ * or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and
+ * override levels, ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * <code>LEVEL_OVERRIDE</code> bit set.
+ * Except for that bit, it must be
+ * <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
+ * with one exception: a level of zero may be specified for a
+ * paragraph separator even if <code>paraLevel&gt;0</code> when multiple
+ * paragraphs are submitted in the same call to <code>setPara()</code>.
+ * Caution: A reference to this array, not a copy
+ * of the levels, will be stored in the <code>Bidi</code> object;
+ * the <code>embeddingLevels</code>
+ * should not be modified to avoid unexpected results on subsequent
+ * Bidi operations. However, the <code>setPara()</code> and
+ * <code>setLine()</code> methods may modify some or all of the
+ * levels.
+ * Note: the <code>embeddingLevels</code> array must
+ * have one entry for each character in <code>chars</code>.
+ *
+ * @throws IllegalArgumentException if the values in embeddingLevels are
+ * not within the allowed range
+ *
+ * @see #LEVEL_DEFAULT_LTR
+ * @see #LEVEL_DEFAULT_RTL
+ * @see #LEVEL_OVERRIDE
+ * @see #MAX_EXPLICIT_LEVEL
+ * @stable ICU 3.8
+ */
+ public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
+ * Unicode Standard Annex #9 ,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .
+ *
+ * This method takes a paragraph of text and computes the
+ * left-right-directionality of each character. The text should not
+ * contain any Unicode block separators.
+ *
+ * The RUN_DIRECTION attribute in the text, if present, determines the base
+ * direction (left-to-right or right-to-left). If not present, the base
+ * direction is computed using the Unicode Bidirectional Algorithm,
+ * defaulting to left-to-right if there are no strong directional characters
+ * in the text. This attribute, if present, must be applied to all the text
+ * in the paragraph.
+ *
+ * The BIDI_EMBEDDING attribute in the text, if present, represents
+ * embedding level information. Negative values from -1 to -62 indicate
+ * overrides at the absolute value of the level. Positive values from 1 to
+ * 62 indicate embeddings. Where values are zero or not defined, the base
+ * embedding level as determined by the base direction is assumed.
+ *
+ * The NUMERIC_SHAPING attribute in the text, if present, converts European
+ * digits to other decimal digits before running the bidi algorithm. This
+ * attribute, if present, must be applied to all the text in the paragraph.
+ *
+ * If the entire text is all of the same directionality, then
+ * the method may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.
+ *
+ * @param paragraph a paragraph of text with optional character and
+ * paragraph attribute information
+ * @stable ICU 3.8
+ */
+ public void setPara(AttributedCharacterIterator paragraph)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This method must be called before <code>setPara()</code>.
+ * Paragraph separators (B) may appear in the text. Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param ordarParaLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see #setPara
+ * @stable ICU 3.8
+ */
+ public void orderParagraphsLTR(boolean ordarParaLTR) { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Is this <code>Bidi</code> object set to allocate level 0 to block
+ * separators so that successive paragraphs progress from left to right?
+ *
+ * @return <code>true</code> if the <code>Bidi</code> object is set to
+ * allocate level 0 to block separators.
+ *
+ * @stable ICU 3.8
+ */
+ public boolean isOrderParagraphsLTR() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the directionality of the text.
+ *
+ * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
+ * that indicates if the entire text
+ * represented by this object is unidirectional,
+ * and which direction, or if it is mixed-directional.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #LTR
+ * @see #RTL
+ * @see #MIXED
+ * @stable ICU 3.8
+ */
+ public byte getDirection()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the text.
+ *
+ * @return A <code>String</code> containing the text that the
+ * <code>Bidi</code> object was created for.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #setPara
+ * @see #setLine
+ * @stable ICU 3.8
+ */
+ public String getTextAsString()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the text.
+ *
+ * @return A <code>char</code> array containing the text that the
+ * <code>Bidi</code> object was created for.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #setPara
+ * @see #setLine
+ * @stable ICU 3.8
+ */
+ public char[] getText()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the length of the text.
+ *
+ * @return The length of the text that the <code>Bidi</code> object was
+ * created for.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @stable ICU 3.8
+ */
+ public int getLength()
+ {
+ return bidi.getLength(); // Forwards to bidi (declared outside this method) and returns its result unchanged.
+ }
+
+ /**
+ * Get the length of the source text processed by the last call to
+ * <code>setPara()</code>. This length may be different from the length of
+ * the source text if option <code>OPTION_STREAMING</code> has been
+ * set.
+ *
+ * Note that whenever the length of the text affects the execution or the
+ * result of a method, it is the processed length which must be considered,
+ * except for <code>setPara</code> (which receives unprocessed source text)
+ * and <code>getLength</code> (which returns the original length of the
+ * source text).
+ * In particular, the processed length is the one to consider in the
+ * following cases:
+ * <ul>
+ * <li>maximum value of the <code>limit</code> argument of
+ * <code>setLine</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>getParagraph</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>getLevelAt</code></li>
+ * <li>number of elements in the array returned by <code>getLevels</code>
+ * </li>
+ * <li>maximum value of the <code>logicalStart</code> argument of
+ * <code>getLogicalRun</code></li>
+ * <li>maximum value of the <code>logicalIndex</code> argument of
+ * <code>getVisualIndex</code></li>
+ * <li>number of elements returned by <code>getLogicalMap</code></li>
+ * <li>length of text processed by <code>writeReordered</code></li>
+ * </ul>
+ *
+ * @return The length of the part of the source text processed by
+ * the last call to <code>setPara</code>.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #setPara
+ * @see #OPTION_STREAMING
+ * @stable ICU 3.8
+ */
+ public int getProcessedLength() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the length of the reordered text resulting from the last call to
+ * <code>setPara()</code>. This length may be different from the length
+ * of the source text if option <code>OPTION_INSERT_MARKS</code>
+ * or option <code>OPTION_REMOVE_CONTROLS</code> has been set.
+ *
+ * This resulting length is the one to consider in the following cases:
+ * <ul>
+ * <li>maximum value of the <code>visualIndex</code> argument of
+ * <code>getLogicalIndex</code></li>
+ * <li>number of elements returned by <code>getVisualMap</code></li>
+ * </ul>
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * <code>writeReordered</code>, or if option
+ * <code>REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
+ *
+ * @return The length of the reordered text resulting from
+ * the last call to <code>setPara</code>.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #setPara
+ * @see #OPTION_INSERT_MARKS
+ * @see #OPTION_REMOVE_CONTROLS
+ * @see #REORDER_INVERSE_NUMBERS_AS_L
+ * @stable ICU 3.8
+ */
+ public int getResultLength() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /* paragraphs API methods ------------------------------------------------- */
+
+ /**
+ * Get the paragraph level of the text.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ * level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
+ * LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph
+ * is returned.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ *
+ * @see #LEVEL_DEFAULT_LTR
+ * @see #LEVEL_DEFAULT_RTL
+ * @see #getParagraph
+ * @see #getParagraphByIndex
+ * @stable ICU 3.8
+ */
+ public byte getParaLevel()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the number of paragraphs.
+ *
+ * @return The number of paragraphs.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @stable ICU 3.8
+ */
+ public int countParagraphs()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This method returns information about a paragraph.
+ *
+ * @param paraIndex is the number of the paragraph, in the
+ * range <code>[0..countParagraphs()-1]</code>.
+ *
+ * @return a BidiRun object with the details of the paragraph:<br>
+ * <code>start</code> will receive the index of the first character
+ * of the paragraph in the text.<br>
+ * <code>limit</code> will receive the limit of the paragraph.<br>
+ * <code>embeddingLevel</code> will receive the level of the paragraph.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if paraIndex is not in the range
+ * <code>[0..countParagraphs()-1]</code>
+ *
+ * @see com.ibm.icu.text.BidiRun
+ * @stable ICU 3.8
+ */
+ public BidiRun getParagraphByIndex(int paraIndex)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get a paragraph, given a position within the text.
+ * This method returns information about a paragraph.
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using getParagraphByIndex().
+ *
+ * @param charIndex is the index of a character within the text, in the
+ * range <code>[0..getProcessedLength()-1]</code>.
+ *
+ * @return a BidiRun object with the details of the paragraph:<br>
+ * <code>start</code> will receive the index of the first character
+ * of the paragraph in the text.<br>
+ * <code>limit</code> will receive the limit of the paragraph.<br>
+ * <code>embeddingLevel</code> will receive the level of the paragraph.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if charIndex is not within the legal range
+ *
+ * @see com.ibm.icu.text.BidiRun
+ * @see #getParagraphByIndex
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public BidiRun getParagraph(int charIndex)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the index of a paragraph, given a position within the text.
+ *
+ * @param charIndex is the index of a character within the text, in the
+ * range <code>[0..getProcessedLength()-1]</code>.
+ *
+ * @return The index of the paragraph containing the specified position,
+ * starting from 0.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if charIndex is not within the legal range
+ *
+ * @see com.ibm.icu.text.BidiRun
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public int getParagraphIndex(int charIndex)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Set a custom Bidi classifier used by the UBA implementation for Bidi
+ * class determination.
+ *
+ * @param classifier A new custom classifier. This can be null.
+ *
+ * @see #getCustomClassifier
+ * @stable ICU 3.8
+ */
+ public void setCustomClassifier(BidiClassifier classifier) { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Gets the current custom class classifier used for Bidi class
+ * determination.
+ *
+ * @return An instance of class BidiClassifier
+ *
+ * @see #setCustomClassifier
+ * @stable ICU 3.8
+ */
+ public BidiClassifier getCustomClassifier() { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Retrieves the Bidi class for a given code point.
+ * <p>If a <code>BidiClassifier</code> is defined and returns a value
+ * other than <code>CLASS_DEFAULT</code>, that value is used; otherwise
+ * the default class determination mechanism is invoked.
+ *
+ * @param c The code point to get a Bidi class for.
+ *
+ * @return The Bidi class for the character <code>c</code> that is in effect
+ * for this <code>Bidi</code> instance.
+ *
+ * @see BidiClassifier
+ * @stable ICU 3.8
+ */
+ public int getCustomizedClass(int c) { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * <code>setLine()</code> returns a <code>Bidi</code> object to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a <code>Bidi</code> object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text.
+ * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.
+ *
+ * This is used after calling setPara()
+ * for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a <code>Bidi</code> object that represents a line.
+ *
+ * Important: the line <code>Bidi</code> object may
+ * reference data within the global text <code>Bidi</code> object.
+ * You should not alter the content of the global text object until
+ * you are finished using the line object.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ * (its last index +1).
+ *
+ * @return a <code>Bidi</code> object that will now represent a line of the text.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code>
+ * @throws IllegalArgumentException if start and limit are not in the range
+ * <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
+ * or if the specified line crosses a paragraph boundary
+ *
+ * @see #setPara
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public Bidi setLine(int start, int limit)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the level for one character.
+ *
+ * @param charIndex the index of a character.
+ *
+ * @return The level for the character at <code>charIndex</code>.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if charIndex is not in the range
+ * <code>0&lt;=charIndex&lt;getProcessedLength()</code>
+ *
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public byte getLevelAt(int charIndex)
+ {
+ return (byte)bidi.getLevelAt(charIndex); // Forwards to bidi (declared outside this method); the result is explicitly cast to byte.
+ }
+
+ /**
+ * Get an array of levels for each character.
+ *
+ * Note that this method may allocate memory under some
+ * circumstances, unlike <code>getLevelAt()</code>.
+ *
+ * @return The levels array for the text,
+ * or <code>null</code> if an error occurs.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @stable ICU 3.8
+ */
+ public byte[] getLevels()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get a logical run.
+ * This method returns information about a run and is used
+ * to retrieve runs in logical order.
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @return a BidiRun object filled with <code>start</code> containing
+ * the first character of the run, <code>limit</code> containing
+ * the limit of the run, and <code>embeddingLevel</code> containing
+ * the level of the run.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @throws IllegalArgumentException if logicalPosition is not in the range
+ * <code>0&lt;=logicalPosition&lt;getProcessedLength()</code>
+ *
+ * @see com.ibm.icu.text.BidiRun
+ * @see com.ibm.icu.text.BidiRun#getStart()
+ * @see com.ibm.icu.text.BidiRun#getLimit()
+ * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
+ *
+ * @stable ICU 3.8
+ */
+ public BidiRun getLogicalRun(int logicalPosition)
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the number of runs.
+ * This method may invoke the actual reordering on the
+ * <code>Bidi</code> object, after <code>setPara()</code>
+ * may have resolved only the levels of the text. Therefore,
+ * <code>countRuns()</code> may have to allocate memory,
+ * and may throw an exception if it fails to do so.
+ *
+ * @return The number of runs.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to <code>setPara</code> or <code>setLine</code>
+ * @stable ICU 3.8
+ */
+ public int countRuns()
+ { // Stub: always throws; this method is not supported by com.ibm.icu.base.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ *
+ * Get a <code>BidiRun</code> object according to its index. BidiRun methods
+ * may be used to retrieve the run's logical start, length and level,
+ * which can be even for an LTR run or odd for an RTL run.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.
+ * countRuns()
is normally called
+ * before the runs are retrieved.
+ *
+ *
+ * Example:
+ *
+ * Bidi bidi = new Bidi();
+ * String text = "abc 123 DEFG xyz";
+ * bidi.setPara(text, Bidi.RTL, null);
+ * int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
+ * BidiRun run;
+ * for (i = 0; i < count; ++i) {
+ * run = bidi.getVisualRun(i);
+ * logicalStart = run.getStart();
+ * length = run.getLength();
+ * if (Bidi.LTR == run.getEmbeddingLevel()) {
+ * do { // LTR
+ * show_char(text.charAt(logicalStart++), visualIndex++);
+ * } while (--length > 0);
+ * } else {
+ * logicalStart += length; // logicalLimit
+ * do { // RTL
+ * show_char(text.charAt(--logicalStart), visualIndex++);
+ * } while (--length > 0);
+ * }
+ * }
+ *
+ *
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ *
+ * Use of {@link #writeReordered}
, optionally with the
+ * {@link #KEEP_BASE_COMBINING}
option, can be considered in
+ * order to avoid these issues.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range [0..countRuns()-1]
.
+ *
+ * @return a BidiRun object containing the details of the run. The
+ * directionality of the run is
+ * LTR==0
or RTL==1
,
+ * never MIXED
.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if runIndex
is not in
+ * the range 0<=runIndex<countRuns()
+ *
+ * @see #countRuns()
+ * @see com.ibm.icu.text.BidiRun
+ * @see com.ibm.icu.text.BidiRun#getStart()
+ * @see com.ibm.icu.text.BidiRun#getLength()
+ * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
+ * @stable ICU 3.8
+ */
+ public BidiRun getVisualRun(int runIndex)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * Bidi
object, then calling
+ * getLogicalMap()
is more efficient.
+ *
+ * The value returned may be MAP_NOWHERE
if there is no
+ * visual position because the corresponding text character is a Bidi
+ * control removed from output by the option
+ * OPTION_REMOVE_CONTROLS
.
+ *
+ * When the visual output is altered by using options of
+ * writeReordered()
such as INSERT_LRM_FOR_NUMERIC
,
+ * KEEP_BASE_COMBINING
, OUTPUT_REVERSE
,
+ * REMOVE_BIDI_CONTROLS
, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
+ *
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of {@link #writeReordered}
, optionally with the
+ * {@link #KEEP_BASE_COMBINING}
option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @return The visual position of this character.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if logicalIndex
is not in
+ * the range 0<=logicalIndex<getProcessedLength()
+ *
+ * @see #getLogicalMap
+ * @see #getLogicalIndex
+ * @see #getProcessedLength
+ * @see #MAP_NOWHERE
+ * @see #OPTION_REMOVE_CONTROLS
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public int getVisualIndex(int logicalIndex)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+
+ /**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * Bidi
object, then calling
+ * getVisualMap()
is more efficient.
+ *
+ * The value returned may be MAP_NOWHERE
if there is no
+ * logical position because the corresponding text character is a Bidi
+ * mark inserted in the output by option
+ * OPTION_INSERT_MARKS
.
+ *
+ * This is the inverse method to getVisualIndex()
.
+ *
+ * When the visual output is altered by using options of
+ * writeReordered()
such as INSERT_LRM_FOR_NUMERIC
,
+ * KEEP_BASE_COMBINING
, OUTPUT_REVERSE
,
+ * REMOVE_BIDI_CONTROLS
, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @return The index of this character in the text.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if visualIndex
is not in
+ * the range 0<=visualIndex<getResultLength()
+ *
+ * @see #getVisualMap
+ * @see #getVisualIndex
+ * @see #getResultLength
+ * @see #MAP_NOWHERE
+ * @see #OPTION_INSERT_MARKS
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public int getLogicalIndex(int visualIndex)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get a logical-to-visual index map (array) for the characters in the
+ * Bidi
(paragraph or line) object.
+ *
+ * Some values in the map may be MAP_NOWHERE
if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option OPTION_REMOVE_CONTROLS
.
+ *
+ * When the visual output is altered by using options of
+ * writeReordered()
such as INSERT_LRM_FOR_NUMERIC
,
+ * KEEP_BASE_COMBINING
, OUTPUT_REVERSE
,
+ * REMOVE_BIDI_CONTROLS
, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
+ *
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of {@link #writeReordered}
, optionally with the
+ * {@link #KEEP_BASE_COMBINING}
option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @return an array of getProcessedLength()
+ * indexes which will reflect the reordering of the characters.
+ * The index map will result in
+ * indexMap[logicalIndex]==visualIndex
, where
+ * indexMap
represents the returned array.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @see #getVisualMap
+ * @see #getVisualIndex
+ * @see #getProcessedLength
+ * @see #MAP_NOWHERE
+ * @see #OPTION_REMOVE_CONTROLS
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public int[] getLogicalMap()
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Get a visual-to-logical index map (array) for the characters in the
+ * Bidi
(paragraph or line) object.
+ *
+ * Some values in the map may be MAP_NOWHERE
if the
+ * corresponding text characters are Bidi marks inserted in the visual
+ * output by the option OPTION_INSERT_MARKS
.
+ *
+ * When the visual output is altered by using options of
+ * writeReordered()
such as INSERT_LRM_FOR_NUMERIC
,
+ * KEEP_BASE_COMBINING
, OUTPUT_REVERSE
,
+ * REMOVE_BIDI_CONTROLS
, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
+ *
+ * @return an array of getResultLength()
+ * indexes which will reflect the reordering of the characters.
+ * The index map will result in
+ * indexMap[visualIndex]==logicalIndex
, where
+ * indexMap
represents the returned array.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @see #getLogicalMap
+ * @see #getLogicalIndex
+ * @see #getResultLength
+ * @see #MAP_NOWHERE
+ * @see #OPTION_INSERT_MARKS
+ * @see #writeReordered
+ * @stable ICU 3.8
+ */
+ public int[] getVisualMap()
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * This is a convenience method that does not use a Bidi
object.
+ * It is intended to be used when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using getLogicalMap()
on a
+ * Bidi
object.
+ *
+ * @param levels is an array of levels that have been determined by
+ * the application.
+ *
+ * @return an array of levels.length
+ * indexes which will reflect the reordering of the characters.
+ * The index map will result in
+ * indexMap[logicalIndex]==visualIndex
, where
+ * indexMap
represents the returned array.
+ *
+ * @stable ICU 3.8
+ */
+ public static int[] reorderLogical(byte[] levels)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * This is a convenience method that does not use a Bidi
object.
+ * It is intended to be used when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using getVisualMap()
on a
+ * Bidi
object.
+ *
+ * @param levels is an array of levels that have been determined by
+ * the application.
+ *
+ * @return an array of levels.length
+ * indexes which will reflect the reordering of the characters.
+ * The index map will result in
+ * indexMap[visualIndex]==logicalIndex
, where
+ * indexMap
represents the returned array.
+ *
+ * @stable ICU 3.8
+ */
+ public static int[] reorderVisual(byte[] levels)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Invert an index map.
+ * The index mapping of the argument map is inverted and returned as
+ * an array of indexes that we will call the inverse map.
+ *
+ * @param srcMap is an array whose elements define the original mapping
+ * from a source array to a destination array.
+ * Some elements of the source array may have no mapping in the
+ * destination array. In that case, their value will be
+ * the special value MAP_NOWHERE
.
+ * All elements must be >=0 or equal to MAP_NOWHERE
.
+ * Some elements in the source map may have a value greater than the
+ * srcMap.length if the destination array has more elements than the
+ * source array.
+ * There must be no duplicate indexes (two or more elements with the
+ * same value except MAP_NOWHERE
).
+ *
+ * @return an array representing the inverse map.
+ * This array has a number of elements equal to 1 + the highest
+ * value in srcMap
.
+ * For elements of the result array which have no matching elements
+ * in the source array, the corresponding elements in the inverse
+ * map will receive a value equal to MAP_NOWHERE
.
+ * If element with index i in srcMap
has a value k different
+ * from MAP_NOWHERE
, this means that element i of
+ * the source array maps to element k in the destination array.
+ * The inverse map will have value i in its k-th element.
+ * For all elements of the destination array which do not map to
+ * an element in the source array, the corresponding element in the
+ * inverse map will have a value equal to MAP_NOWHERE
.
+ *
+ * @see #MAP_NOWHERE
+ * @stable ICU 3.8
+ */
+ public static int[] invertMap(int[] srcMap)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /*
+ * Fields and methods for compatibility with java.text.Bidi (Sun implementation)
+ */
+
+ /**
+ * Constant indicating base direction is left-to-right.
+ * @stable ICU 3.8
+ */
+ public static final int DIRECTION_LEFT_TO_RIGHT = LTR;
+
+ /**
+ * Constant indicating base direction is right-to-left.
+ * @stable ICU 3.8
+ */
+ public static final int DIRECTION_RIGHT_TO_LEFT = RTL;
+
+ /**
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present, the base
+ * direction is left-to-right.
+ * @stable ICU 3.8
+ */
+ public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = LEVEL_DEFAULT_LTR;
+
+ /**
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present, the base
+ * direction is right-to-left.
+ * @stable ICU 3.8
+ */
+ public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL;
+
+ /**
+ * Create Bidi from the given paragraph of text and base direction.
+ *
+ * @param paragraph a paragraph of text
+ * @param flags a collection of flags that control the algorithm. The
+ * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
+ * DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
+ * DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
+ * @see #DIRECTION_LEFT_TO_RIGHT
+ * @see #DIRECTION_RIGHT_TO_LEFT
+ * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
+ * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
+ * @stable ICU 3.8
+ */
+ public Bidi(String paragraph, int flags)
+ {
+ // Note: ICU and Oracle JDK are using the
+ // same DIRECTION_* flags definitions.
+ this(new java.text.Bidi(paragraph, flags));
+ }
+
+ /**
+ * Create Bidi from the given paragraph of text.
+ *
+ * The RUN_DIRECTION attribute in the text, if present, determines the base
+ * direction (left-to-right or right-to-left). If not present, the base
+ * direction is computed using the Unicode Bidirectional Algorithm,
+ * defaulting to left-to-right if there are no strong directional characters
+ * in the text. This attribute, if present, must be applied to all the text
+ * in the paragraph.
+ *
+ * The BIDI_EMBEDDING attribute in the text, if present, represents
+ * embedding level information. Negative values from -1 to -62 indicate
+ * overrides at the absolute value of the level. Positive values from 1 to
+ * 62 indicate embeddings. Where values are zero or not defined, the base
+ * embedding level as determined by the base direction is assumed.
+ *
+ * The NUMERIC_SHAPING attribute in the text, if present, converts European
+ * digits to other decimal digits before running the bidi algorithm. This
+ * attribute, if present, must be applied to all the text in the paragraph.
+ *
+ * Note: this constructor calls setPara() internally.
+ *
+ * @param paragraph a paragraph of text with optional character and
+ * paragraph attribute information
+ * @stable ICU 3.8
+ */
+ public Bidi(AttributedCharacterIterator paragraph)
+ {
+ // ICU does not define its own attributes and just
+ // uses java.awt.font.TextAttribute. Thus, no mappings
+ // are necessary.
+ this(new java.text.Bidi(paragraph));
+ }
+
+ /**
+ * Create Bidi from the given text, embedding, and direction information.
+ * The embeddings array may be null. If present, the values represent
+ * embedding level information. Negative values from -1 to -61 indicate
+ * overrides at the absolute value of the level. Positive values from 1 to
+ * 61 indicate embeddings. Where values are zero, the base embedding level
+ * as determined by the base direction is assumed.
+ *
+ * Note: this constructor calls setPara() internally.
+ *
+ * @param text an array containing the paragraph of text to process.
+ * @param textStart the index into the text array of the start of the
+ * paragraph.
+ * @param embeddings an array containing embedding values for each character
+ * in the paragraph. This can be null, in which case it is assumed
+ * that there is no external embedding information.
+ * @param embStart the index into the embedding array of the start of the
+ * paragraph.
+ * @param paragraphLength the length of the paragraph in the text and
+ * embeddings arrays.
+ * @param flags a collection of flags that control the algorithm. The
+ * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
+ * DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
+ * DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
+ *
+ * @throws IllegalArgumentException if the values in embeddings are
+ * not within the allowed range
+ *
+ * @see #DIRECTION_LEFT_TO_RIGHT
+ * @see #DIRECTION_RIGHT_TO_LEFT
+ * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
+ * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
+ * @stable ICU 3.8
+ */
+ public Bidi(char[] text,
+ int textStart,
+ byte[] embeddings,
+ int embStart,
+ int paragraphLength,
+ int flags)
+ {
+ // Note: ICU and Oracle JDK are using the
+ // same DIRECTION_* flags definitions.
+ this(new java.text.Bidi(text, textStart, embeddings, embStart, paragraphLength, flags));
+ }
+
+ /**
+ * Create a Bidi object representing the bidi information on a line of text
+ * within the paragraph represented by the current Bidi. This call is not
+ * required if the entire paragraph fits on one line.
+ *
+ * @param lineStart the offset from the start of the paragraph to the start
+ * of the line.
+ * @param lineLimit the offset from the start of the paragraph to the limit
+ * of the line.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
+ * @throws IllegalArgumentException if lineStart and lineLimit are not in the range
+ * 0<=lineStart<lineLimit<=getProcessedLength()
,
+ * or if the specified line crosses a paragraph boundary
+ * @stable ICU 3.8
+ */
+ public Bidi createLineBidi(int lineStart, int lineLimit)
+ {
+ return new Bidi(bidi.createLineBidi(lineStart, lineLimit));
+ }
+
+ /**
+ * Return true if the line is not left-to-right or right-to-left. This means
+ * it either has mixed runs of left-to-right and right-to-left text, or the
+ * base direction differs from the direction of the only run of text.
+ *
+ * @return true if the line is not left-to-right or right-to-left.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
+ * @stable ICU 3.8
+ */
+ public boolean isMixed()
+ {
+ return bidi.isMixed();
+ }
+
+ /**
+ * Return true if the line is all left-to-right text and the base direction
+ * is left-to-right.
+ *
+ * @return true if the line is all left-to-right text and the base direction
+ * is left-to-right.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
+ * @stable ICU 3.8
+ */
+ public boolean isLeftToRight()
+ {
+ return bidi.isLeftToRight();
+ }
+
+ /**
+ * Return true if the line is all right-to-left text, and the base direction
+ * is right-to-left
+ *
+ * @return true if the line is all right-to-left text, and the base
+ * direction is right-to-left
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
+ * @stable ICU 3.8
+ */
+ public boolean isRightToLeft()
+ {
+ return bidi.isRightToLeft();
+ }
+
+ /**
+ * Return true if the base direction is left-to-right
+ *
+ * @return true if the base direction is left-to-right
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @stable ICU 3.8
+ */
+ public boolean baseIsLeftToRight()
+ {
+ return bidi.baseIsLeftToRight();
+ }
+
+ /**
+ * Return the base level (0 if left-to-right, 1 if right-to-left).
+ *
+ * @return the base level
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @stable ICU 3.8
+ */
+ public int getBaseLevel()
+ {
+ return bidi.getBaseLevel();
+ }
+
+ /**
+ * Return the number of level runs.
+ *
+ * @return the number of level runs
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @stable ICU 3.8
+ */
+ public int getRunCount()
+ {
+ return bidi.getRunCount();
+ }
+
+ /**
+ * Return the level of the nth logical run in this line.
+ *
+ * @param run the index of the run, between 0 and countRuns()-1
+ *
+ * @return the level of the run
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if run
is not in
+ * the range 0<=run<countRuns()
+ * @stable ICU 3.8
+ */
+ public int getRunLevel(int run)
+ {
+ return bidi.getRunLevel(run);
+ }
+
+ /**
+ * Return the index of the character at the start of the nth logical run in
+ * this line, as an offset from the start of the line.
+ *
+ * @param run the index of the run, between 0 and countRuns()-1
+ *
+ * @return the start of the run
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if run
is not in
+ * the range 0<=run<countRuns()
+ * @stable ICU 3.8
+ */
+ public int getRunStart(int run)
+ {
+ return bidi.getRunStart(run);
+ }
+
+ /**
+ * Return the index of the character past the end of the nth logical run in
+ * this line, as an offset from the start of the line. For example, this
+ * will return the length of the line for the last run on the line.
+ *
+ * @param run the index of the run, between 0 and countRuns()-1
+ *
+ * @return the limit of the run
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ * @throws IllegalArgumentException if run
is not in
+ * the range 0<=run<countRuns()
+ * @stable ICU 3.8
+ */
+ public int getRunLimit(int run)
+ {
+ return bidi.getRunLimit(run);
+ }
+
+ /**
+ * Return true if the specified text requires bidi analysis. If this returns
+ * false, the text will display left-to-right. Clients can then avoid
+ * constructing a Bidi object. Text in the Arabic Presentation Forms area of
+ * Unicode is presumed to already be shaped and ordered for display, and so
+ * will not cause this method to return true.
+ *
+ * @param text the text containing the characters to test
+ * @param start the start of the range of characters to test
+ * @param limit the limit of the range of characters to test
+ *
+ * @return true if the range of characters requires bidi analysis
+ *
+ * @stable ICU 3.8
+ */
+ public static boolean requiresBidi(char[] text,
+ int start,
+ int limit)
+ {
+ return java.text.Bidi.requiresBidi(text, start, limit);
+ }
+
+ /**
+ * Reorder the objects in the array into visual order based on their levels.
+ * This is a utility method to use when you have a collection of objects
+ * representing runs of text in logical order, each run containing text at a
+ * single level. The elements at index from
+ * objectStart up to objectStart + count in the
+ * objects array will be reordered into visual order assuming
+ * each run of text has the level indicated by the corresponding element in
+ * the levels array (at index - objectStart + levelStart).
+ *
+ * @param levels an array representing the bidi level of each object
+ * @param levelStart the start position in the levels array
+ * @param objects the array of objects to be reordered into visual order
+ * @param objectStart the start position in the objects array
+ * @param count the number of objects to reorder
+ * @stable ICU 3.8
+ */
+ public static void reorderVisually(byte[] levels,
+ int levelStart,
+ Object[] objects,
+ int objectStart,
+ int count)
+ {
+ java.text.Bidi.reorderVisually(levels, levelStart, objects, objectStart, count);
+ }
+
+ /**
+ * Take a Bidi
object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * setPara()
or for a line of text set by setLine()
+ * and return a string containing the reordered text.
+ *
+ *
The text may have been aliased (only a reference was stored
+ * without copying the contents), thus it must not have been modified
+ * since the setPara()
call.
+ *
+ * This method preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the returned string. Note that "real" mirroring has to be done in a
+ * rendering engine by glyph selection and that for many "mirrored"
+ * characters there are no Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the descriptions of the return value and the
+ * options
parameter, and of the option bit flags.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * DO_MIRRORING
,
+ * INSERT_LRM_FOR_NUMERIC
,
+ * KEEP_BASE_COMBINING
,
+ * OUTPUT_REVERSE
,
+ * REMOVE_BIDI_CONTROLS
,
+ * STREAMING
+ *
+ * @return The reordered text.
+ * If the INSERT_LRM_FOR_NUMERIC
option is set, then
+ * the length of the returned string could be as large as
+ * getLength()+2*countRuns()
.
+ * If the REMOVE_BIDI_CONTROLS
option is set, then the
+ * length of the returned string may be less than
+ * getLength()
.
+ * If none of these options is set, then the length of the returned
+ * string will be exactly getProcessedLength()
.
+ *
+ * @throws IllegalStateException if this call is not preceded by a successful
+ * call to setPara
or setLine
+ *
+ * @see #DO_MIRRORING
+ * @see #INSERT_LRM_FOR_NUMERIC
+ * @see #KEEP_BASE_COMBINING
+ * @see #OUTPUT_REVERSE
+ * @see #REMOVE_BIDI_CONTROLS
+ * @see #OPTION_STREAMING
+ * @see #getProcessedLength
+ * @stable ICU 3.8
+ */
+ public String writeReordered(int options)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This method preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This method is the implementation for reversing RTL runs as part
+ * of writeReordered()
. For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed src.length()
.
+ *
+ * @see #writeReordered
+ *
+ * @param src The RTL run text.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * See the options
parameter in writeReordered()
.
+ *
+ * @return The reordered text.
+ * If the REMOVE_BIDI_CONTROLS
option
+ * is set, then the length of the returned string may be less than
+ * src.length()
. If this option is not set,
+ * then the length of the returned string will be exactly
+ * src.length()
.
+ *
+ * @throws IllegalArgumentException if src
is null.
+ * @stable ICU 3.8
+ */
+ public static String writeReverse(String src, int options)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiClassifier.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiClassifier.java
new file mode 100644
index 00000000000..39ffdf1411a
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiClassifier.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+/*
+ * Empty stub
+ */
+public class BidiClassifier {
+ private BidiClassifier() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiRun.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiRun.java
new file mode 100644
index 00000000000..b0a74483aea
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BidiRun.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+/*
+ * Empty stub
+ */
+public class BidiRun {
+ private BidiRun() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BreakIterator.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BreakIterator.java
new file mode 100644
index 00000000000..bf915953388
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/BreakIterator.java
@@ -0,0 +1,833 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Locale;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * {@icuenhanced java.text.BreakIterator}.{@icu _usage_}
+ *
+ * A class that locates boundaries in text. This class defines a protocol for
+ * objects that break up a piece of natural-language text according to a set
+ * of criteria. Instances or subclasses of BreakIterator can be provided, for
+ * example, to break a piece of text into words, sentences, or logical characters
+ * according to the conventions of some language or group of languages.
+ *
+ * We provide five built-in types of BreakIterator:
+ *
+ * getTitleInstance() returns a BreakIterator that locates boundaries
+ * between title breaks.
+ * getSentenceInstance() returns a BreakIterator that locates boundaries
+ * between sentences. This is useful for triple-click selection, for example.
+ * getWordInstance() returns a BreakIterator that locates boundaries between
+ * words. This is useful for double-click selection or "find whole words" searches.
+ * This type of BreakIterator makes sure there is a boundary position at the
+ * beginning and end of each legal word. (Numbers count as words, too.) Whitespace
+ * and punctuation are kept separate from real words.
+ * getLineInstance() returns a BreakIterator that locates positions where it is
+ * legal for a text editor to wrap lines. This is similar to word breaking, but
+ * not the same: punctuation and whitespace are generally kept with words (you don't
+ * want a line to start with whitespace, for example), and some special characters
+ * can force a position to be considered a line-break position or prevent a position
+ * from being a line-break position.
+ * getCharacterInstance() returns a BreakIterator that locates boundaries between
+ * logical characters. Because of the structure of the Unicode encoding, a logical
+ * character may be stored internally as more than one Unicode code point. (A with an
+ * umlaut may be stored as an a followed by a separate combining umlaut character,
+ * for example, but the user still thinks of it as one character.) This iterator allows
+ * various processes (especially text editors) to treat as characters the units of text
+ * that a user would think of as characters, rather than the units of text that the
+ * computer sees as "characters".
+ *
+ * BreakIterator's interface follows an "iterator" model (hence the name), meaning it
+ * has a concept of a "current position" and methods like first(), last(), next(),
+ * and previous() that update the current position. All BreakIterators uphold the
+ * following invariants:
+ * The beginning and end of the text are always treated as boundary positions.
+ * The current position of the iterator is always a boundary position (random-
+ * access methods move the iterator to the nearest boundary position before or
+ * after the specified position, not _to_ the specified position).
+ * DONE is used as a flag to indicate when iteration has stopped. DONE is only
+ * returned when the current position is the end of the text and the user calls next(),
+ * or when the current position is the beginning of the text and the user calls
+ * previous().
+ * Break positions are numbered by the positions of the characters that follow
+ * them. Thus, under normal circumstances, the position before the first character
+ * is 0, the position after the first character is 1, and the position after the
+ * last character is 1 plus the length of the string.
+ * The client can change the position of an iterator, or the text it analyzes,
+ * at will, but cannot change the behavior. If the user wants different behavior, he
+ * must instantiate a new iterator.
+ *
+ * BreakIterator accesses the text it analyzes through a CharacterIterator, which makes
+ * it possible to use BreakIterator to analyze text in any text-storage vehicle that
+ * provides a CharacterIterator interface.
+ *
+ * Note: Some types of BreakIterator can take a long time to create, and
+ * instances of BreakIterator are not currently cached by the system. For
+ * optimal performance, keep instances of BreakIterator around as long as makes
+ * sense. For example, when word-wrapping a document, don't create and destroy a
+ * new BreakIterator for each line. Create one break iterator for the whole document
+ * (or whatever stretch of text you're wrapping) and use it to do the whole job of
+ * wrapping the text.
+ *
+ *
+ * Examples :
+ * Creating and using text boundaries
+ *
+ *
+ * public static void main(String args[]) {
+ * if (args.length == 1) {
+ * String stringToExamine = args[0];
+ * //print each word in order
+ * BreakIterator boundary = BreakIterator.getWordInstance();
+ * boundary.setText(stringToExamine);
+ * printEachForward(boundary, stringToExamine);
+ * //print each sentence in reverse order
+ * boundary = BreakIterator.getSentenceInstance(Locale.US);
+ * boundary.setText(stringToExamine);
+ * printEachBackward(boundary, stringToExamine);
+ * printFirst(boundary, stringToExamine);
+ * printLast(boundary, stringToExamine);
+ * }
+ * }
+ *
+ *
+ *
+ * Print each element in order
+ *
+ *
+ * public static void printEachForward(BreakIterator boundary, String source) {
+ * int start = boundary.first();
+ * for (int end = boundary.next();
+ * end != BreakIterator.DONE;
+ * start = end, end = boundary.next()) {
+ * System.out.println(source.substring(start,end));
+ * }
+ * }
+ *
+ *
+ *
+ * Print each element in reverse order
+ *
+ *
+ * public static void printEachBackward(BreakIterator boundary, String source) {
+ * int end = boundary.last();
+ * for (int start = boundary.previous();
+ * start != BreakIterator.DONE;
+ * end = start, start = boundary.previous()) {
+ * System.out.println(source.substring(start,end));
+ * }
+ * }
+ *
+ *
+ *
+ * Print first element
+ *
+ *
+ * public static void printFirst(BreakIterator boundary, String source) {
+ * int start = boundary.first();
+ * int end = boundary.next();
+ * System.out.println(source.substring(start,end));
+ * }
+ *
+ *
+ *
+ * Print last element
+ *
+ *
+ * public static void printLast(BreakIterator boundary, String source) {
+ * int end = boundary.last();
+ * int start = boundary.previous();
+ * System.out.println(source.substring(start,end));
+ * }
+ *
+ *
+ *
+ * Print the element at a specified position
+ *
+ *
+ * public static void printAt(BreakIterator boundary, int pos, String source) {
+ * int end = boundary.following(pos);
+ * int start = boundary.previous();
+ * System.out.println(source.substring(start,end));
+ * }
+ *
+ *
+ *
+ * Find the next word
+ *
+ *
+ * public static int nextWordStartAfter(int pos, String text) {
+ * BreakIterator wb = BreakIterator.getWordInstance();
+ * wb.setText(text);
+ * int last = wb.following(pos);
+ * int current = wb.next();
+ * while (current != BreakIterator.DONE) {
+ * for (int p = last; p < current; p++) {
+ * if (Character.isLetter(text.charAt(p)))
+ * return last;
+ * }
+ * last = current;
+ * current = wb.next();
+ * }
+ * return BreakIterator.DONE;
+ * }
+ *
+ * (The iterator returned by BreakIterator.getWordInstance() is unique in that
+ * the break positions it returns don't represent both the start and end of the
+ * thing being iterated over. That is, a sentence-break iterator returns breaks
+ * that each represent the end of one sentence and the beginning of the next.
+ * With the word-break iterator, the characters between two boundaries might be a
+ * word, or they might be the punctuation or whitespace between two words. The
+ * above code uses a simple heuristic to determine which boundary is the beginning
+ * of a word: If the characters between this boundary and the next boundary
+ * include at least one letter (this can be an alphabetical letter, a CJK ideograph,
+ * a Hangul syllable, a Kana character, etc.), then the text between this boundary
+ * and the next is a word; otherwise, it's the material between words.)
+ *
+ *
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ *
+ */
+
+public abstract class BreakIterator implements Cloneable
+{
+
+ /**
+ * Default constructor. There is no state that is carried by this abstract
+ * base class.
+ * @stable ICU 2.0
+ */
+    protected BreakIterator()
+    {
+        // Intentionally empty: this base class has no fields to initialize.
+    }
+
+ /**
+ * Clone method. Creates another BreakIterator with the same behavior and
+ * current state as this one.
+ * @return The clone.
+ * @stable ICU 2.0
+ */
+    public Object clone()
+    {
+        try {
+            // Object.clone() produces a field-by-field copy of this iterator.
+            return super.clone();
+        }
+        catch (CloneNotSupportedException e) {
+            ///CLOVER:OFF
+            // Not expected, because this class implements Cloneable. Keep the
+            // original exception as the cause so it is not lost if it ever occurs.
+            throw new IllegalStateException(e);
+            ///CLOVER:ON
+        }
+    }
+
+ /**
+ * DONE is returned by previous() and next() after all valid
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ public static final int DONE = -1;
+
+ /**
+ * Return the first boundary position. This is always the beginning
+ * index of the text this iterator iterates over. For example, if
+ * the iterator iterates over a whole string, this function will
+ * always return 0. This function also updates the iteration position
+ * to point to the beginning of the text.
+ * @return The character offset of the beginning of the stretch of text
+ * being broken.
+ * @stable ICU 2.0
+ */
+ public abstract int first();
+
+ /**
+ * Return the last boundary position. This is always the "past-the-end"
+ * index of the text this iterator iterates over. For example, if the
+ * iterator iterates over a whole string (call it "text"), this function
+ * will always return text.length(). This function also updates the
+ * iteration position to point to the end of the text.
+ * @return The character offset of the end of the stretch of text
+ * being broken.
+ * @stable ICU 2.0
+ */
+ public abstract int last();
+
+ /**
+ * Advances the specified number of steps forward in the text (a negative
+ * number, therefore, advances backwards). If this causes the iterator
+ * to advance off either end of the text, this function returns DONE;
+ * otherwise, this function returns the position of the appropriate
+ * boundary. Calling this function is equivalent to calling next() or
+ * previous() n times.
+ * @param n The number of boundaries to advance over (if positive, moves
+ * forward; if negative, moves backwards).
+ * @return The position of the boundary n boundaries from the current
+ * iteration position, or DONE if moving n boundaries causes the iterator
+ * to advance off either end of the text.
+ * @stable ICU 2.0
+ */
+ public abstract int next(int n);
+
+ /**
+ * Advances the iterator forward one boundary. The current iteration
+ * position is updated to point to the next boundary position after the
+ * current position, and this is also the value that is returned. If
+ * the current position is equal to the value returned by last(), or to
+ * DONE, this function returns DONE and sets the current position to
+ * DONE.
+ * @return The position of the first boundary position following the
+ * iteration position.
+ * @stable ICU 2.0
+ */
+ public abstract int next();
+
+ /**
+ * Advances the iterator backward one boundary. The current iteration
+ * position is updated to point to the last boundary position before
+ * the current position, and this is also the value that is returned. If
+ * the current position is equal to the value returned by first(), or to
+ * DONE, this function returns DONE and sets the current position to
+ * DONE.
+ * @return The position of the last boundary position preceding the
+ * iteration position.
+ * @stable ICU 2.0
+ */
+ public abstract int previous();
+
+ /**
+ * Sets the iterator's current iteration position to be the first
+ * boundary position following the specified position. (Whether the
+ * specified position is itself a boundary position or not doesn't
+ * matter-- this function always moves the iteration position to the
+ * first boundary after the specified position.) If the specified
+ * position is the past-the-end position, returns DONE.
+ * @param offset The character position to start searching from.
+ * @return The position of the first boundary position following
+ * "offset" (whether or not "offset" itself is a boundary position),
+ * or DONE if "offset" is the past-the-end offset.
+ * @stable ICU 2.0
+ */
+ public abstract int following(int offset);
+
+ /**
+ * Sets the iterator's current iteration position to be the last
+ * boundary position preceding the specified position. (Whether the
+ * specified position is itself a boundary position or not doesn't
+ * matter-- this function always moves the iteration position to the
+ * last boundary before the specified position.) If the specified
+ * position is the starting position, returns DONE.
+ * @param offset The character position to start searching from.
+ * @return The position of the last boundary position preceding
+ * "offset" (whether or not "offset" itself is a boundary position),
+ * or DONE if "offset" is the starting offset of the iterator.
+ * @stable ICU 2.0
+ */
+    public int preceding(int offset) {
+        // Generic fallback built only on following() and previous(); concrete
+        // subclasses can override it with something faster.
+        int boundary = following(offset);
+        for (;;) {
+            // Stop once iteration ran off the text or a boundary strictly
+            // before the requested offset has been reached.
+            if (boundary == DONE || boundary < offset) {
+                return boundary;
+            }
+            boundary = previous();
+        }
+    }
+
+ /**
+ * Return true if the specified position is a boundary position. If the
+ * function returns true, the current iteration position is set to the
+ * specified position; if the function returns false, the current
+ * iteration position is set as though following() had been called.
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ public boolean isBoundary(int offset) {
+ // Again, this is the default implementation, which is provided solely because
+ // we couldn't add a new abstract method to an existing class. The real
+ // implementations will usually need to do a little more work.
+ if (offset == 0) {
+ return true;
+ }
+ else
+ return following(offset - 1) == offset;
+ }
+
+ /**
+ * Return the iterator's current position.
+ * @return The iterator's current position.
+ * @stable ICU 2.0
+ */
+ public abstract int current();
+
+ /**
+ * Returns a CharacterIterator over the text being analyzed.
+ * For at least some subclasses of BreakIterator, this is a reference
+ * to the actual iterator being used by the BreakIterator,
+ * and therefore, this function's return value should be treated as
+ * const . No guarantees are made about the current position
+ * of this iterator when it is returned. If you need to move that
+ * position to examine the text, clone this function's return value first.
+ * @return A CharacterIterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ public abstract CharacterIterator getText();
+
+ /**
+ * Sets the iterator to analyze a new piece of text. The new
+ * piece of text is passed in as a String, and the current
+ * iteration position is reset to the beginning of the string.
+ * (The old text is dropped.)
+ * @param newText A String containing the text to analyze with
+ * this BreakIterator.
+ * @stable ICU 2.0
+ */
+    public void setText(String newText)
+    {
+        // Wrap the string in a CharacterIterator and hand it to the
+        // CharacterIterator overload, which does the real work.
+        final CharacterIterator textIterator = new StringCharacterIterator(newText);
+        setText(textIterator);
+    }
+
+ /**
+ * Sets the iterator to analyze a new piece of text. The
+ * BreakIterator is passed a CharacterIterator through which
+ * it will access the text itself. The current iteration
+ * position is reset to the CharacterIterator's start index.
+ * (The old iterator is dropped.)
+ * @param newText A CharacterIterator referring to the text
+ * to analyze with this BreakIterator (the iterator's current
+ * position is ignored, but its other state is significant).
+ * @stable ICU 2.0
+ */
+ public abstract void setText(CharacterIterator newText);
+
+ /**
+ * {@icu}
+ * @stable ICU 2.4
+ */
+ public static final int KIND_CHARACTER = 0;
+ /**
+ * {@icu}
+ * @stable ICU 2.4
+ */
+ public static final int KIND_WORD = 1;
+ /**
+ * {@icu}
+ * @stable ICU 2.4
+ */
+ public static final int KIND_LINE = 2;
+ /**
+ * {@icu}
+ * @stable ICU 2.4
+ */
+ public static final int KIND_SENTENCE = 3;
+ /**
+ * {@icu}
+ * @stable ICU 2.4
+ */
+ public static final int KIND_TITLE = 4;
+
+ /**
+ * Returns a new instance of BreakIterator that locates word boundaries.
+ * This function assumes that the text being analyzed is in the default
+ * locale's language.
+ * @return An instance of BreakIterator that locates word boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getWordInstance()
+    {
+        // Use the JVM's current default locale and defer to the Locale overload.
+        final Locale defaultLocale = Locale.getDefault();
+        return getWordInstance(defaultLocale);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates word boundaries.
+ * @param where A locale specifying the language of the text to be
+ * analyzed.
+ * @return An instance of BreakIterator that locates word boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getWordInstance(Locale where)
+    {
+        // KIND_WORD selects the word-boundary iterator in the shared factory.
+        final BreakIterator wordIterator = getBreakInstance(where, KIND_WORD);
+        return wordIterator;
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates word boundaries.
+ * @param where A locale specifying the language of the text to be
+ * analyzed.
+ * @return An instance of BreakIterator that locates word boundaries.
+ * @stable ICU 3.2
+ */
+    public static BreakIterator getWordInstance(ULocale where)
+    {
+        // Convert to a java.util.Locale first, then use the same factory as
+        // the Locale overload.
+        final Locale javaLocale = where.toLocale();
+        return getBreakInstance(javaLocale, KIND_WORD);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates legal line-
+ * wrapping positions. This function assumes the text being broken
+ * is in the default locale's language.
+ * @return A new instance of BreakIterator that locates legal
+ * line-wrapping positions.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getLineInstance()
+    {
+        // Use the JVM's current default locale and defer to the Locale overload.
+        final Locale defaultLocale = Locale.getDefault();
+        return getLineInstance(defaultLocale);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates legal line-
+ * wrapping positions.
+ * @param where A Locale specifying the language of the text being broken.
+ * @return A new instance of BreakIterator that locates legal
+ * line-wrapping positions.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getLineInstance(Locale where)
+    {
+        // KIND_LINE selects the line-break iterator in the shared factory.
+        final BreakIterator lineIterator = getBreakInstance(where, KIND_LINE);
+        return lineIterator;
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates legal line-
+ * wrapping positions.
+ * @param where A Locale specifying the language of the text being broken.
+ * @return A new instance of BreakIterator that locates legal
+ * line-wrapping positions.
+ * @stable ICU 3.2
+ */
+    public static BreakIterator getLineInstance(ULocale where)
+    {
+        // Convert to a java.util.Locale first, then use the same factory as
+        // the Locale overload.
+        final Locale javaLocale = where.toLocale();
+        return getBreakInstance(javaLocale, KIND_LINE);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates logical-character
+ * boundaries. This function assumes that the text being analyzed is
+ * in the default locale's language.
+ * @return A new instance of BreakIterator that locates logical-character
+ * boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getCharacterInstance()
+    {
+        // Use the JVM's current default locale and defer to the Locale overload.
+        final Locale defaultLocale = Locale.getDefault();
+        return getCharacterInstance(defaultLocale);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates logical-character
+ * boundaries.
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates logical-character
+ * boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getCharacterInstance(Locale where)
+    {
+        // KIND_CHARACTER selects the logical-character iterator in the shared factory.
+        final BreakIterator characterIterator = getBreakInstance(where, KIND_CHARACTER);
+        return characterIterator;
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates logical-character
+ * boundaries.
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates logical-character
+ * boundaries.
+ * @stable ICU 3.2
+ */
+    public static BreakIterator getCharacterInstance(ULocale where)
+    {
+        // Convert to a java.util.Locale first, then use the same factory as
+        // the Locale overload.
+        final Locale javaLocale = where.toLocale();
+        return getBreakInstance(javaLocale, KIND_CHARACTER);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates sentence boundaries.
+ * This function assumes the text being analyzed is in the default locale's
+ * language.
+ * @return A new instance of BreakIterator that locates sentence boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getSentenceInstance()
+    {
+        // Use the JVM's current default locale and defer to the Locale overload.
+        final Locale defaultLocale = Locale.getDefault();
+        return getSentenceInstance(defaultLocale);
+    }
+
+ /**
+ * Returns a new instance of BreakIterator that locates sentence boundaries.
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates sentence boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getSentenceInstance(Locale where)
+    {
+        // KIND_SENTENCE selects the sentence-boundary iterator in the shared factory.
+        final BreakIterator sentenceIterator = getBreakInstance(where, KIND_SENTENCE);
+        return sentenceIterator;
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates sentence boundaries.
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates sentence boundaries.
+ * @stable ICU 3.2
+ */
+    public static BreakIterator getSentenceInstance(ULocale where)
+    {
+        // Convert to a java.util.Locale first, then use the same factory as
+        // the Locale overload.
+        final Locale javaLocale = where.toLocale();
+        return getBreakInstance(javaLocale, KIND_SENTENCE);
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
+ * This function assumes the text being analyzed is in the default locale's
+ * language. The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use a word boundary iterator. {@link #getWordInstance}
+ * @return A new instance of BreakIterator that locates title boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getTitleInstance()
+    {
+        // Use the JVM's current default locale and defer to the Locale overload.
+        final Locale defaultLocale = Locale.getDefault();
+        return getTitleInstance(defaultLocale);
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
+ * The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.{@link #getWordInstance}
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates title boundaries.
+ * @stable ICU 2.0
+ */
+    public static BreakIterator getTitleInstance(Locale where)
+    {
+        // KIND_TITLE is passed to the shared factory like every other kind;
+        // the factory decides whether title breaking is available.
+        final BreakIterator titleIterator = getBreakInstance(where, KIND_TITLE);
+        return titleIterator;
+    }
+
+ /**
+ * {@icu} Returns a new instance of BreakIterator that locates title boundaries.
+ * The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.{@link #getWordInstance}
+ * @param where A Locale specifying the language of the text being analyzed.
+ * @return A new instance of BreakIterator that locates title boundaries.
+ * @stable ICU 3.2
+ */
+    public static BreakIterator getTitleInstance(ULocale where)
+    {
+        // Convert to a java.util.Locale first, then use the same factory as
+        // the Locale overload.
+        final Locale javaLocale = where.toLocale();
+        return getBreakInstance(javaLocale, KIND_TITLE);
+    }
+
+ /**
+ * {@icu} Registers a new break iterator of the indicated kind, to use in the given
+ * locale. Clones of the iterator will be returned if a request for a break iterator
+ * of the given kind matches or falls back to this locale.
+ * @param iter the BreakIterator instance to adopt.
+ * @param locale the Locale for which this instance is to be registered
+ * @param kind the type of iterator for which this instance is to be registered
+ * @return a registry key that can be used to unregister this instance
+ * @stable ICU 2.4
+ */
+    public static Object registerInstance(BreakIterator iter, Locale locale, int kind) {
+        // Registration is not available here: the arguments are ignored and
+        // every call is rejected.
+        final String message = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(message);
+    }
+
+ /**
+ * {@icu} Registers a new break iterator of the indicated kind, to use in the given
+ * locale. Clones of the iterator will be returned if a request for a break iterator
+ * of the given kind matches or falls back to this locale.
+ * @param iter the BreakIterator instance to adopt.
+ * @param locale the Locale for which this instance is to be registered
+ * @param kind the type of iterator for which this instance is to be registered
+ * @return a registry key that can be used to unregister this instance
+ * @stable ICU 3.2
+ */
+    public static Object registerInstance(BreakIterator iter, ULocale locale, int kind) {
+        // Registration is not available here: the arguments are ignored and
+        // every call is rejected.
+        final String message = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(message);
+    }
+
+ /**
+ * {@icu} Unregisters a previously-registered BreakIterator using the key returned
+ * from the register call. Key becomes invalid after this call and should not be used
+ * again.
+ * @param key the registry key returned by a previous call to registerInstance
+ * @return true if the iterator for the key was successfully unregistered
+ * @stable ICU 2.4
+ */
+    public static boolean unregister(Object key) {
+        // Nothing can be registered, so there is nothing to unregister:
+        // the key is ignored and every call is rejected.
+        final String message = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(message);
+    }
+
+ // end of registration
+
+ /**
+ * Returns a particular kind of BreakIterator for a locale.
+ * Avoids writing a switch statement with getXYZInstance(where) calls.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+    public static BreakIterator getBreakInstance(ULocale where, int kind) {
+        // Pass the requested kind through unchanged so the caller receives the
+        // iterator type it asked for (one of the KIND_* constants).
+        return getBreakInstance(where.toLocale(), kind);
+    }
+
+ private static BreakIterator getBreakInstance(Locale where, int kind) {
+ java.text.BreakIterator br = null;
+ switch(kind) {
+ case KIND_CHARACTER: br = java.text.BreakIterator.getCharacterInstance(where); break;
+ case KIND_WORD: br = java.text.BreakIterator.getWordInstance(where); break;
+ case KIND_LINE: br = java.text.BreakIterator.getLineInstance(where); break;
+ case KIND_SENTENCE: br = java.text.BreakIterator.getSentenceInstance(where); break;
+ case KIND_TITLE: throw new UnsupportedOperationException("Title break is not supported by com.ibm.icu.base");
+ }
+ return new BreakIteratorHandle(br);
+ }
+
+ /**
+ * Returns a list of locales for which BreakIterators can be used.
+ * @return An array of Locales. All of the locales in the array can
+ * be used when creating a BreakIterator.
+ * @stable ICU 2.6
+ */
+    public static synchronized Locale[] getAvailableLocales() {
+        // The locale list comes straight from the JDK's BreakIterator.
+        final Locale[] jdkLocales = java.text.BreakIterator.getAvailableLocales();
+        return jdkLocales;
+    }
+
+ /**
+ * {@icu} Returns a list of locales for which BreakIterators can be used.
+ * @return An array of Locales. All of the locales in the array can
+ * be used when creating a BreakIterator.
+ * @draft ICU 3.2 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+    public static synchronized ULocale[] getAvailableULocales() {
+        final Locale[] jdkLocales = java.text.BreakIterator.getAvailableLocales();
+        final int count = jdkLocales.length;
+        final ULocale[] converted = new ULocale[count];
+        // Convert element by element, keeping the order reported by the JDK.
+        int index = 0;
+        while (index < count) {
+            converted[index] = ULocale.forLocale(jdkLocales[index]);
+            ++index;
+        }
+        return converted;
+    }
+
+ /**
+ * {@icu} Returns the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA , the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * Note: The actual locale is returned correctly, but the valid
+ * locale is not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+    public final ULocale getLocale(ULocale.Type type) {
+        // Locale information is not available here: the argument is ignored
+        // and every call is rejected.
+        final String message = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(message);
+    }
+
+ // forwarding implementation class
+ static final class BreakIteratorHandle extends BreakIterator {
+ /**
+ * @internal
+ */
+ public final java.text.BreakIterator breakIterator;
+
+ /**
+ * @internal
+ * @param delegate the BreakIterator to which to delegate
+ */
+ public BreakIteratorHandle(java.text.BreakIterator delegate) {
+ this.breakIterator = delegate;
+ }
+
+ public int first() {
+ return breakIterator.first();
+ }
+ public int last() {
+ return breakIterator.last();
+ }
+ public int next(int n) {
+ return breakIterator.next(n);
+ }
+ public int next() {
+ return breakIterator.next();
+ }
+ public int previous() {
+ return breakIterator.previous();
+ }
+ public int following(int offset) {
+ return breakIterator.following(offset);
+ }
+ public int preceding(int offset) {
+ return breakIterator.preceding(offset);
+ }
+ public boolean isBoundary(int offset) {
+ return breakIterator.isBoundary(offset);
+ }
+ public int current() {
+ return breakIterator.current();
+ }
+ public CharacterIterator getText() {
+ return breakIterator.getText();
+ }
+ public void setText(CharacterIterator newText) {
+ breakIterator.setText(newText);
+ }
+
+ /**
+ * Return a string suitable for debugging.
+ * @return a string suitable for debugging
+ * @stable ICU 3.4.3
+ */
+ public String toString() {
+ return breakIterator.toString();
+ }
+
+ /**
+ * Return a clone of this BreakIterator.
+ * @return a clone of this BreakIterator
+ * @stable ICU 3.4.3
+ */
+ public Object clone() {
+ return new BreakIteratorHandle((java.text.BreakIterator)breakIterator.clone());
+ }
+
+ /**
+ * Return true if rhs is a BreakIterator with the same break behavior as this.
+ * @return true if rhs equals this
+ * @stable ICU 3.4.3
+ */
+ public boolean equals(Object rhs) {
+ try {
+ return breakIterator.equals(((BreakIteratorHandle)rhs).breakIterator);
+ }
+ catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Return a hashCode.
+ * @return a hashCode
+ * @stable ICU 3.4.3
+ */
+ public int hashCode() {
+ return breakIterator.hashCode();
+ }
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CollationKey.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CollationKey.java
new file mode 100644
index 00000000000..7de3a95a73a
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CollationKey.java
@@ -0,0 +1,415 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+/**
+ * <p>A <code>CollationKey</code> represents a <code>String</code>
+ * under the rules of a specific <code>Collator</code>
+ * object. Comparing two <code>CollationKey</code>s returns the
+ * relative order of the <code>String</code>s they represent.</p>
+ *
+ * <p>Since the rule set of <code>Collator</code>s can differ, the
+ * sort orders of the same string under two different
+ * <code>Collator</code>s might differ. Hence comparing
+ * <code>CollationKey</code>s generated from different
+ * <code>Collator</code>s can give incorrect results.</p>
+
+ * <p>Both the method
+ * <code>CollationKey.compareTo(CollationKey)</code> and the method
+ * <code>Collator.compare(String, String)</code> compare two strings
+ * and return their relative order. The performance characteristics
+ * of these two approaches can differ.</p>
+ *
+ * <p>During the construction of a <code>CollationKey</code>, the
+ * entire source string is examined and processed into a series of
+ * bits terminated by a null, that are stored in the <code>CollationKey</code>.
+ * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
+ * performs bitwise comparison on the bit sequences. This can incur
+ * startup cost when creating the <code>CollationKey</code>, but once
+ * the key is created, binary comparisons are fast. This approach is
+ * recommended when the same strings are to be compared over and over
+ * again.</p>
+ *
+ * <p>On the other hand, implementations of
+ * <code>Collator.compare(String, String)</code> can examine and
+ * process the strings only until the first characters differing in
+ * order. This approach is recommended if the strings are to be
+ * compared only once.</p>
+ *
+ * <p>More information about the composition of the bit sequence can
+ * be found in the user guide.</p>
+ *
+ * <p>The following example shows how <code>CollationKey</code>s can be used
+ * to sort a list of <code>String</code>s.</p>
+ *
+ *
+ * // Create an array of CollationKeys for the Strings to be sorted.
+ * Collator myCollator = Collator.getInstance();
+ * CollationKey[] keys = new CollationKey[3];
+ * keys[0] = myCollator.getCollationKey("Tom");
+ * keys[1] = myCollator.getCollationKey("Dick");
+ * keys[2] = myCollator.getCollationKey("Harry");
+ * sort( keys );
+ *
+ * //...
+ *
+ * // Inside body of sort routine, compare keys this way
+ * if( keys[i].compareTo( keys[j] ) > 0 )
+ * // swap keys[i] and keys[j]
+ *
+ * //...
+ *
+ * // Finally, when we've returned from sort.
+ * System.out.println( keys[0].getSourceString() );
+ * System.out.println( keys[1].getSourceString() );
+ * System.out.println( keys[2].getSourceString() );
+ *
+ *
+ *
+ *
+ * This class is not subclassable
+ *
+ * @see Collator
+ * @see RuleBasedCollator
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class CollationKey implements Comparable
+{
+ /**
+ * The JDK collation key that getSourceString, toByteArray, compareTo,
+ * equals and hashCode of this class forward to.
+ * @internal
+ */
+ final java.text.CollationKey key;
+
+ /**
+ * Wraps the given JDK collation key.
+ * @param delegate the java.text.CollationKey to forward to; stored as-is,
+ * with no null check
+ * @internal
+ */
+ CollationKey(java.text.CollationKey delegate) {
+ this.key = delegate;
+ }
+
+ // public inner classes -------------------------------------------------
+
+ /**
+ * Options that are used in the API CollationKey.getBound() for getting a
+ * CollationKey based on the bound mode requested.
+ * @stable ICU 2.6
+ */
+ public static final class BoundMode
+ {
+ /*
+ * Do not change the values assigned to the members of this class.
+ * Underlying code depends on them having these numbers.
+ */
+
+ /**
+ * Lower bound
+ * @stable ICU 2.6
+ */
+ public static final int LOWER = 0;
+
+ /**
+ * Upper bound that will match strings of exact size
+ * @stable ICU 2.6
+ */
+ public static final int UPPER = 1;
+
+ /**
+ * Upper bound that will match all the strings that have the same
+ * initial substring as the given string
+ * @stable ICU 2.6
+ */
+ public static final int UPPER_LONG = 2;
+
+ /**
+ * Number of bound modes
+ * @stable ICU 2.6
+ */
+ public static final int COUNT = 3;
+
+ /**
+ * Private constructor: this class only holds constants, so it cannot
+ * be instantiated from outside.
+ */
+ ///CLOVER:OFF
+ private BoundMode(){}
+ ///CLOVER:ON
+ }
+
+ // public constructor ---------------------------------------------------
+
+ /**
+ * CollationKey constructor.
+ * This constructor is given public access, unlike the JDK version, to
+ * allow access to users extending the Collator class. See
+ * {@link Collator#getCollationKey(String)}.
+ * @param source string this CollationKey is to represent
+ * @param key array of bytes that represent the collation order of argument
+ * source terminated by a null
+ * @see Collator
+ * @stable ICU 2.8
+ */
+ public CollationKey(String source, byte key[])
+ {
+ // Both arguments are ignored: in com.ibm.icu.base this constructor always throws.
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * CollationKey constructor that forces key to release its internal byte
+ * array for adoption. key will have a null byte array after this
+ * construction.
+ * @param source string this CollationKey is to represent
+ * @param key RawCollationKey object that represents the collation order of
+ * argument source.
+ * @see Collator
+ * @see RawCollationKey
+ * @stable ICU 2.8
+ */
+ public CollationKey(String source, RawCollationKey key)
+ {
+ // Both arguments are ignored (the RawCollationKey is not touched):
+ // in com.ibm.icu.base this constructor always throws.
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ // public getters -------------------------------------------------------
+
+ /**
+ * Return the source string that this CollationKey represents.
+ * @return source string that this CollationKey represents
+ * @stable ICU 2.8
+ */
+ public String getSourceString()
+ {
+ // The source string is kept by the wrapped java.text.CollationKey, not by this object.
+ return key.getSourceString();
+ }
+
+ /**
+ * Duplicates and returns the value of this CollationKey as a sequence
+ * of big-endian bytes terminated by a null.
+ *
+ * If two CollationKeys can be legitimately compared, then one can
+ * compare the byte arrays of each to obtain the same result, e.g.
+ *
+ * byte key1[] = collationkey1.toByteArray();
+ * byte key2[] = collationkey2.toByteArray();
+ * int key, targetkey;
+ * int i = 0;
+ * do {
+ * key = key1[i] & 0xFF;
+ * targetkey = key2[i] & 0xFF;
+ * if (key < targetkey) {
+ * System.out.println("String 1 is less than string 2");
+ * return;
+ * }
+ * if (targetkey < key) {
+ * System.out.println("String 1 is more than string 2");
+ * return;
+ * }
+ * i ++;
+ * } while (key != 0 && targetkey != 0);
+ *
+ * System.out.println("Strings are equal.");
+ *
+ *
+ * @return CollationKey value in a sequence of big-endian bytes
+ * terminated by a null.
+ * @stable ICU 2.8
+ */
+ public byte[] toByteArray()
+ {
+ // Returned exactly as java.text.CollationKey.toByteArray() produces it.
+ return key.toByteArray();
+ }
+
+ // public other methods -------------------------------------------------
+
+ /**
+ * Compare this CollationKey to another CollationKey. The
+ * collation rules of the Collator that created this key are
+ * applied.
+ *
+ * Note: Comparison between CollationKeys
+ * created by different Collators might return incorrect
+ * results. See class documentation.
+ *
+ * @param target target CollationKey
+ * @return an integer value. If the value is less than zero this CollationKey
+ * is less than target, if the value is zero they are equal, and
+ * if the value is greater than zero this CollationKey is greater
+ * than target.
+ * @exception NullPointerException is thrown if argument is null.
+ * @see Collator#compare(String, String)
+ * @stable ICU 2.8
+ */
+ public int compareTo(CollationKey target)
+ {
+ // target.key is dereferenced unconditionally, so a null target raises
+ // the documented NullPointerException before any comparison happens.
+ return key.compareTo(target.key);
+ }
+
+ /**
+ * Compare this CollationKey and the specified Object for
+ * equality. The collation rules of the Collator that created
+ * this key are applied.
+ *
+ * See note in compareTo(CollationKey) for warnings about
+ * possible incorrect results.
+ *
+ * @param target the object to compare to.
+ * @return true if the two keys compare as equal, false otherwise; also false,
+ * without throwing, when target is null or is not a CollationKey.
+ * @see #compareTo(CollationKey)
+ * @stable ICU 2.8
+ */
+ public boolean equals(Object target)
+ {
+ // instanceof is false for null as well as for other types, so the
+ // cast and the typed comparison are only reached for a CollationKey.
+ return target instanceof CollationKey && equals((CollationKey)target);
+ }
+
+ /**
+ *
+ * Compare this CollationKey and the argument target CollationKey for
+ * equality.
+ * The collation
+ * rules of the Collator object which created these objects are applied.
+ *
+ *
+ * See note in compareTo(CollationKey) for warnings of incorrect results
+ *
+ * @param target the CollationKey to compare to.
+ * @return true if two objects are equal, false otherwise.
+ * @exception NullPointerException is thrown when the argument is null.
+ * @stable ICU 2.8
+ */
+ public boolean equals(CollationKey target)
+ {
+ // Equality is decided by the wrapped java.text.CollationKey objects;
+ // a null target fails on target.key with NullPointerException.
+ return key.equals(target.key);
+ }
+
+ /**
+ * Returns a hash code for this CollationKey. The hash value is calculated
+ * on the key itself, not the String from which the key was created. Thus
+ * if x and y are CollationKeys, then x.hashCode() == y.hashCode()
+ * if x.equals(y) is true. This allows language-sensitive comparison in a
+ * hash table.
+ *
+ * @return the hash value.
+ * @stable ICU 2.8
+ */
+ public int hashCode()
+ {
+ // Hash of the wrapped java.text.CollationKey, the same object equals() compares.
+ return key.hashCode();
+ }
+
+ /**
+ *
+ * Produce a bound for the sort order of a given collation key and a
+ * strength level. This API does not attempt to find a bound for the
+ * CollationKey String representation, hence null will be returned in its
+ * place.
+ *
+ *
+ * Resulting bounds can be used to produce a range of strings that are
+ * between upper and lower bounds. For example, if bounds are produced
+ * for a sortkey of string "smith", strings between upper and lower
+ * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
+ *
+ *
+ * There are two upper bounds that can be produced. If BoundMode.UPPER
+ * is used, strings matched would be as above. However, if a bound
+ * is produced using BoundMode.UPPER_LONG, the above example will
+ * also match "Smithsonian" and similar.
+ *
+ *
+ * For more on usage, see example in test procedure
+ *
+ * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
+ *
+ *
+ *
+ * Collation keys produced may be compared using the compare API.
+ *
+ * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
+ * produces a lower inclusive bound, BoundMode.UPPER, that
+ * produces upper bound that matches strings of the same
+ * length or BoundMode.UPPER_LONG that matches strings that
+ * have the same starting substring as the source string.
+ * @param noOfLevels Strength levels required in the resulting bound
+ * (for most uses, the recommended value is PRIMARY). This
+ * strength should be less than the maximum strength of
+ * this CollationKey.
+ * See users guide for explanation on the strength levels a
+ * collation key can have.
+ * @return the result bounded CollationKey with a valid sort order but
+ * a null String representation.
+ * @exception IllegalArgumentException thrown when the strength level
+ * requested is higher than or equal to the strength in this
+ * CollationKey.
+ * In the case of an Exception, information
+ * about the maximum strength to use will be returned in the
+ * Exception. The user can then call getBound() again with the
+ * appropriate strength.
+ * @see CollationKey
+ * @see CollationKey.BoundMode
+ * @see Collator#PRIMARY
+ * @see Collator#SECONDARY
+ * @see Collator#TERTIARY
+ * @see Collator#QUATERNARY
+ * @see Collator#IDENTICAL
+ * @stable ICU 2.6
+ */
+ public CollationKey getBound(int boundType, int noOfLevels)
+ {
+ // Arguments are ignored: in com.ibm.icu.base this method always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ *
+ * Merges this CollationKey with another. Only the sorting order of the
+ * CollationKeys will be merged. This API does not attempt to merge the
+ * String representations of the CollationKeys, hence null will be returned
+ * as the String representation.
+ *
+ *
+ * The strength levels are merged with their corresponding counterparts
+ * (PRIMARIES with PRIMARIES, SECONDARIES with SECONDARIES etc.).
+ *
+ *
+ * The merged String representation of the result CollationKey will be a
+ * concatenation of the String representations of the 2 source
+ * CollationKeys.
+ *
+ *
+ * Between the values from the same level a separator is inserted.
+ * example (uncompressed):
+ *
+ * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ * will be merged as
+ * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
+ *
+ *
+ *
+ * This allows for concatenating of first and last names for sorting, among
+ * other things.
+ *
+ *
+ * @param source CollationKey to merge with
+ * @return a CollationKey that contains the valid merged sorting order
+ * with a null String representation,
+ * i.e. new CollationKey(null, merge_sort_order)
+ * @exception IllegalArgumentException thrown if source CollationKey
+ * argument is null or of 0 length.
+ * @stable ICU 2.6
+ */
+ public CollationKey merge(CollationKey source)
+ {
+ // The argument is ignored (not even null-checked): in com.ibm.icu.base
+ // this method always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Collator.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Collator.java
new file mode 100644
index 00000000000..2623e467dba
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/Collator.java
@@ -0,0 +1,932 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.util.Comparator;
+import java.util.Locale;
+import java.util.Set;
+
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+* {@icuenhanced java.text.Collator}.{@icu _usage_}
+*
+* Collator performs locale-sensitive string comparison. A concrete
+* subclass, RuleBasedCollator, allows customization of the collation
+* ordering by the use of rule sets.
+*
+* Following the Unicode
+* Consortium 's specifications for the
+* Unicode Collation
+* Algorithm (UCA) , there are 5 different levels of strength used
+* in comparisons:
+*
+*
+* PRIMARY strength: Typically, this is used to denote differences between
+* base characters (for example, "a" < "b").
+* It is the strongest difference. For example, dictionaries are divided
+* into different sections by base character.
+* SECONDARY strength: Accents in the characters are considered secondary
+* differences (for example, "as" < "às" < "at"). Other
+* differences
+* between letters can also be considered secondary differences, depending
+* on the language. A secondary difference is ignored when there is a
+* primary difference anywhere in the strings.
+* TERTIARY strength: Upper and lower case differences in characters are
+* distinguished at tertiary strength (for example, "ao" < "Ao" <
+* "aò"). In addition, a variant of a letter differs from the base
+* form on the tertiary strength (such as "A" and "Ⓐ"). Another
+* example is the
+* difference between large and small Kana. A tertiary difference is ignored
+* when there is a primary or secondary difference anywhere in the strings.
+* QUATERNARY strength: When punctuation is ignored
+*
+* (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY
+* strength, an additional strength level can
+* be used to distinguish words with and without punctuation (for example,
+* "ab" < "a-b" < "aB").
+* This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
+* difference. The QUATERNARY strength should only be used if ignoring
+* punctuation is required.
+* IDENTICAL strength:
+* When all other strengths are equal, the IDENTICAL strength is used as a
+* tiebreaker. The Unicode code point values of the NFD form of each string
+* are compared, just in case there is no difference.
+* For example, Hebrew cantillation marks are only distinguished at this
+* strength. This strength should be used sparingly, as only code point
+* value differences between two strings is an extremely rare occurrence.
+* Using this strength substantially decreases the performance for both
+* comparison and collation key generation APIs. This strength also
+* increases the size of the collation key.
+*
+*
+* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
+* the canonical decomposition mode and one that does not use any decomposition.
+* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
+* is not supported here. If the canonical
+* decomposition mode is set, the Collator handles un-normalized text properly,
+* producing the same results as if the text were normalized in NFD. If
+* canonical decomposition is turned off, it is the user's responsibility to
+* ensure that all text is already in the appropriate form before performing
+* a comparison or before getting a CollationKey.
+*
+* For more information about the collation service see the
+* users
+* guide .
+*
+* Examples of use
+*
+* // Get the Collator for US English and set its strength to PRIMARY
+* Collator usCollator = Collator.getInstance(Locale.US);
+* usCollator.setStrength(Collator.PRIMARY);
+* if (usCollator.compare("abc", "ABC") == 0) {
+* System.out.println("Strings are equivalent");
+* }
+*
+* The following example shows how to compare two strings using the
+* Collator for the default locale.
+*
+* // Compare two strings in the default locale
+* Collator myCollator = Collator.getInstance();
+* myCollator.setDecomposition(NO_DECOMPOSITION);
+* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
+* System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
+* myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
+* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
+* System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
+* }
+* else {
+* System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
+* }
+* }
+* else {
+* System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
+* }
+*
+*
+* @see RuleBasedCollator
+* @see CollationKey
+* @author Syn Wee Quek
+* @stable ICU 2.8
+*/
+public class Collator implements Comparator, Cloneable
+{
+ /**
+ * The JDK collator that the methods of this class forward to.
+ * Null when the instance was created through the protected no-argument
+ * constructor.
+ * @internal
+ */
+ private final java.text.Collator collator;
+
+ /**
+ * Wraps the given JDK collator; used by the getInstance factory methods
+ * and by clone().
+ * @param delegate the java.text.Collator to forward to; stored as-is
+ * @internal
+ */
+ private Collator(java.text.Collator delegate) {
+ this.collator = delegate;
+ }
+
+ /**
+ * Create a collator with a null delegate.
+ * For use by possible subclassers. This is present since
+ * the original Collator is abstract, and so, in theory
+ * subclassable. All member APIs must be overridden.
+ */
+ protected Collator() {
+ // No JDK delegate. Inherited methods such as setStrength dereference
+ // collator directly, so they throw NullPointerException unless overridden.
+ this.collator = null;
+ }
+
+ // public data members ---------------------------------------------------
+
+ /**
+ * Strongest collator strength value. Typically used to denote differences
+ * between base characters. See class documentation for more explanation.
+ * @see #setStrength
+ * @see #getStrength
+ * @stable ICU 2.8
+ */
+ public final static int PRIMARY = java.text.Collator.PRIMARY;
+
+ /**
+ * Second level collator strength value.
+ * Accents in the characters are considered secondary differences.
+ * Other differences between letters can also be considered secondary
+ * differences, depending on the language.
+ * See class documentation for more explanation.
+ * @see #setStrength
+ * @see #getStrength
+ * @stable ICU 2.8
+ */
+ public final static int SECONDARY = java.text.Collator.SECONDARY;
+
+ /**
+ * Third level collator strength value.
+ * Upper and lower case differences in characters are distinguished at this
+ * strength level. In addition, a variant of a letter differs from the base
+ * form on the tertiary level.
+ * See class documentation for more explanation.
+ * @see #setStrength
+ * @see #getStrength
+ * @stable ICU 2.8
+ */
+ public final static int TERTIARY = java.text.Collator.TERTIARY;
+
+ /**
+ * {@icu} Fourth level collator strength value.
+ * When punctuation is ignored
+ *
+ * (see Ignoring Punctuations in the user guide) at PRIMARY to TERTIARY
+ * strength, an additional strength level can
+ * be used to distinguish words with and without punctuation.
+ * See class documentation for more explanation.
+ * @see #setStrength
+ * @see #getStrength
+ * @stable ICU 2.8
+ */
+ public final static int QUATERNARY = java.text.Collator.IDENTICAL;
+
+ /**
+ * Smallest Collator strength value. When all other strengths are equal,
+ * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
+ * values of the NFD form of each string are compared, just in case there
+ * is no difference.
+ * See class documentation for more explanation.
+ *
+ *
+ * Note this value is different from JDK's
+ *
+ * NOTE(review): this constant is defined as java.text.Collator.FULL_DECOMPOSITION,
+ * which the JDK documents as 2, the same number as java.text.Collator.TERTIARY.
+ * setStrength forwards the number unchanged, so IDENTICAL and TERTIARY are
+ * indistinguishable to the delegate -- confirm this is intended.
+ * @stable ICU 2.8
+ */
+ public final static int IDENTICAL = java.text.Collator.FULL_DECOMPOSITION;
+
+ /**
+ * {@icunote} This is for backwards compatibility with Java APIs only. It
+ * should not be used, IDENTICAL should be used instead. ICU's
+ * collation does not support Java's FULL_DECOMPOSITION mode.
+ * @stable ICU 3.4
+ */
+ public final static int FULL_DECOMPOSITION = java.text.Collator.FULL_DECOMPOSITION;
+
+ /**
+ * Decomposition mode value. With NO_DECOMPOSITION set, Strings
+ * will not be decomposed for collation. This is the default
+ * decomposition setting unless otherwise specified by the locale
+ * used to create the Collator.
+ *
+ * Note this value is different from the JDK's.
+ * @see #CANONICAL_DECOMPOSITION
+ * @see #getDecomposition
+ * @see #setDecomposition
+ * @stable ICU 2.8
+ */
+ public final static int NO_DECOMPOSITION = java.text.Collator.NO_DECOMPOSITION;
+
+ /**
+ * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
+ * characters that are canonical variants according to the Unicode standard
+ * will be decomposed for collation.
+ *
+ * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
+ * described in
+ * Unicode Technical Report #15 .
+ *
+ * @see #NO_DECOMPOSITION
+ * @see #getDecomposition
+ * @see #setDecomposition
+ * @stable ICU 2.8
+ */
+ public final static int CANONICAL_DECOMPOSITION = java.text.Collator.CANONICAL_DECOMPOSITION;
+
+ // public methods --------------------------------------------------------
+
+ // public setters --------------------------------------------------------
+
+ /**
+ * Sets this Collator's strength property. The strength property
+ * determines the minimum level of difference considered significant
+ * during comparison.
+ *
+ * The default strength for the Collator is TERTIARY, unless specified
+ * otherwise by the locale used to create the Collator.
+ *
+ * See the Collator class description for an example of use.
+ * @param newStrength the new strength value.
+ * @see #getStrength
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @throws IllegalArgumentException if the new strength value is not one
+ * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+ * @stable ICU 2.8
+ */
+ public void setStrength(int newStrength)
+ {
+ // Forwarded without translation or validation; any range check is
+ // whatever java.text.Collator.setStrength performs.
+ collator.setStrength(newStrength);
+ }
+
+ /**
+ * Sets the decomposition mode of this Collator. Setting this
+ * decomposition property with CANONICAL_DECOMPOSITION allows the
+ * Collator to handle un-normalized text properly, producing the
+ * same results as if the text were normalized. If
+ * NO_DECOMPOSITION is set, it is the user's responsibility to
+ * ensure that all text is already in the appropriate form before
+ * a comparison or before getting a CollationKey. Adjusting
+ * decomposition mode allows the user to select between faster and
+ * more complete collation behavior.
+ *
+ * Since a great many of the world's languages do not require
+ * text normalization, most locales set NO_DECOMPOSITION as the
+ * default decomposition mode.
+ *
+ * The default decomposition mode for the Collator is
+ * NO_DECOMPOSITION, unless specified otherwise by the locale used
+ * to create the Collator.
+ *
+ * See getDecomposition for a description of decomposition
+ * mode.
+ *
+ * @param decomposition the new decomposition mode
+ * @see #getDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @throws IllegalArgumentException If the given value is not a valid
+ * decomposition mode.
+ * @stable ICU 2.8
+ */
+ public void setDecomposition(int decomposition)
+ {
+ // Forwarded without translation or validation; any range check is
+ // whatever java.text.Collator.setDecomposition performs.
+ collator.setDecomposition(decomposition);
+ }
+
+ // public getters --------------------------------------------------------
+
+ /**
+ * Returns the Collator for the current default locale.
+ * The default locale is determined by java.util.Locale.getDefault().
+ * @return the Collator for the default locale (for example, en_US) if it
+ * is created successfully. Otherwise if there is no Collator
+ * associated with the current locale, the default UCA collator
+ * will be returned.
+ * @see java.util.Locale#getDefault()
+ * @see #getInstance(Locale)
+ * @stable ICU 2.8
+ */
+ public static final Collator getInstance()
+ {
+ // Ask the JDK for its collator (no locale argument), then wrap it.
+ java.text.Collator jdkCollator = java.text.Collator.getInstance();
+ return new Collator(jdkCollator);
+ }
+
+ /**
+ * Clones the collator.
+ * @stable ICU 2.6
+ * @return a clone of this collator.
+ */
+ public Object clone() throws CloneNotSupportedException {
+ return new Collator((java.text.Collator)collator.clone());
+ }
+
+ // begin registry stuff
+
+ /**
+ * A factory used with registerFactory to register multiple collators and provide
+ * display names for them. If standard locale display names are sufficient,
+ * Collator instances may be registered instead.
+ * Note: as of ICU4J 3.2, the default API for CollatorFactory uses
+ * ULocale instead of Locale. Instead of overriding createCollator(Locale),
+ * new implementations should override createCollator(ULocale). Note that
+ * one of these two methods MUST be overridden or else an infinite
+ * loop will occur.
+ * @stable ICU 2.6
+ */
+ public static abstract class CollatorFactory {
+ /**
+ * Return true if this factory will be visible. Default is true.
+ * If not visible, the locales supported by this factory will not
+ * be listed by getAvailableLocales.
+ *
+ * @return true if this factory is visible
+ * @stable ICU 2.6
+ */
+ public boolean visible() {
+ // Default for every factory; getDisplayName(ULocale, ULocale) returns
+ // null whenever an override makes this false.
+ return true;
+ }
+
+ /**
+ * Return an instance of the appropriate collator. If the locale
+ * is not supported, return null.
+ * Note: as of ICU4J 3.2, implementations should override
+ * this method instead of createCollator(Locale).
+ * @param loc the locale for which this collator is to be created.
+ * @return the newly created collator.
+ * @stable ICU 3.2
+ */
+ public Collator createCollator(ULocale loc) {
+ // Default: defer to createCollator(Locale). That method's default defers
+ // back here, so a subclass has to override at least one of the two.
+ return createCollator(loc.toLocale());
+ }
+
+ /**
+ * Return an instance of the appropriate collator. If the locale
+ * is not supported, return null.
+ * Note: as of ICU4J 3.2, implementations should override
+ * createCollator(ULocale) instead of this method, and inherit this
+ * method's implementation. This method is no longer abstract
+ * and instead delegates to createCollator(ULocale).
+ * @param loc the locale for which this collator is to be created.
+ * @return the newly created collator.
+ * @stable ICU 2.6
+ */
+ public Collator createCollator(Locale loc) {
+ // Default: defer to createCollator(ULocale). That method's default defers
+ // back here, so a subclass has to override at least one of the two.
+ return createCollator(ULocale.forLocale(loc));
+ }
+
+ /**
+ * Return the name of the collator for the objectLocale, localized for the displayLocale.
+ * If objectLocale is not visible or not defined by the factory, return null.
+ * @param objectLocale the locale identifying the collator
+ * @param displayLocale the locale for which the display name of the collator should be localized
+ * @return the display name
+ * @stable ICU 2.6
+ */
+ public String getDisplayName(Locale objectLocale, Locale displayLocale) {
+ return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
+ }
+
+ /**
+ * Return the name of the collator for the objectLocale, localized for the displayLocale.
+ * If objectLocale is not visible or not defined by the factory, return null.
+ * @param objectLocale the locale identifying the collator
+ * @param displayLocale the locale for which the display name of the collator should be localized
+ * @return the display name
+ * @stable ICU 3.2
+ */
+ public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
+ // An invisible factory never reports display names.
+ if (!visible()) {
+ return null;
+ }
+ Set localeIds = getSupportedLocaleIDs();
+ String baseName = objectLocale.getBaseName();
+ // Only locales this factory lists as supported get a name.
+ return localeIds.contains(baseName) ? objectLocale.getDisplayName(displayLocale) : null;
+ }
+
+ /**
+ * Return an unmodifiable collection of the locale names directly
+ * supported by this factory.
+ *
+ * @return the set of supported locale IDs.
+ * @stable ICU 2.6
+ */
+ public abstract Set getSupportedLocaleIDs();
+
+ /**
+ * Empty default constructor.
+ * @stable ICU 2.6
+ */
+ protected CollatorFactory() {
+ }
+ }
+
+ /**
+ * {@icu} Returns the Collator for the desired locale.
+ * @param locale the desired locale.
+ * @return Collator for the desired locale if it is created successfully.
+ * Otherwise if there is no Collator
+ * associated with the current locale, a default UCA collator will
+ * be returned.
+ * @see java.util.Locale
+ * @see java.util.ResourceBundle
+ * @see #getInstance(Locale)
+ * @see #getInstance()
+ * @stable ICU 3.0
+ */
+ public static final Collator getInstance(ULocale locale) {
+ // Reuse the java.util.Locale overload after converting with ULocale.toLocale().
+ return getInstance(locale.toLocale());
+ }
+
+ /**
+ * Returns the Collator for the desired locale.
+ * @param locale the desired locale.
+ * @return Collator for the desired locale if it is created successfully.
+ * Otherwise if there is no Collator
+ * associated with the current locale, a default UCA collator will
+ * be returned.
+ * @see java.util.Locale
+ * @see java.util.ResourceBundle
+ * @see #getInstance(ULocale)
+ * @see #getInstance()
+ * @stable ICU 2.8
+ */
+ public static final Collator getInstance(Locale locale) {
+ return new Collator(java.text.Collator.getInstance(locale));
+ }
+
+ /**
+ * {@icu} Registers a collator as the default collator for the provided locale. The
+ * collator should not be modified after it is registered.
+ *
+ * @param collator the collator to register
+ * @param locale the locale for which this is the default collator
+ * @return an object that can be used to unregister the registered collator.
+ *
+ * @stable ICU 3.2
+ */
+ public static final Object registerInstance(Collator collator, ULocale locale) {
+ // Arguments are ignored: in com.ibm.icu.base this method always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Registers a collator factory.
+ *
+ * @param factory the factory to register
+ * @return an object that can be used to unregister the registered factory.
+ *
+ * @stable ICU 2.6
+ */
+ public static final Object registerFactory(CollatorFactory factory) {
+ // The factory is ignored: in com.ibm.icu.base this method always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Unregisters a collator previously registered using registerInstance.
+ * @param registryKey the object previously returned by registerInstance.
+ * @return true if the collator was successfully unregistered.
+ * @stable ICU 2.6
+ */
+ public static final boolean unregister(Object registryKey) {
+ // The key is ignored: in com.ibm.icu.base this method always throws
+ // instead of returning a boolean.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the set of locales, as Locale objects, for which collators
+ * are installed. Note that Locale objects do not support RFC 3066.
+ * @return the list of locales in which collators are installed.
+ * This list includes any that have been registered, in addition to
+ * those that are installed with ICU4J.
+ * @stable ICU 2.4
+ */
+ public static Locale[] getAvailableLocales() {
+ // Exactly the JDK's list; the register* methods of this class always
+ // throw, so there are never registered collators to add to it.
+ return java.text.Collator.getAvailableLocales();
+ }
+
+ /**
+ * {@icu} Returns the set of locales, as ULocale objects, for which collators
+ * are installed. ULocale objects support RFC 3066.
+ * @return the list of locales in which collators are installed.
+ * This list includes any that have been registered, in addition to
+ * those that are installed with ICU4J.
+ * @stable ICU 3.0
+ */
+    public static final ULocale[] getAvailableULocales() {
+        // Ask the JDK for its collation locales and convert each one, keeping the order.
+        final Locale[] jdkLocales = java.text.Collator.getAvailableLocales();
+        final ULocale[] converted = new ULocale[jdkLocales.length];
+        int next = 0;
+        for (Locale jdkLocale : jdkLocales) {
+            converted[next++] = ULocale.forLocale(jdkLocale);
+        }
+        return converted;
+    }
+
+ /**
+ * {@icu} Returns an array of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @return an array of valid collation keywords.
+ * @see #getKeywordValues
+ * @stable ICU 3.0
+ */
+    public static final String[] getKeywords() {
+        // com.ibm.icu.base has no keyword support, so the result is a fresh empty array.
+        final String[] none = {};
+        return none;
+    }
+
+ /**
+ * {@icu} Given a keyword, returns an array of all values for
+ * that keyword that are currently in use.
+ * @param keyword one of the keywords returned by getKeywords.
+ * @see #getKeywords
+ * @stable ICU 3.0
+ */
+    public static final String[] getKeywordValues(String keyword) {
+        // com.ibm.icu.base has no keyword support; the keyword argument is not consulted.
+        final String[] none = {};
+        return none;
+    }
+
+ /**
+ * {@icu} Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "collation" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @return an array of string values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+    public static final String[] getKeywordValuesForLocale(
+            String key, ULocale locale, boolean commonlyUsed) {
+        // Keyword lookup is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service. If two locales return the same result, then
+ * collators instantiated for these locales will behave
+ * equivalently. The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details. The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales. This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the
+ * Locales and Services section of the ICU User Guide.
+ * @param keyword a particular keyword as enumerated by
+ * getKeywords.
+ * @param locID The requested locale
+ * @param isAvailable If non-null, isAvailable[0] will receive an
+ * output boolean that indicates whether the requested locale was
+ * 'available' to the collation service. If non-null, isAvailable
+ * must have length >= 1.
+ * @return the locale
+ * @stable ICU 3.0
+ */
+    public static final ULocale getFunctionalEquivalent(
+            String keyword, ULocale locID, boolean[] isAvailable) {
+        // Functional-equivalence lookup is not offered by this stub; isAvailable is never
+        // written because the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service.
+ * @param keyword a particular keyword as enumerated by
+ * getKeywords.
+ * @param locID The requested locale
+ * @return the locale
+ * @see #getFunctionalEquivalent(String,ULocale,boolean[])
+ * @stable ICU 3.0
+ */
+    public static final ULocale getFunctionalEquivalent(String keyword, ULocale locID) {
+        // Functional-equivalence lookup is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * displayLocale.
+ * @param objectLocale the locale of the collator
+ * @param displayLocale the locale for the collator's display name
+ * @return the display name
+ * @stable ICU 2.6
+ */
+    public static String getDisplayName(Locale objectLocale, Locale displayLocale) {
+        // Display names are not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * displayLocale.
+ * @param objectLocale the locale of the collator
+ * @param displayLocale the locale for the collator's display name
+ * @return the display name
+ * @stable ICU 3.2
+ */
+    public static String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
+        // Display names are not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * current locale.
+ * @param objectLocale the locale of the collator
+ * @return the display name
+ * @stable ICU 2.6
+ */
+    public static String getDisplayName(Locale objectLocale) {
+        // Display names are not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * current locale.
+ * @param objectLocale the locale of the collator
+ * @return the display name
+ * @stable ICU 3.2
+ */
+    public static String getDisplayName(ULocale objectLocale) {
+        // Display names are not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * Returns this Collator's strength property. The strength property
+ * determines the minimum level of difference considered significant.
+ *
+ * {@icunote} This can return QUATERNARY strength, which is not supported by the
+ * JDK version.
+ *
+ * See the Collator class description for more details.
+ *
+ * @return this Collator's current strength property.
+ * @see #setStrength
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @stable ICU 2.8
+ */
+    public int getStrength() {
+        // The strength is whatever the wrapped collator reports.
+        final int strength = collator.getStrength();
+        return strength;
+    }
+
+ /**
+ * Returns the decomposition mode of this Collator. The decomposition mode
+ * determines how Unicode composed characters are handled.
+ *
+ *
+ * See the Collator class description for more details.
+ *
+ * @return the decomposition mode
+ * @see #setDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @stable ICU 2.8
+ */
+    public int getDecomposition() {
+        // The decomposition mode is whatever the wrapped collator reports.
+        final int mode = collator.getDecomposition();
+        return mode;
+    }
+
+ // public other methods -------------------------------------------------
+
+ /**
+ * Compares the equality of two text Strings using
+ * this Collator's rules, strength and decomposition mode. Convenience method.
+ * @param source the source string to be compared.
+ * @param target the target string to be compared.
+ * @return true if the strings are equal according to the collation
+ * rules, otherwise false.
+ * @see #compare
+ * @throws NullPointerException thrown if either argument is null.
+ * @stable ICU 2.8
+ */
+    public boolean equals(String source, String target) {
+        // Two strings are collation-equal when compare() reports no ordering between them.
+        final int order = compare(source, target);
+        return order == 0;
+    }
+
+ /**
+ * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
+ * in this collator.
+ * @return a pointer to a UnicodeSet object containing all the
+ * code points and sequences that may sort differently than
+ * in the UCA.
+ * @stable ICU 2.4
+ */
+    public UnicodeSet getTailoredSet() {
+        // Tailoring information is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * Compares the source text String to the target text String according to
+ * this Collator's rules, strength and decomposition mode.
+ * Returns an integer less than,
+ * equal to or greater than zero depending on whether the source String is
+ * less than, equal to or greater than the target String. See the Collator
+ * class description for an example of use.
+ *
+ * @param source the source String.
+ * @param target the target String.
+ * @return Returns an integer value. Value is less than zero if source is
+ * less than target, value is zero if source and target are equal,
+ * value is greater than zero if source is greater than target.
+ * @see CollationKey
+ * @see #getCollationKey
+ * @throws NullPointerException thrown if either argument is null.
+ * @stable ICU 2.8
+ */
+    public int compare(String source, String target) {
+        // The ordering decision is made entirely by the wrapped collator.
+        final int order = collator.compare(source, target);
+        return order;
+    }
+
+ /**
+ * Compares the source Object to the target Object.
+ *
+ * @param source the source Object.
+ * @param target the target Object.
+ * @return Returns an integer value. Value is less than zero if source is
+ * less than target, value is zero if source and target are equal,
+ * value is greater than zero if source is greater than target.
+ * @throws ClassCastException thrown if either argument cannot be cast to String.
+ * @stable ICU 4.2
+ */
+    public int compare(Object source, Object target) {
+        // Both arguments must be Strings; the casts raise ClassCastException otherwise.
+        final String left = (String) source;
+        final String right = (String) target;
+        return compare(left, right);
+    }
+
+ /**
+ *
+ * Transforms the String into a CollationKey suitable for efficient
+ * repeated comparison. The resulting key depends on the collator's
+ * rules, strength and decomposition mode.
+ *
+ * See the CollationKey class documentation for more information.
+ * @param source the string to be transformed into a CollationKey.
+ * @return the CollationKey for the given String based on this Collator's
+ * collation rules. If the source String is null, a null
+ * CollationKey is returned.
+ * @see CollationKey
+ * @see #compare(String, String)
+ * @see #getRawCollationKey
+ * @stable ICU 2.8
+ */
+    public CollationKey getCollationKey(String source) {
+        // The documented contract is "null source => null key". The JDK collator
+        // signals that case by returning null, so pass the null through instead of
+        // handing the caller a wrapper around a null delegate.
+        final java.text.CollationKey jdkKey = collator.getCollationKey(source);
+        if (jdkKey == null) {
+            return null;
+        }
+        return new CollationKey(jdkKey);
+    }
+
+ /**
+ * {@icu} Returns the simpler form of a CollationKey for the String source following
+ * the rules of this Collator and stores the result into the user provided argument
+ * key. If key has an internal byte array of length that's too small for the result,
+ * the internal byte array will be grown to the exact required size.
+ * @param source the text String to be transformed into a RawCollationKey
+ * @return If key is null, a new instance of RawCollationKey will be
+ * created and returned, otherwise the user provided key will be
+ * returned.
+ * @see #compare(String, String)
+ * @see #getCollationKey
+ * @see RawCollationKey
+ * @stable ICU 2.8
+ */
+    public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
+        // Raw collation keys are not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Variable top is a two byte primary value which causes all the codepoints
+ * with primary values that are less or equal than the variable top to be
+ * shifted when alternate handling is set to SHIFTED.
+ *
+ *
+ * Sets the variable top to a collation element value of a string supplied.
+ *
+ * @param varTop one or more (if contraction) characters to which the
+ * variable top should be set
+ * @return an int value containing the value of the variable top in upper 16
+ * bits. Lower 16 bits are undefined.
+ * @throws IllegalArgumentException is thrown if varTop argument is not
+ * a valid variable top element. A variable top element is
+ * invalid when it is a contraction that does not exist in the
+ * Collation order or when the PRIMARY strength collation
+ * element for the variable top has more than two bytes
+ * @see #getVariableTop
+ * @see RuleBasedCollator#setAlternateHandlingShifted
+ * @stable ICU 2.6
+ */
+    public int setVariableTop(String varTop) {
+        // Variable-top control is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the variable top value of a Collator.
+ * Lower 16 bits are undefined and should be ignored.
+ * @return the variable top value of a Collator.
+ * @see #setVariableTop
+ * @stable ICU 2.6
+ */
+    public int getVariableTop() {
+        // Variable-top control is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Sets the variable top to a collation element value supplied.
+ * Variable top is set to the upper 16 bits.
+ * Lower 16 bits are ignored.
+ * @param varTop Collation element value, as returned by setVariableTop or
+ * getVariableTop
+ * @see #getVariableTop
+ * @see #setVariableTop
+ * @stable ICU 2.6
+ */
+    public void setVariableTop(int varTop) {
+        // Variable-top control is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the version of this collator object.
+ * @return the version object associated with this collator
+ * @stable ICU 2.8
+ */
+    public VersionInfo getVersion() {
+        // Version information is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the UCA version of this collator object.
+ * @return the version object associated with this collator
+ * @stable ICU 2.8
+ */
+    public VersionInfo getUCAVersion() {
+        // Version information is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ /**
+ * {@icu} Returns the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA , the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * Note: This method will be implemented in ICU 3.0; ICU 2.8
+ * contains a partial preview implementation. The actual
+ * locale is returned correctly, but the valid locale is
+ * not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+    public final ULocale getLocale(ULocale.Type type) {
+        // Locale provenance is not offered by this stub; the call always fails.
+        final String reason = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(reason);
+    }
+
+ // com.ibm.icu.base specific overrides
+    public String toString() {
+        // The text form is that of the wrapped collator.
+        final String description = collator.toString();
+        return description;
+    }
+
+ public boolean equals(Object rhs) {
+ try {
+ return collator.equals(((Collator)rhs).collator);
+ }
+ catch (Exception e) {
+ return false;
+ }
+ }
+
+    public int hashCode() {
+        // Hash on the wrapped collator, the same object equals(Object) compares.
+        final int hash = collator.hashCode();
+        return hash;
+    }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CurrencyPluralInfo.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CurrencyPluralInfo.java
new file mode 100644
index 00000000000..40289fd940d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/CurrencyPluralInfo.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+/*
+ * Empty stub
+ */
+public class CurrencyPluralInfo {
+ private CurrencyPluralInfo() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java
new file mode 100644
index 00000000000..730d3f7df4e
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java
@@ -0,0 +1,1949 @@
+/*
+ * Copyright (C) 1996-2011, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ */
+
+package com.ibm.icu.text;
+
+import java.io.InvalidObjectException;
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.TimeZone;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * {@icuenhanced java.text.DateFormat}.{@icu _usage_}
+ *
+ * <p>DateFormat is an abstract class for date/time formatting subclasses which
+ * formats and parses dates or time in a language-independent manner.
+ * The date/time formatting subclass, such as SimpleDateFormat, allows for
+ * formatting (i.e., date -> text), parsing (text -> date), and
+ * normalization. The date is represented as a <code>Date</code> object or
+ * as the milliseconds since January 1, 1970, 00:00:00 GMT.
+ *
+ * <p>DateFormat provides many class methods for obtaining default date/time
+ * formatters based on the default or a given locale and a number of formatting
+ * styles. The formatting styles include FULL, LONG, MEDIUM, and SHORT. More
+ * detail and examples of using these styles are provided in the method
+ * descriptions.
+ *
+ * <p>DateFormat helps you to format and parse dates for any locale.
+ * Your code can be completely independent of the locale conventions for
+ * months, days of the week, or even the calendar format: lunar vs. solar.
+ *
+ * <p>To format a date for the current Locale, use one of the
+ * static factory methods:
+ *
+ * myString = DateFormat.getDateInstance().format(myDate);
+ *
+ * If you are formatting multiple dates, it is
+ * more efficient to get the format and use it multiple times so that
+ * the system doesn't have to fetch the information about the local
+ * language and country conventions multiple times.
+ *
+ * DateFormat df = DateFormat.getDateInstance();
+ * for (int i = 0; i < myDate.length; ++i) {
+ * output.println(df.format(myDate[i]) + "; ");
+ * }
+ *
+ * To format a date for a different Locale, specify it in the
+ * call to getDateInstance().
+ *
+ * DateFormat df = DateFormat.getDateInstance(DateFormat.LONG, Locale.FRANCE);
+ *
+ * You can use a DateFormat to parse also.
+ *
+ * myDate = df.parse(myString);
+ *
+ * Use getDateInstance to get the normal date format for that country.
+ * There are other static factory methods available.
+ * Use getTimeInstance to get the time format for that country.
+ * Use getDateTimeInstance to get a date and time format. You can pass in
+ * different options to these factory methods to control the length of the
+ * result; from SHORT to MEDIUM to LONG to FULL. The exact result depends
+ * on the locale, but generally:
+ * <ul><li>SHORT is completely numeric, such as 12.13.52 or 3:30pm
+ * <li>MEDIUM is longer, such as Jan 12, 1952
+ * <li>LONG is longer, such as January 12, 1952 or 3:30:32pm
+ * <li>FULL is pretty completely specified, such as
+ * Tuesday, April 12, 1952 AD or 3:30:42pm PST.
+ * </ul>
+ *
+ * You can also set the time zone on the format if you wish.
+ * If you want even more control over the format or parsing,
+ * (or want to give your users more control),
+ * you can try casting the DateFormat you get from the factory methods
+ * to a SimpleDateFormat. This will work for the majority
+ * of countries; just remember to put it in a try block in case you
+ * encounter an unusual one.
+ *
+ * <p>You can also use forms of the parse and format methods with
+ * ParsePosition and FieldPosition to
+ * allow you to
+ * <ul><li>progressively parse through pieces of a string.
+ * <li>align any particular field, or find out where it is for selection
+ * on the screen.
+ * </ul>
+ *
+ * Synchronization
+ *
+ * Date formats are not synchronized. It is recommended to create separate
+ * format instances for each thread. If multiple threads access a format
+ * concurrently, it must be synchronized externally.
+ *
+ * @see UFormat
+ * @see NumberFormat
+ * @see SimpleDateFormat
+ * @see com.ibm.icu.util.Calendar
+ * @see com.ibm.icu.util.GregorianCalendar
+ * @see com.ibm.icu.util.TimeZone
+ * @author Mark Davis, Chen-Lieh Huang, Alan Liu
+ * @stable ICU 2.0
+ */
+public class DateFormat extends Format {
+
+ // Serialization version identifier for this class.
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * The JDK formatter this object delegates to; assigned exactly once by the
+ * constructors.
+ * @internal
+ */
+ public final java.text.DateFormat dateFormat;
+
+ /**
+ * @internal
+ * @param delegate the DateFormat to which to delegate
+ */
+    public DateFormat(java.text.DateFormat delegate) {
+        // Keep the supplied JDK formatter as-is; no copy is made.
+        dateFormat = delegate;
+    }
+
+ /**
+ * For subclass use. Subclasses will generally not
+ * work correctly unless they manipulate the delegate.
+ */
+    protected DateFormat() {
+        // Delegate to whatever java.text.DateFormat.getInstance() returns.
+        this(java.text.DateFormat.getInstance());
+    }
+
+ /**
+ * FieldPosition selector for 'G' field alignment,
+ * corresponding to the {@link Calendar#ERA} field.
+ * @stable ICU 2.0
+ */
+    public static final int ERA_FIELD = 0;
+
+    /**
+     * FieldPosition selector for 'y' field alignment,
+     * corresponding to the {@link Calendar#YEAR} field.
+     * @stable ICU 2.0
+     */
+    public static final int YEAR_FIELD = 1;
+
+    /**
+     * FieldPosition selector for 'M' field alignment,
+     * corresponding to the {@link Calendar#MONTH} field.
+     * @stable ICU 2.0
+     */
+    public static final int MONTH_FIELD = 2;
+
+    /**
+     * FieldPosition selector for 'd' field alignment,
+     * corresponding to the {@link Calendar#DATE} field.
+     * @stable ICU 2.0
+     */
+    public static final int DATE_FIELD = 3;
+
+    /**
+     * FieldPosition selector for 'k' field alignment,
+     * corresponding to the {@link Calendar#HOUR_OF_DAY} field.
+     * HOUR_OF_DAY1_FIELD is used for the one-based 24-hour clock.
+     * For example, 23:59 + 01:00 results in 24:59.
+     * @stable ICU 2.0
+     */
+    public static final int HOUR_OF_DAY1_FIELD = 4;
+
+    /**
+     * FieldPosition selector for 'H' field alignment,
+     * corresponding to the {@link Calendar#HOUR_OF_DAY} field.
+     * HOUR_OF_DAY0_FIELD is used for the zero-based 24-hour clock.
+     * For example, 23:59 + 01:00 results in 00:59.
+     * @stable ICU 2.0
+     */
+    public static final int HOUR_OF_DAY0_FIELD = 5;
+
+    /**
+     * FieldPosition selector for 'm' field alignment,
+     * corresponding to the {@link Calendar#MINUTE} field.
+     * @stable ICU 2.0
+     */
+    public static final int MINUTE_FIELD = 6;
+
+    /**
+     * FieldPosition selector for 's' field alignment,
+     * corresponding to the {@link Calendar#SECOND} field.
+     * @stable ICU 2.0
+     */
+    public static final int SECOND_FIELD = 7;
+
+    /**
+     * {@icu} FieldPosition selector for 'S' field alignment,
+     * corresponding to the {@link Calendar#MILLISECOND} field.
+     * @stable ICU 3.0
+     */
+    public static final int FRACTIONAL_SECOND_FIELD = 8;
+
+    /**
+     * Alias for FRACTIONAL_SECOND_FIELD.
+     * @deprecated ICU 3.0 use FRACTIONAL_SECOND_FIELD.
+     */
+    public static final int MILLISECOND_FIELD = FRACTIONAL_SECOND_FIELD;
+
+    /**
+     * FieldPosition selector for 'E' field alignment,
+     * corresponding to the {@link Calendar#DAY_OF_WEEK} field.
+     * @stable ICU 2.0
+     */
+    public static final int DAY_OF_WEEK_FIELD = 9;
+
+    /**
+     * FieldPosition selector for 'D' field alignment,
+     * corresponding to the {@link Calendar#DAY_OF_YEAR} field.
+     * @stable ICU 2.0
+     */
+    public static final int DAY_OF_YEAR_FIELD = 10;
+
+    /**
+     * FieldPosition selector for 'F' field alignment,
+     * corresponding to the {@link Calendar#DAY_OF_WEEK_IN_MONTH} field.
+     * @stable ICU 2.0
+     */
+    public static final int DAY_OF_WEEK_IN_MONTH_FIELD = 11;
+
+    /**
+     * FieldPosition selector for 'w' field alignment,
+     * corresponding to the {@link Calendar#WEEK_OF_YEAR} field.
+     * @stable ICU 2.0
+     */
+    public static final int WEEK_OF_YEAR_FIELD = 12;
+
+    /**
+     * FieldPosition selector for 'W' field alignment,
+     * corresponding to the {@link Calendar#WEEK_OF_MONTH} field.
+     * @stable ICU 2.0
+     */
+    public static final int WEEK_OF_MONTH_FIELD = 13;
+
+    /**
+     * FieldPosition selector for 'a' field alignment,
+     * corresponding to the {@link Calendar#AM_PM} field.
+     * @stable ICU 2.0
+     */
+    public static final int AM_PM_FIELD = 14;
+
+    /**
+     * FieldPosition selector for 'h' field alignment,
+     * corresponding to the {@link Calendar#HOUR} field.
+     * HOUR1_FIELD is used for the one-based 12-hour clock.
+     * For example, 11:30 PM + 1 hour results in 12:30 AM.
+     * @stable ICU 2.0
+     */
+    public static final int HOUR1_FIELD = 15;
+
+    /**
+     * FieldPosition selector for 'K' field alignment,
+     * corresponding to the {@link Calendar#HOUR} field.
+     * HOUR0_FIELD is used for the zero-based 12-hour clock.
+     * For example, 11:30 PM + 1 hour results in 00:30 AM.
+     * @stable ICU 2.0
+     */
+    public static final int HOUR0_FIELD = 16;
+
+    /**
+     * FieldPosition selector for 'z' field alignment,
+     * corresponding to the {@link Calendar#ZONE_OFFSET} and
+     * {@link Calendar#DST_OFFSET} fields.
+     * @stable ICU 2.0
+     */
+    public static final int TIMEZONE_FIELD = 17;
+
+    /**
+     * {@icu} FieldPosition selector for 'Y' field alignment,
+     * corresponding to the {@link Calendar#YEAR_WOY} field.
+     * @stable ICU 3.0
+     */
+    public static final int YEAR_WOY_FIELD = 18;
+
+    /**
+     * {@icu} FieldPosition selector for 'e' field alignment,
+     * corresponding to the {@link Calendar#DOW_LOCAL} field.
+     * @stable ICU 3.0
+     */
+    public static final int DOW_LOCAL_FIELD = 19;
+
+    /**
+     * {@icu} FieldPosition selector for 'u' field alignment,
+     * corresponding to the {@link Calendar#EXTENDED_YEAR} field.
+     * @stable ICU 3.0
+     */
+    public static final int EXTENDED_YEAR_FIELD = 20;
+
+    /**
+     * {@icu} FieldPosition selector for 'g' field alignment,
+     * corresponding to the {@link Calendar#JULIAN_DAY} field.
+     * @stable ICU 3.0
+     */
+    public static final int JULIAN_DAY_FIELD = 21;
+
+    /**
+     * {@icu} FieldPosition selector for 'A' field alignment,
+     * corresponding to the {@link Calendar#MILLISECONDS_IN_DAY} field.
+     * @stable ICU 3.0
+     */
+    public static final int MILLISECONDS_IN_DAY_FIELD = 22;
+
+    /**
+     * {@icu} FieldPosition selector for 'Z' field alignment,
+     * corresponding to the {@link Calendar#ZONE_OFFSET} and
+     * {@link Calendar#DST_OFFSET} fields.
+     * @stable ICU 3.0
+     */
+    public static final int TIMEZONE_RFC_FIELD = 23;
+
+    /**
+     * {@icu} FieldPosition selector for 'v' field alignment,
+     * corresponding to the {@link Calendar#ZONE_OFFSET} and
+     * {@link Calendar#DST_OFFSET} fields. This displays the generic zone
+     * name, if available.
+     * @stable ICU 3.4
+     */
+    public static final int TIMEZONE_GENERIC_FIELD = 24;
+
+    /**
+     * {@icu} FieldPosition selector for 'c' field alignment,
+     * corresponding to the {@link Calendar#DAY_OF_WEEK} field.
+     * This displays the stand alone day name, if available.
+     * @stable ICU 3.4
+     */
+    public static final int STANDALONE_DAY_FIELD = 25;
+
+    /**
+     * {@icu} FieldPosition selector for 'L' field alignment,
+     * corresponding to the {@link Calendar#MONTH} field.
+     * This displays the stand alone month name, if available.
+     * @stable ICU 3.4
+     */
+    public static final int STANDALONE_MONTH_FIELD = 26;
+
+    /**
+     * {@icu} FieldPosition selector for 'Q' field alignment,
+     * corresponding to the {@link Calendar#MONTH} field.
+     * This displays the quarter.
+     * @stable ICU 3.6
+     */
+    public static final int QUARTER_FIELD = 27;
+
+    /**
+     * {@icu} FieldPosition selector for 'q' field alignment,
+     * corresponding to the {@link Calendar#MONTH} field.
+     * This displays the stand alone quarter, if available.
+     * @stable ICU 3.6
+     */
+    public static final int STANDALONE_QUARTER_FIELD = 28;
+
+    /**
+     * {@icu} FieldPosition selector for 'V' field alignment,
+     * corresponding to the {@link Calendar#ZONE_OFFSET} and
+     * {@link Calendar#DST_OFFSET} fields. This displays the fallback timezone
+     * name when VVVV is specified, and the short standard or daylight
+     * timezone name ignoring commonlyUsed when a single V is specified.
+     * @stable ICU 3.8
+     */
+    public static final int TIMEZONE_SPECIAL_FIELD = 29;
+
+    /**
+     * {@icu} Number of FieldPosition selectors for DateFormat.
+     * Valid selectors range from 0 to FIELD_COUNT-1.
+     * @stable ICU 3.0
+     */
+    public static final int FIELD_COUNT = 30; // must == DateFormatSymbols.patternChars.length()
+
+ /**
+ * Formats a time object into a time string. Examples of time objects
+ * are a time value expressed in milliseconds and a Date object.
+ * @param obj must be a Number or a Date or a Calendar.
+ * @param toAppendTo the string buffer for the returning time string.
+ * @return the formatted time string.
+ * @param fieldPosition keeps track of the position of the field
+ * within the returned string.
+ * On input: an alignment field,
+ * if desired. On output: the offsets of the alignment field. For
+ * example, given a time text "1996.07.10 AD at 15:08:56 PDT",
+ * if the given fieldPosition is DateFormat.YEAR_FIELD, the
+ * begin index and end index of fieldPosition will be set to
+ * 0 and 4, respectively.
+ * Notice that if the same time field appears
+ * more than once in a pattern, the fieldPosition will be set for the first
+ * occurrence of that time field. For instance, formatting a Date to
+ * the time string "1 PM PDT (Pacific Daylight Time)" using the pattern
+ * "h a z (zzzz)" and the alignment field DateFormat.TIMEZONE_FIELD,
+ * the begin index and end index of fieldPosition will be set to
+ * 5 and 8, respectively, for the first occurrence of the timezone
+ * pattern character 'z'.
+ * @see java.text.Format
+ * @stable ICU 2.0
+ */
+ public final StringBuffer format(Object obj, StringBuffer toAppendTo,
+ FieldPosition fieldPosition)
+ {
+ if (obj instanceof Calendar) {
+ return format((Calendar)obj, toAppendTo, fieldPosition);
+ } else if (obj instanceof Date) {
+ return format((Date)obj, toAppendTo, fieldPosition);
+ } else if (obj instanceof Number) {
+ return format(new Date(((Number)obj).longValue()), toAppendTo, fieldPosition );
+ }
+
+ throw new IllegalArgumentException("Cannot format given Object (" +
+ obj.getClass().getName() + ") as a Date");
+ }
+
+ /**
+ * Formats a date into a date/time string.
+ * @param cal a Calendar set to the date and time to be formatted
+ * into a date/time string. When the calendar type is different from
+ * the internal calendar held by this DateFormat instance, the date
+ * and the time zone will be inherited from the input calendar, but
+ * other calendar field values will be calculated by the internal calendar.
+ * @param toAppendTo the string buffer for the returning date/time string.
+ * @param fieldPosition keeps track of the position of the field
+ * within the returned string.
+ * On input: an alignment field,
+ * if desired. On output: the offsets of the alignment field. For
+ * example, given a time text "1996.07.10 AD at 15:08:56 PDT",
+ * if the given fieldPosition is DateFormat.YEAR_FIELD, the
+ * begin index and end index of fieldPosition will be set to
+ * 0 and 4, respectively.
+ * Notice that if the same time field appears
+ * more than once in a pattern, the fieldPosition will be set for the first
+ * occurrence of that time field. For instance, formatting a Date to
+ * the time string "1 PM PDT (Pacific Daylight Time)" using the pattern
+ * "h a z (zzzz)" and the alignment field DateFormat.TIMEZONE_FIELD,
+ * the begin index and end index of fieldPosition will be set to
+ * 5 and 8, respectively, for the first occurrence of the timezone
+ * pattern character 'z'.
+ * @return the formatted date/time string.
+ * @stable ICU 2.0
+ */
+    public StringBuffer format(
+            Calendar cal, StringBuffer toAppendTo, FieldPosition fieldPosition) {
+        // Only the calendar's instant is used; formatting is done by the Date overload.
+        final Date instant = cal.getTime();
+        return format(instant, toAppendTo, fieldPosition);
+    }
+
+ /**
+ * Formats a Date into a date/time string.
+ * @param date a Date to be formatted into a date/time string.
+ * @param toAppendTo the string buffer for the returning date/time string.
+ * @param fieldPosition keeps track of the position of the field
+ * within the returned string.
+ * On input: an alignment field,
+ * if desired. On output: the offsets of the alignment field. For
+ * example, given a time text "1996.07.10 AD at 15:08:56 PDT",
+ * if the given fieldPosition is DateFormat.YEAR_FIELD, the
+ * begin index and end index of fieldPosition will be set to
+ * 0 and 4, respectively.
+ * Notice that if the same time field appears
+ * more than once in a pattern, the fieldPosition will be set for the first
+ * occurrence of that time field. For instance, formatting a Date to
+ * the time string "1 PM PDT (Pacific Daylight Time)" using the pattern
+ * "h a z (zzzz)" and the alignment field DateFormat.TIMEZONE_FIELD,
+ * the begin index and end index of fieldPosition will be set to
+ * 5 and 8, respectively, for the first occurrence of the timezone
+ * pattern character 'z'.
+ * @return the formatted date/time string.
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(Date date, StringBuffer toAppendTo,
+         FieldPosition fieldPosition) {
+     // Translate the caller's field position for the wrapped formatter.
+     final FieldPosition jdkPosition = toJDKFieldPosition(fieldPosition);
+     final StringBuffer result = dateFormat.format(date, toAppendTo, jdkPosition);
+     if (jdkPosition == null) {
+         // No translated position, so there are no offsets to report back.
+         return result;
+     }
+     // Copy the offsets found by the wrapped formatter into the caller's object.
+     fieldPosition.setBeginIndex(jdkPosition.getBeginIndex());
+     fieldPosition.setEndIndex(jdkPosition.getEndIndex());
+     return result;
+ }
+
+ /**
+ * Formats a Date into a date/time string.
+ * @param date the time value to be formatted into a time string.
+ * @return the formatted time string.
+ * @stable ICU 2.0
+ */
+ public final String format(Date date) {
+     // Pure delegation: the wrapped formatter produces the text.
+     final String formatted = dateFormat.format(date);
+     return formatted;
+ }
+
+ /**
+ * Parses a date/time string.
+ *
+ * @param text The date/time string to be parsed
+ *
+ * @return A Date parsed from the given string; a failed parse is reported by ParseException rather than by a null result
+ *
+ * @exception ParseException If the given string cannot be parsed as a date.
+ *
+ * @see #parse(String, ParsePosition)
+ * @stable ICU 2.0
+ */
+ public Date parse(String text) throws ParseException {
+     // Delegates to the wrapped formatter; its ParseException propagates unchanged.
+     final Date parsed = dateFormat.parse(text);
+     return parsed;
+ }
+
+ /**
+ * Parses a date/time string according to the given parse position.
+ * For example, a time text "07/10/96 4:5 PM, PDT" will be parsed
+ * into a Calendar that is equivalent to Date(837039928046). The
+ * caller should clear the calendar before calling this method,
+ * unless existing field information is to be kept.
+ *
+ * By default, parsing is lenient: If the input is not in the form used
+ * by this object's format method but can still be parsed as a date, then
+ * the parse succeeds. Clients may insist on strict adherence to the
+ * format by calling setLenient(false).
+ *
+ * @see #setLenient(boolean)
+ *
+ * @param text The date/time string to be parsed
+ *
+ * @param cal The calendar into which parsed data will be stored.
+ * In general, this should be cleared before calling this
+ * method. If this parse fails, the calendar may still
+ * have been modified. When the calendar type is different
+ * from the internal calendar held by this DateFormat
+ * instance, calendar field values will be parsed based
+ * on the internal calendar initialized with the time and
+ * the time zone taken from this calendar, then the
+ * parse result (time in milliseconds and time zone) will
+ * be set back to this calendar.
+ *
+ * @param pos On input, the position at which to start parsing; on
+ * output, the position at which parsing terminated, or the
+ * start position if the parse failed.
+ * @stable ICU 2.0
+ */
+ public void parse(String text, Calendar cal, ParsePosition pos) {
+     final Date result = dateFormat.parse(text, pos);
+     // The position-based parse reports failure by returning null (pos then
+     // describes where parsing stopped). A null must not be handed to
+     // cal.setTime(), so a failed parse leaves the calendar untouched and the
+     // caller inspects pos to detect the failure.
+     if (result != null) {
+         cal.setTime(result);
+     }
+ }
+
+ /**
+ * Parses a date/time string according to the given parse position. For
+ * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
+ * that is equivalent to Date(837039928046).
+ *
+ * <p>By default, parsing is lenient: If the input is not in the form used
+ * by this object's format method but can still be parsed as a date, then
+ * the parse succeeds. Clients may insist on strict adherence to the
+ * format by calling setLenient(false).
+ *
+ * @see #setLenient(boolean)
+ *
+ * @param text The date/time string to be parsed
+ *
+ * @param pos On input, the position at which to start parsing; on
+ * output, the position at which parsing terminated, or the
+ * start position if the parse failed.
+ *
+ * @return A Date, or null if the input could not be parsed
+ * @stable ICU 2.0
+ */
+ public Date parse(String text, ParsePosition pos) {
+     // Delegates to the wrapped formatter, which also updates pos.
+     final Date parsed = dateFormat.parse(text, pos);
+     return parsed;
+ }
+
+ /**
+ * Parses a date/time string into an Object. This convenience method simply
+ * calls parse(String, ParsePosition).
+ *
+ * @see #parse(String, ParsePosition)
+ * @stable ICU 2.0
+ */
+ public Object parseObject(String source, ParsePosition pos) {
+     // Same work as parse(String, ParsePosition); the result is simply returned as Object.
+     final Date parsed = parse(source, pos);
+     return parsed;
+ }
+
+ /**
+ * {@icu} Constant for empty style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int NONE = -1;
+
+ /**
+ * Constant for full style pattern.
+ * @stable ICU 2.0
+ */
+ public static final int FULL = 0;
+
+ /**
+ * Constant for long style pattern.
+ * @stable ICU 2.0
+ */
+ public static final int LONG = 1;
+
+ /**
+ * Constant for medium style pattern.
+ * @stable ICU 2.0
+ */
+ public static final int MEDIUM = 2;
+
+ /**
+ * Constant for short style pattern.
+ * @stable ICU 2.0
+ */
+ public static final int SHORT = 3;
+
+ /**
+ * Constant for default style pattern. Its value is MEDIUM.
+ * @stable ICU 2.0
+ */
+ public static final int DEFAULT = MEDIUM;
+
+ /**
+ * {@icu} Constant for relative style mask.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE = (1 << 7);
+
+ /**
+ * {@icu} Constant for relative full style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE_FULL = RELATIVE | FULL;
+
+ /**
+ * {@icu} Constant for relative long style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE_LONG = RELATIVE | LONG;
+
+ /**
+ * {@icu} Constant for relative medium style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE_MEDIUM = RELATIVE | MEDIUM;
+
+ /**
+ * {@icu} Constant for relative short style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE_SHORT = RELATIVE | SHORT;
+
+ /**
+ * {@icu} Constant for relative default style pattern.
+ * @stable ICU 3.8
+ */
+ public static final int RELATIVE_DEFAULT = RELATIVE | DEFAULT;
+
+ /* Below are pre-defined skeletons
+ *
+ *
+ * A skeleton
+ *
+ *
+ * 1. only keeps the field pattern letter and ignores all other parts
+ * in a pattern, such as space, punctuations, and string literals.
+ *
+ * 2. hides the order of fields.
+ *
+ * 3. might hide a field's pattern letter length.
+ *
+ * For those non-digit calendar fields, the pattern letter length is
+ * important, such as MMM, MMMM, and MMMMM; E and EEEE,
+ * and the field's pattern letter length is honored.
+ *
+ * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy,
+ * the field pattern length is ignored and the best match, which is
+ * defined in date time patterns, will be returned without honoring
+ * the field pattern letter length in the skeleton.
+ *
+ */
+ /**
+ * {@icu} Constant for date pattern with minute and second.
+ * @stable ICU 4.0
+ */
+ public static final String MINUTE_SECOND = "ms";
+
+ /**
+ * {@icu} Constant for date pattern with hour and minute in 24-hour presentation.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR24_MINUTE = "Hm";
+
+ /**
+ * {@icu} Constant for date pattern with hour, minute, and second in
+ * 24-hour presentation.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR24_MINUTE_SECOND = "Hms";
+
+ /**
+ * {@icu} Constant for date pattern with hour, minute, and second.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_MINUTE_SECOND = "hms";
+
+ /**
+ * {@icu} Constant for date pattern with standalone month.
+ * @stable ICU 4.0
+ */
+ public static final String STANDALONE_MONTH = "LLLL";
+
+ /**
+ * {@icu} Constant for date pattern with standalone abbreviated month.
+ * @stable ICU 4.0
+ */
+ public static final String ABBR_STANDALONE_MONTH = "LLL";
+
+ /**
+ * {@icu} Constant for date pattern with year and quarter.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_QUARTER = "yQQQ";
+
+ /**
+ * {@icu} Constant for date pattern with year and abbreviated quarter.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_ABBR_QUARTER = "yQ";
+
+
+ /* Below are skeletons for which date interval patterns are pre-defined in the resource file.
+ * Users are encouraged to use them in date interval format factory methods.
+ */
+ /**
+ * {@icu} Constant for date pattern with hour and minute.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_MINUTE = "hm";
+
+ /**
+ * {@icu} Constant for date pattern with year.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR = "y";
+
+ /**
+ * {@icu} Constant for date pattern with day.
+ * @stable ICU 4.0
+ */
+ public static final String DAY = "d";
+
+ /**
+ * {@icu} Constant for date pattern with numeric month, weekday, and day.
+ * @stable ICU 4.0
+ */
+ public static final String NUM_MONTH_WEEKDAY_DAY = "MEd";
+
+ /**
+ * {@icu} Constant for date pattern with year and numeric month.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_NUM_MONTH = "yM";
+
+ /**
+ * {@icu} Constant for date pattern with numeric month and day.
+ * @stable ICU 4.0
+ */
+ public static final String NUM_MONTH_DAY = "Md";
+
+ /**
+ * {@icu} Constant for date pattern with year, numeric month, weekday, and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_NUM_MONTH_WEEKDAY_DAY = "yMEd";
+
+ /**
+ * {@icu} Constant for date pattern with abbreviated month, weekday, and day.
+ * @stable ICU 4.0
+ */
+ public static final String ABBR_MONTH_WEEKDAY_DAY = "MMMEd";
+
+ /**
+ * {@icu} Constant for date pattern with year and month.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_MONTH = "yMMMM";
+
+ /**
+ * {@icu} Constant for date pattern with year and abbreviated month.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_ABBR_MONTH = "yMMM";
+
+ /**
+ * {@icu} Constant for date pattern having month and day.
+ * @stable ICU 4.0
+ */
+ public static final String MONTH_DAY = "MMMMd";
+
+ /**
+ * {@icu} Constant for date pattern with abbreviated month and day.
+ * @stable ICU 4.0
+ */
+ public static final String ABBR_MONTH_DAY = "MMMd";
+
+ /**
+ * {@icu} Constant for date pattern with month, weekday, and day.
+ * @stable ICU 4.0
+ */
+ public static final String MONTH_WEEKDAY_DAY = "MMMMEEEEd";
+
+ /**
+ * {@icu} Constant for date pattern with year, abbreviated month, weekday,
+ * and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_ABBR_MONTH_WEEKDAY_DAY = "yMMMEd";
+
+ /**
+ * {@icu} Constant for date pattern with year, month, weekday, and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_MONTH_WEEKDAY_DAY = "yMMMMEEEEd";
+
+ /**
+ * {@icu} Constant for date pattern with year, month, and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_MONTH_DAY = "yMMMMd";
+
+ /**
+ * {@icu} Constant for date pattern with year, abbreviated month, and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_ABBR_MONTH_DAY = "yMMMd";
+
+ /**
+ * {@icu} Constant for date pattern with year, numeric month, and day.
+ * @stable ICU 4.0
+ */
+ public static final String YEAR_NUM_MONTH_DAY = "yMd";
+
+ /**
+ * {@icu} Constant for date pattern with numeric month.
+ * @stable ICU 4.0
+ */
+ public static final String NUM_MONTH = "M";
+
+ /**
+ * {@icu} Constant for date pattern with abbreviated month.
+ * @stable ICU 4.0
+ */
+ public static final String ABBR_MONTH = "MMM";
+
+ /**
+ * {@icu} Constant for date pattern with month.
+ * @stable ICU 4.0
+ */
+ public static final String MONTH = "MMMM";
+
+ /**
+ * {@icu} Constant for date pattern with hour, minute, and generic timezone.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_MINUTE_GENERIC_TZ = "hmv";
+
+ /**
+ * {@icu} Constant for date pattern with hour, minute, and timezone.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_MINUTE_TZ = "hmz";
+
+ /**
+ * {@icu} Constant for date pattern with hour.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR = "h";
+
+ /**
+ * {@icu} Constant for date pattern with hour and generic timezone.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_GENERIC_TZ = "hv";
+
+ /**
+ * {@icu} Constant for date pattern with hour and timezone.
+ * @stable ICU 4.0
+ */
+ public static final String HOUR_TZ = "hz";
+
+ /**
+ * Gets the time formatter with the default formatting style
+ * for the default locale.
+ * @return a time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getTimeInstance() {
+     // Obtain the JDK default time formatter and wrap it.
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getTimeInstance();
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the time formatter with the given formatting style
+ * for the default locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "h:mm a" in the US locale.
+ * @return a time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getTimeInstance(int style) {
+     // Map the style constant first, then wrap the matching JDK time formatter.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getTimeInstance(jdkStyle);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the time formatter with the given formatting style
+ * for the given locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "h:mm a" in the US locale.
+ * @param aLocale the given locale.
+ * @return a time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getTimeInstance(int style,
+         Locale aLocale) {
+     // Map the style constant first, then wrap the JDK time formatter for the locale.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final java.text.DateFormat jdkFormat =
+             java.text.DateFormat.getTimeInstance(jdkStyle, aLocale);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the time formatter with the given formatting style
+ * for the given locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "h:mm a" in the US locale.
+ * @param locale the given ulocale.
+ * @return a time formatter.
+ * @stable ICU 3.2
+ */
+ public final static DateFormat getTimeInstance(int style,
+         ULocale locale) {
+     // Style is mapped before the ULocale is converted, matching argument order.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final Locale jdkLocale = locale.toLocale();
+     final java.text.DateFormat jdkFormat =
+             java.text.DateFormat.getTimeInstance(jdkStyle, jdkLocale);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date formatter with the default formatting style
+ * for the default locale.
+ * @return a date formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateInstance() {
+     // Obtain the JDK default date formatter and wrap it.
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getDateInstance();
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date formatter with the given formatting style
+ * for the default locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "M/d/yy" in the US locale.
+ * @return a date formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateInstance(int style) {
+     // Map the style constant first, then wrap the matching JDK date formatter.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getDateInstance(jdkStyle);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date formatter with the given formatting style
+ * for the given locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "M/d/yy" in the US locale.
+ * @param aLocale the given locale.
+ * @return a date formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateInstance(int style,
+         Locale aLocale) {
+     // Map the style constant first, then wrap the JDK date formatter for the locale.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final java.text.DateFormat jdkFormat =
+             java.text.DateFormat.getDateInstance(jdkStyle, aLocale);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date formatter with the given formatting style
+ * for the given locale.
+ * @param style the given formatting style. For example,
+ * SHORT for "M/d/yy" in the US locale.
+ * @param locale the given ulocale.
+ * @return a date formatter.
+ * @stable ICU 3.2
+ */
+ public final static DateFormat getDateInstance(int style,
+         ULocale locale) {
+     // Style is mapped before the ULocale is converted, matching argument order.
+     final int jdkStyle = getJDKFormatStyle(style);
+     final Locale jdkLocale = locale.toLocale();
+     final java.text.DateFormat jdkFormat =
+             java.text.DateFormat.getDateInstance(jdkStyle, jdkLocale);
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date/time formatter with the default formatting style
+ * for the default locale.
+ * @return a date/time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateTimeInstance() {
+     // Obtain the JDK default date/time formatter and wrap it.
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getDateTimeInstance();
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date/time formatter with the given date and time
+ * formatting styles for the default locale.
+ * @param dateStyle the given date formatting style. For example,
+ * SHORT for "M/d/yy" in the US locale.
+ * @param timeStyle the given time formatting style. For example,
+ * SHORT for "h:mm a" in the US locale.
+ * @return a date/time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateTimeInstance(int dateStyle,
+         int timeStyle) {
+     final boolean wantsDate = dateStyle != NONE;
+     final boolean wantsTime = timeStyle != NONE;
+     // Neither part requested: nothing to build, so null is returned.
+     if (!wantsDate && !wantsTime) {
+         return null;
+     }
+     // NONE on one side degrades to a date-only or time-only formatter.
+     final java.text.DateFormat jdkFormat;
+     if (wantsDate && wantsTime) {
+         jdkFormat = java.text.DateFormat.getDateTimeInstance(
+                 getJDKFormatStyle(dateStyle), getJDKFormatStyle(timeStyle));
+     } else if (wantsDate) {
+         jdkFormat = java.text.DateFormat.getDateInstance(getJDKFormatStyle(dateStyle));
+     } else {
+         jdkFormat = java.text.DateFormat.getTimeInstance(getJDKFormatStyle(timeStyle));
+     }
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date/time formatter with the given formatting styles
+ * for the given locale.
+ * @param dateStyle the given date formatting style.
+ * @param timeStyle the given time formatting style.
+ * @param aLocale the given locale.
+ * @return a date/time formatter.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getDateTimeInstance(
+         int dateStyle, int timeStyle, Locale aLocale) {
+     final boolean wantsDate = dateStyle != NONE;
+     final boolean wantsTime = timeStyle != NONE;
+     // Neither part requested: nothing to build, so null is returned.
+     if (!wantsDate && !wantsTime) {
+         return null;
+     }
+     // NONE on one side degrades to a date-only or time-only formatter.
+     final java.text.DateFormat jdkFormat;
+     if (wantsDate && wantsTime) {
+         jdkFormat = java.text.DateFormat.getDateTimeInstance(
+                 getJDKFormatStyle(dateStyle), getJDKFormatStyle(timeStyle), aLocale);
+     } else if (wantsDate) {
+         jdkFormat = java.text.DateFormat.getDateInstance(getJDKFormatStyle(dateStyle), aLocale);
+     } else {
+         jdkFormat = java.text.DateFormat.getTimeInstance(getJDKFormatStyle(timeStyle), aLocale);
+     }
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the date/time formatter with the given formatting styles
+ * for the given locale.
+ * @param dateStyle the given date formatting style.
+ * @param timeStyle the given time formatting style.
+ * @param locale the given ulocale.
+ * @return a date/time formatter.
+ * @stable ICU 3.2
+ */
+ public final static DateFormat getDateTimeInstance(
+         int dateStyle, int timeStyle, ULocale locale) {
+     final boolean wantsDate = dateStyle != NONE;
+     final boolean wantsTime = timeStyle != NONE;
+     // Neither part requested: nothing to build, so null is returned
+     // (the locale is not touched in that case).
+     if (!wantsDate && !wantsTime) {
+         return null;
+     }
+     // The ULocale is converted only inside the branch that needs it, after the
+     // style arguments have been mapped.
+     final java.text.DateFormat jdkFormat;
+     if (wantsDate && wantsTime) {
+         jdkFormat = java.text.DateFormat.getDateTimeInstance(
+                 getJDKFormatStyle(dateStyle), getJDKFormatStyle(timeStyle), locale.toLocale());
+     } else if (wantsDate) {
+         jdkFormat = java.text.DateFormat.getDateInstance(
+                 getJDKFormatStyle(dateStyle), locale.toLocale());
+     } else {
+         jdkFormat = java.text.DateFormat.getTimeInstance(
+                 getJDKFormatStyle(timeStyle), locale.toLocale());
+     }
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns a default date/time formatter that uses the SHORT style for both the
+ * date and the time.
+ * @stable ICU 2.0
+ */
+ public final static DateFormat getInstance() {
+     // Wrap the JDK's default formatter.
+     final java.text.DateFormat jdkFormat = java.text.DateFormat.getInstance();
+     return new DateFormat(jdkFormat);
+ }
+
+ /**
+ * Returns the set of locales for which DateFormats are installed.
+ * @return the set of locales for which DateFormats are installed.
+ * @stable ICU 2.0
+ */
+ public static Locale[] getAvailableLocales() {
+     // The JDK's list is passed through as-is.
+     final Locale[] jdkLocales = java.text.DateFormat.getAvailableLocales();
+     return jdkLocales;
+ }
+
+ /**
+ * {@icu} Returns the set of locales for which DateFormats are installed.
+ * The list is derived once from java.text.DateFormat.getAvailableLocales() and
+ * cached; every call returns a fresh copy, so callers may modify the returned
+ * array without affecting later calls.
+ * @return the set of locales for which DateFormats are installed.
+ * @draft ICU 3.2 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static ULocale[] getAvailableULocales()
+ {
+     // Read the volatile field once into a local for the unlocked fast path.
+     ULocale[] cached = availableULocales;
+     if (cached == null) {
+         synchronized (DateFormat.class) {
+             cached = availableULocales;
+             if (cached == null) {
+                 Locale[] locales = java.text.DateFormat.getAvailableLocales();
+                 cached = new ULocale[locales.length];
+                 for (int i = 0; i < locales.length; ++i) {
+                     cached[i] = ULocale.forLocale(locales[i]);
+                 }
+                 // Publish only after the array is completely filled. Assigning the
+                 // field before the loop would let a concurrent caller on the
+                 // unlocked path observe an array that still contains nulls.
+                 availableULocales = cached;
+             }
+         }
+     }
+     // Defensive copy: the cached array is shared by all callers.
+     return (ULocale[]) cached.clone();
+ }
+ // Lazily initialized cache; written only inside the synchronized block above.
+ private static volatile ULocale[] availableULocales;
+
+ /**
+ * Sets the calendar to be used by this date format. Initially, the default
+ * calendar for the specified or default locale is used.
+ * @param newCalendar the new Calendar to be used by the date format
+ * @stable ICU 2.0
+ */
+ public void setCalendar(Calendar newCalendar) {
+     // Unwrap the underlying JDK calendar and install it on the wrapped formatter.
+     final java.util.Calendar jdkCalendar = newCalendar.calendar;
+     dateFormat.setCalendar(jdkCalendar);
+ }
+
+ /**
+ * Returns the calendar associated with this date/time formatter.
+ * @return the calendar associated with this date/time formatter.
+ * @stable ICU 2.0
+ */
+ public Calendar getCalendar() {
+     // A new wrapper object is created around the formatter's calendar on every call.
+     final java.util.Calendar jdkCalendar = dateFormat.getCalendar();
+     return new Calendar(jdkCalendar);
+ }
+
+ /**
+ * Sets the number formatter.
+ * @param newNumberFormat the given new NumberFormat.
+ * @stable ICU 2.0
+ */
+ public void setNumberFormat(NumberFormat newNumberFormat) {
+     // Unwrap the underlying JDK number format and install it on the wrapped formatter.
+     final java.text.NumberFormat jdkNumberFormat = newNumberFormat.numberFormat;
+     dateFormat.setNumberFormat(jdkNumberFormat);
+ }
+
+ /**
+ * Returns the number formatter which this date/time formatter uses to
+ * format and parse a time.
+ * @return the number formatter which this date/time formatter uses.
+ * @stable ICU 2.0
+ */
+ public NumberFormat getNumberFormat() {
+     // A new wrapper object is created around the formatter's number format on every call.
+     final java.text.NumberFormat jdkNumberFormat = dateFormat.getNumberFormat();
+     return new NumberFormat(jdkNumberFormat);
+ }
+
+ /**
+ * Sets the time zone for the calendar of this DateFormat object.
+ * @param zone the given new time zone.
+ * @stable ICU 2.0
+ */
+ public void setTimeZone(TimeZone zone) {
+     // Unwrap the underlying JDK time zone and install it on the wrapped formatter.
+     final java.util.TimeZone jdkZone = zone.timeZone;
+     dateFormat.setTimeZone(jdkZone);
+ }
+
+ /**
+ * Returns the time zone.
+ * @return the time zone associated with the calendar of DateFormat.
+ * @stable ICU 2.0
+ */
+ public TimeZone getTimeZone() {
+     // A new wrapper object is created around the formatter's time zone on every call.
+     final java.util.TimeZone jdkZone = dateFormat.getTimeZone();
+     return new TimeZone(jdkZone);
+ }
+
+ /**
+ * Specifies whether date/time parsing is to be lenient. With
+ * lenient parsing, the parser may use heuristics to interpret inputs that
+ * do not precisely match this object's format. With strict parsing,
+ * inputs must match this object's format.
+ * @param lenient when true, parsing is lenient
+ * @see com.ibm.icu.util.Calendar#setLenient
+ * @stable ICU 2.0
+ */
+ public void setLenient(boolean lenient) {
+     // Leniency is stored on the wrapped formatter, not on this wrapper.
+     dateFormat.setLenient(lenient);
+ }
+
+ /**
+ * Returns whether date/time parsing is lenient.
+ * @stable ICU 2.0
+ */
+ public boolean isLenient() {
+     // Reports the setting held by the wrapped formatter.
+     final boolean lenient = dateFormat.isLenient();
+     return lenient;
+ }
+
+ /**
+ * Overrides hashCode.
+ * @stable ICU 2.0
+ */
+ public int hashCode() {
+     // Uses the wrapped formatter's hash, in line with equals() comparing wrapped formatters.
+     final int delegateHash = dateFormat.hashCode();
+     return delegateHash;
+ }
+
+ /**
+ * Overrides equals.
+ * @stable ICU 2.0
+ */
+ public boolean equals(Object obj) {
+ try {
+ return dateFormat.equals(((DateFormat)obj).dateFormat);
+ }
+ catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Overrides clone.
+ * @stable ICU 2.0
+ */
+ public Object clone() {
+     // The copy is a new wrapper around a clone of the wrapped formatter.
+     final java.text.DateFormat jdkCopy = (java.text.DateFormat) dateFormat.clone();
+     return new DateFormat(jdkCopy);
+ }
+
+ //-------------------------------------------------------------------------
+ // Public static interface for creating custom DateFormats for different
+ // types of Calendars.
+ //-------------------------------------------------------------------------
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format dates in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * @param cal The calendar system for which a date format is desired.
+ *
+ * @param dateStyle The type of date format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the date format is desired.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getDateInstance(Calendar cal, int dateStyle, Locale locale) {
+     // Build the locale's date formatter first, then install the caller's calendar.
+     final DateFormat formatter = getDateInstance(dateStyle, locale);
+     formatter.setCalendar(cal);
+     return formatter;
+ }
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format dates in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * @param cal The calendar system for which a date format is desired.
+ *
+ * @param dateStyle The type of date format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the date format is desired.
+ * @stable ICU 3.2
+ */
+ public static final DateFormat getDateInstance(Calendar cal, int dateStyle, ULocale locale) {
+     // Build the locale's date formatter first, then install the caller's calendar.
+     final DateFormat formatter = getDateInstance(dateStyle, locale);
+     formatter.setCalendar(cal);
+     return formatter;
+ }
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format times in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ * @param cal The calendar system for which a time format is desired.
+ *
+ * @param timeStyle The type of time format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the time format is desired.
+ *
+ * @see DateFormat#getTimeInstance
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getTimeInstance(Calendar cal, int timeStyle, Locale locale) {
+     // Build the locale's time formatter first, then install the caller's calendar.
+     final DateFormat formatter = getTimeInstance(timeStyle, locale);
+     formatter.setCalendar(cal);
+     return formatter;
+ }
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format times in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ * @param cal The calendar system for which a time format is desired.
+ *
+ * @param timeStyle The type of time format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the time format is desired.
+ *
+ * @see DateFormat#getTimeInstance
+ * @stable ICU 3.2
+ */
+ public static final DateFormat getTimeInstance(Calendar cal, int timeStyle, ULocale locale) {
+     // Build the locale's time formatter first, then install the caller's calendar.
+     final DateFormat formatter = getTimeInstance(timeStyle, locale);
+     formatter.setCalendar(cal);
+     return formatter;
+ }
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format dates and times in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ * @param cal The calendar system for which a date/time format is desired.
+ *
+ * @param dateStyle The type of date format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param timeStyle The type of time format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the date/time format is desired.
+ *
+ * @see DateFormat#getDateTimeInstance
+ * @stable ICU 2.0
+ */
+ static final public DateFormat getDateTimeInstance(Calendar cal, int dateStyle,
+         int timeStyle, Locale locale)
+ {
+     DateFormat df = getDateTimeInstance(dateStyle, timeStyle, locale);
+     // The style-based factory returns null when both styles are NONE. Pass that
+     // result through to the caller instead of dereferencing it here.
+     if (df != null) {
+         df.setCalendar(cal);
+     }
+     return df;
+ }
+
+ /**
+ * Creates a {@link DateFormat} object that can be used to format dates and times in
+ * the calendar system specified by <code>cal</code>.
+ *
+ * Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ * @param cal The calendar system for which a date/time format is desired.
+ *
+ * @param dateStyle The type of date format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param timeStyle The type of time format desired. This can be
+ * {@link DateFormat#SHORT}, {@link DateFormat#MEDIUM},
+ * etc.
+ *
+ * @param locale The locale for which the date/time format is desired.
+ *
+ * @see DateFormat#getDateTimeInstance
+ * @stable ICU 3.2
+ */
+ static final public DateFormat getDateTimeInstance(Calendar cal, int dateStyle,
+         int timeStyle, ULocale locale)
+ {
+     DateFormat df = getDateTimeInstance(dateStyle, timeStyle, locale);
+     // The style-based factory returns null when both styles are NONE. Pass that
+     // result through to the caller instead of dereferencing it here.
+     if (df != null) {
+         df.setCalendar(cal);
+     }
+     return df;
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getInstance(Calendar cal, Locale locale) {
+     // Fixed styles for this convenience form: MEDIUM date, SHORT time.
+     final int dateStyle = DateFormat.MEDIUM;
+     final int timeStyle = DateFormat.SHORT;
+     return getDateTimeInstance(cal, dateStyle, timeStyle, locale);
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 3.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final DateFormat getInstance(Calendar cal, ULocale locale) {
+     // Fixed styles for this convenience form: MEDIUM date, SHORT time.
+     final int dateStyle = DateFormat.MEDIUM;
+     final int timeStyle = DateFormat.SHORT;
+     return getDateTimeInstance(cal, dateStyle, timeStyle, locale);
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getInstance(Calendar cal) {
+     // Same as the (Calendar, ULocale) form, using the default ULocale.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(cal, defaultLocale);
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getDateInstance(Calendar cal, int dateStyle) {
+     // Same as the (Calendar, int, ULocale) form, using the default ULocale.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getDateInstance(cal, dateStyle, defaultLocale);
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getTimeInstance(Calendar cal, int timeStyle) {
+     // Same as the (Calendar, int, ULocale) form, using the default ULocale.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getTimeInstance(cal, timeStyle, defaultLocale);
+ }
+
+ /**
+ * Convenience overload.
+ * @stable ICU 2.0
+ */
+ public static final DateFormat getDateTimeInstance(Calendar cal, int dateStyle, int timeStyle) {
+     // Same as the (Calendar, int, int, ULocale) form, using the default ULocale.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getDateTimeInstance(cal, dateStyle, timeStyle, defaultLocale);
+ }
+
+ /**
+ * {@icu} Convenience overload.
+ * @stable ICU 4.0
+ */
+ public final static DateFormat getPatternInstance(String pattern) {
+     // Not available in this build: always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Convenience overload.
+ * @stable ICU 4.0
+ */
+ public final static DateFormat getPatternInstance(String pattern, Locale locale) {
+     // Not available in this build: always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns a {@link DateFormat} object that can be used to format dates and times in
+ * the given locale.
+ *
+ * Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ *
+ * @param pattern The pattern that selects the fields to be formatted. (Uses the
+ * {@link DateTimePatternGenerator}.) This can be {@link DateFormat#ABBR_MONTH},
+ * {@link DateFormat#MONTH_WEEKDAY_DAY}, etc.
+ *
+ * @param locale The locale for which the date/time format is desired.
+ *
+ * @stable ICU 4.0
+ */
+ public final static DateFormat getPatternInstance(String pattern, ULocale locale) {
+     // Not available in this build: always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Convenience overload.
+ * @stable ICU 4.0
+ */
+ public final static DateFormat getPatternInstance(Calendar cal, String pattern, Locale locale) {
+     // Not available in this build: always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Creates a {@link DateFormat} object that can be used to format dates and
+ * times in the calendar system specified by <code>cal</code>.
+ *
+ * <p>Note: When this functionality is moved into the core JDK, this method
+ * will probably be replaced by a new overload of {@link DateFormat#getInstance}.
+ *
+ * @param cal The calendar system for which a date/time format is desired.
+ *
+ * @param pattern The pattern that selects the fields to be formatted. (Uses the
+ * {@link DateTimePatternGenerator}.) This can be
+ * {@link DateFormat#ABBR_MONTH}, {@link DateFormat#MONTH_WEEKDAY_DAY},
+ * etc.
+ *
+ * @param locale The locale for which the date/time format is desired.
+ *
+ * @stable ICU 4.0
+ */
+ public final static DateFormat getPatternInstance(
+         Calendar cal, String pattern, ULocale locale) {
+     // Not available in this build: always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * The instances of this inner class are used as attribute keys and values
+ * in AttributedCharacterIterator that
+ * DateFormat.formatToCharacterIterator() method returns.
+ *
+ * <p>There is no public constructor for this class; the only instances are the
+ * constants defined here.
+ *
+ * @stable ICU 3.8
+ */
+ public static class Field extends Format.Field {
+
+ private static final long serialVersionUID = -3627456821000730829L;
+
+ // Max number of calendar fields
+ private static final int CAL_FIELD_COUNT;
+
+ // Table for mapping calendar field number to DateFormat.Field
+ private static final Field[] CAL_FIELDS;
+
+ // Map for resolving DateFormat.Field by name
+ private static final Map FIELD_NAME_MAP;
+
+ static {
+ Calendar cal = Calendar.getInstance();
+ CAL_FIELD_COUNT = cal.getFieldCount();
+ CAL_FIELDS = new Field[CAL_FIELD_COUNT];
+ FIELD_NAME_MAP = new HashMap(CAL_FIELD_COUNT);
+ }
+
+ // Java fields -------------------
+
+ /**
+ * Constant identifying the time of day indicator(am/pm).
+ * @stable ICU 3.8
+ */
+ public static final Field AM_PM = new Field("am pm", Calendar.AM_PM);
+
+ /**
+ * Constant identifying the day of month field.
+ * @stable ICU 3.8
+ */
+ public static final Field DAY_OF_MONTH = new Field("day of month", Calendar.DAY_OF_MONTH);
+
+ /**
+ * Constant identifying the day of week field.
+ * @stable ICU 3.8
+ */
+ public static final Field DAY_OF_WEEK = new Field("day of week", Calendar.DAY_OF_WEEK);
+
+ /**
+ * Constant identifying the day of week in month field.
+ * @stable ICU 3.8
+ */
+ public static final Field DAY_OF_WEEK_IN_MONTH =
+ new Field("day of week in month", Calendar.DAY_OF_WEEK_IN_MONTH);
+
+ /**
+ * Constant identifying the day of year field.
+ * @stable ICU 3.8
+ */
+ public static final Field DAY_OF_YEAR = new Field("day of year", Calendar.DAY_OF_YEAR);
+
+ /**
+ * Constant identifying the era field.
+ * @stable ICU 3.8
+ */
+ public static final Field ERA = new Field("era", Calendar.ERA);
+
+ /**
+ * Constant identifying the hour(0-23) of day field.
+ * @stable ICU 3.8
+ */
+ public static final Field HOUR_OF_DAY0 = new Field("hour of day", Calendar.HOUR_OF_DAY);
+
+ /**
+ * Constant identifying the hour(1-24) of day field.
+ * @stable ICU 3.8
+ */
+ public static final Field HOUR_OF_DAY1 = new Field("hour of day 1", -1);
+
+ /**
+ * Constant identifying the hour(0-11) field.
+ * @stable ICU 3.8
+ */
+ public static final Field HOUR0 = new Field("hour", Calendar.HOUR);
+
+ /**
+ * Constant identifying the hour(1-12) field.
+ * @stable ICU 3.8
+ */
+ public static final Field HOUR1 = new Field("hour 1", -1);
+
+ /**
+ * Constant identifying the millisecond field.
+ * @stable ICU 3.8
+ */
+ public static final Field MILLISECOND = new Field("millisecond", Calendar.MILLISECOND);
+
+ /**
+ * Constant identifying the minute field.
+ * @stable ICU 3.8
+ */
+ public static final Field MINUTE = new Field("minute", Calendar.MINUTE);
+
+ /**
+ * Constant identifying the month field.
+ * @stable ICU 3.8
+ */
+ public static final Field MONTH = new Field("month", Calendar.MONTH);
+
+ /**
+ * Constant identifying the second field.
+ * @stable ICU 3.8
+ */
+ public static final Field SECOND = new Field("second", Calendar.SECOND);
+
+ /**
+ * Constant identifying the time zone field.
+ * @stable ICU 3.8
+ */
+ public static final Field TIME_ZONE = new Field("time zone", -1);
+
+ /**
+ * Constant identifying the week of month field.
+ * @stable ICU 3.8
+ */
+ public static final Field WEEK_OF_MONTH =
+ new Field("week of month", Calendar.WEEK_OF_MONTH);
+
+ /**
+ * Constant identifying the week of year field.
+ * @stable ICU 3.8
+ */
+ public static final Field WEEK_OF_YEAR = new Field("week of year", Calendar.WEEK_OF_YEAR);
+
+ /**
+ * Constant identifying the year field.
+ * @stable ICU 3.8
+ */
+ public static final Field YEAR = new Field("year", Calendar.YEAR);
+
+
+ // ICU only fields -------------------
+
+ /**
+ * Constant identifying the local day of week field.
+ * @stable ICU 3.8
+ */
+ public static final Field DOW_LOCAL = new Field("local day of week", Calendar.DOW_LOCAL);
+
+ /**
+ * Constant identifying the extended year field.
+ * @stable ICU 3.8
+ */
+ public static final Field EXTENDED_YEAR = new Field("extended year",
+ Calendar.EXTENDED_YEAR);
+
+ /**
+ * Constant identifying the Julian day field.
+ * @stable ICU 3.8
+ */
+ public static final Field JULIAN_DAY = new Field("Julian day", Calendar.JULIAN_DAY);
+
+ /**
+ * Constant identifying the milliseconds in day field.
+ * @stable ICU 3.8
+ */
+ public static final Field MILLISECONDS_IN_DAY =
+ new Field("milliseconds in day", Calendar.MILLISECONDS_IN_DAY);
+
+ /**
+ * Constant identifying the year used with week of year field.
+ * @stable ICU 3.8
+ */
+ public static final Field YEAR_WOY = new Field("year for week of year", Calendar.YEAR_WOY);
+
+ /**
+ * Constant identifying the quarter field.
+ * @stable ICU 3.8
+ */
+ public static final Field QUARTER = new Field("quarter", -1);
+
+ // Stand alone types are variants for its base types. So we do not define Field for
+ // them.
+ /*
+ public static final Field STANDALONE_DAY =
+ new Field("stand alone day of week", Calendar.DAY_OF_WEEK);
+ public static final Field STANDALONE_MONTH = new Field("stand alone month", Calendar.MONTH);
+ public static final Field STANDALONE_QUARTER = new Field("stand alone quarter", -1);
+ */
+
+ // Corresponding calendar field
+ private final int calendarField;
+
+ /**
+ * Constructs a DateFormat.Field
with the given name and
+ * the Calendar
field which this attribute represents. Use -1 for
+ * calendarField
if this field does not have a corresponding
+ * Calendar
field.
+ *
+ * @param name Name of the attribute
+ * @param calendarField Calendar
field constant
+ *
+ * @stable ICU 3.8
+ */
+ protected Field(String name, int calendarField) {
+ super(name);
+ this.calendarField = calendarField;
+ if (this.getClass() == DateFormat.Field.class) {
+ FIELD_NAME_MAP.put(name, this);
+ if (calendarField >= 0 && calendarField < CAL_FIELD_COUNT) {
+ CAL_FIELDS[calendarField] = this;
+ }
+ }
+ }
+
+ /**
+ * Returns the <code>Field</code> constant that corresponds to the
+ * <code>Calendar</code> field <code>calendarField</code>. If no
+ * corresponding <code>Field</code> is available, null is returned.
+ *
+ * @param calendarField <code>Calendar</code> field constant
+ * @return <code>Field</code> associated with the <code>calendarField</code>,
+ * or null if no associated <code>Field</code> is available.
+ * @throws IllegalArgumentException if <code>calendarField</code> is not
+ * a valid <code>Calendar</code> field constant.
+ *
+ * @stable ICU 3.8
+ */
+ public static DateFormat.Field ofCalendarField(int calendarField) {
+     final boolean inRange = 0 <= calendarField && calendarField < CAL_FIELD_COUNT;
+     if (!inRange) {
+         throw new IllegalArgumentException("Calendar field number is out of range");
+     }
+     // A slot that no constant registered itself into still holds null.
+     return CAL_FIELDS[calendarField];
+ }
+
+ /**
+ * Returns the <code>Calendar</code> field associated with this attribute.
+ * If there is no corresponding <code>Calendar</code> field available, this will
+ * return -1.
+ *
+ * @return <code>Calendar</code> field constant for this attribute.
+ *
+ * @stable ICU 3.8
+ */
+ public int getCalendarField() {
+ return calendarField;
+ }
+
+ /**
+ * Resolves instances being deserialized to the predefined constants.
+ * The replacement is looked up by attribute name in the table that the
+ * constructor fills for exact <code>DateFormat.Field</code> instances.
+ *
+ * @return the registered constant whose name equals this instance's name.
+ * @throws InvalidObjectException if the constant could not be resolved.
+ *
+ * @stable ICU 3.8
+ */
+ protected Object readResolve() throws InvalidObjectException {
+ // NOTE(review): the CLOVER:OFF / CLOVER:ON markers below look like
+ // code-coverage exclusion directives around the error branches - confirm.
+ ///CLOVER:OFF
+ // Subclass instances are never put in the name table, so they cannot be
+ // resolved here and must supply their own readResolve.
+ if (this.getClass() != DateFormat.Field.class) {
+ throw new InvalidObjectException(
+ "A subclass of DateFormat.Field must implement readResolve.");
+ }
+ ///CLOVER:ON
+ // Swap the freshly deserialized object for the constant registered under its name.
+ Object o = FIELD_NAME_MAP.get(this.getName());
+ ///CLOVER:OFF
+ if (o == null) {
+ throw new InvalidObjectException("Unknown attribute name.");
+ }
+ ///CLOVER:ON
+ return o;
+ }
+ }
+
+ /**
+ * Maps an ICU date/time style constant to the equivalent
+ * <code>java.text.DateFormat</code> style constant.
+ *
+ * @param icuFormatStyle one of FULL, LONG, MEDIUM or SHORT of this class
+ * @return the matching <code>java.text.DateFormat</code> style
+ * @throws UnsupportedOperationException for any other style value
+ */
+ private static int getJDKFormatStyle(int icuFormatStyle) {
+     if (icuFormatStyle == DateFormat.FULL) {
+         return java.text.DateFormat.FULL;
+     }
+     if (icuFormatStyle == DateFormat.LONG) {
+         return java.text.DateFormat.LONG;
+     }
+     if (icuFormatStyle == DateFormat.MEDIUM) {
+         return java.text.DateFormat.MEDIUM;
+     }
+     if (icuFormatStyle == DateFormat.SHORT) {
+         return java.text.DateFormat.SHORT;
+     }
+     throw new UnsupportedOperationException("Style not supported by com.ibm.icu.base");
+ }
+
+
+ protected static FieldPosition toJDKFieldPosition(FieldPosition icuPos) {
+ if (icuPos == null) {
+ return null;
+ }
+
+ int fieldID = icuPos.getField();
+ Format.Field fieldAttribute = icuPos.getFieldAttribute();
+
+ FieldPosition jdkPos = null;
+
+ if (fieldID >= 0) {
+ switch (fieldID) {
+ case ERA_FIELD:
+ fieldID = java.text.DateFormat.ERA_FIELD;
+ break;
+ case YEAR_FIELD:
+ fieldID = java.text.DateFormat.YEAR_FIELD;
+ break;
+ case MONTH_FIELD:
+ fieldID = java.text.DateFormat.MONTH_FIELD;
+ break;
+ case DATE_FIELD:
+ fieldID = java.text.DateFormat.DATE_FIELD;
+ break;
+ case HOUR_OF_DAY1_FIELD:
+ fieldID = java.text.DateFormat.HOUR_OF_DAY1_FIELD;
+ break;
+ case HOUR_OF_DAY0_FIELD:
+ fieldID = java.text.DateFormat.HOUR_OF_DAY0_FIELD;
+ break;
+ case MINUTE_FIELD:
+ fieldID = java.text.DateFormat.MINUTE_FIELD;
+ break;
+ case SECOND_FIELD:
+ fieldID = java.text.DateFormat.SECOND_FIELD;
+ break;
+ case FRACTIONAL_SECOND_FIELD: // MILLISECOND_FIELD
+ fieldID = java.text.DateFormat.MILLISECOND_FIELD;
+ break;
+ case DAY_OF_WEEK_FIELD:
+ fieldID = java.text.DateFormat.DAY_OF_WEEK_FIELD;
+ break;
+ case DAY_OF_YEAR_FIELD:
+ fieldID = java.text.DateFormat.DAY_OF_YEAR_FIELD;
+ break;
+ case DAY_OF_WEEK_IN_MONTH_FIELD:
+ fieldID = java.text.DateFormat.DAY_OF_WEEK_IN_MONTH_FIELD;
+ break;
+ case WEEK_OF_YEAR_FIELD:
+ fieldID = java.text.DateFormat.WEEK_OF_YEAR_FIELD;
+ break;
+ case WEEK_OF_MONTH_FIELD:
+ fieldID = java.text.DateFormat.WEEK_OF_MONTH_FIELD;
+ break;
+ case AM_PM_FIELD:
+ fieldID = java.text.DateFormat.AM_PM_FIELD;
+ break;
+ case HOUR1_FIELD:
+ fieldID = java.text.DateFormat.HOUR1_FIELD;
+ break;
+ case HOUR0_FIELD:
+ fieldID = java.text.DateFormat.HOUR0_FIELD;
+ break;
+ case TIMEZONE_FIELD:
+ fieldID = java.text.DateFormat.TIMEZONE_FIELD;
+ break;
+
+ case YEAR_WOY_FIELD:
+ case DOW_LOCAL_FIELD:
+ case EXTENDED_YEAR_FIELD:
+ case JULIAN_DAY_FIELD:
+ case MILLISECONDS_IN_DAY_FIELD:
+ case TIMEZONE_RFC_FIELD:
+ case TIMEZONE_GENERIC_FIELD:
+ case STANDALONE_DAY_FIELD:
+ case STANDALONE_MONTH_FIELD:
+ case QUARTER_FIELD:
+ case STANDALONE_QUARTER_FIELD:
+ case TIMEZONE_SPECIAL_FIELD:
+ throw new UnsupportedOperationException("Format Field ID not supported by com.ibm.icu.base");
+
+ default:
+ // just let it go
+ break;
+ }
+ }
+
+ if (fieldAttribute != null) {
+ // map field
+ if (fieldAttribute.equals(Field.AM_PM)) {
+ fieldAttribute = java.text.DateFormat.Field.AM_PM;
+ } else if (fieldAttribute.equals(Field.DAY_OF_MONTH)) {
+ fieldAttribute = java.text.DateFormat.Field.DAY_OF_MONTH;
+ } else if (fieldAttribute.equals(Field.DAY_OF_WEEK)) {
+ fieldAttribute = java.text.DateFormat.Field.DAY_OF_WEEK;
+ } else if (fieldAttribute.equals(Field.DAY_OF_WEEK_IN_MONTH)) {
+ fieldAttribute = java.text.DateFormat.Field.DAY_OF_WEEK_IN_MONTH;
+ } else if (fieldAttribute.equals(Field.DAY_OF_YEAR)) {
+ fieldAttribute = java.text.DateFormat.Field.DAY_OF_YEAR;
+ } else if (fieldAttribute.equals(Field.ERA)) {
+ fieldAttribute = java.text.DateFormat.Field.ERA;
+ } else if (fieldAttribute.equals(Field.HOUR_OF_DAY0)) {
+ fieldAttribute = java.text.DateFormat.Field.HOUR_OF_DAY0;
+ } else if (fieldAttribute.equals(Field.HOUR_OF_DAY1)) {
+ fieldAttribute = java.text.DateFormat.Field.HOUR_OF_DAY1;
+ } else if (fieldAttribute.equals(Field.HOUR0)) {
+ fieldAttribute = java.text.DateFormat.Field.HOUR0;
+ } else if (fieldAttribute.equals(Field.HOUR1)) {
+ fieldAttribute = java.text.DateFormat.Field.HOUR1;
+ } else if (fieldAttribute.equals(Field.MILLISECOND)) {
+ fieldAttribute = java.text.DateFormat.Field.MILLISECOND;
+ } else if (fieldAttribute.equals(Field.MINUTE)) {
+ fieldAttribute = java.text.DateFormat.Field.MINUTE;
+ } else if (fieldAttribute.equals(Field.MONTH)) {
+ fieldAttribute = java.text.DateFormat.Field.MONTH;
+ } else if (fieldAttribute.equals(Field.SECOND)) {
+ fieldAttribute = java.text.DateFormat.Field.SECOND;
+ } else if (fieldAttribute.equals(Field.TIME_ZONE)) {
+ fieldAttribute = java.text.DateFormat.Field.TIME_ZONE;
+ } else if (fieldAttribute.equals(Field.WEEK_OF_MONTH)) {
+ fieldAttribute = java.text.DateFormat.Field.WEEK_OF_MONTH;
+ } else if (fieldAttribute.equals(Field.WEEK_OF_YEAR)) {
+ fieldAttribute = java.text.DateFormat.Field.WEEK_OF_YEAR;
+ } else if (fieldAttribute.equals(Field.YEAR)) {
+ fieldAttribute = java.text.DateFormat.Field.YEAR;
+ } else if (fieldAttribute.equals(Field.DOW_LOCAL)
+ || fieldAttribute.equals(Field.EXTENDED_YEAR)
+ || fieldAttribute.equals(Field.JULIAN_DAY)
+ || fieldAttribute.equals(Field.MILLISECONDS_IN_DAY)
+ || fieldAttribute.equals(Field.YEAR_WOY)
+ || fieldAttribute.equals(Field.QUARTER)) {
+ // Not supported
+ throw new UnsupportedOperationException("Format Field not supported by com.ibm.icu.base");
+ }
+
+ jdkPos = new FieldPosition(fieldAttribute, fieldID);
+ } else {
+ jdkPos = new FieldPosition(fieldID);
+ }
+
+ jdkPos.setBeginIndex(icuPos.getBeginIndex());
+ jdkPos.setEndIndex(icuPos.getEndIndex());
+
+ return jdkPos;
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java
new file mode 100644
index 00000000000..7077e4ba689
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java
@@ -0,0 +1,840 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import java.io.Serializable;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * {@icuenhanced java.text.DateFormatSymbols}.{@icu _usage_}
+ *
+ * <code>DateFormatSymbols</code> is a public class for encapsulating
+ * localizable date-time formatting data, such as the names of the
+ * months, the names of the days of the week, and the time zone data.
+ * <code>DateFormat</code> and <code>SimpleDateFormat</code> both use
+ * <code>DateFormatSymbols</code> to encapsulate this information.
+ *
+ * <p>Typically you shouldn't use <code>DateFormatSymbols</code> directly.
+ * Rather, you are encouraged to create a date-time formatter with the
+ * <code>DateFormat</code> class's factory methods: <code>getTimeInstance</code>,
+ * <code>getDateInstance</code>, or <code>getDateTimeInstance</code>.
+ * These methods automatically create a <code>DateFormatSymbols</code> for
+ * the formatter so that you don't have to. After the
+ * formatter is created, you may modify its format pattern using the
+ * <code>setPattern</code> method. For more information about
+ * creating formatters using <code>DateFormat</code>'s factory methods,
+ * see {@link DateFormat}.
+ *
+ * <p>If you decide to create a date-time formatter with a specific
+ * format pattern for a specific locale, you can do so with:
+ *
+ *
+ * new SimpleDateFormat(aPattern, new DateFormatSymbols(aLocale)).
+ *
+ *
+ *
+ * <code>DateFormatSymbols</code> objects are cloneable. When you obtain
+ * a <code>DateFormatSymbols</code> object, feel free to modify the
+ * date-time formatting data. For instance, you can replace the localized
+ * date-time format pattern characters with the ones that you feel easy
+ * to remember. Or you can change the representative cities
+ * to your favorite ones.
+ *
+ * <p>New <code>DateFormatSymbols</code> subclasses may be added to support
+ * <code>SimpleDateFormat</code> for date-time formatting for additional locales.
+ *
+ * @see DateFormat
+ * @see SimpleDateFormat
+ * @see com.ibm.icu.util.SimpleTimeZone
+ * @author Chen-Lieh Huang
+ * @stable ICU 2.0
+ */
+public class DateFormatSymbols implements Serializable, Cloneable {
+
+ private static final long serialVersionUID = 1L;
+
+ /** @internal */
+ public java.text.DateFormatSymbols dfs;
+
+ /** @internal */
+ public DateFormatSymbols(java.text.DateFormatSymbols delegate) {
+ this.dfs = delegate;
+ }
+
+ // TODO make sure local pattern char string is 18 characters long,
+ // that is, that it encompasses the new 'u' char for
+ // EXTENDED_YEAR. Two options: 1. Make sure resource data is
+ // correct; 2. Make code add in 'u' at end if len == 17.
+
+ // Constants for context
+ /**
+ * {@icu} Constant for context.
+ * @stable ICU 3.6
+ */
+ public static final int FORMAT = 0;
+
+ /**
+ * {@icu} Constant for context.
+ * @stable ICU 3.6
+ */
+ public static final int STANDALONE = 1;
+
+ /**
+ * {@icu} Constant for context.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static final int DT_CONTEXT_COUNT = 2;
+
+ // Constants for width
+
+ /**
+ * {@icu} Constant for width.
+ * @stable ICU 3.6
+ */
+ public static final int ABBREVIATED = 0;
+
+ /**
+ * {@icu} Constant for width.
+ * @stable ICU 3.6
+ */
+ public static final int WIDE = 1;
+
+ /**
+ * {@icu} Constant for width.
+ * @stable ICU 3.6
+ */
+ public static final int NARROW = 2;
+
+ /**
+ * {@icu} Constant for width.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static final int DT_WIDTH_COUNT = 3;
+
+ /**
+ * Constructs a DateFormatSymbols object by loading format data from
+ * resources for the default locale.
+ *
+ * @throws java.util.MissingResourceException if the resources for the default locale
+ * cannot be found or cannot be loaded.
+ * @stable ICU 2.0
+ */
+ public DateFormatSymbols()
+ {
+ this(new java.text.DateFormatSymbols());
+ }
+
+ /**
+ * Constructs a DateFormatSymbols object by loading format data from
+ * resources for the given locale.
+ *
+ * @throws java.util.MissingResourceException if the resources for the specified
+ * locale cannot be found or cannot be loaded.
+ * @stable ICU 2.0
+ */
+ public DateFormatSymbols(Locale locale)
+ {
+ this(new java.text.DateFormatSymbols(locale));
+ }
+
+ /**
+ * {@icu} Constructs a DateFormatSymbols object by loading format data from
+ * resources for the given ulocale.
+ *
+ * @throws java.util.MissingResourceException if the resources for the specified
+ * locale cannot be found or cannot be loaded.
+ * @stable ICU 3.2
+ */
+ public DateFormatSymbols(ULocale locale)
+ {
+ this(new java.text.DateFormatSymbols(locale.toLocale()));
+ }
+
+ /**
+ * Returns a DateFormatSymbols instance for the default locale.
+ *
+ * {@icunote} Unlike <code>java.text.DateFormatSymbols#getInstance</code>,
+ * this method simply returns <code>new com.ibm.icu.text.DateFormatSymbols()</code>.
+ * ICU does not support <code>DateFormatSymbolsProvider</code> introduced in Java 6
+ * or its equivalent implementation for now.
+ *
+ * @return A DateFormatSymbols instance.
+ * @stable ICU 3.8
+ */
+ public static DateFormatSymbols getInstance() {
+ return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance());
+ }
+
+ /**
+ * Returns a DateFormatSymbols instance for the given locale.
+ *
+ * {@icunote} Unlike <code>java.text.DateFormatSymbols#getInstance</code>,
+ * this method simply returns <code>new com.ibm.icu.text.DateFormatSymbols(locale)</code>.
+ * ICU does not support <code>DateFormatSymbolsProvider</code> introduced in Java 6
+ * or its equivalent implementation for now.
+ *
+ * @param locale the locale.
+ * @return A DateFormatSymbols instance.
+ * @stable ICU 3.8
+ */
+ public static DateFormatSymbols getInstance(Locale locale) {
+ return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance(locale));
+ }
+
+ /**
+ * {@icu} Returns a DateFormatSymbols instance for the given locale.
+ *
+ * {@icunote} Unlike <code>java.text.DateFormatSymbols#getInstance</code>,
+ * this method simply returns <code>new com.ibm.icu.text.DateFormatSymbols(locale)</code>.
+ * ICU does not support <code>DateFormatSymbolsProvider</code> introduced in Java 6
+ * or its equivalent implementation for now.
+ *
+ * @param locale the locale.
+ * @return A DateFormatSymbols instance.
+ * @stable ICU 3.8
+ */
+ public static DateFormatSymbols getInstance(ULocale locale) {
+ return new DateFormatSymbols(java.text.DateFormatSymbols.getInstance(locale.toLocale()));
+ }
+
+ /**
+ * Returns an array of all locales for which the <code>getInstance</code> methods of
+ * this class can return localized instances.
+ *
+ * {@icunote} Unlike <code>java.text.DateFormatSymbols#getAvailableLocales</code>,
+ * this method simply returns the array of <code>Locale</code>s available in this
+ * class. ICU does not support <code>DateFormatSymbolsProvider</code> introduced in
+ * Java 6 or its equivalent implementation for now.
+ *
+ * @return An array of <code>Locale</code>s for which localized
+ * <code>DateFormatSymbols</code> instances are available.
+ * @stable ICU 3.8
+ */
+ public static Locale[] getAvailableLocales() {
+ return java.text.DateFormatSymbols.getAvailableLocales();
+ }
+
+ /**
+ * {@icu} Returns an array of all locales for which the <code>getInstance</code>
+ * methods of this class can return localized instances.
+ *
+ * {@icunote} Unlike <code>java.text.DateFormatSymbols#getAvailableLocales</code>,
+ * this method simply returns the array of <code>ULocale</code>s available in this
+ * class. ICU does not support <code>DateFormatSymbolsProvider</code> introduced in
+ * Java 6 or its equivalent implementation for now.
+ *
+ * <p>Each element is the <code>ULocale.forLocale</code> conversion of the locale at
+ * the same index of <code>java.text.DateFormatSymbols.getAvailableLocales()</code>.
+ *
+ * @return An array of <code>ULocale</code>s for which localized
+ * <code>DateFormatSymbols</code> instances are available.
+ * @draft ICU 3.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static ULocale[] getAvailableULocales() {
+     final Locale[] jdkLocales = java.text.DateFormatSymbols.getAvailableLocales();
+     final ULocale[] converted = new ULocale[jdkLocales.length];
+     int next = 0;
+     for (Locale jdkLocale : jdkLocales) {
+         converted[next++] = ULocale.forLocale(jdkLocale);
+     }
+     return converted;
+ }
+
+ /**
+ * Returns era strings. For example: "AD" and "BC".
+ * @return the era strings.
+ * @stable ICU 2.0
+ */
+ public String[] getEras() {
+ return dfs.getEras();
+ }
+
+ /**
+ * Sets era strings. For example: "AD" and "BC".
+ * @param newEras the new era strings.
+ * @stable ICU 2.0
+ */
+ public void setEras(String[] newEras) {
+ dfs.setEras(newEras);
+ }
+
+ /**
+ * {@icu} Returns era name strings. For example: "Anno Domini" and "Before Christ".
+ * @return the era strings.
+ * @stable ICU 3.4
+ */
+ public String[] getEraNames() {
+ return getEras(); // Java has no distinction between era strings and era name strings
+ }
+
+ /**
+ * {@icu} Sets era name strings. For example: "Anno Domini" and "Before Christ".
+ * @param newEraNames the new era strings.
+ * @stable ICU 3.8
+ */
+ public void setEraNames(String[] newEraNames) {
+ setEras(newEraNames); // Java has no distinction between era strings and era name strings
+ }
+
+ /**
+ * Returns month strings. For example: "January", "February", etc.
+ * @return the month strings.
+ * @stable ICU 2.0
+ */
+ public String[] getMonths() {
+ return dfs.getMonths();
+ }
+
+ /**
+ * Returns month strings. For example: "January", "February", etc.
+ * The wrapped JDK symbols have no context or narrow variants, so
+ * <code>context</code> is ignored and NARROW yields the same strings as ABBREVIATED.
+ *
+ * @param context The month context, FORMAT or STANDALONE.
+ * @param width The width of the returned month string,
+ * either WIDE, ABBREVIATED, or NARROW.
+ * @return the month strings.
+ * @throws IllegalArgumentException if <code>width</code> is none of the three widths.
+ * @stable ICU 3.4
+ */
+ public String[] getMonths(int context, int width) {
+     if (width == WIDE) {
+         return dfs.getMonths();
+     }
+     if (width == ABBREVIATED || width == NARROW) {
+         return dfs.getShortMonths();
+     }
+     throw new IllegalArgumentException("Unsupported width argument value");
+ }
+
+ /**
+ * Sets month strings. For example: "January", "February", etc.
+ * @param newMonths the new month strings.
+ * @stable ICU 2.0
+ */
+ public void setMonths(String[] newMonths) {
+ dfs.setMonths(newMonths);
+ }
+
+ /**
+ * Sets month strings. For example: "January", "February", etc.
+ * The wrapped JDK symbols have no context or narrow variants, so
+ * <code>context</code> is ignored and NARROW replaces the same strings as ABBREVIATED.
+ *
+ * @param newMonths the new month strings.
+ * @param context The formatting context, FORMAT or STANDALONE.
+ * @param width The width of the month string,
+ * either WIDE, ABBREVIATED, or NARROW.
+ * @throws IllegalArgumentException if <code>width</code> is none of the three widths.
+ * @stable ICU 3.8
+ */
+ public void setMonths(String[] newMonths, int context, int width) {
+     if (width == WIDE) {
+         dfs.setMonths(newMonths);
+     } else if (width == ABBREVIATED || width == NARROW) {
+         dfs.setShortMonths(newMonths);
+     } else {
+         throw new IllegalArgumentException("Unsupported width argument value");
+     }
+ }
+
+ /**
+ * Returns short month strings. For example: "Jan", "Feb", etc.
+ * @return the short month strings.
+ * @stable ICU 2.0
+ */
+ public String[] getShortMonths() {
+ return dfs.getShortMonths();
+ }
+
+ /**
+ * Sets short month strings. For example: "Jan", "Feb", etc.
+ * @param newShortMonths the new short month strings.
+ * @stable ICU 2.0
+ */
+ public void setShortMonths(String[] newShortMonths) {
+ dfs.setShortMonths(newShortMonths);
+ }
+
+ /**
+ * Returns weekday strings. For example: "Sunday", "Monday", etc.
+ * @return the weekday strings. Use <code>Calendar.SUNDAY</code>,
+ * <code>Calendar.MONDAY</code>, etc. to index the result array.
+ * @stable ICU 2.0
+ */
+ public String[] getWeekdays() {
+ return dfs.getWeekdays();
+ }
+
+ /**
+ * Returns weekday strings. For example: "Sunday", "Monday", etc.
+ * The wrapped JDK symbols have no context or narrow variants, so
+ * <code>context</code> is ignored and NARROW yields the same strings as ABBREVIATED.
+ *
+ * @param context Formatting context, either FORMAT or STANDALONE.
+ * @param width Width of strings to be returned, either
+ * WIDE, ABBREVIATED, or NARROW
+ * @return the weekday strings. Use <code>Calendar.SUNDAY</code>,
+ * <code>Calendar.MONDAY</code>, etc. to index the result array.
+ * @throws IllegalArgumentException if <code>width</code> is none of the three widths.
+ * @stable ICU 3.4
+ */
+ public String[] getWeekdays(int context, int width) {
+     if (width == WIDE) {
+         return dfs.getWeekdays();
+     }
+     if (width == ABBREVIATED || width == NARROW) {
+         return dfs.getShortWeekdays();
+     }
+     throw new IllegalArgumentException("Unsupported width argument value");
+ }
+
+ /**
+ * Sets weekday strings. For example: "Sunday", "Monday", etc.
+ * The wrapped JDK symbols have no context or narrow variants, so
+ * <code>context</code> is ignored and NARROW replaces the same strings as ABBREVIATED.
+ *
+ * @param newWeekdays The new weekday strings.
+ * @param context The formatting context, FORMAT or STANDALONE.
+ * @param width The width of the strings,
+ * either WIDE, ABBREVIATED, or NARROW.
+ * @throws IllegalArgumentException if <code>width</code> is none of the three widths.
+ * @stable ICU 3.8
+ */
+ public void setWeekdays(String[] newWeekdays, int context, int width) {
+     if (width == WIDE) {
+         dfs.setWeekdays(newWeekdays);
+     } else if (width == ABBREVIATED || width == NARROW) {
+         dfs.setShortWeekdays(newWeekdays);
+     } else {
+         throw new IllegalArgumentException("Unsupported width argument value");
+     }
+ }
+
+ /**
+ * Sets weekday strings. For example: "Sunday", "Monday", etc.
+ * @param newWeekdays the new weekday strings. The array should
+ * be indexed by <code>Calendar.SUNDAY</code>,
+ * <code>Calendar.MONDAY</code>, etc.
+ * @stable ICU 2.0
+ */
+ public void setWeekdays(String[] newWeekdays) {
+ dfs.setWeekdays(newWeekdays);
+ }
+
+ /**
+ * Returns short weekday strings. For example: "Sun", "Mon", etc.
+ * @return the short weekday strings. Use <code>Calendar.SUNDAY</code>,
+ * <code>Calendar.MONDAY</code>, etc. to index the result array.
+ * @stable ICU 2.0
+ */
+ public String[] getShortWeekdays() {
+ return dfs.getShortWeekdays();
+ }
+
+ /**
+ * Sets short weekday strings. For example: "Sun", "Mon", etc.
+ * @param newShortWeekdays the new short weekday strings. The array should
+ * be indexed by <code>Calendar.SUNDAY</code>,
+ * <code>Calendar.MONDAY</code>, etc.
+ * @stable ICU 2.0
+ */
+ public void setShortWeekdays(String[] newShortWeekdays) {
+ dfs.setShortWeekdays(newShortWeekdays);
+ }
+ /**
+ * {@icu} Returns quarter strings. For example: "1st Quarter", "2nd Quarter", etc.
+ * @param context The quarter context, FORMAT or STANDALONE.
+ * @param width The width of the returned quarter string,
+ * either WIDE or ABBREVIATED. There are no NARROW quarters.
+ * @return the quarter strings.
+ * @stable ICU 3.6
+ */
+ public String[] getQuarters(int context, int width) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Sets quarter strings. For example: "1st Quarter", "2nd Quarter", etc.
+ * @param newQuarters the new quarter strings.
+ * @param context The formatting context, FORMAT or STANDALONE.
+ * @param width The width of the quarter string,
+ * either WIDE or ABBREVIATED. There are no NARROW quarters.
+ * @stable ICU 3.8
+ */
+ public void setQuarters(String[] newQuarters, int context, int width) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns am/pm strings. For example: "AM" and "PM".
+ * @return the am/pm strings.
+ * @stable ICU 2.0
+ */
+ public String[] getAmPmStrings() {
+ return dfs.getAmPmStrings();
+ }
+
+ /**
+ * Sets am/pm strings. For example: "AM" and "PM".
+ * @param newAmpms the new ampm strings.
+ * @stable ICU 2.0
+ */
+ public void setAmPmStrings(String[] newAmpms) {
+ dfs.setAmPmStrings(newAmpms);
+ }
+
+ /**
+ * Returns timezone strings.
+ * @return the timezone strings.
+ * @stable ICU 2.0
+ */
+ public String[][] getZoneStrings() {
+ return dfs.getZoneStrings();
+ }
+
+ /**
+ * Sets timezone strings.
+ * @param newZoneStrings the new timezone strings.
+ * @stable ICU 2.0
+ */
+ public void setZoneStrings(String[][] newZoneStrings) {
+ dfs.setZoneStrings(newZoneStrings);
+ }
+
+ /**
+ * Returns localized date-time pattern characters. For example: 'u', 't', etc.
+ *
+ * <p>Note: ICU no longer provides localized date-time pattern characters for a locale
+ * starting ICU 3.8. This method returns the non-localized date-time pattern
+ * characters unless user defined localized data is set by setLocalPatternChars.
+ * @return the localized date-time pattern characters.
+ * @stable ICU 2.0
+ */
+ public String getLocalPatternChars() {
+ return dfs.getLocalPatternChars();
+ }
+
+ /**
+ * Sets localized date-time pattern characters. For example: 'u', 't', etc.
+ * @param newLocalPatternChars the new localized date-time
+ * pattern characters.
+ * @stable ICU 2.0
+ */
+ public void setLocalPatternChars(String newLocalPatternChars) {
+ dfs.setLocalPatternChars(newLocalPatternChars);
+ }
+
+ /**
+ * Overrides clone. The copy has the same runtime class as this object and
+ * wraps its own clone of the underlying <code>java.text.DateFormatSymbols</code>,
+ * so changes made through either object do not affect the other.
+ *
+ * @return an independent copy of this object.
+ * @stable ICU 2.0
+ */
+ public Object clone()
+ {
+     try {
+         // super.clone() keeps the runtime class (and any subclass state) instead of
+         // always producing a plain DateFormatSymbols.
+         DateFormatSymbols copy = (DateFormatSymbols)super.clone();
+         copy.dfs = (java.text.DateFormatSymbols)dfs.clone();
+         return copy;
+     } catch (CloneNotSupportedException e) {
+         // Not expected: this class implements Cloneable.
+         throw new IllegalStateException(e);
+     }
+ }
+
+ /**
+ * Override hashCode.
+ * Generates a hash code for the DateFormatSymbols object.
+ * @stable ICU 2.0
+ */
+ public int hashCode() {
+ return dfs.hashCode();
+ }
+
+ /**
+ * Overrides equals.
+ * @stable ICU 2.0
+ */
+ public boolean equals(Object obj)
+ {
+ try {
+ return dfs.equals(((DateFormatSymbols)obj).dfs);
+ }
+ catch (Exception e) {
+ return false;
+ }
+ }
+
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ /**
+ * Returns the {@link DateFormatSymbols} object that should be used to format a
+ * calendar system's dates in the given locale.
+ *
+ * Subclassing:
+ * When creating a new Calendar subclass, you must create the
+ * {@link ResourceBundle ResourceBundle}
+ * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place.
+ * The resource bundle name is based on the calendar's fully-specified
+ * class name, with ".resources" inserted at the end of the package name
+ * (just before the class name) and "Symbols" appended to the end.
+ * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar"
+ * is "com.ibm.icu.impl.data.HebrewCalendarSymbols".
+ *
+ * Within the ResourceBundle, this method searches for five keys:
+ *
+ * DayNames -
+ * An array of strings corresponding to each possible
+ * value of the <code>DAY_OF_WEEK</code> field. Even though
+ * <code>DAY_OF_WEEK</code> starts with <code>SUNDAY</code> = 1,
+ * this array is 0-based; the name for Sunday goes in the
+ * first position, at index 0. If this key is not found
+ * in the bundle, the day names are inherited from the
+ * default <code>DateFormatSymbols</code> for the requested locale.
+ *
+ * DayAbbreviations -
+ * An array of abbreviated day names corresponding
+ * to the values in the "DayNames" array. If this key
+ * is not found in the resource bundle, the "DayNames"
+ * values are used instead. If neither key is found,
+ * the day abbreviations are inherited from the default
+ * <code>DateFormatSymbols</code> for the locale.
+ *
+ * MonthNames -
+ * An array of strings corresponding to each possible
+ * value of the <code>MONTH</code> field. If this key is not found
+ * in the bundle, the month names are inherited from the
+ * default <code>DateFormatSymbols</code> for the requested locale.
+ *
+ * MonthAbbreviations -
+ * An array of abbreviated month names corresponding
+ * to the values in the "MonthNames" array. If this key
+ * is not found in the resource bundle, the "MonthNames"
+ * values are used instead. If neither key is found,
+ * the month abbreviations are inherited from the default
+ * <code>DateFormatSymbols</code> for the locale.
+ *
+ * Eras -
+ * An array of strings corresponding to each possible
+ * value of the <code>ERA</code> field. If this key is not found
+ * in the bundle, the era names are inherited from the
+ * default <code>DateFormatSymbols</code> for the requested locale.
+ *
+ *
+ * @param cal The calendar system whose date format symbols are desired.
+ * @param locale The locale whose symbols are desired.
+ *
+ * @see DateFormatSymbols#DateFormatSymbols(java.util.Locale)
+ * @stable ICU 2.0
+ */
+ public DateFormatSymbols(Calendar cal, Locale locale) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the {@link DateFormatSymbols} object that should be used to format a
+ * calendar system's dates in the given locale.
+ *
+ * Subclassing:
+ * When creating a new Calendar subclass, you must create the
+ * {@link ResourceBundle ResourceBundle}
+ * containing its {@link DateFormatSymbols DateFormatSymbols} in a specific place.
+ * The resource bundle name is based on the calendar's fully-specified
+ * class name, with ".resources" inserted at the end of the package name
+ * (just before the class name) and "Symbols" appended to the end.
+ * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar"
+ * is "com.ibm.icu.impl.data.HebrewCalendarSymbols".
+ *
+ * Within the ResourceBundle, this method searches for five keys:
+ *
+ * DayNames -
+ * An array of strings corresponding to each possible
+ * value of the <code>DAY_OF_WEEK</code> field. Even though
+ * <code>DAY_OF_WEEK</code> starts with <code>SUNDAY</code> = 1,
+ * this array is 0-based; the name for Sunday goes in the
+ * first position, at index 0. If this key is not found
+ * in the bundle, the day names are inherited from the
+ * default DateFormatSymbols
for the requested locale.
+ *
+ * DayAbbreviations -
+ * An array of abbreviated day names corresponding
+ * to the values in the "DayNames" array. If this key
+ * is not found in the resource bundle, the "DayNames"
+ * values are used instead. If neither key is found,
+ * the day abbreviations are inherited from the default
+ * <code>DateFormatSymbols</code> for the locale.
+ *
+ * MonthNames -
+ * An array of strings corresponding to each possible
+ * value of the <code>MONTH</code> field. If this key is not found
+ * in the bundle, the month names are inherited from the
+ * default <code>DateFormatSymbols</code> for the requested locale.
+ *
+ * MonthAbbreviations -
+ * An array of abbreviated month names corresponding
+ * to the values in the "MonthNames" array. If this key
+ * is not found in the resource bundle, the "MonthNames"
+ * values are used instead. If neither key is found,
+ * the month abbreviations are inherited from the default
+ * <code>DateFormatSymbols</code> for the locale.
+ *
+ * Eras -
+ * An array of strings corresponding to each possible
+ * value of the <code>ERA</code> field. If this key is not found
+ * in the bundle, the era names are inherited from the
+ * default <code>DateFormatSymbols</code> for the requested locale.
+ *
+ *
+ * @param cal The calendar system whose date format symbols are desired.
+ * @param locale The ulocale whose symbols are desired.
+ *
+ * @see DateFormatSymbols#DateFormatSymbols(java.util.Locale)
+ * @stable ICU 3.2
+ */
+ public DateFormatSymbols(Calendar cal, ULocale locale) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Variant of DateFormatSymbols(Calendar, Locale) that takes the Calendar class
+ * instead of a Calendar instance.
+ * <p>
+ * This constructor is not supported by com.ibm.icu.base and always throws.
+ *
+ * @param calendarClass The class of the calendar system whose date format symbols are desired.
+ * @param locale The locale whose symbols are desired.
+ * @throws UnsupportedOperationException always.
+ * @see #DateFormatSymbols(Calendar, Locale)
+ * @stable ICU 2.2
+ */
+ public DateFormatSymbols(Class<? extends Calendar> calendarClass, Locale locale) {
+     throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Variant of DateFormatSymbols(Calendar, ULocale) that takes the Calendar class
+ * instead of a Calendar instance.
+ * <p>
+ * This constructor is not supported by com.ibm.icu.base and always throws.
+ *
+ * @param calendarClass The class of the calendar system whose date format symbols are desired.
+ * @param locale The ulocale whose symbols are desired.
+ * @throws UnsupportedOperationException always.
+ * @see #DateFormatSymbols(Calendar, ULocale)
+ * @stable ICU 3.2
+ */
+ public DateFormatSymbols(Class<? extends Calendar> calendarClass, ULocale locale) {
+     throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Fetches a custom calendar's DateFormatSymbols out of the given resource
+ * bundle. Symbols that are not overridden are inherited from the
+ * default DateFormatSymbols for the locale.
+ * @see DateFormatSymbols#DateFormatSymbols(java.util.Locale)
+ * @stable ICU 2.0
+ */
+ public DateFormatSymbols(ResourceBundle bundle, Locale locale) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Fetches a custom calendar's DateFormatSymbols out of the given resource
+ * bundle. Symbols that are not overridden are inherited from the
+ * default DateFormatSymbols for the locale.
+ * @see DateFormatSymbols#DateFormatSymbols(java.util.Locale)
+ * @stable ICU 3.2
+ */
+ public DateFormatSymbols(ResourceBundle bundle, ULocale locale) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Finds the ResourceBundle containing the date format information for
+ * a specified calendar subclass in a given locale.
+ *
+ * The resource bundle name is based on the calendar's fully-specified
+ * class name, with ".resources" inserted at the end of the package name
+ * (just before the class name) and "Symbols" appended to the end.
+ * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar"
+ * is "com.ibm.icu.impl.data.HebrewCalendarSymbols".
+ *
+ * Note: Because of the structural changes in the ICU locale bundle,
+ * this API no longer works as described. This method always returns null.
+ * @deprecated ICU 4.0
+ */
+ // This API was formerly @stable ICU 2.0
+ static public ResourceBundle getDateFormatBundle(Class extends Calendar> calendarClass,
+ Locale locale) throws MissingResourceException {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); }
+
+ /**
+ * Finds the ResourceBundle containing the date format information for
+ * a specified calendar subclass in a given locale.
+ *
+ * The resource bundle name is based on the calendar's fully-specified
+ * class name, with ".resources" inserted at the end of the package name
+ * (just before the class name) and "Symbols" appended to the end.
+ * For example, the bundle corresponding to "com.ibm.icu.util.HebrewCalendar"
+ * is "com.ibm.icu.impl.data.HebrewCalendarSymbols".
+ *
+ * Note: Because of the structural changes in the ICU locale bundle,
+ * this API no longer works as described. This method always returns null.
+ * @deprecated ICU 4.0
+ */
+ // This API was formerly @stable ICU 3.2
+ static public ResourceBundle getDateFormatBundle(Class extends Calendar> calendarClass,
+ ULocale locale) throws MissingResourceException {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base"); }
+
+ /**
+ * Variant of getDateFormatBundle(java.lang.Class, java.util.Locale) that takes
+ * a Calendar instance instead of a Calendar class.
+ * <p>
+ * <b>Note:</b> Because of the structural changes in the ICU locale bundle,
+ * this API no longer works as described. In com.ibm.icu.base this method is
+ * not supported: it never returns a bundle (or null) and always throws
+ * UnsupportedOperationException.
+ *
+ * @param cal the calendar whose date format bundle is requested.
+ * @param locale the locale for which the bundle is requested.
+ * @return never returns normally.
+ * @throws UnsupportedOperationException always.
+ * @see #getDateFormatBundle(java.lang.Class, java.util.Locale)
+ * @deprecated ICU 4.0
+ */
+ // This API was formerly @stable ICU 2.2
+ @Deprecated
+ public static ResourceBundle getDateFormatBundle(Calendar cal, Locale locale) throws MissingResourceException {
+     throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Variant of getDateFormatBundle(java.lang.Class, com.ibm.icu.util.ULocale) that takes
+ * a Calendar instance instead of a Calendar class.
+ * <p>
+ * <b>Note:</b> Because of the structural changes in the ICU locale bundle,
+ * this API no longer works as described. In com.ibm.icu.base this method is
+ * not supported: it never returns a bundle (or null) and always throws
+ * UnsupportedOperationException.
+ *
+ * @param cal the calendar whose date format bundle is requested.
+ * @param locale the ulocale for which the bundle is requested.
+ * @return never returns normally.
+ * @throws UnsupportedOperationException always.
+ * @see #getDateFormatBundle(java.lang.Class, com.ibm.icu.util.ULocale)
+ * @deprecated ICU 4.0
+ */
+ // This API was formerly @stable ICU 3.2
+ @Deprecated
+ public static ResourceBundle getDateFormatBundle(Calendar cal, ULocale locale) throws MissingResourceException {
+     throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA , the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
+ * contains a partial preview implementation. The actual
+ * locale is returned correctly, but the valid locale is
+ * not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final ULocale getLocale(ULocale.Type type) {
+     // Not implemented in com.ibm.icu.base: this method unconditionally throws.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormat.java
new file mode 100644
index 00000000000..14eea149402
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormat.java
@@ -0,0 +1,1762 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.math.BigInteger;
+import java.text.AttributedCharacterIterator;
+import java.text.AttributedCharacterIterator.Attribute;
+import java.text.AttributedString;
+import java.text.CharacterIterator;
+import java.text.FieldPosition;
+import java.text.ParsePosition;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.ibm.icu.math.BigDecimal;
+import com.ibm.icu.math.MathContext;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.CurrencyAmount;
+
+/**
+ * {@icuenhanced java.text.DecimalFormat}.{@icu _usage_}
+ *
+ * DecimalFormat
is a concrete subclass of {@link NumberFormat} that formats
+ * decimal numbers. It has a variety of features designed to make it possible to parse and
+ * format numbers in any locale, including support for Western, Arabic, or Indic digits.
+ * It also supports different flavors of numbers, including integers ("123"), fixed-point
+ * numbers ("123.4"), scientific notation ("1.23E4"), percentages ("12%"), and currency
+ * amounts ("$123.00", "USD123.00", "123.00 US dollars"). All of these flavors can be
+ * easily localized.
+ *
+ *
To obtain a {@link NumberFormat} for a specific locale (including the default
+ * locale) call one of NumberFormat
's factory methods such as {@link
+ * NumberFormat#getInstance}. Do not call the DecimalFormat
constructors
+ * directly, unless you know what you are doing, since the {@link NumberFormat} factory
+ * methods may return subclasses other than DecimalFormat
. If you need to
+ * customize the format object, do something like this:
+ *
+ *
+ * NumberFormat f = NumberFormat.getInstance(loc);
+ * if (f instanceof DecimalFormat) {
+ * ((DecimalFormat) f).setDecimalSeparatorAlwaysShown(true);
+ * }
+ *
+ * Example Usage
+ *
+ * Print out a number using the localized number, currency, and percent
+ * format for each locale.
+ *
+ *
+ * Locale[] locales = NumberFormat.getAvailableLocales();
+ * double myNumber = -1234.56;
+ * NumberFormat format;
+ * for (int j=0; j<3; ++j) {
+ * System.out.println("FORMAT");
+ * for (int i = 0; i < locales.length; ++i) {
+ * if (locales[i].getCountry().length() == 0) {
+ * // Skip language-only locales
+ * continue;
+ * }
+ * System.out.print(locales[i].getDisplayName());
+ * switch (j) {
+ * case 0:
+ * format = NumberFormat.getInstance(locales[i]); break;
+ * case 1:
+ * format = NumberFormat.getCurrencyInstance(locales[i]); break;
+ * default:
+ * format = NumberFormat.getPercentInstance(locales[i]); break;
+ * }
+ * try {
+ * // Assume format is a DecimalFormat
+ * System.out.print(": " + ((DecimalFormat) format).toPattern()
+ * + " -> " + format.format(myNumber));
+ * } catch (Exception e) {}
+ * try {
+ * System.out.println(" -> " + format.parse(format.format(myNumber)));
+ * } catch (ParseException e) {}
+ * }
+ * }
+ *
+ * Another example use getInstance(style).
+ * Print out a number using the localized number, currency, percent,
+ * scientific, integer, iso currency, and plural currency format for each locale.
+ *
+ *
+ * ULocale locale = new ULocale("en_US");
+ * double myNumber = 1234.56;
+ * for (int j=NumberFormat.NUMBERSTYLE; j<=NumberFormat.PLURALCURRENCYSTYLE; ++j) {
+ * NumberFormat format = NumberFormat.getInstance(locale, j);
+ * try {
+ * // Assume format is a DecimalFormat
+ * System.out.print(": " + ((DecimalFormat) format).toPattern()
+ * + " -> " + format.format(myNumber));
+ * } catch (Exception e) {}
+ * try {
+ * System.out.println(" -> " + format.parse(format.format(myNumber)));
+ * } catch (ParseException e) {}
+ * }
+ *
+ * Patterns
+ *
+ * A DecimalFormat
consists of a pattern and a set of
+ * symbols . The pattern may be set directly using {@link #applyPattern}, or
+ * indirectly using other API methods which manipulate aspects of the pattern, such as the
+ * minimum number of integer digits. The symbols are stored in a {@link
+ * DecimalFormatSymbols} object. When using the {@link NumberFormat} factory methods, the
+ * pattern and symbols are read from ICU's locale data.
+ *
+ *
Special Pattern Characters
+ *
+ * Many characters in a pattern are taken literally; they are matched during parsing
+ * and output unchanged during formatting. Special characters, on the other hand, stand
+ * for other characters, strings, or classes of characters. For example, the '#'
+ * character is replaced by a localized digit. Often the replacement character is the
+ * same as the pattern character; in the U.S. locale, the ',' grouping character is
+ * replaced by ','. However, the replacement is still happening, and if the symbols are
+ * modified, the grouping character changes. Some special characters affect the behavior
+ * of the formatter by their presence; for example, if the percent character is seen, then
+ * the value is multiplied by 100 before being displayed.
+ *
+ *
To insert a special character in a pattern as a literal, that is, without any
+ * special meaning, the character must be quoted. There are some exceptions to this which
+ * are noted below.
+ *
+ *
The characters listed here are used in non-localized patterns. Localized patterns
+ * use the corresponding characters taken from this formatter's {@link
+ * DecimalFormatSymbols} object instead, and these characters lose their special status.
+ * Two exceptions are the currency sign and quote, which are not localized.
+ *
+ *
+ *
+ *
+ * Symbol
+ * Location
+ * Localized?
+ * Meaning
+ *
+ * 0
+ * Number
+ * Yes
+ * Digit
+ *
+ * 1-9
+ * Number
+ * Yes
+ * '1' through '9' indicate rounding.
+ *
+ * @
+ * Number
+ * No
+ * Significant digit
+ *
+ * #
+ * Number
+ * Yes
+ * Digit, zero shows as absent
+ *
+ * .
+ * Number
+ * Yes
+ * Decimal separator or monetary decimal separator
+ *
+ * -
+ * Number
+ * Yes
+ * Minus sign
+ *
+ * ,
+ * Number
+ * Yes
+ * Grouping separator
+ *
+ * E
+ * Number
+ * Yes
+ * Separates mantissa and exponent in scientific notation.
+ * Need not be quoted in prefix or suffix.
+ *
+ * +
+ * Exponent
+ * Yes
+ * Prefix positive exponents with localized plus sign.
+ * Need not be quoted in prefix or suffix.
+ *
+ * ;
+ * Subpattern boundary
+ * Yes
+ * Separates positive and negative subpatterns
+ *
+ * %
+ * Prefix or suffix
+ * Yes
+ * Multiply by 100 and show as percentage
+ *
+ * \u2030
+ * Prefix or suffix
+ * Yes
+ * Multiply by 1000 and show as per mille
+ *
+ * ¤
(\u00A4
)
+ * Prefix or suffix
+ * No
+ * Currency sign, replaced by currency symbol. If
+ * doubled, replaced by international currency symbol.
+ * If tripled, replaced by currency plural names, for example,
+ * "US dollar" or "US dollars" for America.
+ * If present in a pattern, the monetary decimal separator
+ * is used instead of the decimal separator.
+ *
+ * '
+ * Prefix or suffix
+ * No
+ * Used to quote special characters in a prefix or suffix,
+ * for example, "'#'#"
formats 123 to
+ * "#123"
. To create a single quote
+ * itself, use two in a row: "# o''clock"
.
+ *
+ * *
+ * Prefix or suffix boundary
+ * Yes
+ * Pad escape, precedes pad character
+ *
+ *
+ *
+ * A <code>DecimalFormat</code> pattern contains a positive and negative subpattern, for
+ * example, "#,##0.00;(#,##0.00)". Each subpattern has a prefix, a numeric part, and a
+ * suffix. If there is no explicit negative subpattern, the negative subpattern is the
+ * localized minus sign prefixed to the positive subpattern. That is, "0.00" alone is
+ * equivalent to "0.00;-0.00". If there is an explicit negative subpattern, it serves
+ * only to specify the negative prefix and suffix; the number of digits, minimal digits,
+ * and other characteristics are ignored in the negative subpattern. That means that
+ * "#,##0.0#;(#)" has precisely the same result as "#,##0.0#;(#,##0.0#)".
+ *
+ *
The prefixes, suffixes, and various symbols used for infinity, digits, thousands
+ * separators, decimal separators, etc. may be set to arbitrary values, and they will
+ * appear properly during formatting. However, care must be taken that the symbols and
+ * strings do not conflict, or parsing will be unreliable. For example, either the
+ * positive and negative prefixes or the suffixes must be distinct for {@link #parse} to
+ * be able to distinguish positive from negative values. Another example is that the
+ * decimal separator and thousands separator should be distinct characters, or parsing
+ * will be impossible.
+ *
+ *
The grouping separator is a character that separates clusters of integer
+ * digits to make large numbers more legible. It is commonly used for thousands, but in some
+ * locales it separates ten-thousands. The grouping size is the number of digits
+ * between the grouping separators, such as 3 for "100,000,000" or 4 for "1 0000
+ * 0000". There are actually two different grouping sizes: One used for the least
+ * significant integer digits, the primary grouping size , and one used for all
+ * others, the secondary grouping size . In most locales these are the same, but
+ * sometimes they are different. For example, if the primary grouping interval is 3, and
+ * the secondary is 2, then this corresponds to the pattern "#,##,##0", and the number
+ * 123456789 is formatted as "12,34,56,789". If a pattern contains multiple grouping
+ * separators, the interval between the last one and the end of the integer defines the
+ * primary grouping size, and the interval between the last two defines the secondary
+ * grouping size. All others are ignored, so "#,##,###,####" == "###,###,####" ==
+ * "##,#,###,####".
+ *
+ *
Illegal patterns, such as "#.#.#" or "#.###,###", will cause
+ * DecimalFormat
to throw an {@link IllegalArgumentException} with a message
+ * that describes the problem.
+ *
+ *
Pattern BNF
+ *
+ *
+ * pattern := subpattern (';' subpattern)?
+ * subpattern := prefix? number exponent? suffix?
+ * number := (integer ('.' fraction)?) | sigDigits
+ * prefix := '\u0000'..'\uFFFD' - specialCharacters
+ * suffix := '\u0000'..'\uFFFD' - specialCharacters
+ * integer := '#'* '0'* '0'
+ * fraction := '0'* '#'*
+ * sigDigits := '#'* '@' '@'* '#'*
+ * exponent := 'E' '+'? '0'* '0'
+ * padSpec := '*' padChar
+ * padChar := '\u0000'..'\uFFFD' - quote
+ *
+ * Notation:
+ * X* 0 or more instances of X
+ * X? 0 or 1 instances of X
+ * X|Y either X or Y
+ * C..D any character from C up to D, inclusive
+ * S-T characters in S, except those in T
+ *
+ * The first subpattern is for positive numbers. The second (optional)
+ * subpattern is for negative numbers.
+ *
+ * Not indicated in the BNF syntax above:
+ *
+ *
+ *
+ * The grouping separator ',' can occur inside the integer and sigDigits
+ * elements, between any two pattern characters of that element, as long as the integer or
+ * sigDigits element is not followed by the exponent element.
+ *
+ * Two grouping intervals are recognized: That between the decimal point and the first
+ * grouping symbol, and that between the first and second grouping symbols. These
+ * intervals are identical in most locales, but in some locales they differ. For example,
+ * the pattern "#,##,###" formats the number 123456789 as
+ * "12,34,56,789".
+ *
+ * The pad specifier padSpec
may appear before the prefix, after the
+ * prefix, before the suffix, after the suffix, or not at all.
+ *
+ * In place of '0', the digits '1' through '9' may be used to indicate a rounding
+ * increment.
+ *
+ *
+ *
+ * Parsing
+ *
+ * DecimalFormat
parses all Unicode characters that represent decimal
+ * digits, as defined by {@link UCharacter#digit}. In addition,
+ * DecimalFormat
also recognizes as digits the ten consecutive characters
+ * starting with the localized zero digit defined in the {@link DecimalFormatSymbols}
+ * object. During formatting, the {@link DecimalFormatSymbols}-based digits are output.
+ *
+ *
During parsing, grouping separators are ignored.
+ *
+ * <p>For currency parsing, the formatter is able to parse every currency style format, no
+ * matter which style the formatter is constructed with. For example, a formatter
+ * instance gotten from NumberFormat.getInstance(ULocale, NumberFormat.CURRENCYSTYLE) can
+ * parse formats such as "USD1.00" and "3.00 US dollars".
+ *
+ *
If {@link #parse(String, ParsePosition)} fails to parse a string, it returns
+ * null
and leaves the parse position unchanged. The convenience method
+ * {@link #parse(String)} indicates parse failure by throwing a {@link
+ * java.text.ParseException}.
+ *
+ *
Formatting
+ *
+ * Formatting is guided by several parameters, all of which can be specified either
+ * using a pattern or using the API. The following description applies to formats that do
+ * not use scientific notation or significant
+ * digits .
+ *
+ *
If the number of actual integer digits exceeds the maximum integer
+ * digits , then only the least significant digits are shown. For example, 1997 is
+ * formatted as "97" if the maximum integer digits is set to 2.
+ *
+ * If the number of actual integer digits is less than the minimum integer
+ * digits , then leading zeros are added. For example, 1997 is formatted as "01997"
+ * if the minimum integer digits is set to 5.
+ *
+ * If the number of actual fraction digits exceeds the maximum fraction
+ * digits, then half-even rounding is performed to the maximum fraction digits. For
+ * example, 0.125 is formatted as "0.12" if the maximum fraction digits is 2. This
+ * behavior can be changed by specifying a rounding increment and a rounding mode.
+ *
+ * If the number of actual fraction digits is less than the minimum fraction
+ * digits , then trailing zeros are added. For example, 0.125 is formatted as
+ * "0.1250" if the minimum fraction digits is set to 4.
+ *
+ * Trailing fractional zeros are not displayed if they occur j positions
+ * after the decimal, where j is less than the maximum fraction digits. For
+ * example, 0.10004 is formatted as "0.1" if the maximum fraction digits is four or less.
+ *
+ *
+ * Special Values
+ *
+ *
NaN
is represented as a single character, typically
+ * \uFFFD
. This character is determined by the {@link
+ * DecimalFormatSymbols} object. This is the only value for which the prefixes and
+ * suffixes are not used.
+ *
+ *
Infinity is represented as a single character, typically \u221E
,
+ * with the positive or negative prefixes and suffixes applied. The infinity character is
+ * determined by the {@link DecimalFormatSymbols} object.
+ *
+ * Scientific Notation
+ *
+ *
Numbers in scientific notation are expressed as the product of a mantissa and a
+ * power of ten, for example, 1234 can be expressed as 1.234 x 10<sup>3</sup>. The
+ * mantissa is typically in the half-open interval [1.0, 10.0) or sometimes [0.0, 1.0),
+ * but it need not be. DecimalFormat
supports arbitrary mantissas.
+ * DecimalFormat
can be instructed to use scientific notation through the API
+ * or through the pattern. In a pattern, the exponent character immediately followed by
+ * one or more digit characters indicates scientific notation. Example: "0.###E0" formats
+ * the number 1234 as "1.234E3".
+ *
+ *
+ *
+ * The number of digit characters after the exponent character gives the minimum
+ * exponent digit count. There is no maximum. Negative exponents are formatted using the
+ * localized minus sign, not the prefix and suffix from the pattern. This allows
+ * patterns such as "0.###E0 m/s". To prefix positive exponents with a localized plus
+ * sign, specify '+' between the exponent and the digits: "0.###E+0" will produce formats
+ * "1E+1", "1E+0", "1E-1", etc. (In localized patterns, use the localized plus sign
+ * rather than '+'.)
+ *
+ * The minimum number of integer digits is achieved by adjusting the exponent.
+ * Example: 0.00123 formatted with "00.###E0" yields "12.3E-4". This only happens if
+ * there is no maximum number of integer digits. If there is a maximum, then the minimum
+ * number of integer digits is fixed at one.
+ *
+ * The maximum number of integer digits, if present, specifies the exponent grouping.
+ * The most common use of this is to generate engineering notation , in which the
+ * exponent is a multiple of three, e.g., "##0.###E0". The number 12345 is formatted
+ * using "##0.####E0" as "12.345E3".
+ *
+ * When using scientific notation, the formatter controls the digit counts using
+ * significant digits logic. The maximum number of significant digits limits the total
+ * number of integer and fraction digits that will be shown in the mantissa; it does not
+ * affect parsing. For example, 12345 formatted with "##0.##E0" is "12.3E3". See the
+ * section on significant digits for more details.
+ *
+ * The number of significant digits shown is determined as follows: If
+ * areSignificantDigitsUsed() returns false, then the minimum number of significant digits
+ * shown is one, and the maximum number of significant digits shown is the sum of the
+ * minimum integer and maximum fraction digits, and is unaffected by the
+ * maximum integer digits. If this sum is zero, then all significant digits are shown.
+ * If areSignificantDigitsUsed() returns true, then the significant digit counts are
+ * specified by getMinimumSignificantDigits() and getMaximumSignificantDigits(). In this
+ * case, the number of integer digits is fixed at one, and there is no exponent grouping.
+ *
+ * Exponential patterns may not contain grouping separators.
+ *
+ *
+ *
+ * Significant Digits
+ *
+ * <code>DecimalFormat</code> has two ways of controlling how many digits are shown: (a)
+ * significant digits counts, or (b) integer and fraction digit counts. Integer and
+ * fraction digit counts are described above. When a formatter is using significant
+ * digits counts, the number of integer and fraction digits is not specified directly, and
+ * the formatter settings for these counts are ignored. Instead, the formatter uses
+ * however many integer and fraction digits are required to display the specified number
+ * of significant digits. Examples:
+ *
+ *
+ *
+ *
+ * Pattern
+ * Minimum significant digits
+ * Maximum significant digits
+ * Number
+ * Output of format()
+ *
+ * @@@
+ * 3
+ * 3
+ * 12345
+ * 12300
+ *
+ * @@@
+ * 3
+ * 3
+ * 0.12345
+ * 0.123
+ *
+ * @@##
+ * 2
+ * 4
+ * 3.14159
+ * 3.142
+ *
+ * @@##
+ * 2
+ * 4
+ * 1.23004
+ * 1.23
+ *
+ *
+ *
+ *
+ *
+ * Significant digit counts may be expressed using patterns that specify a minimum and
+ * maximum number of significant digits. These are indicated by the '@'
and
+ * '#'
characters. The minimum number of significant digits is the number of
+ * '@'
characters. The maximum number of significant digits is the number of
+ * '@'
characters plus the number of '#'
characters following on
+ * the right. For example, the pattern "@@@"
indicates exactly 3 significant
+ * digits. The pattern "@##"
indicates from 1 to 3 significant digits.
+ * Trailing zero digits to the right of the decimal separator are suppressed after the
+ * minimum number of significant digits have been shown. For example, the pattern
+ * "@##"
formats the number 0.1203 as "0.12"
.
+ *
+ * If a pattern uses significant digits, it may not contain a decimal separator, nor
+ * the '0'
pattern character. Patterns such as "@00"
or
+ * "@.###"
are disallowed.
+ *
+ * Any number of '#'
characters may be prepended to the left of the
+ * leftmost '@'
character. These have no effect on the minimum and maximum
+ * significant digits counts, but may be used to position grouping separators. For
+ * example, "#,#@#"
indicates a minimum of one significant digits, a maximum
+ * of two significant digits, and a grouping size of three.
+ *
+ * In order to enable significant digits formatting, use a pattern containing the
+ * '@'
pattern character. Alternatively, call {@link
+ * #setSignificantDigitsUsed setSignificantDigitsUsed(true)}.
+ *
+ * In order to disable significant digits formatting, use a pattern that does not
+ * contain the '@'
pattern character. Alternatively, call {@link
+ * #setSignificantDigitsUsed setSignificantDigitsUsed(false)}.
+ *
+ * The number of significant digits has no effect on parsing.
+ *
+ * Significant digits may be used together with exponential notation. Such patterns
+ * are equivalent to a normal exponential pattern with a minimum and maximum integer digit
+ * count of one, a minimum fraction digit count of getMinimumSignificantDigits() -
+ * 1
, and a maximum fraction digit count of getMaximumSignificantDigits() -
+ * 1
. For example, the pattern "@@###E0"
is equivalent to
+ * "0.0###E0"
.
+ *
+ * If significant digits are in use, then the integer and fraction digit counts, as set
+ * via the API, are ignored. If significant digits are not in use, then the significant
+ * digit counts, as set via the API, are ignored.
+ *
+ *
+ *
+ * Padding
+ *
+ * DecimalFormat
supports padding the result of {@link #format} to a
+ * specific width. Padding may be specified either through the API or through the pattern
+ * syntax. In a pattern the pad escape character, followed by a single pad character,
+ * causes padding to be parsed and formatted. The pad escape character is '*' in
+ * unlocalized patterns, and can be localized using {@link
+ * DecimalFormatSymbols#setPadEscape}. For example, "$*x#,##0.00"
formats
+ * 123 to "$xx123.00"
, and 1234 to "$1,234.00"
.
+ *
+ *
+ *
+ * When padding is in effect, the width of the positive subpattern, including prefix
+ * and suffix, determines the format width. For example, in the pattern "* #0
+ * o''clock"
, the format width is 10.
+ *
+ * The width is counted in 16-bit code units (Java char
s).
+ *
+ * Some parameters which usually do not matter have meaning when padding is used,
+ * because the pattern width is significant with padding. In the pattern "*
+ * ##,##,#,##0.##", the format width is 14. The initial characters "##,##," do not affect
+ * the grouping size or maximum integer digits, but they do affect the format width.
+ *
+ * Padding may be inserted at one of four locations: before the prefix, after the
+ * prefix, before the suffix, or after the suffix. If padding is specified in any other
+ * location, {@link #applyPattern} throws an {@link IllegalArgumentException}. If there
+ * is no prefix, before the prefix and after the prefix are equivalent, likewise for the
+ * suffix.
+ *
+ * When specified in a pattern, the 16-bit <code>char</code> immediately following the
+ * pad escape is the pad character. This may be any character, including a special pattern
+ * character. That is, the pad escape escapes the following character. If there
+ * is no character after the pad escape, then the pattern is illegal.
+ *
+ *
+ *
+ *
+ * Rounding
+ *
+ * <p><code>DecimalFormat</code> supports rounding to a specific increment. For example,
+ * 1230 rounded to the nearest 50 is 1250. 1.234 rounded to the nearest 0.65 is 1.3. The
+ * rounding increment may be specified through the API or in a pattern. To specify a
+ * rounding increment in a pattern, include the increment in the pattern itself. "#,#50"
+ * specifies a rounding increment of 50. "#,##0.05" specifies a rounding increment of
+ * 0.05.
+ *
+ *
+ *
+ * Rounding only affects the string produced by formatting. It does not affect
+ * parsing or change any numerical values.
+ *
+ * A rounding mode determines how values are rounded; see the {@link
+ * com.ibm.icu.math.BigDecimal} documentation for a description of the modes. Rounding
+ * increments specified in patterns use the default mode, {@link
+ * com.ibm.icu.math.BigDecimal#ROUND_HALF_EVEN}.
+ *
+ * Some locales use rounding in their currency formats to reflect the smallest
+ * currency denomination.
+ *
+ * In a pattern, digits '1' through '9' specify rounding, but otherwise behave
+ * identically to digit '0'.
+ *
+ *
+ *
+ * Synchronization
+ *
+ * <code>DecimalFormat</code> objects are not synchronized. Multiple threads should
+ * not access one formatter concurrently.
+ *
+ * @see java.text.Format
+ * @see NumberFormat
+ * @author Mark Davis
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public class DecimalFormat extends NumberFormat {
+
+ private static final long serialVersionUID = 1L;
+ /**
+ * @internal
+ * @param delegate the NumberFormat to which to delegate
+ */
+ public DecimalFormat(java.text.DecimalFormat delegate) {
+ super(delegate); // pass the JDK formatter straight to the NumberFormat superclass constructor
+ }
+
+ /**
+ * Creates a DecimalFormat using the default pattern and symbols for the default
+ * locale. This is a convenient way to obtain a DecimalFormat when
+ * internationalization is not the main concern.
+ *
+ * <p>To obtain standard formats for a given locale, use the factory methods on
+ * NumberFormat such as getNumberInstance. These factories will return the most
+ * appropriate sub-class of NumberFormat for a given locale.
+ *
+ * @see NumberFormat#getInstance
+ * @see NumberFormat#getNumberInstance
+ * @see NumberFormat#getCurrencyInstance
+ * @see NumberFormat#getPercentInstance
+ * @stable ICU 2.0
+ */
+ public DecimalFormat() {
+ this(new java.text.DecimalFormat()); // wrap a JDK DecimalFormat created with its no-argument constructor
+ }
+
+ /**
+ * Creates a DecimalFormat from the given pattern and the symbols for the default
+ * locale. This is a convenient way to obtain a DecimalFormat when
+ * internationalization is not the main concern.
+ *
+ * <p>To obtain standard formats for a given locale, use the factory methods on
+ * NumberFormat such as getNumberInstance. These factories will return the most
+ * appropriate sub-class of NumberFormat for a given locale.
+ *
+ * @param pattern A non-localized pattern string.
+ * @throws IllegalArgumentException if the given pattern is invalid.
+ * @see NumberFormat#getInstance
+ * @see NumberFormat#getNumberInstance
+ * @see NumberFormat#getCurrencyInstance
+ * @see NumberFormat#getPercentInstance
+ * @stable ICU 2.0
+ */
+ public DecimalFormat(String pattern) {
+ this(new java.text.DecimalFormat(pattern)); // the pattern is interpreted by the JDK DecimalFormat constructor
+ }
+
+ /**
+ * Creates a DecimalFormat from the given pattern and symbols. Use this constructor
+ * when you need to completely customize the behavior of the format.
+ *
+ * <p>To obtain standard formats for a given locale, use the factory methods on
+ * NumberFormat such as getInstance or getCurrencyInstance. If you need only minor
+ * adjustments to a standard format, you can modify the format returned by a
+ * NumberFormat factory method.
+ *
+ * @param pattern a non-localized pattern string
+ * @param symbols the set of symbols to be used
+ * @exception IllegalArgumentException if the given pattern is invalid
+ * @see NumberFormat#getInstance
+ * @see NumberFormat#getNumberInstance
+ * @see NumberFormat#getCurrencyInstance
+ * @see NumberFormat#getPercentInstance
+ * @see DecimalFormatSymbols
+ * @stable ICU 2.0
+ */
+ public DecimalFormat(String pattern, DecimalFormatSymbols symbols) {
+ this(new java.text.DecimalFormat(pattern, symbols.dfs)); // symbols.dfs is handed to the JDK constructor; a null symbols argument fails here with NullPointerException
+ }
+
+ /**
+ * Creates a DecimalFormat from the given pattern, symbols, information used for
+ * currency plural format, and format style. Use this constructor when you need to
+ * completely customize the behavior of the format.
+ *
+ * <p>To obtain standard formats for a given locale, use the factory methods on
+ * NumberFormat such as getInstance or getCurrencyInstance.
+ *
+ * <p>If you need only minor adjustments to a standard format, you can modify the
+ * format returned by a NumberFormat factory method using the setters.
+ *
+ * <p>If you want to completely customize a decimal format, using your own
+ * DecimalFormatSymbols (such as group separators) and your own information for
+ * currency plural formatting (such as plural rule and currency plural patterns), you
+ * can use this constructor.
+ *
+ * @param pattern a non-localized pattern string
+ * @param symbols the set of symbols to be used
+ * @param infoInput the information used for currency plural format, including
+ * currency plural patterns and plural rules.
+ * @param style the decimal formatting style, it is one of the following values:
+ * NumberFormat.NUMBERSTYLE; NumberFormat.CURRENCYSTYLE; NumberFormat.PERCENTSTYLE;
+ * NumberFormat.SCIENTIFICSTYLE; NumberFormat.INTEGERSTYLE;
+ * NumberFormat.ISOCURRENCYSTYLE; NumberFormat.PLURALCURRENCYSTYLE;
+ * @stable ICU 4.2
+ */
+ public DecimalFormat(String pattern, DecimalFormatSymbols symbols, CurrencyPluralInfo infoInput,
+ int style) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@inheritDoc}
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(double number, StringBuffer result, FieldPosition fieldPosition) {
+     // Nothing subclass-specific: the inherited implementation does the formatting.
+     final StringBuffer formatted = super.format(number, result, fieldPosition);
+     return formatted;
+ }
+
+ /**
+ * @stable ICU 2.0
+ */
+ // [Spark/CDL] Delegate to format_long_StringBuffer_FieldPosition_boolean
+ public StringBuffer format(long number, StringBuffer result, FieldPosition fieldPosition) {
+     // Nothing subclass-specific: the inherited implementation does the formatting.
+     final StringBuffer formatted = super.format(number, result, fieldPosition);
+     return formatted;
+ }
+
+ /**
+ * Formats a BigInteger number.
+ *
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(BigInteger number, StringBuffer result,
+         FieldPosition fieldPosition) {
+     // Nothing subclass-specific: the inherited implementation does the formatting.
+     final StringBuffer formatted = super.format(number, result, fieldPosition);
+     return formatted;
+ }
+
+ /**
+ * Formats a BigDecimal number.
+ *
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(java.math.BigDecimal number, StringBuffer result,
+         FieldPosition fieldPosition) {
+     // Nothing subclass-specific: the inherited implementation does the formatting.
+     final StringBuffer formatted = super.format(number, result, fieldPosition);
+     return formatted;
+ }
+
+ /**
+ * Formats a BigDecimal number.
+ *
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(BigDecimal number, StringBuffer result,
+         FieldPosition fieldPosition) {
+     // Nothing subclass-specific: the inherited implementation does the formatting.
+     final StringBuffer formatted = super.format(number, result, fieldPosition);
+     return formatted;
+ }
+
+ /**
+ * Parses the given string, returning a <code>Number</code> object to represent the
+ * parsed value. <code>Double</code> objects are returned to represent non-integral
+ * values which cannot be stored in a <code>BigDecimal</code>. These are
+ * <code>NaN</code>, infinity, -infinity, and -0.0. If {@link #isParseBigDecimal()} is
+ * false (the default), all other values are returned as <code>Long</code>,
+ * <code>BigInteger</code>, or <code>BigDecimal</code> values, in that order of
+ * preference. If {@link #isParseBigDecimal()} is true, all other values are returned
+ * as <code>BigDecimal</code> values. If the parse fails, null is returned.
+ *
+ * @param text the string to be parsed
+ * @param parsePosition defines the position where parsing is to begin, and upon
+ * return, the position where parsing left off. If the position has not changed upon
+ * return, then parsing failed.
+ * @return a <code>Number</code> object with the parsed value or
+ * <code>null</code> if the parse failed
+ * @stable ICU 2.0
+ */
+ public Number parse(String text, ParsePosition parsePosition) {
+     // Parsing is handled entirely by the inherited implementation.
+     final Number parsed = super.parse(text, parsePosition);
+     return parsed;
+ }
+
+ /**
+ * Parses text from the given string as a CurrencyAmount. Unlike the parse() method,
+ * this method will attempt to parse a generic currency name, searching for a match of
+ * this object's locale's currency display names, or for a 3-letter ISO currency
+ * code. This method will fail if this format is not a currency format, that is, if it
+ * does not contain the currency pattern symbol (U+00A4) in its prefix or suffix.
+ *
+ * @param text the string to parse
+ * @param pos input-output position; on input, the position within text to match; must
+ * have 0 <= pos.getIndex() < text.length(); on output, the position after the last
+ * matched character. If the parse fails, the position is unchanged upon output.
+ * @return a CurrencyAmount, or null upon failure
+ */
+ CurrencyAmount parseCurrency(String text, ParsePosition pos) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns a copy of the decimal format symbols used by this format.
+ *
+ * @return desired DecimalFormatSymbols
+ * @see DecimalFormatSymbols
+ * @stable ICU 2.0
+ */
+ public DecimalFormatSymbols getDecimalFormatSymbols() {
+     // Fetch the symbols from the wrapped JDK formatter and wrap them in the ICU type.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     final java.text.DecimalFormatSymbols jdkSymbols = jdkFormat.getDecimalFormatSymbols();
+     return new DecimalFormatSymbols(jdkSymbols);
+ }
+
+ /**
+ * Sets the decimal format symbols used by this format. The format uses a copy of the
+ * provided symbols.
+ *
+ * @param newSymbols desired DecimalFormatSymbols
+ * @see DecimalFormatSymbols
+ * @stable ICU 2.0
+ */
+ public void setDecimalFormatSymbols(DecimalFormatSymbols newSymbols) {
+     // Unwrap the ICU symbols (newSymbols.dfs) and install them on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setDecimalFormatSymbols(newSymbols.dfs);
+ }
+
+ /**
+ * Returns the positive prefix.
+ *
+ * <p>Examples: +123, $123, sFr123
+ * @return the prefix
+ * @stable ICU 2.0
+ */
+ public String getPositivePrefix() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getPositivePrefix();
+ }
+
+ /**
+ * Sets the positive prefix.
+ *
+ * <p>Examples: +123, $123, sFr123
+ * @param newValue the prefix
+ * @stable ICU 2.0
+ */
+ public void setPositivePrefix(String newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setPositivePrefix(newValue);
+ }
+
+ /**
+ * Returns the negative prefix.
+ *
+ * <p>Examples: -123, ($123) (with negative suffix), sFr-123
+ *
+ * @return the prefix
+ * @stable ICU 2.0
+ */
+ public String getNegativePrefix() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getNegativePrefix();
+ }
+
+ /**
+ * Sets the negative prefix.
+ *
+ * <p>Examples: -123, ($123) (with negative suffix), sFr-123
+ * @param newValue the prefix
+ * @stable ICU 2.0
+ */
+ public void setNegativePrefix(String newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setNegativePrefix(newValue);
+ }
+
+ /**
+ * Returns the positive suffix.
+ *
+ * <p>Example: 123%
+ *
+ * @return the suffix
+ * @stable ICU 2.0
+ */
+ public String getPositiveSuffix() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getPositiveSuffix();
+ }
+
+ /**
+ * Sets the positive suffix.
+ *
+ * <p>Example: 123%
+ * @param newValue the suffix
+ * @stable ICU 2.0
+ */
+ public void setPositiveSuffix(String newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setPositiveSuffix(newValue);
+ }
+
+ /**
+ * Returns the negative suffix.
+ *
+ * <p>Examples: -123%, ($123) (with positive suffixes)
+ *
+ * @return the suffix
+ * @stable ICU 2.0
+ */
+ public String getNegativeSuffix() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getNegativeSuffix();
+ }
+
+ /**
+ * Sets the negative suffix.
+ *
+ * <p>Examples: 123%
+ * @param newValue the suffix
+ * @stable ICU 2.0
+ */
+ public void setNegativeSuffix(String newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setNegativeSuffix(newValue);
+ }
+
+ /**
+ * Returns the multiplier for use in percent, permill, etc. For a percentage, set the
+ * suffixes to have "%" and the multiplier to be 100. (For Arabic, use arabic percent
+ * symbol). For a permill, set the suffixes to have "\u2031" and the multiplier to be
+ * 1000.
+ *
+ * <p>Examples: with 100, 1.23 -> "123", and "123" -> 1.23
+ *
+ * @return the multiplier
+ * @stable ICU 2.0
+ */
+ public int getMultiplier() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getMultiplier();
+ }
+
+ /**
+ * Sets the multiplier for use in percent, permill, etc. For a percentage, set the
+ * suffixes to have "%" and the multiplier to be 100. (For Arabic, use arabic percent
+ * symbol). For a permill, set the suffixes to have "\u2031" and the multiplier to be
+ * 1000.
+ *
+ * <p>Examples: with 100, 1.23 -> "123", and "123" -> 1.23
+ *
+ * @param newValue the multiplier
+ * @stable ICU 2.0
+ */
+ public void setMultiplier(int newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setMultiplier(newValue);
+ }
+
+ /**
+ * {@icu} Returns the rounding increment.
+ *
+ * @return A positive rounding increment, or <code>null</code> if rounding is not in
+ * effect.
+ * @see #setRoundingIncrement
+ * @see #getRoundingMode
+ * @see #setRoundingMode
+ * @stable ICU 2.0
+ */
+ public BigDecimal getRoundingIncrement() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the rounding increment. This method also controls whether rounding is
+ * enabled.
+ *
+ * @param newValue A positive rounding increment, or <code>null</code> or
+ * <code>BigDecimal(0.0)</code> to disable rounding.
+ * @throws IllegalArgumentException if <code>newValue</code> is &lt; 0.0
+ * @see #getRoundingIncrement
+ * @see #getRoundingMode
+ * @see #setRoundingMode
+ * @stable ICU 2.0
+ */
+ public void setRoundingIncrement(java.math.BigDecimal newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the rounding increment. This method also controls whether rounding is
+ * enabled.
+ *
+ * @param newValue A positive rounding increment, or <code>null</code> or
+ * <code>BigDecimal(0.0)</code> to disable rounding.
+ * @throws IllegalArgumentException if <code>newValue</code> is &lt; 0.0
+ * @see #getRoundingIncrement
+ * @see #getRoundingMode
+ * @see #setRoundingMode
+ * @stable ICU 3.6
+ */
+ public void setRoundingIncrement(BigDecimal newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the rounding increment. This method also controls whether rounding is
+ * enabled.
+ *
+ * @param newValue A positive rounding increment, or 0.0 to disable rounding.
+ * @throws IllegalArgumentException if <code>newValue</code> is &lt; 0.0
+ * @see #getRoundingIncrement
+ * @see #getRoundingMode
+ * @see #setRoundingMode
+ * @stable ICU 2.0
+ */
+ public void setRoundingIncrement(double newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Returns the rounding mode.
+ *
+ * @return A rounding mode, between <code>BigDecimal.ROUND_UP</code> and
+ * <code>BigDecimal.ROUND_UNNECESSARY</code>.
+ * @see #setRoundingIncrement
+ * @see #getRoundingIncrement
+ * @see #setRoundingMode
+ * @see java.math.BigDecimal
+ * @stable ICU 2.0
+ */
+ public int getRoundingMode() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Sets the rounding mode. This has no effect unless the rounding increment is greater
+ * than zero.
+ *
+ * @param roundingMode A rounding mode, between <code>BigDecimal.ROUND_UP</code> and
+ * <code>BigDecimal.ROUND_UNNECESSARY</code>.
+ * @exception IllegalArgumentException if <code>roundingMode</code> is unrecognized.
+ * @see #setRoundingIncrement
+ * @see #getRoundingIncrement
+ * @see #getRoundingMode
+ * @see java.math.BigDecimal
+ * @stable ICU 2.0
+ */
+ public void setRoundingMode(int roundingMode) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Returns the width to which the output of <code>format()</code> is padded. The width is
+ * counted in 16-bit code units.
+ *
+ * @return the format width, or zero if no padding is in effect
+ * @see #setFormatWidth
+ * @see #getPadCharacter
+ * @see #setPadCharacter
+ * @see #getPadPosition
+ * @see #setPadPosition
+ * @stable ICU 2.0
+ */
+ public int getFormatWidth() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Sets the width to which the output of <code>format()</code> is
+ * padded. The width is counted in 16-bit code units. This method
+ * also controls whether padding is enabled.
+ *
+ * @param width the width to which to pad the result of
+ * <code>format()</code>, or zero to disable padding
+ * @exception IllegalArgumentException if <code>width</code> is &lt; 0
+ * @see #getFormatWidth
+ * @see #getPadCharacter
+ * @see #setPadCharacter
+ * @see #getPadPosition
+ * @see #setPadPosition
+ * @stable ICU 2.0
+ */
+ public void setFormatWidth(int width) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns the character used to pad to the format width. The default is ' '.
+ *
+ * @return the pad character
+ * @see #setFormatWidth
+ * @see #getFormatWidth
+ * @see #setPadCharacter
+ * @see #getPadPosition
+ * @see #setPadPosition
+ * @stable ICU 2.0
+ */
+ public char getPadCharacter() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the character used to pad to the format width. If padding is not
+ * enabled, then this will take effect if padding is later enabled.
+ *
+ * @param padChar the pad character
+ * @see #setFormatWidth
+ * @see #getFormatWidth
+ * @see #getPadCharacter
+ * @see #getPadPosition
+ * @see #setPadPosition
+ * @stable ICU 2.0
+ */
+ public void setPadCharacter(char padChar) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns the position at which padding will take place. This is the location at
+ * which padding will be inserted if the result of <code>format()</code> is shorter
+ * than the format width.
+ *
+ * @return the pad position, one of <code>PAD_BEFORE_PREFIX</code>,
+ * <code>PAD_AFTER_PREFIX</code>, <code>PAD_BEFORE_SUFFIX</code>, or
+ * <code>PAD_AFTER_SUFFIX</code>.
+ * @see #setFormatWidth
+ * @see #getFormatWidth
+ * @see #setPadCharacter
+ * @see #getPadCharacter
+ * @see #setPadPosition
+ * @see #PAD_BEFORE_PREFIX
+ * @see #PAD_AFTER_PREFIX
+ * @see #PAD_BEFORE_SUFFIX
+ * @see #PAD_AFTER_SUFFIX
+ * @stable ICU 2.0
+ */
+ public int getPadPosition() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the position at which padding will take place. This is the location at
+ * which padding will be inserted if the result of <code>format()</code> is shorter
+ * than the format width. This has no effect unless padding is enabled.
+ *
+ * @param padPos the pad position, one of <code>PAD_BEFORE_PREFIX</code>,
+ * <code>PAD_AFTER_PREFIX</code>, <code>PAD_BEFORE_SUFFIX</code>, or
+ * <code>PAD_AFTER_SUFFIX</code>.
+ * @exception IllegalArgumentException if the pad position is unrecognized
+ * @see #setFormatWidth
+ * @see #getFormatWidth
+ * @see #setPadCharacter
+ * @see #getPadCharacter
+ * @see #getPadPosition
+ * @see #PAD_BEFORE_PREFIX
+ * @see #PAD_AFTER_PREFIX
+ * @see #PAD_BEFORE_SUFFIX
+ * @see #PAD_AFTER_SUFFIX
+ * @stable ICU 2.0
+ */
+ public void setPadPosition(int padPos) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns whether or not scientific notation is used.
+ *
+ * @return true if this object formats and parses scientific notation
+ * @see #setScientificNotation
+ * @see #getMinimumExponentDigits
+ * @see #setMinimumExponentDigits
+ * @see #isExponentSignAlwaysShown
+ * @see #setExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public boolean isScientificNotation() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets whether or not scientific notation is used. When scientific notation is
+ * used, the effective maximum number of integer digits is <= 8. If the maximum number
+ * of integer digits is set to more than 8, the effective maximum will be 1. This
+ * allows this call to generate a 'default' scientific number format without
+ * additional changes.
+ *
+ * @param useScientific true if this object formats and parses scientific notation
+ * @see #isScientificNotation
+ * @see #getMinimumExponentDigits
+ * @see #setMinimumExponentDigits
+ * @see #isExponentSignAlwaysShown
+ * @see #setExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public void setScientificNotation(boolean useScientific) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns the minimum exponent digits that will be shown.
+ *
+ * @return the minimum exponent digits that will be shown
+ * @see #setScientificNotation
+ * @see #isScientificNotation
+ * @see #setMinimumExponentDigits
+ * @see #isExponentSignAlwaysShown
+ * @see #setExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public byte getMinimumExponentDigits() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the minimum exponent digits that will be shown. This has no effect
+ * unless scientific notation is in use.
+ *
+ * @param minExpDig a value >= 1 indicating the fewest exponent
+ * digits that will be shown
+ * @exception IllegalArgumentException if <code>minExpDig</code> &lt; 1
+ * @see #setScientificNotation
+ * @see #isScientificNotation
+ * @see #getMinimumExponentDigits
+ * @see #isExponentSignAlwaysShown
+ * @see #setExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public void setMinimumExponentDigits(byte minExpDig) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns whether the exponent sign is always shown.
+ *
+ * @return true if the exponent is always prefixed with either the localized minus
+ * sign or the localized plus sign, false if only negative exponents are prefixed with
+ * the localized minus sign.
+ * @see #setScientificNotation
+ * @see #isScientificNotation
+ * @see #setMinimumExponentDigits
+ * @see #getMinimumExponentDigits
+ * @see #setExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public boolean isExponentSignAlwaysShown() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets whether the exponent sign is always shown. This has no effect unless
+ * scientific notation is in use.
+ *
+ * @param expSignAlways true if the exponent is always prefixed with either the
+ * localized minus sign or the localized plus sign, false if only negative exponents
+ * are prefixed with the localized minus sign.
+ * @see #setScientificNotation
+ * @see #isScientificNotation
+ * @see #setMinimumExponentDigits
+ * @see #getMinimumExponentDigits
+ * @see #isExponentSignAlwaysShown
+ * @stable ICU 2.0
+ */
+ public void setExponentSignAlwaysShown(boolean expSignAlways) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Returns the grouping size. Grouping size is the number of digits between grouping
+ * separators in the integer portion of a number. For example, in the number
+ * "123,456.78", the grouping size is 3.
+ *
+ * @see #setGroupingSize
+ * @see NumberFormat#isGroupingUsed
+ * @see DecimalFormatSymbols#getGroupingSeparator
+ * @stable ICU 2.0
+ */
+ public int getGroupingSize() {
+     // Read the value from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.getGroupingSize();
+ }
+
+ /**
+ * Sets the grouping size. Grouping size is the number of digits between grouping
+ * separators in the integer portion of a number. For example, in the number
+ * "123,456.78", the grouping size is 3.
+ *
+ * @see #getGroupingSize
+ * @see NumberFormat#setGroupingUsed
+ * @see DecimalFormatSymbols#setGroupingSeparator
+ * @stable ICU 2.0
+ */
+ public void setGroupingSize(int newValue) {
+     // Store the value on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setGroupingSize(newValue);
+ }
+
+ /**
+ * {@icu} Returns the secondary grouping size. In some locales one grouping interval
+ * is used for the least significant integer digits (the primary grouping size), and
+ * another is used for all others (the secondary grouping size). A formatter
+ * supporting a secondary grouping size will return a positive integer unequal to the
+ * primary grouping size returned by <code>getGroupingSize()</code>. For example, if
+ * the primary grouping size is 4, and the secondary grouping size is 2, then the
+ * number 123456789 formats as "1,23,45,6789", and the pattern appears as "#,##,###0".
+ *
+ * @return the secondary grouping size, or a value less than one if there is none
+ * @see #setSecondaryGroupingSize
+ * @see NumberFormat#isGroupingUsed
+ * @see DecimalFormatSymbols#getGroupingSeparator
+ * @stable ICU 2.0
+ */
+ public int getSecondaryGroupingSize() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the secondary grouping size. If set to a value less than 1, then
+ * secondary grouping is turned off, and the primary grouping size is used for all
+ * intervals, not just the least significant.
+ *
+ * @see #getSecondaryGroupingSize
+ * @see NumberFormat#setGroupingUsed
+ * @see DecimalFormatSymbols#setGroupingSeparator
+ * @stable ICU 2.0
+ */
+ public void setSecondaryGroupingSize(int newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns the MathContext used by this format.
+ *
+ * @return desired MathContext
+ * @see #getMathContext
+ * @stable ICU 4.2
+ */
+ public MathContext getMathContextICU() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns the MathContext used by this format.
+ *
+ * @return desired MathContext
+ * @see #getMathContext
+ * @stable ICU 4.2
+ */
+ public java.math.MathContext getMathContext() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the MathContext used by this format.
+ *
+ * @param newValue desired MathContext
+ * @see #getMathContext
+ * @stable ICU 4.2
+ */
+ public void setMathContextICU(MathContext newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the MathContext used by this format.
+ *
+ * @param newValue desired MathContext
+ * @see #getMathContext
+ * @stable ICU 4.2
+ */
+ public void setMathContext(java.math.MathContext newValue) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Returns the behavior of the decimal separator with integers. (The decimal
+ * separator will always appear with decimals.)
+ *
+ * <p>Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
+ *
+ * @stable ICU 2.0
+ */
+ public boolean isDecimalSeparatorAlwaysShown() {
+     // Read the flag from the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.isDecimalSeparatorAlwaysShown();
+ }
+
+ /**
+ * Sets the behavior of the decimal separator with integers. (The decimal separator
+ * will always appear with decimals.)
+ *
+ * <p>This only affects formatting, and only where there might be no digits after the
+ * decimal point, e.g., if true, 3456.00 -> "3,456."; if false, 3456.00 -> "3456". This
+ * is independent of parsing. If you want parsing to stop at the decimal point, use
+ * setParseIntegerOnly.
+ *
+ * <p>Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
+ *
+ * @stable ICU 2.0
+ */
+ public void setDecimalSeparatorAlwaysShown(boolean newValue) {
+     // Store the flag on the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setDecimalSeparatorAlwaysShown(newValue);
+ }
+
+ /**
+ * {@icu} Returns a copy of the CurrencyPluralInfo used by this format. It might
+ * return null if the decimal format is not a plural type currency decimal
+ * format. Plural type currency decimal format means either the pattern in the decimal
+ * format contains 3 currency signs, or the decimal format is initialized with
+ * PLURALCURRENCYSTYLE.
+ *
+ * @return desired CurrencyPluralInfo
+ * @see CurrencyPluralInfo
+ * @stable ICU 4.2
+ */
+ public CurrencyPluralInfo getCurrencyPluralInfo() {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Sets the CurrencyPluralInfo used by this format. The format uses a copy of
+ * the provided information.
+ *
+ * @param newInfo desired CurrencyPluralInfo
+ * @see CurrencyPluralInfo
+ * @stable ICU 4.2
+ */
+ public void setCurrencyPluralInfo(CurrencyPluralInfo newInfo) {
+     // Always rejected: this operation is unavailable in com.ibm.icu.base.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Overrides clone.
+ * @stable ICU 2.0
+ */
+ public Object clone() {
+     // Clone the wrapped JDK formatter and re-wrap it in a new DecimalFormat.
+     // The previous code cast the clone to java.text.DecimalFormatSymbols and returned a
+     // DecimalFormatSymbols wrapper, which fails with ClassCastException for the JDK
+     // DecimalFormat this class wraps and would not be a DecimalFormat in any case.
+     final java.text.DecimalFormat jdkCopy = (java.text.DecimalFormat) numberFormat.clone();
+     return new DecimalFormat(jdkCopy);
+ }
+
+ /**
+ * Overrides equals.
+ * @stable ICU 2.0
+ */
+ public boolean equals(Object obj) {
+     // Equality is decided entirely by the inherited implementation.
+     final boolean same = super.equals(obj);
+     return same;
+ }
+
+ /**
+ * Overrides hashCode.
+ * @stable ICU 2.0
+ */
+ public int hashCode() {
+     // The hash is computed entirely by the inherited implementation.
+     final int hash = super.hashCode();
+     return hash;
+ }
+
+ /**
+ * Synthesizes a pattern string that represents the current state of this Format
+ * object.
+ *
+ * @see #applyPattern
+ * @stable ICU 2.0
+ */
+ public String toPattern() {
+     // The pattern is synthesized by the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.toPattern();
+ }
+
+ /**
+ * Synthesizes a localized pattern string that represents the current state of this
+ * Format object.
+ *
+ * @see #applyPattern
+ * @stable ICU 2.0
+ */
+ public String toLocalizedPattern() {
+     // The localized pattern is synthesized by the wrapped JDK formatter.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.toLocalizedPattern();
+ }
+
+ /**
+ * Formats the object to an attributed string, and return the corresponding iterator.
+ *
+ * @stable ICU 3.6
+ */
+ public AttributedCharacterIterator formatToCharacterIterator(Object obj) {
+     // Let the wrapped formatter do the actual formatting.
+     AttributedCharacterIterator it = numberFormat.formatToCharacterIterator(obj);
+
+     // Extract the formatted text first; AttributedString needs the plain String.
+     StringBuilder sb = new StringBuilder();
+     for (char c = it.first(); c != CharacterIterator.DONE; c = it.next()) {
+         sb.append(c);
+     }
+
+     // The result is rebuilt from scratch so that its attribute keys can be replaced.
+     AttributedString attrstr = new AttributedString(sb.toString());
+
+     // Walk the source iterator run by run, mapping each JDK Field to the ICU Field.
+     int idx = 0;
+     it.first();
+     while (idx < it.getEndIndex()) {
+         int end = it.getRunLimit();
+         Map<Attribute, Object> attributes = it.getAttributes();
+         if (attributes != null) {
+             for (Entry<Attribute, Object> entry : attributes.entrySet()) {
+                 Attribute attr = entry.getKey();
+                 Object val = entry.getValue();
+                 // For a recognised JDK field both the key and the value become the
+                 // ICU constant; any other attribute is copied through unchanged.
+                 if (attr.equals(java.text.NumberFormat.Field.CURRENCY)) {
+                     val = attr = Field.CURRENCY;
+                 } else if (attr.equals(java.text.NumberFormat.Field.DECIMAL_SEPARATOR)) {
+                     val = attr = Field.DECIMAL_SEPARATOR;
+                 } else if (attr.equals(java.text.NumberFormat.Field.EXPONENT)) {
+                     val = attr = Field.EXPONENT;
+                 } else if (attr.equals(java.text.NumberFormat.Field.EXPONENT_SIGN)) {
+                     val = attr = Field.EXPONENT_SIGN;
+                 } else if (attr.equals(java.text.NumberFormat.Field.EXPONENT_SYMBOL)) {
+                     val = attr = Field.EXPONENT_SYMBOL;
+                 } else if (attr.equals(java.text.NumberFormat.Field.FRACTION)) {
+                     val = attr = Field.FRACTION;
+                 } else if (attr.equals(java.text.NumberFormat.Field.GROUPING_SEPARATOR)) {
+                     val = attr = Field.GROUPING_SEPARATOR;
+                 } else if (attr.equals(java.text.NumberFormat.Field.INTEGER)) {
+                     val = attr = Field.INTEGER;
+                 } else if (attr.equals(java.text.NumberFormat.Field.PERCENT)) {
+                     val = attr = Field.PERCENT;
+                 } else if (attr.equals(java.text.NumberFormat.Field.PERMILLE)) {
+                     val = attr = Field.PERMILLE;
+                 } else if (attr.equals(java.text.NumberFormat.Field.SIGN)) {
+                     val = attr = Field.SIGN;
+                 }
+                 attrstr.addAttribute(attr, val, idx, end);
+             }
+         }
+         // Jump straight to the start of the next run (or to the end index).
+         idx = end;
+         it.setIndex(idx);
+     }
+
+     return attrstr.getIterator();
+ }
+
+ /**
+ * Applies the given pattern to this Format object. A pattern is a short-hand
+ * specification for the various formatting properties. These properties can also be
+ * changed individually through the various setter methods.
+ *
+ * <p>No limit on integer digits is set by this routine, since that is the
+ * typical end-user desire; use setMaximumIntegerDigits if you want to set a real value. For
+ * negative numbers, use a second pattern, separated by a semicolon.
+ *
+ * <p>Example "#,#00.0#" -> 1,234.56
+ *
+ * <p>This means a minimum of 2 integer digits, 1 fraction digit, and a maximum of 2
+ * fraction digits.
+ *
+ * <p>Example: "#,#00.0#;(#,#00.0#)" for negatives in parentheses.
+ *
+ * <p>In negative patterns, the minimum and maximum counts are ignored; these are
+ * presumed to be set in the positive pattern.
+ *
+ * @stable ICU 2.0
+ */
+ public void applyPattern(String pattern) {
+     // The wrapped formatter must be a JDK DecimalFormat; the cast fails otherwise.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.applyPattern(pattern);
+ }
+
+ /**
+ * Applies the given pattern to this Format object. The pattern is assumed to be in a
+ * localized notation. A pattern is a short-hand specification for the various
+ * formatting properties. These properties can also be changed individually through
+ * the various setter methods.
+ *
+ * <p>No limit on integer digits is set by this routine, since that is the
+ * typical end-user desire; use setMaximumIntegerDigits if you want to set a real value. For
+ * negative numbers, use a second pattern, separated by a semicolon.
+ *
+ * <p>Example "#,#00.0#" -> 1,234.56
+ *
+ * <p>This means a minimum of 2 integer digits, 1 fraction digit, and a maximum of 2
+ * fraction digits.
+ *
+ * <p>Example: "#,#00.0#;(#,#00.0#)" for negatives in parentheses.
+ *
+ * <p>In negative patterns, the minimum and maximum counts are ignored; these are
+ * presumed to be set in the positive pattern.
+ *
+ * @stable ICU 2.0
+ */
+ public void applyLocalizedPattern(String pattern) {
+     // The wrapped formatter must be a JDK DecimalFormat; the cast fails otherwise.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.applyLocalizedPattern(pattern);
+ }
+
+ /**
+ * Sets the maximum number of digits allowed in the integer portion of a number. This
+ * override limits the integer digit count to 309.
+ *
+ * @see NumberFormat#setMaximumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public void setMaximumIntegerDigits(int newValue) {
+ // Forwards the value unchanged to the superclass; this method applies no 309-digit
+ // cap itself. NOTE(review): any such limit presumably comes from the wrapped JDK
+ // formatter - confirm.
+ super.setMaximumIntegerDigits(newValue);
+ }
+
+ /**
+ * Sets the minimum number of digits allowed in the integer portion of a number. This
+ * override limits the integer digit count to 309.
+ *
+ * @see NumberFormat#setMinimumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public void setMinimumIntegerDigits(int newValue) {
+ // Forwards the value unchanged to the superclass; this method applies no 309-digit
+ // cap itself. NOTE(review): any such limit presumably comes from the wrapped JDK
+ // formatter - confirm.
+ super.setMinimumIntegerDigits(newValue);
+ }
+
+ /**
+ * {@icu} Returns the minimum number of significant digits that will be
+ * displayed. This value has no effect unless {@link #areSignificantDigitsUsed()}
+ * returns true.
+ *
+ * @return the fewest significant digits that will be shown
+ * @stable ICU 3.0
+ */
+ public int getMinimumSignificantDigits() {
+     // Significant-digit control is not implemented here: the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Returns the maximum number of significant digits that will be
+ * displayed. This value has no effect unless {@link #areSignificantDigitsUsed()}
+ * returns true.
+ *
+ * @return the most significant digits that will be shown
+ * @stable ICU 3.0
+ */
+ public int getMaximumSignificantDigits() {
+     // Significant-digit control is not implemented here: the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Sets the minimum number of significant digits that will be displayed. If
+ * <code>min</code> is less than one then it is set to one. If the maximum significant
+ * digits count is less than <code>min</code>, then it is set to
+ * <code>min</code>. This value has no effect unless {@link #areSignificantDigitsUsed()}
+ * returns true.
+ *
+ * @param min the fewest significant digits to be shown
+ * @stable ICU 3.0
+ */
+ public void setMinimumSignificantDigits(int min) {
+     // Significant-digit control is not implemented here: the argument is ignored
+     // and the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Sets the maximum number of significant digits that will be displayed. If
+ * <code>max</code> is less than one then it is set to one. If the minimum significant
+ * digits count is greater than <code>max</code>, then it is set to
+ * <code>max</code>. This value has no effect unless {@link #areSignificantDigitsUsed()}
+ * returns true.
+ *
+ * @param max the most significant digits to be shown
+ * @stable ICU 3.0
+ */
+ public void setMaximumSignificantDigits(int max) {
+     // Significant-digit control is not implemented here: the argument is ignored
+     // and the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Returns true if significant digits are in use or false if integer and
+ * fraction digit counts are in use.
+ *
+ * @return true if significant digits are in use
+ * @stable ICU 3.0
+ */
+ public boolean areSignificantDigitsUsed() {
+     // Significant-digit control is not implemented here: the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Sets whether significant digits are in use, or integer and fraction digit
+ * counts are in use.
+ *
+ * @param useSignificantDigits true to use significant digits, or false to use integer
+ * and fraction digit counts
+ * @stable ICU 3.0
+ */
+ public void setSignificantDigitsUsed(boolean useSignificantDigits) {
+     // Significant-digit control is not implemented here: the argument is ignored
+     // and the call always fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Sets the Currency object used to display currency amounts. This takes
+ * effect immediately, if this format is a currency format. If this format is not a
+ * currency format, then the currency object is used if and when this object becomes a
+ * currency format through the application of a new pattern.
+ *
+ * @param theCurrency new currency object to use. Must not be null.
+ * @stable ICU 2.2
+ */
+ public void setCurrency(Currency theCurrency) {
+ // Forwarded unchanged to the superclass; no null check is performed in this
+ // method itself.
+ super.setCurrency(theCurrency);
+ }
+
+ /**
+ * Sets the maximum number of digits allowed in the fraction portion of a number. This
+ * override limits the fraction digit count to 340.
+ *
+ * @see NumberFormat#setMaximumFractionDigits
+ * @stable ICU 2.0
+ */
+ public void setMaximumFractionDigits(int newValue) {
+ // Forwards the value unchanged to the superclass; this method applies no 340-digit
+ // cap itself. NOTE(review): any such limit presumably comes from the wrapped JDK
+ // formatter - confirm.
+ super.setMaximumFractionDigits(newValue);
+ }
+
+ /**
+ * Sets the minimum number of digits allowed in the fraction portion of a number. This
+ * override limits the fraction digit count to 340.
+ *
+ * @see NumberFormat#setMinimumFractionDigits
+ * @stable ICU 2.0
+ */
+ public void setMinimumFractionDigits(int newValue) {
+ // Forwards the value unchanged to the superclass; this method applies no 340-digit
+ // cap itself. NOTE(review): any such limit presumably comes from the wrapped JDK
+ // formatter - confirm.
+ super.setMinimumFractionDigits(newValue);
+ }
+
+ /**
+ * Sets whether {@link #parse(String, ParsePosition)} returns BigDecimal. The
+ * default value is false.
+ *
+ * @param value true if {@link #parse(String, ParsePosition)}
+ * returns BigDecimal.
+ * @stable ICU 3.6
+ */
+ public void setParseBigDecimal(boolean value) {
+     // The wrapped formatter must be a JDK DecimalFormat; the cast fails otherwise.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     jdkFormat.setParseBigDecimal(value);
+ }
+
+ /**
+ * Returns whether {@link #parse(String, ParsePosition)} returns BigDecimal.
+ *
+ * @return true if {@link #parse(String, ParsePosition)} returns BigDecimal.
+ * @stable ICU 3.6
+ */
+ public boolean isParseBigDecimal() {
+     // The wrapped formatter must be a JDK DecimalFormat; the cast fails otherwise.
+     final java.text.DecimalFormat jdkFormat = (java.text.DecimalFormat) numberFormat;
+     return jdkFormat.isParseBigDecimal();
+ }
+
+ // ----------------------------------------------------------------------
+ // CONSTANTS
+ // ----------------------------------------------------------------------
+
+ /**
+ * {@icu} Constant for {@link #getPadPosition()} and {@link #setPadPosition(int)} to
+ * specify pad characters inserted before the prefix.
+ *
+ * @see #setPadPosition
+ * @see #getPadPosition
+ * @see #PAD_AFTER_PREFIX
+ * @see #PAD_BEFORE_SUFFIX
+ * @see #PAD_AFTER_SUFFIX
+ * @stable ICU 2.0
+ */
+ public static final int PAD_BEFORE_PREFIX = 0; // pad characters go before the prefix
+
+ /**
+ * {@icu} Constant for {@link #getPadPosition()} and {@link #setPadPosition(int)} to
+ * specify pad characters inserted after the prefix.
+ *
+ * @see #setPadPosition
+ * @see #getPadPosition
+ * @see #PAD_BEFORE_PREFIX
+ * @see #PAD_BEFORE_SUFFIX
+ * @see #PAD_AFTER_SUFFIX
+ * @stable ICU 2.0
+ */
+ public static final int PAD_AFTER_PREFIX = 1; // pad characters go after the prefix
+
+ /**
+ * {@icu} Constant for {@link #getPadPosition()} and {@link #setPadPosition(int)} to
+ * specify pad characters inserted before the suffix.
+ *
+ * @see #setPadPosition
+ * @see #getPadPosition
+ * @see #PAD_BEFORE_PREFIX
+ * @see #PAD_AFTER_PREFIX
+ * @see #PAD_AFTER_SUFFIX
+ * @stable ICU 2.0
+ */
+ public static final int PAD_BEFORE_SUFFIX = 2; // pad characters go before the suffix
+
+ /**
+ * {@icu} Constant for {@link #getPadPosition()} and {@link #setPadPosition(int)} to
+ * specify pad characters inserted after the suffix.
+ *
+ * @see #setPadPosition
+ * @see #getPadPosition
+ * @see #PAD_BEFORE_PREFIX
+ * @see #PAD_AFTER_PREFIX
+ * @see #PAD_BEFORE_SUFFIX
+ * @stable ICU 2.0
+ */
+ public static final int PAD_AFTER_SUFFIX = 3; // pad characters go after the suffix
+}
+
+// eof
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormatSymbols.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormatSymbols.java
new file mode 100644
index 00000000000..51fc72ac54d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormatSymbols.java
@@ -0,0 +1,350 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+
+import java.io.Serializable;
+import java.util.Locale;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * This class represents the set of symbols (such as the decimal separator, the
+ * grouping separator, and so on) needed by <code>DecimalFormat</code> to format
+ * numbers. <code>DecimalFormat</code> creates for itself an instance of
+ * <code>DecimalFormatSymbols</code> from its locale data. If you need to
+ * change any of these symbols, you can get the
+ * <code>DecimalFormatSymbols</code> object from your <code>DecimalFormat</code>
+ * and modify it.
+ *
+ * <p>This is an enhanced version of <code>DecimalFormatSymbols</code> that
+ * is based on the standard version in the JDK. New or changed functionality
+ * is labeled
+ * <strong>NEW</strong>.
+ *
+ * @see java.util.Locale
+ * @see DecimalFormat
+ * @author Mark Davis
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public final class DecimalFormatSymbols implements Cloneable, Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * The wrapped JDK symbols object. Every accessor and mutator of this class
+     * forwards directly to it.
+     * @internal
+     */
+    public final java.text.DecimalFormatSymbols dfs;
+
+    /**
+     * Wraps the given JDK symbols object. The delegate is stored as-is (not copied),
+     * so later changes made through either object are visible through both.
+     * @param delegate the JDK symbols object to wrap
+     * @internal
+     */
+    public DecimalFormatSymbols(java.text.DecimalFormatSymbols delegate) {
+        this.dfs = delegate;
+    }
+
+    /**
+     * Create a DecimalFormatSymbols object for the default locale.
+     * @stable ICU 2.0
+     */
+    public DecimalFormatSymbols() {
+        this(new java.text.DecimalFormatSymbols());
+    }
+
+    /**
+     * Create a DecimalFormatSymbols object for the given locale.
+     * @param locale the locale
+     * @stable ICU 2.0
+     */
+    public DecimalFormatSymbols(Locale locale) {
+        this(new java.text.DecimalFormatSymbols(locale));
+    }
+
+    /**
+     * Create a DecimalFormatSymbols object for the given locale.
+     * The ULocale is converted with {@code toLocale()} before the JDK symbols
+     * are looked up.
+     * @param locale the locale
+     * @stable ICU 3.2
+     */
+    public DecimalFormatSymbols(ULocale locale) {
+        this(new java.text.DecimalFormatSymbols(locale.toLocale()));
+    }
+
+    /**
+     * Return the character used for zero. Different for Arabic, etc.
+     * @return the character
+     * @stable ICU 2.0
+     */
+    public char getZeroDigit() {
+        return dfs.getZeroDigit();
+    }
+
+    /**
+     * Set the character used for zero.
+     * @param zeroDigit the zero character.
+     * @stable ICU 2.0
+     */
+    public void setZeroDigit(char zeroDigit) {
+        dfs.setZeroDigit(zeroDigit);
+    }
+
+    /**
+     * Return the character used for thousands separator. Different for French, etc.
+     * @return the thousands character
+     * @stable ICU 2.0
+     */
+    public char getGroupingSeparator() {
+        return dfs.getGroupingSeparator();
+    }
+
+    /**
+     * Set the character used for thousands separator. Different for French, etc.
+     * @param groupingSeparator the thousands character
+     * @stable ICU 2.0
+     */
+    public void setGroupingSeparator(char groupingSeparator) {
+        dfs.setGroupingSeparator(groupingSeparator);
+    }
+
+    /**
+     * Return the character used for decimal sign. Different for French, etc.
+     * @return the decimal character
+     * @stable ICU 2.0
+     */
+    public char getDecimalSeparator() {
+        return dfs.getDecimalSeparator();
+    }
+
+    /**
+     * Set the character used for decimal sign. Different for French, etc.
+     * @param decimalSeparator the decimal character
+     * @stable ICU 2.0
+     */
+    public void setDecimalSeparator(char decimalSeparator) {
+        dfs.setDecimalSeparator(decimalSeparator);
+    }
+
+    /**
+     * Return the character used for the per-mille sign. Different for Arabic, etc.
+     * @return the per-mille character
+     * @stable ICU 2.0
+     */
+    public char getPerMill() {
+        return dfs.getPerMill();
+    }
+
+    /**
+     * Set the character used for the per-mille sign. Different for Arabic, etc.
+     * @param perMill the per-mille character
+     * @stable ICU 2.0
+     */
+    public void setPerMill(char perMill) {
+        dfs.setPerMill(perMill);
+    }
+
+    /**
+     * Return the character used for percent sign. Different for Arabic, etc.
+     * @return the percent character
+     * @stable ICU 2.0
+     */
+    public char getPercent() {
+        return dfs.getPercent();
+    }
+
+    /**
+     * Set the character used for percent sign. Different for Arabic, etc.
+     * @param percent the percent character
+     * @stable ICU 2.0
+     */
+    public void setPercent(char percent) {
+        dfs.setPercent(percent);
+    }
+
+    /**
+     * Return the character used for a digit in a pattern.
+     * @return the digit pattern character
+     * @stable ICU 2.0
+     */
+    public char getDigit() {
+        return dfs.getDigit();
+    }
+
+    /**
+     * Set the character used for a digit in a pattern.
+     * @param digit the digit pattern character
+     * @stable ICU 2.0
+     */
+    public void setDigit(char digit) {
+        dfs.setDigit(digit);
+    }
+
+    /**
+     * Return the character used to separate positive and negative subpatterns
+     * in a pattern.
+     * @return the pattern separator character
+     * @stable ICU 2.0
+     */
+    public char getPatternSeparator() {
+        return dfs.getPatternSeparator();
+    }
+
+    /**
+     * Set the character used to separate positive and negative subpatterns
+     * in a pattern.
+     * @param patternSeparator the pattern separator character
+     * @stable ICU 2.0
+     */
+    public void setPatternSeparator(char patternSeparator) {
+        dfs.setPatternSeparator(patternSeparator);
+    }
+
+    /**
+     * Return the String used to represent infinity. Almost always left
+     * unchanged.
+     * @return the Infinity string
+     * @stable ICU 2.0
+     */
+    public String getInfinity() {
+        return dfs.getInfinity();
+    }
+
+    /**
+     * Set the String used to represent infinity. Almost always left
+     * unchanged.
+     * @param infinity the Infinity String
+     * @stable ICU 2.0
+     */
+    public void setInfinity(String infinity) {
+        dfs.setInfinity(infinity);
+    }
+
+    /**
+     * Return the String used to represent NaN. Almost always left
+     * unchanged.
+     * @return the NaN String
+     * @stable ICU 2.0
+     */
+    public String getNaN() {
+        return dfs.getNaN();
+    }
+
+    /**
+     * Set the String used to represent NaN. Almost always left
+     * unchanged.
+     * @param NaN the NaN String
+     * @stable ICU 2.0
+     */
+    public void setNaN(String NaN) {
+        dfs.setNaN(NaN);
+    }
+
+    /**
+     * Return the character used to represent minus sign. If no explicit
+     * negative format is specified, one is formed by prefixing
+     * minusSign to the positive format.
+     * @return the minus sign character
+     * @stable ICU 2.0
+     */
+    public char getMinusSign() {
+        return dfs.getMinusSign();
+    }
+
+    /**
+     * Set the character used to represent minus sign. If no explicit
+     * negative format is specified, one is formed by prefixing
+     * minusSign to the positive format.
+     * @param minusSign the minus sign character
+     * @stable ICU 2.0
+     */
+    public void setMinusSign(char minusSign) {
+        dfs.setMinusSign(minusSign);
+    }
+
+    /**
+     * Return the string denoting the local currency.
+     * @return the local currency String.
+     * @stable ICU 2.0
+     */
+    public String getCurrencySymbol() {
+        return dfs.getCurrencySymbol();
+    }
+
+    /**
+     * Set the string denoting the local currency.
+     * @param currency the local currency String.
+     * @stable ICU 2.0
+     */
+    public void setCurrencySymbol(String currency) {
+        dfs.setCurrencySymbol(currency);
+    }
+
+    /**
+     * Return the international string denoting the local currency.
+     * @return the international string denoting the local currency
+     * @stable ICU 2.0
+     */
+    public String getInternationalCurrencySymbol() {
+        return dfs.getInternationalCurrencySymbol();
+    }
+
+    /**
+     * Set the international string denoting the local currency.
+     * @param currency the international string denoting the local currency.
+     * @stable ICU 2.0
+     */
+    public void setInternationalCurrencySymbol(String currency) {
+        dfs.setInternationalCurrencySymbol(currency);
+    }
+
+    /**
+     * Return the monetary decimal separator.
+     * @return the monetary decimal separator character
+     * @stable ICU 2.0
+     */
+    public char getMonetaryDecimalSeparator() {
+        return dfs.getMonetaryDecimalSeparator();
+    }
+
+    /**
+     * Set the monetary decimal separator.
+     * @param sep the monetary decimal separator character
+     * @stable ICU 2.0
+     */
+    public void setMonetaryDecimalSeparator(char sep) {
+        dfs.setMonetaryDecimalSeparator(sep);
+    }
+
+    /**
+     * Standard override. The returned object wraps a clone of the delegate,
+     * so it can be modified independently of this instance.
+     * @stable ICU 2.0
+     */
+    @Override
+    public Object clone() {
+        return new DecimalFormatSymbols((java.text.DecimalFormatSymbols) dfs.clone());
+    }
+
+    /**
+     * Override equals. Two instances are equal when their wrapped JDK symbols
+     * objects are equal; {@code null} and objects of any other type are never equal
+     * to this object.
+     * @stable ICU 2.0
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        // An explicit type test replaces the former catch-all exception handler;
+        // instanceof is also false for null.
+        if (!(obj instanceof DecimalFormatSymbols)) {
+            return false;
+        }
+        return dfs.equals(((DecimalFormatSymbols) obj).dfs);
+    }
+
+    /**
+     * Override hashCode. Uses the hash code of the wrapped JDK symbols object,
+     * which keeps it consistent with {@link #equals(Object)}.
+     * @stable ICU 2.0
+     */
+    @Override
+    public int hashCode() {
+        return dfs.hashCode();
+    }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/MessageFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/MessageFormat.java
new file mode 100644
index 00000000000..31c27132836
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/MessageFormat.java
@@ -0,0 +1,1415 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 6, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.io.InvalidObjectException;
+import java.text.AttributedCharacterIterator;
+import java.text.AttributedCharacterIterator.Attribute;
+import java.text.AttributedString;
+import java.text.CharacterIterator;
+import java.text.ChoiceFormat;
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * {@icuenhanced java.text.MessageFormat}.{@icu _usage_}
+ *
+ *
MessageFormat produces concatenated messages in a language-neutral
+ * way. Use this whenever concatenating strings that are displayed to
+ * end users.
+ *
+ *
A MessageFormat contains an array of subformats arranged
+ * within a template string . Together, the subformats and
+ * template string determine how the MessageFormat will operate during
+ * formatting and parsing.
+ *
+ *
Typically, both the subformats and the template string are
+ * specified at once in a pattern . By using different
+ * patterns for different locales, messages may be localized.
+ *
+ *
When formatting, MessageFormat takes a collection of arguments
+ * and produces a user-readable string. The arguments may be passed
+ * as an array or as a Map. Each argument is matched up with its
+ * corresponding subformat, which then formats it into a string. The
+ * resulting strings are then assembled within the string template of
+ * the MessageFormat to produce the final output string.
+ *
+ *
Note:
+ * MessageFormat
differs from the other Format
+ * classes in that you create a MessageFormat
object with one
+ * of its constructors (not with a getInstance
style factory
+ * method). The factory methods aren't necessary because MessageFormat
+ * itself doesn't implement locale-specific behavior. Any locale-specific
+ * behavior is defined by the pattern that you provide and the
+ * subformats used for inserted arguments.
+ *
+ *
Note:
+ * In ICU 3.8 MessageFormat supports named arguments. If a named argument
+ * is used, all arguments must be named. Names start with a character in
+ * :ID_START:
and continue with characters in :ID_CONTINUE:
,
+ * in particular they do not start with a digit. If named arguments
+ * are used, {@link #usesNamedArguments()} will return true.
+ *
+ *
The other new methods supporting named arguments are
+ * {@link #setFormatsByArgumentName(Map)},
+ * {@link #setFormatByArgumentName(String, Format)},
+ * {@link #format(Map, StringBuffer, FieldPosition)},
+ * {@link #format(String, Map)}, {@link #parseToMap(String, ParsePosition)},
+ * and {@link #parseToMap(String)}. These methods are all compatible
+ * with patterns that do not use named arguments; in these cases
+ * the keys in the input or output Map
s use
+ * String
s that name the argument indices, e.g. "0",
+ * "1", "2"... etc.
+ *
+ *
When named arguments are used, certain methods on MessageFormat that take or
+ * return arrays will throw an exception, since it is not possible to
+ * identify positions in an array using a name. These methods are
+ * {@link #setFormatsByArgumentIndex(Format[])},
+ * {@link #setFormatByArgumentIndex(int, Format)},
+ * {@link #getFormatsByArgumentIndex()},
+ * {@link #getFormats()},
+ * {@link #format(Object[], StringBuffer, FieldPosition)},
+ * {@link #format(String, Object[])},
+ * {@link #parse(String, ParsePosition)}, and
+ * {@link #parse(String)}.
+ * These APIs all have corresponding new versions as listed above.
+ *
+ *
The API {@link #format(Object, StringBuffer, FieldPosition)} has
+ * been modified so that the Object
argument can be
+ * either an Object
array or a Map
. If this
+ * format uses named arguments, this argument must not be an
+ * Object
array otherwise an exception will be thrown.
+ * If the argument is a Map
it can be used with Strings that
+ * represent indices as described above.
+ *
+ *
+ *
+ * MessageFormat
uses patterns of the following form:
+ *
+ * MessageFormatPattern:
+ * String
+ * MessageFormatPattern FormatElement String
+ *
+ * FormatElement:
+ * { ArgumentIndexOrName }
+ * { ArgumentIndexOrName , FormatType }
+ * { ArgumentIndexOrName , FormatType , FormatStyle }
+ *
+ * ArgumentIndexOrName: one of
+ * ['0'-'9']+
+ * [:ID_START:][:ID_CONTINUE:]*
+ *
+ * FormatType: one of
+ * number date time choice spellout ordinal duration plural
+ *
+ * FormatStyle:
+ * short
+ * medium
+ * long
+ * full
+ * integer
+ * currency
+ * percent
+ * SubformatPattern
+ * RulesetName
+ *
+ * String:
+ * StringPartopt
+ * String StringPart
+ *
+ * StringPart:
+ * ''
+ * ' QuotedString '
+ * UnquotedString
+ *
+ * SubformatPattern:
+ * SubformatPatternPartopt
+ * SubformatPattern SubformatPatternPart
+ *
+ * SubFormatPatternPart:
+ * ' QuotedPattern '
+ * UnquotedPattern
+ *
+ *
+ * RulesetName:
+ * UnquotedString
+ *
+ * Within a String , "''"
represents a single
+ * quote. A QuotedString can contain arbitrary characters
+ * except single quotes; the surrounding single quotes are removed.
+ * An UnquotedString can contain arbitrary characters
+ * except single quotes and left curly brackets. Thus, a string that
+ * should result in the formatted message "'{0}'" can be written as
+ * "'''{'0}''"
or "'''{0}'''"
.
+ *
+ *
Within a SubformatPattern , different rules apply.
+ * A QuotedPattern can contain arbitrary characters
+ * except single quotes; but the surrounding single quotes are
+ * not removed, so they may be interpreted by the
+ * subformat. For example, "{1,number,$'#',##}"
will
+ * produce a number format with the pound-sign quoted, with a result
+ * such as: "$#31,45".
+ * An UnquotedPattern can contain arbitrary characters
+ * except single quotes, but curly braces within it must be balanced.
+ * For example, "ab {0} de"
and "ab '}' de"
+ * are valid subformat patterns, but "ab {0'}' de"
and
+ * "ab } de"
are not.
+ *
+ *
Warning: The rules for using quotes within message
+ * format patterns unfortunately have shown to be somewhat confusing.
+ * In particular, it isn't always obvious to localizers whether single
+ * quotes need to be doubled or not. Make sure to inform localizers about
+ * the rules, and tell them (for example, by using comments in resource
+ * bundle source files) which strings will be processed by MessageFormat.
+ * Note that localizers may need to use single quotes in translated
+ * strings where the original version doesn't have them.
+ *
+ * Note also that the simplest way to avoid the problem is to
+ * use the real apostrophe (single quote) character \u2019 (') for
+ * human-readable text, and to use the ASCII apostrophe (\u0027 ' )
+ * only in program syntax, like quoting in MessageFormat.
+ * See the annotations for U+0027 Apostrophe in The Unicode Standard.
+ *
+ *
+ * The ArgumentIndex value is a non-negative integer written
+ * using the digits '0' through '9', and represents an index into the
+ * arguments
array passed to the format
methods
+ * or the result array returned by the parse
methods.
+ *
+ *
The FormatType and FormatStyle values are used to create
+ * a Format
instance for the format element. The following
+ * table shows how the values map to Format instances. Combinations not
+ * shown in the table are illegal. A SubformatPattern must
+ * be a valid pattern string for the Format subclass used.
+ *
+ *
+ *
+ * Format Type
+ * Format Style
+ * Subformat Created
+ *
+ * (none)
+ * null
+ *
+ * number
+ * (none)
+ * NumberFormat.getInstance(getLocale())
+ *
+ * integer
+ * NumberFormat.getIntegerInstance(getLocale())
+ *
+ * currency
+ * NumberFormat.getCurrencyInstance(getLocale())
+ *
+ * percent
+ * NumberFormat.getPercentInstance(getLocale())
+ *
+ * SubformatPattern
+ * new DecimalFormat(subformatPattern, new DecimalFormatSymbols(getLocale()))
+ *
+ * date
+ * (none)
+ * DateFormat.getDateInstance(DateFormat.DEFAULT, getLocale())
+ *
+ * short
+ * DateFormat.getDateInstance(DateFormat.SHORT, getLocale())
+ *
+ * medium
+ * DateFormat.getDateInstance(DateFormat.DEFAULT, getLocale())
+ *
+ * long
+ * DateFormat.getDateInstance(DateFormat.LONG, getLocale())
+ *
+ * full
+ * DateFormat.getDateInstance(DateFormat.FULL, getLocale())
+ *
+ * SubformatPattern
+ * new SimpleDateFormat(subformatPattern, getLocale())
+ *
+ * time
+ * (none)
+ * DateFormat.getTimeInstance(DateFormat.DEFAULT, getLocale())
+ *
+ * short
+ * DateFormat.getTimeInstance(DateFormat.SHORT, getLocale())
+ *
+ * medium
+ * DateFormat.getTimeInstance(DateFormat.DEFAULT, getLocale())
+ *
+ * long
+ * DateFormat.getTimeInstance(DateFormat.LONG, getLocale())
+ *
+ * full
+ * DateFormat.getTimeInstance(DateFormat.FULL, getLocale())
+ *
+ * SubformatPattern
+ * new SimpleDateFormat(subformatPattern, getLocale())
+ *
+ * choice
+ * SubformatPattern
+ * new ChoiceFormat(subformatPattern)
+ *
+ * spellout
+ * RulesetName (optional)
+ * new RuleBasedNumberFormat(getLocale(), RuleBasedNumberFormat.SPELLOUT)
+ * .setDefaultRuleset(ruleset);
+ *
+ * ordinal
+ * RulesetName (optional)
+ * new RuleBasedNumberFormat(getLocale(), RuleBasedNumberFormat.ORDINAL)
+ * .setDefaultRuleset(ruleset);
+ *
+ * duration
+ * RulesetName (optional)
+ * new RuleBasedNumberFormat(getLocale(), RuleBasedNumberFormat.DURATION)
+ * .setDefaultRuleset(ruleset);
+ *
+ * plural
+ * SubformatPattern
+ * new PluralFormat(subformatPattern)
+ *
+ * select
+ * SubformatPattern
+ * new SelectFormat(subformatPattern)
+ *
+ *
+ *
+ *
Usage Information
+ *
+ * Here are some examples of usage:
+ *
+ *
+ * Object[] arguments = {
+ * new Integer(7),
+ * new Date(System.currentTimeMillis()),
+ * "a disturbance in the Force"
+ * };
+ *
+ * String result = MessageFormat.format(
+ * "At {1,time} on {1,date}, there was {2} on planet {0,number,integer}.",
+ * arguments);
+ *
+ * output : At 12:30 PM on Jul 3, 2053, there was a disturbance
+ * in the Force on planet 7.
+ *
+ *
+ *
+ * Typically, the message format will come from resources, and the
+ * arguments will be dynamically set at runtime.
+ *
+ * Example 2:
+ *
+ *
+ * Object[] testArgs = {new Long(3), "MyDisk"};
+ *
+ * MessageFormat form = new MessageFormat(
+ * "The disk \"{1}\" contains {0} file(s).");
+ *
+ * System.out.println(form.format(testArgs));
+ *
+ * // output, with different testArgs
+ * output : The disk "MyDisk" contains 0 file(s).
+ * output : The disk "MyDisk" contains 1 file(s).
+ * output : The disk "MyDisk" contains 1,273 file(s).
+ *
+ *
+ *
+ * For more sophisticated patterns, you can use a ChoiceFormat
to get
+ * output such as:
+ *
+ *
+ * MessageFormat form = new MessageFormat("The disk \"{1}\" contains {0}.");
+ * double[] filelimits = {0,1,2};
+ * String[] filepart = {"no files","one file","{0,number} files"};
+ * ChoiceFormat fileform = new ChoiceFormat(filelimits, filepart);
+ * form.setFormatByArgumentIndex(0, fileform);
+ *
+ * Object[] testArgs = {new Long(12373), "MyDisk"};
+ *
+ * System.out.println(form.format(testArgs));
+ *
+ * // output, with different testArgs
+ * output: The disk "MyDisk" contains no files.
+ * output: The disk "MyDisk" contains one file.
+ * output: The disk "MyDisk" contains 1,273 files.
+ *
+ *
+ * You can either do this programmatically, as in the above example,
+ * or by using a pattern (see
+ * {@link ChoiceFormat}
+ * for more information) as in:
+ *
+ *
+ * form.applyPattern(
+ * "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
+ *
+ *
+ *
+ * Note: As we see above, the string produced
+ * by a ChoiceFormat
in MessageFormat
is treated specially;
+ * occurrences of '{' are used to indicate subformats, and cause recursion.
+ * If you create both a MessageFormat
and ChoiceFormat
+ * programmatically (instead of using the string patterns), then be careful not to
+ * produce a format that recurses on itself, which will cause an infinite loop.
+ *
+ *
When a single argument is parsed more than once in the string, the last match
+ * will be the final result of the parsing. For example,
+ *
+ * MessageFormat mf = new MessageFormat("{0,number,#.##}, {0,number,#.#}");
+ * Object[] objs = {new Double(3.1415)};
+ * String result = mf.format( objs );
+ * // result now equals "3.14, 3.1"
+ * objs = null;
+ * objs = mf.parse(result, new ParsePosition(0));
+ * // objs now equals {new Double(3.1)}
+ *
+ *
+ * Likewise, parsing with a MessageFormat object using patterns containing
+ * multiple occurrences of the same argument would return the last match. For
+ * example,
+ *
+ * MessageFormat mf = new MessageFormat("{0}, {0}, {0}");
+ * String forParsing = "x, y, z";
+ * Object[] objs = mf.parse(forParsing, new ParsePosition(0));
+ * // result now equals {new String("z")}
+ *
+ *
+ *
+ *
+ * Message formats are not synchronized.
+ * It is recommended to create separate format instances for each thread.
+ * If multiple threads access a format concurrently, it must be synchronized
+ * externally.
+ *
+ * @see java.util.Locale
+ * @see Format
+ * @see NumberFormat
+ * @see DecimalFormat
+ * @see ChoiceFormat
+ * @see PluralFormat
+ * @see SelectFormat
+ * @author Mark Davis
+ * @stable ICU 3.0
+ */
+public class MessageFormat extends UFormat {
+ static final long serialVersionUID = 1L;
+
+ /**
+ * @internal
+ */
+ public final java.text.MessageFormat messageFormat;
+
+ /**
+ * @internal
+ * @param delegate the java.text.MessageFormat to which to delegate
+ */
+ public MessageFormat(java.text.MessageFormat delegate) {
+ // wrapNestedFormatters is defined elsewhere in this class and runs before the
+ // delegate is stored. NOTE(review): presumably it replaces the delegate's JDK
+ // subformats with ICU wrappers - confirm against its definition.
+ wrapNestedFormatters(delegate);
+ this.messageFormat = delegate;
+ }
+
+ /**
+ * Constructs a MessageFormat for the default locale and the
+ * specified pattern.
+ * The constructor first sets the locale, then parses the pattern and
+ * creates a list of subformats for the format elements contained in it.
+ * Patterns and their interpretation are specified in the
+ * class description .
+ *
+ * @param pattern the pattern for this message format
+ * @exception IllegalArgumentException if the pattern is invalid
+ * @stable ICU 3.0
+ */
+ public MessageFormat(String pattern) {
+ // Let the JDK class parse the pattern, then go through the delegate-wrapping constructor.
+ this(new java.text.MessageFormat(pattern));
+ }
+
+ /**
+ * Constructs a MessageFormat for the specified locale and
+ * pattern.
+ * The constructor first sets the locale, then parses the pattern and
+ * creates a list of subformats for the format elements contained in it.
+ * Patterns and their interpretation are specified in the
+ * class description .
+ *
+ * @param pattern the pattern for this message format
+ * @param locale the locale for this message format
+ * @exception IllegalArgumentException if the pattern is invalid
+ * @stable ICU 3.0
+ */
+ public MessageFormat(String pattern, Locale locale) {
+ // Let the JDK class parse the pattern for the given locale, then go through the
+ // delegate-wrapping constructor.
+ this(new java.text.MessageFormat(pattern, locale));
+ }
+
+ /**
+ * Constructs a MessageFormat for the specified locale and
+ * pattern.
+ * The constructor first sets the locale, then parses the pattern and
+ * creates a list of subformats for the format elements contained in it.
+ * Patterns and their interpretation are specified in the
+ * class description .
+ *
+ * @param pattern the pattern for this message format
+ * @param locale the locale for this message format
+ * @exception IllegalArgumentException if the pattern is invalid
+ * @stable ICU 3.2
+ */
+ public MessageFormat(String pattern, ULocale locale) {
+ // The JDK constructor takes a java.util.Locale, so the ULocale is converted with
+ // toLocale() before the delegate is created.
+ this(new java.text.MessageFormat(pattern, locale.toLocale()));
+ }
+
+ /**
+ * Sets the locale to be used when creating or comparing subformats.
+ * This affects subsequent calls to the {@link #applyPattern applyPattern}
+ * and {@link #toPattern toPattern} methods as well as to the
+ * <code>format</code> and
+ * {@link #formatToCharacterIterator formatToCharacterIterator} methods.
+ *
+ * @param locale the locale to be used when creating or comparing subformats
+ * @stable ICU 3.0
+ */
+ public void setLocale(Locale locale) {
+ // Forwarded unchanged to the JDK delegate; savedPattern is not touched here.
+ messageFormat.setLocale(locale);
+ }
+
+ /**
+ * Sets the locale to be used when creating or comparing subformats.
+ * This affects subsequent calls to the {@link #applyPattern applyPattern}
+ * and {@link #toPattern toPattern} methods as well as to the
+ * <code>format</code> and
+ * {@link #formatToCharacterIterator formatToCharacterIterator} methods.
+ *
+ * @param locale the locale to be used when creating or comparing subformats
+ * @stable ICU 3.2
+ */
+ public void setLocale(ULocale locale) {
+ // The delegate only understands java.util.Locale, so convert with toLocale() first;
+ // savedPattern is not touched here.
+ messageFormat.setLocale(locale.toLocale());
+ }
+
+ /**
+ * Returns the locale that's used when creating or comparing subformats.
+ *
+ * @return the locale used when creating or comparing subformats
+ * @stable ICU 3.0
+ */
+ public Locale getLocale() {
+ // The locale is held by the JDK delegate; this class keeps no copy of its own.
+ return messageFormat.getLocale();
+ }
+
+ /**
+ * {@icu} Returns the locale that's used when creating or comparing subformats.
+ *
+ * @return the locale used when creating or comparing subformats
+ * @stable ICU 3.2
+ */
+ public ULocale getULocale() {
+ // Convert the delegate's java.util.Locale into a ULocale via ULocale.forLocale.
+ return ULocale.forLocale(messageFormat.getLocale());
+ }
+
+ /**
+ * Sets the pattern used by this message format.
+ * The method parses the pattern and creates a list of subformats
+ * for the format elements contained in it.
+ * Patterns and their interpretation are specified in the
+ * class description .
+ *
+ * The pattern must contain only named or only numeric arguments,
+ * mixing them is not allowed.
+ *
+ * @param pttrn the pattern for this message format
+ * @throws IllegalArgumentException if the pattern is invalid
+ * @stable ICU 3.0
+ */
+ public void applyPattern(String pttrn) {
+ // The JDK delegate parses the pattern and creates its subformats.
+ messageFormat.applyPattern(pttrn);
+ // Wrap the newly created subformats again; this call also refreshes savedPattern
+ // from the delegate before any subformat is replaced.
+ wrapNestedFormatters(messageFormat);
+ }
+
+ /**
+ * Returns a pattern representing the current state of the message format.
+ * The string is constructed from internal information and therefore
+ * does not necessarily equal the previously applied pattern.
+ *
+ * @return a pattern representing the current state of the message format
+ * @stable ICU 3.0
+ */
+ public String toPattern() {
+     // Prefer the pattern text that was recorded before the subformats were wrapped.
+     if (savedPattern != null) {
+         return savedPattern;
+     }
+     // No recorded text (it is cleared when formats are replaced): ask the delegate.
+     return messageFormat.toPattern();
+ }
+
+ /**
+ * Sets the formats to use for the values passed into
+ * <code>format</code> methods or returned from <code>parse</code>
+ * methods. The indices of elements in <code>newFormats</code>
+ * correspond to the argument indices used in the previously set
+ * pattern string.
+ * The order of formats in <code>newFormats</code> thus corresponds to
+ * the order of elements in the <code>arguments</code> array passed
+ * to the <code>format</code> methods or the result array returned
+ * by the <code>parse</code> methods.
+ *
+ * If an argument index is used for more than one format element
+ * in the pattern string, then the corresponding new format is used
+ * for all such format elements. If an argument index is not used
+ * for any format element in the pattern string, then the
+ * corresponding new format is ignored. If fewer formats are provided
+ * than needed, then only the formats for argument indices less
+ * than newFormats.length
are replaced.
+ *
+ * This method is only supported if the format does not use
+ * named arguments, otherwise an IllegalArgumentException is thrown.
+ *
+ * @param newFormats the new formats to use
+ * @throws NullPointerException if newFormats
is null
+ * @throws IllegalArgumentException if this formatter uses named arguments
+ * @stable ICU 3.0
+ */
+ public void setFormatsByArgumentIndex(Format[] newFormats) {
+ // Forward first: if the delegate throws, savedPattern keeps its current value.
+ messageFormat.setFormatsByArgumentIndex(newFormats);
+ // The recorded pattern text may no longer match the formats, so drop it;
+ // toPattern() then falls back to the delegate.
+ savedPattern = null;
+ }
+
+ /**
+ * {@icu} Sets the formats to use for the values passed into
+ * format
methods or returned from parse
+ * methods. The keys in newFormats
are the argument
+ * names in the previously set pattern string, and the values
+ * are the formats.
+ *
+ * Only argument names from the pattern string are considered.
+ * Extra keys in newFormats
that do not correspond
+ * to an argument name are ignored. Similarly, if there is no
+ * format in newFormats for an argument name, the formatter
+ * for that argument remains unchanged.
+ *
+ * This may be called on formats that do not use named arguments.
+ * In this case the map will be queried for key Strings that
+ * represent argument indices, e.g. "0", "1", "2" etc.
+ *
+ * @param newFormats a map from String to Format providing new
+ * formats for named arguments.
+ * @stable ICU 3.8
+ */
+ public void setFormatsByArgumentName(Map newFormats) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the argument.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the formats to use for the format elements in the
+ * previously set pattern string.
+ * The order of formats in newFormats
corresponds to
+ * the order of format elements in the pattern string.
+ *
+ * If more formats are provided than needed by the pattern string,
+ * the remaining ones are ignored. If fewer formats are provided
+ * than needed, then only the first newFormats.length
+ * formats are replaced.
+ *
+ * Since the order of format elements in a pattern string often
+ * changes during localization, it is generally better to use the
+ * {@link #setFormatsByArgumentIndex setFormatsByArgumentIndex}
+ * method, which assumes an order of formats corresponding to the
+ * order of elements in the arguments
array passed to
+ * the format
methods or the result array returned by
+ * the parse
methods.
+ *
+ * @param newFormats the new formats to use
+ * @exception NullPointerException if newFormats
is null
+ * @stable ICU 3.0
+ */
+ public void setFormats(Format[] newFormats) {
+ // Forward first: if the delegate throws, savedPattern keeps its current value.
+ messageFormat.setFormats(newFormats);
+ // The recorded pattern text may no longer match the formats, so drop it;
+ // toPattern() then falls back to the delegate.
+ savedPattern = null;
+ }
+
+ /**
+ * Sets the format to use for the format elements within the
+ * previously set pattern string that use the given argument
+ * index.
+ * The argument index is part of the format element definition and
+ * represents an index into the arguments
array passed
+ * to the format
methods or the result array returned
+ * by the parse
methods.
+ *
+ * If the argument index is used for more than one format element
+ * in the pattern string, then the new format is used for all such
+ * format elements. If the argument index is not used for any format
+ * element in the pattern string, then the new format is ignored.
+ *
+ * This method is only supported when exclusively numbers are used for
+ * argument names. Otherwise an IllegalArgumentException is thrown.
+ *
+ * @param argumentIndex the argument index for which to use the new format
+ * @param newFormat the new format to use
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public void setFormatByArgumentIndex(int argumentIndex, Format newFormat) {
+ // Forward first: if the delegate throws, savedPattern keeps its current value.
+ messageFormat.setFormatByArgumentIndex(argumentIndex, newFormat);
+ // The recorded pattern text may no longer match the formats, so drop it;
+ // toPattern() then falls back to the delegate.
+ savedPattern = null;
+ }
+
+ /**
+ * {@icu} Sets the format to use for the format elements within the
+ * previously set pattern string that use the given argument
+ * name.
+ *
+ * If the argument name is used for more than one format element
+ * in the pattern string, then the new format is used for all such
+ * format elements. If the argument name is not used for any format
+ * element in the pattern string, then the new format is ignored.
+ *
+ * This API may be used on formats that do not use named arguments.
+ * In this case argumentName
should be a String that names
+ * an argument index, e.g. "0", "1", "2"... etc. If it does not name
+ * a valid index, the format will be ignored. No error is thrown.
+ *
+ * @param argumentName the name of the argument to change
+ * @param newFormat the new format to use
+ * @stable ICU 3.8
+ */
+ public void setFormatByArgumentName(String argumentName, Format newFormat) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the arguments.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the format to use for the format element with the given
+ * format element index within the previously set pattern string.
+ * The format element index is the zero-based number of the format
+ * element counting from the start of the pattern string.
+ *
+ * Since the order of format elements in a pattern string often
+ * changes during localization, it is generally better to use the
+ * {@link #setFormatByArgumentIndex setFormatByArgumentIndex}
+ * method, which accesses format elements based on the argument
+ * index they specify.
+ *
+ * @param formatElementIndex the index of a format element within the pattern
+ * @param newFormat the format to use for the specified format element
+ * @exception ArrayIndexOutOfBoundsException if formatElementIndex is equal to or
+ * larger than the number of format elements in the pattern string
+ * @stable ICU 3.0
+ */
+ public void setFormat(int formatElementIndex, Format newFormat) {
+ // Forward first: if the delegate throws, savedPattern keeps its current value.
+ messageFormat.setFormat(formatElementIndex, newFormat);
+ // The recorded pattern text may no longer match the formats, so drop it;
+ // toPattern() then falls back to the delegate.
+ savedPattern = null;
+ }
+
+ /**
+ * Returns the formats used for the values passed into
+ * format
methods or returned from parse
+ * methods. The indices of elements in the returned array
+ * correspond to the argument indices used in the previously set
+ * pattern string.
+ * The order of formats in the returned array thus corresponds to
+ * the order of elements in the arguments
array passed
+ * to the format
methods or the result array returned
+ * by the parse
methods.
+ *
+ * If an argument index is used for more than one format element
+ * in the pattern string, then the format used for the last such
+ * format element is returned in the array. If an argument index
+ * is not used for any format element in the pattern string, then
+ * null is returned in the array.
+ *
+ * This method is only supported when exclusively numbers are used for
+ * argument names. Otherwise an IllegalArgumentException is thrown.
+ *
+ * @return the formats used for the arguments within the pattern
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public Format[] getFormatsByArgumentIndex() {
+ // Returns whatever the JDK delegate reports; the array is passed through unmodified.
+ return messageFormat.getFormatsByArgumentIndex();
+ }
+
+ /**
+ * Returns the formats used for the format elements in the
+ * previously set pattern string.
+ * The order of formats in the returned array corresponds to
+ * the order of format elements in the pattern string.
+ *
+ * Since the order of format elements in a pattern string often
+ * changes during localization, it's generally better to use the
+ * {@link #getFormatsByArgumentIndex()}
+ * method, which assumes an order of formats corresponding to the
+ * order of elements in the arguments
array passed to
+ * the format
methods or the result array returned by
+ * the parse
methods.
+ *
+ * This method is only supported when exclusively numbers are used for
+ * argument names. Otherwise an IllegalArgumentException is thrown.
+ *
+ * @return the formats used for the format elements in the pattern
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public Format[] getFormats() {
+ // Returns whatever the JDK delegate reports; the array is passed through unmodified.
+ return messageFormat.getFormats();
+ }
+
+ /**
+ * {@icu} Returns the format argument names. For more details, see
+ * {@link #setFormatByArgumentName(String, Format)}.
+ * @return List of names
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public Set getFormatArgumentNames() {
+ // Not implemented in com.ibm.icu.base: every call fails.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns the formats according to their argument names. For more details, see
+ * {@link #setFormatByArgumentName(String, Format)}.
+ * @return format associated with the name, or null if there isn't one.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public Format getFormatByArgumentName(String argumentName) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the argument.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Formats an array of objects and appends the MessageFormat
's
+ * pattern, with format elements replaced by the formatted objects, to the
+ * provided StringBuffer
.
+ *
+ * The text substituted for the individual format elements is derived from
+ * the current subformat of the format element and the
+ * arguments
element at the format element's argument index
+ * as indicated by the first matching line of the following table. An
+ * argument is unavailable if arguments
is
+ * null
or has fewer than argumentIndex+1 elements. When
+ * an argument is unavailable no substitution is performed.
+ *
+ *
+ *
+ * Subformat
+ * Argument
+ * Formatted Text
+ *
+ * any
+ * unavailable
+ * "{" + argumentIndex + "}"
+ *
+ * any
+ * null
+ * "null"
+ *
+ * instanceof ChoiceFormat
+ * any
+ * subformat.format(argument).indexOf('{') >= 0 ?
+ * (new MessageFormat(subformat.format(argument), getLocale())).format(argument) :
+ * subformat.format(argument)
+ *
+ * != null
+ * any
+ * subformat.format(argument)
+ *
+ * null
+ * instanceof Number
+ * NumberFormat.getInstance(getLocale()).format(argument)
+ *
+ * null
+ * instanceof Date
+ * DateFormat.getDateTimeInstance(DateFormat.SHORT,
+ * DateFormat.SHORT, getLocale()).format(argument)
+ *
+ * null
+ * instanceof String
+ * argument
+ *
+ * null
+ * any
+ * argument.toString()
+ *
+ *
+ * If pos
is non-null, and refers to
+ * Field.ARGUMENT
, the location of the first formatted
+ * string will be returned.
+ *
+ * This method is only supported when the format does not use named
+ * arguments, otherwise an IllegalArgumentException is thrown.
+ *
+ * @param arguments an array of objects to be formatted and substituted.
+ * @param result where text is appended.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @throws IllegalArgumentException if an argument in the
+ * arguments
array is not of the type
+ * expected by the format element(s) that use it.
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public final StringBuffer format(Object[] arguments, StringBuffer result,
+         FieldPosition pos) {
+     // Build a JDK-flavoured copy of the caller's position (null stays null).
+     final FieldPosition delegatePos = toJDKFieldPosition(pos);
+     final StringBuffer formatted = messageFormat.format(arguments, result, delegatePos);
+     if (delegatePos == null) {
+         return formatted;
+     }
+     // Report the offsets found by the delegate back through the caller's object.
+     pos.setBeginIndex(delegatePos.getBeginIndex());
+     pos.setEndIndex(delegatePos.getEndIndex());
+     return formatted;
+ }
+
+ /**
+ * Formats a map of objects and appends the MessageFormat
's
+ * pattern, with format elements replaced by the formatted objects, to the
+ * provided StringBuffer
.
+ *
+ * The text substituted for the individual format elements is derived from
+ * the current subformat of the format element and the
+ * <code>arguments</code> value corresponding to the format element's
+ * argument name.
+ *
+ * This API may be called on formats that do not use named arguments.
+ * In this case the keys in <code>arguments</code> must be numeric
+ * strings (e.g. "0", "1", "2"...).
+ *
+ * An argument is unavailable if arguments
is
+ * null
or does not have a value corresponding to an argument
+ * name in the pattern. When an argument is unavailable no substitution
+ * is performed.
+ *
+ * @param arguments a map of objects to be formatted and substituted.
+ * @param result where text is appended.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @throws IllegalArgumentException if an argument in the
+ * arguments
array is not of the type
+ * expected by the format element(s) that use it.
+ * @return the passed-in StringBuffer
+ * @stable ICU 3.8
+ */
+ public final StringBuffer format(Map arguments, StringBuffer result,
+ FieldPosition pos) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the arguments.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Creates a MessageFormat with the given pattern and uses it
+ * to format the given arguments. This is equivalent to
+ *
+ * (new {@link #MessageFormat(String) MessageFormat}(pattern)).{@link
+ * #format(java.lang.Object[], java.lang.StringBuffer, java.text.FieldPosition)
+ * format}(arguments, new StringBuffer(), null).toString()
+ *
+ *
+ * @throws IllegalArgumentException if the pattern is invalid,
+ * or if an argument in the arguments
array
+ * is not of the type expected by the format element(s)
+ * that use it.
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public static String format(String pattern, Object... arguments) {
+ // Handed straight to the JDK's static helper; no instance of this wrapper class is created.
+ return java.text.MessageFormat.format(pattern, arguments);
+ }
+
+ /**
+ * Creates a MessageFormat with the given pattern and uses it to
+ * format the given arguments. The pattern must identify arguments
+ * by name instead of by number.
+ *
+ * @throws IllegalArgumentException if the pattern is invalid,
+ * or if an argument in the arguments
map
+ * is not of the type expected by the format element(s)
+ * that use it.
+ * @see #format(Map, StringBuffer, FieldPosition)
+ * @see #format(String, Object[])
+ * @stable ICU 3.8
+ */
+ public static String format(String pattern, Map arguments) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the arguments.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns true if this MessageFormat uses named arguments,
+ * and false otherwise. See class description.
+ *
+ * @return true if named arguments are used.
+ * @stable ICU 3.8
+ */
+ public boolean usesNamedArguments() {
+ // always false with com.ibm.icu.base: the named-argument methods of this class
+ // only throw UnsupportedOperationException
+ return false;
+ }
+
+ // Overrides
+ /**
+ * Formats a map or array of objects and appends the MessageFormat
's
+ * pattern, with format elements replaced by the formatted objects, to the
+ * provided StringBuffer
.
+ * This is equivalent to either of
+ *
+ * {@link #format(java.lang.Object[], java.lang.StringBuffer,
+ * java.text.FieldPosition) format}((Object[]) arguments, result, pos)
+ * {@link #format(java.util.Map, java.lang.StringBuffer,
+ * java.text.FieldPosition) format}((Map) arguments, result, pos)
+ *
+ * A map must be provided if this format uses named arguments, otherwise
+ * an IllegalArgumentException will be thrown.
+ * @param arguments a map or array of objects to be formatted
+ * @param result where text is appended
+ * @param pos On input: an alignment field, if desired
+ * On output: the offsets of the alignment field
+ * @throws IllegalArgumentException if an argument in
+ * arguments
is not of the type
+ * expected by the format element(s) that use it
+ * @throws IllegalArgumentException if arguments is
+ * an array of Object and this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public final StringBuffer format(Object arguments, StringBuffer result,
+         FieldPosition pos) {
+     // Build a JDK-flavoured copy of the caller's position (null stays null).
+     final FieldPosition delegatePos = toJDKFieldPosition(pos);
+     final StringBuffer formatted = messageFormat.format(arguments, result, delegatePos);
+     if (delegatePos != null) {
+         // Report the offsets found by the delegate back through the caller's object.
+         final int begin = delegatePos.getBeginIndex();
+         pos.setBeginIndex(begin);
+         final int end = delegatePos.getEndIndex();
+         pos.setEndIndex(end);
+     }
+     return formatted;
+ }
+
+ /**
+ * Formats an array of objects and inserts them into the
+ * MessageFormat
's pattern, producing an
+ * AttributedCharacterIterator
.
+ * You can use the returned AttributedCharacterIterator
+ * to build the resulting String, as well as to determine information
+ * about the resulting String.
+ *
+ * The text of the returned AttributedCharacterIterator
is
+ * the same that would be returned by
+ *
+ * {@link #format(java.lang.Object[], java.lang.StringBuffer,
+ * java.text.FieldPosition) format}(arguments, new StringBuffer(), null).toString()
+ *
+ *
+ * In addition, the AttributedCharacterIterator
contains at
+ * least attributes indicating where text was generated from an
+ * argument in the arguments
array. The keys of these attributes are of
+ * type MessageFormat.Field
, their values are
+ * Integer
objects indicating the index in the arguments
+ * array of the argument from which the text was generated.
+ *
+ * The attributes/value from the underlying Format
+ * instances that MessageFormat
uses will also be
+ * placed in the resulting AttributedCharacterIterator
.
+ * This allows you to not only find where an argument is placed in the
+ * resulting String, but also which fields it contains in turn.
+ *
+ * @param arguments an array of objects to be formatted and substituted.
+ * @return AttributedCharacterIterator describing the formatted value.
+ * @exception NullPointerException if arguments
is null.
+ * @exception IllegalArgumentException if an argument in the
+ * arguments
array is not of the type
+ * expected by the format element(s) that use it.
+ * @stable ICU 3.8
+ */
+ public AttributedCharacterIterator formatToCharacterIterator(Object arguments) {
+     AttributedCharacterIterator it = messageFormat.formatToCharacterIterator(arguments);
+
+     // Extract the formatted text first; the result is rebuilt on top of it.
+     StringBuilder sb = new StringBuilder();
+     for (char c = it.first(); c != CharacterIterator.DONE; c = it.next()) {
+         sb.append(c);
+     }
+
+     // Fresh attributed string that receives the re-keyed attributes.
+     AttributedString attrstr = new AttributedString(sb.toString());
+
+     // Walk the delegate's attribute runs and copy each attribute, replacing the
+     // JDK argument key with the ICU one.
+     int idx = 0;
+     it.first();
+     while (idx < it.getEndIndex()) {
+         int end = it.getRunLimit();
+         // Parameterized types are required here: iterating a raw entry set yields Object.
+         Map<Attribute, Object> attributes = it.getAttributes();
+         if (attributes != null) {
+             for (Entry<Attribute, Object> entry : attributes.entrySet()) {
+                 Attribute attr = entry.getKey();
+                 Object val = entry.getValue();
+                 if (attr.equals(java.text.MessageFormat.Field.ARGUMENT)) {
+                     // Only the key is mapped. The value must stay the Integer argument
+                     // index supplied by the delegate, as documented for Field.ARGUMENT;
+                     // overwriting it with the Field constant would lose that index.
+                     attr = Field.ARGUMENT;
+                 }
+                 attrstr.addAttribute(attr, val, idx, end);
+             }
+         }
+         // Advance the delegate iterator to the start of the next run.
+         idx = end;
+         while (it.getIndex() < idx) {
+             it.next();
+         }
+     }
+
+     return attrstr.getIterator();
+ }
+
+ /**
+ * Parses the string.
+ *
+ * Caveats: The parse may fail in a number of circumstances.
+ * For example:
+ *
+ * If one of the arguments does not occur in the pattern.
+ * If the format of an argument loses information, such as
+ * with a choice format where a large number formats to "many".
+ * Does not yet handle recursion (where
+ * the substituted strings contain {n} references.)
+ * Will not always find a match (or the correct match)
+ * if some part of the parse is ambiguous.
+ * For example, if the pattern "{1},{2}" is used with the
+ * string arguments {"a,b", "c"}, it will format as "a,b,c".
+ * When the result is parsed, it will return {"a", "b,c"}.
+ * If a single argument is parsed more than once in the string,
+ * then the later parse wins.
+ *
+ * When the parse fails, use ParsePosition.getErrorIndex() to find out
+ * where in the string the parsing failed. The returned error
+ * index is the starting offset of the sub-patterns that the string
+ * is comparing with. For example, if the parsing string "AAA {0} BBB"
+ * is comparing against the pattern "AAD {0} BBB", the error index is
+ * 0. When an error occurs, the call to this method will return null.
+ * If the source is null, return an empty array.
+ *
+ * This method is only supported with numbered arguments. If
+ * the format pattern uses named arguments, an
+ * IllegalArgumentException is thrown.
+ *
+ * @throws IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public Object[] parse(String source, ParsePosition pos) {
+ // Parsing is done entirely by the JDK delegate, which also updates pos.
+ return messageFormat.parse(source, pos);
+ }
+
+ /**
+ * {@icu} Parses the string, returning the results in a Map.
+ * This is similar to the version that returns an array
+ * of Object. This supports both named and numbered
+ * arguments-- if numbered, the keys in the map are the
+ * corresponding Strings (e.g. "0", "1", "2"...).
+ *
+ * @param source the text to parse
+ * @param pos the position at which to start parsing. on return,
+ * contains the result of the parse.
+ * @return a Map containing key/value pairs for each parsed argument.
+ * @stable ICU 3.8
+ */
+ public Map parseToMap(String source, ParsePosition pos) {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the arguments.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Parses text from the beginning of the given string to produce an object
+ * array.
+ * The method may not use the entire text of the given string.
+ *
+ * See the {@link #parse(String, ParsePosition)} method for more information
+ * on message parsing.
+ *
+ * @param source A String
whose beginning should be parsed.
+ * @return An Object
array parsed from the string.
+ * @exception ParseException if the beginning of the specified string cannot be parsed.
+ * @exception IllegalArgumentException if this format uses named arguments
+ * @stable ICU 3.0
+ */
+ public Object[] parse(String source) throws ParseException {
+ // Parsing is done entirely by the JDK delegate; its ParseException propagates unchanged.
+ return messageFormat.parse(source);
+ }
+
+ /**
+ * {@icu} Parses text from the beginning of the given string to produce a map from
+ * argument to values. The method may not use the entire text of the given string.
+ *
+ * <p>See the {@link #parse(String, ParsePosition)} method for more information on
+ * message parsing.
+ *
+ * @param source A String
whose beginning should be parsed.
+ * @return A Map
parsed from the string.
+ * @throws ParseException if the beginning of the specified string cannot
+ * be parsed.
+ * @see #parseToMap(String, ParsePosition)
+ * @stable ICU 3.8
+ */
+ public Map parseToMap(String source) throws ParseException {
+ // Not implemented in com.ibm.icu.base: every call fails, whatever the argument.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Parses text from a string to produce an object array or Map.
+ *
+ * The method attempts to parse text starting at the index given by
+ * pos
.
+ * If parsing succeeds, then the index of pos
is updated
+ * to the index after the last character used (parsing does not necessarily
+ * use all characters up to the end of the string), and the parsed
+ * object array is returned. The updated pos
can be used to
+ * indicate the starting point for the next call to this method.
+ * If an error occurs, then the index of pos
is not
+ * changed, the error index of pos
is set to the index of
+ * the character where the error occurred, and null is returned.
+ *
+ * See the {@link #parse(String, ParsePosition)} method for more information
+ * on message parsing.
+ *
+ * @param source A String
, part of which should be parsed.
+ * @param pos A ParsePosition
object with index and error
+ * index information as described above.
+ * @return An Object
parsed from the string, either an
+ * array of Object, or a Map, depending on whether named
+ * arguments are used. This can be queried using usesNamedArguments
.
+ * In case of error, returns null.
+ * @throws NullPointerException if pos
is null.
+ * @stable ICU 3.0
+ */
+ public Object parseObject(String source, ParsePosition pos) {
+ // Forwards to the delegate's parse(String, ParsePosition); the Object[] it returns
+ // is handed back typed as a plain Object.
+ return messageFormat.parse(source, pos);
+ }
+
+ /**
+ * Overrides clone.
+ *
+ * @return a clone of this instance.
+ * @stable ICU 3.0
+ */
+ public Object clone() {
+     // Clone the JDK delegate and wrap the copy in a new instance of this class.
+     final java.text.MessageFormat delegateCopy =
+             (java.text.MessageFormat) messageFormat.clone();
+     final MessageFormat copy = new MessageFormat(delegateCopy);
+     // The constructor derives savedPattern from the delegate copy; replace it with
+     // the value held by this instance so both report the same pattern.
+     copy.savedPattern = this.savedPattern;
+     return copy;
+ }
+
+ /**
+ * Overrides equals.
+ * @stable ICU 3.0
+ */
+ public boolean equals(Object obj) {
+     if (obj == this) {
+         return true;
+     }
+     // instanceof rejects null and unrelated types directly, instead of provoking a
+     // NullPointerException/ClassCastException and swallowing every Exception.
+     if (!(obj instanceof MessageFormat)) {
+         return false;
+     }
+     // Two wrappers are equal exactly when their JDK delegates are equal.
+     return messageFormat.equals(((MessageFormat) obj).messageFormat);
+ }
+
+ /**
+ * Overrides hashCode.
+ * @stable ICU 3.0
+ */
+ public int hashCode() {
+ // Uses the delegate's hash code, matching equals, which compares the delegates.
+ return messageFormat.hashCode();
+ }
+
+ /**
+ * Defines constants that are used as attribute keys in the
+ * AttributedCharacterIterator
returned
+ * from MessageFormat.formatToCharacterIterator
.
+ *
+ * @stable ICU 3.8
+ */
+ public static class Field extends Format.Field {
+
+     private static final long serialVersionUID = 7510380454602616157L;
+
+     /**
+      * Constant identifying a portion of a message that was generated
+      * from an argument passed into <code>formatToCharacterIterator</code>.
+      * The value associated with the key will be an <code>Integer</code>
+      * indicating the index in the <code>arguments</code> array of the
+      * argument from which the text was generated.
+      *
+      * @stable ICU 3.8
+      */
+     public static final Field ARGUMENT = new Field("message argument field");
+
+     /**
+      * Create a <code>Field</code> with the specified name.
+      *
+      * @param name The name of the attribute
+      *
+      * @stable ICU 3.8
+      */
+     protected Field(String name) {
+         super(name);
+     }
+
+     /**
+      * Resolves instances being deserialized to the predefined constants.
+      *
+      * @return resolved MessageFormat.Field constant
+      * @throws InvalidObjectException if the constant could not be resolved.
+      *
+      * @stable ICU 3.8
+      */
+     protected Object readResolve() throws InvalidObjectException {
+         // Subclasses have to supply their own resolution logic.
+         final boolean exactType = this.getClass() == MessageFormat.Field.class;
+         if (!exactType) {
+             throw new InvalidObjectException(
+                 "A subclass of MessageFormat.Field must implement readResolve.");
+         }
+         // ARGUMENT is the only predefined constant; any other name is rejected.
+         final boolean namesArgument = this.getName().equals(ARGUMENT.getName());
+         if (!namesArgument) {
+             throw new InvalidObjectException("Unknown attribute name.");
+         }
+         return ARGUMENT;
+     }
+
+ }
+
+ private static final char SINGLE_QUOTE = '\'';
+ private static final char CURLY_BRACE_LEFT = '{';
+ private static final char CURLY_BRACE_RIGHT = '}';
+
+ // Scanner states used by autoQuoteApostrophe.
+ private static final int STATE_INITIAL = 0;       // plain text
+ private static final int STATE_SINGLE_QUOTE = 1;  // just saw an apostrophe in plain text
+ private static final int STATE_IN_QUOTE = 2;      // inside a quote opened right before a brace
+ private static final int STATE_MSG_ELEMENT = 3;   // inside a {...} format element
+
+ /**
+  * {@icu} Converts an 'apostrophe-friendly' pattern into a standard
+  * pattern. Standard patterns treat all apostrophes as
+  * quotes, which is problematic in some languages, e.g.
+  * French, where apostrophe is commonly used. This utility
+  * assumes that only an unpaired apostrophe immediately before
+  * a brace is a true quote. Other unpaired apostrophes are paired,
+  * and the resulting standard pattern string is returned.
+  *
+  * <p>Note it is not guaranteed that the returned pattern
+  * is indeed a valid pattern. The only effect is to convert
+  * between patterns having different quoting semantics.
+  *
+  * @param pattern the 'apostrophe-friendly' pattern to convert
+  * @return the standard equivalent of the original pattern
+  * @stable ICU 3.4
+  */
+ public static String autoQuoteApostrophe(String pattern) {
+     final int length = pattern.length();
+     final StringBuilder out = new StringBuilder(length * 2);
+     int mode = STATE_INITIAL;
+     int depth = 0; // nesting level of braces while inside a format element
+     for (int pos = 0; pos < length; pos++) {
+         final char ch = pattern.charAt(pos);
+         if (mode == STATE_INITIAL) {
+             if (ch == SINGLE_QUOTE) {
+                 mode = STATE_SINGLE_QUOTE;
+             } else if (ch == CURLY_BRACE_LEFT) {
+                 mode = STATE_MSG_ELEMENT;
+                 depth++;
+             }
+         } else if (mode == STATE_SINGLE_QUOTE) {
+             if (ch == SINGLE_QUOTE) {
+                 // Doubled apostrophe: already an escaped literal.
+                 mode = STATE_INITIAL;
+             } else if (ch == CURLY_BRACE_LEFT || ch == CURLY_BRACE_RIGHT) {
+                 // Apostrophe directly before a brace is a real quote.
+                 mode = STATE_IN_QUOTE;
+             } else {
+                 // Lone apostrophe in text: pair it before emitting the next character.
+                 out.append(SINGLE_QUOTE);
+                 mode = STATE_INITIAL;
+             }
+         } else if (mode == STATE_IN_QUOTE) {
+             if (ch == SINGLE_QUOTE) {
+                 mode = STATE_INITIAL;
+             }
+         } else if (mode == STATE_MSG_ELEMENT) {
+             if (ch == CURLY_BRACE_LEFT) {
+                 depth++;
+             } else if (ch == CURLY_BRACE_RIGHT && --depth == 0) {
+                 mode = STATE_INITIAL;
+             }
+         }
+         out.append(ch);
+     }
+     // Close a quote that is still pending at the end of the pattern.
+     if (mode == STATE_SINGLE_QUOTE || mode == STATE_IN_QUOTE) {
+         out.append(SINGLE_QUOTE);
+     }
+     return out.toString();
+ }
+
+ private static FieldPosition toJDKFieldPosition(FieldPosition icuPos) {
+     // A missing position stays missing.
+     if (icuPos == null) {
+         return null;
+     }
+
+     final int fieldId = icuPos.getField();
+     final Format.Field icuAttribute = icuPos.getFieldAttribute();
+
+     final FieldPosition converted;
+     if (icuAttribute == null) {
+         // Identified by integer field id only.
+         converted = new FieldPosition(fieldId);
+     } else {
+         // Swap this class's ARGUMENT constant for the JDK one; any other
+         // attribute is passed through as is.
+         final Format.Field jdkAttribute = icuAttribute.equals(Field.ARGUMENT)
+                 ? java.text.MessageFormat.Field.ARGUMENT
+                 : icuAttribute;
+         converted = new FieldPosition(jdkAttribute, fieldId);
+     }
+
+     // Carry over the offsets already stored in the caller's position.
+     converted.setBeginIndex(icuPos.getBeginIndex());
+     converted.setEndIndex(icuPos.getEndIndex());
+     return converted;
+ }
+
+ private void wrapNestedFormatters(java.text.MessageFormat mfmt) {
+     // The subformats that the Java MessageFormat created are exchanged for the ICU
+     // wrapper classes so that FieldPosition / attributed text use the ICU field
+     // definitions, such as com.ibm.icu.text.NumberFormat.INTEGER_FIELD.
+
+     // Swapping a subformat can alter what the delegate reports as its pattern
+     // (for example "{0,integer} files" turning into "{0} files"), so the pattern
+     // text is recorded before anything is replaced.
+     savedPattern = mfmt.toPattern();
+
+     final Format[] nested = mfmt.getFormats();
+     int slot = 0;
+     while (slot < nested.length) {
+         final Format current = nested[slot];
+         if (current instanceof java.text.DateFormat) {
+             nested[slot] = new DateFormat((java.text.DateFormat) current);
+         } else if (current instanceof java.text.NumberFormat) {
+             nested[slot] = new NumberFormat((java.text.NumberFormat) current);
+         }
+         slot++;
+     }
+     mfmt.setFormats(nested);
+ }
+
+ // Pattern text recorded by wrapNestedFormatters; null once formats were replaced
+ // through one of the setFormat* methods.
+ private String savedPattern;
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java
new file mode 100644
index 00000000000..9f808836fb3
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java
@@ -0,0 +1,1312 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import java.io.InvalidObjectException;
+import java.math.BigInteger;
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParseException;
+import java.text.ParsePosition;
+import java.util.Locale;
+import java.util.Set;
+
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.CurrencyAmount;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * {@icuenhanced java.text.NumberFormat}.{@icu _usage_}
+ *
+ * <code>NumberFormat</code> is the abstract base class for all number
+ * formats. This class provides the interface for formatting and parsing
+ * numbers. <code>NumberFormat</code> also provides methods for determining
+ * which locales have number formats, and what their names are.
+ *
+ * <code>NumberFormat</code> helps you to format and parse numbers for any locale.
+ * Your code can be completely independent of the locale conventions for
+ * decimal points, thousands-separators, or even the particular decimal
+ * digits used, or whether the number format is even decimal.
+ *
+ *
+ * To format a number for the current Locale, use one of the factory
+ * class methods:
+ *
+ *
+ * myString = NumberFormat.getInstance().format(myNumber);
+ *
+ *
+ * If you are formatting multiple numbers, it is
+ * more efficient to get the format and use it multiple times so that
+ * the system doesn't have to fetch the information about the local
+ * language and country conventions multiple times.
+ *
+ *
+ * NumberFormat nf = NumberFormat.getInstance();
+ * for (int i = 0; i < a.length; ++i) {
+ * output.println(nf.format(myNumber[i]) + "; ");
+ * }
+ *
+ *
+ * To format a number for a different Locale, specify it in the
+ * call to <code>getInstance</code>.
+ *
+ *
+ * NumberFormat nf = NumberFormat.getInstance(Locale.FRENCH);
+ *
+ *
+ * You can also use a <code>NumberFormat</code> to parse numbers:
+ *
+ *
+ * myNumber = nf.parse(myString);
+ *
+ *
+ * Use <code>getInstance</code> or <code>getNumberInstance</code> to get the
+ * normal number format. Use <code>getIntegerInstance</code> to get an
+ * integer number format. Use <code>getCurrencyInstance</code> to get the
+ * currency number format. And use <code>getPercentInstance</code> to get a
+ * format for displaying percentages. With this format, a fraction like
+ * 0.53 is displayed as 53%.
+ *
+ *
+ * Starting from ICU 4.2, you can use getInstance() by passing in a 'style'
+ * as parameter to get the correct instance.
+ * For example,
+ * use getInstance(...NUMBERSTYLE) to get the normal number format,
+ * getInstance(...PERCENTSTYLE) to get a format for displaying percentage,
+ * getInstance(...SCIENTIFICSTYLE) to get a format for displaying scientific number,
+ * getInstance(...INTEGERSTYLE) to get an integer number format,
+ * getInstance(...CURRENCYSTYLE) to get the currency number format,
+ * in which the currency is represented by its symbol, for example, "$3.00".
+ * getInstance(...ISOCURRENCYSTYLE) to get the currency number format,
+ * in which the currency is represented by its ISO code, for example "USD3.00".
+ * getInstance(...PLURALCURRENCYSTYLE) to get the currency number format,
+ * in which the currency is represented by its full name in plural format,
+ * for example, "3.00 US dollars" or "1.00 US dollar".
+ *
+ *
+ *
+ * You can also control the display of numbers with such methods as
+ * <code>setMinimumFractionDigits</code>.
+ * If you want even more control over the format or parsing,
+ * or want to give your users more control,
+ * you can try casting the <code>NumberFormat</code> you get from the factory methods
+ * to a <code>DecimalFormat</code>. This will work for the vast majority
+ * of locales; just remember to put it in a <code>try</code> block in case you
+ * encounter an unusual one.
+ *
+ *
+ * NumberFormat is designed such that some controls
+ * work for formatting and others work for parsing. The following is
+ * the detailed description for each of these control methods:
+ *
+ * setParseIntegerOnly : only affects parsing, e.g.
+ * if true, "3456.78" -> 3456 (and leaves the parse position just after '6')
+ * if false, "3456.78" -> 3456.78 (and leaves the parse position just after '8')
+ * This is independent of formatting. If you want to not show a decimal point
+ * where there might be no digits after the decimal point, use
+ * setDecimalSeparatorAlwaysShown on DecimalFormat.
+ *
+ * You can also use forms of the <code>parse</code> and <code>format</code>
+ * methods with <code>ParsePosition</code> and <code>FieldPosition</code> to
+ * allow you to:
+ *
+ * progressively parse through pieces of a string
+ * align the decimal point and other areas
+ *
+ * For example, you can align numbers in two ways:
+ *
+ * If you are using a monospaced font with spacing for alignment,
+ * you can pass the <code>FieldPosition</code> in your format call, with
+ * <code>field</code> = <code>INTEGER_FIELD</code>. On output,
+ * <code>getEndIndex</code> will be set to the offset between the
+ * last character of the integer and the decimal. Add
+ * (desiredSpaceCount - getEndIndex) spaces at the front of the string.
+ *
+ * If you are using proportional fonts,
+ * instead of padding with spaces, measure the width
+ * of the string in pixels from the start to <code>getEndIndex</code>.
+ * Then move the pen by
+ * (desiredPixelWidth - widthToAlignmentPoint) before drawing the text.
+ * It also works where there is no decimal, but possibly additional
+ * characters at the end, e.g., with parentheses in negative
+ * numbers: "(12)" for -12.
+ *
+ *
+ * Synchronization
+ *
+ * Number formats are generally not synchronized. It is recommended to create
+ * separate format instances for each thread. If multiple threads access a format
+ * concurrently, it must be synchronized externally.
+ *
+ *
+ * <h4>DecimalFormat</h4>
+ * DecimalFormat is the concrete implementation of NumberFormat, and the
+ * NumberFormat API is essentially an abstraction from DecimalFormat's API.
+ * Refer to DecimalFormat for more information about this API.
+ *
+ * see DecimalFormat
+ * see java.text.ChoiceFormat
+ * @author Mark Davis
+ * @author Helena Shih
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public class NumberFormat extends Format {
+ private static final long serialVersionUID = 1;
+
+ /**
+ * @internal
+ */
+ public final java.text.NumberFormat numberFormat;
+
+ /**
+ * @internal
+ * @param delegate the NumberFormat to which to delegate
+ */
+ public NumberFormat(java.text.NumberFormat delegate) {
+     // Keep the JDK formatter that the methods of this wrapper forward to.
+     numberFormat = delegate;
+ }
+
+ /**
+ * {@icu} Constant to specify normal number style of format.
+ * @stable ICU 4.2
+ */
+ public static final int NUMBERSTYLE = 0;
+ /**
+ * {@icu} Constant to specify currency style of format which uses currency symbol
+ * to represent currency, for example: "$3.00".
+ * @stable ICU 4.2
+ */
+ public static final int CURRENCYSTYLE = 1;
+ /**
+ * {@icu} Constant to specify a style of format to display percent.
+ * @stable ICU 4.2
+ */
+ public static final int PERCENTSTYLE = 2;
+ /**
+ * {@icu} Constant to specify a style of format to display scientific number.
+ * @stable ICU 4.2
+ */
+ public static final int SCIENTIFICSTYLE = 3;
+ /**
+ * {@icu} Constant to specify an integer number style format.
+ * @stable ICU 4.2
+ */
+ public static final int INTEGERSTYLE = 4;
+ /**
+ * {@icu} Constant to specify currency style of format which uses currency
+ * ISO code to represent currency, for example: "USD3.00".
+ * @stable ICU 4.2
+ */
+ public static final int ISOCURRENCYSTYLE = 5;
+ /**
+ * {@icu} Constant to specify currency style of format which uses currency
+ * long name with plural format to represent currency, for example,
+ * "3.00 US Dollars".
+ * @stable ICU 4.2
+ */
+ public static final int PLURALCURRENCYSTYLE = 6;
+
+ /**
+ * Field constant used to construct a FieldPosition object. Signifies that
+ * the position of the integer part of a formatted number should be returned.
+ * @see java.text.FieldPosition
+ * @stable ICU 2.0
+ */
+ public static final int INTEGER_FIELD = 0;
+
+ /**
+ * Field constant used to construct a FieldPosition object. Signifies that
+ * the position of the fraction part of a formatted number should be returned.
+ * @see java.text.FieldPosition
+ * @stable ICU 2.0
+ */
+ public static final int FRACTION_FIELD = 1;
+
+ /**
+ * Formats a number and appends the resulting text to the given string buffer.
+ * {@icunote} recognizes <code>BigInteger</code>
+ * and <code>BigDecimal</code> objects.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(Object number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     final FieldPosition converted = toJDKFieldPosition(pos);
+     final StringBuffer result = numberFormat.format(number, toAppendTo, converted);
+     if (converted == null) {
+         // No converted position, so there is nothing to copy back.
+         return result;
+     }
+     // Report the range found by the delegate through the caller's position.
+     pos.setBeginIndex(converted.getBeginIndex());
+     pos.setEndIndex(converted.getEndIndex());
+     return result;
+ }
+
+ /**
+ * Parses text from a string to produce a number.
+ * @param source the String to parse
+ * @param parsePosition the position at which to start the parse
+ * @return the parsed number, or null
+ * @see java.text.NumberFormat#parseObject(String, ParsePosition)
+ * @stable ICU 2.0
+ */
+ public final Object parseObject(String source,
+         ParsePosition parsePosition) {
+     // Parsing is forwarded to the wrapped JDK formatter's parse method.
+     final Number parsed = numberFormat.parse(source, parsePosition);
+     return parsed;
+ }
+
+ /**
+ * Specialization of format.
+ * @see java.text.Format#format(Object)
+ * @stable ICU 2.0
+ */
+ public final String format(double number) {
+     // Forwarded to the wrapped JDK formatter.
+     final String text = numberFormat.format(number);
+     return text;
+ }
+
+ /**
+ * Specialization of format.
+ * @see java.text.Format#format(Object)
+ * @stable ICU 2.0
+ */
+ public final String format(long number) {
+     // Forwarded to the wrapped JDK formatter.
+     final String text = numberFormat.format(number);
+     return text;
+ }
+
+ /**
+ * {@icu} Convenience method to format a BigInteger.
+ * @stable ICU 2.0
+ */
+ public final String format(BigInteger number) {
+     // Forwarded to the wrapped JDK formatter.
+     final String text = numberFormat.format(number);
+     return text;
+ }
+
+ /**
+ * Convenience method to format a BigDecimal.
+ * @stable ICU 2.0
+ */
+ public final String format(java.math.BigDecimal number) {
+     // Forwarded to the wrapped JDK formatter.
+     final String text = numberFormat.format(number);
+     return text;
+ }
+
+ /**
+ * {@icu} Convenience method to format an ICU BigDecimal.
+ * @stable ICU 2.0
+ */
+ public final String format(com.ibm.icu.math.BigDecimal number) {
+     // The ICU value is converted with toBigDecimal() before it is handed
+     // to the wrapped JDK formatter.
+     final String text = numberFormat.format(number.toBigDecimal());
+     return text;
+ }
+
+ /**
+ * {@icu} Convenience method to format a CurrencyAmount.
+ * @stable ICU 3.0
+ */
+ public final String format(CurrencyAmount currAmt) {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(double number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // Convert the caller's position into the one handed to the JDK delegate.
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     StringBuffer buf = numberFormat.format(number, toAppendTo, jdkPos);
+     // Copy the range back only when a converted position exists. This is the
+     // same guard format(Object, StringBuffer, FieldPosition) applies, so the
+     // overloads treat a missing position alike instead of dereferencing null.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return buf;
+ }
+
+ /**
+ * Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(long number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // Convert the caller's position into the one handed to the JDK delegate.
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     StringBuffer buf = numberFormat.format(number, toAppendTo, jdkPos);
+     // Copy the range back only when a converted position exists. This is the
+     // same guard format(Object, StringBuffer, FieldPosition) applies, so the
+     // overloads treat a missing position alike instead of dereferencing null.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return buf;
+ }
+ /**
+ * {@icu} Formats a BigInteger. Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(BigInteger number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // Convert the caller's position into the one handed to the JDK delegate.
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     StringBuffer buf = numberFormat.format(number, toAppendTo, jdkPos);
+     // Copy the range back only when a converted position exists. This is the
+     // same guard format(Object, StringBuffer, FieldPosition) applies, so the
+     // overloads treat a missing position alike instead of dereferencing null.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return buf;
+ }
+ /**
+ * {@icu} Formats a BigDecimal. Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(java.math.BigDecimal number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // Convert the caller's position into the one handed to the JDK delegate.
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     StringBuffer buf = numberFormat.format(number, toAppendTo, jdkPos);
+     // Copy the range back only when a converted position exists. This is the
+     // same guard format(Object, StringBuffer, FieldPosition) applies, so the
+     // overloads treat a missing position alike instead of dereferencing null.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return buf;
+ }
+ /**
+ * {@icu} Formats an ICU BigDecimal. Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(com.ibm.icu.math.BigDecimal number,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // Convert the caller's position into the one handed to the JDK delegate.
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     // The ICU value is converted with toBigDecimal() before delegation.
+     StringBuffer buf = numberFormat.format(number.toBigDecimal(), toAppendTo, jdkPos);
+     // Copy the range back only when a converted position exists. This is the
+     // same guard format(Object, StringBuffer, FieldPosition) applies, so the
+     // overloads treat a missing position alike instead of dereferencing null.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return buf;
+ }
+
+ /**
+ * {@icu} Formats a CurrencyAmount. Specialization of format.
+ * @see java.text.Format#format(Object, StringBuffer, FieldPosition)
+ * @stable ICU 3.0
+ */
+ public StringBuffer format(CurrencyAmount currAmt,
+         StringBuffer toAppendTo,
+         FieldPosition pos) {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Returns a Long if possible (e.g., within the range [Long.MIN_VALUE,
+ * Long.MAX_VALUE] and with no decimals), otherwise a Double.
+ * If IntegerOnly is set, will stop at a decimal
+ * point (or equivalent; e.g., for rational numbers "1 2/3", will stop
+ * after the 1).
+ * Does not throw an exception; if no object can be parsed, index is
+ * unchanged!
+ * @see #isParseIntegerOnly
+ * @see java.text.Format#parseObject(String, ParsePosition)
+ * @stable ICU 2.0
+ */
+ public Number parse(String text, ParsePosition parsePosition) {
+     // Forwarded to the wrapped JDK formatter.
+     final Number parsed = numberFormat.parse(text, parsePosition);
+     return parsed;
+ }
+
+ /**
+ * Parses text from the beginning of the given string to produce a number.
+ * The method might not use the entire text of the given string.
+ *
+ * @param text A String whose beginning should be parsed.
+ * @return A Number parsed from the string.
+ * @throws ParseException if the beginning of the specified string
+ * cannot be parsed.
+ * @see #format
+ * @stable ICU 2.0
+ */
+ public Number parse(String text) throws ParseException {
+     // Forwarded to the wrapped JDK formatter; its ParseException propagates.
+     final Number parsed = numberFormat.parse(text);
+     return parsed;
+ }
+
+ /**
+ * Parses text from the given string as a CurrencyAmount. Unlike
+ * the parse() method, this method will attempt to parse a generic
+ * currency name, searching for a match of this object's locale's
+ * currency display names, or for a 3-letter ISO currency code.
+ * This method will fail if this format is not a currency format,
+ * that is, if it does not contain the currency pattern symbol
+ * (U+00A4) in its prefix or suffix.
+ *
+ * @param text the string to parse
+ * @param pos input-output position; on input, the position within
+ * text to match; must have 0 <= pos.getIndex() < text.length();
+ * on output, the position after the last matched character. If
+ * the parse fails, the position is unchanged upon output.
+ * @return a CurrencyAmount, or null upon failure
+ */
+ CurrencyAmount parseCurrency(String text, ParsePosition pos) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns true if this format will parse numbers as integers only.
+ * For example in the English locale, with ParseIntegerOnly true, the
+ * string "1234." would be parsed as the integer value 1234 and parsing
+ * would stop at the "." character. The decimal separator accepted
+ * by the parse operation is locale-dependent and determined by the
+ * subclass.
+ * @return true if this will parse integers only
+ * @stable ICU 2.0
+ */
+ public boolean isParseIntegerOnly() {
+     // The flag is stored on the wrapped JDK formatter.
+     final boolean integerOnly = numberFormat.isParseIntegerOnly();
+     return integerOnly;
+ }
+
+ /**
+ * Sets whether or not numbers should be parsed as integers only.
+ * @param value true if this should parse integers only
+ * @see #isParseIntegerOnly
+ * @stable ICU 2.0
+ */
+ public void setParseIntegerOnly(boolean value) {
+     // The flag is stored on the wrapped JDK formatter.
+     final boolean integerOnly = value;
+     numberFormat.setParseIntegerOnly(integerOnly);
+ }
+
+ /**
+ * {@icu} Sets whether strict parsing is in effect. When this is true, the
+ * following conditions cause a parse failure (examples use the pattern "#,##0.#"):
+ * Leading zeros
+ * '00', '0123' fail the parse, but '0' and '0.001' pass
+ * Leading or doubled grouping separators
+ * ',123' and '1,,234' fail
+ * Groups of incorrect length when grouping is used
+ * '1,23' and '1234,567' fail, but '1234' passes
+ * Grouping separators used in numbers followed by exponents
+ * '1,234E5' fails, but '1234E5' and '1,234E' pass ('E' is not an exponent when
+ * not followed by a number)
+ *
+ * When strict parsing is off, leading zeros and all grouping separators are ignored.
+ * This is the default behavior.
+ * @param value True to enable strict parsing. Default is false.
+ * @see #isParseStrict
+ * @stable ICU 3.6
+ */
+ public void setParseStrict(boolean value) {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Returns whether strict parsing is in effect.
+ * @return true if strict parsing is in effect
+ * @see #setParseStrict
+ * @stable ICU 3.6
+ */
+ public boolean isParseStrict() {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ //============== Locale Stuff =====================
+
+ /**
+ * Returns the default number format for the current default locale.
+ * The default format is one of the styles provided by the other
+ * factory methods: getNumberInstance, getIntegerInstance,
+ * getCurrencyInstance or getPercentInstance.
+ * Exactly which one is locale-dependent.
+ * @stable ICU 2.0
+ */
+ //Bug 4408066 [Richard/GCL]
+ public final static NumberFormat getInstance() {
+     // Default locale, NUMBERSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, NUMBERSTYLE);
+ }
+
+ /**
+ * Returns the default number format for the specified locale.
+ * The default format is one of the styles provided by the other
+ * factory methods: getNumberInstance, getCurrencyInstance or getPercentInstance.
+ * Exactly which one is locale-dependent.
+ * @stable ICU 2.0
+ */
+ public static NumberFormat getInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with NUMBERSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, NUMBERSTYLE);
+ }
+
+ /**
+ * {@icu} Returns the default number format for the specified locale.
+ * The default format is one of the styles provided by the other
+ * factory methods: getNumberInstance, getCurrencyInstance or getPercentInstance.
+ * Exactly which one is locale-dependent.
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getInstance(ULocale inLocale) {
+     // Same as the (ULocale, int) overload with NUMBERSTYLE.
+     final int style = NUMBERSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * {@icu} Returns a specific style number format for default locale.
+ * @param style number format style
+ * @stable ICU 4.2
+ */
+ public final static NumberFormat getInstance(int style) {
+     // Requested style, default locale.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, style);
+ }
+
+ /**
+ * {@icu} Returns a specific style number format for a specific locale.
+ * @param inLocale the specific locale.
+ * @param style number format style
+ * @stable ICU 4.2
+ */
+ public static NumberFormat getInstance(Locale inLocale, int style) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, style);
+ }
+
+
+ /**
+ * Returns a general-purpose number format for the current default locale.
+ * @stable ICU 2.0
+ */
+ public final static NumberFormat getNumberInstance() {
+     // Default locale, NUMBERSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, NUMBERSTYLE);
+ }
+
+ /**
+ * Returns a general-purpose number format for the specified locale.
+ * @stable ICU 2.0
+ */
+ public static NumberFormat getNumberInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with NUMBERSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, NUMBERSTYLE);
+ }
+
+ /**
+ * {@icu} Returns a general-purpose number format for the specified locale.
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getNumberInstance(ULocale inLocale) {
+     // Same as getInstance(ULocale, int) with NUMBERSTYLE.
+     final int style = NUMBERSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * Returns an integer number format for the current default locale. The
+ * returned number format is configured to round floating point numbers
+ * to the nearest integer using IEEE half-even rounding (see {@link
+ * com.ibm.icu.math.BigDecimal#ROUND_HALF_EVEN ROUND_HALF_EVEN}) for formatting,
+ * and to parse only the integer part of an input string (see {@link
+ * #isParseIntegerOnly isParseIntegerOnly}).
+ *
+ * @return a number format for integer values
+ * @stable ICU 2.0
+ */
+ //Bug 4408066 [Richard/GCL]
+ public final static NumberFormat getIntegerInstance() {
+     // Default locale, INTEGERSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, INTEGERSTYLE);
+ }
+
+ /**
+ * Returns an integer number format for the specified locale. The
+ * returned number format is configured to round floating point numbers
+ * to the nearest integer using IEEE half-even rounding (see {@link
+ * com.ibm.icu.math.BigDecimal#ROUND_HALF_EVEN ROUND_HALF_EVEN}) for formatting,
+ * and to parse only the integer part of an input string (see {@link
+ * #isParseIntegerOnly isParseIntegerOnly}).
+ *
+ * @param inLocale the locale for which a number format is needed
+ * @return a number format for integer values
+ * @stable ICU 2.0
+ */
+ //Bug 4408066 [Richard/GCL]
+ public static NumberFormat getIntegerInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with INTEGERSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, INTEGERSTYLE);
+ }
+
+ /**
+ * {@icu} Returns an integer number format for the specified locale. The
+ * returned number format is configured to round floating point numbers
+ * to the nearest integer using IEEE half-even rounding (see {@link
+ * com.ibm.icu.math.BigDecimal#ROUND_HALF_EVEN ROUND_HALF_EVEN}) for formatting,
+ * and to parse only the integer part of an input string (see {@link
+ * #isParseIntegerOnly isParseIntegerOnly}).
+ *
+ * @param inLocale the locale for which a number format is needed
+ * @return a number format for integer values
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getIntegerInstance(ULocale inLocale) {
+     // Same as getInstance(ULocale, int) with INTEGERSTYLE.
+     final int style = INTEGERSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * Returns a currency format for the current default locale.
+ * @return a number format for currency
+ * @stable ICU 2.0
+ */
+ public final static NumberFormat getCurrencyInstance() {
+     // Default locale, CURRENCYSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, CURRENCYSTYLE);
+ }
+
+ /**
+ * Returns a currency format for the specified locale.
+ * @return a number format for currency
+ * @stable ICU 2.0
+ */
+ public static NumberFormat getCurrencyInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with CURRENCYSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, CURRENCYSTYLE);
+ }
+
+ /**
+ * {@icu} Returns a currency format for the specified locale.
+ * @return a number format for currency
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getCurrencyInstance(ULocale inLocale) {
+     // Same as getInstance(ULocale, int) with CURRENCYSTYLE.
+     final int style = CURRENCYSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * Returns a percentage format for the current default locale.
+ * @return a number format for percents
+ * @stable ICU 2.0
+ */
+ public final static NumberFormat getPercentInstance() {
+     // Default locale, PERCENTSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, PERCENTSTYLE);
+ }
+
+ /**
+ * Returns a percentage format for the specified locale.
+ * @return a number format for percents
+ * @stable ICU 2.0
+ */
+ public static NumberFormat getPercentInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with PERCENTSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, PERCENTSTYLE);
+ }
+
+ /**
+ * {@icu} Returns a percentage format for the specified locale.
+ * @return a number format for percents
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getPercentInstance(ULocale inLocale) {
+     // Same as getInstance(ULocale, int) with PERCENTSTYLE.
+     final int style = PERCENTSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * {@icu} Returns a scientific format for the current default locale.
+ * @return a scientific number format
+ * @stable ICU 2.0
+ */
+ public final static NumberFormat getScientificInstance() {
+     // Default locale, SCIENTIFICSTYLE.
+     final ULocale defaultLocale = ULocale.getDefault();
+     return getInstance(defaultLocale, SCIENTIFICSTYLE);
+ }
+
+ /**
+ * {@icu} Returns a scientific format for the specified locale.
+ * @return a scientific number format
+ * @stable ICU 2.0
+ */
+ public static NumberFormat getScientificInstance(Locale inLocale) {
+     // Go through ULocale.forLocale and the (ULocale, int) overload with SCIENTIFICSTYLE.
+     final ULocale target = ULocale.forLocale(inLocale);
+     return getInstance(target, SCIENTIFICSTYLE);
+ }
+
+ /**
+ * {@icu} Returns a scientific format for the specified locale.
+ * @return a scientific number format
+ * @stable ICU 3.2
+ */
+ public static NumberFormat getScientificInstance(ULocale inLocale) {
+     // Same as getInstance(ULocale, int) with SCIENTIFICSTYLE.
+     final int style = SCIENTIFICSTYLE;
+     return getInstance(inLocale, style);
+ }
+
+ /**
+ * A NumberFormatFactory is used to register new number formats. The factory
+ * should be able to create any of the predefined formats for each locale it
+ * supports. When registered, the locales it supports extend or override the
+ * locales already supported by ICU.
+ *
+ * Note: as of ICU4J 3.2, the default API for NumberFormatFactory uses
+ * ULocale instead of Locale. Instead of overriding createFormat(Locale, int),
+ * new implementations should override createFormat(ULocale, int). Note that
+ * one of these two methods MUST be overridden or else an infinite
+ * loop will occur.
+ *
+ * @stable ICU 2.6
+ */
+ public static abstract class NumberFormatFactory {
+ // The FORMAT_* constants below are aliases of the enclosing class's
+ // *STYLE constants, so either set of names denotes the same int values.
+
+ /**
+ * Value passed to format requesting a default number format.
+ * Same value as NUMBERSTYLE.
+ * @stable ICU 2.6
+ */
+ public static final int FORMAT_NUMBER = NUMBERSTYLE;
+
+ /**
+ * Value passed to format requesting a currency format.
+ * Same value as CURRENCYSTYLE.
+ * @stable ICU 2.6
+ */
+ public static final int FORMAT_CURRENCY = CURRENCYSTYLE;
+
+ /**
+ * Value passed to format requesting a percent format.
+ * Same value as PERCENTSTYLE.
+ * @stable ICU 2.6
+ */
+ public static final int FORMAT_PERCENT = PERCENTSTYLE;
+
+ /**
+ * Value passed to format requesting a scientific format.
+ * Same value as SCIENTIFICSTYLE.
+ * @stable ICU 2.6
+ */
+ public static final int FORMAT_SCIENTIFIC = SCIENTIFICSTYLE;
+
+ /**
+ * Value passed to format requesting an integer format.
+ * Same value as INTEGERSTYLE.
+ * @stable ICU 2.6
+ */
+ public static final int FORMAT_INTEGER = INTEGERSTYLE;
+
+ /**
+ * Returns true if this factory is visible. Default is true.
+ * If not visible, the locales supported by this factory will not
+ * be listed by getAvailableLocales. This value must not change.
+ * @return true if the factory is visible.
+ * @stable ICU 2.6
+ */
+ public boolean visible() {
+ return true;
+ }
+
+ /**
+ * Returns an immutable collection of the locale names directly
+ * supported by this factory.
+ * @return the supported locale names.
+ * @stable ICU 2.6
+ */
+ public abstract Set getSupportedLocaleNames();
+
+ /**
+ * Returns a number format of the appropriate type. If the locale
+ * is not supported, return null. If the locale is supported, but
+ * the type is not provided by this service, return null. Otherwise
+ * return an appropriate instance of NumberFormat.
+ * Note: as of ICU4J 3.2, implementations should override
+ * this method instead of createFormat(Locale, int).
+ * The default implementation converts the locale with
+ * loc.toLocale() and calls createFormat(Locale, int).
+ * @param loc the locale for which to create the format
+ * @param formatType the type of format
+ * @return the NumberFormat, or null.
+ * @stable ICU 3.2
+ */
+ public NumberFormat createFormat(ULocale loc, int formatType) {
+ // Calls the Locale overload, whose default implementation calls back
+ // into this method: a subclass that overrides neither overload
+ // recurses without end.
+ return createFormat(loc.toLocale(), formatType);
+ }
+
+ /**
+ * Returns a number format of the appropriate type. If the locale
+ * is not supported, return null. If the locale is supported, but
+ * the type is not provided by this service, return null. Otherwise
+ * return an appropriate instance of NumberFormat.
+ * Note: as of ICU4J 3.2, createFormat(ULocale, int) should be
+ * overridden instead of this method. This method is no longer
+ * abstract and delegates to that method.
+ * @param loc the locale for which to create the format
+ * @param formatType the type of format
+ * @return the NumberFormat, or null.
+ * @stable ICU 2.6
+ */
+ public NumberFormat createFormat(Locale loc, int formatType) {
+ // Calls the ULocale overload, whose default implementation calls back
+ // into this method: at least one of the two must be overridden.
+ return createFormat(ULocale.forLocale(loc), formatType);
+ }
+
+ /**
+ * Constructor for use by subclasses; performs no initialization.
+ * @stable ICU 2.6
+ */
+ protected NumberFormatFactory() {
+ }
+ }
+
+ /**
+ * Returns the list of Locales for which NumberFormats are available.
+ * @return the available locales
+ * @stable ICU 2.0
+ */
+ public static Locale[] getAvailableLocales() {
+     // The list is whatever the JDK's NumberFormat reports.
+     final Locale[] jdkLocales = java.text.NumberFormat.getAvailableLocales();
+     return jdkLocales;
+ }
+
+ /**
+ * {@icu} Returns the list of Locales for which NumberFormats are available.
+ * @return the available locales
+ * @draft ICU 3.2 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static ULocale[] getAvailableULocales() {
+     // First look without the lock; the cache field is declared volatile.
+     ULocale[] cached = availableULocales;
+     if (cached == null) {
+         synchronized (NumberFormat.class) {
+             // Look again under the lock: another thread may have filled the cache.
+             cached = availableULocales;
+             if (cached == null) {
+                 final Locale[] jdkLocales = java.text.NumberFormat.getAvailableLocales();
+                 cached = new ULocale[jdkLocales.length];
+                 for (int idx = 0; idx < jdkLocales.length; ++idx) {
+                     cached[idx] = ULocale.forLocale(jdkLocales[idx]);
+                 }
+                 // Publish only the completely populated array.
+                 availableULocales = cached;
+             }
+         }
+     }
+     // Callers get a copy so they cannot modify the cached array.
+     return (ULocale[]) cached.clone();
+ }
+ private static volatile ULocale[] availableULocales;
+
+ /**
+ * {@icu} Registers a new NumberFormatFactory. The factory is adopted by
+ * the service and must not be modified. The returned object is a
+ * key that can be used to unregister this factory.
+ * @param factory the factory to register
+ * @return a key with which to unregister the factory
+ * @stable ICU 2.6
+ */
+ public static Object registerFactory(NumberFormatFactory factory) {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * {@icu} Unregisters the factory or instance associated with this key (obtained from
+ * registerInstance or registerFactory).
+ * @param registryKey a key obtained from registerFactory
+ * @return true if the object was successfully unregistered
+ * @stable ICU 2.6
+ */
+ public static boolean unregister(Object registryKey) {
+     // This operation is not implemented here; every call fails.
+     final String reason = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(reason);
+ }
+
+ /**
+ * Overrides hashCode.
+ * @stable ICU 2.0
+ */
+ public int hashCode() {
+     // The hash is that of the wrapped JDK formatter, matching equals(),
+     // which compares the wrapped formatters.
+     final int delegateHash = numberFormat.hashCode();
+     return delegateHash;
+ }
+
+ /**
+ * Overrides equals.
+ * Two NumberFormats are equal if they are of the same class
+ * and the settings (groupingUsed, parseIntegerOnly, maximumIntegerDigits, etc.)
+ * are equal.
+ * @param obj the object to compare against
+ * @return true if the object is equal to this.
+ * @stable ICU 2.0
+ */
+ public boolean equals(Object obj) {
+ try {
+ return numberFormat.equals(((NumberFormat)obj).numberFormat);
+ }
+ catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Overrides clone.
+ * @stable ICU 2.0
+ */
+ public Object clone() {
+     // Clone the wrapped JDK formatter and wrap the copy in a new NumberFormat.
+     final java.text.NumberFormat delegateCopy = (java.text.NumberFormat) numberFormat.clone();
+     return new NumberFormat(delegateCopy);
+ }
+
+ /**
+ * Returns true if grouping is used in this format. For example, in the
+ * en_US locale, with grouping on, the number 1234567 will be formatted
+ * as "1,234,567". The grouping separator as well as the size of each group
+ * is locale-dependent and is determined by subclasses of NumberFormat.
+ * Grouping affects both parsing and formatting.
+ * @return true if grouping is used
+ * @see #setGroupingUsed
+ * @stable ICU 2.0
+ */
+ public boolean isGroupingUsed() {
+     // The setting is stored on the wrapped JDK formatter.
+     final boolean grouping = numberFormat.isGroupingUsed();
+     return grouping;
+ }
+
+ /**
+ * Sets whether or not grouping will be used in this format. Grouping
+ * affects both parsing and formatting.
+ * @see #isGroupingUsed
+ * @param newValue true to use grouping.
+ * @stable ICU 2.0
+ */
+ public void setGroupingUsed(boolean newValue) {
+     // The setting is stored on the wrapped JDK formatter.
+     final boolean grouping = newValue;
+     numberFormat.setGroupingUsed(grouping);
+ }
+
+ /**
+ * Returns the maximum number of digits allowed in the integer portion of a
+ * number. The default value is 40, which subclasses can override.
+ * When formatting, the exact behavior when this value is exceeded is
+ * subclass-specific. When parsing, this has no effect.
+ * @return the maximum number of integer digits
+ * @see #setMaximumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public int getMaximumIntegerDigits() {
+     // The limit is stored on the wrapped JDK formatter.
+     final int maxDigits = numberFormat.getMaximumIntegerDigits();
+     return maxDigits;
+ }
+
+ /**
+ * Sets the maximum number of digits allowed in the integer portion of a
+ * number. This must be >= minimumIntegerDigits. If the
+ * new value for maximumIntegerDigits is less than the current value
+ * of minimumIntegerDigits, then minimumIntegerDigits will also be set to
+ * the new value.
+ * @param newValue the maximum number of integer digits to be shown; if
+ * less than zero, then zero is used. Subclasses might enforce an
+ * upper limit to this value appropriate to the numeric type being formatted.
+ * @see #getMaximumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public void setMaximumIntegerDigits(int newValue) {
+     // Forwarded unchanged to the wrapped formatter.
+     this.numberFormat.setMaximumIntegerDigits(newValue);
+ }
+
+ /**
+ * Returns the minimum number of digits allowed in the integer portion of a
+ * number. The default value is 1, which subclasses can override.
+ * When formatting, if this value is not reached, numbers are padded on the
+ * left with the locale-specific '0' character to ensure at least this
+ * number of integer digits. When parsing, this has no effect.
+ * @return the minimum number of integer digits
+ * @see #setMinimumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public int getMinimumIntegerDigits() {
+     // Answered by the wrapped formatter.
+     final int minIntegerDigits = this.numberFormat.getMinimumIntegerDigits();
+     return minIntegerDigits;
+ }
+
+ /**
+ * Sets the minimum number of digits allowed in the integer portion of a
+ * number. This must be <= maximumIntegerDigits. If the
+ * new value for minimumIntegerDigits is more than the current value
+ * of maximumIntegerDigits, then maximumIntegerDigits will also be set to
+ * the new value.
+ * @param newValue the minimum number of integer digits to be shown; if
+ * less than zero, then zero is used. Subclasses might enforce an
+ * upper limit to this value appropriate to the numeric type being formatted.
+ * @see #getMinimumIntegerDigits
+ * @stable ICU 2.0
+ */
+ public void setMinimumIntegerDigits(int newValue) {
+     // Forwarded unchanged to the wrapped formatter.
+     this.numberFormat.setMinimumIntegerDigits(newValue);
+ }
+
+ /**
+ * Returns the maximum number of digits allowed in the fraction
+ * portion of a number. The default value is 3, which subclasses
+ * can override. When formatting, the exact behavior when this
+ * value is exceeded is subclass-specific. When parsing, this has
+ * no effect.
+ * @return the maximum number of fraction digits
+ * @see #setMaximumFractionDigits
+ * @stable ICU 2.0
+ */
+ public int getMaximumFractionDigits() {
+     // Answered by the wrapped formatter.
+     final int maxFractionDigits = this.numberFormat.getMaximumFractionDigits();
+     return maxFractionDigits;
+ }
+
+ /**
+ * Sets the maximum number of digits allowed in the fraction portion of a
+ * number. This must be >= minimumFractionDigits. If the
+ * new value for maximumFractionDigits is less than the current value
+ * of minimumFractionDigits, then minimumFractionDigits will also be set to
+ * the new value.
+ * @param newValue the maximum number of fraction digits to be shown; if
+ * less than zero, then zero is used. The concrete subclass may enforce an
+ * upper limit to this value appropriate to the numeric type being formatted.
+ * @see #getMaximumFractionDigits
+ * @stable ICU 2.0
+ */
+ public void setMaximumFractionDigits(int newValue) {
+     // Forwarded unchanged to the wrapped formatter.
+     this.numberFormat.setMaximumFractionDigits(newValue);
+ }
+
+ /**
+ * Returns the minimum number of digits allowed in the fraction portion of a
+ * number. The default value is 0, which subclasses can override.
+ * When formatting, if this value is not reached, numbers are padded on
+ * the right with the locale-specific '0' character to ensure at least
+ * this number of fraction digits. When parsing, this has no effect.
+ * @return the minimum number of fraction digits
+ * @see #setMinimumFractionDigits
+ * @stable ICU 2.0
+ */
+ public int getMinimumFractionDigits() {
+     // Answered by the wrapped formatter.
+     final int minFractionDigits = this.numberFormat.getMinimumFractionDigits();
+     return minFractionDigits;
+ }
+
+ /**
+ * Sets the minimum number of digits allowed in the fraction portion of a
+ * number. This must be <= maximumFractionDigits. If the
+ * new value for minimumFractionDigits exceeds the current value
+ * of maximumFractionDigits, then maximumFractionDigits will also be set to
+ * the new value.
+ * @param newValue the minimum number of fraction digits to be shown; if
+ * less than zero, then zero is used. Subclasses might enforce an
+ * upper limit to this value appropriate to the numeric type being formatted.
+ * @see #getMinimumFractionDigits
+ * @stable ICU 2.0
+ */
+ public void setMinimumFractionDigits(int newValue) {
+     // Forwarded unchanged to the wrapped formatter.
+     this.numberFormat.setMinimumFractionDigits(newValue);
+ }
+
+ /**
+ * Sets the Currency object used to display currency
+ * amounts. This takes effect immediately, if this format is a
+ * currency format. If this format is not a currency format, then
+ * the currency object is used if and when this object becomes a
+ * currency format.
+ * @param theCurrency new currency object to use. May be null for
+ * some subclasses.
+ * @stable ICU 2.6
+ */
+ public void setCurrency(Currency theCurrency) {
+     // Hands the currency held by the wrapper object to the wrapped formatter.
+     // NOTE(review): theCurrency is dereferenced unconditionally here, so a
+     // null argument fails with NullPointerException - confirm this is intended.
+     this.numberFormat.setCurrency(theCurrency.currency);
+ }
+
+ /**
+ * Returns the Currency object used to display currency
+ * amounts. This may be null.
+ * @stable ICU 2.6
+ */
+ public Currency getCurrency() {
+ return new Currency(numberFormat.getCurrency());
+ }
+
+ /**
+ * Returns the rounding mode used in this NumberFormat. The default implementation of
+ * this method in NumberFormat always throws <code>UnsupportedOperationException</code>.
+ * @return A rounding mode, between <code>BigDecimal.ROUND_UP</code>
+ * and <code>BigDecimal.ROUND_UNNECESSARY</code>.
+ * @see #setRoundingMode(int)
+ * @stable ICU 4.0
+ */
+ public int getRoundingMode() {
+     // This implementation always rejects the call.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Set the rounding mode used in this NumberFormat. The default implementation of
+ * this method in NumberFormat always throws <code>UnsupportedOperationException</code>.
+ * @param roundingMode A rounding mode, between
+ * <code>BigDecimal.ROUND_UP</code> and
+ * <code>BigDecimal.ROUND_UNNECESSARY</code>.
+ * @see #getRoundingMode()
+ * @stable ICU 4.0
+ */
+ public void setRoundingMode(int roundingMode) {
+     // This implementation always rejects the call; the argument is ignored.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+
+ /**
+ * Returns a specific style number format for a specific locale.
+ * @param desiredLocale the specific locale.
+ * @param choice number format style
+ * @throws IllegalArgumentException if choice is not one of
+ * NUMBERSTYLE, CURRENCYSTYLE,
+ * PERCENTSTYLE, SCIENTIFICSTYLE,
+ * INTEGERSTYLE,
+ * ISOCURRENCYSTYLE, PLURALCURRENCYSTYLE,
+ * @stable ICU 4.2
+ */
+ public static NumberFormat getInstance(ULocale desiredLocale, int choice) {
+     Locale locale = desiredLocale.toLocale();
+     java.text.NumberFormat nf;
+     switch (choice) {
+     case NUMBERSTYLE:
+         nf = java.text.NumberFormat.getInstance(locale);
+         break;
+     case INTEGERSTYLE:
+         nf = java.text.NumberFormat.getIntegerInstance(locale);
+         break;
+     case CURRENCYSTYLE:
+         nf = java.text.NumberFormat.getCurrencyInstance(locale);
+         break;
+     case PERCENTSTYLE:
+         nf = java.text.NumberFormat.getPercentInstance(locale);
+         break;
+     case SCIENTIFICSTYLE:
+         nf = new java.text.DecimalFormat("#E0",
+                 new java.text.DecimalFormatSymbols(locale));
+         nf.setMaximumFractionDigits(10);
+         break;
+     default:
+         // Fail fast, as the Javadoc promises, instead of returning a wrapper
+         // around a null delegate that would only fail later on first use.
+         throw new IllegalArgumentException(
+                 "choice is not a number format style supported by com.ibm.icu.base: " + choice);
+     }
+     return new NumberFormat(nf);
+ }
+
+ /**
+ * Empty constructor. Public for compatibility with JDK which lets the
+ * compiler generate a default public constructor even though this is
+ * an abstract class.
+ * @stable ICU 2.6
+ */
+ public NumberFormat() {
+ // Wraps the JDK's general-purpose number format for the default locale.
+ this(java.text.NumberFormat.getInstance());
+ }
+
+ /**
+ * The instances of this inner class are used as attribute keys and values
+ * in AttributedCharacterIterator that
+ * NumberFormat.formatToCharacterIterator() method returns.
+ *
+ * There is no public constructor to this class, the only instances are the
+ * constants defined here.
+ *
+ * @stable ICU 3.6
+ */
+ public static class Field extends Format.Field {
+     // generated by serialver from JDK 1.4.1_01
+     static final long serialVersionUID = -4516273749929385842L;
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field SIGN = new Field("sign");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field INTEGER = new Field("integer");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field FRACTION = new Field("fraction");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field EXPONENT = new Field("exponent");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field EXPONENT_SIGN = new Field("exponent sign");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field EXPONENT_SYMBOL = new Field("exponent symbol");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field DECIMAL_SEPARATOR = new Field("decimal separator");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field GROUPING_SEPARATOR = new Field("grouping separator");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field PERCENT = new Field("percent");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field PERMILLE = new Field("per mille");
+
+     /**
+      * @stable ICU 3.6
+      */
+     public static final Field CURRENCY = new Field("currency");
+
+     /**
+      * Constructs a new instance of NumberFormat.Field with the given field
+      * name.
+      * @stable ICU 3.6
+      */
+     protected Field(String fieldName) {
+         super(fieldName);
+     }
+
+     /**
+      * Serialization hook: resolves a deserialized instance to the constant
+      * NumberFormat.Field value that has the same name.
+      * @throws InvalidObjectException if the name matches none of the constants
+      * @stable ICU 3.6
+      */
+     protected Object readResolve() throws InvalidObjectException {
+         final String name = this.getName();
+         // Candidates are checked in the same order as before.
+         final Field[] candidates = {
+             INTEGER, FRACTION, EXPONENT, EXPONENT_SIGN, EXPONENT_SYMBOL, CURRENCY,
+             DECIMAL_SEPARATOR, GROUPING_SEPARATOR, PERCENT, PERMILLE, SIGN
+         };
+         for (Field candidate : candidates) {
+             if (name.equals(candidate.getName())) {
+                 return candidate;
+             }
+         }
+
+         throw new InvalidObjectException("An invalid object.");
+     }
+ }
+
+ private static FieldPosition toJDKFieldPosition(FieldPosition icuPos) {
+ if (icuPos == null) {
+ return null;
+ }
+
+ int fieldID = icuPos.getField();
+ Format.Field fieldAttribute = icuPos.getFieldAttribute();
+
+ FieldPosition jdkPos = null;
+
+ if (fieldID >= 0) {
+ if (fieldID == FRACTION_FIELD) {
+ fieldID = java.text.NumberFormat.FRACTION_FIELD;
+ } else if (fieldID == INTEGER_FIELD) {
+ fieldID = java.text.NumberFormat.INTEGER_FIELD;
+ }
+ }
+
+ if (fieldAttribute != null) {
+ // map field
+ if (fieldAttribute.equals(Field.CURRENCY)) {
+ fieldAttribute = java.text.NumberFormat.Field.CURRENCY;
+ } else if (fieldAttribute.equals(Field.DECIMAL_SEPARATOR)) {
+ fieldAttribute = java.text.NumberFormat.Field.DECIMAL_SEPARATOR;
+ } else if (fieldAttribute.equals(Field.EXPONENT)) {
+ fieldAttribute = java.text.NumberFormat.Field.EXPONENT;
+ } else if (fieldAttribute.equals(Field.EXPONENT_SIGN)) {
+ fieldAttribute = java.text.NumberFormat.Field.EXPONENT_SIGN;
+ } else if (fieldAttribute.equals(Field.EXPONENT_SYMBOL)) {
+ fieldAttribute = java.text.NumberFormat.Field.EXPONENT_SYMBOL;
+ } else if (fieldAttribute.equals(Field.FRACTION)) {
+ fieldAttribute = java.text.NumberFormat.Field.FRACTION;
+ } else if (fieldAttribute.equals(Field.GROUPING_SEPARATOR)) {
+ fieldAttribute = java.text.NumberFormat.Field.GROUPING_SEPARATOR;
+ } else if (fieldAttribute.equals(Field.INTEGER)) {
+ fieldAttribute = java.text.NumberFormat.Field.INTEGER;
+ } else if (fieldAttribute.equals(Field.PERCENT)) {
+ fieldAttribute = java.text.NumberFormat.Field.PERCENT;
+ } else if (fieldAttribute.equals(Field.PERMILLE)) {
+ fieldAttribute = java.text.NumberFormat.Field.PERMILLE;
+ } else if (fieldAttribute.equals(Field.SIGN)) {
+ fieldAttribute = java.text.NumberFormat.Field.SIGN;
+ }
+
+ jdkPos = new FieldPosition(fieldAttribute, fieldID);
+ } else {
+ jdkPos = new FieldPosition(fieldID);
+ }
+
+ jdkPos.setBeginIndex(icuPos.getBeginIndex());
+ jdkPos.setEndIndex(icuPos.getEndIndex());
+
+ return jdkPos;
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/RawCollationKey.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/RawCollationKey.java
new file mode 100644
index 00000000000..45316dc2179
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/RawCollationKey.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+/*
+ * Empty stub
+ */
+public class RawCollationKey {
+ private RawCollationKey() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/SimpleDateFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/SimpleDateFormat.java
new file mode 100644
index 00000000000..e8e5785fc90
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/SimpleDateFormat.java
@@ -0,0 +1,514 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import java.text.AttributedCharacterIterator;
+import java.text.AttributedCharacterIterator.Attribute;
+import java.text.AttributedString;
+import java.text.CharacterIterator;
+import java.text.FieldPosition;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.ibm.icu.util.Calendar;
+import com.ibm.icu.util.ULocale;
+
+
+/**
+ * {@icuenhanced java.text.SimpleDateFormat}.{@icu _usage_}
+ *
+ * <p><code>SimpleDateFormat</code> is a concrete class for formatting and
+ * parsing dates in a locale-sensitive manner. It allows for formatting
+ * (date -> text), parsing (text -> date), and normalization.
+ *
+ * <p>
+ * <code>SimpleDateFormat</code> allows you to start by choosing
+ * any user-defined patterns for date-time formatting. However, you
+ * are encouraged to create a date-time formatter with either
+ * <code>getTimeInstance</code>, <code>getDateInstance</code>, or
+ * <code>getDateTimeInstance</code> in <code>DateFormat</code>. Each
+ * of these class methods can return a date/time formatter initialized
+ * with a default format pattern. You may modify the format pattern
+ * using the <code>applyPattern</code> methods as desired.
+ * For more information on using these methods, see
+ * {@link DateFormat}.
+ *
+ *
+ * Time Format Syntax:
+ *
+ * To specify the time format use a time pattern string.
+ * In this pattern, all ASCII letters are reserved as pattern letters,
+ * which are defined as the following:
+ *
+ *
+ * Symbol Meaning Presentation Example
+ * ------ ------- ------------ -------
+ * G era designator (Text) AD
+ * y† year (Number) 1996
+ * Y* year (week of year) (Number) 1997
+ * u* extended year (Number) 4601
+ * M month in year (Text & Number) July & 07
+ * d day in month (Number) 10
+ * h hour in am/pm (1~12) (Number) 12
+ * H hour in day (0~23) (Number) 0
+ * m minute in hour (Number) 30
+ * s second in minute (Number) 55
+ * S fractional second (Number) 978
+ * E day of week (Text) Tuesday
+ * e* day of week (local 1~7) (Text & Number) Tuesday & 2
+ * D day in year (Number) 189
+ * F day of week in month (Number) 2 (2nd Wed in July)
+ * w week in year (Number) 27
+ * W week in month (Number) 2
+ * a am/pm marker (Text) PM
+ * k hour in day (1~24) (Number) 24
+ * K hour in am/pm (0~11) (Number) 0
+ * z time zone (Text) Pacific Standard Time
+ * Z time zone (RFC 822) (Number) -0800
+ * v time zone (generic) (Text) Pacific Time
+ * V time zone (location) (Text) United States (Los Angeles)
+ * g* Julian day (Number) 2451334
+ * A* milliseconds in day (Number) 69540000
+ * Q* quarter in year (Text & Number) Q1 & 01
+ * c* stand alone day of week (Text & Number) Tuesday & 2
+ * L* stand alone month (Text & Number) July & 07
+ * q* stand alone quarter (Text & Number) Q1 & 01
+ * ' escape for text (Delimiter) 'Date='
+ * '' single quote (Literal) 'o''clock'
+ *
+ *
+ * * These items are not supported by Java's SimpleDateFormat.
+ * † ICU interprets a single 'y' differently than Java.
+ *
+ * The count of pattern letters determine the format.
+ *
+ * (Text) : 4 or more pattern letters--use full form,
+ * < 4--use short or abbreviated form if one exists.
+ *
+ * (Number) : the minimum number of digits. Shorter
+ * numbers are zero-padded to this amount. Year is handled specially;
+ * that is, if the count of 'y' is 2, the Year will be truncated to 2 digits.
+ * (e.g., if "yyyy" produces "1997", "yy" produces "97".)
+ * Unlike other fields, fractional seconds are padded on the right with zero.
+ *
+ * (Text & Number) : 3 or over, use text, otherwise use number.
+ *
+ * Any characters in the pattern that are not in the ranges of ['a'..'z']
+ * and ['A'..'Z'] will be treated as quoted text. For instance, characters
+ * like ':', '.', ' ', '#' and '@' will appear in the resulting time text
+ * even they are not embraced within single quotes.
+ *
+ * A pattern containing any invalid pattern letter will result in a thrown
+ * exception during formatting or parsing.
+ *
+ *
+ * Examples Using the US Locale:
+ *
+ *
+ * Format Pattern Result
+ * -------------- -------
+ * "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->> 1996.07.10 AD at 15:08:56 Pacific Time
+ * "EEE, MMM d, ''yy" ->> Wed, July 10, '96
+ * "h:mm a" ->> 12:08 PM
+ * "hh 'o''clock' a, zzzz" ->> 12 o'clock PM, Pacific Daylight Time
+ * "K:mm a, vvv" ->> 0:00 PM, PT
+ * "yyyyy.MMMMM.dd GGG hh:mm aaa" ->> 01996.July.10 AD 12:08 PM
+ *
+ *
+ * Code Sample:
+ *
+ *
+ * SimpleTimeZone pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, "PST");
+ * pdt.setStartRule(Calendar.APRIL, 1, Calendar.SUNDAY, 2*60*60*1000);
+ * pdt.setEndRule(Calendar.OCTOBER, -1, Calendar.SUNDAY, 2*60*60*1000);
+ *
+ * // Format the current time.
+ * SimpleDateFormat formatter
+ * = new SimpleDateFormat ("yyyy.MM.dd G 'at' hh:mm:ss a zzz");
+ * Date currentTime_1 = new Date();
+ * String dateString = formatter.format(currentTime_1);
+ *
+ * // Parse the previous string back into a Date.
+ * ParsePosition pos = new ParsePosition(0);
+ * Date currentTime_2 = formatter.parse(dateString, pos);
+ *
+ *
+ * In the example, the time value <code>currentTime_2</code> obtained from
+ * parsing will be equal to <code>currentTime_1</code>. However, they may not be
+ * equal if the am/pm marker 'a' is left out from the format pattern while
+ * the "hour in am/pm" pattern symbol is used. This information loss can
+ * happen when formatting the time in PM.
+ *
+ * When parsing a date string using the abbreviated year pattern ("yy"),
+ * SimpleDateFormat must interpret the abbreviated year
+ * relative to some century. It does this by adjusting dates to be
+ * within 80 years before and 20 years after the time the SimpleDateFormat
+ * instance is created. For example, using a pattern of "MM/dd/yy" and a
+ * SimpleDateFormat instance created on Jan 1, 1997, the string
+ * "01/11/12" would be interpreted as Jan 11, 2012 while the string "05/04/64"
+ * would be interpreted as May 4, 1964.
+ * During parsing, only strings consisting of exactly two digits, as defined by
+ * {@link com.ibm.icu.lang.UCharacter#isDigit(int)}, will be parsed into the default
+ * century.
+ * Any other numeric string, such as a one digit string, a three or more digit
+ * string, or a two digit string that isn't all digits (for example, "-1"), is
+ * interpreted literally. So "01/02/3" or "01/02/003" are parsed, using the
+ * same pattern, as Jan 2, 3 AD. Likewise, "01/02/-3" is parsed as Jan 2, 4 BC.
+ *
+ * <p>If the year pattern does not have exactly two 'y' characters, the year is
+ * interpreted literally, regardless of the number of digits. So using the
+ * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D.
+ *
+ * <p>When numeric fields abut one another directly, with no intervening delimiter
+ * characters, they constitute a run of abutting numeric fields. Such runs are
+ * parsed specially. For example, the format "HHmmss" parses the input text
+ * "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and fails to
+ * parse "1234". In other words, the leftmost field of the run is flexible,
+ * while the others keep a fixed width. If the parse fails anywhere in the run,
+ * then the leftmost field is shortened by one character, and the entire run is
+ * parsed again. This is repeated until either the parse succeeds or the
+ * leftmost field is one character in length. If the parse still fails at that
+ * point, the parse of the run fails.
+ *
+ * <p>For time zones that have no names, use strings GMT+hours:minutes or
+ * GMT-hours:minutes.
+ *
+ * <p>The calendar defines what is the first day of the week, the first week
+ * of the year, whether hours are zero based or not (0 vs 12 or 24), and the
+ * time zone. There is one common decimal format to handle all the numbers;
+ * the digit count is handled programmatically according to the pattern.
+ *
+ * <h4>Synchronization</h4>
+ *
+ * Date formats are not synchronized. It is recommended to create separate
+ * format instances for each thread. If multiple threads access a format
+ * concurrently, it must be synchronized externally.
+ *
+ * @see com.ibm.icu.util.Calendar
+ * @see com.ibm.icu.util.GregorianCalendar
+ * @see com.ibm.icu.util.TimeZone
+ * @see DateFormat
+ * @see DateFormatSymbols
+ * @see DecimalFormat
+ * @author Mark Davis, Chen-Lieh Huang, Alan Liu
+ * @stable ICU 2.0
+ */
+public class SimpleDateFormat extends DateFormat {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Constructs a SimpleDateFormat using the default pattern for the default
+ * locale. Note: Not all locales support SimpleDateFormat; for full
+ * generality, use the factory methods in the DateFormat class.
+ *
+ * @see DateFormat
+ * @stable ICU 2.0
+ */
+ public SimpleDateFormat() {
+ // Wraps a JDK SimpleDateFormat created with its no-argument constructor.
+ super(new java.text.SimpleDateFormat());
+ }
+
+ /**
+ * Constructs a SimpleDateFormat using the given pattern in the default
+ * locale. Note: Not all locales support SimpleDateFormat; for full
+ * generality, use the factory methods in the DateFormat class.
+ * @stable ICU 2.0
+ */
+ public SimpleDateFormat(String pattern)
+ {
+ // Wraps a JDK SimpleDateFormat built from the given pattern.
+ super(new java.text.SimpleDateFormat(pattern));
+ }
+
+ /**
+ * Constructs a SimpleDateFormat using the given pattern and locale.
+ * Note: Not all locales support SimpleDateFormat; for full
+ * generality, use the factory methods in the DateFormat class.
+ * @stable ICU 2.0
+ */
+ public SimpleDateFormat(String pattern, Locale loc)
+ {
+ // Wraps a JDK SimpleDateFormat built from the given pattern and locale.
+ super(new java.text.SimpleDateFormat(pattern, loc));
+ }
+
+ /**
+ * Constructs a SimpleDateFormat using the given pattern and locale.
+ * Note: Not all locales support SimpleDateFormat; for full
+ * generality, use the factory methods in the DateFormat class.
+ * @stable ICU 3.2
+ */
+ public SimpleDateFormat(String pattern, ULocale loc)
+ {
+ // Converts the ULocale with toLocale() and reuses the (String, Locale) constructor.
+ this(pattern, loc.toLocale());
+ }
+
+ /**
+ * Constructs a SimpleDateFormat using the given pattern, override and locale.
+ * @param pattern The pattern to be used
+ * @param override The override string. A numbering system override string can take one of the following forms:
+ * 1). If just a numbering system name is specified, it applies to all numeric fields in the date format pattern.
+ * 2). To specify an alternate numbering system on a field by field basis, use the field letters from the pattern
+ * followed by an = sign, followed by the numbering system name. For example, to specify that just the year
+ * be formatted using Hebrew digits, use the override "y=hebr". Multiple overrides can be specified in a single
+ * string by separating them with a semi-colon. For example, the override string "m=thai;y=deva" would format using
+ * Thai digits for the month and Devanagari digits for the year.
+ * @param loc The locale to be used
+ * @stable ICU 4.2
+ */
+ public SimpleDateFormat(String pattern, String override, ULocale loc)
+ {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Constructs a SimpleDateFormat using the given pattern and
+ * locale-specific symbol data.
+ * Warning: uses default locale for digits!
+ * @stable ICU 2.0
+ */
+ public SimpleDateFormat(String pattern, DateFormatSymbols formatData)
+ {
+ // Wraps a JDK SimpleDateFormat built from the pattern and the dfs member of formatData.
+ super(new java.text.SimpleDateFormat(pattern, formatData.dfs));
+ }
+
+ /**
+ * Sets the 100-year period 2-digit years will be interpreted as being in
+ * to begin on the date the user specifies.
+ * @param startDate During parsing, two digit years will be placed in the range
+ * <code>startDate</code> to <code>startDate + 100 years</code>.
+ * @stable ICU 2.0
+ */
+ public void set2DigitYearStart(Date startDate) {
+ ((java.text.SimpleDateFormat)dateFormat).set2DigitYearStart(startDate);
+ }
+
+ /**
+ * Returns the beginning date of the 100-year period 2-digit years are interpreted
+ * as being within.
+ * @return the start of the 100-year period into which two digit years are
+ * parsed
+ * @stable ICU 2.0
+ */
+ public Date get2DigitYearStart() {
+ return ((java.text.SimpleDateFormat)dateFormat).get2DigitYearStart();
+ }
+
+ /**
+ * Formats a date or time, which is the standard millis
+ * since January 1, 1970, 00:00:00 GMT.
+ * Example: using the US locale:
+ * "yyyy.MM.dd G 'at' HH:mm:ss zzz" ->> 1996.07.10 AD at 15:08:56 PDT
+ * @param cal the calendar whose date-time value is to be formatted into a date-time string
+ * @param toAppendTo where the new date-time text is to be appended
+ * @param pos the formatting position. On input: an alignment field,
+ * if desired. On output: the offsets of the alignment field.
+ * @return the formatted date-time string.
+ * @see DateFormat
+ * @stable ICU 2.0
+ */
+ public StringBuffer format(Calendar cal, StringBuffer toAppendTo,
+         FieldPosition pos) {
+     StringBuffer result;
+     FieldPosition jdkPos = toJDKFieldPosition(pos);
+     // The wrapped formatter's calendar is swapped for the duration of the
+     // call, so hold its lock while it is in that temporary state.
+     synchronized(dateFormat) {
+         java.util.Calendar oldCal = dateFormat.getCalendar();
+         dateFormat.setCalendar(cal.calendar);
+         try {
+             result = dateFormat.format(cal.getTime(), toAppendTo, jdkPos);
+         } finally {
+             // Always restore the formatter's own calendar, even if
+             // formatting throws; otherwise the caller's calendar would stay
+             // installed in the formatter.
+             dateFormat.setCalendar(oldCal);
+         }
+     }
+     // Copy the resulting field offsets back to the caller's position object.
+     if (jdkPos != null) {
+         pos.setBeginIndex(jdkPos.getBeginIndex());
+         pos.setEndIndex(jdkPos.getEndIndex());
+     }
+     return result;
+ }
+
+ /**
+ * Overrides superclass method
+ * @stable ICU 2.0
+ */
+ public void setNumberFormat(NumberFormat newNumberFormat) {
+ // No behavior of its own: forwards straight to the superclass implementation.
+ super.setNumberFormat(newNumberFormat);
+ }
+
+ /**
+ * Overrides DateFormat
+ * @see DateFormat
+ * @stable ICU 2.0
+ */
+ public void parse(String text, Calendar cal, ParsePosition parsePos)
+ {
+     // Note: parsed time zone won't be set in the result calendar
+     Date parsed = dateFormat.parse(text, parsePos);
+     // The JDK parser signals failure by returning null (details are left in
+     // parsePos). Leave the calendar untouched in that case instead of
+     // passing null on to setTime().
+     if (parsed != null) {
+         cal.setTime(parsed);
+     }
+ }
+
+ /**
+ * Return a pattern string describing this date format.
+ * @stable ICU 2.0
+ */
+ public String toPattern() {
+ return ((java.text.SimpleDateFormat)dateFormat).toPattern();
+ }
+
+ /**
+ * Return a localized pattern string describing this date format.
+ * @stable ICU 2.0
+ */
+ public String toLocalizedPattern() {
+ return ((java.text.SimpleDateFormat)dateFormat).toLocalizedPattern();
+ }
+
+ /**
+ * Apply the given unlocalized pattern string to this date format.
+ * @stable ICU 2.0
+ */
+ public void applyPattern(String pat) {
+ ((java.text.SimpleDateFormat)dateFormat).applyPattern(pat);
+ }
+
+ /**
+ * Apply the given localized pattern string to this date format.
+ * @stable ICU 2.0
+ */
+ public void applyLocalizedPattern(String pat) {
+ ((java.text.SimpleDateFormat)dateFormat).applyLocalizedPattern(pat);
+ }
+
+ /**
+ * Gets the date/time formatting data.
+ * @return a copy of the date-time formatting data associated
+ * with this date-time formatter.
+ * @stable ICU 2.0
+ */
+ public DateFormatSymbols getDateFormatSymbols() {
+ return new DateFormatSymbols(((java.text.SimpleDateFormat)dateFormat).getDateFormatSymbols());
+ }
+
+ /**
+ * Allows you to set the date/time formatting data.
+ * @param newFormatSymbols the new symbols
+ * @stable ICU 2.0
+ */
+ public void setDateFormatSymbols(DateFormatSymbols newFormatSymbols) {
+ ((java.text.SimpleDateFormat)dateFormat).setDateFormatSymbols(newFormatSymbols.dfs);
+ }
+
+ // For clone to use
+ private SimpleDateFormat(java.text.SimpleDateFormat sdf) {
+ // Wraps the given JDK formatter as-is; clone() passes in a copy of the delegate.
+ super(sdf);
+ }
+
+ /**
+ * Overrides Cloneable
+ * @stable ICU 2.0
+ */
+ public Object clone() {
+ return new SimpleDateFormat((java.text.SimpleDateFormat)dateFormat.clone());
+ }
+
+ /**
+ * Override hashCode.
+ * Generates the hash code for the SimpleDateFormat object
+ * @stable ICU 2.0
+ */
+ public int hashCode()
+ {
+     // No state of its own to mix in: reuse the superclass hash.
+     final int inheritedHash = super.hashCode();
+     return inheritedHash;
+ }
+
+ /**
+ * Override equals.
+ * @stable ICU 2.0
+ */
+ public boolean equals(Object obj)
+ {
+     // Equality is decided entirely by the superclass.
+     final boolean inheritedEquality = super.equals(obj);
+     return inheritedEquality;
+ }
+
+ /**
+ * Format the object to an attributed string, and return the corresponding iterator
+ * Overrides superclass method.
+ *
+ * @param obj The object to format
+ * @return <code>AttributedCharacterIterator</code> describing the formatted value.
+ *
+ * @stable ICU 3.8
+ */
+ public AttributedCharacterIterator formatToCharacterIterator(Object obj) {
+     AttributedCharacterIterator it = dateFormat.formatToCharacterIterator(obj);
+
+     // Extract formatted String first
+     StringBuilder sb = new StringBuilder();
+     for (char c = it.first(); c != CharacterIterator.DONE; c = it.next()) {
+         sb.append(c);
+     }
+
+     // Create AttributedString
+     AttributedString attrstr = new AttributedString(sb.toString());
+
+     // Map JDK Field to ICU Field, one attribute run at a time
+     int idx = 0;
+     it.first();
+     while (idx < it.getEndIndex()) {
+         int end = it.getRunLimit();
+         // Typed map/entry so that getKey() yields an Attribute without a cast
+         Map<Attribute, Object> attributes = it.getAttributes();
+         if (attributes != null) {
+             for (Entry<Attribute, Object> entry : attributes.entrySet()) {
+                 Attribute attr = entry.getKey();
+                 Object val = entry.getValue();
+                 // For a recognized JDK date field both key and value are
+                 // replaced by the matching Field constant; anything else is
+                 // copied over unchanged.
+                 if (attr.equals(java.text.DateFormat.Field.AM_PM)) {
+                     val = attr = Field.AM_PM;
+                 } else if (attr.equals(java.text.DateFormat.Field.DAY_OF_MONTH)) {
+                     val = attr = Field.DAY_OF_MONTH;
+                 } else if (attr.equals(java.text.DateFormat.Field.DAY_OF_WEEK)) {
+                     val = attr = Field.DAY_OF_WEEK;
+                 } else if (attr.equals(java.text.DateFormat.Field.DAY_OF_WEEK_IN_MONTH)) {
+                     val = attr = Field.DAY_OF_WEEK_IN_MONTH;
+                 } else if (attr.equals(java.text.DateFormat.Field.DAY_OF_YEAR)) {
+                     val = attr = Field.DAY_OF_YEAR;
+                 } else if (attr.equals(java.text.DateFormat.Field.ERA)) {
+                     val = attr = Field.ERA;
+                 } else if (attr.equals(java.text.DateFormat.Field.HOUR_OF_DAY0)) {
+                     val = attr = Field.HOUR_OF_DAY0;
+                 } else if (attr.equals(java.text.DateFormat.Field.HOUR_OF_DAY1)) {
+                     val = attr = Field.HOUR_OF_DAY1;
+                 } else if (attr.equals(java.text.DateFormat.Field.HOUR0)) {
+                     val = attr = Field.HOUR0;
+                 } else if (attr.equals(java.text.DateFormat.Field.HOUR1)) {
+                     val = attr = Field.HOUR1;
+                 } else if (attr.equals(java.text.DateFormat.Field.MILLISECOND)) {
+                     val = attr = Field.MILLISECOND;
+                 } else if (attr.equals(java.text.DateFormat.Field.MINUTE)) {
+                     val = attr = Field.MINUTE;
+                 } else if (attr.equals(java.text.DateFormat.Field.MONTH)) {
+                     val = attr = Field.MONTH;
+                 } else if (attr.equals(java.text.DateFormat.Field.SECOND)) {
+                     val = attr = Field.SECOND;
+                 } else if (attr.equals(java.text.DateFormat.Field.TIME_ZONE)) {
+                     val = attr = Field.TIME_ZONE;
+                 } else if (attr.equals(java.text.DateFormat.Field.WEEK_OF_MONTH)) {
+                     val = attr = Field.WEEK_OF_MONTH;
+                 } else if (attr.equals(java.text.DateFormat.Field.WEEK_OF_YEAR)) {
+                     val = attr = Field.WEEK_OF_YEAR;
+                 } else if (attr.equals(java.text.DateFormat.Field.YEAR)) {
+                     val = attr = Field.YEAR;
+                 }
+                 attrstr.addAttribute(attr, val, idx, end);
+             }
+         }
+         // Advance the source iterator to the start of the next run
+         idx = end;
+         while (it.getIndex() < idx) {
+             it.next();
+         }
+     }
+
+     return attrstr.getIterator();
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UFormat.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UFormat.java
new file mode 100644
index 00000000000..52194dbb5c0
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UFormat.java
@@ -0,0 +1,80 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2003-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.text.Format;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * An abstract class that extends {@link java.text.Format} to provide
+ * additional ICU protocol, specifically, the getLocale()
+ * API. All ICU format classes are subclasses of this class.
+ *
+ * @see com.ibm.icu.util.ULocale
+ * @author weiv
+ * @author Alan Liu
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract class UFormat extends Format {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public UFormat() {}
+
+ /**
+ * Return the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA, the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * Note: This method will be implemented in ICU 3.0; ICU 2.8
+ * contains a partial preview implementation. The actual
+ * locale is returned correctly, but the valid locale is
+ * not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type, or null if this object was not constructed from locale data.
+ * @throws UnsupportedOperationException always in this class: the body unconditionally throws.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final ULocale getLocale(ULocale.Type type) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Set information about the locales that were used to create this
+ * object. If the object was not constructed from locale data,
+ * both arguments should be set to null. Otherwise, neither
+ * should be null. The actual locale must be at the same level or
+ * less specific than the valid locale. This method is intended
+ * for use by factories or other entities that create objects of
+ * this class.
+ * @param valid the most specific locale containing any resource data, or null
+ * @param actual the locale containing data used to construct this
+ * object, or null
+ * @throws UnsupportedOperationException always in this class: the body unconditionally throws.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ */
+ final void setLocale(ULocale valid, ULocale actual) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+ }
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UnicodeSet.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UnicodeSet.java
new file mode 100644
index 00000000000..e99df5f828d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/text/UnicodeSet.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+/*
+ * Empty stub
+ */
+public class UnicodeSet {
+ private UnicodeSet() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java
new file mode 100644
index 00000000000..b09abaef91b
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java
@@ -0,0 +1,2345 @@
+/*
+* Copyright (C) 1996-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*/
+
+package com.ibm.icu.util;
+
+import java.io.Serializable;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+
+import com.ibm.icu.text.DateFormat;
+
+/**
+ * {@icuenhanced java.util.Calendar}.{@icu _usage_}
+ *
+ * <p><code>Calendar</code> is an abstract base class for converting between
+ * a <code>Date</code> object and a set of integer fields such as
+ * <code>YEAR</code>, <code>MONTH</code>, <code>DAY</code>, <code>HOUR</code>,
+ * and so on. (A <code>Date</code> object represents a specific instant in
+ * time with millisecond precision. See
+ * {@link Date}
+ * for information about the <code>Date</code> class.)
+ *
+ * <p>Subclasses of <code>Calendar</code> interpret a <code>Date</code>
+ * according to the rules of a specific calendar system. ICU4J contains
+ * several subclasses implementing different international calendar systems.
+ *
+ *
+ * Like other locale-sensitive classes, Calendar
provides a
+ * class method, getInstance
, for getting a generally useful
+ * object of this type. Calendar
's getInstance
method
+ * returns a calendar of a type appropriate to the locale, whose
+ * time fields have been initialized with the current date and time:
+ *
+ * Calendar rightNow = Calendar.getInstance()
+ *
+ *
+ * When a ULocale
is used by getInstance
, its
+ * 'calendar
' tag and value are retrieved if present. If a recognized
+ * value is supplied, a calendar is provided and configured as appropriate.
+ * Currently recognized tags are "buddhist", "chinese", "coptic", "ethiopic",
+ * "gregorian", "hebrew", "islamic", "islamic-civil", "japanese", and "roc". For
+ * example:
+ * Calendar cal = Calendar.getInstance(new ULocale("en_US@calendar=japanese"));
+ * will return an instance of JapaneseCalendar (using en_US conventions for
+ * minimum days in first week, start day of week, et cetera).
+ *
+ * A Calendar
object can produce all the time field values
+ * needed to implement the date-time formatting for a particular language and
+ * calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
+ * Calendar
defines the range of values returned by certain fields,
+ * as well as their meaning. For example, the first month of the year has value
+ * MONTH
== JANUARY
for all calendars. Other values
+ * are defined by the concrete subclass, such as ERA
and
+ * YEAR
. See individual field documentation and subclass
+ * documentation for details.
+ *
+ *
When a Calendar
is lenient , it accepts a wider range
+ * of field values than it produces. For example, a lenient
+ * GregorianCalendar
interprets MONTH
==
+ * JANUARY
, DAY_OF_MONTH
== 32 as February 1. A
+ * non-lenient GregorianCalendar
throws an exception when given
+ * out-of-range field settings. When calendars recompute field values for
+ * return by get()
, they normalize them. For example, a
+ * GregorianCalendar
always produces DAY_OF_MONTH
+ * values between 1 and the length of the month.
+ *
+ *
Calendar
defines a locale-specific seven day week using two
+ * parameters: the first day of the week and the minimal days in first week
+ * (from 1 to 7). These numbers are taken from the locale resource data when a
+ * Calendar
is constructed. They may also be specified explicitly
+ * through the API.
+ *
+ *
When setting or getting the WEEK_OF_MONTH
or
+ * WEEK_OF_YEAR
fields, Calendar
must determine the
+ * first week of the month or year as a reference point. The first week of a
+ * month or year is defined as the earliest seven day period beginning on
+ * getFirstDayOfWeek()
and containing at least
+ * getMinimalDaysInFirstWeek()
days of that month or year. Weeks
+ * numbered ..., -1, 0 precede the first week; weeks numbered 2, 3,... follow
+ * it. Note that the normalized numbering returned by get()
may be
+ * different. For example, a specific Calendar
subclass may
+ * designate the week before week 1 of a year as week n of the previous
+ * year.
+ *
+ *
When computing a Date
from time fields, two special
+ * circumstances may arise: there may be insufficient information to compute the
+ * Date
(such as only year and month but no day in the month), or
+ * there may be inconsistent information (such as "Tuesday, July 15, 1996" --
+ * July 15, 1996 is actually a Monday).
+ *
+ *
Insufficient information. The calendar will use default
+ * information to specify the missing fields. This may vary by calendar; for
+ * the Gregorian calendar, the default for a field is the same as that of the
+ * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc.
+ *
+ *
Inconsistent information. If fields conflict, the calendar
+ * will give preference to fields set more recently. For example, when
+ * determining the day, the calendar will look for one of the following
+ * combinations of fields. The most recent combination, as determined by the
+ * most recently set single field, will be used.
+ *
+ *
+ *
+ * MONTH + DAY_OF_MONTH
+ * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK
+ * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK
+ * DAY_OF_YEAR
+ * DAY_OF_WEEK + WEEK_OF_YEAR
+ *
+ *
+ * For the time of day:
+ *
+ *
+ *
+ * HOUR_OF_DAY
+ * AM_PM + HOUR
+ *
+ *
+ * Note: for some non-Gregorian calendars, different
+ * fields may be necessary for complete disambiguation. For example, a full
+ * specification of the historical Arabic astronomical calendar requires year,
+ * month, day-of-month and day-of-week in some cases.
+ *
+ *
Note: There are certain possible ambiguities in
+ * interpretation of certain singular times, which are resolved in the
+ * following ways:
+ *
+ * 24:00:00 "belongs" to the following day. That is,
+ * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970
+ *
+ * Although historically not precise, midnight also belongs to "am",
+ * and noon belongs to "pm", so on the same day,
+ * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm
+ *
+ *
+ * The date or time format strings are not part of the definition of a
+ * calendar, as those must be modifiable or overridable by the user at
+ * runtime. Use {@link DateFormat}
+ * to format dates.
+ *
+ *
Field manipulation methods
+ *
+ * Calendar
fields can be changed using three methods:
+ * set()
, add()
, and roll()
.
+ *
+ * set(f, value)
changes field
+ * f
to value
. In addition, it sets an
+ * internal member variable to indicate that field f
has
+ * been changed. Although field f
is changed immediately,
+ * the calendar's milliseconds is not recomputed until the next call to
+ * get()
, getTime()
, or
+ * getTimeInMillis()
is made. Thus, multiple calls to
+ * set()
do not trigger multiple, unnecessary
+ * computations. As a result of changing a field using
+ * set()
, other fields may also change, depending on the
+ * field, the field value, and the calendar system. In addition,
+ * get(f)
will not necessarily return value
+ * after the fields have been recomputed. The specifics are determined by
+ * the concrete calendar class.
+ *
+ * Example : Consider a GregorianCalendar
+ * originally set to August 31, 1999. Calling set(Calendar.MONTH,
+ * Calendar.SEPTEMBER)
sets the calendar to September 31,
+ * 1999. This is a temporary internal representation that resolves to
+ * October 1, 1999 if getTime()
is then called. However, a
+ * call to set(Calendar.DAY_OF_MONTH, 30)
before the call to
+ * getTime()
sets the calendar to September 30, 1999, since
+ * no recomputation occurs after set()
itself.
+ *
+ * add(f, delta)
adds delta
+ * to field f
. This is equivalent to calling set(f,
+ * get(f) + delta)
with two adjustments:
+ *
+ *
+ * Add rule 1 . The value of field f
+ * after the call minus the value of field f
before the
+ * call is delta
, modulo any overflow that has occurred in
+ * field f
. Overflow occurs when a field value exceeds its
+ * range and, as a result, the next larger field is incremented or
+ * decremented and the field value is adjusted back into its range.
+ *
+ * Add rule 2 . If a smaller field is expected to be
+ * invariant, but it is impossible for it to be equal to its
+ * prior value because of changes in its minimum or maximum after field
+ * f
is changed, then its value is adjusted to be as close
+ * as possible to its expected value. A smaller field represents a
+ * smaller unit of time. HOUR
is a smaller field than
+ * DAY_OF_MONTH
. No adjustment is made to smaller fields
+ * that are not expected to be invariant. The calendar system
+ * determines what fields are expected to be invariant.
+ *
+ *
+ * In addition, unlike set()
, add()
forces
+ * an immediate recomputation of the calendar's milliseconds and all
+ * fields.
+ *
+ * Example : Consider a GregorianCalendar
+ * originally set to August 31, 1999. Calling add(Calendar.MONTH,
+ * 13)
sets the calendar to September 30, 2000. Add rule
+ * 1 sets the MONTH
field to September, since
+ * adding 13 months to August gives September of the next year. Since
+ * DAY_OF_MONTH
cannot be 31 in September in a
+ * GregorianCalendar
, add rule 2 sets the
+ * DAY_OF_MONTH
to 30, the closest possible value. Although
+ * it is a smaller field, DAY_OF_WEEK
is not adjusted by
+ * rule 2, since it is expected to change when the month changes in a
+ * GregorianCalendar
.
+ *
+ * roll(f, delta)
adds
+ * delta
to field f
without changing larger
+ * fields. This is equivalent to calling add(f, delta)
with
+ * the following adjustment:
+ *
+ *
+ * Roll rule . Larger fields are unchanged after the
+ * call. A larger field represents a larger unit of
+ * time. DAY_OF_MONTH
is a larger field than
+ * HOUR
.
+ *
+ *
+ * Example : Consider a GregorianCalendar
+ * originally set to August 31, 1999. Calling roll(Calendar.MONTH,
+ * 8)
sets the calendar to April 30, 1999 . Add
+ * rule 1 sets the MONTH
field to April. Using a
+ * GregorianCalendar
, the DAY_OF_MONTH
cannot
+ * be 31 in the month April. Add rule 2 sets it to the closest possible
+ * value, 30. Finally, the roll rule maintains the
+ * YEAR
field value of 1999.
+ *
+ * Example : Consider a GregorianCalendar
+ * originally set to Sunday June 6, 1999. Calling
+ * roll(Calendar.WEEK_OF_MONTH, -1)
sets the calendar to
+ * Tuesday June 1, 1999, whereas calling
+ * add(Calendar.WEEK_OF_MONTH, -1)
sets the calendar to
+ * Sunday May 30, 1999. This is because the roll rule imposes an
+ * additional constraint: The MONTH
must not change when the
+ * WEEK_OF_MONTH
is rolled. Taken together with add rule 1,
+ * the resultant date must be between Tuesday June 1 and Saturday June
+ * 5. According to add rule 2, the DAY_OF_WEEK
, an invariant
+ * when changing the WEEK_OF_MONTH
, is set to Tuesday, the
+ * closest possible value to Sunday (where Sunday is the first day of the
+ * week).
+ *
+ * Usage model . To motivate the behavior of
+ * add()
and roll()
, consider a user interface
+ * component with increment and decrement buttons for the month, day, and
+ * year, and an underlying GregorianCalendar
. If the
+ * interface reads January 31, 1999 and the user presses the month
+ * increment button, what should it read? If the underlying
+ * implementation uses set()
, it might read March 3, 1999. A
+ * better result would be February 28, 1999. Furthermore, if the user
+ * presses the month increment button again, it should read March 31,
+ * 1999, not March 28, 1999. By saving the original date and using either
+ * add()
or roll()
, depending on whether larger
+ * fields should be affected, the user interface can behave as most users
+ * will intuitively expect.
+ *
+ * Note: You should always use {@link #roll roll} and {@link #add add} rather
+ * than attempting to perform arithmetic operations directly on the fields
+ * of a Calendar . It is quite possible for Calendar subclasses
+ * to have fields with non-linear behavior, for example missing months
+ * or days during non-leap years. The subclasses' add and roll
+ * methods will take this into account, while simple arithmetic manipulations
+ * may give invalid results.
+ *
+ *
Calendar Architecture in ICU4J
+ *
+ * Recently the implementation of Calendar
has changed
+ * significantly in order to better support subclassing. The original
+ * Calendar
class was designed to support subclassing, but
+ * it had only one implemented subclass, GregorianCalendar
.
+ * With the implementation of several new calendar subclasses, including
+ * the BuddhistCalendar
, ChineseCalendar
,
+ * HebrewCalendar
, IslamicCalendar
, and
+ * JapaneseCalendar
, the subclassing API has been reworked
+ * thoroughly. This section details the new subclassing API and other
+ * ways in which com.ibm.icu.util.Calendar
differs from
+ * java.util.Calendar
.
+ *
+ *
+ * Changes
+ *
+ * Overview of changes between the classic Calendar
+ * architecture and the new architecture.
+ *
+ *
+ *
+ * The fields[]
array is private
now
+ * instead of protected
. Subclasses must access it
+ * using the methods {@link #internalSet} and
+ * {@link #internalGet}. Motivation: Subclasses should
+ * not directly access data members.
+ *
+ * The time
long word is private
now
+ * instead of protected
. Subclasses may access it using
+ * the method {@link #internalGetTimeInMillis}, which does not
+ * provoke an update. Motivation: Subclasses should not
+ * directly access data members.
+ *
+ * The scope of responsibility of subclasses has been drastically
+ * reduced. As much functionality as possible is implemented in the
+ * Calendar
base class. As a result, it is much easier
+ * to subclass Calendar
. Motivation: Subclasses
+ * should not have to reimplement common code. Certain behaviors are
+ * common across calendar systems: The definition and behavior of
+ * week-related fields and time fields, the arithmetic
+ * ({@link #add(int, int) add} and {@link #roll(int, int) roll}) behavior of many
+ * fields, and the field validation system.
+ *
+ * The subclassing API has been completely redesigned.
+ *
+ * The Calendar
base class contains some Gregorian
+ * calendar algorithmic support that subclasses can use (specifically
+ * in {@link #handleComputeFields}). Subclasses can use the
+ * methods getGregorianXxx()
to obtain precomputed
+ * values. Motivation: This is required by all
+ * Calendar
subclasses in order to implement consistent
+ * time zone behavior, and Gregorian-derived systems can use the
+ * already computed data.
+ *
+ * The FIELD_COUNT
constant has been removed. Use
+ * {@link #getFieldCount}. In addition, framework API has been
+ * added to allow subclasses to define additional fields.
+ * Motivation: The number of fields is not constant across
+ * calendar systems.
+ *
+ * The range of handled dates has been narrowed from +/-
+ * ~300,000,000 years to +/- ~5,000,000 years. In practical terms
+ * this should not affect clients. However, it does mean that client
+ * code cannot be guaranteed well-behaved results with dates such as
+ * Date(Long.MIN_VALUE)
or
+ * Date(Long.MAX_VALUE)
. Instead, the
+ * Calendar
protected constants should be used.
+ * Motivation: With
+ * the addition of the {@link #JULIAN_DAY} field, Julian day
+ * numbers must be restricted to a 32-bit int
. This
+ * restricts the overall supported range. Furthermore, restricting
+ * the supported range simplifies the computations by removing
+ * special case code that was used to accommodate arithmetic overflow
+ * at millis near Long.MIN_VALUE
and
+ * Long.MAX_VALUE
.
+ *
+ * New fields are implemented: {@link #JULIAN_DAY} defines
+ * single-field specification of the
+ * date. {@link #MILLISECONDS_IN_DAY} defines a single-field
+ * specification of the wall time. {@link #DOW_LOCAL} and
+ * {@link #YEAR_WOY} implement localized day-of-week and
+ * week-of-year behavior.
+ *
+ * Subclasses can access protected millisecond constants
+ * defined in Calendar
.
+ *
+ * New API has been added to support calendar-specific subclasses
+ * of DateFormat
.
+ *
+ * Several subclasses have been implemented, representing
+ * various international calendar systems.
+ *
+ *
+ *
+ * Subclass API
+ *
+ * The original Calendar
API was based on the experience
+ * of implementing only a single subclass,
+ * GregorianCalendar
. As a result, all of the subclassing
+ * kinks had not been worked out. The new subclassing API has been
+ * refined based on several implemented subclasses. This includes methods
+ * that must be overridden and methods for subclasses to call. Subclasses
+ * no longer have direct access to fields
and
+ * stamp
. Instead, they have new API to access
+ * these. Subclasses are able to allocate the fields
array
+ * through a protected framework method; this allows subclasses to
+ * specify additional fields.
+ *
+ * More functionality has been moved into the base class. The base
+ * class now contains much of the computational machinery to support the
+ * Gregorian calendar. This is based on two things: (1) Many calendars
+ * are based on the Gregorian calendar (such as the Buddhist and Japanese
+ * imperial calendars). (2) All calendars require basic
+ * Gregorian support in order to handle timezone computations.
+ *
+ * Common computations have been moved into
+ * Calendar
. Subclasses no longer compute the week related
+ * fields and the time related fields. These are commonly handled for all
+ * calendars by the base class.
+ *
+ * Subclass computation of time => fields
+ *
+ *
The {@link #ERA}, {@link #YEAR},
+ * {@link #EXTENDED_YEAR}, {@link #MONTH},
+ * {@link #DAY_OF_MONTH}, and {@link #DAY_OF_YEAR} fields are
+ * computed by the subclass, based on the Julian day. All other fields
+ * are computed by Calendar
.
+ *
+ *
+ *
+ * Subclasses should implement {@link #handleComputeFields}
+ * to compute the {@link #ERA}, {@link #YEAR},
+ * {@link #EXTENDED_YEAR}, {@link #MONTH},
+ * {@link #DAY_OF_MONTH}, and {@link #DAY_OF_YEAR} fields,
+ * based on the value of the {@link #JULIAN_DAY} field. If there
+ * are calendar-specific fields not defined by Calendar
,
+ * they must also be computed. These are the only fields that the
+ * subclass should compute. All other fields are computed by the base
+ * class, so time and week fields behave in a consistent way across
+ * all calendars. The default version of this method in
+ * Calendar
implements a proleptic Gregorian
+ * calendar. Within this method, subclasses may call
+ * getGregorianXxx()
to obtain the Gregorian calendar
+ * month, day of month, and extended year for the given date.
+ *
+ *
+ *
+ * Subclass computation of fields => time
+ *
+ *
The interpretation of most field values is handled entirely by
+ * Calendar
. Calendar
determines which fields
+ * are set, which are not, which are set more recently, and so on. In
+ * addition, Calendar
handles the computation of the time
+ * from the time fields and handles the week-related fields. The only
+ * thing the subclass must do is determine the extended year, based on
+ * the year fields, and then, given an extended year and a month, it must
+ * return a Julian day number.
+ *
+ *
+ *
+ * Subclasses should implement {@link #handleGetExtendedYear}
+ * to return the extended year for this calendar system, based on the
+ * {@link #YEAR}, {@link #EXTENDED_YEAR}, and any fields that
+ * the calendar system uses that are larger than a year, such as
+ * {@link #ERA}.
+ *
+ * Subclasses should implement {@link #handleComputeMonthStart}
+ * to return the Julian day number
+ * associated with a month and extended year. This is the Julian day
+ * number of the day before the first day of the month. The month
+ * number is zero-based. This computation should not depend on any
+ * field values.
+ *
+ *
+ *
+ * Other methods
+ *
+ *
+ *
+ * Subclasses should implement {@link #handleGetMonthLength}
+ * to return the number of days in a
+ * given month of a given extended year. The month number, as always,
+ * is zero-based.
+ *
+ * Subclasses should implement {@link #handleGetYearLength}
+ * to return the number of days in the given
+ * extended year. This method is used by
+ * computeWeekFields to compute the
+ * {@link #WEEK_OF_YEAR} and {@link #YEAR_WOY} fields.
+ *
+ * Subclasses should implement {@link #handleGetLimit}
+ * to return the protected values of a field, depending on the value of
+ * limitType
. This method only needs to handle the
+ * fields {@link #ERA}, {@link #YEAR}, {@link #MONTH},
+ * {@link #WEEK_OF_YEAR}, {@link #WEEK_OF_MONTH},
+ * {@link #DAY_OF_MONTH}, {@link #DAY_OF_YEAR},
+ * {@link #DAY_OF_WEEK_IN_MONTH}, {@link #YEAR_WOY}, and
+ * {@link #EXTENDED_YEAR}. Other fields are invariant (with
+ * respect to calendar system) and are handled by the base
+ * class.
+ *
+ * Optionally, subclasses may override {@link #validateField}
+ * to check any subclass-specific fields. If the
+ * field's value is out of range, the method should throw an
+ * IllegalArgumentException
. The method may call
+ * super.validateField(field)
to handle fields in a
+ * generic way, that is, to compare them to the range
+ * getMinimum(field)
..getMaximum(field)
.
+ *
+ * Optionally, subclasses may override
+ * {@link #handleCreateFields} to create an int[]
+ * array large enough to hold the calendar's fields. This is only
+ * necessary if the calendar defines additional fields beyond those
+ * defined by Calendar
. The length of the result must be
+ * between the base and maximum field counts.
+ *
+ * Optionally, subclasses may override
+ * {@link #handleGetDateFormat} to create a
+ * DateFormat
appropriate to this calendar. This is only
+ * required if a calendar subclass redefines the use of a field (for
+ * example, changes the {@link #ERA} field from a symbolic field
+ * to a numeric one) or defines an additional field.
+ *
+ * Optionally, subclasses may override {@link #roll roll} and
+ * {@link #add add} to handle fields that are discontinuous. For
+ * example, in the Hebrew calendar the month "Adar I" only
+ * occurs in leap years; in other years the calendar jumps from
+ * Shevat (month #4) to Adar (month #6). The {@link
+ * HebrewCalendar#add HebrewCalendar.add} and {@link
+ * HebrewCalendar#roll HebrewCalendar.roll} methods take this into
+ * account, so that adding 1 month to Shevat gives the proper result
+ * (Adar) in a non-leap year. The protected utility method {@link
+ * #pinField pinField} is often useful when implementing these two
+ * methods.
+ *
+ *
+ *
+ * Normalized behavior
+ *
+ *
The behavior of certain fields has been made consistent across all
+ * calendar systems and implemented in Calendar
.
+ *
+ *
+ *
+ * Time is normalized. Even though some calendar systems transition
+ * between days at sunset or at other times, all ICU4J calendars
+ * transition between days at local zone midnight . This
+ * allows ICU4J to centralize the time computations in
+ * Calendar
and to maintain basic correspondences
+ * between calendar systems. Affected fields: {@link #AM_PM},
+ * {@link #HOUR}, {@link #HOUR_OF_DAY}, {@link #MINUTE},
+ * {@link #SECOND}, {@link #MILLISECOND},
+ * {@link #ZONE_OFFSET}, and {@link #DST_OFFSET}.
+ *
+ * DST behavior is normalized. Daylight savings time behavior is
+ * computed the same for all calendar systems, and depends on the
+ * value of several GregorianCalendar
fields: the
+ * {@link #YEAR}, {@link #MONTH}, and
+ * {@link #DAY_OF_MONTH}. As a result, Calendar
+ * always computes these fields, even for non-Gregorian calendar
+ * systems. These fields are available to subclasses.
+ *
+ * Weeks are normalized. Although locales define the week
+ * differently, in terms of the day on which it starts, and the
+ * designation of week number one of a month or year, they all use a
+ * common mechanism. Furthermore, the day of the week has a simple
+ * and consistent definition throughout history. For example,
+ * although the Gregorian calendar introduced a discontinuity when
+ * first instituted, the day of week was not disrupted. For this
+ * reason, the fields {@link #DAY_OF_WEEK}, WEEK_OF_YEAR,
+ * WEEK_OF_MONTH
, {@link #DAY_OF_WEEK_IN_MONTH},
+ * {@link #DOW_LOCAL}, {@link #YEAR_WOY} are all computed in
+ * a consistent way in the base class, based on the
+ * {@link #EXTENDED_YEAR}, {@link #DAY_OF_YEAR},
+ * {@link #MONTH}, and {@link #DAY_OF_MONTH}, which are
+ * computed by the subclass.
+ *
+ *
+ *
+ * Supported range
+ *
+ *
The allowable range of Calendar
has been
+ * narrowed. GregorianCalendar
used to attempt to support
+ * the range of dates with millisecond values from
+ * Long.MIN_VALUE
to Long.MAX_VALUE
. This
+ * introduced awkward constructions (hacks) which slowed down
+ * performance. It also introduced non-uniform behavior at the
+ * boundaries. The new Calendar
protocol specifies the
+ * maximum range of supportable dates as those having Julian day numbers
+ * of -0x7F000000
to +0x7F000000
. This
+ * corresponds to years from ~5,000,000 BCE to ~5,000,000 CE. Programmers
+ * should use the protected constants in Calendar
to
+ * specify an extremely early or extremely late date.
+ *
+ * General notes
+ *
+ *
+ *
+ * Calendar implementations are proleptic. For example,
+ * even though the Gregorian calendar was not instituted until the
+ * 16th century, the GregorianCalendar
class supports
+ * dates before the historical onset of the calendar by extending the
+ * calendar system backward in time. Similarly, the
+ * HebrewCalendar
extends backward before the start of
+ * its epoch into zero and negative years. Subclasses do not throw
+ * exceptions because a date precedes the historical start of a
+ * calendar system. Instead, they implement
+ * {@link #handleGetLimit} to return appropriate limits on
+ * {@link #YEAR}, {@link #ERA}, etc. fields. Then, if the
+ * calendar is set to not be lenient, out-of-range field values will
+ * trigger an exception.
+ *
+ * Calendar system subclasses compute an extended
+ * year. This differs from the {@link #YEAR} field in that
+ * it ranges over all integer values, including zero and negative
+ * values, and it encapsulates the information of the
+ * {@link #YEAR} field and all larger fields. Thus, for the
+ * Gregorian calendar, the {@link #EXTENDED_YEAR} is computed as
+ * {@code ERA==AD ? YEAR : 1-YEAR}. Another example is the Mayan
+ * long count, which has years ({@code TUN}) and nested cycles
+ * of years ({@code KATUN} and {@code BAKTUN}). The Mayan
+ * {@link #EXTENDED_YEAR} is computed as {@code TUN + 20 * (KATUN
+ * + 20 * BAKTUN)}. The {@code Calendar} base class uses
+ * the {@link #EXTENDED_YEAR} field to compute the week-related
+ * fields.
+ *
+ *
+ *
+ * @see Date
+ * @see GregorianCalendar
+ * @see TimeZone
+ * @see DateFormat
+ * @author Mark Davis, David Goldsmith, Chen-Lieh Huang, Alan Liu, Laura Werner
+ * @stable ICU 2.0
+ */
+public class Calendar implements Serializable, Cloneable, Comparable {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @internal
+ */
+ public final java.util.Calendar calendar;
+
+    /**
+     * Creates a wrapper that stores the given JDK calendar as its delegate.
+     *
+     * @internal
+     * @param delegate the Calendar to which to delegate
+     */
+    public Calendar(java.util.Calendar delegate) {
+        calendar = delegate;
+    }
+
+ // Data flow in Calendar
+ // ---------------------
+
+ // The current time is represented in two ways by Calendar: as UTC
+ // milliseconds from the epoch start (1 January 1970 0:00 UTC), and as local
+ // fields such as MONTH, HOUR, AM_PM, etc. It is possible to compute the
+ // millis from the fields, and vice versa. The data needed to do this
+ // conversion is encapsulated by a TimeZone object owned by the Calendar.
+ // The data provided by the TimeZone object may also be overridden if the
+ // user sets the ZONE_OFFSET and/or DST_OFFSET fields directly. The class
+ // keeps track of what information was most recently set by the caller, and
+ // uses that to compute any other information as needed.
+
+ // If the user sets the fields using set(), the data flow is as follows.
+ // This is implemented by the Calendar subclass's computeTime() method.
+ // During this process, certain fields may be ignored. The disambiguation
+ // algorithm for resolving which fields to pay attention to is described
+ // above.
+
+ // local fields (YEAR, MONTH, DATE, HOUR, MINUTE, etc.)
+ // |
+ // | Using Calendar-specific algorithm
+ // V
+ // local standard millis
+ // |
+ // | Using TimeZone or user-set ZONE_OFFSET / DST_OFFSET
+ // V
+ // UTC millis (in time data member)
+
+ // If the user sets the UTC millis using setTime(), the data flow is as
+ // follows. This is implemented by the Calendar subclass's computeFields()
+ // method.
+
+ // UTC millis (in time data member)
+ // |
+ // | Using TimeZone getOffset()
+ // V
+ // local standard millis
+ // |
+ // | Using Calendar-specific algorithm
+ // V
+ // local fields (YEAR, MONTH, DATE, HOUR, MINUTE, etc.)
+
+ // In general, a round trip from fields, through local and UTC millis, and
+ // back out to fields is made when necessary. This is implemented by the
+ // complete() method. Resolving a partial set of fields into a UTC millis
+ // value allows all remaining fields to be generated from that value. If
+ // the Calendar is lenient, the fields are also renormalized to standard
+ // ranges when they are regenerated.
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * era, e.g., AD or BC in the Julian calendar. This is a calendar-specific
+ * value; see subclass documentation.
+ * @see GregorianCalendar#AD
+ * @see GregorianCalendar#BC
+ * @stable ICU 2.0
+ */
+ public final static int ERA = 0;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * year. This is a calendar-specific value; see subclass documentation.
+ * @stable ICU 2.0
+ */
+ public final static int YEAR = 1;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * month. This is a calendar-specific value. The first month of the year is
+ * {@code JANUARY}; the last depends on the number of months in a year.
+ * @see #JANUARY
+ * @see #FEBRUARY
+ * @see #MARCH
+ * @see #APRIL
+ * @see #MAY
+ * @see #JUNE
+ * @see #JULY
+ * @see #AUGUST
+ * @see #SEPTEMBER
+ * @see #OCTOBER
+ * @see #NOVEMBER
+ * @see #DECEMBER
+ * @see #UNDECIMBER
+ * @stable ICU 2.0
+ */
+ public final static int MONTH = 2;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * week number within the current year. The first week of the year, as
+ * defined by {@link #getFirstDayOfWeek()} and
+ * {@link #getMinimalDaysInFirstWeek()}, has value 1. Subclasses define
+ * the value of {@link #WEEK_OF_YEAR} for days before the first week of
+ * the year.
+ * @see #getFirstDayOfWeek
+ * @see #getMinimalDaysInFirstWeek
+ * @stable ICU 2.0
+ */
+ public final static int WEEK_OF_YEAR = 3;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * week number within the current month. The first week of the month, as
+ * defined by {@link #getFirstDayOfWeek()} and
+ * {@link #getMinimalDaysInFirstWeek()}, has value 1. Subclasses define
+ * the value of {@link #WEEK_OF_MONTH} for days before the first week of
+ * the month.
+ * @see #getFirstDayOfWeek
+ * @see #getMinimalDaysInFirstWeek
+ * @stable ICU 2.0
+ */
+ public final static int WEEK_OF_MONTH = 4;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * day of the month. This is a synonym for {@link #DAY_OF_MONTH}.
+ * The first day of the month has value 1.
+ * @see #DAY_OF_MONTH
+ * @stable ICU 2.0
+ */
+ public final static int DATE = 5;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * day of the month. This is a synonym for {@link #DATE}.
+ * The first day of the month has value 1.
+ * @see #DATE
+ * @stable ICU 2.0
+ */
+ public final static int DAY_OF_MONTH = 5;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the day
+ * number within the current year. The first day of the year has value 1.
+ * @stable ICU 2.0
+ */
+ public final static int DAY_OF_YEAR = 6;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the day
+ * of the week. This field takes values {@link #SUNDAY},
+ * {@link #MONDAY}, {@link #TUESDAY}, {@link #WEDNESDAY},
+ * {@link #THURSDAY}, {@link #FRIDAY}, and {@link #SATURDAY}.
+ * @see #SUNDAY
+ * @see #MONDAY
+ * @see #TUESDAY
+ * @see #WEDNESDAY
+ * @see #THURSDAY
+ * @see #FRIDAY
+ * @see #SATURDAY
+ * @stable ICU 2.0
+ */
+ public final static int DAY_OF_WEEK = 7;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * ordinal number of the day of the week within the current month. Together
+ * with the {@link #DAY_OF_WEEK} field, this uniquely specifies a day
+ * within a month. Unlike {@link #WEEK_OF_MONTH} and
+ * {@link #WEEK_OF_YEAR}, this field's value does not depend on
+ * {@link #getFirstDayOfWeek()} or
+ * {@link #getMinimalDaysInFirstWeek()}. {@code DAY_OF_MONTH 1}
+ * through {@code 7} always correspond to {@code DAY_OF_WEEK_IN_MONTH 1};
+ * {@code 8} through {@code 14} correspond to
+ * {@code DAY_OF_WEEK_IN_MONTH 2}, and so on.
+ * {@code DAY_OF_WEEK_IN_MONTH 0} indicates the week before
+ * {@code DAY_OF_WEEK_IN_MONTH 1}. Negative values count back from the
+ * end of the month, so the last Sunday of a month is specified as
+ * {@code DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1}. Because
+ * negative values count backward they will usually be aligned differently
+ * within the month than positive values. For example, if a month has 31
+ * days, {@code DAY_OF_WEEK_IN_MONTH -1} will overlap
+ * {@code DAY_OF_WEEK_IN_MONTH 5} and the end of {@code 4}.
+ * @see #DAY_OF_WEEK
+ * @see #WEEK_OF_MONTH
+ * @stable ICU 2.0
+ */
+ public final static int DAY_OF_WEEK_IN_MONTH = 8;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating
+ * whether the {@code HOUR} is before or after noon.
+ * E.g., at 10:04:15.250 PM the {@code AM_PM} is {@code PM}.
+ * @see #AM
+ * @see #PM
+ * @see #HOUR
+ * @stable ICU 2.0
+ */
+ public final static int AM_PM = 9;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * hour of the morning or afternoon. {@code HOUR} is used for the 12-hour
+ * clock.
+ * E.g., at 10:04:15.250 PM the {@code HOUR} is 10.
+ * @see #AM_PM
+ * @see #HOUR_OF_DAY
+ * @stable ICU 2.0
+ */
+ public final static int HOUR = 10;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * hour of the day. {@code HOUR_OF_DAY} is used for the 24-hour clock.
+ * E.g., at 10:04:15.250 PM the {@code HOUR_OF_DAY} is 22.
+ * @see #HOUR
+ * @stable ICU 2.0
+ */
+ public final static int HOUR_OF_DAY = 11;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * minute within the hour.
+ * E.g., at 10:04:15.250 PM the {@code MINUTE} is 4.
+ * @stable ICU 2.0
+ */
+ public final static int MINUTE = 12;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * second within the minute.
+ * E.g., at 10:04:15.250 PM the {@code SECOND} is 15.
+ * @stable ICU 2.0
+ */
+ public final static int SECOND = 13;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * millisecond within the second.
+ * E.g., at 10:04:15.250 PM the {@code MILLISECOND} is 250.
+ * @stable ICU 2.0
+ */
+ public final static int MILLISECOND = 14;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * raw offset from GMT in milliseconds.
+ * @stable ICU 2.0
+ */
+ public final static int ZONE_OFFSET = 15;
+
+ /**
+ * Field number for {@code get} and {@code set} indicating the
+ * daylight savings offset in milliseconds.
+ * @stable ICU 2.0
+ */
+ public final static int DST_OFFSET = 16;
+
+ /**
+ * {@icu} Field number for {@code get()} and {@code set()}
+ * indicating the extended year corresponding to the
+ * {@link #WEEK_OF_YEAR} field. This may be one greater or less
+ * than the value of {@link #EXTENDED_YEAR}.
+ * @stable ICU 2.0
+ */
+ public static final int YEAR_WOY = 17;
+
+ /**
+ * {@icu} Field number for {@code get()} and {@code set()}
+ * indicating the localized day of week. This will be a value from 1
+ * to 7 inclusive, with 1 being the localized first day of the week.
+ * @stable ICU 2.0
+ */
+ public static final int DOW_LOCAL = 18;
+
+ /**
+ * {@icu} Field number for {@code get()} and {@code set()}
+ * indicating the extended year. This is a single number designating
+ * the year of this calendar system, encompassing all supra-year
+ * fields. For example, for the Julian calendar system, year numbers
+ * are positive, with an era of BCE or CE. An extended year value for
+ * the Julian calendar system assigns positive values to CE years and
+ * negative values to BCE years, with 1 BCE being year 0.
+ * @stable ICU 2.0
+ */
+ public static final int EXTENDED_YEAR = 19;
+
+ /**
+ * {@icu} Field number for {@code get()} and {@code set()}
+ * indicating the modified Julian day number. This is different from
+ * the conventional Julian day number in two regards. First, it
+ * demarcates days at local zone midnight, rather than noon GMT.
+ * Second, it is a local number; that is, it depends on the local time
+ * zone. It can be thought of as a single number that encompasses all
+ * the date-related fields.
+ * @stable ICU 2.0
+ */
+ public static final int JULIAN_DAY = 20;
+
+ /**
+ * {@icu} Field number for {@code get()} and {@code set()}
+ * indicating the milliseconds in the day. This ranges from 0 to
+ * 23:59:59.999 (regardless of DST). This field behaves
+ * exactly like a composite of all time-related fields, not
+ * including the zone fields. As such, it also reflects
+ * discontinuities of those fields on DST transition days. On a day of
+ * DST onset, it will jump forward. On a day of DST cessation, it will
+ * jump backward. This reflects the fact that it must be combined with
+ * the DST_OFFSET field to obtain a unique local time value.
+ * @stable ICU 2.0
+ */
+ public static final int MILLISECONDS_IN_DAY = 21;
+
+ /**
+ * {@icu} Field indicating whether or not the current month is a leap month.
+ * Should have a value of 0 for non-leap months, and 1 for leap months.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int IS_LEAP_MONTH = 22;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Sunday.
+ * @stable ICU 2.0
+ */
+ public final static int SUNDAY = 1;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Monday.
+ * @stable ICU 2.0
+ */
+ public final static int MONDAY = 2;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Tuesday.
+ * @stable ICU 2.0
+ */
+ public final static int TUESDAY = 3;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Wednesday.
+ * @stable ICU 2.0
+ */
+ public final static int WEDNESDAY = 4;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Thursday.
+ * @stable ICU 2.0
+ */
+ public final static int THURSDAY = 5;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Friday.
+ * @stable ICU 2.0
+ */
+ public final static int FRIDAY = 6;
+
+ /**
+ * Value of the {@code DAY_OF_WEEK} field indicating
+ * Saturday.
+ * @stable ICU 2.0
+ */
+ public final static int SATURDAY = 7;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * first month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int JANUARY = 0;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * second month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int FEBRUARY = 1;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * third month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int MARCH = 2;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * fourth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int APRIL = 3;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * fifth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int MAY = 4;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * sixth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int JUNE = 5;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * seventh month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int JULY = 6;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * eighth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int AUGUST = 7;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * ninth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int SEPTEMBER = 8;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * tenth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int OCTOBER = 9;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * eleventh month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int NOVEMBER = 10;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * twelfth month of the year.
+ * @stable ICU 2.0
+ */
+ public final static int DECEMBER = 11;
+
+ /**
+ * Value of the {@code MONTH} field indicating the
+ * thirteenth month of the year. Although {@link GregorianCalendar}
+ * does not use this value, lunar calendars do.
+ * @stable ICU 2.0
+ */
+ public final static int UNDECIMBER = 12;
+
+ /**
+ * Value of the {@code AM_PM} field indicating the
+ * period of the day from midnight to just before noon.
+ * @stable ICU 2.0
+ */
+ public final static int AM = 0;
+
+ /**
+ * Value of the {@code AM_PM} field indicating the
+ * period of the day from noon to just before midnight.
+ * @stable ICU 2.0
+ */
+ public final static int PM = 1;
+
+ /**
+ * {@icu} Value returned by getDayOfWeekType(int dayOfWeek) to indicate a
+ * weekday.
+ * @see #WEEKEND
+ * @see #WEEKEND_ONSET
+ * @see #WEEKEND_CEASE
+ * @see #getDayOfWeekType
+ * @stable ICU 2.0
+ */
+ public static final int WEEKDAY = 0;
+
+ /**
+ * {@icu} Value returned by getDayOfWeekType(int dayOfWeek) to indicate a
+ * weekend day.
+ * @see #WEEKDAY
+ * @see #WEEKEND_ONSET
+ * @see #WEEKEND_CEASE
+ * @see #getDayOfWeekType
+ * @stable ICU 2.0
+ */
+ public static final int WEEKEND = 1;
+
+ /**
+ * {@icu} Value returned by getDayOfWeekType(int dayOfWeek) to indicate a
+ * day that starts as a weekday and transitions to the weekend.
+ * Call getWeekendTransition() to get the point of transition.
+ * @see #WEEKDAY
+ * @see #WEEKEND
+ * @see #WEEKEND_CEASE
+ * @see #getDayOfWeekType
+ * @stable ICU 2.0
+ */
+ public static final int WEEKEND_ONSET = 2;
+
+ /**
+ * {@icu} Value returned by getDayOfWeekType(int dayOfWeek) to indicate a
+ * day that starts as the weekend and transitions to a weekday.
+ * Call getWeekendTransition() to get the point of transition.
+ * @see #WEEKDAY
+ * @see #WEEKEND
+ * @see #WEEKEND_ONSET
+ * @see #getDayOfWeekType
+ * @stable ICU 2.0
+ */
+ public static final int WEEKEND_CEASE = 3;
+
+    /**
+     * Creates a calendar for the default time zone and the default locale by
+     * chaining to {@link #Calendar(TimeZone, ULocale)}.
+     * @see TimeZone#getDefault
+     * @stable ICU 2.0
+     */
+    protected Calendar() {
+        this(TimeZone.getDefault(), ULocale.getDefault());
+    }
+
+    /**
+     * Creates a calendar for the given time zone and JDK locale. The locale is
+     * converted with {@code ULocale.forLocale} and construction continues in
+     * {@link #Calendar(TimeZone, ULocale)}.
+     * @param zone the time zone to use
+     * @param aLocale the locale for the week data
+     * @stable ICU 2.0
+     */
+    protected Calendar(TimeZone zone, Locale aLocale) {
+        this(zone, ULocale.forLocale(aLocale));
+    }
+
+ /**
+ * Constructs a calendar with the specified time zone and locale.
+ * @param zone the time zone to use
+ * @param locale the ulocale for the week data
+ * @stable ICU 3.2
+ */
+ protected Calendar(TimeZone zone, ULocale locale)
+ {
+ calendar = java.util.Calendar.getInstance(zone.timeZone, locale.toLocale());
+ }
+
+    /**
+     * Returns a calendar using the default time zone and locale, wrapping the
+     * result of {@code java.util.Calendar.getInstance()}.
+     * @return a Calendar.
+     * @stable ICU 2.0
+     */
+    public static synchronized Calendar getInstance() {
+        final java.util.Calendar delegate = java.util.Calendar.getInstance();
+        return new Calendar(delegate);
+    }
+
+    /**
+     * Returns a calendar using the specified time zone and the default locale.
+     * @param zone the time zone to use
+     * @return a Calendar.
+     * @stable ICU 2.0
+     */
+    public static synchronized Calendar getInstance(TimeZone zone) {
+        final java.util.Calendar delegate = java.util.Calendar.getInstance(zone.timeZone);
+        return new Calendar(delegate);
+    }
+
+    /**
+     * Returns a calendar using the default time zone and the specified locale.
+     * @param aLocale the locale for the week data
+     * @return a Calendar.
+     * @stable ICU 2.0
+     */
+    public static synchronized Calendar getInstance(Locale aLocale) {
+        final java.util.Calendar delegate = java.util.Calendar.getInstance(aLocale);
+        return new Calendar(delegate);
+    }
+
+    /**
+     * Returns a calendar using the default time zone and the specified locale.
+     * The ulocale is converted to a JDK locale before the delegate is created.
+     * @param locale the ulocale for the week data
+     * @return a Calendar.
+     * @stable ICU 3.2
+     */
+    public static synchronized Calendar getInstance(ULocale locale) {
+        final Locale jdkLocale = locale.toLocale();
+        return new Calendar(java.util.Calendar.getInstance(jdkLocale));
+    }
+
+    /**
+     * Returns a calendar with the specified time zone and locale.
+     * @param zone the time zone to use
+     * @param aLocale the locale for the week data
+     * @return a Calendar.
+     * @stable ICU 2.0
+     */
+    public static synchronized Calendar getInstance(TimeZone zone, Locale aLocale) {
+        final java.util.Calendar delegate =
+                java.util.Calendar.getInstance(zone.timeZone, aLocale);
+        return new Calendar(delegate);
+    }
+
+    /**
+     * Returns a calendar with the specified time zone and locale. The ulocale
+     * is converted to a JDK locale before the delegate is created.
+     * @param zone the time zone to use
+     * @param locale the ulocale for the week data
+     * @return a Calendar.
+     * @stable ICU 3.2
+     */
+    public static synchronized Calendar getInstance(TimeZone zone, ULocale locale) {
+        final java.util.Calendar delegate =
+                java.util.Calendar.getInstance(zone.timeZone, locale.toLocale());
+        return new Calendar(delegate);
+    }
+
+    /**
+     * Returns the list of locales for which Calendars are installed, as
+     * reported by {@code java.util.Calendar.getAvailableLocales()}.
+     * @return the list of locales for which Calendars are installed.
+     * @stable ICU 2.0
+     */
+    public static Locale[] getAvailableLocales() {
+        final Locale[] installed = java.util.Calendar.getAvailableLocales();
+        return installed;
+    }
+
+    /**
+     * {@icu} Returns the list of locales for which Calendars are installed.
+     * The list is built once and cached; every call returns a fresh copy so
+     * callers cannot modify the cached array.
+     * @return the list of locales for which Calendars are installed.
+     * @draft ICU 3.2 (retain)
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static ULocale[] getAvailableULocales() {
+        // Read the volatile cache once; the local copy is what gets cloned.
+        ULocale[] cached = availableLocales;
+        if (cached == null) {
+            synchronized (Calendar.class) {
+                cached = availableLocales;
+                if (cached == null) {
+                    // NOTE(review): getAvailableLocales() asks java.util.Calendar,
+                    // while this method asks java.util.Locale - confirm whether
+                    // the two are meant to report the same set.
+                    Locale[] locales = Locale.getAvailableLocales();
+                    // Fill a local array completely before publishing it. The
+                    // unsynchronized null check above must never observe an
+                    // array whose elements are still null.
+                    cached = new ULocale[locales.length];
+                    for (int i = 0; i < locales.length; i++) {
+                        cached[i] = ULocale.forLocale(locales[i]);
+                    }
+                    availableLocales = cached;
+                }
+            }
+        }
+        return cached.clone();
+    }
+    /** Lazily built cache for {@link #getAvailableULocales()}; null until first use. */
+    private static volatile ULocale[] availableLocales;
+
+    /**
+     * {@icu} Given a key and a locale, returns an array of string values in a preferred
+     * order that would make a difference. These are all and only those values where
+     * the open (creation) of the service with the locale formed from the input locale
+     * plus input keyword and that value has different behavior than creation with the
+     * input locale alone.
+     *
+     * <p>This implementation does not provide the service and always throws.
+     * @param key           one of the keys supported by this service.  For now, only
+     *                      "calendar" is supported.
+     * @param locale        the locale
+     * @param commonlyUsed  if set to true it will return only commonly used values
+     *                      with the given locale in preferred order.  Otherwise,
+     *                      it will return all the available values for the locale.
+     * @return an array of string values for the given key and the locale.
+     * @throws UnsupportedOperationException always
+     * @stable ICU 4.2
+     */
+    public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
+            boolean commonlyUsed) {
+        final String message = "Method not supported by com.ibm.icu.base";
+        throw new UnsupportedOperationException(message);
+    }
+
+    /**
+     * Returns this Calendar's current time, as reported by the wrapped
+     * JDK calendar.
+     * @return the current time.
+     * @stable ICU 2.0
+     */
+    public final Date getTime() {
+        final Date current = calendar.getTime();
+        return current;
+    }
+
+    /**
+     * Sets this Calendar's current time with the given Date.
+     *
+     * <p>Note: Calling {@code setTime} with {@code Date(Long.MAX_VALUE)}
+     * or {@code Date(Long.MIN_VALUE)} may yield incorrect field values
+     * from {@link #get(int)}.
+     * @param date the given Date.
+     * @stable ICU 2.0
+     */
+    public final void setTime(Date date) {
+        this.calendar.setTime(date);
+    }
+
+    /**
+     * Returns this Calendar's current time as a long.
+     * @return the current time as UTC milliseconds from the epoch.
+     * @stable ICU 2.0
+     */
+    public long getTimeInMillis() {
+        final long millis = calendar.getTimeInMillis();
+        return millis;
+    }
+
+    /**
+     * Sets this Calendar's current time from the given long value.
+     * @param millis the new time in UTC milliseconds from the epoch.
+     * @stable ICU 2.0
+     */
+    public void setTimeInMillis(long millis) {
+        this.calendar.setTimeInMillis(millis);
+    }
+
+    /**
+     * Returns the value for a given time field. The field number is mapped
+     * with {@code getJDKField} before the wrapped calendar is queried.
+     * @param field the given time field.
+     * @return the value for the given time field.
+     * @stable ICU 2.0
+     */
+    public final int get(int field) {
+        final int jdkField = getJDKField(field);
+        return calendar.get(jdkField);
+    }
+
+    /**
+     * Sets the time field with the given value. The field number is mapped
+     * with {@code getJDKField} before the wrapped calendar is updated.
+     * @param field the given time field.
+     * @param value the value to be set for the given time field.
+     * @stable ICU 2.0
+     */
+    public final void set(int field, int value) {
+        final int jdkField = getJDKField(field);
+        calendar.set(jdkField, value);
+    }
+
+    /**
+     * Sets the values for the fields year, month, and date, in that order.
+     * Previous values of other fields are retained.  If this is not desired,
+     * call {@link #clear()} first.
+     * @param year the value used to set the YEAR time field.
+     * @param month the value used to set the MONTH time field.
+     * Month value is 0-based. e.g., 0 for January.
+     * @param date the value used to set the DATE time field.
+     * @stable ICU 2.0
+     */
+    public final void set(int year, int month, int date) {
+        // Each call goes through set(int, int), which maps the field number
+        // and forwards to the wrapped calendar.
+        set(YEAR, year);
+        set(MONTH, month);
+        set(DATE, date);
+    }
+
+    /**
+     * Sets the values for the fields year, month, date, hour, and minute,
+     * in that order. Previous values of other fields are retained.  If this
+     * is not desired, call {@link #clear()} first.
+     * @param year the value used to set the YEAR time field.
+     * @param month the value used to set the MONTH time field.
+     * Month value is 0-based. e.g., 0 for January.
+     * @param date the value used to set the DATE time field.
+     * @param hour the value used to set the HOUR_OF_DAY time field.
+     * @param minute the value used to set the MINUTE time field.
+     * @stable ICU 2.0
+     */
+    public final void set(int year, int month, int date, int hour, int minute) {
+        // Date part first, then the time-of-day fields.
+        set(year, month, date);
+        set(HOUR_OF_DAY, hour);
+        set(MINUTE, minute);
+    }
+
+    /**
+     * Sets the values for the fields year, month, date, hour, minute, and
+     * second, in that order. Previous values of other fields are retained.
+     * If this is not desired, call {@link #clear} first.
+     * @param year the value used to set the YEAR time field.
+     * @param month the value used to set the MONTH time field.
+     * Month value is 0-based. e.g., 0 for January.
+     * @param date the value used to set the DATE time field.
+     * @param hour the value used to set the HOUR_OF_DAY time field.
+     * @param minute the value used to set the MINUTE time field.
+     * @param second the value used to set the SECOND time field.
+     * @stable ICU 2.0
+     */
+    public final void set(int year, int month, int date, int hour, int minute,
+            int second) {
+        // Everything down to the minute is handled by the five-argument form.
+        set(year, month, date, hour, minute);
+        set(SECOND, second);
+    }
+
+    /**
+     * Clears the values of all the time fields.
+     * @stable ICU 2.0
+     */
+    public final void clear() {
+        this.calendar.clear();
+    }
+
+    /**
+     * Clears the value in the given time field.
+     * @param field the time field to be cleared.
+     * @stable ICU 2.0
+     */
+    public final void clear(int field) {
+        final int jdkField = getJDKField(field);
+        calendar.clear(jdkField);
+    }
+
+    /**
+     * Determines if the given time field has a value set.
+     * @param field the time field to check.
+     * @return true if the given time field has a value set; false otherwise.
+     * @stable ICU 2.0
+     */
+    public final boolean isSet(int field) {
+        final int jdkField = getJDKField(field);
+        return calendar.isSet(jdkField);
+    }
+
+    /**
+     * Compares this calendar to the specified object.
+     * The result is {@code true} if and only if the argument is
+     * not {@code null} and is a {@code Calendar} object whose wrapped
+     * JDK calendar is equal to the one wrapped by this object.
+     * @param obj the object to compare with.
+     * @return {@code true} if the objects are the same;
+     * {@code false} otherwise.
+     * @stable ICU 2.0
+     */
+    public boolean equals(Object obj) {
+        // An explicit type test replaces the former catch-all around the cast:
+        // null and foreign types still yield false, but unrelated failures are
+        // no longer silently turned into "not equal".
+        if (!(obj instanceof Calendar)) {
+            return false;
+        }
+        return calendar.equals(((Calendar) obj).calendar);
+    }
+
+    /**
+     * {@icu} Returns true if the given Calendar object is equivalent to this
+     * one.  An equivalent Calendar will behave exactly as this one
+     * does, but it may be set to a different time.  By contrast, for
+     * the equals() method to return true, the other Calendar must
+     * be set to the same time.
+     *
+     * <p>The wrapped calendars are compared by class, leniency, first day of
+     * week, minimal days in first week, and time zone.
+     *
+     * @param other the Calendar to be compared with this Calendar
+     * @return true if all of the compared settings match; false otherwise.
+     * @stable ICU 2.4
+     */
+    public boolean isEquivalentTo(Calendar other) {
+        final java.util.Calendar mine = calendar;
+        final java.util.Calendar theirs = other.calendar;
+        if (mine.getClass() != theirs.getClass()) {
+            return false;
+        }
+        if (mine.isLenient() != theirs.isLenient()) {
+            return false;
+        }
+        if (mine.getFirstDayOfWeek() != theirs.getFirstDayOfWeek()) {
+            return false;
+        }
+        if (mine.getMinimalDaysInFirstWeek() != theirs.getMinimalDaysInFirstWeek()) {
+            return false;
+        }
+        return mine.getTimeZone().equals(theirs.getTimeZone());
+    }
+
+    /**
+     * Returns a hash code for this calendar, taken from the wrapped
+     * JDK calendar.
+     * @return a hash code value for this object.
+     * @stable ICU 2.0
+     */
+    public int hashCode() {
+        final int delegateHash = calendar.hashCode();
+        return delegateHash;
+    }
+
+    /**
+     * Returns the difference in milliseconds between the moment this
+     * calendar is set to and the moment the given calendar or Date object
+     * is set to. If the exact difference does not fit in a {@code long},
+     * the result is clamped to {@code Long.MIN_VALUE} or
+     * {@code Long.MAX_VALUE} so that its sign is always correct.
+     *
+     * @param that a {@code Calendar} or {@code Date}
+     * @return this time minus that time, in milliseconds
+     * @throws IllegalArgumentException if {@code that} is neither a
+     *         Calendar nor a Date (including {@code null})
+     */
+    private long compare(Object that) {
+        long thatMs;
+        if (that instanceof Calendar) {
+            thatMs = ((Calendar)that).getTimeInMillis();
+        } else if (that instanceof Date) {
+            thatMs = ((Date)that).getTime();
+        } else {
+            throw new IllegalArgumentException(that + " is not a Calendar or Date");
+        }
+        long thisMs = getTimeInMillis();
+        long diff = thisMs - thatMs;
+        // Subtraction overflows only when the operands have different signs and
+        // the result's sign differs from the left operand's. Clamp in that case
+        // so callers that look only at the sign are not misled.
+        if (((thisMs ^ thatMs) & (thisMs ^ diff)) < 0) {
+            return thisMs < 0 ? Long.MIN_VALUE : Long.MAX_VALUE;
+        }
+        return diff;
+    }
+
+    /**
+     * Compares the time field records.
+     * Equivalent to comparing result of conversion to UTC.
+     * @param when the Calendar or Date to be compared with this Calendar.
+     * @return true if the current time of this Calendar is before
+     * the time of {@code when}; false otherwise.
+     * @throws IllegalArgumentException if {@code when} is neither a
+     * Calendar nor a Date.
+     * @stable ICU 2.0
+     */
+    public boolean before(Object when) {
+        final long delta = compare(when);
+        return delta < 0;
+    }
+
+    /**
+     * Compares the time field records.
+     * Equivalent to comparing result of conversion to UTC.
+     * @param when the Calendar or Date to be compared with this Calendar.
+     * @return true if the current time of this Calendar is after
+     * the time of {@code when}; false otherwise.
+     * @throws IllegalArgumentException if {@code when} is neither a
+     * Calendar nor a Date.
+     * @stable ICU 2.0
+     */
+    public boolean after(Object when) {
+        final long delta = compare(when);
+        return delta > 0;
+    }
+
+    /**
+     * Returns the maximum value that this field could have, given the
+     * current date.  For example, with the Gregorian date February 3, 1997
+     * and the {@link #DAY_OF_MONTH DAY_OF_MONTH} field, the actual maximum
+     * is 28; for February 3, 1996 it is 29.
+     *
+     * <p>The actual maximum computation ignores smaller fields and the
+     * current value of like-sized fields.  For example, the actual maximum
+     * of the DAY_OF_YEAR or MONTH depends only on the year and supra-year
+     * fields.  The actual maximum of the DAY_OF_MONTH depends, in
+     * addition, on the MONTH field and any other fields at that
+     * granularity (such as IS_LEAP_MONTH).  The
+     * DAY_OF_WEEK_IN_MONTH field does not depend on the current
+     * DAY_OF_WEEK; it returns the maximum for any day of week in the
+     * current month.  Likewise for the WEEK_OF_MONTH and WEEK_OF_YEAR
+     * fields.
+     *
+     * @param field the field whose maximum is desired
+     * @return the maximum of the given field for the current date of this calendar
+     * @see #getMaximum
+     * @see #getLeastMaximum
+     * @stable ICU 2.0
+     */
+    public int getActualMaximum(int field) {
+        final int jdkField = getJDKField(field);
+        return calendar.getActualMaximum(jdkField);
+    }
+
+    /**
+     * Returns the minimum value that this field could have, given the current date.
+     * For most fields, this is the same as {@link #getMinimum getMinimum}
+     * and {@link #getGreatestMinimum getGreatestMinimum}.  However, some fields,
+     * especially those related to week number, are more complicated.
+     *
+     * <p>For example, assume {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek}
+     * returns 4 and {@link #getFirstDayOfWeek getFirstDayOfWeek} returns SUNDAY.
+     * If the first day of the month is Sunday, Monday, Tuesday, or Wednesday
+     * there will be four or more days in the first week, so it will be week number 1,
+     * and {@code getActualMinimum(WEEK_OF_MONTH)} will return 1.  However,
+     * if the first of the month is a Thursday, Friday, or Saturday, there are
+     * not four days in that week, so it is week number 0, and
+     * {@code getActualMinimum(WEEK_OF_MONTH)} will return 0.
+     *
+     * @param field the field whose actual minimum value is desired.
+     * @return the minimum of the given field for the current date of this calendar
+     *
+     * @see #getMinimum
+     * @see #getGreatestMinimum
+     * @stable ICU 2.0
+     */
+    public int getActualMinimum(int field) {
+        final int jdkField = getJDKField(field);
+        return calendar.getActualMinimum(jdkField);
+    }
+
+ /**
+ * Rolls (up/down) a single unit of time on the given field. If the
+ * field is rolled past its maximum allowable value, it will "wrap" back
+ * to its minimum and continue rolling. For
+ * example, to roll the current date up by one day, you can call:
+ *
+ * roll({@link #DATE}, true)
+ *
+ * When rolling on the {@link #YEAR} field, it will roll the year
+ * value in the range between 1 and the value returned by calling
+ * {@link #getMaximum getMaximum}({@link #YEAR}).
+ *
+ * When rolling on certain fields, the values of other fields may conflict and
+ * need to be changed. For example, when rolling the <code>MONTH</code> field
+ * for the Gregorian date 1/31/96 upward, the <code>DAY_OF_MONTH</code> field
+ * must be adjusted so that the result is 2/29/96 rather than the invalid
+ * 2/31/96.
+ *
+ * <p><b>Note:</b> Calling <code>roll(field, true)</code> N times is <em>not</em>
+ * necessarily equivalent to calling <code>roll(field, N)</code>. For example,
+ * imagine that you start with the Gregorian date January 31, 1995. If you call
+ * <code>roll(Calendar.MONTH, 2)</code>, the result will be March 31, 1995.
+ * But if you call <code>roll(Calendar.MONTH, true)</code>, the result will be
+ * February 28, 1995. Calling it one more time will give March 28, 1995, which
+ * is usually not the desired result.
+ *
+ * Note: You should always use roll and add rather
+ * than attempting to perform arithmetic operations directly on the fields
+ * of a Calendar . It is quite possible for Calendar subclasses
+ * to have fields with non-linear behavior, for example missing months
+ * or days during non-leap years. The subclasses' add and roll
+ * methods will take this into account, while simple arithmetic manipulations
+ * may give invalid results.
+ *
+ * @param field the calendar field to roll.
+ *
+ * @param up indicates if the value of the specified time field is to be
+ * rolled up or rolled down. Use <code>true</code> if rolling up,
+ * <code>false</code> otherwise.
+ *
+ * @exception IllegalArgumentException if the field is invalid or refers
+ * to a field that cannot be handled by this method.
+ * @see #roll(int, int)
+ * @see #add
+ * @stable ICU 2.0
+ */
+ public final void roll(int field, boolean up) {
+     // The field id is mapped first, so an unsupported id throws before anything is rolled.
+     final int jdkField = getJDKField(field);
+     calendar.roll(jdkField, up);
+ }
+
+ /**
+ * Rolls (up/down) a specified amount of time on the given field. For
+ * example, to roll the current date up by three days, you can call
+ * <code>roll(Calendar.DATE, 3)</code>. If the
+ * field is rolled past its maximum allowable value, it will "wrap" back
+ * to its minimum and continue rolling.
+ * For example, calling <code>roll(Calendar.DATE, 10)</code>
+ * on a Gregorian calendar set to 4/25/96 will result in the date 4/5/96.
+ *
+ * When rolling on certain fields, the values of other fields may conflict and
+ * need to be changed. For example, when rolling the {@link #MONTH MONTH} field
+ * for the Gregorian date 1/31/96 by +1, the {@link #DAY_OF_MONTH DAY_OF_MONTH} field
+ * must be adjusted so that the result is 2/29/96 rather than the invalid
+ * 2/31/96.
+ *
+ * {@icunote} the ICU implementation of this method is able to roll
+ * all fields except for {@link #ERA ERA}, {@link #DST_OFFSET DST_OFFSET},
+ * and {@link #ZONE_OFFSET ZONE_OFFSET}. Subclasses may, of course, add support for
+ * additional fields in their overrides of roll
.
+ *
+ * Note: You should always use roll and add rather
+ * than attempting to perform arithmetic operations directly on the fields
+ * of a Calendar . It is quite possible for Calendar subclasses
+ * to have fields with non-linear behavior, for example missing months
+ * or days during non-leap years. The subclasses' add and roll
+ * methods will take this into account, while simple arithmetic manipulations
+ * may give invalid results.
+ *
+ * Subclassing:
+ * This implementation of roll
assumes that the behavior of the
+ * field is continuous between its minimum and maximum, which are found by
+ * calling {@link #getActualMinimum getActualMinimum} and {@link #getActualMaximum getActualMaximum}.
+ * For most such fields, simple addition, subtraction, and modulus operations
+ * are sufficient to perform the roll. For week-related fields,
+ * the results of {@link #getFirstDayOfWeek getFirstDayOfWeek} and
+ * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} are also necessary.
+ * Subclasses can override these two methods if their values differ from the defaults.
+ *
+ * Subclasses that have fields for which the assumption of continuity breaks
+ * down must override <code>roll</code> to handle those fields specially.
+ * For example, in the Hebrew calendar the month "Adar I"
+ * only occurs in leap years; in other years the calendar jumps from
+ * Shevat (month #4) to Adar (month #6). The
+ * {@link HebrewCalendar#roll HebrewCalendar.roll} method takes this into account,
+ * so that rolling the month of Shevat by one gives the proper result (Adar) in a
+ * non-leap year.
+ *
+ * @param field the calendar field to roll.
+ * @param amount the amount by which the field should be rolled.
+ *
+ * @exception IllegalArgumentException if the field is invalid or refers
+ * to a field that cannot be handled by this method.
+ * @see #roll(int, boolean)
+ * @see #add
+ * @stable ICU 2.0
+ */
+ public void roll(int field, int amount) {
+     // The field id is mapped first, so an unsupported id throws before anything is rolled.
+     final int jdkField = getJDKField(field);
+     calendar.roll(jdkField, amount);
+ }
+
+ /**
+ * Add a signed amount to a specified field, using this calendar's rules.
+ * For example, to add three days to the current date, you can call
+ * add(Calendar.DATE, 3)
.
+ *
+ * When adding to certain fields, the values of other fields may conflict and
+ * need to be changed. For example, when adding one to the {@link #MONTH MONTH} field
+ * for the Gregorian date 1/31/96, the {@link #DAY_OF_MONTH DAY_OF_MONTH} field
+ * must be adjusted so that the result is 2/29/96 rather than the invalid
+ * 2/31/96.
+ *
+ * {@icunote} The ICU implementation of this method is able to add to
+ * all fields except for {@link #ERA ERA}, {@link #DST_OFFSET DST_OFFSET},
+ * and {@link #ZONE_OFFSET ZONE_OFFSET}. Subclasses may, of course, add support for
+ * additional fields in their overrides of add
.
+ *
+ * Note: You should always use roll and add rather
+ * than attempting to perform arithmetic operations directly on the fields
+ * of a Calendar . It is quite possible for Calendar subclasses
+ * to have fields with non-linear behavior, for example missing months
+ * or days during non-leap years. The subclasses' add and roll
+ * methods will take this into account, while simple arithmetic manipulations
+ * may give invalid results.
+ *
+ * Subclassing:
+ * This implementation of add
assumes that the behavior of the
+ * field is continuous between its minimum and maximum, which are found by
+ * calling {@link #getActualMinimum getActualMinimum} and
+ * {@link #getActualMaximum getActualMaximum}.
+ * For such fields, simple arithmetic operations are sufficient to
+ * perform the add.
+ *
+ * Subclasses that have fields for which this assumption of continuity breaks
+ * down must override <code>add</code> to handle those fields specially.
+ * For example, in the Hebrew calendar the month "Adar I"
+ * only occurs in leap years; in other years the calendar jumps from
+ * Shevat (month #4) to Adar (month #6). The
+ * {@link HebrewCalendar#add HebrewCalendar.add} method takes this into account,
+ * so that adding one month
+ * to a date in Shevat gives the proper result (Adar) in a non-leap year.
+ *
+ * @param field the time field.
+ * @param amount the amount to add to the field.
+ *
+ * @exception IllegalArgumentException if the field is invalid or refers
+ * to a field that cannot be handled by this method.
+ * @see #roll(int, int)
+ * @stable ICU 2.0
+ */
+ public void add(int field, int amount) {
+     // The field id is mapped first, so an unsupported id throws before the time changes.
+     final int jdkField = getJDKField(field);
+     calendar.add(jdkField, amount);
+ }
+
+ private static String _getDisplayName(Calendar cal) {
+ String type = cal.getType();
+ if (type.equals("japanese")) {
+ return "Japanese Calendar";
+ } else if (type.equals("buddhist")) {
+ return "Buddhist Calendar";
+ }
+ return "Gregorian Calendar";
+ }
+
+ /**
+ * Returns the name of this calendar in the language of the given locale.
+ * @stable ICU 2.0
+ */
+ public String getDisplayName(Locale loc) {
+     // loc is not consulted here: the helper returns a fixed English name.
+     final String displayName = _getDisplayName(this);
+     return displayName;
+ }
+
+ /**
+ * Returns the name of this calendar in the language of the given locale.
+ * @stable ICU 3.2
+ */
+ public String getDisplayName(ULocale loc) {
+     // loc is not consulted here: the helper returns a fixed English name.
+     final String displayName = _getDisplayName(this);
+     return displayName;
+ }
+
+ /**
+ * Compares the times (in millis) represented by two
+ * <code>Calendar</code> objects.
+ *
+ * @param that the <code>Calendar</code> to compare to this.
+ * @return <code>0</code> if the time represented by
+ * this <code>Calendar</code> is equal to the time represented
+ * by that <code>Calendar</code>, a value less than
+ * <code>0</code> if the time represented by this is before
+ * the time represented by that, and a value greater than
+ * <code>0</code> if the time represented by this
+ * is after the time represented by that.
+ * @throws NullPointerException if that
+ * <code>Calendar</code> is null.
+ * @throws IllegalArgumentException if the time of that
+ * <code>Calendar</code> can't be obtained because of invalid
+ * calendar values.
+ * @stable ICU 3.4
+ */
+ public int compareTo(Calendar that) {
+     // Compare the wrapped JDK calendars; dereferencing a null 'that' throws NullPointerException.
+     final java.util.Calendar other = that.calendar;
+     return calendar.compareTo(other);
+ }
+
+ //-------------------------------------------------------------------------
+ // Interface for creating custom DateFormats for different types of Calendars
+ //-------------------------------------------------------------------------
+
+ /**
+ * {@icu} Returns a DateFormat
appropriate to this calendar.
+ * Subclasses wishing to specialize this behavior should override
+ * {@link #handleGetDateFormat}.
+ * @stable ICU 2.0
+ */
+ public DateFormat getDateTimeFormat(int dateStyle, int timeStyle, Locale loc) {
+     final boolean wantsDate = dateStyle != DateFormat.NONE;
+     final boolean wantsTime = timeStyle != DateFormat.NONE;
+     if (!wantsDate && !wantsTime) {
+         // Neither a date nor a time part was requested: there is no formatter to return.
+         return null;
+     }
+     // The formatter is always handed a clone, never this instance.
+     final Calendar copy = (Calendar) this.clone();
+     if (wantsDate && wantsTime) {
+         return DateFormat.getDateTimeInstance(copy, dateStyle, timeStyle, loc);
+     }
+     if (wantsDate) {
+         return DateFormat.getDateInstance(copy, dateStyle, loc);
+     }
+     return DateFormat.getTimeInstance(copy, timeStyle, loc);
+ }
+
+ /**
+ * {@icu} Returns a DateFormat
appropriate to this calendar.
+ * Subclasses wishing to specialize this behavior should override
+ * {@link #handleGetDateFormat}.
+ * @stable ICU 3.2
+ */
+ public DateFormat getDateTimeFormat(int dateStyle, int timeStyle, ULocale loc) {
+     // Convert to a JDK locale and reuse the Locale-based overload.
+     final Locale jdkLocale = loc.toLocale();
+     return getDateTimeFormat(dateStyle, timeStyle, jdkLocale);
+ }
+
+ //-------------------------------------------------------------------------
+ // Constants
+ //-------------------------------------------------------------------------
+
+ /**
+ * {@icu} Returns the difference between the given time and the time this
+ * calendar object is set to. If this calendar is set
+ * <em>before</em> the given time, the returned value will be
+ * positive. If this calendar is set <em>after</em> the given
+ * time, the returned value will be negative. The
+ * <code>field</code> parameter specifies the units of the return
+ * value. For example, if <code>fieldDifference(when,
+ * Calendar.MONTH)</code> returns 3, then this calendar is set to
+ * 3 months before <code>when</code>, and possibly some additional
+ * time less than one month.
+ *
+ * <p>As a side effect of this call, this calendar is advanced
+ * toward <code>when</code> by the given amount. That is, calling
+ * this method has the side effect of calling <code>add(field,
+ * n)</code>, where <code>n</code> is the return value.
+ *
+ * <p>Usage: To use this method, call it first with the largest
+ * field of interest, then with progressively smaller fields. For
+ * example:
+ *
+ * <pre>
+ * int y = cal.fieldDifference(when, Calendar.YEAR);
+ * int m = cal.fieldDifference(when, Calendar.MONTH);
+ * int d = cal.fieldDifference(when, Calendar.DATE);</pre>
+ *
+ * computes the difference between <code>cal</code> and
+ * <code>when</code> in years, months, and days.
+ *
+ * <p>Note: <code>fieldDifference()</code> is
+ * <em>asymmetrical</em>. That is, in the following code:
+ *
+ * <pre>
+ * cal.setTime(date1);
+ * int m1 = cal.fieldDifference(date2, Calendar.MONTH);
+ * int d1 = cal.fieldDifference(date2, Calendar.DATE);
+ * cal.setTime(date2);
+ * int m2 = cal.fieldDifference(date1, Calendar.MONTH);
+ * int d2 = cal.fieldDifference(date1, Calendar.DATE);</pre>
+ *
+ * one might expect that <code>m1 == -m2 && d1 == -d2</code>.
+ * However, this is not generally the case, because of
+ * irregularities in the underlying calendar system (e.g., the
+ * Gregorian calendar has a varying number of days per month).
+ *
+ * @param when the date to compare this calendar's time to
+ * @param field the field in which to compute the result
+ * @return the difference, either positive or negative, between
+ * this calendar's time and <code>when</code>, in terms of
+ * <code>field</code>.
+ * @stable ICU 2.0
+ */
+ public int fieldDifference(Date when, int field) {
+ // Overview: searches for the amount n (in units of 'field') that can be added
+ // to the starting time without passing 'when'. It probes with doubling amounts,
+ // then binary-searches between the last two bounds. This assumes add(field, n)
+ // moves the time monotonically with n. On every return path the calendar is
+ // left at start + n, where n is the returned value.
+ // 'min' is the bound known not to pass the target; it stays 0 if the times are equal.
+ int min = 0;
+ long startMs = getTimeInMillis();
+ // A null 'when' fails here with NullPointerException.
+ long targetMs = when.getTime();
+ // Always add from the start millis. This accommodates
+ // operations like adding years from February 29, 2000 up to
+ // February 29, 2004. If 1, 1, 1, 1 is added to the year
+ // field, the DOM gets pinned to 28 and stays there, giving an
+ // incorrect DOM difference of 1. We have to add 1, reset, 2,
+ // reset, 3, reset, 4.
+ if (startMs < targetMs) {
+ // Target is in the future: the result is >= 0.
+ int max = 1;
+ // Find a value that is too large
+ for (;;) {
+ setTimeInMillis(startMs);
+ add(field, max);
+ long ms = getTimeInMillis();
+ if (ms == targetMs) {
+ // Exact hit; the calendar already sits at start + max.
+ return max;
+ } else if (ms > targetMs) {
+ break;
+ } else {
+ max <<= 1;
+ // Doubling 2^30 wraps to a negative int, which signals overflow.
+ if (max < 0) {
+ // Field difference too large to fit into int
+ throw new RuntimeException();
+ }
+ }
+ }
+ // Do a binary search
+ // Invariant: start + min does not pass the target, start + max does.
+ while ((max - min) > 1) {
+ int t = (min + max) / 2;
+ setTimeInMillis(startMs);
+ add(field, t);
+ long ms = getTimeInMillis();
+ if (ms == targetMs) {
+ return t;
+ } else if (ms > targetMs) {
+ max = t;
+ } else {
+ min = t;
+ }
+ }
+ } else if (startMs > targetMs) {
+ // Target is in the past: mirror image of the branch above with negative amounts.
+ //Eclipse stated the following is "dead code"
+ /*if (false) {
+ // This works, and makes the code smaller, but costs
+ // an extra object creation and an extra couple cycles
+ // of calendar computation.
+ setTimeInMillis(targetMs);
+ min = -fieldDifference(new Date(startMs), field);
+ }*/
+ // Despite its name, 'max' holds the negative bound that passes the target here.
+ int max = -1;
+ // Find a value that is too small
+ for (;;) {
+ setTimeInMillis(startMs);
+ add(field, max);
+ long ms = getTimeInMillis();
+ if (ms == targetMs) {
+ // Exact hit; the calendar already sits at start + max.
+ return max;
+ } else if (ms < targetMs) {
+ break;
+ } else {
+ max <<= 1;
+ // Shifting Integer.MIN_VALUE left once more yields 0, which signals overflow.
+ if (max == 0) {
+ // Field difference too large to fit into int
+ throw new RuntimeException();
+ }
+ }
+ }
+ // Do a binary search
+ // Invariant: start + min stays after the target, start + max falls before it.
+ while ((min - max) > 1) {
+ int t = (min + max) / 2;
+ setTimeInMillis(startMs);
+ add(field, t);
+ long ms = getTimeInMillis();
+ if (ms == targetMs) {
+ return t;
+ } else if (ms < targetMs) {
+ max = t;
+ } else {
+ min = t;
+ }
+ }
+ }
+ // Set calendar to end point
+ // The last probe may have left the calendar elsewhere, so re-apply 'min' from the start.
+ setTimeInMillis(startMs);
+ add(field, min);
+ return min;
+ }
+
+ /**
+ * Sets the time zone with the given time zone value.
+ * @param value the given time zone.
+ * @stable ICU 2.0
+ */
+ public void setTimeZone(TimeZone value) {
+     // Unwrap the JDK zone held by the wrapper and hand it to the wrapped calendar.
+     final java.util.TimeZone jdkZone = value.timeZone;
+     calendar.setTimeZone(jdkZone);
+ }
+
+ /**
+ * Returns the time zone.
+ * @return the time zone object associated with this calendar.
+ * @stable ICU 2.0
+ */
+ public TimeZone getTimeZone() {
+     // A new wrapper object is created around the JDK zone on every call.
+     final java.util.TimeZone jdkZone = calendar.getTimeZone();
+     return new TimeZone(jdkZone);
+ }
+
+ /**
+ * Specify whether or not date/time interpretation is to be lenient. With
+ * lenient interpretation, a date such as "February 942, 1996" will be
+ * treated as being equivalent to the 941st day after February 1, 1996.
+ * With strict interpretation, such dates will cause an exception to be
+ * thrown.
+ *
+ * @see DateFormat#setLenient
+ * @stable ICU 2.0
+ */
+ public void setLenient(boolean lenient) {
+     // Leniency is stored only on the wrapped JDK calendar.
+     calendar.setLenient(lenient);
+ }
+
+ /**
+ * Tell whether date/time interpretation is to be lenient.
+ * @stable ICU 2.0
+ */
+ public boolean isLenient() {
+     // Leniency is read back from the wrapped JDK calendar.
+     final boolean lenient = calendar.isLenient();
+     return lenient;
+ }
+
+ /**
+ * Sets what the first day of the week is; e.g., Sunday in US,
+ * Monday in France.
+ * @param value the given first day of the week.
+ * @stable ICU 2.0
+ */
+ public void setFirstDayOfWeek(int value) {
+     // The value is passed to the wrapped JDK calendar unchanged.
+     calendar.setFirstDayOfWeek(value);
+ }
+
+ /**
+ * Returns what the first day of the week is; e.g., Sunday in US,
+ * Monday in France.
+ * @return the first day of the week.
+ * @stable ICU 2.0
+ */
+ public int getFirstDayOfWeek() {
+     // The wrapped JDK calendar's value is returned unchanged.
+     final int firstDay = calendar.getFirstDayOfWeek();
+     return firstDay;
+ }
+
+ /**
+ * Sets what the minimal days required in the first week of the year are.
+ * For example, if the first week is defined as one that contains the first
+ * day of the first month of a year, call the method with value 1. If it
+ * must be a full week, use value 7.
+ * @param value the given minimal days required in the first week
+ * of the year.
+ * @stable ICU 2.0
+ */
+ public void setMinimalDaysInFirstWeek(int value) {
+     // The value is passed to the wrapped JDK calendar unchanged.
+     calendar.setMinimalDaysInFirstWeek(value);
+ }
+
+ /**
+ * Returns what the minimal days required in the first week of the year are;
+ * e.g., if the first week is defined as one that contains the first day
+ * of the first month of a year, getMinimalDaysInFirstWeek returns 1. If
+ * the minimal days required must be a full week, getMinimalDaysInFirstWeek
+ * returns 7.
+ * @return the minimal days required in the first week of the year.
+ * @stable ICU 2.0
+ */
+ public int getMinimalDaysInFirstWeek() {
+     // The wrapped JDK calendar's value is returned unchanged.
+     final int minimalDays = calendar.getMinimalDaysInFirstWeek();
+     return minimalDays;
+ }
+
+ /**
+ * Returns the minimum value for the given time field.
+ * e.g., for Gregorian DAY_OF_MONTH, 1.
+ * @param field the given time field.
+ * @return the minimum value for the given time field.
+ * @stable ICU 2.0
+ */
+ public final int getMinimum(int field) {
+     // Translate the ICU field id to its JDK counterpart, then ask the wrapped calendar.
+     final int jdkField = getJDKField(field);
+     return calendar.getMinimum(jdkField);
+ }
+
+ /**
+ * Returns the maximum value for the given time field.
+ * e.g. for Gregorian DAY_OF_MONTH, 31.
+ * @param field the given time field.
+ * @return the maximum value for the given time field.
+ * @stable ICU 2.0
+ */
+ public final int getMaximum(int field) {
+     // Translate the ICU field id to its JDK counterpart, then ask the wrapped calendar.
+     final int jdkField = getJDKField(field);
+     return calendar.getMaximum(jdkField);
+ }
+
+ /**
+ * Returns the highest minimum value for the given field if varies.
+ * Otherwise same as getMinimum(). For Gregorian, no difference.
+ * @param field the given time field.
+ * @return the highest minimum value for the given time field.
+ * @stable ICU 2.0
+ */
+ public final int getGreatestMinimum(int field) {
+     // Translate the ICU field id to its JDK counterpart, then ask the wrapped calendar.
+     final int jdkField = getJDKField(field);
+     return calendar.getGreatestMinimum(jdkField);
+ }
+
+ /**
+ * Returns the lowest maximum value for the given field if varies.
+ * Otherwise same as getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28.
+ * @param field the given time field.
+ * @return the lowest maximum value for the given time field.
+ * @stable ICU 2.0
+ */
+ public final int getLeastMaximum(int field) {
+     // Translate the ICU field id to its JDK counterpart, then ask the wrapped calendar.
+     final int jdkField = getJDKField(field);
+     return calendar.getLeastMaximum(jdkField);
+ }
+
+ //-------------------------------------------------------------------------
+ // Weekend support -- determining which days of the week are the weekend
+ // in a given locale
+ //-------------------------------------------------------------------------
+
+ /**
+ * {@icu} Returns whether the given day of the week is a weekday, a
+ * weekend day, or a day that transitions from one to the other,
+ * in this calendar system. If a transition occurs at midnight,
+ * then the days before and after the transition will have the
+ * type WEEKDAY or WEEKEND. If a transition occurs at a time
+ * other than midnight, then the day of the transition will have
+ * the type WEEKEND_ONSET or WEEKEND_CEASE. In this case, the
+ * method getWeekendTransition() will return the point of
+ * transition.
+ * @param dayOfWeek either SUNDAY, MONDAY, TUESDAY, WEDNESDAY,
+ * THURSDAY, FRIDAY, or SATURDAY
+ * @return either WEEKDAY, WEEKEND, WEEKEND_ONSET, or
+ * WEEKEND_CEASE
+ * @exception IllegalArgumentException if dayOfWeek is not
+ * between SUNDAY and SATURDAY, inclusive
+ * @see #WEEKDAY
+ * @see #WEEKEND
+ * @see #WEEKEND_ONSET
+ * @see #WEEKEND_CEASE
+ * @see #getWeekendTransition
+ * @see #isWeekend(Date)
+ * @see #isWeekend()
+ * @stable ICU 2.0
+ */
+ public int getDayOfWeekType(int dayOfWeek) {
+     // com.ibm.icu.base treats the weekend as all of Saturday and Sunday, so
+     // only WEEKEND or WEEKDAY is returned, never WEEKEND_ONSET/WEEKEND_CEASE.
+     // The range check uses the named SATURDAY bound (the documented upper
+     // limit) instead of the literal 7.
+     if (dayOfWeek < SUNDAY || dayOfWeek > SATURDAY) {
+         throw new IllegalArgumentException("illegal day of week: " + dayOfWeek);
+     }
+     if (dayOfWeek == SATURDAY || dayOfWeek == SUNDAY) {
+         return WEEKEND;
+     }
+     return WEEKDAY;
+ }
+
+ /**
+ * {@icu} Returns the time during the day at which the weekend begins or end in this
+ * calendar system. If getDayOfWeekType(dayOfWeek) == WEEKEND_ONSET return the time
+ * at which the weekend begins. If getDayOfWeekType(dayOfWeek) == WEEKEND_CEASE
+ * return the time at which the weekend ends. If getDayOfWeekType(dayOfWeek) has some
+ * other value, then throw an exception.
+ * @param dayOfWeek either SUNDAY, MONDAY, TUESDAY, WEDNESDAY,
+ * THURSDAY, FRIDAY, or SATURDAY
+ * @return the milliseconds after midnight at which the
+ * weekend begins or ends
+ * @exception IllegalArgumentException if dayOfWeek is not
+ * WEEKEND_ONSET or WEEKEND_CEASE
+ * @see #getDayOfWeekType
+ * @see #isWeekend(Date)
+ * @see #isWeekend()
+ * @stable ICU 2.0
+ */
+ public int getWeekendTransition(int dayOfWeek) {
+     // Always throws, whatever dayOfWeek is.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * {@icu} Returns true if the given date and time is in the weekend in this calendar
+ * system. Equivalent to calling setTime() followed by isWeekend(). Note: This
+ * method changes the time this calendar is set to.
+ * @param date the date and time
+ * @return true if the given date and time is part of the
+ * weekend
+ * @see #getDayOfWeekType
+ * @see #getWeekendTransition
+ * @see #isWeekend()
+ * @stable ICU 2.0
+ */
+ public boolean isWeekend(Date date) {
+     // Side effect: the wrapped calendar is moved to 'date' and stays there.
+     calendar.setTime(date);
+     final boolean weekend = isWeekend();
+     return weekend;
+ }
+
+ /**
+ * {@icu} Returns true if this Calendar's current date and time is in the weekend in
+ * this calendar system.
+ * @return true if the given date and time is part of the
+ * weekend
+ * @see #getDayOfWeekType
+ * @see #getWeekendTransition
+ * @see #isWeekend(Date)
+ * @stable ICU 2.0
+ */
+ public boolean isWeekend() {
+     // com.ibm.icu.base treats the weekend as all of Saturday and Sunday.
+     // Query the delegate with the JDK's own field id, the same value
+     // getJDKField maps DAY_OF_WEEK to, rather than relying on the ICU
+     // constant having the same number.
+     final int dayOfWeek = calendar.get(java.util.Calendar.DAY_OF_WEEK);
+     return dayOfWeek == SATURDAY || dayOfWeek == SUNDAY;
+ }
+
+ //-------------------------------------------------------------------------
+ // End of weekend support
+ //-------------------------------------------------------------------------
+
+ /**
+ * Overrides Cloneable
+ * @stable ICU 2.0
+ */
+ public Object clone() {
+     // Clone the wrapped JDK calendar and wrap the copy in a new Calendar.
+     final java.util.Calendar delegateCopy = (java.util.Calendar) calendar.clone();
+     return new Calendar(delegateCopy);
+ }
+
+ /**
+ * Returns a string representation of this calendar. This method
+ * is intended to be used only for debugging purposes, and the
+ * format of the returned string may vary between implementations.
+ * The returned string may be empty but may not be <code>null</code>.
+ *
+ * @return a string representation of this calendar.
+ * @stable ICU 2.0
+ */
+ public String toString() {
+     // The text is whatever the wrapped JDK calendar prints.
+     final String text = calendar.toString();
+     return text;
+ }
+
+ /**
+ * {@icu} Returns the number of fields defined by this calendar. Valid field
+ * arguments to <code>set()</code> and <code>get()</code> are
+ * <code>0..getFieldCount()-1</code>.
+ * @stable ICU 2.0
+ */
+ public final int getFieldCount() {
+     final int count = FIELD_COUNT;
+     return count;
+ }
+
+ // Number of fields, defined as one past the IS_LEAP_MONTH field id.
+ private static final int FIELD_COUNT = IS_LEAP_MONTH + 1;
+
+ /**
+ * {@icu} Returns the current Calendar type. Note, in 3.0 this function will return
+ * 'gregorian' in Calendar to emulate legacy behavior
+ * @return type of calendar (gregorian, etc)
+ * @stable ICU 3.8
+ */
+ public String getType() {
+     // The JDK supports Gregorian, Japanese and Buddhist calendars. They are told
+     // apart by the lower-cased simple class name of the wrapped instance.
+     // Anything unrecognized is reported as "gregorian".
+     final String className = calendar.getClass().getSimpleName().toLowerCase(Locale.US);
+     return className.contains("japanese") ? "japanese"
+          : className.contains("buddhist") ? "buddhist"
+          : "gregorian";
+ }
+
+ // -------- BEGIN ULocale boilerplate --------
+
+ /**
+ * {@icu} Returns the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
+ * drawn from <tt>en</tt> (the <i>actual</i> locale), and
+ * <tt>en_US</tt> may be the most specific locale that exists (the
+ * <i>valid</i> locale).
+ *
+ * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
+ * contains a partial preview implementation. The <i>actual</i>
+ * locale is returned correctly, but the <i>valid</i> locale is
+ * not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final ULocale getLocale(ULocale.Type type) {
+     // Always throws, whatever type is requested.
+     final String message = "Method not supported by com.ibm.icu.base";
+     throw new UnsupportedOperationException(message);
+ }
+
+ // -------- END ULocale boilerplate --------
+
+
+ // Translates an ICU calendar field id into the matching java.util.Calendar field id.
+ // Throws UnsupportedOperationException for ICU-only fields that have no JDK
+ // counterpart, and ArrayIndexOutOfBoundsException for any other value.
+ private static int getJDKField(int icuField) {
+     int jdkField;
+     switch (icuField) {
+     case ERA:
+         jdkField = java.util.Calendar.ERA;
+         break;
+     case YEAR:
+         jdkField = java.util.Calendar.YEAR;
+         break;
+     case MONTH:
+         jdkField = java.util.Calendar.MONTH;
+         break;
+     case WEEK_OF_YEAR:
+         jdkField = java.util.Calendar.WEEK_OF_YEAR;
+         break;
+     case WEEK_OF_MONTH:
+         jdkField = java.util.Calendar.WEEK_OF_MONTH;
+         break;
+     case DATE:
+         // DAY_OF_MONTH has no label of its own; the original kept it commented
+         // out here, presumably because it is an alias of DATE and a duplicate
+         // case label would not compile (confirm against the constant declarations).
+         jdkField = java.util.Calendar.DATE;
+         break;
+     case DAY_OF_YEAR:
+         jdkField = java.util.Calendar.DAY_OF_YEAR;
+         break;
+     case DAY_OF_WEEK:
+         jdkField = java.util.Calendar.DAY_OF_WEEK;
+         break;
+     case DAY_OF_WEEK_IN_MONTH:
+         jdkField = java.util.Calendar.DAY_OF_WEEK_IN_MONTH;
+         break;
+     case AM_PM:
+         jdkField = java.util.Calendar.AM_PM;
+         break;
+     case HOUR:
+         jdkField = java.util.Calendar.HOUR;
+         break;
+     case HOUR_OF_DAY:
+         jdkField = java.util.Calendar.HOUR_OF_DAY;
+         break;
+     case MINUTE:
+         jdkField = java.util.Calendar.MINUTE;
+         break;
+     case SECOND:
+         jdkField = java.util.Calendar.SECOND;
+         break;
+     case MILLISECOND:
+         jdkField = java.util.Calendar.MILLISECOND;
+         break;
+     case ZONE_OFFSET:
+         jdkField = java.util.Calendar.ZONE_OFFSET;
+         break;
+     case DST_OFFSET:
+         jdkField = java.util.Calendar.DST_OFFSET;
+         break;
+
+     case YEAR_WOY:
+     case DOW_LOCAL:
+     case EXTENDED_YEAR:
+     case JULIAN_DAY:
+     case MILLISECONDS_IN_DAY:
+         // Known ICU fields with no JDK equivalent.
+         throw new UnsupportedOperationException("Calendar field type not supported by com.ibm.icu.base");
+     default:
+         // Not one of the field ids handled above.
+         throw new ArrayIndexOutOfBoundsException("Specified calendar field is out of range");
+     }
+     return jdkField;
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Currency.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Currency.java
new file mode 100644
index 00000000000..d6350290174
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/Currency.java
@@ -0,0 +1,420 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.util;
+
+import java.io.Serializable;
+import java.text.ParsePosition;
+import java.util.Date;
+import java.util.Locale;
+
+/**
+ * A class encapsulating a currency, as defined by ISO 4217. A
+ * Currency object can be created given a Locale or
+ * given an ISO 4217 code. Once created, the Currency object
+ * can return various data necessary to its proper display:
+ *
+ * <ul><li>A display symbol, for a specific locale
+ * <li>The number of fraction digits to display
+ * <li>A rounding increment
+ * </ul>
+ *
+ * The DecimalFormat class uses these data to display
+ * currencies.
+ *
+ * Note: This class deliberately resembles
+ * java.util.Currency but it has a completely independent
+ * implementation, and adds features not present in the JDK.
+ * @author Alan Liu
+ * @stable ICU 2.2
+ */
+public class Currency implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @internal
+ */
+ public final java.util.Currency currency;
+
+ /**
+ * @internal
+ * @param delegate the java.util.Currency to which to delegate
+ */
+ public Currency(java.util.Currency delegate) {
+ this.currency = delegate;
+ }
+
+ /**
+ * Selector for getName() indicating a symbolic name for a
+ * currency, such as "$" for USD.
+ * @stable ICU 2.6
+ */
+ public static final int SYMBOL_NAME = 0;
+
+ /**
+ * Selector for getName() indicating the long name for a
+ * currency, such as "US Dollar" for USD.
+ * @stable ICU 2.6
+ */
+ public static final int LONG_NAME = 1;
+
+ /**
+ * Selector for getName() indicating the plural long name for a
+ * currency, such as "US dollar" for USD in "1 US dollar",
+ * and "US dollars" for USD in "2 US dollars".
+ * @stable ICU 4.2
+ */
+ public static final int PLURAL_LONG_NAME = 2;
+
+ /**
+ * Returns a currency object for the default currency in the given
+ * locale.
+ * @param locale the locale
+ * @return the currency object for this locale
+ * @stable ICU 2.2
+ */
+ public static Currency getInstance(Locale locale) {
+ return new Currency(java.util.Currency.getInstance(locale));
+ }
+
+ /**
+ * Returns a currency object for the default currency in the given
+ * locale.
+ * @stable ICU 3.2
+ */
+ public static Currency getInstance(ULocale locale) {
+ return new Currency(java.util.Currency.getInstance(locale.toLocale()));
+ }
+
+ /**
+ * Returns the currency identifiers that are valid for the given locale
+ * on the given date, in preference order, or null if there are none.
+ * This com.ibm.icu.base stub does not implement the lookup and always throws.
+ * @param loc the locale for which to retrieve currency codes.
+ * @param d the date for which to retrieve currency codes for the given locale.
+ * @return The array of ISO currency codes.
+ * @throws UnsupportedOperationException always, in com.ibm.icu.base.
+ * @stable ICU 4.0
+ */
+ public static String[] getAvailableCurrencyCodes(ULocale loc, Date d) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns a currency object given an ISO 4217 3-letter code.
+ * @param theISOCode the iso code
+ * @return the currency for this iso code
+ * @throws NullPointerException if <code>theISOCode</code> is null.
+ * @throws IllegalArgumentException if <code>theISOCode</code> is not a
+ * 3-letter alpha code.
+ * @stable ICU 2.2
+ */
+ public static Currency getInstance(String theISOCode) {
+ return new Currency(java.util.Currency.getInstance(theISOCode));
+ }
+
+ /**
+ * Registers a new currency for the provided locale. The returned object
+ * is a key that can be used to unregister this currency object.
+ * @param currency the currency to register
+ * @param locale the ulocale under which to register the currency
+ * @return a registry key that can be used to unregister this currency
+ * @see #unregister
+ * @stable ICU 3.2
+ */
+ public static Object registerInstance(Currency currency, ULocale locale) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Unregister the currency associated with this key (obtained from
+ * registerInstance).
+ * @param registryKey the registry key returned from registerInstance
+ * @see #registerInstance
+ * @stable ICU 2.6
+ */
+ public static boolean unregister(Object registryKey) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Return an array of the locales for which a currency
+ * is defined.
+ * @return an array of the available locales
+ * @stable ICU 2.2
+ */
+ public static Locale[] getAvailableLocales() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Return an array of the ulocales for which a currency
+ * is defined.
+ * @return an array of the available ulocales
+ * @stable ICU 3.2
+ */
+ public static ULocale[] getAvailableULocales() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Given a key and a locale, returns an array of values for the key for which data
+ * exists. If commonlyUsed is true, these are the values that typically are used
+ * with this locale, otherwise these are all values for which data exists.
+ * This is a common service API.
+ *
+ * The only supported key is "currency", other values return an empty array.
+ *
+ * Currency information is based on the region of the locale. If the locale does not
+ * indicate a region, {@link ULocale#addLikelySubtags(ULocale)} is used to infer a region,
+ * except for the 'und' locale.
+ *
+ * If commonlyUsed is true, only the currencies known to be in use as of the current date
+ * are returned. When there are more than one, these are returned in preference order
+ * (typically, this occurs when a country is transitioning to a new currency, and the
+ * newer currency is preferred), see
+ * Unicode TR#35 Sec. C1 .
+ * If commonlyUsed is false, all currencies ever used in any locale are returned, in no
+ * particular order.
+ *
+ * @param key key whose values to look up. the only recognized key is "currency"
+ * @param locale the locale
+ * @param commonlyUsed if true, return only values that are currently used in the locale.
+ * Otherwise returns all values.
+ * @return an array of values for the given key and the locale. If there is no data, the
+ * array will be empty.
+ * @stable ICU 4.2
+ */
+ public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
+ boolean commonlyUsed) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Return a hashcode for this currency.
+ * @stable ICU 2.2
+ */
+ public int hashCode() {
+ return currency.hashCode();
+ }
+
+ /**
+ * Return true if rhs is a non-null Currency instance whose wrapped
+ * java.util.Currency equals this one's; false for any other argument.
+ * @stable ICU 2.2
+ */
+ public boolean equals(Object rhs) {
+ // Check the type explicitly instead of catching the cast/null failure.
+ if (!(rhs instanceof Currency)) {
+ return false;
+ }
+ Currency other = (Currency) rhs;
+ return currency != null && currency.equals(other.currency);
+ }
+
+ /**
+ * Returns the ISO 4217 3-letter code for this currency object.
+ * @stable ICU 2.2
+ */
+ public String getCurrencyCode() {
+ return currency.getCurrencyCode();
+ }
+
+ /**
+ * Convenience and compatibility override of getName that
+ * requests the symbol name.
+ * @see #getName
+ * @stable ICU 3.4
+ */
+ public String getSymbol() {
+ return currency.getSymbol();
+ }
+
+ /**
+ * Convenience and compatibility override of getName that
+ * requests the symbol name.
+ * @param loc the Locale for the symbol
+ * @see #getName
+ * @stable ICU 3.4
+ */
+ public String getSymbol(Locale loc) {
+ return currency.getSymbol(loc);
+ }
+
+ /**
+ * Convenience and compatibility override of getName that
+ * requests the symbol name.
+ * @param uloc the ULocale for the symbol
+ * @see #getName
+ * @stable ICU 3.4
+ */
+ public String getSymbol(ULocale uloc) {
+ return currency.getSymbol(uloc.toLocale());
+ }
+
+ /**
+ * Returns the display name for the given currency in the
+ * given locale.
+ * This is a convenient method for
+ * getName(ULocale, int, boolean[]);
+ * @stable ICU 3.2
+ */
+ public String getName(Locale locale,
+ int nameStyle,
+ boolean[] isChoiceFormat) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the display name for the given currency in the
+ * given locale. For example, the display name for the USD
+ * currency object in the en_US locale is "$".
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return.
+ * The nameStyle should be either SYMBOL_NAME or
+ * LONG_NAME. Otherwise, throw IllegalArgumentException.
+ * @param isChoiceFormat fill-in; isChoiceFormat[0] is set to true
+ * if the returned value is a ChoiceFormat pattern; otherwise it
+ * is set to false
+ * @return display string for this currency. If the resource data
+ * contains no entry for this currency, then the ISO 4217 code is
+ * returned. If isChoiceFormat[0] is true, then the result is a
+ * ChoiceFormat pattern. Otherwise it is a static string. Note:
+ * as of ICU 4.4, choice formats are not used, and the value returned
+ * in isChoiceFormat is always false.
+ *
+ * @throws IllegalArgumentException if the nameStyle is not SYMBOL_NAME
+ * or LONG_NAME.
+ * @see #getName(ULocale, int, String, boolean[])
+ * @stable ICU 3.2
+ */
+ public String getName(ULocale locale, int nameStyle, boolean[] isChoiceFormat) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the display name for the given currency in the given locale.
+ * This is a convenience overload of getName(ULocale, int, String, boolean[]);
+ * @stable ICU 4.2
+ */
+ public String getName(Locale locale, int nameStyle, String pluralCount,
+ boolean[] isChoiceFormat) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the display name for the given currency in the
+ * given locale. For example, the SYMBOL_NAME for the USD
+ * currency object in the en_US locale is "$".
+ * The PLURAL_LONG_NAME for the USD currency object when the currency
+ * amount is plural is "US dollars", such as in "3.00 US dollars";
+ * while the PLURAL_LONG_NAME for the USD currency object when the currency
+ * amount is singular is "US dollar", such as in "1.00 US dollar".
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return
+ * @param pluralCount plural count string for this locale
+ * @param isChoiceFormat fill-in; isChoiceFormat[0] is set to true
+ * if the returned value is a ChoiceFormat pattern; otherwise it
+ * is set to false
+ * @return display string for this currency. If the resource data
+ * contains no entry for this currency, then the ISO 4217 code is
+ * returned. If isChoiceFormat[0] is true, then the result is a
+ * ChoiceFormat pattern. Otherwise it is a static string. Note:
+ * as of ICU 4.4, choice formats are not used, and the value returned
+ * in isChoiceFormat is always false.
+ * @throws IllegalArgumentException if the nameStyle is not SYMBOL_NAME,
+ * LONG_NAME, or PLURAL_LONG_NAME.
+ * @stable ICU 4.2
+ */
+ public String getName(ULocale locale, int nameStyle, String pluralCount,
+ boolean[] isChoiceFormat) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Attempt to parse the given string as a currency, either as a
+ * display name in the given locale, or as a 3-letter ISO 4217
+ * code. If multiple display names match, then the longest one is
+ * selected. If both a display name and a 3-letter ISO code
+ * match, then the display name is preferred, unless its length
+ * is less than 3.
+ *
+ * @param locale the locale of the display names to match
+ * @param text the text to parse
+ * @param type parse against currency type: LONG_NAME only or not
+ * @param pos input-output position; on input, the position within
+ * text to match; must have 0 <= pos.getIndex() < text.length();
+ * on output, the position after the last matched character. If
+ * the parse fails, the position is unchanged upon output.
+ * @return the ISO 4217 code, as a string, of the best match, or
+ * null if there is no match
+ *
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static String parse(ULocale locale, String text, int type, ParsePosition pos) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the number of fraction digits that should
+ * be displayed for this currency.
+ * @return a non-negative number of fraction digits to be
+ * displayed
+ * @stable ICU 2.2
+ */
+ public int getDefaultFractionDigits() {
+ return currency.getDefaultFractionDigits();
+ }
+
+ /**
+ * Returns the rounding increment for this currency, or 0.0 if no
+ * rounding is done by this currency.
+ * @return the non-negative rounding increment, or 0.0 if none
+ * @stable ICU 2.2
+ */
+ public double getRoundingIncrement() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns the ISO 4217 code for this currency.
+ * @stable ICU 2.2
+ */
+ public String toString() {
+ return currency.toString();
+ }
+
+ /**
+ * Return the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA , the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * <p>Note: This method will be obsoleted. The implementation is
+ * no longer locale-specific and so there is no longer a valid or
+ * actual locale associated with the Currency object. Until
+ * it is removed, this method will return the root locale.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @obsolete ICU 3.2 to be removed
+ * @deprecated This API is obsolete.
+ */
+ public final ULocale getLocale(ULocale.Type type) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+}
+
+//eof
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/CurrencyAmount.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/CurrencyAmount.java
new file mode 100644
index 00000000000..229a400ebbd
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/CurrencyAmount.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.util;
+
+/*
+ * Empty stub
+ */
+public class CurrencyAmount {
+ private CurrencyAmount() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java
new file mode 100644
index 00000000000..6f5d08c20c8
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java
@@ -0,0 +1,710 @@
+/*
+ * @(#)TimeZone.java 1.51 00/01/19
+ *
+ * Copyright (C) 1996-2011, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ */
+
+package com.ibm.icu.util;
+
+import java.io.Serializable;
+import java.util.Date;
+import java.util.Locale;
+import java.util.MissingResourceException;
+
+/**
+ * {@icuenhanced java.util.TimeZone}.{@icu _usage_}
+ *
+ * <p><code>TimeZone</code> represents a time zone offset, and also computes daylight
+ * savings.
+ *
+ * <p>Typically, you get a <code>TimeZone</code> using {@link #getDefault()}
+ * which creates a <code>TimeZone</code> based on the time zone where the program
+ * is running. For example, for a program running in Japan, <code>getDefault</code>
+ * creates a <code>TimeZone</code> object based on Japanese Standard Time.
+ *
+ * <p>You can also get a <code>TimeZone</code> using {@link #getTimeZone(String)}
+ * along with a time zone ID. For instance, the time zone ID for the
+ * U.S. Pacific Time zone is "America/Los_Angeles". So, you can get a
+ * U.S. Pacific Time <code>TimeZone</code> object with:
+ *
+ *
+ *
+ * TimeZone tz = TimeZone.getTimeZone("America/Los_Angeles");
+ *
+ *
+ * You can use the {@link #getAvailableIDs()} method to iterate through
+ * all the supported time zone IDs. You can then choose a
+ * supported ID to get a <code>TimeZone</code>.
+ * If the time zone you want is not represented by one of the
+ * supported IDs, then you can create a custom time zone ID with
+ * the following syntax:
+ *
+ *
+ *
+ * GMT[+|-]hh[[:]mm]
+ *
+ *
+ *
+ * For example, you might specify GMT+14:00 as a custom
+ * time zone ID. The <code>TimeZone</code> that is returned
+ * when you specify a custom time zone ID does not include
+ * daylight savings time.
+ *
+ * For compatibility with JDK 1.1.x, some other three-letter time zone IDs
+ * (such as "PST", "CTT", "AST") are also supported. However, their
+ * use is deprecated because the same abbreviation is often used
+ * for multiple time zones (for example, "CST" could be U.S. "Central Standard
+ * Time" and "China Standard Time"), and the Java platform can then only
+ * recognize one of them.
+ *
+ * <p>Note: Starting from ICU4J 4.0, you can optionally choose
+ * JDK <code>TimeZone</code> as the time zone implementation. The TimeZone factory
+ * method <code>getTimeZone</code> creates an instance of ICU's own TimeZone
+ * subclass by default. If you want to use the JDK implementation always, you can
+ * set the default time zone implementation type by the new method
+ * <code>setDefaultTimeZoneType</code>. Alternatively, you can change the initial
+ * default implementation type by setting a property below.
+ *
+ *
+ *
+ * #
+ * # The default TimeZone implementation type used by the ICU TimeZone
+ * # factory method. [ ICU | JDK ]
+ * #
+ * com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
+ *
+ *
+ *
+ * This property is included in ICUConfig.properties in com.ibm.icu package. When the
+ * <code>TimeZone</code> class is loaded, the initialization code checks if the property
+ * <code>com.ibm.icu.util.TimeZone.DefaultTimeZoneType=xxx</code> is defined by the system
+ * properties. If not available, then it loads ICUConfig.properties to get the default
+ * time zone implementation type. The property setting is only used for the initial
+ * default value and you can change the default type by calling
+ * <code>setDefaultTimeZoneType</code> at runtime.
+ *
+ * @see Calendar
+ * @see GregorianCalendar
+ * @see SimpleTimeZone
+ * @author Mark Davis, David Goldsmith, Chen-Lieh Huang, Alan Liu
+ * @stable ICU 2.0
+ */
+public class TimeZone implements Serializable, Cloneable {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @internal
+ */
+ public final java.util.TimeZone timeZone;
+
+ /**
+ * @internal
+ * @param delegate the TimeZone to which to delegate
+ */
+ public TimeZone(java.util.TimeZone delegate) {
+ this.timeZone = delegate;
+ }
+
+// /**
+// * {@icu} A logger for TimeZone. Will be null if logging is not on by way of system
+// * property: "icu4j.debug.logging"
+// * @draft ICU 4.4
+// * @provisional This API might change or be removed in a future release.
+// */
+// public static ICULogger TimeZoneLogger = ICULogger.getICULogger(TimeZone.class.getName());
+
+ /**
+ * Default constructor. (For invocation by subclass constructors,
+ * typically implicit.)
+ * @stable ICU 2.8
+ */
+ public TimeZone() {
+ this.timeZone = java.util.TimeZone.getDefault();
+ }
+
+ /**
+ * {@icu} A time zone implementation type indicating ICU's own TimeZone used by
+ * <code>getTimeZone</code>, <code>setDefaultTimeZoneType</code>
+ * and <code>getDefaultTimeZoneType</code>.
+ * @stable ICU 4.0
+ */
+ public static final int TIMEZONE_ICU = 0;
+ /**
+ * {@icu} A time zone implementation type indicating JDK TimeZone used by
+ * <code>getTimeZone</code>, <code>setDefaultTimeZoneType</code>
+ * and <code>getDefaultTimeZoneType</code>.
+ * @stable ICU 4.0
+ */
+ public static final int TIMEZONE_JDK = 1;
+
+ /**
+ * A style specifier for <code>getDisplayName()</code> indicating
+ * a short name, such as "PST."
+ * @see #LONG
+ * @stable ICU 2.0
+ */
+ public static final int SHORT = 0;
+
+ /**
+ * A style specifier for <code>getDisplayName()</code> indicating
+ * a long name, such as "Pacific Standard Time."
+ * @see #SHORT
+ * @stable ICU 2.0
+ */
+ public static final int LONG = 1;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a short generic name, such as "PT."
+ * @see #LONG_GENERIC
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int SHORT_GENERIC = 2;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a long generic name, such as "Pacific Time."
+ * @see #SHORT_GENERIC
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int LONG_GENERIC = 3;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a short name derived from the timezone's offset, such as "-0800."
+ * @see #LONG_GMT
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int SHORT_GMT = 4;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a long name derived from the timezone's offset, such as "GMT-08:00."
+ * @see #SHORT_GMT
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int LONG_GMT = 5;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a short name derived from the timezone's short standard or daylight
+ * timezone name ignoring commonlyUsed, such as "PDT."
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+
+ public static final int SHORT_COMMONLY_USED = 6;
+
+ /**
+ * {@icu} A style specifier for <code>getDisplayName()</code> indicating
+ * a long name derived from the timezone's fallback name, such as
+ * "United States (Los Angeles)."
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int GENERIC_LOCATION = 7;
+
+ /**
+ * Gets the time zone offset, for current date, modified in case of
+ * daylight savings. This is the offset to add *to* UTC to get local time.
+ * @param era the era of the given date.
+ * @param year the year in the given date.
+ * @param month the month in the given date.
+ * Month is 0-based. e.g., 0 for January.
+ * @param day the day-in-month of the given date.
+ * @param dayOfWeek the day-of-week of the given date.
+ * @param milliseconds the millis in day in standard local time.
+ * @return the offset to add *to* GMT to get local time.
+ * @stable ICU 2.0
+ */
+ public int getOffset(int era, int year, int month, int day,
+ int dayOfWeek, int milliseconds) {
+ return timeZone.getOffset(era, year, month, day, dayOfWeek, milliseconds);
+ }
+
+
+ /**
+ * Returns the offset of this time zone from UTC at the specified
+ * date. If Daylight Saving Time is in effect at the specified
+ * date, the offset value is adjusted with the amount of daylight
+ * saving.
+ *
+ * @param date the date represented in milliseconds since January 1, 1970 00:00:00 GMT
+ * @return the amount of time in milliseconds to add to UTC to get local time.
+ *
+ * @see Calendar#ZONE_OFFSET
+ * @see Calendar#DST_OFFSET
+ * @see #getOffset(long, boolean, int[])
+ * @stable ICU 2.8
+ */
+ public int getOffset(long date) {
+ return timeZone.getOffset(date);
+ }
+
+ /**
+ * Returns the time zone raw and GMT offset for the given moment
+ * in time. Upon return, local-millis = GMT-millis + rawOffset +
+ * dstOffset. All computations are performed in the proleptic
+ * Gregorian calendar. The default implementation in the TimeZone
+ * class delegates to the 8-argument getOffset().
+ *
+ * @param date moment in time for which to return offsets, in
+ * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
+ * time or local wall time, depending on `local'.
+ * @param local if true, `date' is local wall time; otherwise it
+ * is in GMT time.
+ * @param offsets output parameter to receive the raw offset, that
+ * is, the offset not including DST adjustments, in offsets[0],
+ * and the DST offset, that is, the offset to be added to
+ * `rawOffset' to obtain the total offset between local and GMT
+ * time, in offsets[1]. If DST is not in effect, the DST offset is
+ * zero; otherwise it is a positive value, typically one hour.
+ *
+ * @stable ICU 2.8
+ */
+ public void getOffset(long date, boolean local, int[] offsets) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the base time zone offset to GMT.
+ * This is the offset to add *to* UTC to get local time.
+ * @param offsetMillis the given base time zone offset to GMT.
+ * @stable ICU 2.0
+ */
+ public void setRawOffset(int offsetMillis) {
+ timeZone.setRawOffset(offsetMillis);
+ }
+
+ /**
+ * Gets unmodified offset, NOT modified in case of daylight savings.
+ * This is the offset to add *to* UTC to get local time.
+ * @return the unmodified offset to add *to* UTC to get local time.
+ * @stable ICU 2.0
+ */
+ public int getRawOffset() {
+ return timeZone.getRawOffset();
+ }
+
+ /**
+ * Gets the ID of this time zone.
+ * @return the ID of this time zone.
+ * @stable ICU 2.0
+ */
+ public String getID() {
+ return timeZone.getID();
+ }
+
+ /**
+ * Sets the time zone ID. This does not change any other data in
+ * the time zone object.
+ * @param ID the new time zone ID.
+ * @stable ICU 2.0
+ */
+ public void setID(String ID) {
+ timeZone.setID(ID);
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the default locale.
+ * This method returns the long generic name.
+ * If the display name is not available for the locale,
+ * a fallback based on the country, city, or time zone id will be used.
+ * @return the human-readable name of this time zone in the default locale.
+ * @stable ICU 2.0
+ */
+ public final String getDisplayName() {
+ return timeZone.getDisplayName();
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the specified locale.
+ * This method returns the long generic name.
+ * If the display name is not available for the locale,
+ * a fallback based on the country, city, or time zone id will be used.
+ * @param locale the locale in which to supply the display name.
+ * @return the human-readable name of this time zone in the given locale
+ * or in the default locale if the given locale is not recognized.
+ * @stable ICU 2.0
+ */
+ public final String getDisplayName(Locale locale) {
+ return timeZone.getDisplayName(locale);
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the specified locale.
+ * This method returns the long name, not including daylight savings.
+ * If the display name is not available for the locale,
+ * a fallback based on the country, city, or time zone id will be used.
+ * @param locale the ulocale in which to supply the display name.
+ * @return the human-readable name of this time zone in the given locale
+ * or in the default ulocale if the given ulocale is not recognized.
+ * @stable ICU 3.2
+ */
+ public final String getDisplayName(ULocale locale) {
+ return timeZone.getDisplayName(locale.toLocale());
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the default locale.
+ * If the display name is not available for the locale,
+ * then this method returns a string in the format
+ * <code>GMT[+-]hh:mm</code>.
+ * @param daylight if true, return the daylight savings name.
+ * @param style the output style of the display name. Valid styles are
+ * <code>SHORT</code>, <code>LONG</code>, <code>SHORT_GENERIC</code>,
+ * <code>LONG_GENERIC</code>, <code>SHORT_GMT</code>, <code>LONG_GMT</code>,
+ * <code>SHORT_COMMONLY_USED</code> or <code>GENERIC_LOCATION</code>.
+ * @return the human-readable name of this time zone in the default locale.
+ * @stable ICU 2.0
+ */
+ public final String getDisplayName(boolean daylight, int style) {
+ return getDisplayName(daylight, style, ULocale.getDefault());
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the specified locale.
+ * If the display name is not available for the locale,
+ * then this method returns a string in the format
+ * <code>GMT[+-]hh:mm</code>.
+ * @param daylight if true, return the daylight savings name.
+ * @param style the output style of the display name. Valid styles are
+ * <code>SHORT</code>, <code>LONG</code>, <code>SHORT_GENERIC</code>,
+ * <code>LONG_GENERIC</code>, <code>SHORT_GMT</code>, <code>LONG_GMT</code>,
+ * <code>SHORT_COMMONLY_USED</code> or <code>GENERIC_LOCATION</code>.
+ * @param locale the locale in which to supply the display name.
+ * @return the human-readable name of this time zone in the given locale
+ * or in the default locale if the given locale is not recognized.
+ * @exception IllegalArgumentException style is invalid.
+ * @stable ICU 2.0
+ */
+ public String getDisplayName(boolean daylight, int style, Locale locale) {
+ return getDisplayName(daylight, style, ULocale.forLocale(locale));
+ }
+
+ /**
+ * Returns a name of this time zone suitable for presentation to the user
+ * in the specified locale.
+ * If the display name is not available for the locale,
+ * then this method returns a string in the format
+ * <code>GMT[+-]hh:mm</code>.
+ * @param daylight if true, return the daylight savings name.
+ * @param style the output style of the display name. Valid styles are
+ * <code>SHORT</code>, <code>LONG</code>, <code>SHORT_GENERIC</code>,
+ * <code>LONG_GENERIC</code>, <code>SHORT_GMT</code>, <code>LONG_GMT</code>,
+ * <code>SHORT_COMMONLY_USED</code> or <code>GENERIC_LOCATION</code>.
+ * @param locale the locale in which to supply the display name.
+ * @return the human-readable name of this time zone in the given locale
+ * or in the default locale if the given locale is not recognized.
+ * @exception IllegalArgumentException style is invalid.
+ * @stable ICU 3.2
+ */
+ public String getDisplayName(boolean daylight, int style, ULocale locale) {
+ // Only SHORT and LONG are delegated; every other style is rejected up front.
+ if (style != SHORT && style != LONG) {
+ throw new UnsupportedOperationException("Specified time zone format style is not supported by com.ibm.icu.base");
+ }
+ int jdkStyle = (style == SHORT) ? java.util.TimeZone.SHORT : java.util.TimeZone.LONG;
+ Locale jdkLocale = locale.toLocale();
+ return timeZone.getDisplayName(daylight, jdkStyle, jdkLocale);
+ }
+
+ /**
+ * Returns the amount of time to be added to local standard time
+ * to get local wall clock time.
+ *
+ * The default implementation always returns 3600000 milliseconds
+ * (i.e., one hour) if this time zone observes Daylight Saving
+ * Time. Otherwise, 0 (zero) is returned.
+ *
+ * If an underlying TimeZone implementation subclass supports
+ * historical Daylight Saving Time changes, this method returns
+ * the known latest daylight saving value.
+ *
+ * @return the amount of saving time in milliseconds
+ * @stable ICU 2.8
+ */
+ public int getDSTSavings() {
+ return timeZone.getDSTSavings();
+ }
+
+ /**
+ * Queries if this time zone uses daylight savings time.
+ * @return true if this time zone uses daylight savings time,
+ * false, otherwise.
+ * @stable ICU 2.0
+ */
+ public boolean useDaylightTime() {
+ return timeZone.useDaylightTime();
+ }
+
+ /**
+ * Queries if the given date is in daylight savings time in
+ * this time zone.
+ * @param date the given Date.
+ * @return true if the given date is in daylight savings time,
+ * false, otherwise.
+ * @stable ICU 2.0
+ */
+ public boolean inDaylightTime(Date date) {
+ // Pure delegation; the date is passed through unchanged.
+ return timeZone.inDaylightTime(date);
+ }
+
+ /**
+ * Gets the <code>TimeZone</code> for the given ID.
+ *
+ * @param ID the ID for a <code>TimeZone</code>, such as "America/Los_Angeles",
+ * or a custom ID such as "GMT-8:00". Note that the support of abbreviations,
+ * such as "PST", is for JDK 1.1.x compatibility only and full names should be used.
+ *
+ * @return the specified <code>TimeZone</code>, or the GMT zone if the given ID
+ * cannot be understood.
+ * @stable ICU 2.0
+ */
+ public static synchronized TimeZone getTimeZone(String ID) {
+ // The lookup itself is done by the JDK; the result is wrapped in a new instance on every call.
+ return new TimeZone(java.util.TimeZone.getTimeZone(ID));
+ }
+
+ /**
+ * Gets the <code>TimeZone</code> for the given ID and the timezone type.
+ * @param ID the ID for a <code>TimeZone</code>, such as "America/Los_Angeles", or a
+ * custom ID such as "GMT-8:00". Note that the support of abbreviations, such as
+ * "PST", is for JDK 1.1.x compatibility only and full names should be used.
+ * @param type Time zone type, either <code>TIMEZONE_ICU</code> or
+ * <code>TIMEZONE_JDK</code>.
+ * @return the specified <code>TimeZone</code>, or the GMT zone if the given ID
+ * cannot be understood.
+ * @stable ICU 4.0
+ */
+    public static synchronized TimeZone getTimeZone(String ID, int type) {
+        // Every type other than the JDK-backed one is rejected up front.
+        if (type != TIMEZONE_JDK) {
+            throw new UnsupportedOperationException("TIMEZONE_ICU not supported by com.ibm.icu.base");
+        }
+        java.util.TimeZone jdkZone = java.util.TimeZone.getTimeZone(ID);
+        return new TimeZone(jdkZone);
+    }
+
+ /**
+ * Sets the default time zone type used by <code>getTimeZone</code>.
+ * @param type time zone type, either <code>TIMEZONE_ICU</code> or
+ * <code>TIMEZONE_JDK</code>.
+ * @stable ICU 4.0
+ */
+    public static synchronized void setDefaultTimeZoneType(int type) {
+        // Nothing is stored: TIMEZONE_JDK is accepted as a no-op, anything else is refused.
+        if (type == TIMEZONE_JDK) {
+            return;
+        }
+        throw new UnsupportedOperationException("TimeZone type other than TIMEZONE_JDK is not supported by com.ibm.icu.base");
+    }
+
+ /**
+ * {@icu} Returns the default time zone type currently used.
+ * @return The default time zone type, either <code>TIMEZONE_ICU</code> or
+ * <code>TIMEZONE_JDK</code>.
+ * @stable ICU 4.0
+ */
+ public static int getDefaultTimeZoneType() {
+ // Constant answer: this implementation never reports any type other than TIMEZONE_JDK.
+ return TIMEZONE_JDK;
+ }
+
+ /**
+ * Return a new String array containing all system TimeZone IDs
+ * with the given raw offset from GMT. These IDs may be passed to
+ * <code>get()</code> to construct the corresponding TimeZone
+ * object.
+ * @param rawOffset the offset in milliseconds from GMT
+ * @return an array of IDs for system TimeZones with the given
+ * raw offset. If there are none, return a zero-length array.
+ * @stable ICU 2.0
+ */
+ public static String[] getAvailableIDs(int rawOffset) {
+ // The ID list is whatever the JDK reports for this raw offset.
+ return java.util.TimeZone.getAvailableIDs(rawOffset);
+
+ }
+
+
+ /**
+ * Return a new String array containing all system TimeZone IDs
+ * associated with the given country. These IDs may be passed to
+ * <code>get()</code> to construct the corresponding TimeZone
+ * object.
+ * @param country a two-letter ISO 3166 country code, or null
+ * to return zones not associated with any country
+ * @return an array of IDs for system TimeZones in the given
+ * country. If there are none, return a zero-length array.
+ * @stable ICU 2.0
+ */
+ public static String[] getAvailableIDs(String country) {
+ // No country-based lookup is implemented here: every call throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Return a new String array containing all system TimeZone IDs.
+ * These IDs (and only these IDs) may be passed to
+ * <code>get()</code> to construct the corresponding TimeZone
+ * object.
+ * @return an array of all system TimeZone IDs
+ * @stable ICU 2.0
+ */
+ public static String[] getAvailableIDs() {
+ // The ID list is whatever the JDK reports.
+ return java.util.TimeZone.getAvailableIDs();
+ }
+
+ /**
+ * {@icu} Returns the number of IDs in the equivalency group that
+ * includes the given ID. An equivalency group contains zones
+ * that have the same GMT offset and rules.
+ *
+ * <p>The returned count includes the given ID; it is always >= 1
+ * for valid IDs. The given ID must be a system time zone. If it
+ * is not, returns zero.
+ * @param id a system time zone ID
+ * @return the number of zones in the equivalency group containing
+ * 'id', or zero if 'id' is not a valid system ID
+ * @see #getEquivalentID
+ * @stable ICU 2.0
+ */
+ public static int countEquivalentIDs(String id) {
+ // Equivalency groups are not implemented here: every call throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns an ID in the equivalency group that
+ * includes the given ID. An equivalency group contains zones
+ * that have the same GMT offset and rules.
+ *
+ * <p>The given index must be in the range 0..n-1, where n is the
+ * value returned by <code>countEquivalentIDs(id)</code>. For
+ * some value of 'index', the returned value will be equal to the
+ * given id. If the given id is not a valid system time zone, or
+ * if 'index' is out of range, then returns an empty string.
+ * @param id a system time zone ID
+ * @param index a value from 0 to n-1, where n is the value
+ * returned by countEquivalentIDs(id)
+ * @return the ID of the index-th zone in the equivalency group
+ * containing 'id', or an empty string if 'id' is not a valid
+ * system ID or 'index' is out of range
+ * @see #countEquivalentIDs
+ * @stable ICU 2.0
+ */
+ public static String getEquivalentID(String id, int index) {
+ // Equivalency groups are not implemented here: every call throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Gets the default <code>TimeZone</code> for this host.
+ * The source of the default <code>TimeZone</code>
+ * may vary with implementation.
+ * @return a default <code>TimeZone</code>.
+ * @stable ICU 2.0
+ */
+ public static TimeZone getDefault() {
+ // Wraps the JDK default in a new instance on every call; nothing is cached here.
+ return new TimeZone(java.util.TimeZone.getDefault());
+ }
+
+    /**
+     * Sets the <code>TimeZone</code> that is
+     * returned by the <code>getDefault</code> method. If <code>tz</code>
+     * is null, reset the default to the value it had originally when the
+     * VM first started.
+     * @param tz the new default time zone, or null to reset the default
+     * @stable ICU 2.0
+     */
+    public static void setDefault(TimeZone tz) {
+        // A null wrapper has to be forwarded as null so the JDK performs the
+        // documented reset; dereferencing tz.timeZone directly would throw instead.
+        java.util.TimeZone.setDefault(tz == null ? null : tz.timeZone);
+    }
+
+    /**
+     * Returns true if this zone has the same rule and offset as another zone.
+     * That is, if this zone differs only in ID, if at all. Returns false
+     * if the other zone is null.
+     * @param other the <code>TimeZone</code> object to be compared with
+     * @return true if the other zone is not null and is the same as this one,
+     * with the possible exception of the ID
+     * @stable ICU 2.0
+     */
+    public boolean hasSameRules(TimeZone other) {
+        // Check for null before unwrapping: the contract is "false", not an exception.
+        return other != null && timeZone.hasSameRules(other.timeZone);
+    }
+
+ /**
+ * Overrides clone.
+ * @stable ICU 2.0
+ */
+    public Object clone() {
+        // Clone the wrapped JDK zone so the copy does not share it with this instance.
+        java.util.TimeZone zoneCopy = (java.util.TimeZone) timeZone.clone();
+        return new TimeZone(zoneCopy);
+    }
+
+ /**
+ * Overrides equals.
+ * @stable ICU 3.6
+ */
+    public boolean equals(Object obj) {
+        // An explicit type test replaces the former catch-all handler: null and
+        // foreign types yield false without raising and swallowing an exception.
+        if (!(obj instanceof TimeZone)) {
+            return false;
+        }
+        // Equality is defined by the wrapped JDK zones.
+        return timeZone.equals(((TimeZone) obj).timeZone);
+    }
+
+ /**
+ * Overrides hashCode.
+ * @stable ICU 3.6
+ */
+ public int hashCode(){
+ // Uses the wrapped zone's hash, the same object equals() compares.
+ return timeZone.hashCode();
+ }
+
+ /**
+ * {@icu} Returns the time zone data version currently used by ICU.
+ *
+ * @return the version string, such as "2007f"
+ * @throws MissingResourceException if ICU time zone resource bundle
+ * is missing or the version information is not available.
+ *
+ * @stable ICU 3.8
+ */
+    public static synchronized String getTZDataVersion() {
+        // Not implemented here: every call throws. Message spelling corrected
+        // ("supproted") to match the other unsupported-method messages in this class.
+        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+    }
+
+ /**
+ * {@icu} Returns the canonical system time zone ID or the normalized
+ * custom time zone ID for the given time zone ID.
+ * @param id The input time zone ID to be canonicalized.
+ * @return The canonical system time zone ID or the custom time zone ID
+ * in normalized format for the given time zone ID. When the given time zone ID
+ * is neither a known system time zone ID nor a valid custom time zone ID,
+ * null is returned.
+ * @stable ICU 4.0
+ */
+    public static String getCanonicalID(String id) {
+        // Not implemented here: every call throws. Message spelling corrected
+        // ("supproted") to match the other unsupported-method messages in this class.
+        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+    }
+
+ /**
+ * {@icu} Returns the canonical system time zone ID or the normalized
+ * custom time zone ID for the given time zone ID.
+ * @param id The input time zone ID to be canonicalized.
+ * @param isSystemID When non-null boolean array is specified and
+ * the given ID is a known system time zone ID, true is set to isSystemID[0]
+ * @return The canonical system time zone ID or the custom time zone ID
+ * in normalized format for the given time zone ID. When the given time zone ID
+ * is neither a known system time zone ID nor a valid custom time zone ID,
+ * null is returned.
+ * @stable ICU 4.0
+ */
+    public static String getCanonicalID(String id, boolean[] isSystemID) {
+        // Not implemented here: every call throws and isSystemID is never written.
+        // Message spelling corrected ("supproted") to match the sibling messages.
+        throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+    }
+}
+
+//eof
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
new file mode 100644
index 00000000000..4d49adf54df
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java
@@ -0,0 +1,2493 @@
+/*
+******************************************************************************
+* Copyright (C) 2003-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.util;
+
+import java.io.Serializable;
+import java.text.ParseException;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.Set;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.ICUCache;
+import com.ibm.icu.impl.LocaleIDParser;
+import com.ibm.icu.impl.LocaleIDs;
+import com.ibm.icu.impl.LocaleUtility;
+import com.ibm.icu.impl.SimpleCache;
+import com.ibm.icu.impl.locale.AsciiUtil;
+
+/**
+ * {@icuenhanced java.util.Locale}.{@icu _usage_}
+ *
+ * A class analogous to {@link java.util.Locale} that provides additional
+ * support for ICU protocol. In ICU 3.0 this class is enhanced to support
+ * RFC 3066 language identifiers.
+ *
+ *
Many classes and services in ICU follow a factory idiom, in
+ * which a factory method or object responds to a client request with
+ * an object. The request includes a locale (the requested
+ * locale), and the returned object is constructed using data for that
+ * locale. The system may lack data for the requested locale, in
+ * which case the locale fallback mechanism will be invoked until a
+ * populated locale is found (the valid locale). Furthermore,
+ * even when a populated locale is found (the valid locale),
+ * further fallback may be required to reach a locale containing the
+ * specific data required by the service (the actual locale).
+ *
+ * <p>ULocale performs 'normalization' and 'canonicalization' of locale ids.
+ * Normalization 'cleans up' ICU locale ids as follows:
+ * <ul>
+ * <li>language, script, country, variant, and keywords are properly cased<br>
+ * (lower, title, upper, upper, and lower case respectively)</li>
+ * <li>hyphens used as separators are converted to underscores</li>
+ * <li>three-letter language and country ids are converted to two-letter
+ * equivalents where available</li>
+ * <li>surrounding spaces are removed from keywords and values</li>
+ * <li>if there are multiple keywords, they are put in sorted order</li>
+ * </ul>
+ * Canonicalization additionally performs the following:
+ * <ul>
+ * <li>POSIX ids are converted to ICU format IDs</li>
+ * <li>'grandfathered' 3066 ids are converted to ICU standard form</li>
+ * <li>'PREEURO' and 'EURO' variants are converted to currency keyword form,
+ * with the currency
+ * id appropriate to the country of the locale (for PREEURO) or EUR (for EURO).</li>
+ * </ul>
+ * All ULocale constructors automatically normalize the locale id. To handle
+ * POSIX ids, <code>canonicalize</code> can be called to convert the id
+ * to canonical form, or the <code>canonicalInstance</code> factory method
+ * can be called.
+ *
+ * This class provides selectors {@link #VALID_LOCALE} and {@link
+ * #ACTUAL_LOCALE} intended for use in methods named
+ * <code>getLocale()</code>. These methods exist in several ICU classes,
+ * including {@link com.ibm.icu.util.Calendar}, {@link
+ * com.ibm.icu.util.Currency}, {@link com.ibm.icu.text.UFormat},
+ * {@link com.ibm.icu.text.BreakIterator},
+ * <code>Collator</code>,
+ * {@link com.ibm.icu.text.DateFormatSymbols}, and {@link
+ * com.ibm.icu.text.DecimalFormatSymbols} and their subclasses, if
+ * any. Once an object of one of these classes has been created,
+ * <code>getLocale()</code> may be called on it to determine the valid and
+ * actual locale arrived at during the object's construction.
+ *
+ * <p>Note: The actual locale is returned correctly, but the valid
+ * locale is not, in most cases.
+ *
+ * @see java.util.Locale
+ * @author weiv
+ * @author Alan Liu
+ * @author Ram Viswanadha
+ * @stable ICU 2.8
+ */
+public final class ULocale implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale ENGLISH = new ULocale("en", Locale.ENGLISH);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale FRENCH = new ULocale("fr", Locale.FRENCH);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale GERMAN = new ULocale("de", Locale.GERMAN);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale ITALIAN = new ULocale("it", Locale.ITALIAN);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale JAPANESE = new ULocale("ja", Locale.JAPANESE);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale KOREAN = new ULocale("ko", Locale.KOREAN);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale CHINESE = new ULocale("zh", Locale.CHINESE);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale SIMPLIFIED_CHINESE = new ULocale("zh_Hans", Locale.CHINESE);
+
+ /**
+ * Useful constant for language.
+ * @stable ICU 3.0
+ */
+ public static final ULocale TRADITIONAL_CHINESE = new ULocale("zh_Hant", Locale.CHINESE);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale FRANCE = new ULocale("fr_FR", Locale.FRANCE);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale GERMANY = new ULocale("de_DE", Locale.GERMANY);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale ITALY = new ULocale("it_IT", Locale.ITALY);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale JAPAN = new ULocale("ja_JP", Locale.JAPAN);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale KOREA = new ULocale("ko_KR", Locale.KOREA);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale CHINA = new ULocale("zh_Hans_CN", Locale.CHINA);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale PRC = CHINA;
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale TAIWAN = new ULocale("zh_Hant_TW", Locale.TAIWAN);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale UK = new ULocale("en_GB", Locale.UK);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale US = new ULocale("en_US", Locale.US);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale CANADA = new ULocale("en_CA", Locale.CANADA);
+
+ /**
+ * Useful constant for country/region.
+ * @stable ICU 3.0
+ */
+ public static final ULocale CANADA_FRENCH = new ULocale("fr_CA", Locale.CANADA_FRENCH);
+
+ /**
+ * Handy constant.
+ */
+ private static final String EMPTY_STRING = "";
+
+ // Used in both ULocale and LocaleIDParser, so moved up here.
+ private static final char UNDERSCORE = '_';
+
+ // default empty locale
+ private static final Locale EMPTY_LOCALE = new Locale("", "");
+
+ /**
+ * The root ULocale.
+ * @stable ICU 2.8
+ */
+ public static final ULocale ROOT = new ULocale("", EMPTY_LOCALE);
+
+ // Lookup table used by forLocale(): keyed by java.util.Locale, holding the matching ULocale.
+ // NOTE(review): the declaration carries no type arguments here although forLocale() assigns
+ // CACHE.get(loc) straight to a ULocale - confirm whether generic parameters were lost.
+ private static final SimpleCache CACHE = new SimpleCache();
+
+ /**
+ * Cached java.util.Locale for this object. Transient, so it is not serialized; when it is
+ * null, toLocale() builds it from localeID on first use.
+ */
+ private transient volatile Locale locale;
+
+ /**
+ * The locale ID this object was constructed with. The public constructors store the
+ * result of getName(...) rather than the string exactly as passed in.
+ */
+ private String localeID;
+
+ // Canonicalization tables; both stay null until initCANONICALIZE_MAP() populates them.
+ private static String[][] CANONICALIZE_MAP;
+ private static String[][] variantsToKeywords;
+
+ // Lazily populates CANONICALIZE_MAP and variantsToKeywords. Each table is built in a local
+ // array first and assigned to the static field only while holding the ULocale.class monitor,
+ // and only if the field is still null at that point.
+ private static void initCANONICALIZE_MAP() {
+ if (CANONICALIZE_MAP == null) {
+ /**
+ * This table lists pairs of locale ids for canonicalization.
+ * The 1st item is the normalized id. The 2nd item is the
+ * canonicalized id. The 3rd is the keyword. The 4th is the keyword value.
+ * The 3rd and 4th items are null when no keyword is attached.
+ */
+ String[][] tempCANONICALIZE_MAP = {
+// { EMPTY_STRING, "en_US_POSIX", null, null }, /* .NET name */
+ { "C", "en_US_POSIX", null, null }, /* POSIX name */
+ { "art_LOJBAN", "jbo", null, null }, /* registered name */
+ { "az_AZ_CYRL", "az_Cyrl_AZ", null, null }, /* .NET name */
+ { "az_AZ_LATN", "az_Latn_AZ", null, null }, /* .NET name */
+ { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
+ { "cel_GAULISH", "cel__GAULISH", null, null }, /* registered name */
+ { "de_1901", "de__1901", null, null }, /* registered name */
+ { "de_1906", "de__1906", null, null }, /* registered name */
+ { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
+ { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
+ { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
+ // NOTE(review): every other *_PREEURO row maps to a pre-euro currency (fr_LU_PREEURO
+ // below uses "LUF"); "EUR" on this row looks inconsistent - confirm against upstream data.
+ { "de_LU_PREEURO", "de_LU", "currency", "EUR" },
+ { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
+ { "en_BOONT", "en__BOONT", null, null }, /* registered name */
+ { "en_SCOUSE", "en__SCOUSE", null, null }, /* registered name */
+ { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
+ { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
+ { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
+ { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
+ { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
+ { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
+ { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
+ { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
+ { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
+ { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
+ { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
+ { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
+ { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
+ { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
+// { "nb_NO_NY", "nn_NO", null, null },
+ { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
+ { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
+ { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
+ { "sl_ROZAJ", "sl__ROZAJ", null, null }, /* registered name */
+ { "sr_SP_CYRL", "sr_Cyrl_RS", null, null }, /* .NET name */
+ { "sr_SP_LATN", "sr_Latn_RS", null, null }, /* .NET name */
+ { "sr_YU_CYRILLIC", "sr_Cyrl_RS", null, null }, /* Linux name */
+ { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
+ { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", null, null }, /* Linux name */
+ { "uz_UZ_CYRL", "uz_Cyrl_UZ", null, null }, /* .NET name */
+ { "uz_UZ_LATN", "uz_Latn_UZ", null, null }, /* .NET name */
+ { "zh_CHS", "zh_Hans", null, null }, /* .NET name */
+ { "zh_CHT", "zh_Hant", null, null }, /* .NET name */
+ { "zh_GAN", "zh__GAN", null, null }, /* registered name */
+ { "zh_GUOYU", "zh", null, null }, /* registered name */
+ { "zh_HAKKA", "zh__HAKKA", null, null }, /* registered name */
+ { "zh_MIN", "zh__MIN", null, null }, /* registered name */
+ { "zh_MIN_NAN", "zh__MINNAN", null, null }, /* registered name */
+ { "zh_WUU", "zh__WUU", null, null }, /* registered name */
+ { "zh_XIANG", "zh__XIANG", null, null }, /* registered name */
+ { "zh_YUE", "zh__YUE", null, null } /* registered name */
+ };
+
+ // Re-check under the lock so a table installed by another thread is not overwritten.
+ synchronized (ULocale.class) {
+ if (CANONICALIZE_MAP == null) {
+ CANONICALIZE_MAP = tempCANONICALIZE_MAP;
+ }
+ }
+ }
+ if (variantsToKeywords == null) {
+ /**
+ * This table maps variants to keywords for canonicalization.
+ * The first item is the normalized variant id. The second is the
+ * keyword and the third is the keyword value.
+ */
+ String[][] tempVariantsToKeywords = {
+ { "EURO", "currency", "EUR" },
+ { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
+ { "STROKE", "collation", "stroke" } /* Solaris variant */
+ };
+
+ // Same re-check under the lock as for CANONICALIZE_MAP above.
+ synchronized (ULocale.class) {
+ if (variantsToKeywords == null) {
+ variantsToKeywords = tempVariantsToKeywords;
+ }
+ }
+ }
+ }
+
+ /*
+ * This table is used for mapping between ICU and special Java
+ * locales. When an ICU locale's base name matches <ICU base> or
+ * <minimum base> and, where a keyword is listed, carries
+ * <keyword>=<value>, the ICU locale is mapped to the <Java> locale.
+ * For example, both ja_JP@calendar=japanese and ja@calendar=japanese
+ * are mapped to Java locale "ja_JP_JP". ICU locale "nn" is mapped
+ * to Java locale "no_NO_NY".
+ */
+ private static final String[][] _javaLocaleMap = {
+ // { <Java>, <ICU base>, <keyword>, <value>, <minimum base> }
+ { "ja_JP_JP", "ja_JP", "calendar", "japanese", "ja"},
+ { "no_NO_NY", "nn_NO", null, null, "nn"},
+ { "th_TH_TH", "th_TH", "numbers", "thai", "th"},
+ };
+
+ /**
+ * Private constructor used by static initializers.
+ */
+ private ULocale(String localeID, Locale locale) {
+ // Both values are stored as given; the ID is not passed through getName() here.
+ this.localeID = localeID;
+ // May be null (see createCanonical/getFallback); toLocale() then builds it on demand.
+ this.locale = locale;
+ }
+
+ /**
+ * Construct a ULocale object from a {@link java.util.Locale}.
+ * @param loc a JDK locale
+ */
+ private ULocale(Locale loc) {
+ // The ID is obtained through forLocale(loc), which also records an entry for loc in CACHE.
+ // NOTE(review): forLocale returns null for a null loc, so a null argument would fail here.
+ this.localeID = getName(forLocale(loc).toString());
+ this.locale = loc;
+ }
+
+ /**
+ * {@icu} Returns a ULocale object for a {@link java.util.Locale}.
+ * The ULocale is canonicalized.
+ * @param loc a JDK locale
+ * @stable ICU 3.2
+ */
+    public static ULocale forLocale(Locale loc) {
+        if (loc == null) {
+            return null;
+        }
+        // Fast path: a ULocale was already recorded for this Locale.
+        ULocale cached = CACHE.get(loc);
+        if (cached != null) {
+            return cached;
+        }
+
+        ULocale created;
+        if (defaultULocale != null && loc == defaultULocale.locale) {
+            // Identity match with the current default's Locale: reuse the default ULocale.
+            created = defaultULocale;
+        } else {
+            String id = loc.toString();
+            if (id.length() == 0) {
+                created = ROOT;
+            } else {
+                // Special Java locales (column 0) are rewritten to their ICU base plus keyword.
+                for (String[] mapping : _javaLocaleMap) {
+                    if (mapping[0].equals(id)) {
+                        LocaleIDParser parser = new LocaleIDParser(mapping[1]);
+                        parser.setKeywordValue(mapping[2], mapping[3]);
+                        id = parser.getName();
+                        break;
+                    }
+                }
+                created = new ULocale(id, loc);
+            }
+        }
+        CACHE.put(loc, created);
+        return created;
+    }
+
+ /**
+ * {@icu} Constructs a ULocale from a RFC 3066 locale ID. The locale ID consists
+ * of optional language, script, country, and variant fields in that order,
+ * separated by underscores, followed by an optional keyword list. The
+ * script, if present, is four characters long-- this distinguishes it
+ * from a country code, which is two characters long. Other fields
+ * are distinguished by position as indicated by the underscores. The
+ * start of the keyword list is indicated by '@', and consists of two
+ * or more keyword/value pairs separated by semicolons(';').
+ *
+ * This constructor does not canonicalize the localeID. So, for
+ * example, "zh__pinyin" remains unchanged instead of converting
+ * to "zh@collation=pinyin". By default ICU only recognizes the
+ * latter as specifying pinyin collation. Use {@link #createCanonical}
+ * or {@link #canonicalize} if you need to canonicalize the localeID.
+ *
+ * @param localeID string representation of the locale, e.g:
+ * "en_US", "sy_Cyrl_YU", "zh__pinyin", "es_ES@currency=EUR;collation=traditional"
+ * @stable ICU 2.8
+ */
+ public ULocale(String localeID) {
+ // Only the ID is stored (after getName()); the JDK Locale stays unset until toLocale() is called.
+ this.localeID = getName(localeID);
+ }
+
+ /**
+ * Convenience overload of ULocale(String, String, String) for
+ * compatibility with java.util.Locale.
+ * @see #ULocale(String, String, String)
+ * @stable ICU 3.4
+ */
+ public ULocale(String a, String b) {
+ // Same as the three-field form with no third field.
+ this(a, b, null);
+ }
+
+ /**
+ * Constructs a ULocale from a localeID constructed from the three 'fields' a, b, and
+ * c. These fields are concatenated using underscores to form a localeID of the form
+ * a_b_c, which is then handled like the localeID passed to <code>ULocale(String
+ * localeID)</code>.
+ *
+ * <p>Java locale strings consisting of language, country, and
+ * variant will be handled by this form, since the country code
+ * (being shorter than four letters long) will not be interpreted
+ * as a script code. If a script code is present, the final
+ * argument ('c') will be interpreted as the country code. It is
+ * recommended that this constructor only be used to ease porting,
+ * and that clients instead use the single-argument constructor
+ * when constructing a ULocale from a localeID.
+ * @param a first component of the locale id
+ * @param b second component of the locale id
+ * @param c third component of the locale id
+ * @see #ULocale(String)
+ * @stable ICU 3.0
+ */
+ public ULocale(String a, String b, String c) {
+ // The fields are joined with underscores by lscvToID (a, b, c occupy its first three
+ // slots, the fourth is empty) and the joined ID is then passed through getName().
+ localeID = getName(lscvToID(a, b, c, EMPTY_STRING));
+ }
+
+ /**
+ * {@icu} Creates a ULocale from the id by first canonicalizing the id.
+ * @param nonCanonicalID the locale id to canonicalize
+ * @return the locale created from the canonical version of the ID.
+ * @stable ICU 3.0
+ */
+ public static ULocale createCanonical(String nonCanonicalID) {
+ // The canonicalized ID is stored as is; the JDK Locale is left null and built lazily by toLocale().
+ return new ULocale(canonicalize(nonCanonicalID), (Locale)null);
+ }
+
+    /**
+     * Joins language, script, country and variant into a single locale ID,
+     * skipping fields that are null or empty.
+     */
+    private static String lscvToID(String lang, String script, String country, String variant) {
+        final boolean hasLang = lang != null && lang.length() > 0;
+        final boolean hasScript = script != null && script.length() > 0;
+        final boolean hasCountry = country != null && country.length() > 0;
+        final boolean hasVariant = variant != null && variant.length() > 0;
+
+        StringBuilder id = new StringBuilder();
+        if (hasLang) {
+            id.append(lang);
+        }
+        if (hasScript) {
+            id.append(UNDERSCORE).append(script);
+        }
+        if (hasCountry) {
+            id.append(UNDERSCORE).append(country);
+        }
+        if (hasVariant) {
+            // With no country, a second underscore keeps the variant in its own slot.
+            if (!hasCountry) {
+                id.append(UNDERSCORE);
+            }
+            id.append(UNDERSCORE).append(variant);
+        }
+        return id.toString();
+    }
+
+ /**
+ * {@icu} Converts this ULocale object to a {@link java.util.Locale}.
+ * @return a JDK locale that either exactly represents this object
+ * or is the closest approximation.
+ * @stable ICU 2.8
+ */
+    public Locale toLocale() {
+        if (locale == null) {
+            LocaleIDParser parser = new LocaleIDParser(localeID);
+            String baseName = parser.getBaseName();
+            for (String[] entry : _javaLocaleMap) {
+                // The row applies only if the base name equals its ICU base or its minimum base.
+                if (!baseName.equals(entry[1]) && !baseName.equals(entry[4])) {
+                    continue;
+                }
+                String keyword = entry[2];
+                if (keyword != null) {
+                    // Rows with a keyword also require that keyword to carry the listed value.
+                    String value = parser.getKeywordValue(keyword);
+                    if (value == null || !value.equals(entry[3])) {
+                        continue;
+                    }
+                }
+                // Matched: continue with the special Java locale ID instead.
+                parser = new LocaleIDParser(entry[0]);
+                break;
+            }
+            // Script (index 1) is not passed to the Locale constructor.
+            String[] fields = parser.getLanguageScriptCountryVariant();
+            locale = new Locale(fields[0], fields[2], fields[3]);
+        }
+        return locale;
+    }
+
+ // NOTE(review): nameCache is not used within this part of the file; presumably it backs getName() - confirm.
+ private static ICUCache nameCache = new SimpleCache();
+ /**
+ * Keep our own default ULocale. defaultLocale records the JDK default that
+ * defaultULocale was built from; getDefault() compares it with the current JDK default.
+ */
+ private static Locale defaultLocale = Locale.getDefault();
+ private static ULocale defaultULocale = new ULocale(defaultLocale);
+
+ /**
+ * Returns the current default ULocale.
+ * @stable ICU 2.8
+ */
+    public static ULocale getDefault() {
+        synchronized (ULocale.class) {
+            // Rebuild the cached pair whenever the JDK default no longer equals the recorded one.
+            Locale jdkDefault = Locale.getDefault();
+            boolean upToDate = defaultLocale.equals(jdkDefault);
+            if (!upToDate) {
+                defaultLocale = jdkDefault;
+                defaultULocale = new ULocale(jdkDefault);
+            }
+            return defaultULocale;
+        }
+    }
+
+    /**
+     * {@icu} Sets the default ULocale. This also sets the default Locale.
+     * If the caller does not have write permission to the
+     * user.language property, a security exception will be thrown,
+     * and the default ULocale will remain unchanged.
+     * @param newLocale the new default locale
+     * @throws SecurityException if a security manager exists and its
+     * <code>checkPermission</code> method doesn't allow the operation.
+     * @throws NullPointerException if <code>newLocale</code> is null
+     * @see SecurityManager#checkPermission(java.security.Permission)
+     * @see java.util.PropertyPermission
+     * @stable ICU 3.0
+     */
+    public static synchronized void setDefault(ULocale newLocale){
+        Locale jdkLocale = newLocale.toLocale();
+        // Set the JDK default first: if this throws, the cached pair below is left untouched.
+        Locale.setDefault(jdkLocale);
+        // Record the matching JDK Locale as well. Without it getDefault() can see a stale
+        // defaultLocale, conclude the JDK default changed, and replace newLocale with a
+        // ULocale rebuilt from the plain Locale.
+        defaultLocale = jdkLocale;
+        defaultULocale = newLocale;
+    }
+
+ /**
+ * This is for compatibility with Locale-- in actuality, since ULocale is
+ * immutable, there is no reason to clone it, so this API returns 'this'.
+ * @stable ICU 3.0
+ */
+ public Object clone() {
+ // No copy is made: the same instance is handed back.
+ return this;
+ }
+
+ /**
+ * Returns the hashCode.
+ * @stable ICU 3.0
+ */
+ public int hashCode() {
+ // Derived from the ID string only, the same value equals() compares.
+ return localeID.hashCode();
+ }
+
+ /**
+ * Returns true if the other object is another ULocale with the
+ * same full name, or is a String localeID that matches the full name.
+ * Note that since names are not canonicalized, two ULocales that
+ * function identically might not compare equal.
+ *
+ * @return true if this Locale is equal to the specified object.
+ * @stable ICU 3.0
+ */
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        // Pick the ID to compare against: the string itself, or the other ULocale's ID.
+        final String otherID;
+        if (obj instanceof String) {
+            otherID = (String) obj;
+        } else if (obj instanceof ULocale) {
+            otherID = ((ULocale) obj).localeID;
+        } else {
+            return false;
+        }
+        return localeID.equals(otherID);
+    }
+
+    /**
+     * {@icunote} Unlike the Locale API, this returns an array of <code>ULocale</code>,
+     * not <code>Locale</code>. Returns a list of all installed locales.
+     * The list is built once from the JDK's available locales; each call
+     * returns a fresh copy of it.
+     * @stable ICU 3.0
+     */
+    public static ULocale[] getAvailableLocales() {
+        ULocale[] snapshot = availableLocales;
+        if (snapshot == null) {
+            synchronized (ULocale.class) {
+                snapshot = availableLocales;
+                if (snapshot == null) {
+                    Locale[] locales = Locale.getAvailableLocales();
+                    // Fill a local array and publish it only when complete. Storing the
+                    // empty array in the shared field first would let a concurrent caller
+                    // pass the unsynchronized null check and clone a half-filled list.
+                    ULocale[] built = new ULocale[locales.length];
+                    for (int i = 0; i < locales.length; i++) {
+                        built[i] = ULocale.forLocale(locales[i]);
+                    }
+                    availableLocales = built;
+                    snapshot = built;
+                }
+            }
+        }
+        // Callers get their own copy so they cannot modify the shared array.
+        return snapshot.clone();
+    }
+ private static volatile ULocale[] availableLocales = null;
+
+ /**
+ * Returns a list of all 2-letter country codes defined in ISO 3166.
+ * Can be used to create Locales.
+ * @stable ICU 3.0
+ */
+ public static String[] getISOCountries() {
+ // The list is supplied by LocaleIDs.
+ return LocaleIDs.getISOCountries();
+ }
+
+ /**
+ * Returns a list of all 2-letter language codes defined in ISO 639.
+ * Can be used to create Locales.
+ * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
+ * The list this function returns includes both the new and the old codes for the
+ * languages whose codes have changed.]
+ * @stable ICU 3.0
+ */
+ public static String[] getISOLanguages() {
+ // The list is supplied by LocaleIDs.
+ return LocaleIDs.getISOLanguages();
+ }
+
+ /**
+ * Returns the language code for this locale, which will either be the empty string
+ * or a lowercase ISO 639 code.
+ * @see #getDisplayLanguage()
+ * @see #getDisplayLanguage(ULocale)
+ * @stable ICU 3.0
+ */
+ public String getLanguage() {
+ // Delegates to the static overload with this object's ID.
+ return getLanguage(localeID);
+ }
+
+ /**
+ * Returns the language code for the locale ID,
+ * which will either be the empty string
+ * or a lowercase ISO 639 code.
+ * @see #getDisplayLanguage()
+ * @see #getDisplayLanguage(ULocale)
+ * @stable ICU 3.0
+ */
+ public static String getLanguage(String localeID) {
+ // A new LocaleIDParser is created for every call; nothing is cached here.
+ return new LocaleIDParser(localeID).getLanguage();
+ }
+
+ /**
+ * {@icu} Returns the script code for this locale, which might be the empty string.
+ * @see #getDisplayScript()
+ * @see #getDisplayScript(ULocale)
+ * @stable ICU 3.0
+ */
+ public String getScript() {
+ // Delegates to the static overload with this object's ID.
+ return getScript(localeID);
+ }
+
+ /**
+ * {@icu} Returns the script code for the specified locale, which might be the empty
+ * string.
+ * @see #getDisplayScript()
+ * @see #getDisplayScript(ULocale)
+ * @stable ICU 3.0
+ */
+ public static String getScript(String localeID) {
+ // A new LocaleIDParser is created for every call; nothing is cached here.
+ return new LocaleIDParser(localeID).getScript();
+ }
+
+ /**
+ * Returns the country/region code for this locale, which will either be the empty string
+ * or an uppercase ISO 3166 2-letter code.
+ * @see #getDisplayCountry()
+ * @see #getDisplayCountry(ULocale)
+ * @stable ICU 3.0
+ */
+ public String getCountry() {
+ // Delegates to the static overload with this object's ID.
+ return getCountry(localeID);
+ }
+
+ /**
+ * Returns the country/region code for this locale, which will either be the empty string
+ * or an uppercase ISO 3166 2-letter code.
+ * @param localeID The locale identification string.
+ * @see #getDisplayCountry()
+ * @see #getDisplayCountry(ULocale)
+ * @stable ICU 3.0
+ */
+ public static String getCountry(String localeID) {
+ return new LocaleIDParser(localeID).getCountry();
+ }
+
+ /**
+ * Returns the variant code for this locale, which might be the empty string.
+ * @see #getDisplayVariant()
+ * @see #getDisplayVariant(ULocale)
+ * @stable ICU 3.0
+ */
+ public String getVariant() {
+ return getVariant(localeID);
+ }
+
+ /**
+ * Returns the variant code for the specified locale, which might be the empty string.
+ * @see #getDisplayVariant()
+ * @see #getDisplayVariant(ULocale)
+ * @stable ICU 3.0
+ */
+ public static String getVariant(String localeID) {
+ return new LocaleIDParser(localeID).getVariant();
+ }
+
+ /**
+ * {@icu} Returns the fallback locale for the specified locale, which might be the
+ * empty string.
+ * @stable ICU 3.2
+ */
+ public static String getFallback(String localeID) {
+ return getFallbackString(getName(localeID));
+ }
+
+ /**
+ * {@icu} Returns the fallback locale for this locale. If this locale is root,
+ * returns null.
+ * @stable ICU 3.2
+ */
+    public ULocale getFallback() {
+        // Root (an empty base name, with or without a keyword section) has no fallback.
+        final boolean isRoot = localeID.length() == 0 || localeID.charAt(0) == '@';
+        // The cast on null picks the two-argument constructor that takes a Locale.
+        return isRoot ? null : new ULocale(getFallbackString(localeID), (Locale)null);
+    }
+
+ /**
+ * Returns the given (canonical) locale id minus the last part before the tags.
+ */
+    private static String getFallbackString(String fallback) {
+        // Everything from '@' on (the keyword section) is carried over unchanged.
+        int keywordStart = fallback.indexOf('@');
+        if (keywordStart < 0) {
+            keywordStart = fallback.length();
+        }
+
+        // Cut at the last '_' found searching backwards from the keyword section;
+        // when there is none, the whole base name is dropped (cut stays at 0).
+        int cut = Math.max(fallback.lastIndexOf('_', keywordStart), 0);
+
+        // Step back over a run of consecutive '_' so that an empty segment
+        // (as in "en__POSIX") is removed together with the last part.
+        while (cut > 0 && fallback.charAt(cut - 1) == '_') {
+            cut--;
+        }
+
+        return fallback.substring(0, cut) + fallback.substring(keywordStart);
+    }
+
+ /**
+ * {@icu} Returns the (normalized) base name for this locale.
+ * @return the base name as a String.
+ * @stable ICU 3.0
+ */
+ public String getBaseName() {
+ return getBaseName(localeID);
+ }
+
+ /**
+ * {@icu} Returns the (normalized) base name for the specified locale.
+ * @param localeID the locale ID as a string
+ * @return the base name as a String.
+ * @stable ICU 3.0
+ */
+    public static String getBaseName(String localeID) {
+        // NOTE(review): an ID without '@' is handed back as is, i.e. it never reaches the parser.
+        final boolean hasKeywordSection = localeID.indexOf('@') != -1;
+        return hasKeywordSection
+                ? new LocaleIDParser(localeID).getBaseName() : localeID;
+    }
+
+ /**
+ * {@icu} Returns the (normalized) full name for this locale.
+ *
+ * @return String the full name of the localeID
+ * @stable ICU 3.0
+ */
+ public String getName() {
+ return localeID; // always normalized
+ }
+
+ /**
+ * {@icu} Returns the (normalized) full name for the specified locale.
+ *
+ * @param localeID the localeID as a string
+ * @return String the full name of the localeID
+ * @stable ICU 3.0
+ */
+    public static String getName(String localeID) {
+        String fullName = nameCache.get(localeID); // the cache is keyed by the raw input string
+        if (fullName == null) {
+            fullName = new LocaleIDParser(localeID).getName(); // cache miss: parse once
+            nameCache.put(localeID, fullName);
+        }
+        return fullName;
+    }
+
+ /**
+ * Returns a string representation of this object.
+ * @stable ICU 3.0
+ */
+ public String toString() {
+ return localeID;
+ }
+
+ /**
+ * {@icu} Returns an iterator over keywords for this locale. If there
+ * are no keywords, returns null.
+ * @return iterator over keywords, or null if there are no keywords.
+ * @stable ICU 3.0
+ */
+    public Iterator<String> getKeywords() { // parameterized; the erased signature is unchanged
+        return getKeywords(localeID); // null when this locale has no keywords
+    }
+
+ /**
+ * {@icu} Returns an iterator over keywords for the specified locale. If there
+ * are no keywords, returns null.
+ * @return an iterator over the keywords in the specified locale, or null
+ * if there are no keywords.
+ * @stable ICU 3.0
+ */
+    public static Iterator<String> getKeywords(String localeID) { // parameterized, same erasure
+        return new LocaleIDParser(localeID).getKeywords(); // the parser does the actual work
+    }
+
+ /**
+ * {@icu} Returns the value for a keyword in this locale. If the keyword is not
+ * defined, returns null.
+ * @param keywordName name of the keyword whose value is desired. Case insensitive.
+ * @return the value of the keyword, or null.
+ * @stable ICU 3.0
+ */
+ public String getKeywordValue(String keywordName){
+ return getKeywordValue(localeID, keywordName);
+ }
+
+ /**
+ * {@icu} Returns the value for a keyword in the specified locale. If the keyword is
+ * not defined, returns null. The locale name does not need to be normalized.
+ * @param keywordName name of the keyword whose value is desired. Case insensitive.
+ * @return String the value of the keyword as a string
+ * @stable ICU 3.0
+ */
+ public static String getKeywordValue(String localeID, String keywordName) {
+ return new LocaleIDParser(localeID).getKeywordValue(keywordName);
+ }
+
+ /**
+ * {@icu} Returns the canonical name for the specified locale ID. This is used to
+ * convert POSIX and other grandfathered IDs to standard ICU form.
+ * @param localeID the locale id
+ * @return the canonicalized id
+ * @stable ICU 3.0
+ */
+    public static String canonicalize(String localeID){
+        final LocaleIDParser idParser = new LocaleIDParser(localeID, true);
+        String base = idParser.getBaseName();
+
+        // The empty ID is deliberately passed through unchanged; it used to be
+        // mapped to "en_US_POSIX". Only a completely empty ID qualifies, so an ID
+        // consisting of keywords alone (e.g. "@foo=bar") continues below.
+        if (localeID.equals("")) {
+            return "";
+        }
+
+        // From here on the ID has the shape xx_Yyyy_ZZ_KKKKK.
+        initCANONICALIZE_MAP();
+        boolean remapped = false;
+
+        // Step 1: a variant listed in variantsToKeywords is cut off the base name
+        // and expressed as a keyword instead. Only the first matching entry is used.
+        for (String[] variantEntry : variantsToKeywords) {
+            final int variantPos = base.lastIndexOf("_" + variantEntry[0]);
+            if (variantPos < 0) {
+                continue;
+            }
+            remapped = true;
+            base = base.substring(0, variantPos);
+            if (base.endsWith("_")) {
+                // drop a single trailing separator left behind by an empty segment
+                base = base.substring(0, base.length() - 1);
+            }
+            idParser.setBaseName(base);
+            idParser.defaultKeywordValue(variantEntry[1], variantEntry[2]);
+            break;
+        }
+
+        // Step 2: an exact match of the (possibly shortened) base name against the
+        // first column of CANONICALIZE_MAP replaces it with the mapped ID.
+        for (String[] mapping : CANONICALIZE_MAP) {
+            if (!mapping[0].equals(base)) {
+                continue;
+            }
+            remapped = true;
+            idParser.setBaseName(mapping[1]);
+            if (mapping[2] != null) {
+                idParser.defaultKeywordValue(mapping[2], mapping[3]);
+            }
+            break;
+        }
+
+        // Step 3: special case for Norwegian, applied only when nothing matched
+        // above: "nb" with variant NY is rebuilt as "nn" (same script and country).
+        if (!remapped
+                && idParser.getLanguage().equals("nb")
+                && idParser.getVariant().equals("NY")) {
+            idParser.setBaseName(
+                    lscvToID("nn", idParser.getScript(), idParser.getCountry(), null));
+        }
+
+        return idParser.getName();
+    }
+
+ /**
+ * Given a keyword and a value, return a new locale with an updated
+ * keyword and value. If keyword is null, this removes all keywords from the locale id.
+ * Otherwise, if the value is null, this removes the value for this keyword from the
+ * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
+ * The keyword and value must not be empty.
+ * @param keyword the keyword to add/remove, or null to remove all keywords.
+ * @param value the value to add/set, or null to remove this particular keyword.
+ * @return the updated locale
+ * @stable ICU 3.2
+ */
+ public ULocale setKeywordValue(String keyword, String value) {
+ return new ULocale(setKeywordValue(localeID, keyword, value), (Locale)null);
+ }
+
+ /**
+ * Given a locale id, a keyword, and a value, return a new locale id with an updated
+ * keyword and value. If keyword is null, this removes all keywords from the locale id.
+ * Otherwise, if the value is null, this removes the value for this keyword from the
+ * locale id. Otherwise, this adds/replaces the value for this keyword in the locale id.
+ * The keyword and value must not be empty.
+ * @param localeID the locale id to modify
+ * @param keyword the keyword to add/remove, or null to remove all keywords.
+ * @param value the value to add/set, or null to remove this particular keyword.
+ * @return the updated locale id
+ * @stable ICU 3.2
+ */
+ public static String setKeywordValue(String localeID, String keyword, String value) {
+ LocaleIDParser parser = new LocaleIDParser(localeID);
+ parser.setKeywordValue(keyword, value);
+ return parser.getName();
+ }
+
+ /*
+ * Given a locale id, a keyword, and a value, return a new locale id with an updated
+ * keyword and value, if the keyword does not already have a value. The keyword and
+ * value must not be null or empty.
+ * @param localeID the locale id to modify
+ * @param keyword the keyword to add, if not already present
+ * @param value the value to add, if not already present
+ * @return the updated locale id
+ */
+/* private static String defaultKeywordValue(String localeID, String keyword, String value) {
+ LocaleIDParser parser = new LocaleIDParser(localeID);
+ parser.defaultKeywordValue(keyword, value);
+ return parser.getName();
+ }*/
+
+ /**
+ * Returns a three-letter abbreviation for this locale's language. If the locale
+ * doesn't specify a language, returns the empty string. Otherwise, returns
+ * a lowercase ISO 639-2/T language code.
+ * The ISO 639-2 language codes can be found on-line at
+ * ftp://dkuug.dk/i18n/iso-639-2.txt
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter language abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public String getISO3Language(){
+ return getISO3Language(localeID);
+ }
+
+ /**
+ * Returns a three-letter abbreviation for this locale's language. If the locale
+ * doesn't specify a language, returns the empty string. Otherwise, returns
+ * a lowercase ISO 639-2/T language code.
+ * The ISO 639-2 language codes can be found on-line at
+ * ftp://dkuug.dk/i18n/iso-639-2.txt
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter language abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public static String getISO3Language(String localeID) {
+ return LocaleIDs.getISO3Language(getLanguage(localeID));
+ }
+
+ /**
+ * Returns a three-letter abbreviation for this locale's country/region. If the locale
+ * doesn't specify a country, returns the empty string. Otherwise, returns
+ * an uppercase ISO 3166 3-letter country code.
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter country abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public String getISO3Country() {
+ return getISO3Country(localeID);
+ }
+
+ /**
+ * Returns a three-letter abbreviation for this locale's country/region. If the locale
+ * doesn't specify a country, returns the empty string. Otherwise, returns
+ * an uppercase ISO 3166 3-letter country code.
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter country abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public static String getISO3Country(String localeID) {
+ return LocaleIDs.getISO3Country(getCountry(localeID));
+ }
+
+ // display names
+
+ /**
+ * Returns this locale's language localized for display in the default locale.
+ * @return the localized language name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayLanguage() {
+ return getDisplayLanguageInternal(this, getDefault(), false);
+ }
+
+ /**
+ * {@icu} Returns this locale's language localized for display in the provided locale.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized language name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayLanguage(ULocale displayLocale) {
+ return getDisplayLanguageInternal(this, displayLocale, false);
+ }
+
+ /**
+ * Returns a locale's language localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose language will be displayed
+ * @param displayLocaleID the id of the locale in which to display the name.
+ * @return the localized language name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayLanguage(String localeID, String displayLocaleID) {
+ return getDisplayLanguageInternal(new ULocale(localeID), new ULocale(displayLocaleID),
+ false);
+ }
+
+ /**
+ * Returns a locale's language localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose language will be displayed.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized language name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayLanguage(String localeID, ULocale displayLocale) {
+ return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, false);
+ }
+ /**
+ * {@icu} Returns this locale's language localized for display in the default locale.
+ * If a dialect name is present in the data, then it is returned.
+ * @return the localized language name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getDisplayLanguageWithDialect() {
+ return getDisplayLanguageInternal(this, getDefault(), true);
+ }
+
+ /**
+ * {@icu} Returns this locale's language localized for display in the provided locale.
+ * If a dialect name is present in the data, then it is returned.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized language name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getDisplayLanguageWithDialect(ULocale displayLocale) {
+ return getDisplayLanguageInternal(this, displayLocale, true);
+ }
+
+ /**
+ * {@icu} Returns a locale's language localized for display in the provided locale.
+ * If a dialect name is present in the data, then it is returned.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose language will be displayed
+ * @param displayLocaleID the id of the locale in which to display the name.
+ * @return the localized language name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static String getDisplayLanguageWithDialect(String localeID, String displayLocaleID) {
+ return getDisplayLanguageInternal(new ULocale(localeID), new ULocale(displayLocaleID),
+ true);
+ }
+
+ /**
+ * {@icu} Returns a locale's language localized for display in the provided locale.
+ * If a dialect name is present in the data, then it is returned.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose language will be displayed.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized language name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static String getDisplayLanguageWithDialect(String localeID, ULocale displayLocale) {
+ return getDisplayLanguageInternal(new ULocale(localeID), displayLocale, true);
+ }
+
+ private static String getDisplayLanguageInternal(ULocale locale, ULocale displayLocale,
+ boolean useDialect) {
+ // No dialect support
+ return locale.toLocale().getDisplayLanguage(displayLocale.toLocale());
+ }
+
+ /**
+ * {@icu} Returns this locale's script localized for display in the default locale.
+ * @return the localized script name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayScript() {
+ return getDisplayScriptInternal(this, getDefault());
+ }
+
+ /**
+ * {@icu} Returns this locale's script localized for display in the provided locale.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized script name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayScript(ULocale displayLocale) {
+ return getDisplayScriptInternal(this, displayLocale);
+ }
+
+ /**
+ * {@icu} Returns a locale's script localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose script will be displayed
+ * @param displayLocaleID the id of the locale in which to display the name.
+ * @return the localized script name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayScript(String localeID, String displayLocaleID) {
+ return getDisplayScriptInternal(new ULocale(localeID), new ULocale(displayLocaleID));
+ }
+
+ /**
+ * {@icu} Returns a locale's script localized for display in the provided locale.
+ * @param localeID the id of the locale whose script will be displayed.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized script name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayScript(String localeID, ULocale displayLocale) {
+ return getDisplayScriptInternal(new ULocale(localeID), displayLocale);
+ }
+
+ // displayLocaleID is canonical, localeID need not be since parsing will fix this.
+ private static String getDisplayScriptInternal(ULocale locale, ULocale displayLocale) {
+ // No localization, just return the script code
+ return locale.getScript();
+ }
+
+ /**
+ * Returns this locale's country localized for display in the default locale.
+ * @return the localized country name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayCountry() {
+ return getDisplayCountryInternal(this, getDefault());
+ }
+
+ /**
+ * Returns this locale's country localized for display in the provided locale.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized country name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayCountry(ULocale displayLocale){
+ return getDisplayCountryInternal(this, displayLocale);
+ }
+
+ /**
+ * Returns a locale's country localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose country will be displayed
+ * @param displayLocaleID the id of the locale in which to display the name.
+ * @return the localized country name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayCountry(String localeID, String displayLocaleID) {
+ return getDisplayCountryInternal(new ULocale(localeID), new ULocale(displayLocaleID));
+ }
+
+ /**
+ * Returns a locale's country localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose country will be displayed.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized country name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayCountry(String localeID, ULocale displayLocale) {
+ return getDisplayCountryInternal(new ULocale(localeID), displayLocale);
+ }
+
+ // displayLocaleID is canonical, localeID need not be since parsing will fix this.
+ private static String getDisplayCountryInternal(ULocale locale, ULocale displayLocale) {
+ return locale.toLocale().getDisplayCountry(displayLocale.toLocale());
+ }
+
+ /**
+ * Returns this locale's variant localized for display in the default locale.
+ * @return the localized variant name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayVariant() {
+ return getDisplayVariantInternal(this, getDefault());
+ }
+
+ /**
+ * Returns this locale's variant localized for display in the provided locale.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized variant name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayVariant(ULocale displayLocale) {
+ return getDisplayVariantInternal(this, displayLocale);
+ }
+
+ /**
+ * Returns a locale's variant localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose variant will be displayed
+ * @param displayLocaleID the id of the locale in which to display the name.
+ * @return the localized variant name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayVariant(String localeID, String displayLocaleID){
+ return getDisplayVariantInternal(new ULocale(localeID), new ULocale(displayLocaleID));
+ }
+
+ /**
+ * Returns a locale's variant localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose variant will be displayed.
+ * @param displayLocale the locale in which to display the name.
+ * @return the localized variant name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayVariant(String localeID, ULocale displayLocale) {
+ return getDisplayVariantInternal(new ULocale(localeID), displayLocale);
+ }
+
+ private static String getDisplayVariantInternal(ULocale locale, ULocale displayLocale) {
+ return locale.toLocale().getDisplayVariant(displayLocale.toLocale());
+ }
+
+ /**
+ * {@icu} Returns a keyword localized for display in the default locale.
+ * @param keyword the keyword to be displayed.
+ * @return the localized keyword name.
+ * @see #getKeywords()
+ * @stable ICU 3.0
+ */
+ public static String getDisplayKeyword(String keyword) {
+ return getDisplayKeywordInternal(keyword, getDefault());
+ }
+
+ /**
+ * {@icu} Returns a keyword localized for display in the specified locale.
+ * @param keyword the keyword to be displayed.
+ * @param displayLocaleID the id of the locale in which to display the keyword.
+ * @return the localized keyword name.
+ * @see #getKeywords(String)
+ * @stable ICU 3.0
+ */
+ public static String getDisplayKeyword(String keyword, String displayLocaleID) {
+ return getDisplayKeywordInternal(keyword, new ULocale(displayLocaleID));
+ }
+
+ /**
+ * {@icu} Returns a keyword localized for display in the specified locale.
+ * @param keyword the keyword to be displayed.
+ * @param displayLocale the locale in which to display the keyword.
+ * @return the localized keyword name.
+ * @see #getKeywords(String)
+ * @stable ICU 3.0
+ */
+ public static String getDisplayKeyword(String keyword, ULocale displayLocale) {
+ return getDisplayKeywordInternal(keyword, displayLocale);
+ }
+
+ private static String getDisplayKeywordInternal(String keyword, ULocale displayLocale) {
+ // No localization
+ return keyword;
+ }
+
+ /**
+ * {@icu} Returns a keyword value localized for display in the default locale.
+ * @param keyword the keyword whose value is to be displayed.
+ * @return the localized value name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayKeywordValue(String keyword) {
+ return getDisplayKeywordValueInternal(this, keyword, getDefault());
+ }
+
+ /**
+ * {@icu} Returns a keyword value localized for display in the specified locale.
+ * @param keyword the keyword whose value is to be displayed.
+ * @param displayLocale the locale in which to display the value.
+ * @return the localized value name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayKeywordValue(String keyword, ULocale displayLocale) {
+ return getDisplayKeywordValueInternal(this, keyword, displayLocale);
+ }
+
+ /**
+ * {@icu} Returns a keyword value localized for display in the specified locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose keyword value is to be displayed.
+ * @param keyword the keyword whose value is to be displayed.
+ * @param displayLocaleID the id of the locale in which to display the value.
+ * @return the localized value name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayKeywordValue(String localeID, String keyword,
+ String displayLocaleID) {
+ return getDisplayKeywordValueInternal(new ULocale(localeID), keyword,
+ new ULocale(displayLocaleID));
+ }
+
+ /**
+ * {@icu} Returns a keyword value localized for display in the specified locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the id of the locale whose keyword value is to be displayed.
+ * @param keyword the keyword whose value is to be displayed.
+ * @param displayLocale the id of the locale in which to display the value.
+ * @return the localized value name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayKeywordValue(String localeID, String keyword,
+ ULocale displayLocale) {
+ return getDisplayKeywordValueInternal(new ULocale(localeID), keyword, displayLocale);
+ }
+
+ // displayLocaleID is canonical, localeID need not be since parsing will fix this.
+    private static String getDisplayKeywordValueInternal(ULocale locale, String keyword,
+            ULocale displayLocale) {
+        // No localization: the raw keyword value is returned and displayLocale is unused.
+        // The keyword is trimmed and lowercased via AsciiUtil before the lookup.
+        return locale.getKeywordValue(AsciiUtil.toLowerString(keyword.trim()));
+    }
+
+ /**
+ * Returns this locale name localized for display in the default locale.
+ * @return the localized locale name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayName() {
+ return getDisplayNameInternal(this, getDefault());
+ }
+
+ /**
+ * Returns this locale name localized for display in the provided locale.
+ * @param displayLocale the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @stable ICU 3.0
+ */
+ public String getDisplayName(ULocale displayLocale) {
+ return getDisplayNameInternal(this, displayLocale);
+ }
+
+ /**
+ * Returns the locale ID localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the locale whose name is to be displayed.
+ * @param displayLocaleID the id of the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayName(String localeID, String displayLocaleID) {
+ return getDisplayNameInternal(new ULocale(localeID), new ULocale(displayLocaleID));
+ }
+
+ /**
+ * Returns the locale ID localized for display in the provided locale.
+ * This is a cover for the ICU4C API.
+ * @param localeID the locale whose name is to be displayed.
+ * @param displayLocale the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @stable ICU 3.0
+ */
+ public static String getDisplayName(String localeID, ULocale displayLocale) {
+ return getDisplayNameInternal(new ULocale(localeID), displayLocale);
+ }
+
+ private static String getDisplayNameInternal(ULocale locale, ULocale displayLocale) {
+ // No localization, no script and keywords
+ return locale.toLocale().getDisplayName(displayLocale.toLocale());
+ }
+
+ /**
+ * {@icu} Returns this locale name localized for display in the default locale.
+ * If a dialect name is present in the locale data, then it is returned.
+ * @return the localized locale name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getDisplayNameWithDialect() {
+ return getDisplayNameWithDialectInternal(this, getDefault());
+ }
+
+ /**
+ * {@icu} Returns this locale name localized for display in the provided locale.
+ * If a dialect name is present in the locale data, then it is returned.
+ * @param displayLocale the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getDisplayNameWithDialect(ULocale displayLocale) {
+ return getDisplayNameWithDialectInternal(this, displayLocale);
+ }
+
+ /**
+ * {@icu} Returns the locale ID localized for display in the provided locale.
+ * If a dialect name is present in the locale data, then it is returned.
+ * This is a cover for the ICU4C API.
+ * @param localeID the locale whose name is to be displayed.
+ * @param displayLocaleID the id of the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static String getDisplayNameWithDialect(String localeID, String displayLocaleID) {
+ return getDisplayNameWithDialectInternal(new ULocale(localeID),
+ new ULocale(displayLocaleID));
+ }
+
+ /**
+ * {@icu} Returns the locale ID localized for display in the provided locale.
+ * If a dialect name is present in the locale data, then it is returned.
+ * This is a cover for the ICU4C API.
+ * @param localeID the locale whose name is to be displayed.
+ * @param displayLocale the locale in which to display the locale name.
+ * @return the localized locale name.
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static String getDisplayNameWithDialect(String localeID, ULocale displayLocale) {
+ return getDisplayNameWithDialectInternal(new ULocale(localeID), displayLocale);
+ }
+
+    private static String getDisplayNameWithDialectInternal(ULocale locale, ULocale displayLocale) {
+        // No dialect support: same result as the plain display name (no script, no keywords).
+        return getDisplayNameInternal(locale, displayLocale);
+    }
+
+ /**
+ * {@icu} Returns this locale's layout orientation for characters. The possible
+ * values are "left-to-right", "right-to-left", "top-to-bottom" or
+ * "bottom-to-top".
+ * @return The locale's layout orientation for characters.
+ * @stable ICU 4.0
+ */
+ public String getCharacterOrientation() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns this locale's layout orientation for lines. The possible
+ * values are "left-to-right", "right-to-left", "top-to-bottom" or
+ * "bottom-to-top".
+ * @return The locale's layout orientation for lines.
+ * @stable ICU 4.0
+ */
+ public String getLineOrientation() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Selector for getLocale() indicating the locale of the
+ * resource containing the data. This is always at or above the
+ * valid locale. If the valid locale does not contain the
+ * specific data being requested, then the actual locale will be
+ * above the valid locale. If the object was not constructed from
+ * locale data, then the valid locale is null.
+ *
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+    public static final Type ACTUAL_LOCALE = new Type(); // final: a Type has no state, only identity
+
+ /**
+ * {@icu} Selector for getLocale() indicating the most specific
+ * locale for which any data exists. This is always at or above
+ * the requested locale, and at or below the actual locale. If
+ * the requested locale does not correspond to any resource data,
+ * then the valid locale will be above the requested locale. If
+ * the object was not constructed from locale data, then the
+ * actual locale is null.
+ *
+ * Note: The valid locale will be returned correctly in ICU
+ * 3.0 or later. In ICU 2.8, it is not returned correctly.
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+    public static final Type VALID_LOCALE = new Type(); // final: a Type has no state, only identity
+
+ /**
+ * Opaque selector enum for getLocale().
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @draft ICU 2.8 (retainAll)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Type {
+ private Type() {}
+ }
+
+ /**
+ * {@icu} Based on a HTTP formatted list of acceptable locales, determine an available
+ * locale for the user. NullPointerException is thrown if acceptLanguageList or
+ * availableLocales is null. If fallback is non-null, it will contain true if a
+ * fallback locale (one not in the acceptLanguageList) was returned. The value on
+ * entry is ignored. ULocale will be one of the locales in availableLocales, or the
+ * ROOT ULocale if a ROOT locale was used as a fallback (because nothing else in
+ * availableLocales matched). No ULocale array element should be null; behavior is
+ * undefined if this is the case.
+ * @param acceptLanguageList list in HTTP "Accept-Language:" format of acceptable locales
+ * @param availableLocales list of available locales. One of these will be returned.
+ * @param fallback if non-null, a 1-element array containing a boolean to be set with
+ * the fallback status
+ * @return one of the locales from the availableLocales list, or null if none match
+ * @stable ICU 3.4
+ */
+ public static ULocale acceptLanguage(String acceptLanguageList, ULocale[] availableLocales,
+ boolean[] fallback) {
+ // A null header string is a caller error; reject it before any parsing is attempted.
+ if (acceptLanguageList == null) {
+ throw new NullPointerException();
+ }
+ final ULocale[] parsedLocales;
+ try {
+ // The second argument is presumably the parser's lenient flag -- confirm against
+ // parseAcceptLanguage's declaration.
+ parsedLocales = parseAcceptLanguage(acceptLanguageList, true);
+ } catch (ParseException ignored) {
+ // A header that cannot be parsed is reported as "no match" rather than as an exception.
+ return null;
+ }
+ // Hand the parsed list to the array-based overload unless the parser produced no list.
+ return (parsedLocales == null)
+ ? null
+ : acceptLanguage(parsedLocales, availableLocales, fallback);
+ }
+
+ /**
+ * {@icu} Based on a list of acceptable locales, determine an available locale for the
+ * user. NullPointerException is thrown if acceptLanguageList or availableLocales is
+ * null. If fallback is non-null, it will contain true if a fallback locale (one not
+ * in the acceptLanguageList) was returned. The value on entry is ignored. ULocale
+ * will be one of the locales in availableLocales, or the ROOT ULocale if a ROOT
+ * locale was used as a fallback (because nothing else in availableLocales matched).
+ * No ULocale array element should be null; behavior is undefined if this is the case.
+ * @param acceptLanguageList list of acceptable locales
+ * @param availableLocales list of available locales. One of these will be returned.
+ * @param fallback if non-null, a 1-element array containing a boolean to be set with
+ * the fallback status
+ * @return one of the locales from the availableLocales list, or null if none match
+ * @stable ICU 3.4
+ */
+
+ public static ULocale acceptLanguage(ULocale[] acceptLanguageList, ULocale[]
+ availableLocales, boolean[] fallback) {
+ // fallbacklist
+ int i,j;
+ if(fallback != null) {
+ fallback[0]=true;
+ }
+ for(i=0;i {
+ // Sort key for one parsed entry: the q-value first, then the order of appearance.
+ private double q;
+ private double serial;
+ public ULocaleAcceptLanguageQ(double theq, int theserial) {
+ q = theq;
+ serial = theserial;
+ }
+ // Orders entries by descending q; entries with equal q keep their input order (serial).
+ public int compareTo(ULocaleAcceptLanguageQ other) {
+ if (q > other.q) { // reverse - to sort in descending order
+ return -1;
+ } else if (q < other.q) {
+ return 1;
+ }
+ if (serial < other.serial) {
+ return -1;
+ } else if (serial > other.serial) {
+ return 1;
+ } else {
+ return 0; // same object
+ }
+ }
+ }
+
+ // parse out the acceptLanguage into an array
+ // The loop below is a character-driven state machine. States:
+ // 0 before language-range, 1 in language-range, 2 after wildcard '*',
+ // 3 after language-range, 4 before 'q', 5 before '=', 6 before q value,
+ // 7 after the integer digit of q, 8 right after '.', 9 in the fraction digits,
+ // 10 after q value, -1 error.
+ TreeMap map =
+ new TreeMap();
+ StringBuilder languageRangeBuf = new StringBuilder();
+ StringBuilder qvalBuf = new StringBuilder();
+ int state = 0;
+ acceptLanguage += ","; // append comma to simplify the parsing code
+ int n;
+ boolean subTag = false;
+ boolean q1 = false;
+ for (n = 0; n < acceptLanguage.length(); n++) {
+ boolean gotLanguageQ = false;
+ char c = acceptLanguage.charAt(n);
+ switch (state) {
+ case 0: // before language-range start
+ if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
+ // in language-range
+ languageRangeBuf.append(c);
+ state = 1;
+ subTag = false;
+ } else if (c == '*') {
+ languageRangeBuf.append(c);
+ state = 2;
+ } else if (c != ' ' && c != '\t') {
+ // invalid character
+ state = -1;
+ }
+ break;
+ case 1: // in language-range
+ if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
+ languageRangeBuf.append(c);
+ } else if (c == '-') {
+ subTag = true;
+ languageRangeBuf.append(c);
+ } else if (c == '_') {
+ if (isLenient) {
+ subTag = true;
+ languageRangeBuf.append(c);
+ } else {
+ state = -1;
+ }
+ } else if ('0' <= c && c <= '9') {
+ if (subTag) {
+ languageRangeBuf.append(c);
+ } else {
+ // DIGIT is allowed only in language sub tag
+ state = -1;
+ }
+ } else if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c == ' ' || c == '\t') {
+ // language-range end
+ state = 3;
+ } else if (c == ';') {
+ // before q
+ state = 4;
+ } else {
+ // invalid character for language-range
+ state = -1;
+ }
+ break;
+ case 2: // saw wild card range
+ if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c == ' ' || c == '\t') {
+ // language-range end
+ state = 3;
+ } else if (c == ';') {
+ // before q
+ state = 4;
+ } else {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 3: // language-range end
+ if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c == ';') {
+ // before q
+ state =4;
+ } else if (c != ' ' && c != '\t') {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 4: // before q
+ if (c == 'q') {
+ // before equal
+ state = 5;
+ } else if (c != ' ' && c != '\t') {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 5: // before equal
+ if (c == '=') {
+ // before q value
+ state = 6;
+ } else if (c != ' ' && c != '\t') {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 6: // before q value
+ if (c == '0') {
+ // q value start with 0
+ q1 = false;
+ qvalBuf.append(c);
+ state = 7;
+ } else if (c == '1') {
+ // q value start with 1
+ // NOTE(review): q1 is never set to true in this method, so the
+ // "fraction part must be 0" checks in states 8 and 9 never fire.
+ // Confirm whether q1 = true was intended here before changing it:
+ // state 9 has no isLenient guard, so enabling it would also reject
+ // values such as "1.05" for lenient callers.
+ qvalBuf.append(c);
+ state = 7;
+ } else if (c == '.') {
+ if (isLenient) {
+ qvalBuf.append(c);
+ state = 8;
+ } else {
+ state = -1;
+ }
+ } else if (c != ' ' && c != '\t') {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 7: // q value start
+ if (c == '.') {
+ // before q value fraction part
+ qvalBuf.append(c);
+ state = 8;
+ } else if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c == ' ' || c == '\t') {
+ // after q value
+ state = 10;
+ } else {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 8: // before q value fraction part
+ // Must be a conjunction: with "||" the test was true for every character,
+ // so non-digits were appended to the q value instead of being rejected.
+ if ('0' <= c && c <= '9') {
+ if (q1 && c != '0' && !isLenient) {
+ // if q value starts with 1, the fraction part must be 0
+ state = -1;
+ } else {
+ // in q value fraction part
+ qvalBuf.append(c);
+ state = 9;
+ }
+ } else {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 9: // in q value fraction part
+ if ('0' <= c && c <= '9') {
+ if (q1 && c != '0') {
+ // if q value starts with 1, the fraction part must be 0
+ state = -1;
+ } else {
+ qvalBuf.append(c);
+ }
+ } else if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c == ' ' || c == '\t') {
+ // after q value
+ state = 10;
+ } else {
+ // invalid
+ state = -1;
+ }
+ break;
+ case 10: // after q value
+ if (c == ',') {
+ // language-q end
+ gotLanguageQ = true;
+ } else if (c != ' ' && c != '\t') {
+ // invalid
+ state = -1;
+ }
+ break;
+ }
+ if (state == -1) {
+ // error state
+ throw new ParseException("Invalid Accept-Language", n);
+ }
+ if (gotLanguageQ) {
+ // A missing q value means the default weight of 1.0.
+ double q = 1.0;
+ if (qvalBuf.length() != 0) {
+ try {
+ q = Double.parseDouble(qvalBuf.toString());
+ } catch (NumberFormatException nfe) {
+ // Already validated, so it should never happen
+ q = 1.0;
+ }
+ if (q > 1.0) {
+ q = 1.0;
+ }
+ }
+ // Wildcard ranges are parsed but contribute no locale to the result.
+ if (languageRangeBuf.charAt(0) != '*') {
+ int serial = map.size();
+ ULocaleAcceptLanguageQ entry = new ULocaleAcceptLanguageQ(q, serial);
+ // sort in reverse order.. 1.0, 0.9, 0.8 .. etc
+ map.put(entry, new ULocale(canonicalize(languageRangeBuf.toString())));
+ }
+
+ // reset buffer and parse state
+ languageRangeBuf.setLength(0);
+ qvalBuf.setLength(0);
+ state = 0;
+ }
+ }
+ if (state != 0) {
+ // Well, the parser should handle all cases. So just in case.
+ throw new ParseException("Invalid Accept-Language", n);
+ }
+
+ // pull out the map
+ ULocale acceptList[] = map.values().toArray(new ULocale[map.size()]);
+ return acceptList;
+ }
+
+ /**
+ * {@icu} Adds the likely subtags for a provided locale ID, per the algorithm
+ * described in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If the provided ULocale instance is already in the maximal form, or there is no
+ * data available for maximization, it will be returned. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ * Otherwise, a new ULocale instance with the maximal form is returned.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param loc The ULocale to maximize
+ * @return The maximized ULocale instance.
+ * @stable ICU 4.0
+ */
+ public static ULocale addLikelySubtags(ULocale loc) {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Minimizes the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If the provided ULocale instance is already in the minimal form, or there
+ * is no data available for minimization, it will be returned. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param loc The ULocale to minimize
+ * @return The minimized ULocale instance.
+ * @stable ICU 4.0
+ */
+ public static ULocale minimizeSubtags(ULocale loc) {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ // --------------------------------
+ // BCP47/OpenJDK APIs
+ // --------------------------------
+
+ /**
+ * {@icu} The key for the private use locale extension ('x').
+ *
+ * @see #getExtension(char)
+ * @see Builder#setExtension(char, String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final char PRIVATE_USE_EXTENSION = 'x';
+
+ /**
+ * {@icu} The key for Unicode locale extension ('u').
+ *
+ * @see #getExtension(char)
+ * @see Builder#setExtension(char, String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final char UNICODE_LOCALE_EXTENSION = 'u';
+
+ /**
+ * {@icu} Returns the extension (or private use) value associated with
+ * the specified singleton key, or null if there is no extension
+ * associated with the key. To be valid, the key must be one
+ * of {@code [0-9A-Za-z]}. Keys are case-insensitive, so
+ * for example 'z' and 'Z' represent the same extension.
+ *
+ * @param key the extension key
+ * @return the extension, or null if this locale defines no
+ * extension for the specified key
+ * @throws IllegalArgumentException if the key is not valid
+ * @see #PRIVATE_USE_EXTENSION
+ * @see #UNICODE_LOCALE_EXTENSION
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getExtension(char key) {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns the set of extension keys associated with this locale, or the
+ * empty set if it has no extensions. The returned set is unmodifiable.
+ *
+ * @return the set of extension keys, or the empty set if this locale has
+ * no extensions
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Set getExtensionKeys() {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns the Unicode locale type associated with the specified Unicode
+ * locale key for this locale. Unicode locale keywords are specified
+ * by the 'u' extension and consist of key/type pairs. The key must be
+ * two alphanumeric characters in length, or an IllegalArgumentException
+ * is thrown.
+ * @param key the Unicode locale key
+ * @return the Unicode locale type associated with the key, or null if the
+ * locale does not define a value for the key.
+ * @throws IllegalArgumentException if the key is not valid.
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getUnicodeLocaleType(String key) {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns the set of keys for Unicode locale keywords defined by this locale,
+ * or null if this locale has no locale extension. The returned set is
+ * immutable.
+ *
+ * @return the set of the Unicode locale keys, or null
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Set getUnicodeLocaleKeys() {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns a well-formed IETF BCP 47 language tag representing
+ * this locale.
+ *
+ *
+ * If this {@code ULocale} object has language, country, or variant
+ * that does not satisfy the IETF BCP 47 language tag syntax requirements,
+ * this method handles these fields as described below:
+ *
+ * Language: If language is empty or ill-formed (for example "a" or "e2"),
+ * it will be emitted as "und" (Undetermined).
+ *
+ * Country: If country is ill-formed (for example "12" or "USA"), it
+ * will be omitted.
+ *
+ * Variant: Variant is treated as consisting of subtags separated by
+ * underscore and converted to lower case letters. 'Well-formed' subtags
+ * consist of either an ASCII letter followed by 4-7 ASCII characters, or an
+ * ASCII digit followed by 3-7 ASCII characters. If well-formed, the variant
+ * is emitted as each subtag in order (separated by hyphen). Otherwise:
+ *
+ * if all sub-segments consist of 1 to 8 ASCII alphanumerics (for example
+ * "WIN", "WINDOWS_XP", "SOLARIS_10"), the first ill-formed variant subtag
+ * and all following sub-segments will be emitted as private use subtags prefixed
+ * by the special private use subtag "variant" followed by each subtag in order
+ * (separated by hyphen). For example, locale "en_US_WIN" is converted to language
+ * tag "en-US-x-variant-win", locale "de_WINDOWS_XP" is converted to language tag
+ * "de-windows-x-variant-xp". If this locale has a private use extension value,
+ * the special private use subtags prefixed by "variant" are appended after the
+ * locale's private use value.
+ * if any subtag does not consist of 1 to 8 ASCII alphanumerics, the
+ * variant will be truncated and the problematic subtag and all following
+ * sub-segments will be omitted. If the remainder is non-empty, it will be
+ * emitted as a private use subtag as above (even if the remainder turns out
+ * to be well-formed). For example, "Solaris_isjustthecoolestthing" is emitted
+ * as "x-jvariant-Solaris", not as "solaris".
+ *
+ *
+ * Note: Although the language tag created by this method
+ * satisfies the syntax requirements defined by the IETF BCP 47
+ * specification, it is not always a valid BCP 47 language tag.
+ * For example,
+ *
+ * new ULocale("xx_YY").toLanguageTag();
+ *
+ * will return "xx-YY", but the language subtag "xx" and the region subtag "YY"
+ * are invalid because they are not registered in the
+ *
+ * IANA Language Subtag Registry .
+ *
+ * @return a BCP47 language tag representing the locale
+ * @see #forLanguageTag(String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String toLanguageTag() {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * {@icu} Returns a locale for the specified IETF BCP 47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ *
+ * This implements the 'Language-Tag' production of BCP47, and
+ * so supports grandfathered (regular and irregular) as well as
+ * private use language tags. Stand alone private use tags are
+ * represented as empty language and extension 'x-whatever',
+ * and grandfathered tags are converted to their canonical replacements
+ * where they exist. Note that a few grandfathered tags have no
+ * modern replacement; these will be converted using the fallback
+ * described above so some information might be lost.
+ *
+ * <p>For a list of grandfathered tags, see the
+ *
+ * IANA Language Subtag Registry .
+ *
+ * <p>Notes: This method converts private use subtags prefixed
+ * by "variant" to variant field in the result locale. For example,
+ * the code below will return "POSIX".
+ *
+ * ULocale.forLanguageTag("en-US-x-variant-posix").getVariant();
+ *
+ *
+ * @param languageTag the language tag
+ * @return the locale that best represents the language tag
+ * @exception NullPointerException if {@code languageTag} is null
+ * @see #toLanguageTag()
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static ULocale forLanguageTag(String languageTag) {
+ // Stub: com.ibm.icu.base does not implement this operation, so the call always throws.
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+
+ /**
+ * {@code Builder} is used to build instances of {@code ULocale}
+ * from values configured by the setter. Unlike the {@code ULocale}
+ * constructors, the {@code Builder} checks if a value configured by a
+ * setter satisfies the syntactical requirements defined by the {@code ULocale}
+ * class. A {@code ULocale} object created by a {@code Builder} is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * <p>In this com.ibm.icu.base version of the class, both constructors and
+ * every method throw {@code UnsupportedOperationException}.
+ *
+ * <p>Note: The {@code ULocale} class does not provide
+ * any syntactical restrictions on variant, while BCP 47
+ * requires each variant subtag to be 5 to 8 alphanumeric letters or a single
+ * numeric letter followed by 3 alphanumeric letters. By default,
+ * the {@code setVariant} method throws {@code IllformedLocaleException}
+ * for a variant that does not satisfy the syntax above. If it is
+ * necessary to support such a variant, you could use the constructor
+ * {@code Builder(boolean isLenientVariant)} passing {@code true} to
+ * skip the syntax validation for variant. However, you should keep in
+ * mind that a {@code ULocale} object created this way might lose
+ * the variant information when transformed to a BCP 47 language tag.
+ *
+ * <p>The following example shows how to create a {@code ULocale} object
+ * with the {@code Builder}.
+ *
+ * <pre>
+ * ULocale aLocale = new Builder().setLanguage("sr").setScript("Latn").setRegion("RS").build();
+ * </pre>
+ *
+ * <p>Builders can be reused; {@code clear()} resets all
+ * fields to their default values.
+ *
+ * @see ULocale#toLanguageTag()
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Builder {
+
+ /**
+ * Constructs an empty Builder. The default value of all
+ * fields, extensions, and private use information is the
+ * empty string.
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder() {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Constructs an empty Builder with an option whether to allow
+ * {@code setVariant} to accept a value that does not
+ * conform to the IETF BCP 47 variant subtag's syntax requirements.
+ *
+ * @param isLenientVariant When true, this Builder
+ * will accept an ill-formed variant.
+ * @see #setVariant(String)
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder(boolean isLenientVariant) {
+ throw new UnsupportedOperationException("Constructor not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns true if this {@code Builder} accepts a value that does
+ * not conform to the IETF BCP 47 variant subtag's syntax requirements
+ * in {@code setVariant}.
+ *
+ * @return true if this {@code Builder} accepts an ill-formed variant.
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean isLenientVariant() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+
+ /**
+ * Resets the {@code Builder} to match the provided {@code locale}.
+ * The previous state of the builder is discarded. Fields that do
+ * not conform to the {@code ULocale} class specification, for example,
+ * a single letter language, are ill-formed.
+ *
+ * @param locale the locale
+ * @return this builder
+ * @throws IllformedLocaleException if {@code locale} has
+ * any ill-formed fields.
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setLocale(ULocale locale) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Resets the builder to match the provided IETF BCP 47 language tag.
+ * The previous state of the builder is discarded.
+ *
+ * @param languageTag the language tag
+ * @return this builder
+ * @throws IllformedLocaleException if {@code languageTag} is ill-formed.
+ * @throws NullPointerException if {@code languageTag} is null.
+ * @see ULocale#forLanguageTag(String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setLanguageTag(String languageTag) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the language. If {@code language} is the empty string,
+ * the language in this {@code Builder} will be removed.
+ * Typical language value is a two or three-letter language
+ * code as defined in ISO639.
+ * Well-formed values are any string of two to eight alpha
+ * letters. This method accepts upper case alpha letters
+ * [A-Z], but the language value in the {@code ULocale}
+ * created by the {@code Builder} is always normalized
+ * to lower case letters.
+ *
+ * @param language the language
+ * @return this builder
+ * @throws IllformedLocaleException if {@code language} is ill-formed
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setLanguage(String language) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the script. If {@code script} is the empty string,
+ * the script in this {@code Builder} is removed.
+ * Typical script value is a four-letter script code as defined by ISO 15924.
+ * Well-formed values are any string of four alpha letters.
+ * This method accepts both upper and lower case alpha letters [a-zA-Z],
+ * but the script value in the {@code ULocale} created by the
+ * {@code Builder} is always normalized to title case
+ * (the first letter is upper case and the rest of letters are lower case).
+ *
+ * @param script the script
+ * @return this builder
+ * @throws IllformedLocaleException if {@code script} is ill-formed
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setScript(String script) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the region. If region is the empty string, the region
+ * in this {@code Builder} is removed.
+ * Typical region value is a two-letter ISO 3166 code or a three-digit UN M.49
+ * area code. Well-formed values are any two-letter or three-digit string.
+ * This method accepts lower case letters [a-z], but the country value in
+ * the {@code ULocale} created by the {@code Builder} is always
+ * normalized to upper case.
+ *
+ * @param region the region
+ * @return this builder
+ * @throws IllformedLocaleException if {@code region} is ill-formed
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setRegion(String region) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the variant. If variant is the empty string, the
+ * variant in this {@code Builder} is removed.
+ *
+ * <p>Note: By default, this method checks if {@code variant}
+ * satisfies the IETF BCP 47 variant subtag's syntax requirements.
+ * However, the {@code ULocale} class itself does not impose any syntactical
+ * restriction on variant. When a {@code Builder} is created by the
+ * constructor {@code Builder(boolean isLenientVariant)}
+ * with {@code true}, this method skips the syntax check.
+ *
+ * @param variant the variant
+ * @return this builder
+ * @throws IllformedLocaleException if {@code variant} is ill-formed
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setVariant(String variant) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the extension for the given key. If the value is the
+ * empty string, the extension is removed. Legal keys are
+ * characters in the ranges {@code [0-9A-Za-z]}. Keys
+ * are case-insensitive, so for example 'z' and 'Z' represent
+ * the same extension. In general, well-formed values are any
+ * series of fields of two to eight alphanumeric characters,
+ * separated by hyphen or underscore.
+ *
+ * <p>Note: The key {@link ULocale#UNICODE_LOCALE_EXTENSION
+ * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension.
+ * Setting a value for this key replaces any existing Unicode locale key/type
+ * pairs with those defined in the extension.
+ * To be well-formed, a value for this extension must meet the additional
+ * constraints that each locale key is two alphanumeric characters,
+ * followed by at least one locale type subtag represented by
+ * three to eight alphanumeric characters, and that the keys and types
+ * be legal Unicode locale keys and values.
+ *
+ * <p>Note: The key {@link ULocale#PRIVATE_USE_EXTENSION
+ * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be
+ * well-formed, the value for this key needs only to have fields of one to
+ * eight alphanumeric characters, not two to eight as in the general case.
+ *
+ * @param key the extension key
+ * @param value the extension value
+ * @return this builder
+ * @throws IllformedLocaleException if {@code key} is illegal
+ * or {@code value} is ill-formed
+ * @see #setUnicodeLocaleKeyword(String, String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setExtension(char key, String value) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Sets the Unicode locale keyword type for the given key. If the
+ * value is the empty string, the Unicode keyword is removed.
+ * Well-formed keys are strings of two alphanumeric characters.
+ * Well-formed types are one or more subtags where each of them is
+ * three to eight alphanumeric characters.
+ *
+ * <p>Note: Setting the 'u' extension replaces all Unicode locale
+ * keywords with those defined in the extension.
+ * @param key the Unicode locale key
+ * @param type the Unicode locale type
+ * @return this builder
+ * @throws IllformedLocaleException if {@code key} or {@code type}
+ * is ill-formed
+ * @see #setExtension(char, String)
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder setUnicodeLocaleKeyword(String key, String type) {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Resets the builder to its initial, empty state.
+ *
+ * @return this builder
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder clear() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Resets the extensions to their initial, empty state.
+ * Language, script, region and variant are unchanged.
+ *
+ * @return this builder
+ * @see #setExtension(char, String)
+ *
+ * @draft ICU 4.2
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Builder clearExtensions() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+
+ /**
+ * Returns an instance of {@code ULocale} created from the fields set
+ * on this builder.
+ *
+ * @return a new {@code ULocale}
+ *
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale build() {
+ throw new UnsupportedOperationException("Method not supported by com.ibm.icu.base");
+ }
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/VersionInfo.java b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/VersionInfo.java
new file mode 100644
index 00000000000..22af59379aa
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/util/VersionInfo.java
@@ -0,0 +1,14 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.util;
+
+/*
+ * Empty stub
+ */
+public final class VersionInfo {
+ // The only constructor is private, so this placeholder class cannot be instantiated by callers.
+ private VersionInfo() {}
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/.classpath b/eclipse-build/plugins.template/com.ibm.icu.tests/.classpath
new file mode 100644
index 00000000000..304e86186aa
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/.project b/eclipse-build/plugins.template/com.ibm.icu.tests/.project
new file mode 100644
index 00000000000..b406f25e57d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/.project
@@ -0,0 +1,28 @@
+
+
+ com.ibm.icu.tests
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.pde.ManifestBuilder
+
+
+
+
+ org.eclipse.pde.SchemaBuilder
+
+
+
+
+
+ org.eclipse.pde.PluginNature
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.core.prefs b/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..2e9a6a71111
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,78 @@
+#Thu Jan 13 17:45:06 EST 2011
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.deadCode=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.ui.prefs b/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..1c69cb6a3c0
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,3 @@
+#Thu Dec 14 11:51:01 EST 2006
+eclipse.preferences.version=1
+internal.default.compliance=default
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/META-INF/MANIFEST.MF b/eclipse-build/plugins.template/com.ibm.icu.tests/META-INF/MANIFEST.MF
new file mode 100644
index 00000000000..99cd32a5f65
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/META-INF/MANIFEST.MF
@@ -0,0 +1,10 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: %pluginName
+Bundle-SymbolicName: com.ibm.icu.tests
+Bundle-Version: @BUILD_VERSION@
+Bundle-Vendor: %providerName
+Fragment-Host: com.ibm.icu
+Bundle-Copyright: @COPYRIGHT@
+Require-Bundle: org.junit
+Bundle-RequiredExecutionEnvironment: J2SE-1.5
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/build.properties b/eclipse-build/plugins.template/com.ibm.icu.tests/build.properties
new file mode 100644
index 00000000000..3edb7086b44
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/build.properties
@@ -0,0 +1,17 @@
+###############################################################################
+# Copyright (c) 2000, 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+source.. = src/
+output.. = bin/
+bin.includes = .,\
+ about.html,\
+ about_files/,\
+ plugin.properties,\
+ META-INF/
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/plugin.properties b/eclipse-build/plugins.template/com.ibm.icu.tests/plugin.properties
new file mode 100644
index 00000000000..b9d61cab0c2
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/plugin.properties
@@ -0,0 +1,12 @@
+###############################################################################
+# Copyright (c) 2011 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+pluginName = International Components for Unicode for Java (ICU4J) Tests
+providerName = IBM Corporation
\ No newline at end of file
diff --git a/eclipse-build/plugins.template/com.ibm.icu.tests/src/com/ibm/icu/tests/UnitTest.java b/eclipse-build/plugins.template/com.ibm.icu.tests/src/com/ibm/icu/tests/UnitTest.java
new file mode 100644
index 00000000000..bd2d5c7f52d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu.tests/src/com/ibm/icu/tests/UnitTest.java
@@ -0,0 +1,142 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 2005-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ ******************************************************************************
+ */
+package com.ibm.icu.tests;
+
+import junit.framework.TestCase;
+
+import com.ibm.icu.dev.test.TestAll;
+import com.ibm.icu.dev.test.TestFmwk;
+import com.ibm.icu.dev.test.TestFmwk.TestParams;
+
+
+public class UnitTest extends TestCase { // JUnit TestCase: each test* method below passes one target name to runUtility
+
+ public void runUtility(String testname) throws Exception { // runs the target named by testname; fails this JUnit test if errors were counted
+ TestParams params = TestParams.create("-n", null); // NOTE(review): the meaning of "-n" is defined by TestFmwk.TestParams, not visible here - confirm
+ TestFmwk test = new TestAll(); // target names are resolved starting from a TestAll instance
+ test.resolveTarget(params, testname).run(); // look up the target for testname and execute it
+ if (params.errorCount > 0) { // errorCount is read from params after the run
+ fail(params.errorSummary.toString()); // use the accumulated error summary as the JUnit failure message
+ }
+ }
+
+
+ // Core: targets named with the "Core/" prefix
+
+ public void testFormat() throws Exception {
+ runUtility("Core/Format");
+ }
+
+ public void testCompression() throws Exception {
+ runUtility("Core/Compression");
+ }
+
+ public void testRBBI() throws Exception {
+ runUtility("Core/RBBI");
+ }
+
+ public void testArabicShapingRegTest() throws Exception {
+ runUtility("Core/ArabicShapingRegTest");
+ }
+
+ public void testCalendar() throws Exception {
+ runUtility("Core/Calendar");
+ }
+
+ public void testTimeZone() throws Exception {
+ runUtility("Core/TimeZone");
+ }
+
+ public void testProperty() throws Exception {
+ runUtility("Core/Property");
+ }
+
+ public void testSpoofChecker() throws Exception {
+ runUtility("Core/SpoofChecker");
+ }
+
+ public void testNormalizer() throws Exception {
+ runUtility("Core/Normalizer");
+ }
+
+ public void testUtil() throws Exception {
+ runUtility("Core/Util");
+ }
+
+ public void testTestUCharacterIterator() throws Exception {
+ runUtility("Core/TestUCharacterIterator");
+ }
+
+ public void testDiagBigDecimal() throws Exception {
+ runUtility("Core/DiagBigDecimal");
+ }
+
+ public void testImpl() throws Exception {
+ runUtility("Core/Impl");
+ }
+
+ public void testStringPrep() throws Exception {
+ runUtility("Core/StringPrep");
+ }
+
+ public void testTimeScale() throws Exception {
+ runUtility("Core/TimeScale");
+ }
+
+ public void testTestCharsetDetector() throws Exception {
+ runUtility("Core/TestCharsetDetector");
+ }
+
+ public void testBidi() throws Exception {
+ runUtility("Core/Bidi");
+ }
+
+ public void testDuration() throws Exception {
+ runUtility("Core/Duration");
+ }
+
+ public void testSerializable() throws Exception {
+ runUtility("Core/Serializable");
+ }
+
+
+ // Collate: targets named with the "Collate/" prefix
+
+ public void testCollator() throws Exception {
+ runUtility("Collate/Collator");
+ }
+
+ public void testGlobalizationPreferencesTest() throws Exception {
+ runUtility("Collate/GlobalizationPreferencesTest");
+ }
+
+ public void testRbnfLenientScannerTest() throws Exception {
+ runUtility("Collate/RbnfLenientScannerTest");
+ }
+
+ public void testSearchTest() throws Exception {
+ runUtility("Collate/SearchTest");
+ }
+
+ public void testICUResourceBundleCollationTest() throws Exception {
+ runUtility("Collate/ICUResourceBundleCollationTest");
+ }
+
+ public void testLocaleAliasCollationTest() throws Exception {
+ runUtility("Collate/LocaleAliasCollationTest");
+ }
+
+ public void testULocaleCollationTest() throws Exception {
+ runUtility("Collate/ULocaleCollationTest");
+ }
+
+ // Translit: targets named with the "Translit/" prefix
+
+ public void testTranslit() throws Exception {
+ runUtility("Translit/Translit");
+ }
+}
diff --git a/eclipse-build/plugins.template/com.ibm.icu/.classpath b/eclipse-build/plugins.template/com.ibm.icu/.classpath
new file mode 100644
index 00000000000..751c8f2e504
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu/.project b/eclipse-build/plugins.template/com.ibm.icu/.project
new file mode 100644
index 00000000000..e73714f9be5
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/.project
@@ -0,0 +1,28 @@
+
+
+ com.ibm.icu
+
+
+
+
+
+ org.eclipse.pde.ManifestBuilder
+
+
+
+
+ org.eclipse.pde.SchemaBuilder
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+ org.eclipse.pde.PluginNature
+
+
diff --git a/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.core.prefs b/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..179918e2c58
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,77 @@
+#Mon Aug 30 14:05:56 EDT 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=ignore
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=warning
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=ignore
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.ui.prefs b/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..5693f2fd40d
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,3 @@
+#Thu Dec 14 11:50:17 EST 2006
+eclipse.preferences.version=1
+internal.default.compliance=default
diff --git a/eclipse-build/plugins.template/com.ibm.icu/META-INF/MANIFEST.MF b/eclipse-build/plugins.template/com.ibm.icu/META-INF/MANIFEST.MF
new file mode 100644
index 00000000000..7aa37bc364b
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/META-INF/MANIFEST.MF
@@ -0,0 +1,29 @@
+Manifest-Version: 1.0
+Bundle-ManifestVersion: 2
+Bundle-Name: %pluginName
+Bundle-SymbolicName: com.ibm.icu; singleton:=true
+Bundle-Version: @BUILD_VERSION@
+Bundle-Vendor: %providerName
+Bundle-Localization: plugin
+Bundle-Copyright: @COPYRIGHT@
+Export-Package: com.ibm.icu.lang;base=true;full=true;version="@IMPL_VERSION@",
+ com.ibm.icu.math;base=true;full=true;version="@IMPL_VERSION@",
+ com.ibm.icu.text;base=true;full=true;version="@IMPL_VERSION@",
+ com.ibm.icu.util;base=true;full=true;version="@IMPL_VERSION@",
+ com.ibm.icu.impl;x-internal:=true,
+ com.ibm.icu.impl.data;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.brkitr;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.coll;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.curr;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.lang;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.rbnf;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.region;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.translit;x-internal:=true,
+ com.ibm.icu.impl.data.icudt@DATA_VERSION_NUMBER@b.zone;x-internal:=true,
+ com.ibm.icu.impl.duration;x-internal:=true,
+ com.ibm.icu.impl.locale;x-internal:=true
+Eclipse-LazyStart: true
+Bundle-RequiredExecutionEnvironment: J2SE-1.5
+Bundle-ClassPath: icu-data.jar,.
+Eclipse-ExtensibleAPI: true
diff --git a/eclipse-build/plugins.template/com.ibm.icu/build.properties b/eclipse-build/plugins.template/com.ibm.icu/build.properties
new file mode 100644
index 00000000000..8c108f29641
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/build.properties
@@ -0,0 +1,19 @@
+###############################################################################
+# Copyright (c) 2000, 2008 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+source.. = src/
+output.. = bin/
+src.includes = about.html,\
+ about_files/
+bin.includes = .,\
+ about.html,\
+ about_files/,\
+ plugin.properties,\
+ META-INF/
diff --git a/eclipse-build/plugins.template/com.ibm.icu/plugin.properties b/eclipse-build/plugins.template/com.ibm.icu/plugin.properties
new file mode 100644
index 00000000000..9fe7037be7f
--- /dev/null
+++ b/eclipse-build/plugins.template/com.ibm.icu/plugin.properties
@@ -0,0 +1,12 @@
+###############################################################################
+# Copyright (c) 2000, 2008 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+###############################################################################
+pluginName = International Components for Unicode for Java (ICU4J)
+providerName = IBM Corporation
\ No newline at end of file
diff --git a/main/classes/charset/.classpath b/main/classes/charset/.classpath
new file mode 100644
index 00000000000..3529965b155
--- /dev/null
+++ b/main/classes/charset/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/main/classes/charset/.externalToolBuilders/copy-data-charset.launch b/main/classes/charset/.externalToolBuilders/copy-data-charset.launch
new file mode 100644
index 00000000000..dd1c877c964
--- /dev/null
+++ b/main/classes/charset/.externalToolBuilders/copy-data-charset.launch
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/charset/.project b/main/classes/charset/.project
new file mode 100644
index 00000000000..a4fb3b89418
--- /dev/null
+++ b/main/classes/charset/.project
@@ -0,0 +1,29 @@
+
+
+ icu4j-charset
+
+
+ icu4j-core
+ icu4j-shared
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.ui.externaltools.ExternalToolBuilder
+ full,incremental,
+
+
+ LaunchConfigHandle
+ <project>/.externalToolBuilders/copy-data-charset.launch
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/main/classes/charset/.settings/org.eclipse.core.resources.prefs b/main/classes/charset/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 00000000000..216d4d9ef18
--- /dev/null
+++ b/main/classes/charset/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+#Fri Nov 05 14:17:11 EDT 2010
+eclipse.preferences.version=1
+encoding/=UTF-8
diff --git a/main/classes/charset/.settings/org.eclipse.jdt.core.prefs b/main/classes/charset/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..ce15e6f7d61
--- /dev/null
+++ b/main/classes/charset/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,343 @@
+#Thu Aug 27 17:46:17 EDT 2009
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.doc.comment.support=enabled
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=warning
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags
+org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_assignment=0
+org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
+org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
+org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
+org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_after_package=1
+org.eclipse.jdt.core.formatter.blank_lines_before_field=0
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
+org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
+org.eclipse.jdt.core.formatter.blank_lines_before_method=1
+org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0
+org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
+org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
+org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_header=false
+org.eclipse.jdt.core.formatter.comment.format_html=true
+org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
+org.eclipse.jdt.core.formatter.comment.format_line_comments=true
+org.eclipse.jdt.core.formatter.comment.format_source_code=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
+org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.line_length=120
+org.eclipse.jdt.core.formatter.compact_else_if=true
+org.eclipse.jdt.core.formatter.continuation_indentation=2
+org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
+org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_empty_lines=false
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false
+org.eclipse.jdt.core.formatter.indentation.size=4
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
+org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.lineSplit=120
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
+org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
+org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
+org.eclipse.jdt.core.formatter.tabulation.char=space
+org.eclipse.jdt.core.formatter.tabulation.size=4
+org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
+org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
diff --git a/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs b/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..2f2671e6ee4
--- /dev/null
+++ b/main/classes/charset/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,10 @@
+#Wed Jun 17 11:09:55 EDT 2009
+eclipse.preferences.version=1
+formatter_profile=_ICU4J Standard
+formatter_settings_version=11
+org.eclipse.jdt.ui.ignorelowercasenames=true
+org.eclipse.jdt.ui.importorder=java;javax;org;com;
+org.eclipse.jdt.ui.javadoc=true
+org.eclipse.jdt.ui.ondemandthreshold=99
+org.eclipse.jdt.ui.staticondemandthreshold=99
+org.eclipse.jdt.ui.text.custom_code_templates=/**\r\n * @return the ${bare_field_name}\r\n */ /**\r\n * @param ${param} the ${bare_field_name} to set\r\n */ /**\r\n * ${tags}\r\n */ /*\r\n *******************************************************************************\r\n * Copyright (C) ${year}, International Business Machines Corporation and *\r\n * others. All Rights Reserved. *\r\n *******************************************************************************\r\n */ /**\r\n * @author ${user}\r\n *\r\n * ${tags}\r\n */ /**\r\n * \r\n */ /**\r\n * ${tags}\r\n */ /* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */ /**\r\n * ${tags}\r\n * ${see_to_target}\r\n */ ${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration} \r\n \r\n \r\n \r\n // ${todo} Auto-generated catch block\r\n${exception_var}.printStackTrace(); // ${todo} Auto-generated method stub\r\n${body_statement} ${body_statement}\r\n// ${todo} Auto-generated constructor stub return ${field}; ${field} \= ${param};
diff --git a/main/classes/charset/build.properties b/main/classes/charset/build.properties
new file mode 100644
index 00000000000..a21fb196196
--- /dev/null
+++ b/main/classes/charset/build.properties
@@ -0,0 +1,6 @@
+#*******************************************************************************
+#* Copyright (C) 2009, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+shared.dir = ../../shared
+javac.compilerarg = -Xlint:all,-deprecation,-dep-ann
diff --git a/main/classes/charset/build.xml b/main/classes/charset/build.xml
new file mode 100644
index 00000000000..28efad3b7ee
--- /dev/null
+++ b/main/classes/charset/build.xml
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/main/classes/charset/charset-build.launch b/main/classes/charset/charset-build.launch
new file mode 100644
index 00000000000..5afd6a3ee4b
--- /dev/null
+++ b/main/classes/charset/charset-build.launch
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/charset/manifest.stub b/main/classes/charset/manifest.stub
new file mode 100644
index 00000000000..c9fa009a3a1
--- /dev/null
+++ b/main/classes/charset/manifest.stub
@@ -0,0 +1,16 @@
+Manifest-Version: 1.0
+Specification-Title: International Components for Unicode for Java (charset)
+Specification-Version: @SPECVERSION@
+Specification-Vendor: icu-project.org
+Implementation-Title: International Components for Unicode for Java (charset)
+Implementation-Version: @IMPLVERSION@
+Implementation-Vendor: IBM Corporation
+Implementation-Vendor-Id: com.ibm
+Bundle-ManifestVersion: 2
+Bundle-Name: ICU4J charset
+Bundle-Description: International Components for Unicode for Java (charset)
+Bundle-SymbolicName: com.ibm.icu.charset
+Bundle-Version: @IMPLVERSION@
+Bundle-Vendor: IBM Corporation
+Bundle-Copyright: @COPYRIGHT@
+Bundle-RequiredExecutionEnvironment: @EXECENV@
diff --git a/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider b/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider
new file mode 100644
index 00000000000..ca798e7dd4a
--- /dev/null
+++ b/main/classes/charset/src/META-INF/services/java.nio.charset.spi.CharsetProvider
@@ -0,0 +1,3 @@
+# Copyright (C) 2006, International Business Machines Corporation and others. All Rights Reserved.
+# icu4j converters
+com.ibm.icu.charset.CharsetProviderICU
diff --git a/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java b/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java
new file mode 100644
index 00000000000..a74a1f75e73
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/Charset88591.java
@@ -0,0 +1,128 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * ISO-8859-1 converter: every byte value 0x00..0xff corresponds to the code point with the same
+ * value. The buffer handling is inherited from the ASCII converter classes; only the core
+ * conversion loops are replaced here.
+ */
+class Charset88591 extends CharsetASCII {
+    public Charset88591(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+        super(icuCanonicalName, javaCanonicalName, aliases);
+    }
+
+    class CharsetDecoder88591 extends CharsetDecoderASCII {
+        public CharsetDecoder88591(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Array-based core loop. Copies bytes sourceArray[oldSource..limit) to
+         * targetArray[i + offset], widening each byte to an unsigned char.
+         *
+         * @return always null: every byte is convertible, so the caller decides between
+         *         underflow and overflow
+         */
+        protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
+                byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
+
+            /*
+             * perform 88591 conversion from the source array to the target array. no range check is
+             * necessary.
+             */
+            for (int i = oldSource; i < limit; i++)
+                targetArray[i + offset] = (char) (sourceArray[i] & 0xff);
+
+            return null;
+        }
+
+        /**
+         * Buffer-based core loop. Never returns normally: it runs until source.get() or
+         * target.put() throws, and the caller translates the exception into a CoderResult.
+         */
+        protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)
+                throws BufferUnderflowException, BufferOverflowException {
+
+            /*
+             * perform 88591 conversion from the source buffer to the target buffer. no range check
+             * is necessary (an exception will be generated to end the loop).
+             */
+            while (true)
+                target.put((char) (source.get() & 0xff));
+        }
+    }
+
+    class CharsetEncoder88591 extends CharsetEncoderASCII {
+        public CharsetEncoder88591(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Array-based core loop. Copies chars sourceArray[oldSource..limit) to
+         * targetArray[i + offset] as long as each char is at most 0xff.
+         *
+         * @return null if the whole range was converted, otherwise the result of
+         *         encodeMalformedOrUnmappable for the first char above 0xff
+         */
+        protected final CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
+                char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
+                boolean flush) {
+            int i, ch = 0;
+
+            /*
+             * perform 88591 conversion from the source array to the target array, making sure each
+             * char in the source is within the correct range
+             */
+            for (i = oldSource; i < limit; i++) {
+                ch = (int) sourceArray[i];
+                if ((ch & 0xff00) == 0) {
+                    targetArray[i + offset] = (byte) ch;
+                } else {
+                    break;
+                }
+            }
+
+            /*
+             * if some char was not in the correct range, we need to deal with this char by calling
+             * encodeMalformedOrUnmappable and move the source and target positions to reflect the
+             * early termination of the loop. i and i + offset are indexes into the backing arrays,
+             * so the array offsets must be subtracted to obtain buffer positions.
+             */
+            if ((ch & 0xff00) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return encodeMalformedOrUnmappable(source, ch, flush);
+            } else
+                return null;
+        }
+
+        /**
+         * Buffer-based core loop. Returns only when a char above 0xff is read; running out of
+         * input or output space surfaces as an exception that the caller handles.
+         */
+        protected final CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
+                boolean flush) throws BufferUnderflowException, BufferOverflowException {
+            int ch;
+
+            /*
+             * perform 88591 conversion from the source buffer to the target buffer, making sure
+             * each char in the source is within the correct range
+             */
+
+            while (true) {
+                ch = (int) source.get();
+                if ((ch & 0xff00) == 0) {
+                    target.put((byte) ch);
+                } else {
+                    break;
+                }
+            }
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling encodeMalformedOrUnmappable.
+             */
+            return encodeMalformedOrUnmappable(source, ch, flush);
+        }
+
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoder88591(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoder88591(this);
+    }
+
+    /** Adds the code point range U+0000..U+00FF to setFillIn; the which argument is not used. */
+    void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+        setFillIn.add(0,0xff);
+    }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java
new file mode 100644
index 00000000000..419a4601aeb
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetASCII.java
@@ -0,0 +1,357 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * US-ASCII converter. Byte values 0x00..0x7f correspond to the code points with the same value;
+ * any other byte is reported as malformed when decoding, and any char above 0x7f is handed to
+ * encodeMalformedOrUnmappable when encoding.
+ */
+class CharsetASCII extends CharsetICU {
+    /** Substitution bytes passed to the encoder base class: a single 0x1a. */
+    protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a };
+
+    public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 1;
+        minBytesPerChar = 1;
+        maxCharsPerByte = 1;
+    }
+
+    class CharsetDecoderASCII extends CharsetDecoderICU {
+
+        public CharsetDecoderASCII(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Decodes from source into target. Uses the array-based core loop when both buffers are
+         * array-backed and the get()/put()-based core loop otherwise, then fills offsets (if not
+         * null) with 0, 1, 2, ... for each char written during this call.
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                boolean flush) {
+            if (!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if (!target.hasRemaining()) {
+                /* no output available, can't do anything */
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr;
+            int oldSource = source.position();
+            int oldTarget = target.position();
+
+            if (source.hasArray() && target.hasArray()) {
+                /* optimized loop */
+
+                /*
+                 * extract arrays from the buffers and obtain various constant values that will be
+                 * necessary in the core loop
+                 */
+                byte[] sourceArray = source.array();
+                int sourceOffset = source.arrayOffset();
+                int sourceIndex = oldSource + sourceOffset;
+                int sourceLength = source.limit() - oldSource;
+
+                char[] targetArray = target.array();
+                int targetOffset = target.arrayOffset();
+                int targetIndex = oldTarget + targetOffset;
+                int targetLength = target.limit() - oldTarget;
+
+                /* limit is an index into sourceArray; offset maps a source index to a target index */
+                int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
+                        + sourceIndex;
+                int offset = targetIndex - sourceIndex;
+
+                /*
+                 * perform the core loop... if it returns null, it must be due to an overflow or
+                 * underflow
+                 */
+                cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit);
+                if (cr == null) {
+                    if (sourceLength <= targetLength) {
+                        source.position(oldSource + sourceLength);
+                        target.position(oldTarget + sourceLength);
+                        cr = CoderResult.UNDERFLOW;
+                    } else {
+                        source.position(oldSource + targetLength);
+                        target.position(oldTarget + targetLength);
+                        cr = CoderResult.OVERFLOW;
+                    }
+                }
+            } else {
+                /* unoptimized loop */
+
+                try {
+                    /*
+                     * perform the core loop... if it throws an exception, it must be due to an
+                     * overflow or underflow
+                     */
+                    cr = decodeLoopCoreUnoptimized(source, target);
+
+                } catch (BufferUnderflowException ex) {
+                    /* all of the source has been read */
+                    cr = CoderResult.UNDERFLOW;
+                } catch (BufferOverflowException ex) {
+                    /* the target is full; the byte just read could not be written */
+                    source.position(source.position() - 1); /* rewind by 1 */
+                    cr = CoderResult.OVERFLOW;
+                }
+            }
+
+            /* set offsets since the start */
+            if (offsets != null) {
+                int count = target.position() - oldTarget;
+                int sourceIndex = -1;
+                while (--count >= 0) offsets.put(++sourceIndex);
+            }
+
+            return cr;
+        }
+
+        /**
+         * Array-based core loop. Copies bytes sourceArray[oldSource..limit) to
+         * targetArray[i + offset] as long as each byte is below 0x80.
+         *
+         * @return null if the whole range was converted, otherwise the result of
+         *         decodeMalformedOrUnmappable for the first byte with the high bit set
+         */
+        protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
+                byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
+            int i, ch = 0;
+
+            /*
+             * perform ascii conversion from the source array to the target array, making sure each
+             * byte in the source is within the correct range
+             */
+            for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)
+                targetArray[i + offset] = (char) ch;
+
+            /*
+             * if some byte was not in the correct range, we need to deal with this byte by calling
+             * decodeMalformedOrUnmappable and move the source and target positions to reflect the
+             * early termination of the loop. i and i + offset are indexes into the backing arrays,
+             * so the array offsets must be subtracted to obtain buffer positions.
+             */
+            if ((ch & 0x80) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return decodeMalformedOrUnmappable(ch);
+            } else
+                return null;
+        }
+
+        /**
+         * Buffer-based core loop. Returns only when a byte with the high bit set is read; running
+         * out of input or output space surfaces as an exception that decodeLoop handles.
+         */
+        protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)
+                throws BufferUnderflowException, BufferOverflowException {
+            int ch = 0;
+
+            /*
+             * perform ascii conversion from the source buffer to the target buffer, making sure
+             * each byte in the source is within the correct range
+             */
+            while (((ch = (source.get() & 0xff)) & 0x80) == 0)
+                target.put((char) ch);
+
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling decodeMalformedOrUnmappable
+             */
+            return decodeMalformedOrUnmappable(ch);
+        }
+
+        /** Records the offending byte in toUBytesArray and reports a malformed input of length 1. */
+        protected CoderResult decodeMalformedOrUnmappable(int ch) {
+            /*
+             * put the guilty character into toUBytesArray and return a message saying that the
+             * character was malformed and of length 1.
+             */
+            toUBytesArray[0] = (byte) ch;
+            toULength = 1;
+            return CoderResult.malformedForLength(1);
+        }
+    }
+
+    class CharsetEncoderASCII extends CharsetEncoderICU {
+
+        public CharsetEncoderASCII(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+
+        /**
+         * Encodes from source into target. A pending char in fromUChar32 is handled first via
+         * encodeTrail; otherwise the array-based core loop is used when both buffers are
+         * array-backed and the get()/put()-based core loop otherwise. Finally offsets (if not
+         * null) is filled with 0, 1, 2, ... for each byte written during this call.
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
+                boolean flush) {
+            if (!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if (!target.hasRemaining()) {
+                /* no output available, can't do anything */
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr;
+            int oldSource = source.position();
+            int oldTarget = target.position();
+
+            if (fromUChar32 != 0) {
+                /*
+                 * if we have a leading character in fromUChar32 that needs to be dealt with, we
+                 * need to check for a matching trail character and take the appropriate action as
+                 * dictated by encodeTrail.
+                 */
+                cr = encodeTrail(source, (char) fromUChar32, flush);
+            } else {
+                if (source.hasArray() && target.hasArray()) {
+                    /* optimized loop */
+
+                    /*
+                     * extract arrays from the buffers and obtain various constant values that will
+                     * be necessary in the core loop
+                     */
+                    char[] sourceArray = source.array();
+                    int sourceOffset = source.arrayOffset();
+                    int sourceIndex = oldSource + sourceOffset;
+                    int sourceLength = source.limit() - oldSource;
+
+                    byte[] targetArray = target.array();
+                    int targetOffset = target.arrayOffset();
+                    int targetIndex = oldTarget + targetOffset;
+                    int targetLength = target.limit() - oldTarget;
+
+                    /* limit is an index into sourceArray; offset maps a source index to a target index */
+                    int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
+                            + sourceIndex;
+                    int offset = targetIndex - sourceIndex;
+
+                    /*
+                     * perform the core loop... if it returns null, it must be due to an overflow or
+                     * underflow
+                     */
+                    cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray, sourceIndex, offset, limit, flush);
+                    if (cr == null) {
+                        if (sourceLength <= targetLength) {
+                            source.position(oldSource + sourceLength);
+                            target.position(oldTarget + sourceLength);
+                            cr = CoderResult.UNDERFLOW;
+                        } else {
+                            source.position(oldSource + targetLength);
+                            target.position(oldTarget + targetLength);
+                            cr = CoderResult.OVERFLOW;
+                        }
+                    }
+                } else {
+                    /* unoptimized loop */
+
+                    try {
+                        /*
+                         * perform the core loop... if it throws an exception, it must be due to an
+                         * overflow or underflow
+                         */
+                        cr = encodeLoopCoreUnoptimized(source, target, flush);
+
+                    } catch (BufferUnderflowException ex) {
+                        /* all of the source has been read */
+                        cr = CoderResult.UNDERFLOW;
+                    } catch (BufferOverflowException ex) {
+                        /* the target is full; the char just read could not be written */
+                        source.position(source.position() - 1); /* rewind by 1 */
+                        cr = CoderResult.OVERFLOW;
+                    }
+                }
+            }
+
+            /* set offsets since the start */
+            if (offsets != null) {
+                int count = target.position() - oldTarget;
+                int sourceIndex = -1;
+                while (--count >= 0) offsets.put(++sourceIndex);
+            }
+
+            return cr;
+        }
+
+        /**
+         * Array-based core loop. Copies chars sourceArray[oldSource..limit) to
+         * targetArray[i + offset] as long as each char is below 0x80.
+         *
+         * @return null if the whole range was converted, otherwise the result of
+         *         encodeMalformedOrUnmappable for the first char outside 0x00..0x7f
+         */
+        protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
+                char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
+                boolean flush) {
+            int i, ch = 0;
+
+            /*
+             * perform ascii conversion from the source array to the target array, making sure each
+             * char in the source is within the correct range
+             */
+            for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)
+                targetArray[i + offset] = (byte) ch;
+
+            /*
+             * if some char was not in the correct range, we need to deal with this char by calling
+             * encodeMalformedOrUnmappable and move the source and target positions to reflect the
+             * early termination of the loop. i and i + offset are indexes into the backing arrays,
+             * so the array offsets must be subtracted to obtain buffer positions.
+             */
+            if ((ch & 0xff80) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return encodeMalformedOrUnmappable(source, ch, flush);
+            } else
+                return null;
+        }
+
+        /**
+         * Buffer-based core loop. Returns only when a char outside 0x00..0x7f is read; running
+         * out of input or output space surfaces as an exception that encodeLoop handles.
+         */
+        protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
+                boolean flush) throws BufferUnderflowException, BufferOverflowException {
+            int ch;
+
+            /*
+             * perform ascii conversion from the source buffer to the target buffer, making sure
+             * each char in the source is within the correct range
+             */
+            while (((ch = (int) source.get()) & 0xff80) == 0)
+                target.put((byte) ch);
+
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling encodeMalformedOrUnmappable.
+             */
+            return encodeMalformedOrUnmappable(source, ch, flush);
+        }
+
+        protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {
+            /*
+             * if the character is a surrogate code unit, we need to call encodeTrail to attempt to
+             * match it up with the other half of the pair. if not, the character is unmappable.
+             */
+            return (UTF16.isSurrogate((char) ch))
+                ? encodeTrail(source, (char) ch, flush)
+                : CoderResult.unmappableForLength(1);
+        }
+
+        private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
+            /*
+             * ASCII doesn't support characters outside the BMP, so if handleSurrogates returns
+             * null, we leave fromUChar32 alone (it should store a new codepoint) and call it
+             * unmappable.
+             */
+            CoderResult cr = handleSurrogates(source, lead);
+            if (cr != null) {
+                return cr;
+            } else {
+                //source.position(source.position() - 2);
+                return CoderResult.unmappableForLength(2);
+            }
+        }
+
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderASCII(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderASCII(this);
+    }
+
+    /** Adds the code point range U+0000..U+007F to setFillIn; the which argument is not used. */
+    void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+        setFillIn.add(0,0x7f);
+    }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java
new file mode 100644
index 00000000000..884f21a0223
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetBOCU1.java
@@ -0,0 +1,1063 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author krajwade
+ *
+ */
+class CharsetBOCU1 extends CharsetICU {
+ /* BOCU constants and macros */
+
+ /* initial value for "prev": middle of the ASCII range */
+ private static final byte BOCU1_ASCII_PREV = 0x40;
+
+ /* bounding byte values for differences */
+ private static final int BOCU1_MIN = 0x21;
+ private static final int BOCU1_MIDDLE = 0x90;
+ //private static final int BOCU1_MAX_LEAD = 0xfe;
+ private static final int BOCU1_MAX_TRAIL = 0xff;
+ private static final int BOCU1_RESET = 0xff;
+
+ /* number of lead bytes */
+ //private static final int BOCU1_COUNT = (BOCU1_MAX_LEAD-BOCU1_MIN+1);
+
+ /* adjust trail byte counts for the use of some C0 control byte values */
+ private static final int BOCU1_TRAIL_CONTROLS_COUNT = 20;
+ private static final int BOCU1_TRAIL_BYTE_OFFSET = (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT);
+
+ /* number of trail bytes */
+ private static final int BOCU1_TRAIL_COUNT =((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT);
+
+ /*
+ * number of positive and negative single-byte codes
+ * (counting 0==BOCU1_MIDDLE among the positive ones)
+ */
+ private static final int BOCU1_SINGLE = 64;
+
+ /* number of lead bytes for positive and negative 2/3/4-byte sequences */
+ private static final int BOCU1_LEAD_2 = 43;
+ private static final int BOCU1_LEAD_3 = 3;
+ //private static final int BOCU1_LEAD_4 = 1;
+
+ /* The difference value range for single-byters. */
+ private static final int BOCU1_REACH_POS_1 = (BOCU1_SINGLE-1);
+ private static final int BOCU1_REACH_NEG_1 = (-BOCU1_SINGLE);
+
+ /* The difference value range for double-byters. */
+ private static final int BOCU1_REACH_POS_2 = (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT);
+ private static final int BOCU1_REACH_NEG_2 = (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT);
+
+ /* The difference value range for 3-byters. */
+ private static final int BOCU1_REACH_POS_3 =
+ (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+
+ private static final int BOCU1_REACH_NEG_3 = (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+
+ /* The lead byte start values. */
+ private static final int BOCU1_START_POS_2 = (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1);
+ private static final int BOCU1_START_POS_3 = (BOCU1_START_POS_2+BOCU1_LEAD_2);
+ private static final int BOCU1_START_POS_4 = (BOCU1_START_POS_3+BOCU1_LEAD_3);
+ /* ==BOCU1_MAX_LEAD */
+
+ private static final int BOCU1_START_NEG_2 = (BOCU1_MIDDLE+BOCU1_REACH_NEG_1);
+ private static final int BOCU1_START_NEG_3 = (BOCU1_START_NEG_2-BOCU1_LEAD_2);
+ //private static final int BOCU1_START_NEG_4 = (BOCU1_START_NEG_3-BOCU1_LEAD_3);
+ /* ==BOCU1_MIN+1 */
+
+    /* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
+    /* private static int BOCU1_LENGTH_FROM_LEAD(int lead) {
+       return ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 :
+             (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 :
+             (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4);
+    }*/
+
+    /*
+     * The length of a byte sequence, according to its packed form as produced by packDiff():
+     * for 2- and 3-byte sequences the top byte holds the length itself; any larger top byte is
+     * the lead byte of a 4-byte sequence. The mask makes the comparison unsigned.
+     */
+    private static int BOCU1_LENGTH_FROM_PACKED(int packed) {
+        return (((packed)&UConverterConstants.UNSIGNED_INT_MASK)<0x04000000 ? (packed)>>24 : 4);
+    }
+
+ /*
+ * Byte value map for control codes,
+ * from external byte values 0x00..0x20
+ * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
+ * External byte values that are illegal as trail bytes are mapped to -1.
+ */
+ private static final int[]
+ bocu1ByteToTrail={
+ /* 0 1 2 3 4 5 6 7 */
+ -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
+
+ /* 8 9 a b c d e f */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+
+ /* 10 11 12 13 14 15 16 17 */
+ 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+
+ /* 18 19 1a 1b 1c 1d 1e 1f */
+ 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
+
+ /* 20 */
+ -1
+ };
+
+ /*
+ * Byte value map for control codes,
+ * from trail byte values 0..19 (0..0x13) as used in the difference calculation
+ * to external byte values 0x00..0x20.
+ */
+ private static final int[]
+ bocu1TrailToByte = {
+ /* 0 1 2 3 4 5 6 7 */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
+
+ /* 8 9 a b c d e f */
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+ /* 10 11 12 13 */
+ 0x1c, 0x1d, 0x1e, 0x1f
+ };
+
+
+ /*
+ * 12 commonly used C0 control codes (and space) are only used to encode
+ * themselves directly,
+ * which makes BOCU-1 MIME-usable and reasonably safe for
+ * ASCII-oriented software.
+ *
+ * These controls are
+ * 0 NUL
+ *
+ * 7 BEL
+ * 8 BS
+ *
+ * 9 TAB
+ * a LF
+ * b VT
+ * c FF
+ * d CR
+ *
+ * e SO
+ * f SI
+ *
+ * 1a SUB
+ * 1b ESC
+ *
+ * The other 20 C0 controls are also encoded directly (to preserve order)
+ * but are also used as trail bytes in difference encoding
+ * (for better compression).
+ */
+    private static int BOCU1_TRAIL_TO_BYTE(int trail) {
+        /* the first BOCU1_TRAIL_CONTROLS_COUNT trail values are looked up in the control table */
+        if (trail < BOCU1_TRAIL_CONTROLS_COUNT) {
+            return bocu1TrailToByte[trail];
+        }
+        /* all other trail values are shifted by a fixed offset */
+        return trail + BOCU1_TRAIL_BYTE_OFFSET;
+    }
+
+ /* BOCU-1 implementation functions ------------------------------------------ */
+    private static int BOCU1_SIMPLE_PREV(int c){
+        /* clear the low 7 bits to get the start of the 128-aligned block containing c */
+        int blockStart = c & ~0x7f;
+        return blockStart + BOCU1_ASCII_PREV;
+    }
+
+    /**
+     * Computes the "previous" state value used for differencing the next code point,
+     * given the current code point.
+     *
+     * @param c current code point, 0x3040..0xd7a3 (other values are handled by BOCU1_PREV)
+     * @return "previous code point" state value
+     */
+    private static int bocu1Prev(int c) {
+        /* the lower bound 0x3040<=c is not re-checked here */
+        if(c<=0x309f) {
+            /* Hiragana is not 128-aligned */
+            return 0x3070;
+        }
+        if(0x4e00<=c && c<=0x9fa5) {
+            /* CJK Unihan */
+            return 0x4e00-BOCU1_REACH_NEG_2;
+        }
+        /* the upper bound c<=0xd7a3 is not re-checked here */
+        if(c>=0xac00) {
+            /* Korean Hangul */
+            return (0xd7a3+0xac00)/2;
+        }
+        /* mostly small scripts */
+        return BOCU1_SIMPLE_PREV(c);
+    }
+
+    /** Dispatches to bocu1Prev() only for 0x3040..0xd7a3; everything else takes the simple path. */
+    private static int BOCU1_PREV(int c) {
+        boolean needsFullComputation = 0x3040<=c && c<=0xd7a3;
+        return needsFullComputation ? bocu1Prev(c) : BOCU1_SIMPLE_PREV(c);
+    }
+
+ protected byte[] fromUSubstitution = new byte[]{(byte)0x1A};
+
+ /* Faster versions of packDiff() for single-byte-encoded diff values. */
+
+    /** Tells whether diff lies within the single-byte reach BOCU1_REACH_NEG_1..BOCU1_REACH_POS_1. */
+    private static boolean DIFF_IS_SINGLE(int diff){
+        return diff>=BOCU1_REACH_NEG_1 && diff<=BOCU1_REACH_POS_1;
+    }
+
+    /** Returns the single-byte encoding of diff: the middle byte value shifted by diff. */
+    private static int PACK_SINGLE_DIFF(int diff){
+        return diff+BOCU1_MIDDLE;
+    }
+
+    /** Tells whether diff lies within the two-byte reach BOCU1_REACH_NEG_2..BOCU1_REACH_POS_2. */
+    private static boolean DIFF_IS_DOUBLE(int diff){
+        return diff>=BOCU1_REACH_NEG_2 && diff<=BOCU1_REACH_POS_2;
+    }
+
+    public CharsetBOCU1(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        /* one char produces between 1 and 4 bytes; one byte produces at most one char */
+        minBytesPerChar = 1;
+        maxBytesPerChar = 4;
+        maxCharsPerByte = 1;
+    }
+
+ class CharsetEncoderBOCU extends CharsetEncoderICU {
+        /** Creates an encoder for cs, passing the enclosing charset's fromUSubstitution bytes to the base class. */
+        public CharsetEncoderBOCU(CharsetICU cs) {
+            super(cs,fromUSubstitution);
+        }
+
+ int sourceIndex, nextSourceIndex;
+ int prev, c , diff;
+ boolean checkNegative;
+ boolean LoopAfterTrail;
+ int targetCapacity;
+ CoderResult cr;
+
+ /* label values for supporting behavior similar to goto in C */
+ private static final int fastSingle=0;
+ private static final int getTrail=1;
+ private static final int regularLoop=2;
+
+ private boolean LabelLoop; //used to break the while loop
+ private int labelType = fastSingle; //labeType is set to fastSingle to start the code from fastSingle:
+
+        /**
+         * Division with remainder for possibly negative numerators.
+         * Plain integer division and modulo yield a negative remainder and a quotient
+         * that is one too large for negative n; this helper adjusts both so that the
+         * remainder is always >=0. For positive n no adjustment takes place.
+         *
+         * The quotient is stored in the field diff; the remainder is returned.
+         *
+         * @param n Number to be split into quotient and rest.
+         * @param d Divisor.
+         * @param m Ignored on input; kept for signature compatibility with callers.
+         * @return the non-negative rest (modulo result)
+         */
+        private int NEGDIVMOD(int n, int d, int m) {
+            int quotient = n / d;
+            int rest = n % d;
+            if(rest < 0) {
+                /* shift the pair so that the rest lands in 0..d-1 */
+                quotient -= 1;
+                rest += d;
+            }
+            diff = quotient;
+            return rest;
+        }
+
+        /**
+         * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
+         * and return a packed integer with them.
+         *
+         * The encoding favors small absolute differences with short encodings
+         * to compress runs of same-script characters.
+         *
+         * Optimized version with unrolled loops and fewer floating-point operations
+         * than the standard packDiff().
+         *
+         * Note: this method uses the instance field diff as its working variable and
+         * leaves it modified; callers overwrite diff with the returned value.
+         * No branch here produces the 1-byte form; callers test DIFF_IS_SINGLE() first.
+         *
+         * @param n difference value -0x10ffff..0x10ffff
+         * @return
+         *      0x010000zz for 1-byte sequence zz
+         *      0x0200yyzz for 2-byte sequence yy zz
+         *      0x03xxyyzz for 3-byte sequence xx yy zz
+         *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
+         */
+        private int packDiff(int n) {
+            int result, m = 0;
+            diff = n;
+
+            if(diff>=BOCU1_REACH_NEG_1) {
+                /* mostly positive differences, and single-byte negative ones */
+                if(diff<=BOCU1_REACH_POS_2) {
+                    /* two bytes */
+                    diff-=BOCU1_REACH_POS_1+1;
+                    result=0x02000000;
+
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    result|=BOCU1_TRAIL_TO_BYTE(m);
+
+                    result|=(BOCU1_START_POS_2+diff)<<8;
+                } else if(diff<=BOCU1_REACH_POS_3) {
+                    /* three bytes */
+                    diff-=BOCU1_REACH_POS_2+1;
+                    result=0x03000000;
+
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    result|=BOCU1_TRAIL_TO_BYTE(m);
+
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+                    result|=(BOCU1_START_POS_3+diff)<<16;
+                } else {
+                    /* four bytes */
+                    diff-=BOCU1_REACH_POS_3+1;
+
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    result=BOCU1_TRAIL_TO_BYTE(m);
+
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+                    /*
+                     * We know that / and % would deliver quotient 0 and rest=diff.
+                     * Avoid division and modulo for performance.
+                     */
+                    result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
+
+                    /* the long value is narrowed back to int by the compound assignment */
+                    result|=((BOCU1_START_POS_4&UConverterConstants.UNSIGNED_INT_MASK))<<24;
+                }
+            } else {
+                /* two- to four-byte negative differences */
+                if(diff>=BOCU1_REACH_NEG_2) {
+                    /* two bytes */
+                    diff-=BOCU1_REACH_NEG_1;
+                    result=0x02000000;
+
+                    /* NEGDIVMOD returns the rest and leaves the quotient in the field diff */
+                    m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    result|=BOCU1_TRAIL_TO_BYTE(m);
+
+                    result|=(BOCU1_START_NEG_2+diff)<<8;
+                } else if(diff>=BOCU1_REACH_NEG_3) {
+                    /* three bytes */
+                    diff-=BOCU1_REACH_NEG_2;
+                    result=0x03000000;
+
+                    m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    result|=BOCU1_TRAIL_TO_BYTE(m);
+
+                    m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+                    result|=(BOCU1_START_NEG_3+diff)<<16;
+                } else {
+                    /* four bytes */
+                    diff-=BOCU1_REACH_NEG_3;
+
+                    m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    result=BOCU1_TRAIL_TO_BYTE(m);
+
+                    m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+                    /*
+                     * We know that NEGDIVMOD would deliver
+                     * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
+                     * Avoid division and modulo for performance.
+                     */
+                    m=diff+BOCU1_TRAIL_COUNT;
+                    result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
+
+                    result|=BOCU1_MIN<<24;
+                }
+            }
+            return result;
+        }
+
+        /**
+         * Drives the encoder state machine: loads the converter state into the working fields,
+         * then repeatedly runs the step selected by labelType (fastSingle, getTrail or
+         * regularLoop) until regularLoop() clears LabelLoop. The flush argument is not read here.
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+            /* reset the per-call control flags */
+            cr = CoderResult.UNDERFLOW;
+            LabelLoop = true;        /* cleared by regularLoop() to leave the dispatch loop */
+            checkNegative = false;   /* set by getTrail() when it negates c for lack of input */
+            LoopAfterTrail = false;  /* set by getTrail() so regularLoop() skips reading a new c */
+
+            /* load the converter state into the working fields */
+            targetCapacity = target.limit() - target.position();
+            c = fromUChar32;
+            prev = fromUnicodeStatus;
+            if(prev==0){
+                prev = BOCU1_ASCII_PREV;
+            }
+
+            /* sourceIndex==-1 if the current character began in the previous buffer */
+            if(c==0){
+                sourceIndex = 0;
+            } else {
+                sourceIndex = -1;
+            }
+            nextSourceIndex = 0;
+
+            /* a pending character is continued in getTrail when there is room for output */
+            if(c!=0 && targetCapacity>0){
+                labelType = getTrail;
+            }
+
+            /* conversion loop: each step returns the label of the step to run next */
+            while(LabelLoop){
+                if(labelType==fastSingle){
+                    labelType = fastSingle(source, target, offsets);
+                } else if(labelType==getTrail){
+                    labelType = getTrail(source, target, offsets);
+                } else if(labelType==regularLoop){
+                    labelType = regularLoop(source, target, offsets);
+                }
+            }
+
+            return cr;
+        }
+
+        /**
+         * Fast path for code units below 0x3000 that encode in a single byte. Stops at the first
+         * code unit that does not qualify (leaving it unconsumed) or when the capped
+         * targetCapacity reaches 0, and always hands over to regularLoop.
+         */
+        private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            /* use only one loop counter, targetCapacity, capped by the remaining source length */
+            diff = source.limit() - source.position();
+            if(diff<targetCapacity){
+                targetCapacity = diff;
+            }
+            while(targetCapacity>0){
+                /* peek at the next code unit without consuming it */
+                c = source.get(source.position());
+                if(c>=0x3000){
+                    break;
+                }
+
+                int encoded;
+                if(c<=0x20){
+                    /* controls and space are written as-is; controls also reset prev */
+                    if(c!=0x20){
+                        prev = BOCU1_ASCII_PREV;
+                    }
+                    encoded = c;
+                } else {
+                    diff = c-prev;
+                    if(!DIFF_IS_SINGLE(diff)){
+                        /* needs more than one byte: leave it to the regular loop */
+                        break;
+                    }
+                    prev = BOCU1_SIMPLE_PREV(c);
+                    encoded = PACK_SINGLE_DIFF(diff);
+                }
+
+                target.put((byte)encoded);
+                if(offsets!=null){
+                    offsets.put(nextSourceIndex++);
+                }
+                source.position(source.position()+1);
+                --targetCapacity;
+            }
+            return regularLoop;
+        }
+
+        /**
+         * Tries to complete the lead surrogate held in c with the next code unit of source.
+         * With no input left, c is negated and checkNegative is set. In every case
+         * LoopAfterTrail is set and regularLoop is returned as the next step.
+         */
+        private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            if(!source.hasRemaining()){
+                /* no more input: a negative lead surrogate marks the character as incomplete */
+                c = -c;
+                checkNegative = true;
+            } else {
+                /* peek at the following code unit; consume it only if it is a trail surrogate */
+                char next = source.get(source.position());
+                if(UTF16.isTrailSurrogate(next)){
+                    source.position(source.position()+1);
+                    ++nextSourceIndex;
+                    c = UCharacter.getCodePoint((char)c, next);
+                }
+            }
+            LoopAfterTrail = true;
+            return regularLoop;
+        }
+
+        /**
+         * General encoding loop for all code points. Reads code units from source (or continues
+         * with the c prepared by getTrail() when LoopAfterTrail is set), writes the 1..4 byte
+         * difference encoding to target, and spills bytes that do not fit into errorBuffer.
+         *
+         * Returns early with fastSingle after a single-byte code point below 0x3000; otherwise
+         * stores the converter state, clears LabelLoop to end the dispatch loop in encodeLoop(),
+         * and returns fastSingle as the label for the next call.
+         */
+        @SuppressWarnings("fallthrough")
+        private int regularLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+            if(!LoopAfterTrail){
+                /*restore real values*/
+                targetCapacity = target.limit()-target.position();
+                sourceIndex = nextSourceIndex; /*wrong if offsets==null but does not matter*/
+            }
+            /*regular loop for all classes*/
+            while(LoopAfterTrail || source.hasRemaining()){
+                if(LoopAfterTrail || targetCapacity>0){
+
+                    if(!LoopAfterTrail){
+                        c = source.get();
+                        ++nextSourceIndex;
+
+                        if(c<=0x20){
+                            /*
+                             * ISO C0 control & space:
+                             * Encode directly for MIME compatibility,
+                             * and reset state except for space, to not disrupt compression.
+                             */
+                            if(c!=0x20) {
+                                prev=BOCU1_ASCII_PREV;
+                            }
+                            target.put((byte)c);
+                            if(offsets != null){
+                                offsets.put(sourceIndex++);
+                            }
+                            --targetCapacity;
+
+                            sourceIndex=nextSourceIndex;
+                            continue;
+                        }
+
+                        if(UTF16.isLeadSurrogate((char)c)){
+                            getTrail(source, target, offsets);
+                            if(checkNegative){
+                                /* incomplete surrogate pair: keep -c as pending state */
+                                break;
+                            }
+                        }
+                    }
+
+                    if(LoopAfterTrail){
+                        LoopAfterTrail = false;
+                    }
+
+                    /*
+                     * all other Unicode code points c==U+0021..U+10ffff
+                     * are encoded with the difference c-prev
+                     *
+                     * a new prev is computed from c,
+                     * placed in the middle of a 0x80-block (for most small scripts) or
+                     * in the middle of the Unihan and Hangul blocks
+                     * to statistically minimize the following difference
+                     */
+                    diff = c- prev;
+                    prev = BOCU1_PREV(c);
+                    if(DIFF_IS_SINGLE(diff)){
+                        target.put((byte)PACK_SINGLE_DIFF(diff));
+                        if(offsets!=null){
+                            offsets.put(sourceIndex++);
+                        }
+                        --targetCapacity;
+                        sourceIndex=nextSourceIndex;
+                        if(c<0x3000){
+                            labelType = fastSingle;
+                            return labelType;
+                        }
+                    } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity){
+                        /*optimize 2 byte case*/
+                        int m = 0;
+                        if(diff>=0){
+                            diff -= BOCU1_REACH_POS_1 +1;
+                            m = diff%BOCU1_TRAIL_COUNT;
+                            diff/=BOCU1_TRAIL_COUNT;
+                            diff+=BOCU1_START_POS_2;
+                        } else {
+                            diff -= BOCU1_REACH_NEG_1;
+                            m = NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                            diff+=BOCU1_START_NEG_2;
+                        }
+                        target.put((byte)diff);
+                        target.put((byte)BOCU1_TRAIL_TO_BYTE(m));
+                        if(offsets!=null){
+                            offsets.put(sourceIndex);
+                            offsets.put(sourceIndex);
+                        }
+                        targetCapacity -= 2;
+                        sourceIndex = nextSourceIndex;
+                    } else {
+                        int length; /*will be 2..4*/
+                        diff = packDiff(diff);
+                        length = BOCU1_LENGTH_FROM_PACKED(diff);
+
+                        /*write the output character bytes from diff and length*/
+                        /*from the first if in the loop we know that targetCapacity>0*/
+                        if(length<=targetCapacity){
+                            switch(length){
+                            /*each branch falls through the next one*/
+                                case 4:
+                                    target.put((byte)(diff>>24));
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                case 3:
+                                    target.put((byte)(diff>>16));
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                case 2:
+                                    target.put((byte)(diff>>8));
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                    /*case 1 handled above*/
+                                    target.put((byte)diff);
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                default:
+                                    /*will never occur*/
+                                    break;
+                            }
+                            targetCapacity -= length;
+                            sourceIndex = nextSourceIndex;
+                        } else {
+                            ByteBuffer error = ByteBuffer.wrap(errorBuffer);
+                            /*
+                             * We actually do this backwards here:
+                             * In order to save an intermediate variable, we output
+                             * first to the overflow buffer what does not fit into the
+                             * regular target.
+                             */
+                            /* we know that 1<=targetCapacity<length<=4 */
+                            length-=targetCapacity;
+                            switch(length) {
+                                /* each branch falls through to the next one */
+                                case 3:
+                                    error.put((byte)(diff>>16));
+                                case 2:
+                                    error.put((byte)(diff>>8));
+                                case 1:
+                                    error.put((byte)diff);
+                                default:
+                                    /* will never occur */
+                                    break;
+                            }
+                            errorBufferLength = length;
+
+                            /* now output what fits into the regular target */
+                            diff>>=8*length; /* length was reduced by targetCapacity */
+                            switch(targetCapacity) {
+                                /* each branch falls through to the next one */
+                                case 3:
+                                    target.put((byte)(diff>>16));
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                case 2:
+                                    target.put((byte)(diff>>8));
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                case 1:
+                                    target.put((byte)diff);
+                                    if(offsets!= null){
+                                        offsets.put(sourceIndex);
+                                    }
+                                default:
+                                    /* will never occur */
+                                    break;
+                            }
+
+                            /* target overflow */
+                            targetCapacity=0;
+                            cr = CoderResult.OVERFLOW;
+                            break;
+                        }
+                    }
+                } else{
+                    /*target is full*/
+                    cr = CoderResult.OVERFLOW;
+                    break;
+                }
+
+            }
+            /*set the converter state back into UConverter*/
+            fromUChar32 = c<0 ? -c :0;
+            fromUnicodeStatus = prev;
+            LabelLoop = false;
+            labelType = fastSingle;
+            return labelType;
+        }
+
+ }
+
+ class CharsetDecoderBOCU extends CharsetDecoderICU{
+ public CharsetDecoderBOCU(CharsetICU cs) {
+ super(cs);
+ }
+
+ int byteIndex;
+ int sourceIndex, nextSourceIndex;
+ int prev, c , diff, count;
+ byte[] bytes;
+ int targetCapacity;
+ CoderResult cr;
+
+ /* label values for supporting behavior similar to goto in C */
+ private static final int fastSingle=0;
+ private static final int getTrail=1;
+ private static final int regularLoop=2;
+ private static final int endLoop=3;
+
+ private boolean LabelLoop;//used to break the while loop
+ private boolean afterTrail; // its value is set to true to ignore code after getTrail:
+ private int labelType;
+ /*
+ * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
+ * The UConverter fields are used as follows:
+ *
+ * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ *
+ * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
+ */
+
+ /* BOCU-1-from-Unicode conversion functions --------------------------------- */
+
+
+
+ /**
+  * Function for BOCU-1 decoder; handles multi-byte lead bytes.
+  *
+  * @param b lead byte of a multi-byte difference; values at or above BOCU1_START_NEG_2
+  *          are treated as the start of a positive difference, lower values as the
+  *          start of a negative difference
+  * @return (diff<<2)|count, where diff is the partial difference contributed by the
+  *         lead byte and count (1..3) is the number of trail bytes that must follow
+  */
+ private int decodeBocu1LeadByte(int b) {
+     int diffValue, countValue;
+
+     if(b >= BOCU1_START_NEG_2) {
+         /* positive difference */
+         if(b < BOCU1_START_POS_3) {
+             /* two bytes */
+             diffValue = (b - BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_POS_1+1;
+             countValue = 1;
+         } else if(b < BOCU1_START_POS_4) {
+             /* three bytes */
+             diffValue = (b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
+             countValue = 2;
+         } else {
+             /* four bytes */
+             diffValue = BOCU1_REACH_POS_3+1;
+             countValue = 3;
+         }
+     } else {
+         /* negative difference */
+         if(b >= BOCU1_START_NEG_3) {
+             /* two bytes */
+             diffValue=(b -BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_1;
+             countValue=1;
+         } else if(b>BOCU1_MIN) {
+             /* three bytes */
+             diffValue=(b - BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_2;
+             countValue = 2;
+         } else {
+             /* four bytes */
+             diffValue=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
+             countValue=3;
+         }
+     }
+
+     /* return the state for decoding the trail byte(s) */
+     return (diffValue<<2)|countValue;
+ }
+
+ /**
+ * Function for BOCU-1 decoder; handles multi-byte trail bytes.
+ *
+ * @param count number of remaining trail bytes including this one
+ * @param b trail byte
+ * @return new delta for diff including b - <0 indicates an error
+ *
+ * @see decodeBocu1
+ */
+ private int decodeBocu1TrailByte(int countValue, int b) {
+ b = b&UConverterConstants.UNSIGNED_BYTE_MASK;
+ if((b)<=0x20) {
+ /* skip some C0 controls and make the trail byte range contiguous */
+ b = bocu1ByteToTrail[b];
+ /* b<0 for an illegal trail byte value will result in return<0 below */
+ } else {
+ //b-= BOCU1_TRAIL_BYTE_OFFSET;
+ b = b - BOCU1_TRAIL_BYTE_OFFSET;
+ }
+
+ /* add trail byte into difference and decrement count */
+ if(countValue==1) {
+ return b;
+ } else if(countValue==2) {
+ return b*BOCU1_TRAIL_COUNT;
+ } else /* count==3 */ {
+ return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+ }
+ }
+
+ /**
+  * Decodes BOCU-1 bytes from source into target.
+  * <p>
+  * The work is split over the label methods (fastSingle, getTrail, afterGetTrail, endLoop);
+  * each returns the label to run next, and endLoop clears LabelLoop to leave the dispatch loop.
+  *
+  * @return the result left in cr by the label methods (UNDERFLOW unless one of them changed it)
+  */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+         boolean flush){
+     cr = CoderResult.UNDERFLOW;
+
+     LabelLoop = true;
+     afterTrail = false;
+     labelType = fastSingle; // start with the fast single-byte loop unless a partial character is pending
+
+     /*get the converter state*/
+     prev = toUnicodeStatus;
+
+     if(prev==0){
+         prev = BOCU1_ASCII_PREV;
+     }
+     diff = mode;
+     count = diff&3;
+     diff>>=2;
+
+     byteIndex = toULength;
+     bytes = toUBytesArray;
+
+     /* sourceIndex=-1 if the current character began in the previous buffer */
+     sourceIndex=byteIndex==0 ? 0 : -1;
+     nextSourceIndex=0;
+
+     /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+     if(count>0 && byteIndex>0 && target.position()<target.limit()) {
+         /* a multi-byte character was cut off by the previous buffer: continue with its trail bytes */
+         labelType = getTrail;
+     }
+
+     /* dispatch on the label until endLoop() stops the loop */
+     while(LabelLoop){
+         switch(labelType){
+         case fastSingle:
+             labelType = fastSingle(source, target, offsets);
+             break;
+         case getTrail:
+             labelType = getTrail(source, target, offsets);
+             break;
+         case regularLoop:
+             labelType = afterGetTrail(source, target, offsets);
+             break;
+         case endLoop:
+             endLoop(source, target, offsets);
+             break;
+         }
+     }
+
+     return cr;
+ }
+
+ /**
+  * Fast loop for single-byte differences and directly encoded bytes up to 0x20.
+  * Stops at the first byte it cannot handle (without consuming it) or when either
+  * buffer is exhausted.
+  *
+  * @return regularLoop, so that the general loop takes over
+  */
+ private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+     labelType = regularLoop;
+     /* fast loop for single-byte differences */
+     /* use count as the only loop counter variable */
+     diff = source.limit() - source.position();
+     count = target.limit()-target.position();
+     if(count>diff) {
+         count = diff;
+     }
+     while(count>0) {
+         /* peek at the next byte; the position only advances once the byte has been handled */
+         if(BOCU1_START_NEG_2 <=(c=source.get(source.position())&UConverterConstants.UNSIGNED_BYTE_MASK) && c< BOCU1_START_POS_2) {
+             c = prev + (c-BOCU1_MIDDLE);
+             if(c<0x3000) {
+                 target.put((char)c);
+                 if(offsets!=null){
+                     offsets.put(nextSourceIndex++);
+                 }
+                 prev = BOCU1_SIMPLE_PREV(c);
+             } else {
+                 break;
+             }
+         } else if((c&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0x20) {
+             if((c&UConverterConstants.UNSIGNED_BYTE_MASK) != 0x20) {
+                 prev = BOCU1_ASCII_PREV;
+             }
+             target.put((char)c);
+             if(offsets!=null){
+                 offsets.put(nextSourceIndex++);
+             }
+         } else {
+             break;
+         }
+         source.position(source.position()+1);
+         --count;
+     }
+     sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+     return labelType;
+ }
+
+ /**
+  * Reads the remaining trail bytes of a multi-byte difference and, once the last one
+  * has been consumed, computes the resulting code point in c.
+  * <p>
+  * On entry diff holds the partial difference and count the number of trail bytes still
+  * expected. Every consumed byte is also stored in bytes[].
+  *
+  * @return regularLoop (with afterTrail set) when a code point is ready to be written,
+  *         or endLoop when the source ran out or the sequence is malformed
+  */
+ private int getTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+     labelType = regularLoop;
+     for(;;) {
+         if(source.position() >= source.limit()) {
+             /* out of input in the middle of a character: the state is saved by endLoop */
+             labelType = endLoop;
+             return labelType;
+         }
+         ++nextSourceIndex;
+         c = bytes[byteIndex++] = source.get();
+
+         /* trail byte in any position */
+         c = decodeBocu1TrailByte(count, c);
+         if(c<0) {
+             cr = CoderResult.malformedForLength(1);
+             labelType = endLoop;
+             return labelType;
+         }
+
+         diff+=c;
+         if(--count==0) {
+             /* final trail byte, deliver a code point */
+             byteIndex=0;
+             c = prev + diff;
+             /*
+              * A negative difference can take the sum below zero; that is just as
+              * illegal as a value above U+10FFFF and must not reach the output code.
+              */
+             if(c < 0 || c > 0x10ffff) {
+                 cr = CoderResult.malformedForLength(1);
+                 labelType = endLoop;
+                 return labelType;
+             }
+             break;
+         }
+     }
+     afterTrail = true;
+     return labelType;
+ }
+
+ private int afterGetTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+ /*
+ * Regular loop: decode a sequence of single and lead bytes.
+ * When afterTrail is set on entry, c already holds a code point completed by getTrail(),
+ * so the first iteration reads no input and goes straight to the output code.
+ * Returns the next label: fastSingle after writing a single-byte result below 0x3000,
+ * the label chosen by getTrail() when it did not complete a character, otherwise endLoop.
+ */
+ while(afterTrail || source.hasRemaining()) {
+ if(!afterTrail){
+ if(target.position() >= target.limit()) {
+ /* target is full */
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+
+ ++nextSourceIndex;
+ c = source.get()&UConverterConstants.UNSIGNED_BYTE_MASK;
+ if(BOCU1_START_NEG_2 <= c && c < BOCU1_START_POS_2) {
+ /* Write a code point directly from a single-byte difference. */
+ c = prev + (c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ target.put((char)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ prev = BOCU1_SIMPLE_PREV(c);
+ sourceIndex = nextSourceIndex;
+ labelType = fastSingle; /* hand control back to the fast single-byte loop */
+ return labelType;
+ } /* otherwise c>=0x3000: fall through to the common output code below */
+ } else if(c <= 0x20) {
+ /*
+ * Direct-encoded C0 control code or space.
+ * Reset prev for C0 control codes but not for space.
+ */
+ if(c != 0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ target.put((char)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else if(BOCU1_START_NEG_3 <= c && c < BOCU1_START_POS_3 && source.hasRemaining()) {
+ /* Optimize two-byte case. */
+ if(c >= BOCU1_MIDDLE) {
+ diff=(c - BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_POS_1 + 1;
+ } else {
+ diff=(c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT + BOCU1_REACH_NEG_1;
+ }
+
+ /* trail byte */
+ ++nextSourceIndex;
+ c = decodeBocu1TrailByte(1, source.get());
+ if(c<0 || ((c = prev + diff + c)&UConverterConstants.UNSIGNED_INT_MASK)>0x10ffff) { /* illegal trail byte, or masked result above U+10FFFF */
+ bytes[0]= source.get(source.position()-2); /* record both bytes of the rejected sequence */
+ bytes[1]= source.get(source.position()-1);
+ byteIndex = 2;
+ cr = CoderResult.malformedForLength(2);
+ break;
+ }
+ } else if(c == BOCU1_RESET) {
+ /* only reset the state, no code point */
+ prev=BOCU1_ASCII_PREV;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else {
+ /*
+ * For multi-byte difference lead bytes, set the decoder state
+ * with the partial difference value from the lead byte and
+ * with the number of trail bytes.
+ */
+ bytes[0]= (byte)c;
+ byteIndex = 1;
+
+ diff = decodeBocu1LeadByte(c);
+ count = diff&3;
+ diff>>=2;
+ getTrail(source, target, offsets);
+ if(labelType != regularLoop){ /* getTrail() ran out of input or found an error: leave with its label */
+ return labelType;
+ }
+ }
+ }
+
+ if(afterTrail){ /* the code point delivered by getTrail() is consumed by this iteration */
+ afterTrail = false;
+ }
+
+ /* calculate the next prev and output c */
+ prev = BOCU1_PREV(c);
+ if(c<=0xffff) {
+ target.put((char)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* output surrogate pair */
+ target.put(UTF16.getLeadSurrogate(c));
+ if(target.hasRemaining()) {
+ target.put(UTF16.getTrailSurrogate(c));
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* target overflow */
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ charErrorBufferArray[0] = UTF16.getTrailSurrogate(c); /* no room left: park the trail surrogate in the char error buffer */
+ charErrorBufferLength = 1;
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ }
+ labelType = endLoop;
+ return labelType;
+ }
+
+ private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+ if(cr.isMalformed()) {
+ /* set the converter state in UConverter to deal with the next character */
+ toUnicodeStatus = BOCU1_ASCII_PREV;
+ mode = 0;
+ } else {
+ /* set the converter state back into UConverter */
+ toUnicodeStatus=prev;
+ mode=(diff<<2)|count;
+ }
+ toULength=byteIndex;
+ LabelLoop = false;
+ }
+
+ }
+
+
+ public CharsetDecoder newDecoder() { // creates a new CharsetDecoderBOCU bound to this charset
+ return new CharsetDecoderBOCU(this);
+ }
+
+ public CharsetEncoder newEncoder() { // creates a new CharsetEncoderBOCU bound to this charset
+ return new CharsetEncoderBOCU(this);
+ }
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ // the 'which' selector is not consulted here
+ CharsetICU.getCompleteUnicodeSet(setFillIn); // delegates filling of setFillIn to the shared CharsetICU helper
+ }
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java
new file mode 100644
index 00000000000..bf2c204f5be
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetCESU8.java
@@ -0,0 +1,26 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * The purpose of this class is to set isCESU8 to true in the super class, and to allow the Charset framework to open
+ * the variant UTF-8 converter without extra setup work. CESU-8 encodes/decodes supplementary characters as 6 bytes
+ * instead of the proper 4 bytes.
+ * <p>
+ * NOTE(review): nothing in this class assigns isCESU8; presumably CharsetUTF8 derives it from the
+ * runtime type of the charset -- confirm against the superclass. The class itself only forwards its
+ * constructor arguments and fills the Unicode set through getCompleteUnicodeSet().
+ */
+class CharsetCESU8 extends CharsetUTF8 {
+ public CharsetCESU8(String icuCanonicalName, String javaCanonicalName, String[] aliases) { // names and aliases are passed to CharsetUTF8 unchanged
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ // 'which' is not consulted
+ getCompleteUnicodeSet(setFillIn);
+
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
new file mode 100644
index 00000000000..d9cc0c2e673
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
@@ -0,0 +1,408 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CoderResult;
+
+/**
+ * Callback API for CharsetICU API
+ *
+ * CharsetCallback class defines some error behaviour functions called
+ * by CharsetDecoderICU and CharsetEncoderICU. The class also provides
+ * the facility by which clients can write their own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the onUnmappableCharacter() and
+ * onMalformedInput() methods, to set the behaviour of a converter
+ * when it encounters UNMAPPED/INVALID sequences.
+ * Currently the only way to set callbacks is by using CodingErrorAction.
+ * In the future we will provide set methods on CharsetEncoder and CharsetDecoder
+ * that will accept CharsetCallback fields.
+ *
+ * @stable ICU 3.6
+ */
+
+public class CharsetCallback {
+ /*
+ * FROM_U, TO_U context options for sub callback
+ */
+ private static final String SUB_STOP_ON_ILLEGAL = "i";
+
+// /*
+// * FROM_U, TO_U context options for skip callback
+// */
+// private static final String SKIP_STOP_ON_ILLEGAL = "i";
+
+// /*
+// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+// */
+// private static final String ESCAPE_ICU = null;
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ */
+ private static final String ESCAPE_JAVA = "J";
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+ */
+ private static final String ESCAPE_C = "C";
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ */
+ private static final String ESCAPE_XML_DEC = "D";
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ */
+ private static final String ESCAPE_XML_HEX = "X";
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ */
+ private static final String ESCAPE_UNICODE = "U";
+
+ /*
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code point according to CSS2 conventions (\\HHHH followed by a space)
+ */
+ private static final String ESCAPE_CSS2 = "S";
+
+ /**
+ * Decoder Callback interface
+ * @stable ICU 3.6
+ */
+ public interface Decoder {
+ /**
+ * This function is called when the bytes in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ *
+ * @param decoder the decoder on whose behalf the callback runs
+ * @param context callback option; the callbacks in this class compare it with their
+ * single-letter option strings, and null selects their default behaviour
+ * @param source input buffer being decoded
+ * @param target output buffer for decoded characters
+ * @param offsets offsets buffer handed through to the write helpers (presumably may be null -- confirm with callers)
+ * @param buffer the input that could not be handled, one value per element
+ * (the escape callback masks each element to 8 bits)
+ * @param length number of elements of buffer to process
+ * @param cr the result describing the problem; the stop callback returns it unchanged
+ * @return Result of decoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @stable ICU 3.6
+ */
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr);
+ }
+ /**
+ * Encoder Callback interface
+ * @stable ICU 3.6
+ */
+ public interface Encoder {
+ /**
+ * This function is called when the Unicode characters in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ *
+ * @param encoder the encoder on whose behalf the callback runs
+ * @param context callback option; the callbacks in this class compare it with their
+ * single-letter option strings, and null selects their default behaviour
+ * @param source input buffer being encoded
+ * @param target output buffer for encoded bytes
+ * @param offsets offsets buffer handed through to the write helpers (presumably may be null -- confirm with callers)
+ * @param buffer the UTF-16 code units that could not be handled
+ * @param length number of code units in buffer to process
+ * @param cp code point value of the input that could not be handled (the escape callback
+ * formats it instead of buffer when length is 2)
+ * @param cr the result describing the problem; the stop callback returns it unchanged
+ * @return Result of encoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @stable ICU 3.6
+ */
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr);
+ }
+ /**
+  * Skip callback: answers with CoderResult.UNDERFLOW instead of the error result.
+  * <p>
+  * With a null context every error is skipped. With the "i" (stop on illegal) option only
+  * unmappable input is skipped and any other result is returned unchanged. Every other
+  * context value, including one that is not a String, returns the result unchanged.
+  * @stable ICU 3.6
+  */
+ public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
+     public CoderResult call(CharsetEncoderICU encoder, Object context,
+             CharBuffer source, ByteBuffer target, IntBuffer offsets,
+             char[] buffer, int length, int cp, CoderResult cr){
+         if(context==null){
+             return CoderResult.UNDERFLOW;
+         }
+         /*
+          * Constant-first equals(): a context that is not a String is handled like an
+          * unknown option instead of failing with a ClassCastException.
+          */
+         if(SUB_STOP_ON_ILLEGAL.equals(context) && cr.isUnmappable()){
+             return CoderResult.UNDERFLOW;
+         }
+         return cr;
+     }
+ };
+ /**
+  * Skip callback: answers with CoderResult.UNDERFLOW instead of the error result.
+  * <p>
+  * With a null context every error is skipped. With the "i" (stop on illegal) option only
+  * results flagged as unmappable are skipped and any other result is returned unchanged.
+  * Every other context value, including one that is not a String, returns the result unchanged.
+  * @stable ICU 3.6
+  */
+ public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
+     public CoderResult call(CharsetDecoderICU decoder, Object context,
+             ByteBuffer source, CharBuffer target, IntBuffer offsets,
+             char[] buffer, int length, CoderResult cr){
+         if(context==null){
+             return CoderResult.UNDERFLOW;
+         }
+         /*
+          * Constant-first equals(): a context that is not a String is handled like an
+          * unknown option instead of failing with a ClassCastException.
+          */
+         if(SUB_STOP_ON_ILLEGAL.equals(context) && cr.isUnmappable()){
+             return CoderResult.UNDERFLOW;
+         }
+         return cr;
+     }
+ };
+ /**
+  * Write substitute callback: asks the encoder to write its substitution sequence
+  * through cbFromUWriteSub().
+  * <p>
+  * With a null context the substitution is written for every error. With the "i"
+  * (stop on illegal) option it is written only for unmappable input and any other result
+  * is returned unchanged. Every other context value, including one that is not a String,
+  * returns the result unchanged.
+  * @stable ICU 3.6
+  */
+ public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
+     public CoderResult call(CharsetEncoderICU encoder, Object context,
+             CharBuffer source, ByteBuffer target, IntBuffer offsets,
+             char[] buffer, int length, int cp, CoderResult cr){
+         /*
+          * Constant-first equals(): a context that is not a String is handled like an
+          * unknown option instead of failing with a ClassCastException.
+          */
+         if(context==null || (SUB_STOP_ON_ILLEGAL.equals(context) && cr.isUnmappable())){
+             return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+         }
+         return cr;
+     }
+ };
+ private static final char[] kSubstituteChar1 = new char[]{0x1A};
+ private static final char[] kSubstituteChar = new char[] {0xFFFD};
+ /**
+  * Write substitute callback: writes a single substitution character to the target.
+  * U+001A is used when the invalid sequence is one unit long and the charset defines
+  * a subChar1; in every other case U+FFFD is written. The context is not consulted.
+  * @stable ICU 3.6
+  */
+ public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
+     public CoderResult call(CharsetDecoderICU decoder, Object context,
+             ByteBuffer source, CharBuffer target, IntBuffer offsets,
+             char[] buffer, int length, CoderResult cr){
+
+         final CharsetICU charset = (CharsetICU) decoder.charset();
+         final boolean useSubChar1 = decoder.invalidCharLength == 1 && charset.subChar1 != 0;
+         final char[] substitute = useSubChar1 ? kSubstituteChar1 : kSubstituteChar;
+
+         /* could optimize this case, just one uchar */
+         return CharsetDecoderICU.toUWriteUChars(decoder, substitute, 0, 1, target, offsets, source.position());
+     }
+ };
+ /**
+ * Stop callback: returns the incoming result unchanged, so the problem is handed
+ * straight back to the caller. The context, the buffers and the code point are ignored.
+ * @stable ICU 3.6
+ */
+ public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ return cr;
+ }
+ };
+ /**
+ * Stop callback: returns the incoming result unchanged, so the problem is handed
+ * straight back to the caller. The context and the buffers are ignored.
+ * @stable ICU 3.6
+ */
+ public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ return cr;
+ }
+ };
+ private static final int VALUE_STRING_LENGTH = 32;
+ private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025;
+ private static final char UNICODE_U_CODEPOINT = 0x0055;
+ private static final char UNICODE_X_CODEPOINT = 0x0058;
+ private static final char UNICODE_RS_CODEPOINT = 0x005C;
+ private static final char UNICODE_U_LOW_CODEPOINT = 0x0075;
+ private static final char UNICODE_X_LOW_CODEPOINT = 0x0078;
+ private static final char UNICODE_AMP_CODEPOINT = 0x0026;
+ private static final char UNICODE_HASH_CODEPOINT = 0x0023;
+ private static final char UNICODE_SEMICOLON_CODEPOINT = 0x003B;
+ private static final char UNICODE_PLUS_CODEPOINT = 0x002B;
+ private static final char UNICODE_LEFT_CURLY_CODEPOINT = 0x007B;
+ private static final char UNICODE_RIGHT_CURLY_CODEPOINT = 0x007D;
+ private static final char UNICODE_SPACE_CODEPOINT = 0x0020;
+ /**
+  * Write escape callback: replaces the input that could not be encoded with a textual
+  * escape and writes it through the encoder.
+  * <p>
+  * The context selects the escape style:
+  * <ul>
+  * <li>"J": \\uXXXX for each code unit</li>
+  * <li>"C": \\UXXXXXXXX built from cp when length is 2, otherwise \\uXXXX for the first code unit</li>
+  * <li>"D": &amp;#DDDD; (decimal; cp when length is 2, otherwise the first code unit)</li>
+  * <li>"X": &amp;#xXXXX; (hexadecimal; cp when length is 2, otherwise the first code unit)</li>
+  * <li>"U": {U+XXXX} (cp when length is 2, otherwise the first code unit)</li>
+  * <li>"S": a backslash, cp in hexadecimal and a space</li>
+  * <li>anything else, including null or a context that is not a String: %UXXXX for each code unit</li>
+  * </ul>
+  * @stable ICU 4.0
+  */
+ public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
+     public CoderResult call(CharsetEncoderICU encoder, Object context,
+             CharBuffer source, ByteBuffer target, IntBuffer offsets,
+             char[] buffer, int length, int cp, CoderResult cr){
+         char[] valueString = new char[VALUE_STRING_LENGTH];
+         int valueStringLength = 0;
+         int i = 0;
+
+         /* a missing or non-String context selects the default (%UXXXX) escape below */
+         final String option = (context instanceof String) ? (String)context : null;
+
+         if (ESCAPE_JAVA.equals(option)) {
+             while (i < length) {
+                 valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
+                 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+             }
+         } else if (ESCAPE_C.equals(option)) {
+             valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;        /* adding \ */
+
+             if (length == 2) {
+                 valueString[valueStringLength++] = UNICODE_U_CODEPOINT;     /* adding U */
+                 /*
+                  * itou() returns the number of digits it wrote, so the value has to be
+                  * added to the running length; assigning it would drop the two prefix
+                  * characters from the count and cut off the last two digits.
+                  */
+                 valueStringLength += itou(valueString, valueStringLength, cp, 16, 8);
+             } else {
+                 valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+             }
+         } else if (ESCAPE_XML_DEC.equals(option)) {
+             valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;       /* adding & */
+             valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;      /* adding # */
+             if (length == 2) {
+                 valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
+             } else {
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
+             }
+             valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+         } else if (ESCAPE_XML_HEX.equals(option)) {
+             valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;       /* adding & */
+             valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT;      /* adding # */
+             valueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT;     /* adding x */
+             if (length == 2) {
+                 valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+             } else {
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
+             }
+             valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+         } else if (ESCAPE_UNICODE.equals(option)) {
+             valueString[valueStringLength++] = UNICODE_LEFT_CURLY_CODEPOINT;  /* adding { */
+             valueString[valueStringLength++] = UNICODE_U_CODEPOINT;           /* adding U */
+             valueString[valueStringLength++] = UNICODE_PLUS_CODEPOINT;        /* adding + */
+             if (length == 2) {
+                 valueStringLength += itou(valueString, valueStringLength, cp, 16, 4);
+             } else {
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+             }
+             valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
+         } else if (ESCAPE_CSS2.equals(option)) {
+             valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;        /* adding \ */
+             valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+             /* Always add space character, because the next character might be whitespace,
+                which would erroneously be considered the termination of the escape sequence. */
+             valueString[valueStringLength++] = UNICODE_SPACE_CODEPOINT;
+         } else {
+             /* default ICU style: %UXXXX for every code unit */
+             while (i < length) {
+                 valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                 valueString[valueStringLength++] = UNICODE_U_CODEPOINT;            /* adding U */
+                 valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+             }
+         }
+
+         return encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
+     }
+ };
+ /**
+  * Write escape callback: replaces each input value that could not be decoded with a
+  * textual escape and writes the result to the target.
+  * <p>
+  * The context selects the escape style:
+  * <ul>
+  * <li>"D": &amp;#DDD; (decimal)</li>
+  * <li>"X": &amp;#xXX; (hexadecimal)</li>
+  * <li>"C": \\xXX</li>
+  * <li>anything else, including null or a context that is not a String: %XXX
+  *     (percent sign, capital X, two hexadecimal digits)</li>
+  * </ul>
+  * @stable ICU 4.0
+  */
+ public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
+     public CoderResult call(CharsetDecoderICU decoder, Object context,
+             ByteBuffer source, CharBuffer target, IntBuffer offsets,
+             char[] buffer, int length, CoderResult cr){
+         /* shape of one escape: leading characters, digit radix, minimum digit count, optional ';' */
+         char[] lead = new char[] { UNICODE_PERCENT_SIGN_CODEPOINT, UNICODE_X_CODEPOINT };
+         int radix = 16;
+         int minDigits = 2;
+         boolean closeWithSemicolon = false;
+
+         if (context instanceof String) {
+             final String option = (String)context;
+             if (option.equals(ESCAPE_XML_DEC)) {
+                 lead = new char[] { UNICODE_AMP_CODEPOINT, UNICODE_HASH_CODEPOINT };
+                 radix = 10;
+                 minDigits = 0;
+                 closeWithSemicolon = true;
+             } else if (option.equals(ESCAPE_XML_HEX)) {
+                 lead = new char[] { UNICODE_AMP_CODEPOINT, UNICODE_HASH_CODEPOINT, UNICODE_X_LOW_CODEPOINT };
+                 minDigits = 0;
+                 closeWithSemicolon = true;
+             } else if (option.equals(ESCAPE_C)) {
+                 lead = new char[] { UNICODE_RS_CODEPOINT, UNICODE_X_LOW_CODEPOINT };
+             }
+         }
+
+         final char[] escaped = new char[VALUE_STRING_LENGTH];
+         int used = 0;
+         for (int i = 0; i < length; i++) {
+             for (int k = 0; k < lead.length; k++) {
+                 escaped[used++] = lead[k];
+             }
+             used += itou(escaped, used, buffer[i] & UConverterConstants.UNSIGNED_BYTE_MASK, radix, minDigits);
+             if (closeWithSemicolon) {
+                 escaped[used++] = UNICODE_SEMICOLON_CODEPOINT;
+             }
+         }
+
+         return CharsetDecoderICU.toUWriteUChars(decoder, escaped, 0, used, target, offsets, 0);
+     }
+ };
+ /**
+  * Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
+  * Writes the radix-based representation of a number into a char array, left-padded
+  * with zeroes up to minwidth.
+  *
+  * @param buffer destination array
+  * @param sourceIndex index in buffer at which the first output char is stored
+  * @param i number to format
+  * @param radix base of the representation; digits above 9 are written as 'A', 'B', ...
+  * @param minwidth minimum number of chars to produce
+  * @return number of chars written
+  */
+ private static final int itou(char[] buffer, int sourceIndex, int i, int radix, int minwidth) {
+     int written = 0;
+     int remaining = i;
+
+     /* produce the digits least significant first; at least one digit is always written */
+     do {
+         final int digit = remaining % radix;
+         buffer[sourceIndex + written++] = (char)(digit <= 9 ? (0x0030+digit) : (0x0030+digit+7));
+         remaining = remaining/radix;
+     } while (remaining != 0 && (sourceIndex + written) < buffer.length);
+
+     /* zero padding */
+     while (written < minwidth) {
+         buffer[sourceIndex + written++] = (char)0x0030;
+     }
+
+     /* the digits were produced backwards: reverse them in place */
+     for (int low = sourceIndex, high = sourceIndex + written - 1; low < high; low++, high--) {
+         final char swap = buffer[high];
+         buffer[high] = buffer[low];
+         buffer[low] = swap;
+     }
+
+     return written;
+ }
+
+ /*
+ * No need to create an instance: this class only exposes static callback constants
+ * and the nested callback interfaces, so the constructor is private.
+ */
+ private CharsetCallback() {
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java
new file mode 100644
index 00000000000..8e62234e12a
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetCompoundText.java
@@ -0,0 +1,622 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
+import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+class CharsetCompoundText extends CharsetICU {
+ /* Substitution byte (0x3F) handed to the encoder's superclass constructor. */
+ private static final byte[] fromUSubstitution = new byte[] { (byte) 0x3F };
+ /* Sub-converters, indexed by the state constants below; populated by LoadConverters(). */
+ private CharsetMBCS myConverterArray[];
+ /*
+ * Currently selected state (an index into escSeqCompoundText / myConverterArray).
+ * NOTE(review): this field belongs to the charset object and is read and written by both
+ * the encoder and the decoder inner classes, so it is shared by every coder created from
+ * this charset instance -- confirm that this is intended.
+ */
+ private byte state;
+
+ /* Returned by findStateFromEscSeq() when no known escape sequence matches. */
+ private final static byte INVALID = -2;
+ /* Returned by findStateFromEscSeq() when the input ends in the middle of an escape sequence. */
+ private final static byte DO_SEARCH = -1;
+ /* State 0: bytes are converted directly, without a sub-converter (slot 0 is null). */
+ private final static byte COMPOUND_TEXT_SINGLE_0 = 0;
+ private final static byte COMPOUND_TEXT_SINGLE_1 = 1;
+ private final static byte COMPOUND_TEXT_SINGLE_2 = 2;
+ private final static byte COMPOUND_TEXT_SINGLE_3 = 3;
+
+ /* States 4..11 are not referenced by name; they are only reached through index loops. */
+ /*private final static byte COMPOUND_TEXT_DOUBLE_1 = 4;
+ private final static byte COMPOUND_TEXT_DOUBLE_2 = 5;
+ private final static byte COMPOUND_TEXT_DOUBLE_3 = 6;
+ private final static byte COMPOUND_TEXT_DOUBLE_4 = 7;
+ private final static byte COMPOUND_TEXT_DOUBLE_5 = 8;
+ private final static byte COMPOUND_TEXT_DOUBLE_6 = 9;
+ private final static byte COMPOUND_TEXT_DOUBLE_7 = 10;
+
+ private final static byte COMPOUND_TEXT_TRIPLE_DOUBLE = 11;*/
+
+ private final static byte IBM_915 = 12;
+ private final static byte IBM_916 = 13;
+ private final static byte IBM_914 = 14;
+ private final static byte IBM_874 = 15;
+ private final static byte IBM_912 = 16;
+ private final static byte IBM_913 = 17;
+ private final static byte ISO_8859_14 = 18;
+ private final static byte IBM_923 = 19;
+
+ /* Size of myConverterArray and of the per-coder sub-encoder/sub-decoder arrays. */
+ private final static byte NUM_OF_CONVERTERS = 20;
+
+ /* Exclusive upper bound of the loops over the "icu-internal-compound-*" converters (indices 1..11). */
+ private final static byte SEARCH_LENGTH = 12;
+
+ /*
+ * Escape sequences, one row per state: the row index is the state value.
+ * findStateFromEscSeq() returns the index of the matching row, and the encoder
+ * writes row [currentState] whenever the state changes.
+ */
+ private final static byte[][] escSeqCompoundText = {
+ /* Single */
+ { 0x1B, 0x2D, 0x41 },
+ { 0x1B, 0x2D, 0x4D },
+ { 0x1B, 0x2D, 0x46 },
+ { 0x1B, 0x2D, 0x47 },
+
+ /* Double */
+ { 0x1B, 0x24, 0x29, 0x41 },
+ { 0x1B, 0x24, 0x29, 0x42 },
+ { 0x1B, 0x24, 0x29, 0x43 },
+ { 0x1B, 0x24, 0x29, 0x44 },
+ { 0x1B, 0x24, 0x29, 0x47 },
+ { 0x1B, 0x24, 0x29, 0x48 },
+ { 0x1B, 0x24, 0x29, 0x49 },
+
+ /* Triple/Double */
+ { 0x1B, 0x25, 0x47 },
+
+ /*IBM-915*/
+ { 0x1B, 0x2D, 0x4C },
+ /*IBM-916*/
+ { 0x1B, 0x2D, 0x48 },
+ /*IBM-914*/
+ { 0x1B, 0x2D, 0x44 },
+ /*IBM-874*/
+ { 0x1B, 0x2D, 0x54 },
+ /*IBM-912*/
+ { 0x1B, 0x2D, 0x42 },
+ /* IBM-913 */
+ { 0x1B, 0x2D, 0x43 },
+ /* ISO-8859_14 */
+ { 0x1B, 0x2D, 0x5F },
+ /* IBM-923 */
+ { 0x1B, 0x2D, 0x62 },
+ };
+
+ /* First byte of every escape sequence in escSeqCompoundText. */
+ private final static byte ESC_START = 0x1B;
+
+    /**
+     * Tells whether a code point belongs to state COMPOUND_TEXT_SINGLE_0, i.e. is written
+     * as a single byte without a sub-converter: NUL, TAB, LF, U+0020..U+007F or U+00A0..U+00FF.
+     */
+    private static boolean isASCIIRange(int codepoint) {
+        switch (codepoint) {
+        case 0x0000:
+        case 0x0009:
+        case 0x000A:
+            return true;
+        default:
+            return (0x0020 <= codepoint && codepoint <= 0x007f)
+                    || (0x00A0 <= codepoint && codepoint <= 0x00FF);
+        }
+    }
+
+ private static boolean isIBM915(int codepoint) {
+ if ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) {
+ return true;
+ }
+ return false;
+ }
+
+ private static boolean isIBM916(int codepoint) {
+ if ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) {
+ return true;
+ }
+ return false;
+ }
+
+    /** Tells whether a code point is routed to state COMPOUND_TEXT_SINGLE_3. */
+    private static boolean isCompoundS3(int codepoint) {
+        switch (codepoint) {
+        case 0x060C:
+        case 0x061B:
+        case 0x061F:
+        case 0x200B:
+        case 0x0FE74:
+            return true;
+        default:
+            return (0x0621 <= codepoint && codepoint <= 0x063A)
+                    || (0x0640 <= codepoint && codepoint <= 0x0652)
+                    || (0x0660 <= codepoint && codepoint <= 0x066D)
+                    || (0x0FE70 <= codepoint && codepoint <= 0x0FE72)
+                    || (0x0FE76 <= codepoint && codepoint <= 0x0FEBE);
+        }
+    }
+
+    /** Tells whether a code point is routed to state COMPOUND_TEXT_SINGLE_2. */
+    private static boolean isCompoundS2(int codepoint) {
+        switch (codepoint) {
+        case 0x02BC:
+        case 0x02BD:
+        case 0x2015:
+            return true;
+        default:
+            return 0x0384 <= codepoint && codepoint <= 0x03CE;
+        }
+    }
+
+    /** Tells whether a code point is routed to the IBM-914 sub-converter. */
+    private static boolean isIBM914(int codepoint) {
+        switch (codepoint) {
+        case 0x0100:
+        case 0x0101:
+        case 0x0112:
+        case 0x0113:
+        case 0x0116:
+        case 0x0117:
+        case 0x0122:
+        case 0x0123:
+        case 0x012E:
+        case 0x012F:
+        case 0x013B:
+        case 0x013C:
+        case 0x0145:
+        case 0x0146:
+        case 0x0156:
+        case 0x0157:
+        case 0x0172:
+        case 0x0173:
+            return true;
+        default:
+            return (0x0128 <= codepoint && codepoint <= 0x012B)
+                    || (0x0136 <= codepoint && codepoint <= 0x0138)
+                    || (0x014A <= codepoint && codepoint <= 0x014D)
+                    || (0x0166 <= codepoint && codepoint <= 0x016B);
+        }
+    }
+
+ private static boolean isIBM874(int codepoint) {
+ if ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) {
+ return true;
+ }
+ return false;
+ }
+
+    /**
+     * Tells whether a code point is routed to the IBM-912 (ISO 8859-2 repertoire) sub-converter.
+     *
+     * The original condition tested U+0147 twice; the second test is U+0148 here, the
+     * lowercase partner of U+0147, which ISO 8859-2 also contains. Without it U+0148 was
+     * never sent to the IBM-912 converter.
+     */
+    private static boolean isIBM912(int codepoint) {
+        if ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) ||
+                (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) ||
+                (codepoint == 0x0147) || (codepoint == 0x0148) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) ||
+                (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) ||
+                (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) ||
+                (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) {
+            return true;
+        }
+        return false;
+    }
+
+    /** Tells whether a code point is routed to the IBM-913 sub-converter. */
+    private static boolean isIBM913(int codepoint) {
+        switch (codepoint) {
+        case 0x011C:
+        case 0x011D:
+        case 0x0120:
+        case 0x0121:
+        case 0x0134:
+        case 0x0135:
+        case 0x015C:
+        case 0x015D:
+        case 0x016C:
+        case 0x016D:
+            return true;
+        default:
+            return (0x0108 <= codepoint && codepoint <= 0x010B)
+                    || (0x0124 <= codepoint && codepoint <= 0x0127);
+        }
+    }
+
+    /** Tells whether a code point is routed to state COMPOUND_TEXT_SINGLE_1. */
+    private static boolean isCompoundS1(int codepoint) {
+        switch (codepoint) {
+        case 0x011E:
+        case 0x011F:
+        case 0x0130:
+        case 0x0131:
+            return true;
+        default:
+            return 0x0218 <= codepoint && codepoint <= 0x021B;
+        }
+    }
+
+    /** Tells whether a code point is routed to the ISO-8859-14 sub-converter. */
+    private static boolean isISO8859_14(int codepoint) {
+        switch (codepoint) {
+        case 0x1E0A:
+        case 0x1E0B:
+        case 0x1E1E:
+        case 0x1E1F:
+        case 0x1E40:
+        case 0x1E41:
+        case 0x1E56:
+        case 0x1E57:
+        case 0x1E60:
+        case 0x1E61:
+        case 0x1E6A:
+        case 0x1E6B:
+        case 0x1EF2:
+        case 0x1EF3:
+            return true;
+        default:
+            return (0x0174 <= codepoint && codepoint <= 0x0177)
+                    || (0x1E80 <= codepoint && codepoint <= 0x1E85);
+        }
+    }
+
+    /** Tells whether a code point is routed to the IBM-923 sub-converter. */
+    private static boolean isIBM923(int codepoint) {
+        switch (codepoint) {
+        case 0x0152:
+        case 0x0153:
+        case 0x0178:
+        case 0x20AC:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+ private static int findNextEsc(ByteBuffer source) {
+ int sourceLimit = source.limit();
+ for (int i = source.position(); i < sourceLimit; i++) {
+ if (source.get(i) == 0x1B) {
+ return i;
+ }
+ }
+ return source.limit();
+ }
+
+    /**
+     * Maps a code point to the state (sub-converter index) whose range predicate accepts it.
+     * The predicates are tried in a fixed order and the first match wins.
+     *
+     * @param codepoint code point to classify
+     * @return the matching state, or -1 when no predicate accepts the code point
+     */
+    private static byte getState(int codepoint) {
+        if (isASCIIRange(codepoint)) {
+            return COMPOUND_TEXT_SINGLE_0;
+        }
+        if (isIBM912(codepoint)) {
+            return IBM_912;
+        }
+        if (isIBM913(codepoint)) {
+            return IBM_913;
+        }
+        if (isISO8859_14(codepoint)) {
+            return ISO_8859_14;
+        }
+        if (isIBM923(codepoint)) {
+            return IBM_923;
+        }
+        if (isIBM874(codepoint)) {
+            return IBM_874;
+        }
+        if (isIBM914(codepoint)) {
+            return IBM_914;
+        }
+        if (isCompoundS2(codepoint)) {
+            return COMPOUND_TEXT_SINGLE_2;
+        }
+        if (isCompoundS3(codepoint)) {
+            return COMPOUND_TEXT_SINGLE_3;
+        }
+        if (isIBM916(codepoint)) {
+            return IBM_916;
+        }
+        if (isIBM915(codepoint)) {
+            return IBM_915;
+        }
+        if (isCompoundS1(codepoint)) {
+            return COMPOUND_TEXT_SINGLE_1;
+        }
+        /* nothing matched */
+        return -1;
+    }
+
+ private static byte findStateFromEscSeq(ByteBuffer source, byte[] toUBytes, int toUBytesLength) {
+ byte state = INVALID;
+ int sourceIndex = source.position();
+ boolean matchFound = false;
+ byte i, n;
+ int offset = toUBytesLength;
+ int sourceLimit = source.limit();
+
+ for (i = 0; i < escSeqCompoundText.length; i++) {
+ matchFound = true;
+ for (n = 0; n < escSeqCompoundText[i].length; n++) {
+ if (n < toUBytesLength) {
+ if (toUBytes[n] != escSeqCompoundText[i][n]) {
+ matchFound = false;
+ break;
+ }
+ } else if ((sourceIndex + (n - offset)) >= sourceLimit) {
+ return DO_SEARCH;
+ } else if (source.get(sourceIndex + (n - offset)) != escSeqCompoundText[i][n]) {
+ matchFound = false;
+ break;
+ }
+ }
+ if (matchFound) {
+ break;
+ }
+ }
+
+ if (matchFound) {
+ state = i;
+ source.position(sourceIndex + (escSeqCompoundText[i].length - offset));
+ }
+
+ return state;
+ }
+
+    /**
+     * Creates the compound text charset, sets its per-character size bounds and loads all
+     * sub-converters.
+     *
+     * @param icuCanonicalName  passed through to the superclass constructor
+     * @param javaCanonicalName passed through to the superclass constructor
+     * @param aliases           passed through to the superclass constructor
+     */
+    public CharsetCompoundText(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+        super(icuCanonicalName, javaCanonicalName, aliases);
+
+        minBytesPerChar = 1;
+        maxBytesPerChar = 6;
+        maxCharsPerByte = 1;
+
+        LoadConverters();
+    }
+
+ private void LoadConverters() {
+ myConverterArray = new CharsetMBCS[NUM_OF_CONVERTERS];
+
+ myConverterArray[COMPOUND_TEXT_SINGLE_0] = null;
+
+ for (int i = 1; i < SEARCH_LENGTH; i++) {
+ String name = "icu-internal-compound-";
+ if (i <= 3) {
+ name = name + "s" + i;
+ } else if (i <= 10) {
+ name = name + "d" + (i - 3);
+ } else {
+ name = name + "t";
+ }
+
+ myConverterArray[i] = (CharsetMBCS)CharsetICU.forNameICU(name);
+ }
+
+ myConverterArray[IBM_915] = (CharsetMBCS)CharsetICU.forNameICU("ibm-915_P100-1995");
+ myConverterArray[IBM_916] = (CharsetMBCS)CharsetICU.forNameICU("ibm-916_P100-1995");
+ myConverterArray[IBM_914] = (CharsetMBCS)CharsetICU.forNameICU("ibm-914_P100-1995");
+ myConverterArray[IBM_874] = (CharsetMBCS)CharsetICU.forNameICU("ibm-874_P100-1995");
+ myConverterArray[IBM_912] = (CharsetMBCS)CharsetICU.forNameICU("ibm-912_P100-1995");
+ myConverterArray[IBM_913] = (CharsetMBCS)CharsetICU.forNameICU("ibm-913_P100-2000");
+ myConverterArray[ISO_8859_14] = (CharsetMBCS)CharsetICU.forNameICU("iso-8859_14-1998");
+ myConverterArray[IBM_923] = (CharsetMBCS)CharsetICU.forNameICU("ibm-923_P100-1998");
+ }
+
+ class CharsetEncoderCompoundText extends CharsetEncoderICU {
+        /* One sub-encoder per state; slot 0 is unused and stays null. */
+        CharsetEncoderMBCS gbEncoder[];
+
+        /**
+         * Creates the encoder together with one sub-encoder for every loaded sub-converter.
+         *
+         * @param cs charset passed to the superclass constructor
+         */
+        public CharsetEncoderCompoundText(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+
+            /* a fresh array is all null, so slot 0 needs no explicit assignment */
+            gbEncoder = new CharsetEncoderMBCS[NUM_OF_CONVERTERS];
+            for (int index = 1; index < NUM_OF_CONVERTERS; index++) {
+                gbEncoder[index] = (CharsetEncoderMBCS) myConverterArray[index].newEncoder();
+            }
+        }
+
+ protected void implReset() {
+ super.implReset();
+ for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
+ if (gbEncoder[i] != null) {
+ gbEncoder[i].implReset();
+ }
+ }
+ }
+
+ /**
+ * Encodes chars from source into target. Each code point is classified with getState();
+ * when the resulting state differs from the current one, the state's escape sequence is
+ * written in front of the encoded bytes.
+ *
+ * A lead surrogate that is the last char of the input is remembered in fromUChar32 and
+ * combined with the first char of the next call. Bytes that do not fit into target are
+ * stored in errorBuffer / errorBufferLength.
+ *
+ * @param source chars to encode
+ * @param target receives the encoded bytes
+ * @param offsets passed through to the sub-encoders
+ * @param flush not read by this method; the sub-encoders are always called with true
+ * @return UNDERFLOW when all input was processed, OVERFLOW when target is full, or an
+ * error result for unmatched surrogates and for results reported by a sub-encoder
+ */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ int sourceChar;
+ char []sourceCharArray = { 0x0000 };
+ /* scratch buffer that receives the bytes of a single encoded character */
+ ByteBuffer tmpTargetBuffer = ByteBuffer.allocate(3);
+ /* escape sequence (if any) followed by the character bytes, before copying to target */
+ byte[] targetBytes = new byte[10];
+ int targetLength = 0;
+ byte currentState = state;
+ byte tmpState = 0;
+ int i = 0;
+ /* emulates the "goto getTrail" of the C implementation */
+ boolean gotoGetTrail = false;
+
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ else if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate */
+ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
+ // goto getTrail label
+ gotoGetTrail = true;
+ }
+
+ while (source.hasRemaining()) {
+ if (target.hasRemaining()) {
+ if (!gotoGetTrail) {
+ sourceChar = source.get();
+ }
+
+ targetLength = 0;
+ tmpTargetBuffer.position(0);
+ tmpTargetBuffer.limit(3);
+
+ /* check if the char is a First surrogate */
+ if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
+ if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
+// getTrail label
+ /* reset gotoGetTrail flag*/
+ gotoGetTrail = false;
+
+ /* look ahead to find the trail surrogate */
+ if (source.hasRemaining()) {
+ /* test the following code unit */
+ char trail = source.get();
+ source.position(source.position()-1);
+ if (UTF16.isTrailSurrogate(trail)) {
+ source.get();
+ sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
+ fromUChar32 = 0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ }
+
+ tmpState = getState(sourceChar);
+
+ /* NOTE(review): the cast keeps only the low 16 bits of a supplementary code point -- confirm this is acceptable */
+ sourceCharArray[0] = (char)sourceChar;
+
+ if (tmpState < 0) {
+ /* Test all available converters */
+ for (i = 1; i < SEARCH_LENGTH; i++) {
+ err = gbEncoder[i].cnvMBCSFromUnicodeWithOffsets(CharBuffer.wrap(sourceCharArray), tmpTargetBuffer, offsets, true);
+ if (!err.isError()) {
+ tmpState = (byte)i;
+ tmpTargetBuffer.limit(tmpTargetBuffer.position());
+ /* NOTE(review): this resets the superclass state and every sub-encoder, not only the ones tried -- confirm */
+ implReset();
+ break;
+ }
+ }
+ } else if (tmpState == COMPOUND_TEXT_SINGLE_0) {
+ /* state 0: the low byte of the code point is the output byte */
+ tmpTargetBuffer.put(0, (byte)sourceChar);
+ tmpTargetBuffer.limit(1);
+ } else {
+ err = gbEncoder[tmpState].cnvMBCSFromUnicodeWithOffsets(CharBuffer.wrap(sourceCharArray), tmpTargetBuffer, offsets, true);
+ if (!err.isError()) {
+ tmpTargetBuffer.limit(tmpTargetBuffer.position());
+ }
+ }
+ if (err.isError()) {
+ break;
+ }
+
+ if (currentState != tmpState) {
+ currentState = tmpState;
+
+ /* Write escape sequence if necessary */
+ for (i = 0; i < escSeqCompoundText[currentState].length; i++) {
+ targetBytes[i] = escSeqCompoundText[currentState][i];
+ }
+ targetLength = i;
+ }
+
+ /* append the character bytes after the (possibly empty) escape sequence */
+ for (i = 0; i < tmpTargetBuffer.limit(); i++) {
+ targetBytes[i+targetLength] = tmpTargetBuffer.get(i);
+ }
+ targetLength += i;
+
+ /* copy to target; on overflow i is left at the first byte that did not fit */
+ for (i = 0; i < targetLength; i++) {
+ if (target.hasRemaining()) {
+ target.put(targetBytes[i]);
+ } else {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ } else {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+
+ if (err.isOverflow()) {
+ /* keep the bytes that did not fit so that they are not lost */
+ int m = 0;
+ for (int n = i; n < targetLength; n++) {
+ this.errorBuffer[m++] = targetBytes[n];
+ }
+ this.errorBufferLength = m;
+ }
+ state = currentState;
+
+ return err;
+ }
+ }
+
+ class CharsetDecoderCompoundText extends CharsetDecoderICU {
+        /* One sub-decoder per state; slot 0 is unused and stays null. */
+        CharsetDecoderMBCS gbDecoder[];
+
+        /**
+         * Creates the decoder together with one sub-decoder for every loaded sub-converter.
+         *
+         * @param cs charset passed to the superclass constructor
+         */
+        public CharsetDecoderCompoundText(CharsetICU cs) {
+            super(cs);
+
+            /* a fresh array is all null, so slot 0 needs no explicit assignment */
+            gbDecoder = new CharsetDecoderMBCS[NUM_OF_CONVERTERS];
+            for (int index = 1; index < NUM_OF_CONVERTERS; index++) {
+                gbDecoder[index] = (CharsetDecoderMBCS) myConverterArray[index].newDecoder();
+            }
+        }
+
+ protected void implReset() {
+ super.implReset();
+ for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
+ if (gbDecoder[i] != null) {
+ gbDecoder[i].implReset();
+ }
+ }
+ }
+
+        /**
+         * Decodes compound text. An escape sequence selects the state; in state
+         * COMPOUND_TEXT_SINGLE_0 each byte becomes one char, in every other state the bytes
+         * up to the next ESC are handed to the sub-decoder of that state.
+         *
+         * An escape sequence that is cut off by the end of the input is saved in
+         * toUBytesArray / toULength and completed on the next call.
+         *
+         * Fix: the sub-decoder's overflow chars are now taken over whenever it reports
+         * OVERFLOW. The original nested that code inside "if (err.isError())", but
+         * CoderResult.isError() is true only for malformed/unmappable results, so the
+         * hand-over could never run and pending output chars were dropped.
+         *
+         * @param source  bytes to decode
+         * @param target  receives the decoded chars
+         * @param offsets passed through to the sub-decoders
+         * @param flush   not read by this method; the sub-decoders are always called with true
+         * @return UNDERFLOW when all input was processed or more input is needed, OVERFLOW when
+         *         target is full, or an error result for an unknown escape sequence and for
+         *         errors reported by a sub-decoder
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+            CoderResult err = CoderResult.UNDERFLOW;
+            byte[] sourceChar = { 0x00 };
+            byte currentState = state;
+            byte tmpState = currentState;
+            CharsetDecoderMBCS decoder;
+            int sourceLimit = source.limit();
+
+            if (!source.hasRemaining())
+                return CoderResult.UNDERFLOW;
+            else if (!target.hasRemaining())
+                return CoderResult.OVERFLOW;
+
+            while (source.hasRemaining()) {
+                if (target.hasRemaining()) {
+                    /* look at the first pending byte: saved from a previous call, or peeked from source */
+                    if (this.toULength > 0) {
+                        sourceChar[0] = this.toUBytesArray[0];
+                    } else {
+                        sourceChar[0] = source.get(source.position());
+                    }
+
+                    if (sourceChar[0] == ESC_START) {
+                        tmpState = findStateFromEscSeq(source, this.toUBytesArray, this.toULength);
+                        if (tmpState == DO_SEARCH) {
+                            /* incomplete escape sequence: save what is there and wait for more input */
+                            while (source.hasRemaining()) {
+                                this.toUBytesArray[this.toULength++] = source.get();
+                            }
+                            break;
+                        }
+                        if (tmpState < 0) {
+                            err = CoderResult.malformedForLength(1);
+                            break;
+                        }
+
+                        this.toULength = 0;
+                    }
+
+                    currentState = tmpState;
+
+                    if (currentState == COMPOUND_TEXT_SINGLE_0) {
+                        /* direct conversion: one byte, one char, until the next ESC */
+                        while (source.hasRemaining()) {
+                            if (!target.hasRemaining()) {
+                                err = CoderResult.OVERFLOW;
+                                break;
+                            }
+                            if (source.get(source.position()) == ESC_START) {
+                                break;
+                            }
+                            target.put((char)(UConverterConstants.UNSIGNED_BYTE_MASK&source.get()));
+                        }
+                    } else if (source.hasRemaining()) {
+                        /* let the sub-decoder see only the bytes up to the next escape sequence */
+                        source.limit(findNextEsc(source));
+
+                        decoder = gbDecoder[currentState];
+
+                        decoder.toUBytesArray = this.toUBytesArray;
+                        decoder.toULength = this.toULength;
+
+                        err = decoder.decodeLoop(source, target, offsets, true);
+
+                        this.toULength = decoder.toULength;
+                        decoder.toULength = 0;
+
+                        if (err.isOverflow()) {
+                            /* take over the chars that did not fit into target */
+                            this.charErrorBufferArray = decoder.charErrorBufferArray;
+                            this.charErrorBufferBegin = decoder.charErrorBufferBegin;
+                            this.charErrorBufferLength = decoder.charErrorBufferLength;
+
+                            decoder.charErrorBufferBegin = 0;
+                            decoder.charErrorBufferLength = 0;
+                        }
+
+                        source.limit(sourceLimit);
+                    }
+
+                    if (err.isError()) {
+                        break;
+                    }
+                } else {
+                    err = CoderResult.OVERFLOW;
+                    break;
+                }
+            }
+            state = currentState;
+            return err;
+        }
+ }
+
+ /** Creates a new compound text decoder bound to this charset. */
+ public CharsetDecoder newDecoder() {
+ return new CharsetDecoderCompoundText(this);
+ }
+
+ /** Creates a new compound text encoder bound to this charset. */
+ public CharsetEncoder newEncoder() {
+ return new CharsetEncoderCompoundText(this);
+ }
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+ for (int i = 1; i < NUM_OF_CONVERTERS; i++) {
+ myConverterArray[i].MBCSGetFilteredUnicodeSetForUnicode(myConverterArray[i].sharedData, setFillIn, which, CharsetMBCS.UCNV_SET_FILTER_NONE);
+ }
+ setFillIn.add(0x0000);
+ setFillIn.add(0x0009);
+ setFillIn.add(0x000A);
+ setFillIn.add(0x0020, 0x007F);
+ setFillIn.add(0x00A0, 0x00FF);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java
new file mode 100644
index 00000000000..c8fc89e4387
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetDecoderICU.java
@@ -0,0 +1,735 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+
+/**
+ * An abstract class that provides framework methods of decoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter semantics of ICU4C.
+ * @stable ICU 3.6
+ */
+public abstract class CharsetDecoderICU extends CharsetDecoder{
+
+ /* converter state; cleared by implReset() */
+ int toUnicodeStatus;
+ /* input bytes kept from an earlier call (presumably an incomplete sequence -- confirm against the subclasses) */
+ byte[] toUBytesArray = new byte[128];
+ int toUBytesBegin = 0;
+ /* number of valid bytes in toUBytesArray */
+ int toULength;
+ /* output chars that did not fit into the target; decode() writes them out first on the next call */
+ char[] charErrorBufferArray = new char[128];
+ int charErrorBufferLength;
+ int charErrorBufferBegin;
+ char[] invalidCharBuffer = new char[128];
+ int invalidCharLength;
+
+ /**
+ * Maximum number of indexed bytes
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ protected static final int EXT_MAX_BYTES = 0x1f;
+
+ /* store previous UChars/chars to continue partial matches */
+ byte[] preToUArray = new byte[EXT_MAX_BYTES];
+ int preToUBegin;
+ int preToULength; /* negative: replay */
+ int preToUFirstLength; /* length of first character */
+ int mode;
+
+ /* context object set through setToUCallback() */
+ Object toUContext = null;
+ /* callbacks for the two error kinds; both default to "stop" */
+ private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP;
+ private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
+ /*
+ * Dispatcher: forwards to onUnmappableCharacter when the result is "unmappable" and to
+ * onMalformedInput for every other result.
+ */
+ CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
+ CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
+ if (cr.isUnmappable()) {
+ return onUnmappableCharacter.call(decoder, context, source, target, offsets, buffer,
+ length, cr);
+ } else /* if (cr.isMalformed()) */ {
+ return onMalformedInput.call(decoder, context, source, target, offsets, buffer,
+ length, cr);
+ }
+ // return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target, offsets, buffer, length, cr);
+ }
+ };
+
+ // exist to keep implOnMalformedInput and implOnUnmappableInput from being too recursive
+ private boolean malformedInputCalled = false;
+ private boolean unmappableCharacterCalled = false;
+
+ /*
+ * Constructs a CharsetDecoderICU based on the information provided by a CharsetICU object.
+ * The average chars-per-byte value passed to the superclass is 1/maxCharsPerByte.
+ *
+ * @param cs The CharsetICU object describing the charset to decode.
+ */
+ CharsetDecoderICU(CharsetICU cs) {
+ super(cs, (1/cs.maxCharsPerByte), cs.maxCharsPerByte);
+ }
+
+ /*
+ * Is this Decoder allowed to use fallbacks? A fallback mapping is a mapping
+ * that will convert a byte sequence to a Unicode codepoint sequence, but
+ * the encoded Unicode codepoint sequence will round trip convert to a different
+ * byte sequence. In ICU, this can be called a reverse fallback.
+ * @return A boolean; this implementation always returns true
+ */
+ final boolean isFallbackUsed() {
+ return true;
+ }
+
+ /**
+ * Fallback is currently always used by icu4j decoders.
+ * @return the result of isToUUseFallback(true)
+ */
+ static final boolean isToUUseFallback() {
+ return isToUUseFallback(true);
+ }
+
+ /**
+ * Fallback is currently always used by icu4j decoders.
+ * @param iUseFallback ignored by this implementation
+ * @return always true
+ */
+ static final boolean isToUUseFallback(boolean iUseFallback) {
+ return true;
+ }
+
+    /**
+     * Sets the action to be taken if an illegal sequence is encountered.
+     * For REPLACE the java.nio superclass is switched to IGNORE, so that the replacement is
+     * done by the ICU callback and not a second time by java.nio.
+     *
+     * @param newAction action to be taken
+     * @stable ICU 3.6
+     */
+    protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        /* the flag is set while this method calls the superclass, which calls back into it */
+        if (!malformedInputCalled) {
+            if (newAction == CodingErrorAction.REPLACE) {
+                malformedInputCalled = true;
+                super.onMalformedInput(CodingErrorAction.IGNORE);
+                malformedInputCalled = false;
+            }
+
+            onMalformedInput = getCallback(newAction);
+        }
+    }
+
+    /**
+     * Sets the action to be taken if an unmappable character is encountered.
+     * For REPLACE the java.nio superclass is switched to IGNORE, so that the replacement is
+     * done by the ICU callback and not a second time by java.nio.
+     *
+     * @param newAction action to be taken
+     * @stable ICU 3.6
+     */
+    protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
+        /* the flag is set while this method calls the superclass, which calls back into it */
+        if (!unmappableCharacterCalled) {
+            if (newAction == CodingErrorAction.REPLACE) {
+                unmappableCharacterCalled = true;
+                super.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                unmappableCharacterCalled = false;
+            }
+
+            onUnmappableCharacter = getCallback(newAction);
+        }
+    }
+
+    /**
+     * Sets the callback decoder method and context to be used if an illegal sequence is encountered.
+     * You would normally call this twice to set both the malformed and the unmappable error. In
+     * this case newContext should remain the same, since using a different newContext each
+     * time will negate the last one used.
+     * @param err CoderResult selecting which callback to replace; results other than
+     *            malformed and unmappable leave both callbacks unchanged
+     * @param newCallback CharsetCallback.Decoder
+     * @param newContext Object
+     * @stable ICU 4.0
+     */
+    public final void setToUCallback(CoderResult err, CharsetCallback.Decoder newCallback, Object newContext) {
+        if (err.isMalformed()) {
+            onMalformedInput = newCallback;
+        } else if (err.isUnmappable()) {
+            onUnmappableCharacter = newCallback;
+        }
+        /* any other result: only malformed and unmappable are handled */
+
+        final boolean sameContext = toUContext != null && toUContext.equals(newContext);
+        if (!sameContext) {
+            toUContext = newContext;
+        }
+    }
+
+    /*
+     * Maps a java.nio error action to the corresponding ICU callback:
+     * REPLACE -> substitute, IGNORE -> skip, anything else (REPORT) -> stop.
+     */
+    private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
+        if (action == CodingErrorAction.REPLACE) {
+            return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;
+        }
+        if (action == CodingErrorAction.IGNORE) {
+            return CharsetCallback.TO_U_CALLBACK_SKIP;
+        }
+        /* CodingErrorAction.REPORT */
+        return CharsetCallback.TO_U_CALLBACK_STOP;
+    }
+ /* zero-length input used by implFlush() */
+ private final ByteBuffer EMPTY = ByteBuffer.allocate(0);
+ /**
+ * Flushes any characters saved in the converter's internal buffer by
+ * running decode() on an empty input with the flush flag set.
+ * @param out buffer that receives the flushed characters
+ * @return result of flushing action and completes the decoding all input.
+ * Returns CoderResult.UNDERFLOW if the action succeeds.
+ * @stable ICU 3.6
+ */
+ protected final CoderResult implFlush(CharBuffer out) {
+ return decode(EMPTY, out, null, true);
+ }
+
+    /**
+     * Resets the to Unicode mode of converter
+     * @stable ICU 3.6
+     */
+    protected void implReset() {
+        /* converter status and mode */
+        toUnicodeStatus = 0;
+        mode = 0;
+
+        /* pending input bytes */
+        toULength = 0;
+
+        /* pending output chars */
+        charErrorBufferBegin = 0;
+        charErrorBufferLength = 0;
+
+        /* store previous UChars/chars to continue partial matches */
+        preToUBegin = 0;
+        preToULength = 0; /* negative: replay */
+        preToUFirstLength = 0;
+    }
+
+ /**
+ * Decodes one or more bytes. The default behaviour of the converter
+ * is stop and report if an error in input stream is encountered.
+ * To set different behaviour use @see CharsetDecoder.onMalformedInput()
+ * This method allows a buffer by buffer conversion of a data stream.
+ * The state of the conversion is saved between calls to convert.
+ * Among other things, this means multibyte input sequences can be
+ * split between calls. If a call to convert results in an Error, the
+ * conversion may be continued by calling convert again with suitably
+ * modified parameters. All conversions should be finished with a call to
+ * the flush method.
+ * @param in buffer to decode
+ * @param out buffer to populate with decoded result
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+ * action succeeds or more input is needed for completing the decoding action.
+ * @stable ICU 3.6
+ */
+ protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
+ /* NOTE(review): toUCountPending() is defined outside this view; presumably it is the number of input bytes held from a previous call -- confirm */
+ if(in.remaining() < toUCountPending()){
+ return CoderResult.UNDERFLOW;
+ }
+// if (!in.hasRemaining()) {
+// toULength = 0;
+// return CoderResult.UNDERFLOW;
+// }
+
+ /* skip the pending bytes before converting */
+ in.position(in.position() + toUCountPending());
+
+ /* do the conversion */
+ CoderResult ret = decode(in, out, null, false);
+
+ // ok was there input held in the previous invocation of decodeLoop
+ // that resulted in output in this invocation?
+ /* move the position back by the pending count as it is after the conversion */
+ in.position(in.position() - toUCountPending());
+
+ return ret;
+ }
+
+ /*
+ * Implements the ICU semantic for decode operation
+ * @param in The input byte buffer
+ * @param out The output character buffer
+ * @param offsets offsets buffer; may be null (decode() is called with null by decodeLoop(in, out) and implFlush)
+ * @param flush true if, and only if, the invoker can provide no
+ * additional input bytes beyond those in the given buffer.
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+ * action succeeds or more input is needed for completing the decoding action.
+ */
+ abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
+
+    /*
+     * Implements the ICU semantic for decode operation: first writes out any chars left in
+     * the overflow buffer by an earlier call, then runs the conversion with callbacks.
+     * @param source The input byte buffer
+     * @param target The output character buffer
+     * @param offsets offsets buffer, may be null
+     * @param flush true if, and only if, the invoker can provide no
+     *            additional input bytes beyond those in the given buffer.
+     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *         action succeeds or more input is needed for completing the decoding action.
+     */
+    final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+
+        /* check parameters */
+        if (source == null || target == null) {
+            throw new IllegalArgumentException();
+        }
+
+        /*
+         * agljport:fix -- the C implementation additionally rejects buffers whose sizes
+         * exceed the int32_t range; that check has not been ported.
+         */
+
+        /* flush the target overflow buffer */
+        if (charErrorBufferLength > 0) {
+            int next = 0;
+            while (next < charErrorBufferLength) {
+                if (!target.hasRemaining()) {
+                    /* the overflow buffer contains too much, keep the rest at the front */
+                    int kept;
+                    for (kept = 0; next < charErrorBufferLength; kept++, next++) {
+                        charErrorBufferArray[kept] = charErrorBufferArray[next];
+                    }
+
+                    charErrorBufferLength = (byte) kept;
+                    return CoderResult.OVERFLOW;
+                }
+
+                /* copy the overflow contents to the target */
+                target.put(charErrorBufferArray[next++]);
+                if (offsets != null) {
+                    offsets.put(-1); /* no source index available for old output */
+                }
+            }
+
+            /* the overflow buffer is completely copied to the target */
+            charErrorBufferLength = 0;
+        }
+
+        final boolean nothingPending = !source.hasRemaining() && toULength == 0 && preToULength >= 0;
+        if (!flush && nothingPending) {
+            /* the overflow buffer is emptied and there is no new input: we are done */
+            return CoderResult.UNDERFLOW;
+        }
+
+        /*
+         * Do not simply return with a buffer overflow error if
+         * !flush && t==targetLimit
+         * because it is possible that the source will not generate any output.
+         * For example, the skip callback may be called;
+         * it does not output anything.
+         */
+
+        return toUnicodeWithCallback(source, target, offsets, flush);
+    }
+
+ /* Currently, we are not using offsets in ICU4J. */
+ /* private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
+ int limit;
+ int delta, offset;
+
+ if(sourceIndex>=0) {
+ /*
+ * adjust each offset by adding the previous sourceIndex
+ * minus the length of the input sequence that caused an
+ * error, if any
+ */
+ /* delta=sourceIndex-errorInputLength;
+ } else {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ */
+ /* delta=-1;
+ }
+ limit=offsets.position()+length;
+ if(delta==0) {
+ /* most common case, nothing to do */
+ /* } else if(delta>0) {
+ /* add the delta to each offset (but not if the offset is <0) */
+ /* while(offsets.position()=0) {
+ offsets.put(offset+delta);
+ }
+ //FIXME: ++offsets;
+ }
+ } else /* delta<0 */ /* {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ * or the error input sequence started in a previous buffer
+ */
+ /* while(offsets.position()=0) {
+ /* normal mode */
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=source;
+ realFlush=flush;
+ realSourceIndex=sourceIndex;
+ //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+ replayArray.put(preToUArray,0, -preToULength);
+ source=replayArray;
+ source.position(0);
+ source.limit(replayArrayIndex-preToULength);
+ flush=false;
+ sourceIndex=-1;
+ preToULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+
+ /* convert */
+ cr = decodeLoop(source, target, offsets, flush);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preToULength==0 because a replay (<0) will cause
+ * s0) {
+ updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ //TODO: pArgs->offsets=offsets+=length;
+ /* }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(source.position()-s);
+ }
+
+ } */
+
+ if(preToULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==null)
+ {
+ realSource=source;
+ realFlush=flush;
+ realSourceIndex=sourceIndex;
+
+ //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+ replayArray.put(preToUArray,0, -preToULength);
+ // reset position
+ replayArray.position(0);
+
+ source=replayArray;
+ source.limit(replayArrayIndex-preToULength);
+ flush=false;
+ if((sourceIndex+=preToULength)<0) {
+ sourceIndex=-1;
+ }
+
+ preToULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ //agljport:todo U_ASSERT(realSource==NULL);
+ Assert.assrt(realSource==null);
+ }
+ }
+
+ /* update pointers */
+ s=source.position();
+ //t=target.position();
+
+ if(cr.isUnderflow()) {
+ if(s0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ cr = CoderResult.malformedForLength(toULength);
+ calledCallback=false; /* new error condition */
+ } else {
+ /* input consumed */
+ if(flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ implReset();
+ }
+
+ /* done successfully */
+ return cr;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+
+ if( calledCallback || cr.isOverflow() ||
+ (cr.isMalformed() && cr.isUnmappable())
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=null) {
+ int length;
+ Assert.assrt(preToULength==0);
+ length = source.limit() - source.position();
+ if(length>0) {
+ //UConverterUtility.uprv_memcpy(preToUArray, preToUBegin, pArgs.sourceArray, pArgs.sourceBegin, length);
+ source.get(preToUArray, preToUBegin, length);
+ preToULength=(byte)-length;
+ }
+
+ source=realSource;
+ flush=realFlush;
+ }
+ return cr;
+ }
+ }
+
+ /* copy toUBytes[] to invalidCharBuffer[] */
+ errorInputLength=invalidCharLength=toULength;
+ if(errorInputLength>0) {
+ copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);
+ }
+
+ /* set the converter state to deal with the next character */
+ toULength=0;
+
+ /* call the callback function */
+ cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=true;
+ }
+ }
+ }
+
+ /*
+  * Reports how many input units the decoder is still holding in its internal
+  * state because more input is needed to finish the conversion. Units stored
+  * in preToULength (either sign) take precedence over a pending partial
+  * sequence counted by toULength. Useful for mapping ICU converter semantics
+  * onto iconv; not needed for normal conversion.
+  * @return The number of held units, or 0 when nothing is pending.
+  */
+ /*public*/ int toUCountPending() {
+     if (preToULength != 0) {
+         /* the sign only distinguishes the storage mode; the magnitude is the count */
+         return Math.abs(preToULength);
+     }
+     return (toULength > 0) ? toULength : 0;
+ }
+
+
+ private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
+ for(int i=srcOffset; i0 && target.hasRemaining()) {
+ target.put(ucharsArray[ucharsBegin++]);
+ --length;
+ }
+
+ } else {
+ /* output with offsets */
+ while(length>0 && target.hasRemaining()) {
+ target.put(ucharsArray[ucharsBegin++]);
+ offsets.put(sourceIndex);
+ --length;
+ }
+ }
+ /* write overflow */
+ if(length>0) {
+ cnv.charErrorBufferLength= 0;
+ cr = CoderResult.OVERFLOW;
+ do {
+ cnv.charErrorBufferArray[cnv.charErrorBufferLength++]=ucharsArray[ucharsBegin++];
+ } while(--length>0);
+ }
+ return cr;
+ }
+ /*
+ * This function will write out the Unicode substitution character to the
+ * target character buffer.
+ * Sub classes to override this method if required
+ * @param decoder
+ * @param source
+ * @param target
+ * @param offsets
+ * @return A CoderResult object that contains the error result when an error occurs.
+ */
+ /* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with
+ * the substitution characters. Will leave in here for the time being. To be removed later. (4.0)
+ */
+ /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder,
+ ByteBuffer source, CharBuffer target,
+ IntBuffer offsets){
+ String sub = decoder.replacement();
+ CharsetICU cs = (CharsetICU) decoder.charset();
+ if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) {
+ char[] subArr = new char[] { 0x1a };
+ return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0, sub
+ .length(), target, offsets, source.position());
+ } else {
+ return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),
+ 0, sub.length(), target, offsets, source.position());
+
+ }
+ }*/
+
+ /**
+  * Returns the maxBytesPerChar value of the Charset that created this decoder.
+  * @return maxBytesPerChar as stored on the owning {@code CharsetICU}
+  * @draft ICU 4.8
+  * @provisional This API might change or be removed in a future release.
+  */
+ public final float maxBytesPerChar() {
+     final CharsetICU owner = (CharsetICU) charset();
+     return owner.maxBytesPerChar;
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java
new file mode 100644
index 00000000000..5bd69a70ab1
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetEncoderICU.java
@@ -0,0 +1,930 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * An abstract class that provides framework methods of encoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter semantics of ICU4C.
+ * @stable ICU 3.6
+ */
+public abstract class CharsetEncoderICU extends CharsetEncoder {
+
+ /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
+ static final char MISSING_CHAR_MARKER = '\uFFFF';
+
+ byte[] errorBuffer = new byte[30];
+
+ int errorBufferLength = 0;
+
+ /** these are for encodeLoopICU */
+ int fromUnicodeStatus;
+
+ int fromUChar32;
+
+ boolean useSubChar1;
+
+ boolean useFallback;
+
+ /* maximum number of indexed UChars */
+ static final int EXT_MAX_UCHARS = 19;
+
+ /* store previous UChars/chars to continue partial matches */
+ int preFromUFirstCP; /* >=0: partial match */
+
+ char[] preFromUArray = new char[EXT_MAX_UCHARS];
+
+ int preFromUBegin;
+
+ int preFromULength; /* negative: replay */
+
+ char[] invalidUCharBuffer = new char[2];
+
+ int invalidUCharLength;
+
+ Object fromUContext;
+
+ private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+
+ private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+
+ /*
+  * Dispatching callback: an unmappable result is routed to onUnmappableInput,
+  * every other result is routed to onMalformedInput.
+  */
+ CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
+     public CoderResult call(CharsetEncoderICU encoder, Object context,
+             CharBuffer source, ByteBuffer target, IntBuffer offsets,
+             char[] buffer, int length, int cp, CoderResult cr) {
+         final CharsetCallback.Encoder handler =
+                 cr.isUnmappable() ? onUnmappableInput : onMalformedInput;
+         return handler.call(encoder, context, source, target, offsets,
+                 buffer, length, cp, cr);
+     }
+ };
+
+ /*
+ * Constructs a new encoder for the given charset.
+ * The average-bytes-per-char value handed to the superclass is the midpoint
+ * of the charset's minBytesPerChar and maxBytesPerChar.
+ *
+ * @param cs
+ * the charset for which the encoder is created
+ * @param replacement
+ * the substitution bytes
+ */
+ CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
+ super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
+ cs.maxBytesPerChar, replacement);
+ }
+
+ /**
+ * Reports whether this Encoder is allowed to use fallbacks. A fallback mapping
+ * is a mapping that will convert a Unicode codepoint sequence to a byte
+ * sequence, but the encoded byte sequence will round trip convert to a
+ * different Unicode codepoint sequence.
+ * @return true if the converter uses fallback, false otherwise.
+ * @stable ICU 3.8
+ */
+ public boolean isFallbackUsed() {
+ return useFallback;
+ }
+
+ /**
+ * Sets whether this Encoder may use fallback mappings.
+ * @param usesFallback true if the user wants the converter to take
+ * advantage of the fallback mapping, false otherwise.
+ * @stable ICU 3.8
+ */
+ public void setFallbackUsed(boolean usesFallback) {
+ useFallback = usesFallback;
+ }
+
+ /*
+  * Decides whether a fallback mapping may be used for the given code point:
+  * always when this encoder's useFallback flag is set, and otherwise only
+  * for private-use code points.
+  * @param c A codepoint
+  */
+ final boolean isFromUUseFallback(int c) {
+     /* same rule as the static overload, applied to this encoder's flag */
+     return isFromUUseFallback(useFallback, c);
+ }
+
+ /**
+  * Decides whether a fallback mapping may be used for the given code point:
+  * always when {@code iUseFallback} is set, and otherwise only for
+  * private-use code points.
+  */
+ static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
+     if (iUseFallback) {
+         return true;
+     }
+     return UCharacter.getType(c) == UCharacter.PRIVATE_USE;
+ }
+
+ /**
+ * Sets the action to be taken if an illegal (malformed) sequence is encountered.
+ * The action is translated by getCallback into the matching ICU callback and
+ * stored as the malformed-input handler.
+ *
+ * @param newAction
+ * action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected void implOnMalformedInput(CodingErrorAction newAction) {
+ onMalformedInput = getCallback(newAction);
+ }
+
+ /**
+ * Sets the action to be taken if an unmappable character is encountered.
+ * The action is translated by getCallback into the matching ICU callback and
+ * stored as the unmappable-input handler.
+ *
+ * @param newAction
+ * action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
+ onUnmappableInput = getCallback(newAction);
+ }
+
+ /**
+  * Installs the callback used for one kind of encoding error and updates the
+  * context object handed to callbacks. Call it once with a malformed result
+  * and once with an unmappable result to configure both handlers; pass the
+  * same newContext both times, because the context is shared and a different
+  * value replaces the previous one.
+  * @param err CoderResult selecting which handler to replace (malformed or unmappable)
+  * @param newCallback CharsetCallback.Encoder
+  * @param newContext Object
+  * @stable ICU 4.0
+  */
+ public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
+     /* Only malformed and unmappable are handled; any other result leaves both handlers untouched. */
+     if (err.isMalformed()) {
+         onMalformedInput = newCallback;
+     } else if (err.isUnmappable()) {
+         onUnmappableInput = newCallback;
+     }
+
+     final boolean contextUnchanged = fromUContext != null && fromUContext.equals(newContext);
+     if (!contextUnchanged) {
+         setFromUContext(newContext);
+     }
+ }
+
+ /**
+ * Sets fromUContext, the context object passed to the error callbacks.
+ * The value is stored as given; this method performs no validation itself.
+ *
+ * @param newContext Object
+ * @exception IllegalArgumentException The object is an illegal argument for UContext.
+ * @stable ICU 4.0
+ */
+ public final void setFromUContext(Object newContext) {
+ fromUContext = newContext;
+ }
+
+ /*
+  * Maps a java.nio CodingErrorAction onto the corresponding ICU callback:
+  * REPLACE -> substitute, IGNORE -> skip, anything else (REPORT) -> stop.
+  */
+ private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
+     if (action == CodingErrorAction.REPLACE) {
+         return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
+     }
+     if (action == CodingErrorAction.IGNORE) {
+         return CharsetCallback.FROM_U_CALLBACK_SKIP;
+     }
+     /* CodingErrorAction.REPORT and any other value */
+     return CharsetCallback.FROM_U_CALLBACK_STOP;
+ }
+
+ private static final CharBuffer EMPTY = CharBuffer.allocate(0);
+
+ /**
+ * Flushes any characters saved in the converter's internal buffer and
+ * resets the converter. Implemented as an encode call on an empty source
+ * with the flush flag set.
+ * @param out the byte buffer that receives the flushed output
+ * @return result of the flushing action, which completes the encoding of all input.
+ * Returns CoderResult.UNDERFLOW if the action succeeds.
+ * @stable ICU 3.6
+ */
+ protected CoderResult implFlush(ByteBuffer out) {
+ return encode(EMPTY, out, null, true);
+ }
+
+ /**
+ * Resets the from Unicode mode of converter: empties the overflow buffer,
+ * clears the conversion status and the pending code point, and clears the
+ * stored partial-match/replay state.
+ * @stable ICU 3.6
+ */
+ protected void implReset() {
+ errorBufferLength = 0;
+ fromUnicodeStatus = 0;
+ fromUChar32 = 0;
+ fromUnicodeReset();
+ }
+
+ /* Clears the stored partial-match/replay state (the preFromU* fields). */
+ private void fromUnicodeReset() {
+ preFromUBegin = 0;
+ preFromUFirstCP = UConverterConstants.U_SENTINEL;
+ preFromULength = 0;
+ }
+
+ /**
+ * Encodes one or more chars. The default behaviour of the
+ * converter is stop and report if an error in input stream is encountered.
+ * To set different behaviour use @see CharsetEncoder.onMalformedInput()
+ * @param in buffer to encode
+ * @param out buffer to populate with encoded result
+ * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
+ * action succeeds or more input is needed for completing the encoding action.
+ * @stable ICU 3.6
+ */
+ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+ if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
+ // The Java framework should have already substituted what was left.
+ fromUChar32 = 0;
+ //fromUnicodeReset();
+ return CoderResult.UNDERFLOW;
+ }
+ // Skip the chars the encoder already holds internally from a previous call
+ // (setSourcePosition left them unconsumed in the buffer).
+ in.position(in.position() + fromUCountPending());
+ /* do the conversion */
+ CoderResult ret = encode(in, out, null, false);
+ // Move the position back by whatever is pending now, so held chars stay visible to the caller.
+ setSourcePosition(in);
+ /* No need to reset to keep the proper state of the encoder.
+ if (ret.isUnderflow() && in.hasRemaining()) {
+ // The Java framework is going to substitute what is left.
+ //fromUnicodeReset();
+ } */
+ return ret;
+ }
+
+ /*
+ * Implements ICU semantics of buffer management.
+ * Concrete encoders provide the actual conversion here; it is invoked
+ * repeatedly by fromUnicodeWithCallback.
+ * @param source The input character buffer
+ * @param target The output byte buffer
+ * @param offsets
+ * @param flush true if the invoker can provide no additional input
+ * @return A CoderResult object that contains the error result when an error occurs.
+ */
+ abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
+ IntBuffer offsets, boolean flush);
+
+ /*
+  * Implements ICU semantics for encoding the buffer: bytes left in the
+  * overflow buffer by an earlier call are written to the target first, and
+  * only then is the new input handed to fromUnicodeWithCallback.
+  * @param source The input character buffer
+  * @param target The output byte buffer
+  * @param offsets may be null; receives -1 for every byte drained from the overflow buffer
+  * @param flush true if, and only if, the invoker can provide no
+  *              additional input beyond what is in the given buffer.
+  * @return CoderResult.OVERFLOW if the target fills up while draining the
+  *         overflow buffer, CoderResult.UNDERFLOW if there is nothing to do,
+  *         otherwise the result of fromUnicodeWithCallback.
+  */
+ final CoderResult encode(CharBuffer source, ByteBuffer target,
+         IntBuffer offsets, boolean flush) {
+
+     /* check parameters */
+     if (target == null || source == null) {
+         throw new IllegalArgumentException();
+     }
+
+     /* flush the target overflow buffer */
+     final int pendingLength = errorBufferLength;
+     if (pendingLength > 0) {
+         final byte[] pending = errorBuffer;
+
+         for (int next = 0; next < pendingLength; next++) {
+             if (!target.hasRemaining()) {
+                 /* the overflow buffer contains too much: move the rest to the front and keep it */
+                 final int kept = pendingLength - next;
+                 System.arraycopy(pending, next, pending, 0, kept);
+                 errorBufferLength = kept;
+                 return CoderResult.OVERFLOW;
+             }
+
+             /* copy the overflow contents to the target */
+             target.put(pending[next]);
+             if (offsets != null) {
+                 offsets.put(-1); /* no source index available for old output */
+             }
+         }
+
+         /* the overflow buffer is completely copied to the target */
+         errorBufferLength = 0;
+     }
+
+     /* the overflow buffer is emptied and there is no new input: we are done */
+     if (!flush && !source.hasRemaining() && preFromULength >= 0) {
+         return CoderResult.UNDERFLOW;
+     }
+
+     /*
+      * A full target alone is not reported as overflow here, because the
+      * source may not generate any output at all (for example when the
+      * skip callback is called).
+      */
+     return fromUnicodeWithCallback(source, target, offsets, flush);
+ }
+
+ /*
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ */
+ final CoderResult fromUnicodeWithCallback(CharBuffer source,
+ ByteBuffer target, IntBuffer offsets, boolean flush) {
+ int sBufferIndex;
+ int sourceIndex;
+ int errorInputLength;
+ boolean converterSawEndOfInput, calledCallback;
+
+ /* variables for m:n conversion */
+ CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
+ int replayArrayIndex = 0;
+ CharBuffer realSource;
+ boolean realFlush;
+
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* get the converter implementation function */
+ sourceIndex = 0;
+
+ if (preFromULength >= 0) {
+ /* normal mode */
+ realSource = null;
+ realFlush = false;
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource = source;
+ realFlush = flush;
+
+ //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
+ replayArray.put(preFromUArray, 0, -preFromULength);
+ source = replayArray;
+ source.position(replayArrayIndex);
+ source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
+ flush = false;
+
+ preFromULength = 0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for (;;) {
+ /* convert */
+ cr = encodeLoop(source, target, offsets, flush);
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv.preFromULength==0 because a replay (<0) will cause
+ * s 0) {
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ /* offsets.position(offsets.position() + length);
+ }
+
+ if (sourceIndex >= 0) {
+ sourceIndex += (int) (source.position());
+ }
+ } */
+
+ if (preFromULength < 0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if (realSource == null) {
+ realSource = source;
+ realFlush = flush;
+
+ //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
+ replayArray.put(preFromUArray, 0, -preFromULength);
+
+ source = replayArray;
+ source.position(replayArrayIndex);
+ source.limit(replayArrayIndex - preFromULength);
+ flush = false;
+ if ((sourceIndex += preFromULength) < 0) {
+ sourceIndex = -1;
+ }
+
+ preFromULength = 0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ //agljport:todo U_ASSERT(realSource==NULL);
+ Assert.assrt(realSource == null);
+ }
+ }
+
+ /* update pointers */
+ sBufferIndex = source.position();
+ if (cr.isUnderflow()) {
+ if (sBufferIndex < source.limit()) {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if (realSource != null) {
+ /* switch back from replaying to the real source and continue */
+ source = realSource;
+ flush = realFlush;
+ sourceIndex = source.position();
+ realSource = null;
+ break;
+ } else if (flush && fromUChar32 != 0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
+ cr = CoderResult.malformedForLength(1);
+ calledCallback = false; /* new error condition */
+ } else {
+ /* input consumed */
+ if (flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if (!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ implReset();
+ }
+
+ /* done successfully */
+ return cr;
+ }
+ }
+
+ /*U_FAILURE(*err) */
+ {
+
+ if (calledCallback || cr.isOverflow()
+ || (!cr.isMalformed() && !cr.isUnmappable())) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if (realSource != null) {
+ int length;
+
+ //agljport:todo U_ASSERT(cnv.preFromULength==0);
+
+ length = source.remaining();
+ if (length > 0) {
+ //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
+ source.get(preFromUArray, 0, length);
+ preFromULength = (byte) -length;
+ }
+ source = realSource;
+ flush = realFlush;
+ }
+ return cr;
+ }
+ }
+
+ /* callback handling */
+ {
+ int codePoint;
+
+ /* get and write the code point */
+ codePoint = fromUChar32;
+ errorInputLength = UTF16.append(invalidUCharBuffer, 0,
+ fromUChar32);
+ invalidUCharLength = errorInputLength;
+
+ /* set the converter state to deal with the next character */
+ fromUChar32 = 0;
+
+ /* call the callback function */
+ cr = fromCharErrorBehaviour.call(this, fromUContext,
+ source, target, offsets, invalidUCharBuffer,
+ invalidUCharLength, codePoint, cr);
+ }
+
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback = true;
+ }
+ }
+ }
+
+ /*
+ * Ascertains if a given Unicode code point (32bit value for handling surrogates)
+ * can be converted to the target encoding. If the caller wants to test if a
+ * surrogate pair can be converted to target encoding then the
+ * responsibility of assembling the int value lies with the caller.
+ * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
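+ *
+ * int i = 0;
+ * while (i < mySource.length) {
+ *     int codepoint = mySource[i++];
+ *     if (UTF16.isLeadSurrogate(mySource[i - 1]) && i < mySource.length
+ *             && UTF16.isTrailSurrogate(mySource[i])) {
+ *         codepoint = UCharacter.getCodePoint(mySource[i - 1], mySource[i]);
+ *         i++;
+ *     }
+ *     if (!((CharsetEncoderICU) myConv).canEncode(codepoint)) {
+ *         passed = false;
+ *     }
+ * }
+ *
+ * or
+ *
+ * String src = new String(mySource);
+ * int i = 0, codepoint;
+ * boolean passed = true;
+ * while (i < src.length()) {
+ *     codepoint = UTF16.charAt(src, i);
+ *     i += UTF16.getCharCount(codepoint);
+ *     if (!((CharsetEncoderICU) myConv).canEncode(codepoint)) {
+ *         passed = false;
+ *     }
+ * }
+ *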
+ * @param codepoint Unicode code point as int value
+ * @return true if a character can be converted
+ */
+ /* TODO This is different from Java's canEncode(char) API.
+ * ICU's API should implement getUnicodeSet,
+ * and override canEncode(char) which queries getUnicodeSet.
+ * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
+ */
+ /*public boolean canEncode(int codepoint) {
+ return true;
+ }*/
+ /**
+ * Overrides super class method. This implementation accepts every
+ * replacement byte sequence without inspecting it.
+ * @param repl the candidate replacement bytes (not examined)
+ * @return always true
+ * @stable ICU 3.6
+ */
+ public boolean isLegalReplacement(byte[] repl) {
+ return true;
+ }
+
+ /*
+  * Writes out the specified output bytes to the target byte buffer; bytes
+  * that do not fit are stored in the converter's overflow buffer
+  * (cnv.errorBuffer) to be written at the start of the next encode call.
+  *
+  * One offset (sourceIndex) is recorded for every byte that actually reaches
+  * the target, including when only part of the bytes fit. Bytes stored in
+  * the overflow buffer get no offset here.
+  *
+  * @param cnv the encoder whose overflow buffer receives the excess bytes
+  * @param bytesArray the array holding the bytes to write
+  * @param bytesBegin index of the first byte to write
+  * @param bytesLength number of bytes to write
+  * @param out the target buffer
+  * @param offsets may be null; otherwise receives sourceIndex per byte written to out
+  * @param sourceIndex the offset value to record
+  * @return CoderResult.UNDERFLOW if every byte fit into out,
+  *         CoderResult.OVERFLOW if some bytes had to be stored in the overflow buffer.
+  */
+ static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
+         byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
+         IntBuffer offsets, int sourceIndex) {
+
+     final int bytesLimit = bytesBegin + bytesLength;
+
+     /* write bytes while there is room, checking capacity instead of catching BufferOverflowException */
+     while (bytesBegin < bytesLimit && out.hasRemaining()) {
+         out.put(bytesArray[bytesBegin++]);
+         if (offsets != null) {
+             offsets.put(sourceIndex);
+         }
+     }
+
+     /* write overflow; the length is always updated, so it becomes 0 when everything fit */
+     cnv.errorBufferLength = bytesLimit - bytesBegin;
+     if (cnv.errorBufferLength > 0) {
+         System.arraycopy(bytesArray, bytesBegin, cnv.errorBuffer, 0, cnv.errorBufferLength);
+         return CoderResult.OVERFLOW;
+     }
+     return CoderResult.UNDERFLOW;
+ }
+
+ /*
+  * Returns the number of chars held in the converter's internal state
+  * because more input is needed for completing the conversion. This function is
+  * useful for mapping semantics of ICU's converter interface to those of iconv,
+  * and this information is not needed for normal conversion.
+  *
+  * Checked in order: replay units (preFromULength < 0), a partial match
+  * (preFromULength > 0, counted together with its first code point), a
+  * pending code point in fromUChar32, and finally a stored first code point.
+  * @return The number of chars in the state, or 0 if nothing is held.
+  */
+ /*public*/int fromUCountPending() {
+     if (preFromULength < 0) {
+         return -preFromULength;
+     }
+     if (preFromULength > 0) {
+         return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
+     }
+     if (fromUChar32 > 0) {
+         return 1;
+     }
+     return (preFromUFirstCP > 0) ? UTF16.getCharCount(preFromUFirstCP) : 0;
+ }
+
+ /**
+ * Moves the source position back by the number of chars the encoder is
+ * still holding internally (fromUCountPending()), so that those chars
+ * remain unconsumed from the caller's point of view.
+ * @param source the input buffer whose position is adjusted
+ */
+ private final void setSourcePosition(CharBuffer source) {
+
+ // ok was there input held in the previous invocation of encodeLoop
+ // that resulted in output in this invocation?
+ source.position(source.position() - fromUCountPending());
+ }
+
+ /*
+ * Write the codepage substitution character.
+ * Subclasses to override this method.
+ * For stateful converters, it is typically necessary to handle this
+ * specificially for the converter in order to properly maintain the state.
+ * @param source The input character buffer
+ * @param target The output byte buffer
+ * @param offsets
+ * @return A CoderResult object that contains the error result when an error occurs.
+ */
+ CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
+ ByteBuffer target, IntBuffer offsets) {
+ CharsetICU cs = (CharsetICU) encoder.charset();
+ byte[] sub = encoder.replacement();
+ if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
+ return CharsetEncoderICU.fromUWriteBytes(encoder,
+ new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
+ .position());
+ } else {
+ return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
+ sub.length, target, offsets, source.position());
+ }
+ }
+
+ /*
+ * Write the characters to target by running them through the given encoder.
+ * If the target overflows, encoding continues into the encoder's own
+ * overflow buffer (errorBuffer) so the remaining output is kept for the
+ * next call.
+ * @param encoder The encoder used for the nested conversion
+ * @param source The input character buffer
+ * @param target The output byte buffer
+ * @param offsets may be null; otherwise receives the initial source position
+ * once per byte written to target
+ * @return The result of the first encode call; the result of the second
+ * encode (into the overflow buffer) is not reported.
+ */
+ CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* This is a fun one. Recursion can occur - we're basically going to
+ * just retry shoving data through the same converter. Note, if you got
+ * here through some kind of invalid sequence, you maybe should emit a
+ * reset sequence of some kind. Since this IS an actual conversion,
+ * take care that you've changed the callback or the data, or you'll
+ * get an infinite loop.
+ */
+
+ int oldTargetPosition = target.position();
+ int offsetIndex = source.position();
+
+ cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
+
+ if (offsets != null) {
+ /* one offset entry per byte that the nested encode added to target */
+ while (target.position() != oldTargetPosition) {
+ offsets.put(offsetIndex);
+ oldTargetPosition++;
+ }
+ }
+
+ /* Note, if you did something like used a stop subcallback, things would get interesting.
+ * In fact, here's where we want to return the partially consumed in-source!
+ */
+ if (cr.isOverflow()) {
+ /* Overflowed target. Now, we'll write into the charErrorBuffer.
+ * It's a fixed size. If we overflow it...Hm
+ */
+
+ /* start the new target at the first free slot in the error buffer */
+ int errBuffLen = encoder.errorBufferLength;
+ ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
+ newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
+ /* zero the length so the nested encode does not try to flush the overflow buffer into itself */
+ encoder.errorBufferLength = 0;
+
+ encoder.encode(source, newTarget, null, false);
+
+ /* newTarget wraps errorBuffer, so array() is that same array; the new length is the write position */
+ encoder.errorBuffer = newTarget.array();
+ encoder.errorBufferLength = newTarget.position();
+ }
+
+ return cr;
+ }
+
+ /**
+ * Handles a common situation where a character has been read and it may be
+ * a lead surrogate followed by a trail surrogate. This method can change
+ * the source position and will modify fromUChar32.
+ *
+ * If {@code null} is returned, then there was success in reading a
+ * surrogate pair, the codepoint is stored in {@code fromUChar32} and
+ * {@code fromUChar32} should be reset (to 0) after being read.
+ *
+ * In every other case {@code fromUChar32} is set to {@code lead}. When the
+ * following char is not a trail surrogate it is pushed back, i.e. the
+ * source position is left where it was on entry.
+ *
+ * @param source
+ * The encoding source.
+ * @param lead
+ * A character that may be the first in a surrogate pair.
+ * @return {@code CoderResult.malformedForLength(1)} or
+ * {@code CoderResult.UNDERFLOW} if there is a problem, or
+ * {@code null} if there isn't.
+ * @see #handleSurrogates(CharBuffer, char)
+ * @see #handleSurrogates(CharBuffer, int, char)
+ * @see #handleSurrogates(char[], int, int, char)
+ */
+ final CoderResult handleSurrogates(CharBuffer source, char lead) {
+ if (!UTF16.isLeadSurrogate(lead)) {
+ fromUChar32 = lead;
+ return CoderResult.malformedForLength(1);
+ }
+
+ if (!source.hasRemaining()) {
+ fromUChar32 = lead;
+ return CoderResult.UNDERFLOW;
+ }
+
+ char trail = source.get();
+
+ if (!UTF16.isTrailSurrogate(trail)) {
+ fromUChar32 = lead;
+ source.position(source.position() - 1);
+ return CoderResult.malformedForLength(1);
+ }
+
+ fromUChar32 = UCharacter.getCodePoint(lead, trail);
+ return null;
+ }
+
+ /**
+ * Same as {@code handleSurrogates(CharBuffer, char)}, but with arrays. As an added
+ * requirement, the calling method must also increment the index if this method returns
+ * {@code null}; this method only reads {@code sourceArray[sourceIndex]} and never
+ * advances the index itself.
+ *
+ * @param sourceArray
+ * The encoding source.
+ * @param sourceIndex
+ * Index of the char that follows {@code lead}.
+ * @param sourceLimit
+ * Index one past the last readable char.
+ * @param lead
+ * A character that may be the first in a surrogate pair.
+ * @return {@code CoderResult.malformedForLength(1)} or
+ * {@code CoderResult.UNDERFLOW} if there is a problem, or {@code null} if
+ * there isn't.
+ * @see #handleSurrogates(CharBuffer, char)
+ * @see #handleSurrogates(CharBuffer, int, char)
+ * @see #handleSurrogates(char[], int, int, char)
+ */
+ final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
+ int sourceLimit, char lead) {
+ if (!UTF16.isLeadSurrogate(lead)) {
+ fromUChar32 = lead;
+ return CoderResult.malformedForLength(1);
+ }
+
+ if (sourceIndex >= sourceLimit) {
+ fromUChar32 = lead;
+ return CoderResult.UNDERFLOW;
+ }
+
+ char trail = sourceArray[sourceIndex];
+
+ if (!UTF16.isTrailSurrogate(trail)) {
+ fromUChar32 = lead;
+ return CoderResult.malformedForLength(1);
+ }
+
+ fromUChar32 = UCharacter.getCodePoint(lead, trail);
+ return null;
+ }
+
+ /**
+  * Returns the maxCharsPerByte value of the Charset that created this encoder.
+  * @return maxCharsPerByte as stored on the owning {@code CharsetICU}
+  * @draft ICU 4.8
+  * @provisional This API might change or be removed in a future release.
+  */
+ public final float maxCharsPerByte() {
+     final CharsetICU owner = (CharsetICU) charset();
+     return owner.maxCharsPerByte;
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java
new file mode 100644
index 00000000000..4fc11086ac0
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java
@@ -0,0 +1,385 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+class CharsetHZ extends CharsetICU {
+
+ private static final int UCNV_TILDE = 0x7E; /* ~ */
+ private static final int UCNV_OPEN_BRACE = 0x7B; /* { */
+ private static final int UCNV_CLOSE_BRACE = 0x7D; /* } */
+ private static final byte[] SB_ESCAPE = new byte[] { 0x7E, 0x7D }; /* ~} : written by the encoder when it shifts to single-byte mode */
+ private static final byte[] DB_ESCAPE = new byte[] { 0x7E, 0x7B }; /* ~{ : written by the encoder when it shifts to double-byte mode */
+ private static final byte[] TILDE_ESCAPE = new byte[] { 0x7E, 0x7E }; /* ~~ : written by the encoder for a literal '~' */
+ private static final byte[] fromUSubstitution = new byte[] { (byte) 0x1A }; /* passed to the CharsetEncoderICU constructor; presumably the substitution byte - confirm */
+
+ private CharsetMBCS gbCharset; /* the "GBK" charset looked up in the constructor; source of the helper decoder/encoder */
+ private boolean isEmptySegment; /* set by the decoder after ~{ or ~}, cleared once a character byte follows. NOTE(review): stored on the charset, so it looks shared by all decoders created from it - confirm */
+
+ public CharsetHZ(String icuCanonicalName, String canonicalName, String[] aliases) {
+ super(icuCanonicalName, canonicalName, aliases);
+ gbCharset = (CharsetMBCS) new CharsetProviderICU().charsetForName("GBK");
+
+ maxBytesPerChar = 4;
+ minBytesPerChar = 1;
+ maxCharsPerByte = 1;
+
+ isEmptySegment = false;
+ }
+
+ class CharsetDecoderHZ extends CharsetDecoderICU { // HZ decoder: single bytes outside "~{ ... ~}", two-byte pairs inside
+ CharsetMBCS.CharsetDecoderMBCS gbDecoder; // helper decoder created from gbCharset; maps each byte pair after 0x80 is added to both bytes
+ boolean isStateDBCS = false; // true once "~{" has been decoded, false again after "~}"
+ /** Creates the decoder together with the helper decoder obtained from gbCharset. */
+ public CharsetDecoderHZ(CharsetICU cs) {
+ super(cs);
+ gbDecoder = (CharsetMBCS.CharsetDecoderMBCS) gbCharset.newDecoder();
+ }
+ /** Resets the superclass state and the helper decoder, returns to single-byte mode and clears isEmptySegment. */
+ protected void implReset() {
+ super.implReset();
+ gbDecoder.implReset();
+
+ isStateDBCS = false;
+ isEmptySegment = false;
+ }
+ /**
+ * Decodes HZ bytes from source into UTF-16 characters in target.
+ *
+ * A '~' byte is remembered in mode and interpreted together with the byte that follows it:
+ * "~~" produces a literal '~', "~\n" produces nothing, "~{" switches to double-byte mode and
+ * "~}" switches back. A "~{" or "~}" that arrives while isEmptySegment is still set from the
+ * previous escape is reported as malformed. In double-byte mode the lead byte is kept in
+ * toUnicodeStatus (with bit 0x100 set) until its trail byte arrives.
+ *
+ * @param source bytes to decode; its position is moved back by one whenever a byte that
+ * could start a new character has to be read again
+ * @param target receives the decoded characters
+ * @param offsets if not null, receives for every character written the source position at
+ * which its bytes started
+ * @param flush not consulted by this implementation
+ * @return CoderResult.UNDERFLOW when source is used up, CoderResult.OVERFLOW when target is
+ * full, or a malformed/unmappable result with the offending bytes stored in
+ * toUBytesArray and their count in toULength
+ */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte[] tempBuf = new byte[2]; // holds one byte pair for gbDecoder
+ int targetUniChar = 0;
+ int mySourceChar = 0;
+
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ else if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ while (source.hasRemaining()) {
+
+ if (target.hasRemaining()) {
+
+ // get the byte as unsigned
+ mySourceChar = source.get() & 0xff;
+
+ if (mode == UCNV_TILDE) {
+ /* second byte after ~ */
+ mode = 0;
+ switch (mySourceChar) {
+ case 0x0A:
+ /* no output for ~\n (line-continuation marker) */
+ continue;
+ case UCNV_TILDE:
+ if (offsets != null) {
+ offsets.put(source.position() - 2);
+ }
+ target.put((char) mySourceChar);
+ continue;
+ case UCNV_OPEN_BRACE:
+ case UCNV_CLOSE_BRACE:
+ isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
+ if (isEmptySegment) { // the previous escape was not followed by any character byte
+ isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
+ this.toUBytesArray[0] = UCNV_TILDE;
+ this.toUBytesArray[1] = (byte)mySourceChar;
+ this.toULength = 2;
+ return CoderResult.malformedForLength(1); // NOTE(review): length 1 is reported although toULength is 2 - confirm this is intended
+ }
+ isEmptySegment = true;
+ continue;
+ default:
+ /*
+ * if the first byte is equal to TILDE and the trail byte is not a valid byte then it is an
+ * error condition
+ */
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ */
+ isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
+ err = CoderResult.malformedForLength(1);
+ toUBytesArray[0] = UCNV_TILDE;
+ if (isStateDBCS ? (0x21 <= mySourceChar && mySourceChar <= 0x7e) : mySourceChar <= 0x7f) {
+ /* The current byte could be the start of a character: Back it out. */
+ toULength = 1;
+ source.position(source.position() - 1);
+ } else {
+ /* Include the current byte in the illegal sequence. */
+ toUBytesArray[1] = (byte)mySourceChar;
+ toULength = 2;
+ }
+ return err;
+ }
+ } else if (isStateDBCS) {
+ if (toUnicodeStatus == 0) {
+ /* lead byte */
+ if (mySourceChar == UCNV_TILDE) {
+ mode = UCNV_TILDE;
+ } else {
+ /*
+ * add another bit to distinguish a 0 byte from not having seen a lead byte
+ */
+ toUnicodeStatus = mySourceChar | 0x100;
+ isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */
+ }
+ continue;
+ } else {
+ /* trail byte */
+ boolean leadIsOk, trailIsOk;
+ int leadByte = toUnicodeStatus & 0xff;
+ targetUniChar = 0xffff;
+ /*
+ * Ticket 5691: consistent illegal sequence
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those
+ *
+ * In HZ DBCS, if the second byte is in the 21..7e range,
+ * we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (leadByte - 0x21)) <= (0x7d - 0x21); // lead must be 0x21..0x7d (assuming UNSIGNED_BYTE_MASK is 0xff - confirm)
+ trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21); // trail must be 0x21..0x7e (same assumption)
+ if (leadIsOk && trailIsOk) {
+ tempBuf[0] = (byte)(leadByte + 0x80);
+ tempBuf[1] = (byte)(mySourceChar + 0x80);
+ targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed());
+ mySourceChar = (leadByte << 8) | mySourceChar;
+ } else if (trailIsOk) {
+ /* report a single illegal byte and continue with the following DBCS starter byte */
+ source.position(source.position() - 1);
+ mySourceChar = leadByte;
+ } else {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar = 0x10000 | (leadByte << 8) | mySourceChar;
+ }
+ toUnicodeStatus = 0x00;
+ }
+ } else {
+ if (mySourceChar == UCNV_TILDE) {
+ mode = UCNV_TILDE;
+ continue;
+ } else if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar; /* ASCII */
+ isEmptySegment = false; /* the segment has something valid */
+ } else {
+ targetUniChar = 0xffff;
+ isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
+ }
+ }
+
+ if (targetUniChar < 0xfffe) {
+ if (offsets != null) {
+ offsets.put(source.position() - 1 - (isStateDBCS ? 1 : 0));
+ }
+
+ target.put((char) targetUniChar);
+ } else /* targetUniChar >= 0xfffe */{
+ if (mySourceChar > 0xff) {
+ toUBytesArray[toUBytesBegin + 0] = (byte) (mySourceChar >> 8);
+ toUBytesArray[toUBytesBegin + 1] = (byte) mySourceChar;
+ toULength = 2;
+ } else {
+ toUBytesArray[toUBytesBegin + 0] = (byte) mySourceChar;
+ toULength = 1;
+ }
+ if (targetUniChar == 0xfffe) { // 0xfffe presumably comes back from simpleGetNextUChar for a pair without a mapping - confirm; 0xffff marks illegal bytes
+ return CoderResult.unmappableForLength(toULength);
+ } else {
+ return CoderResult.malformedForLength(toULength);
+ }
+ }
+ } else {
+ return CoderResult.OVERFLOW;
+ }
+ }
+
+ return err;
+ }
+ }
+
+ class CharsetEncoderHZ extends CharsetEncoderICU {
+ CharsetMBCS.CharsetEncoderMBCS gbEncoder; // helper encoder created from gbCharset; supplies the bytes for characters above 0x7f
+ boolean isEscapeAppended = false; // set once the first "~{" or "~}" has been written
+ boolean isTargetUCharDBCS = false; // mode of the most recently encoded character: true = double-byte
+
+ public CharsetEncoderHZ(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ gbEncoder = (CharsetMBCS.CharsetEncoderMBCS) gbCharset.newEncoder();
+ }
+
+        /**
+         * Resets the superclass state and the helper encoder, then forgets the shift state so
+         * that the next character written is preceded by an escape again.
+         */
+        protected void implReset() {
+            super.implReset();
+            gbEncoder.implReset();
+
+            isTargetUCharDBCS = false;
+            isEscapeAppended = false;
+        }
+
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ int length = 0;
+ int[] targetUniChar = new int[] { 0 };
+ int mySourceChar = 0;
+ boolean oldIsTargetUCharDBCS = isTargetUCharDBCS;
+ /*
+ * Overview: characters up to 0x7f are written as single bytes, '~' is written as "~~",
+ * and every other character is looked up through gbEncoder. Only two-byte results in
+ * 0xA1A1..0xFDFE whose low byte is in 0xA1..0xFE are usable; they are written with 0x8080
+ * subtracted. An escape ("~{" or "~}") is written whenever the mode changes or no escape
+ * has been written yet. Bytes that do not fit into target are stored in errorBuffer.
+ * Returns UNDERFLOW when source is used up, OVERFLOW when target is full, and a
+ * malformed/unmappable result otherwise. The flush argument is not consulted.
+ */
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ else if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ if (fromUChar32 != 0 && target.hasRemaining()) { // a code unit was left pending earlier (presumably an unpaired lead surrogate - confirm)
+ CoderResult cr = handleSurrogates(source, (char) fromUChar32);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2); // a completed pair is always reported as unmappable
+ }
+ /* writing the char to the output stream */
+ while (source.hasRemaining()) {
+ targetUniChar[0] = MISSING_CHAR_MARKER;
+ if (target.hasRemaining()) {
+
+ mySourceChar = source.get();
+
+ oldIsTargetUCharDBCS = isTargetUCharDBCS;
+ if (mySourceChar == UCNV_TILDE) {
+ /*
+ * concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);
+ */
+ concatEscape(source, target, offsets, TILDE_ESCAPE); // NOTE(review): the returned CoderResult is not checked here
+ continue;
+ } else if (mySourceChar <= 0x7f) {
+ length = 1;
+ targetUniChar[0] = mySourceChar;
+ } else {
+ length = gbEncoder.fromUChar32(mySourceChar, targetUniChar, super.isFallbackUsed());
+
+ /*
+ * we can only use lead bytes 21..7D and trail bytes 21..7E
+ */
+ if (length == 2 && 0xa1a1 <= targetUniChar[0] && targetUniChar[0] <= 0xfdfe
+ && 0xa1 <= (targetUniChar[0] & 0xff) && (targetUniChar[0] & 0xff) <= 0xfe) {
+ targetUniChar[0] -= 0x8080;
+ } else {
+ targetUniChar[0] = MISSING_CHAR_MARKER;
+ }
+ }
+ if (targetUniChar[0] != MISSING_CHAR_MARKER) {
+ isTargetUCharDBCS = (targetUniChar[0] > 0x00FF); // a value above 0xff needs two bytes, i.e. double-byte mode
+ if (oldIsTargetUCharDBCS != isTargetUCharDBCS || !isEscapeAppended) {
+ /* Shifting from a double byte to single byte mode */
+ if (!isTargetUCharDBCS) {
+ concatEscape(source, target, offsets, SB_ESCAPE); // NOTE(review): result not checked
+ isEscapeAppended = true;
+ } else { /*
+ * Shifting from a single byte to double byte mode
+ */
+ concatEscape(source, target, offsets, DB_ESCAPE); // NOTE(review): result not checked
+ isEscapeAppended = true;
+
+ }
+ }
+
+ if (isTargetUCharDBCS) {
+ if (target.hasRemaining()) {
+ target.put((byte) (targetUniChar[0] >> 8));
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+ if (target.hasRemaining()) {
+ target.put((byte) targetUniChar[0]);
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) (targetUniChar[0] >> 8);
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ } else {
+ if (target.hasRemaining()) {
+ target.put((byte) targetUniChar[0]);
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+
+ } else {
+ /* oops.. the code point is unassigned */
+ /* Handle surrogates */
+ /* check if the char is a First surrogate */
+
+ if (UTF16.isSurrogate((char) mySourceChar)) {
+ // use that handy handleSurrogates method everyone's been talking about!
+ CoderResult cr = handleSurrogates(source, (char) mySourceChar);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ // *err = U_INVALID_CHAR_FOUND;
+ fromUChar32 = mySourceChar;
+ return CoderResult.unmappableForLength(1);
+ }
+ }
+ } else {
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ return CoderResult.OVERFLOW;
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+
+ private CoderResult concatEscape(CharBuffer source, ByteBuffer target, IntBuffer offsets, byte[] strToAppend) {
+ CoderResult cr = null;
+ for (int i=0; iA subclass of java.nio.Charset for providing implementation of ICU's charset converters.
+ * This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link Charset#forName } and {@link #forNameICU }. With that
+ * converter, you can get its properties, set options, convert your data.
+ *
+ * Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases.
+ *
+ * @stable ICU 3.6
+ */
+public abstract class CharsetICU extends Charset{
+
+ String icuCanonicalName; // ICU's name for the converter, as passed to the constructor
+ String javaCanonicalName; // canonical name that the constructor also hands to java.nio.charset.Charset
+ int options; // NOTE(review): not assigned anywhere in the visible code; presumably converter option bits - confirm
+
+ float maxCharsPerByte; // returned by the encoder's maxCharsPerByte(); assigned by subclasses
+
+ String name; /* +4: 60 internal name of the converter- invariant chars */
+
+ int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
+
+ byte platform; /* +68: 1 platform of the converter (only IBM now) */
+ byte conversionType; /* +69: 1 conversion type */
+
+ int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
+ int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
+
+ byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */
+ byte subCharLen; /* +76: 1 */
+
+ byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
+ byte hasFromUnicodeFallback; /* +78: 1 */
+ short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
+ byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
+ //byte reserved[/*19*/]; /* +81: 19 to round out the structure */
+
+
+ // typedef enum UConverterUnicodeSet {
+ /**
+ * Parameter that selects the set of roundtrippable Unicode code points.
+ * This is the only value that getUnicodeSet accepts.
+ * @stable ICU 4.0
+ */
+ public static final int ROUNDTRIP_SET=0;
+ /**
+ * Select the set of Unicode code points with roundtrip or fallback mappings.
+ * Not supported at this point.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static final int ROUNDTRIP_AND_FALLBACK_SET =1;
+
+ //} UConverterUnicodeSet;
+
+ /**
+ *
+ * @param icuCanonicalName
+ * @param canonicalName
+ * @param aliases
+ * @stable ICU 3.6
+ */
+ protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) {
+ super(canonicalName,aliases);
+ if(canonicalName.length() == 0){
+ throw new IllegalCharsetNameException(canonicalName);
+ }
+ this.javaCanonicalName = canonicalName;
+ this.icuCanonicalName = icuCanonicalName;
+ }
+
+ /**
+ * Ascertains if a charset is a sub set of this charset
+ * Implements the abstract method of super class.
+ * @param cs charset to test
+ * @return true if the given charset is a subset of this charset
+ * @stable ICU 3.6
+ */
+    public boolean contains(Charset cs){
+        // Only the charset itself counts as contained; null never does.
+        return cs != null && this.equals(cs);
+    }
+    /**
+     * Maps the ICU canonical name of each algorithmic converter to the fully qualified name
+     * of the class that implements it. Names without an entry are loaded as CharsetMBCS by
+     * getCharset. The map is typed so that lookups yield a String without a cast.
+     */
+    private static final HashMap<String, String> algorithmicCharsets = new HashMap<String, String>();
+    static{
+        algorithmicCharsets.put("LMBCS-1", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-2", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-3", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-4", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-5", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-6", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-8", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-11", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-16", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-17", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-18", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("LMBCS-19", "com.ibm.icu.charset.CharsetLMBCS");
+        algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" );
+        algorithmicCharsets.put("SCSU", "com.ibm.icu.charset.CharsetSCSU" );
+        algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" );
+        algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
+        algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
+        algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16BE" );
+        algorithmicCharsets.put("UTF-16BE,version=1", "com.ibm.icu.charset.CharsetUTF16BE" );
+        algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" );
+        algorithmicCharsets.put("UTF-16LE,version=1", "com.ibm.icu.charset.CharsetUTF16LE" );
+        algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" );
+        algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" );
+        algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" );
+        algorithmicCharsets.put("UTF-32BE", "com.ibm.icu.charset.CharsetUTF32BE" );
+        algorithmicCharsets.put("UTF-32LE", "com.ibm.icu.charset.CharsetUTF32LE" );
+        algorithmicCharsets.put("UTF32_OppositeEndian", "com.ibm.icu.charset.CharsetUTF32LE" );
+        algorithmicCharsets.put("UTF32_PlatformEndian", "com.ibm.icu.charset.CharsetUTF32" );
+        algorithmicCharsets.put("UTF-8", "com.ibm.icu.charset.CharsetUTF8" );
+        algorithmicCharsets.put("CESU-8", "com.ibm.icu.charset.CharsetCESU8" );
+        algorithmicCharsets.put("UTF-7", "com.ibm.icu.charset.CharsetUTF7" );
+        algorithmicCharsets.put("ISCII,version=0", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=1", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=2", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=3", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=4", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=5", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=6", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=7", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("ISCII,version=8", "com.ibm.icu.charset.CharsetISCII" );
+        algorithmicCharsets.put("IMAP-mailbox-name", "com.ibm.icu.charset.CharsetUTF7" );
+        algorithmicCharsets.put("HZ", "com.ibm.icu.charset.CharsetHZ" );
+        algorithmicCharsets.put("ISO_2022,locale=ja,version=0", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ja,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ja,version=2", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ja,version=3", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ja,version=4", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=zh,version=0", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=zh,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=zh,version=2", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ko,version=0", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("ISO_2022,locale=ko,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+        algorithmicCharsets.put("x11-compound-text", "com.ibm.icu.charset.CharsetCompoundText" );
+    }
+
+    /**
+     * Instantiates the charset implementation registered for an ICU converter name.
+     * The class name is looked up in algorithmicCharsets; names without an entry are loaded
+     * as com.ibm.icu.charset.CharsetMBCS. The class is created reflectively through its
+     * (String, String, String[]) constructor.
+     *
+     * @param icuCanonicalName ICU name of the converter; also the lookup key
+     * @param javaCanonicalName canonical name handed to the charset constructor
+     * @param aliases aliases handed to the charset constructor
+     * @return the newly created charset
+     * @throws UnsupportedCharsetException if the class cannot be found, has no matching
+     *         accessible constructor, cannot be instantiated, or its constructor throws;
+     *         the underlying failure is attached as the cause
+     */
+    /*public*/ static final Charset getCharset(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        String className = algorithmicCharsets.get(icuCanonicalName);
+        if(className==null){
+            //all the cnv files are loaded as MBCS
+            className = "com.ibm.icu.charset.CharsetMBCS";
+        }
+        // Remembers why reflection failed so the final exception can carry it as its cause.
+        Throwable failure = null;
+        try{
+            Class<? extends CharsetICU> cs = Class.forName(className).asSubclass(CharsetICU.class);
+            Class<?>[] paramTypes = new Class<?>[]{ String.class, String.class, String[].class};
+            final Constructor<? extends CharsetICU> c = cs.getConstructor(paramTypes);
+            Object[] params = new Object[]{ icuCanonicalName, javaCanonicalName, aliases};
+
+            // Run constructor
+            try {
+                // newInstance never returns null, so the result can be returned directly.
+                return c.newInstance(params);
+            }catch (InvocationTargetException e) {
+                UnsupportedCharsetException wrapped = new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className+ ". Exception:" + e.getTargetException());
+                wrapped.initCause(e.getTargetException());
+                throw wrapped;
+            }
+        }catch(ClassNotFoundException ex){
+            failure = ex;
+        }catch(NoSuchMethodException ex){
+            failure = ex;
+        }catch (IllegalAccessException ex){
+            failure = ex;
+        }catch (InstantiationException ex){
+            failure = ex;
+        }
+        UnsupportedCharsetException notLoaded = new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className);
+        notLoaded.initCause(failure);
+        throw notLoaded;
+    }
+
+    /**
+     * Tests whether a value lies in the range 0xd800..0xdfff.
+     *
+     * @param c the value to test
+     * @return true if all bits of c above the low eleven equal those of 0xd800
+     */
+    static final boolean isSurrogate(int c){
+        final int upperBits = c & 0xfffff800;
+        return upperBits == 0xd800;
+    }
+
+ /*
+ * Returns the default charset name
+ */
+// static final String getDefaultCharsetName(){
+// String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
+// return defaultEncoding;
+// }
+
+ /**
+ * Returns a charset object for the named charset.
+ * This method guarantees that the ICU charset is returned when
+ * available. If the ICU charset provider does not support
+ * the specified charset, then try other charset providers
+ * including the standard Java charset provider.
+ *
+ * @param charsetName The name of the requested charset,
+ * may be either a canonical name or an alias
+ * @return A charset object for the named charset
+ * @throws IllegalCharsetNameException If the given charset name
+ * is illegal
+ * @throws UnsupportedCharsetException If no support for the
+ * named charset is available in this instance of the Java
+ * virtual machine
+ * @stable ICU 3.6
+ */
+    public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
+        // Ask the ICU provider first; only when it has nothing, use the regular lookup.
+        final CharsetICU icuCharset = (CharsetICU) new CharsetProviderICU().charsetForName(charsetName);
+        return (icuCharset != null) ? icuCharset : Charset.forName(charsetName);
+    }
+
+// /**
+// * @see java.lang.Comparable#compareTo(java.lang.Object)
+// * @stable 3.8
+// */
+// public int compareTo(Object otherObj) {
+// if (!(otherObj instanceof CharsetICU)) {
+// return -1;
+// }
+// return icuCanonicalName.compareTo(((CharsetICU)otherObj).icuCanonicalName);
+// }
+
+ /**
+ * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
+ * start of the stream for example U+FEFF (the Unicode BOM/signature
+ * character) that can be ignored.
+ *
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns number of bytes of the BOM of the indicated Unicode charset.
+ * 0 is returned when no Unicode signature is recognized.
+ *
+ */
+ // TODO This should be proposed as CharsetDecoderICU API.
+// static String detectUnicodeSignature(ByteBuffer source) {
+// int signatureLength = 0; // number of bytes of the signature
+// final int SIG_MAX_LEN = 5;
+// String sigUniCharset = null; // states what unicode charset is the BOM
+// int i = 0;
+//
+// /*
+// * initial 0xa5 bytes: make sure that if we read Returns the set of Unicode code points that can be converted by an ICU Converter.
+ *
+ * The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): The set of all Unicode code points that can be
+ * roundtrip-converted (converted without any data loss) with the converter This set will not include code points that have fallback
+ * mappings or are only the result of reverse fallback mappings. See UTR #22 "Character Mapping Markup Language" at http://www.unicode.org/reports/tr22/
+ *
* In the future, there may be more UConverterUnicodeSet choices to select sets with different properties.
+ *
+ *
This is useful for example for
+ *
checking that a string or document can be roundtrip-converted with a converter,
+ * without/before actually performing the conversion
+ * testing if a converter can be used for text for typical text for a certain locale,
+ * by comparing its roundtrip set with the set of ExemplarCharacters from
+ * ICU's locale data or other sources
+ *
+ * @param setFillIn A valid UnicodeSet. It will be cleared by this function before
+ * the converter's specific set is filled in.
+ * @param which A selector; currently ROUNDTRIP_SET is the only supported value.
+ * @throws IllegalArgumentException if setFillIn is null or which is not ROUNDTRIP_SET.
+ * @stable ICU 4.0
+ */
+    public void getUnicodeSet(UnicodeSet setFillIn, int which){
+        final boolean argumentsOk = (setFillIn != null) && (which == ROUNDTRIP_SET);
+        if (!argumentsOk) {
+            throw new IllegalArgumentException();
+        }
+
+        // Start from an empty set, then let getUnicodeSetImpl fill it in.
+        setFillIn.clear();
+        getUnicodeSetImpl(setFillIn, which);
+    }
+
+ /**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character. A UTF-32 code point
+ * may represent more than one UTF-8 or UTF-16 code units but always have size of 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode codes units for any particular Unicode encoding form.
+ * @return true if the converter is fixed-width
+ * @draft ICU 4.8
+ * @provisional This API might change or be removed in a future release.
+ */
+    public boolean isFixedWidth() {
+        final boolean alwaysFixed = (this instanceof CharsetASCII) || (this instanceof CharsetUTF32);
+        if (alwaysFixed) {
+            return true;
+        }
+        if (!(this instanceof CharsetMBCS)) {
+            return false;
+        }
+
+        // An MBCS charset is fixed-width when its recorded minimum and maximum byte counts agree.
+        final CharsetMBCS mbcs = (CharsetMBCS) this;
+        return mbcs.sharedData.staticData.maxBytesPerChar == mbcs.sharedData.staticData.minBytesPerChar;
+    }
+
+ static void getNonSurrogateUnicodeSet(UnicodeSet setFillIn){
+ setFillIn.add(0, 0xd7ff);
+ setFillIn.add(0xe000, 0x10ffff);
+ }
+
+ static void getCompleteUnicodeSet(UnicodeSet setFillIn){
+ setFillIn.add(0, 0x10ffff);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java
new file mode 100644
index 00000000000..d9c82d5ed3e
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetISCII.java
@@ -0,0 +1,1458 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author Michael Ow
+ *
+ */
+class CharsetISCII extends CharsetICU {
+ /* Masks the converter option down to its low four bits; the result indexes lookupInitialData. */
+ private static final short UCNV_OPTIONS_VERSION_MASK = 0X0f;
+ //private static final short NUKTA = 0x093c;
+ //private static final short HALANT = 0x094d;
+ private static final short ZWNJ = 0x200c; /* Zero Width Non Joiner */
+ private static final short ZWJ = 0x200d; /* Zero Width Joiner */
+ //private static final int INVALID_CHAR = 0xffff;
+ /* ISCII byte values that the converter tests for by name. */
+ private static final short ATR = 0xef; /* Attribute code */
+ private static final short EXT = 0xf0; /* Extension code */
+ /* Unicode code points U+0964 and U+0965 (Devanagari danda and double danda). */
+ private static final short DANDA = 0x0964;
+ private static final short DOUBLE_DANDA = 0x0965;
+ private static final short ISCII_NUKTA = 0xe9;
+ private static final short ISCII_HALANT = 0xe8;
+ private static final short ISCII_DANDA = 0xea;
+ private static final short ISCII_VOWEL_SIGN_E = 0xe0;
+ private static final short ISCII_INV = 0xd9;
+ /* First and last Unicode code points handled as "Indic"; INDIC_RANGE is their difference (0x047f). */
+ private static final short INDIC_BLOCK_BEGIN = 0x0900;
+ private static final short INDIC_BLOCK_END = 0x0d7f;
+ private static final short INDIC_RANGE = (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN);
+ /* U+0931; the validityTable comment says this code point is special-cased for Telugu vs. Kannada. */
+ private static final short VOCALLIC_RR = 0x0931;
+ private static final short LF = 0x0a;
+ /* NOTE(review): despite the name this is 0xa0, not 0x7f - confirm how the converter loops use it. */
+ private static final short ASCII_END = 0xa0;
+ /* UniLang.DELTA * UniLang.TELUGU = 0x80 * 6 = 0x0300. */
+ private static final short TELUGU_DELTA = (UniLang.DELTA * UniLang.TELUGU);
+ private static final short DEV_ABBR_SIGN = 0x0970;
+ private static final short DEV_ANUDATTA = 0x0952;
+ /* NOTE(review): presumably the byte range accepted after an EXT byte - confirm in the decoder. */
+ private static final short EXT_RANGE_BEGIN = 0xa1;
+ private static final short EXT_RANGE_END = 0xee;
+ /* Same value as UniLang.DELTA * UniLang.GURMUKHI (0x80 * 2). */
+ private static final short PNJ_DELTA = 0x100;
+ /* Initial value of UConverterDataISCII.contextCharToUnicode. */
+ private static final int NO_CHAR_MARKER = 0xfffe;
+
+ /* Used for proper conversion to and from Gurmukhi */
+ /* The two sets below are non-final and are not assigned in this part of the file. */
+ private static UnicodeSet PNJ_BINDI_TIPPI_SET;
+ private static UnicodeSet PNJ_CONSONANT_SET;
+ /* Code points in the Gurmukhi block (U+0A00..U+0A7F). */
+ private static final short PNJ_BINDI = 0x0a02;
+ private static final short PNJ_TIPPI = 0x0a70;
+ private static final short PNJ_SIGN_VIRAMA = 0x0a4d;
+ private static final short PNJ_ADHAK = 0x0a71;
+ private static final short PNJ_HA = 0x0a39;
+ private static final short PNJ_RRA = 0x0a5c;
+
+ /*
+  * Script ordinals. An ordinal multiplied by DELTA gives the per-script delta used
+  * by the converter state (see TELUGU_DELTA and UConverterDataISCII.initialize()).
+  */
+ private static final class UniLang {
+     static final short DEVALANGARI = 0;
+     static final short BENGALI = 1;
+     static final short GURMUKHI = 2;
+     static final short GUJARATI = 3;
+     static final short ORIYA = 4;
+     static final short TAMIL = 5;
+     static final short TELUGU = 6;
+     static final short KANNADA = 7;
+     static final short MALAYALAM = 8;
+     /* Distance between the deltas of two consecutive scripts. */
+     static final short DELTA = 0x80;
+ }
+ /*
+  * ISCII-side script/language codes. One of these is stored per script in the
+  * isciiLang field of each lookupInitialData row.
+  * NOTE(review): the names look like language abbreviations (DEV, BNG, TML, ...) and the
+  * values are presumably the bytes that follow ATR to select a script - confirm against
+  * the ISCII attribute-code table and the converter loops, which are not visible here.
+  * Several entries are not referenced by lookupInitialData, hence the "unused" suppression.
+  */
+ @SuppressWarnings("unused")
+ private static final class ISCIILang {
+ static final short DEF = 0x40;
+ static final short RMN = 0x41;
+ static final short DEV = 0x42;
+ static final short BNG = 0x43;
+ static final short TML = 0x44;
+ static final short TLG = 0x45;
+ static final short ASM = 0x46;
+ static final short ORI = 0x47;
+ static final short KND = 0x48;
+ static final short MLM = 0x49;
+ static final short GJR = 0x4a;
+ static final short PNJ = 0x4b;
+ static final short ARB = 0x71;
+ static final short PES = 0x72;
+ static final short URD = 0x73;
+ static final short SND = 0x74;
+ static final short KSM = 0x75;
+ static final short PST = 0x76;
+ }
+
+ /*
+  * One bit per script (or script group). The entries of validityTable are sums of
+  * these bits, and lookupInitialData stores one of them per script.
+  */
+ private static final class MaskEnum {
+     static final short ZERO = 0;
+     static final short TML_MASK = 1;       /* bit 0 */
+     static final short MLM_MASK = 1 << 1;  /* bit 1 */
+     static final short KND_MASK = 1 << 2;  /* bit 2 */
+     static final short BNG_MASK = 1 << 3;  /* bit 3 */
+     static final short ORI_MASK = 1 << 4;  /* bit 4 */
+     static final short GJR_MASK = 1 << 5;  /* bit 5 */
+     static final short PNJ_MASK = 1 << 6;  /* bit 6 */
+     static final short DEV_MASK = 1 << 7;  /* bit 7 */
+ }
+
+ /*
+  * Fixed leading part of an ISCII converter name. Declared as an instance field, not static.
+  * NOTE(review): presumably a version number is appended to this prefix - confirm where the
+  * name is built; that code is not visible here.
+  */
+ private final String ISCII_CNV_PREFIX = "ISCII,version=";
+
+ /*
+  * Mutable conversion state for one ISCII converter: the option it was created with,
+  * the current/default script delta and mask for each direction, and the context
+  * characters remembered between calls.
+  */
+ @SuppressWarnings("unused")
+ private final class UConverterDataISCII {
+     int option;
+     int contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
+     int contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
+     short defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
+     short currentDeltaFromUnicode; /* current delta in Indic block */
+     short currentDeltaToUnicode; /* current delta in Indic block */
+     short currentMaskFromUnicode; /* mask for current state in fromUnicode */
+     short currentMaskToUnicode; /* mask for current state in toUnicode */
+     short defMaskToUnicode; /* mask for default state in toUnicode */
+     boolean isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
+     boolean resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered */
+     String name;
+     int prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
+
+     /*
+      * Stores the option and name, then puts every other field into its initial state.
+      * The low four bits of option (UCNV_OPTIONS_VERSION_MASK) select the row of
+      * lookupInitialData that supplies the starting script.
+      */
+     UConverterDataISCII(int option, String name) {
+         this.option = option;
+         this.name = name;
+
+         initialize();
+     }
+
+     /*
+      * Resets all state except option and name to the values implied by option.
+      * An ArrayIndexOutOfBoundsException results if the selected row does not exist
+      * in lookupInitialData.
+      */
+     void initialize() {
+         this.contextCharToUnicode = NO_CHAR_MARKER; /* contextCharToUnicode */
+         /*
+          * This assignment used to target currentDeltaFromUnicode, which is set again
+          * below, so the from-Unicode context character was never cleared on re-initialization.
+          */
+         this.contextCharFromUnicode = 0x0000; /* contextCharFromUnicode */
+
+         /* Look the starting script up once; every delta and mask below derives from it. */
+         final LookupDataStruct initial = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK];
+         final short initialDelta = (short)(initial.uniLang * UniLang.DELTA);
+         final short initialMask = initial.maskEnum;
+
+         this.defDeltaToUnicode = initialDelta; /* defDeltaToUnicode */
+         this.currentDeltaFromUnicode = initialDelta; /* currentDeltaFromUnicode */
+         this.currentDeltaToUnicode = initialDelta; /* currentDeltaToUnicode */
+         this.currentMaskToUnicode = initialMask; /* currentMaskToUnicode */
+         this.currentMaskFromUnicode = initialMask; /* currentMaskFromUnicode */
+         this.defMaskToUnicode = initialMask; /* defMaskToUnicode */
+         this.isFirstBuffer = true; /* isFirstBuffer */
+         this.resetToDefaultToUnicode = false; /* resetToDefaultToUnicode */
+         this.prevToUnicodeStatus = 0x0000;
+     }
+ }
+
+ /* One row of lookupInitialData: the three per-script values a converter starts from. */
+ private static final class LookupDataStruct {
+     short uniLang;   /* a UniLang ordinal */
+     short maskEnum;  /* a MaskEnum bit */
+     short isciiLang; /* an ISCIILang code */
+
+     /* Stores the three values unchanged. */
+     LookupDataStruct(short uniLang, short maskEnum, short isciiLang) {
+         this.isciiLang = isciiLang;
+         this.maskEnum = maskEnum;
+         this.uniLang = uniLang;
+     }
+ }
+
+ /*
+  * Starting script data, indexed by (option & UCNV_OPTIONS_VERSION_MASK) in
+  * UConverterDataISCII.initialize(). Rows are in UniLang ordinal order (0..8).
+  * The Telugu row uses KND_MASK: Telugu and Kannada share one validity bit.
+  * NOTE(review): the mask admits indexes 0..15 but only 9 rows exist - confirm that
+  * the option is validated before a converter is created.
+  */
+ private static final LookupDataStruct [] lookupInitialData = {
+ new LookupDataStruct(UniLang.DEVALANGARI, MaskEnum.DEV_MASK, ISCIILang.DEV),
+ new LookupDataStruct(UniLang.BENGALI, MaskEnum.BNG_MASK, ISCIILang.BNG),
+ new LookupDataStruct(UniLang.GURMUKHI, MaskEnum.PNJ_MASK, ISCIILang.PNJ),
+ new LookupDataStruct(UniLang.GUJARATI, MaskEnum.GJR_MASK, ISCIILang.GJR),
+ new LookupDataStruct(UniLang.ORIYA, MaskEnum.ORI_MASK, ISCIILang.ORI),
+ new LookupDataStruct(UniLang.TAMIL, MaskEnum.TML_MASK, ISCIILang.TML),
+ new LookupDataStruct(UniLang.TELUGU, MaskEnum.KND_MASK, ISCIILang.TLG),
+ new LookupDataStruct(UniLang.KANNADA, MaskEnum.KND_MASK, ISCIILang.KND),
+ new LookupDataStruct(UniLang.MALAYALAM, MaskEnum.MLM_MASK, ISCIILang.MLM)
+ };
+
+ /*
+  * The values in the validity table are indexed by the low seven bits of a code point
+  * in the Unicode range 0x0900 - 0x097f (128 entries). The values have a structure like:
+  * -----------------------------------------------------------------
+  * |DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
+  * | | | | | ASM | KND | | |
+  * -----------------------------------------------------------------
+  * If a code point is valid in a particular script
+  * then that bit is turned on.
+  *
+  * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
+  * to represent these languages.
+  *
+  * Telugu and Kannada have the same code points except for Vocallic_RR, which we special-case,
+  * so we combine them and use 1 bit to represent these languages.
+  *
+  * Each row comment reads "ISCII byte: valid mask: Unicode code point".
+  * NOTE(review): the "valid mask" column does not always equal the sum of the masks on
+  * its row (e.g. 0x901 is annotated 0xb8 but sums to 0xf8, and 0x92e is annotated 0xfe but
+  * sums to 0xff). The mask expression is the value the code uses.
+  */
+ private static final short validityTable[] = {
+ /* This state table is tool generated so please do not edit unless you know exactly what you are doing */
+ /* Note: This table was edited to mirror the Windows XP implementation */
+ /* ISCII: Valid: Unicode */
+ /* 0xa0: 0x00: 0x900 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa7: 0xff: 0x908 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa8: 0xff: 0x909 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa9: 0xff: 0x90a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xaa: 0xfe: 0x90b */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x90c */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xae: 0x80: 0x90d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xab: 0x87: 0x90e */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xac: 0xff: 0x90f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xad: 0xff: 0x910 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb2: 0x80: 0x911 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xaf: 0x87: 0x912 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb0: 0xff: 0x913 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb1: 0xff: 0x914 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb3: 0xff: 0x915 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb4: 0xfe: 0x916 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb5: 0xfe: 0x917 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb6: 0xfe: 0x918 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb7: 0xff: 0x919 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb8: 0xff: 0x91a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb9: 0xfe: 0x91b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xba: 0xff: 0x91c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbb: 0xfe: 0x91d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xbc: 0xff: 0x91e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbd: 0xff: 0x91f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbe: 0xfe: 0x920 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xbf: 0xfe: 0x921 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc0: 0xfe: 0x922 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc1: 0xff: 0x923 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc2: 0xff: 0x924 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc3: 0xfe: 0x925 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc4: 0xfe: 0x926 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc5: 0xfe: 0x927 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc6: 0xff: 0x928 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc7: 0x81: 0x929 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.TML_MASK,
+ /* 0xc8: 0xff: 0x92a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc9: 0xfe: 0x92b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xca: 0xfe: 0x92c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xcb: 0xfe: 0x92d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xcc: 0xfe: 0x92e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xcd: 0xff: 0x92f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xcf: 0xff: 0x930 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd0: 0x87: 0x931 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd1: 0xff: 0x932 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd2: 0xb7: 0x933 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd3: 0x83: 0x934 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd4: 0xff: 0x935 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd5: 0xfe: 0x936 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xd6: 0xbf: 0x937 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd7: 0xff: 0x938 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd8: 0xff: 0x939 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x00: 0x93a */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x93b */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe9: 0xda: 0x93c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x93d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xda: 0xff: 0x93e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdb: 0xff: 0x93f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdc: 0xff: 0x940 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdd: 0xff: 0x941 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xde: 0xff: 0x942 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdf: 0xbe: 0x943 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x944 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe3: 0x80: 0x945 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe0: 0x87: 0x946 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe1: 0xff: 0x947 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe2: 0xff: 0x948 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe7: 0x80: 0x949 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe4: 0x87: 0x94a */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe5: 0xff: 0x94b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe6: 0xff: 0x94c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe8: 0xff: 0x94d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xec: 0x00: 0x94e */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xed: 0x00: 0x94f */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x950 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x951 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x952 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x953 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x954 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x955 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x956 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x957 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x958 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x959 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xce: 0x98: 0x95f */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x960 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x961 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x962 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x963 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xea: 0xf8: 0x964 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xeaea: 0x00: 0x965 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xf1: 0xff: 0x966 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf2: 0xff: 0x967 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf3: 0xff: 0x968 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf4: 0xff: 0x969 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf5: 0xff: 0x96a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf6: 0xff: 0x96b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf7: 0xff: 0x96c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf8: 0xff: 0x96d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf9: 0xff: 0x96e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xfa: 0xff: 0x96f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x80: 0x970 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+
+ /*
+ * The length of the array is 128 to provide values for 0x900..0x97f.
+ * The last 15 entries for 0x971..0x97f of the table are all zero
+ * because no Indic script uses such Unicode code points.
+ */
+
+ /* 0x00: 0x00: 0x971 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x972 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x973 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x974 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x975 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x976 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x977 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x978 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x979 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97A */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97B */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97C */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97D */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97E */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97F */ MaskEnum.ZERO,
+ };
+
+ /*
+  * ISCII value for each code point 0x0900..0x097f; the trailing comment on every
+  * entry gives the code point, so the index is (code point - 0x0900). 128 entries.
+  * Entries whose high byte is zero hold a single ISCII byte. Entries with a non-zero
+  * high byte hold two byte values, e.g. 0xeaea for 0x0965 (ISCII_DANDA twice) and
+  * 0xF0B8 for 0x0952 (EXT followed by 0xb8).
+  * NOTE(review): 0xFFFF presumably marks code points without an ISCII mapping, and
+  * two-byte entries are presumably emitted high byte first - confirm in the encoder,
+  * which is not visible here.
+  */
+ private static final char fromUnicodeTable[] = {
+ 0x00a0, /* 0x0900 */
+ 0x00a1, /* 0x0901 */
+ 0x00a2, /* 0x0902 */
+ 0x00a3, /* 0x0903 */
+ 0xa4e0, /* 0x0904 */
+ 0x00a4, /* 0x0905 */
+ 0x00a5, /* 0x0906 */
+ 0x00a6, /* 0x0907 */
+ 0x00a7, /* 0x0908 */
+ 0x00a8, /* 0x0909 */
+ 0x00a9, /* 0x090a */
+ 0x00aa, /* 0x090b */
+ 0xA6E9, /* 0x090c */
+ 0x00ae, /* 0x090d */
+ 0x00ab, /* 0x090e */
+ 0x00ac, /* 0x090f */
+ 0x00ad, /* 0x0910 */
+ 0x00b2, /* 0x0911 */
+ 0x00af, /* 0x0912 */
+ 0x00b0, /* 0x0913 */
+ 0x00b1, /* 0x0914 */
+ 0x00b3, /* 0x0915 */
+ 0x00b4, /* 0x0916 */
+ 0x00b5, /* 0x0917 */
+ 0x00b6, /* 0x0918 */
+ 0x00b7, /* 0x0919 */
+ 0x00b8, /* 0x091a */
+ 0x00b9, /* 0x091b */
+ 0x00ba, /* 0x091c */
+ 0x00bb, /* 0x091d */
+ 0x00bc, /* 0x091e */
+ 0x00bd, /* 0x091f */
+ 0x00be, /* 0x0920 */
+ 0x00bf, /* 0x0921 */
+ 0x00c0, /* 0x0922 */
+ 0x00c1, /* 0x0923 */
+ 0x00c2, /* 0x0924 */
+ 0x00c3, /* 0x0925 */
+ 0x00c4, /* 0x0926 */
+ 0x00c5, /* 0x0927 */
+ 0x00c6, /* 0x0928 */
+ 0x00c7, /* 0x0929 */
+ 0x00c8, /* 0x092a */
+ 0x00c9, /* 0x092b */
+ 0x00ca, /* 0x092c */
+ 0x00cb, /* 0x092d */
+ 0x00cc, /* 0x092e */
+ 0x00cd, /* 0x092f */
+ 0x00cf, /* 0x0930 */
+ 0x00d0, /* 0x0931 */
+ 0x00d1, /* 0x0932 */
+ 0x00d2, /* 0x0933 */
+ 0x00d3, /* 0x0934 */
+ 0x00d4, /* 0x0935 */
+ 0x00d5, /* 0x0936 */
+ 0x00d6, /* 0x0937 */
+ 0x00d7, /* 0x0938 */
+ 0x00d8, /* 0x0939 */
+ 0xFFFF, /* 0x093a */
+ 0xFFFF, /* 0x093b */
+ 0x00e9, /* 0x093c */
+ 0xEAE9, /* 0x093d */
+ 0x00da, /* 0x093e */
+ 0x00db, /* 0x093f */
+ 0x00dc, /* 0x0940 */
+ 0x00dd, /* 0x0941 */
+ 0x00de, /* 0x0942 */
+ 0x00df, /* 0x0943 */
+ 0xDFE9, /* 0x0944 */
+ 0x00e3, /* 0x0945 */
+ 0x00e0, /* 0x0946 */
+ 0x00e1, /* 0x0947 */
+ 0x00e2, /* 0x0948 */
+ 0x00e7, /* 0x0949 */
+ 0x00e4, /* 0x094a */
+ 0x00e5, /* 0x094b */
+ 0x00e6, /* 0x094c */
+ 0x00e8, /* 0x094d */
+ 0x00ec, /* 0x094e */
+ 0x00ed, /* 0x094f */
+ 0xA1E9, /* 0x0950 */ /* OM Symbol */
+ 0xFFFF, /* 0x0951 */
+ 0xF0B8, /* 0x0952 */
+ 0xFFFF, /* 0x0953 */
+ 0xFFFF, /* 0x0954 */
+ 0xFFFF, /* 0x0955 */
+ 0xFFFF, /* 0x0956 */
+ 0xFFFF, /* 0x0957 */
+ 0xb3e9, /* 0x0958 */
+ 0xb4e9, /* 0x0959 */
+ 0xb5e9, /* 0x095a */
+ 0xbae9, /* 0x095b */
+ 0xbfe9, /* 0x095c */
+ 0xC0E9, /* 0x095d */
+ 0xc9e9, /* 0x095e */
+ 0x00ce, /* 0x095f */
+ 0xAAe9, /* 0x0960 */
+ 0xA7E9, /* 0x0961 */
+ 0xDBE9, /* 0x0962 */
+ 0xDCE9, /* 0x0963 */
+ 0x00ea, /* 0x0964 */
+ 0xeaea, /* 0x0965 */
+ 0x00f1, /* 0x0966 */
+ 0x00f2, /* 0x0967 */
+ 0x00f3, /* 0x0968 */
+ 0x00f4, /* 0x0969 */
+ 0x00f5, /* 0x096a */
+ 0x00f6, /* 0x096b */
+ 0x00f7, /* 0x096c */
+ 0x00f8, /* 0x096d */
+ 0x00f9, /* 0x096e */
+ 0x00fa, /* 0x096f */
+ 0xF0BF, /* 0x0970 */
+ 0xFFFF, /* 0x0971 */
+ 0xFFFF, /* 0x0972 */
+ 0xFFFF, /* 0x0973 */
+ 0xFFFF, /* 0x0974 */
+ 0xFFFF, /* 0x0975 */
+ 0xFFFF, /* 0x0976 */
+ 0xFFFF, /* 0x0977 */
+ 0xFFFF, /* 0x0978 */
+ 0xFFFF, /* 0x0979 */
+ 0xFFFF, /* 0x097a */
+ 0xFFFF, /* 0x097b */
+ 0xFFFF, /* 0x097c */
+ 0xFFFF, /* 0x097d */
+ 0xFFFF, /* 0x097e */
+ 0xFFFF, /* 0x097f */
+ };
+ /*
+  * ISCII byte -> Unicode code unit, indexed by the byte value (0x00 - 0xff).
+  * Bytes 0x00 - 0xa0 map to themselves; bytes 0xa1 - 0xfa map into the U+09xx
+  * block (0xd9 maps to U+200D). GetMapping() reads this table and
+  * WriteToTargetToU() later adds the delta of the current script to the value.
+  * 0xFFFF marks bytes without a direct mapping.
+  * NOTE(review): 0xFFFF is presumably UConverterConstants.missingCharMarker - confirm.
+  */
+ private static final char toUnicodeTable[] = {
+ 0x0000, /* 0x00 */
+ 0x0001, /* 0x01 */
+ 0x0002, /* 0x02 */
+ 0x0003, /* 0x03 */
+ 0x0004, /* 0x04 */
+ 0x0005, /* 0x05 */
+ 0x0006, /* 0x06 */
+ 0x0007, /* 0x07 */
+ 0x0008, /* 0x08 */
+ 0x0009, /* 0x09 */
+ 0x000a, /* 0x0a */
+ 0x000b, /* 0x0b */
+ 0x000c, /* 0x0c */
+ 0x000d, /* 0x0d */
+ 0x000e, /* 0x0e */
+ 0x000f, /* 0x0f */
+ 0x0010, /* 0x10 */
+ 0x0011, /* 0x11 */
+ 0x0012, /* 0x12 */
+ 0x0013, /* 0x13 */
+ 0x0014, /* 0x14 */
+ 0x0015, /* 0x15 */
+ 0x0016, /* 0x16 */
+ 0x0017, /* 0x17 */
+ 0x0018, /* 0x18 */
+ 0x0019, /* 0x19 */
+ 0x001a, /* 0x1a */
+ 0x001b, /* 0x1b */
+ 0x001c, /* 0x1c */
+ 0x001d, /* 0x1d */
+ 0x001e, /* 0x1e */
+ 0x001f, /* 0x1f */
+ 0x0020, /* 0x20 */
+ 0x0021, /* 0x21 */
+ 0x0022, /* 0x22 */
+ 0x0023, /* 0x23 */
+ 0x0024, /* 0x24 */
+ 0x0025, /* 0x25 */
+ 0x0026, /* 0x26 */
+ 0x0027, /* 0x27 */
+ 0x0028, /* 0x28 */
+ 0x0029, /* 0x29 */
+ 0x002a, /* 0x2a */
+ 0x002b, /* 0x2b */
+ 0x002c, /* 0x2c */
+ 0x002d, /* 0x2d */
+ 0x002e, /* 0x2e */
+ 0x002f, /* 0x2f */
+ 0x0030, /* 0x30 */
+ 0x0031, /* 0x31 */
+ 0x0032, /* 0x32 */
+ 0x0033, /* 0x33 */
+ 0x0034, /* 0x34 */
+ 0x0035, /* 0x35 */
+ 0x0036, /* 0x36 */
+ 0x0037, /* 0x37 */
+ 0x0038, /* 0x38 */
+ 0x0039, /* 0x39 */
+ 0x003A, /* 0x3A */
+ 0x003B, /* 0x3B */
+ 0x003c, /* 0x3c */
+ 0x003d, /* 0x3d */
+ 0x003e, /* 0x3e */
+ 0x003f, /* 0x3f */
+ 0x0040, /* 0x40 */
+ 0x0041, /* 0x41 */
+ 0x0042, /* 0x42 */
+ 0x0043, /* 0x43 */
+ 0x0044, /* 0x44 */
+ 0x0045, /* 0x45 */
+ 0x0046, /* 0x46 */
+ 0x0047, /* 0x47 */
+ 0x0048, /* 0x48 */
+ 0x0049, /* 0x49 */
+ 0x004a, /* 0x4a */
+ 0x004b, /* 0x4b */
+ 0x004c, /* 0x4c */
+ 0x004d, /* 0x4d */
+ 0x004e, /* 0x4e */
+ 0x004f, /* 0x4f */
+ 0x0050, /* 0x50 */
+ 0x0051, /* 0x51 */
+ 0x0052, /* 0x52 */
+ 0x0053, /* 0x53 */
+ 0x0054, /* 0x54 */
+ 0x0055, /* 0x55 */
+ 0x0056, /* 0x56 */
+ 0x0057, /* 0x57 */
+ 0x0058, /* 0x58 */
+ 0x0059, /* 0x59 */
+ 0x005a, /* 0x5a */
+ 0x005b, /* 0x5b */
+ 0x005c, /* 0x5c */
+ 0x005d, /* 0x5d */
+ 0x005e, /* 0x5e */
+ 0x005f, /* 0x5f */
+ 0x0060, /* 0x60 */
+ 0x0061, /* 0x61 */
+ 0x0062, /* 0x62 */
+ 0x0063, /* 0x63 */
+ 0x0064, /* 0x64 */
+ 0x0065, /* 0x65 */
+ 0x0066, /* 0x66 */
+ 0x0067, /* 0x67 */
+ 0x0068, /* 0x68 */
+ 0x0069, /* 0x69 */
+ 0x006a, /* 0x6a */
+ 0x006b, /* 0x6b */
+ 0x006c, /* 0x6c */
+ 0x006d, /* 0x6d */
+ 0x006e, /* 0x6e */
+ 0x006f, /* 0x6f */
+ 0x0070, /* 0x70 */
+ 0x0071, /* 0x71 */
+ 0x0072, /* 0x72 */
+ 0x0073, /* 0x73 */
+ 0x0074, /* 0x74 */
+ 0x0075, /* 0x75 */
+ 0x0076, /* 0x76 */
+ 0x0077, /* 0x77 */
+ 0x0078, /* 0x78 */
+ 0x0079, /* 0x79 */
+ 0x007a, /* 0x7a */
+ 0x007b, /* 0x7b */
+ 0x007c, /* 0x7c */
+ 0x007d, /* 0x7d */
+ 0x007e, /* 0x7e */
+ 0x007f, /* 0x7f */
+ 0x0080, /* 0x80 */
+ 0x0081, /* 0x81 */
+ 0x0082, /* 0x82 */
+ 0x0083, /* 0x83 */
+ 0x0084, /* 0x84 */
+ 0x0085, /* 0x85 */
+ 0x0086, /* 0x86 */
+ 0x0087, /* 0x87 */
+ 0x0088, /* 0x88 */
+ 0x0089, /* 0x89 */
+ 0x008a, /* 0x8a */
+ 0x008b, /* 0x8b */
+ 0x008c, /* 0x8c */
+ 0x008d, /* 0x8d */
+ 0x008e, /* 0x8e */
+ 0x008f, /* 0x8f */
+ 0x0090, /* 0x90 */
+ 0x0091, /* 0x91 */
+ 0x0092, /* 0x92 */
+ 0x0093, /* 0x93 */
+ 0x0094, /* 0x94 */
+ 0x0095, /* 0x95 */
+ 0x0096, /* 0x96 */
+ 0x0097, /* 0x97 */
+ 0x0098, /* 0x98 */
+ 0x0099, /* 0x99 */
+ 0x009a, /* 0x9a */
+ 0x009b, /* 0x9b */
+ 0x009c, /* 0x9c */
+ 0x009d, /* 0x9d */
+ 0x009e, /* 0x9e */
+ 0x009f, /* 0x9f */
+ 0x00A0, /* 0xa0 */
+ 0x0901, /* 0xa1 */
+ 0x0902, /* 0xa2 */
+ 0x0903, /* 0xa3 */
+ 0x0905, /* 0xa4 */
+ 0x0906, /* 0xa5 */
+ 0x0907, /* 0xa6 */
+ 0x0908, /* 0xa7 */
+ 0x0909, /* 0xa8 */
+ 0x090a, /* 0xa9 */
+ 0x090b, /* 0xaa */
+ 0x090e, /* 0xab */
+ 0x090f, /* 0xac */
+ 0x0910, /* 0xad */
+ 0x090d, /* 0xae */
+ 0x0912, /* 0xaf */
+ 0x0913, /* 0xb0 */
+ 0x0914, /* 0xb1 */
+ 0x0911, /* 0xb2 */
+ 0x0915, /* 0xb3 */
+ 0x0916, /* 0xb4 */
+ 0x0917, /* 0xb5 */
+ 0x0918, /* 0xb6 */
+ 0x0919, /* 0xb7 */
+ 0x091a, /* 0xb8 */
+ 0x091b, /* 0xb9 */
+ 0x091c, /* 0xba */
+ 0x091d, /* 0xbb */
+ 0x091e, /* 0xbc */
+ 0x091f, /* 0xbd */
+ 0x0920, /* 0xbe */
+ 0x0921, /* 0xbf */
+ 0x0922, /* 0xc0 */
+ 0x0923, /* 0xc1 */
+ 0x0924, /* 0xc2 */
+ 0x0925, /* 0xc3 */
+ 0x0926, /* 0xc4 */
+ 0x0927, /* 0xc5 */
+ 0x0928, /* 0xc6 */
+ 0x0929, /* 0xc7 */
+ 0x092a, /* 0xc8 */
+ 0x092b, /* 0xc9 */
+ 0x092c, /* 0xca */
+ 0x092d, /* 0xcb */
+ 0x092e, /* 0xcc */
+ 0x092f, /* 0xcd */
+ 0x095f, /* 0xce */
+ 0x0930, /* 0xcf */
+ 0x0931, /* 0xd0 */
+ 0x0932, /* 0xd1 */
+ 0x0933, /* 0xd2 */
+ 0x0934, /* 0xd3 */
+ 0x0935, /* 0xd4 */
+ 0x0936, /* 0xd5 */
+ 0x0937, /* 0xd6 */
+ 0x0938, /* 0xd7 */
+ 0x0939, /* 0xd8 */
+ 0x200D, /* 0xd9 */
+ 0x093e, /* 0xda */
+ 0x093f, /* 0xdb */
+ 0x0940, /* 0xdc */
+ 0x0941, /* 0xdd */
+ 0x0942, /* 0xde */
+ 0x0943, /* 0xdf */
+ 0x0946, /* 0xe0 */
+ 0x0947, /* 0xe1 */
+ 0x0948, /* 0xe2 */
+ 0x0945, /* 0xe3 */
+ 0x094a, /* 0xe4 */
+ 0x094b, /* 0xe5 */
+ 0x094c, /* 0xe6 */
+ 0x0949, /* 0xe7 */
+ 0x094d, /* 0xe8 */
+ 0x093c, /* 0xe9 */
+ 0x0964, /* 0xea */
+ 0xFFFF, /* 0xeb */
+ 0xFFFF, /* 0xec */
+ 0xFFFF, /* 0xed */
+ 0xFFFF, /* 0xee */
+ 0xFFFF, /* 0xef */
+ 0xFFFF, /* 0xf0 */
+ 0x0966, /* 0xf1 */
+ 0x0967, /* 0xf2 */
+ 0x0968, /* 0xf3 */
+ 0x0969, /* 0xf4 */
+ 0x096a, /* 0xf5 */
+ 0x096b, /* 0xf6 */
+ 0x096c, /* 0xf7 */
+ 0x096d, /* 0xf8 */
+ 0x096e, /* 0xf9 */
+ 0x096f, /* 0xfa */
+ 0xFFFF, /* 0xfb */
+ 0xFFFF, /* 0xfc */
+ 0xFFFF, /* 0xfd */
+ 0xFFFF, /* 0xfe */
+ 0xFFFF, /* 0xff */
+ };
+ /*
+  * { ISCII byte, Unicode code unit } pairs used by the decoder: when ISCII_NUKTA
+  * follows the listed ISCII byte, the pair is replaced by the listed code unit
+  * (subject to the validity check for the current script).
+  * Row 0 is not a mapping: its first element is the total row count (itself
+  * included) and serves as the upper bound of the search loop, which starts at row 1.
+  */
+ private static final char nuktaSpecialCases[][] = {
+ { 16 /* length of array */ , 0 },
+ { 0xa6, 0x090c },
+ { 0xea, 0x093d },
+ { 0xdf, 0x0944 },
+ { 0xa1, 0x0950 },
+ { 0xb3, 0x0958 },
+ { 0xb4, 0x0959 },
+ { 0xb5, 0x095a },
+ { 0xba, 0x095b },
+ { 0xbf, 0x095c },
+ { 0xc0, 0x095d },
+ { 0xc9, 0x095e },
+ { 0xaa, 0x0960 },
+ { 0xa7, 0x0961 },
+ { 0xdb, 0x0962 },
+ { 0xdc, 0x0963 }
+ };
+ /*
+  * { ISCII byte, Unicode code unit } pairs used by the decoder: when
+  * ISCII_VOWEL_SIGN_E follows the listed ISCII byte, the pair is replaced by the
+  * listed code unit. Row 0 holds the total row count (itself included), which
+  * bounds the search loop that starts at row 1.
+  */
+ private static final char vowelSignESpecialCases[][] = {
+ { 2 /* length of array */ , 0 },
+ { 0xA4, 0x0904 }
+ };
+
+ /*
+  * Script switch table for the decoder: each row is { UniLang script index, validity mask }.
+  * decodeLoop() indexes it with the low nibble of the byte that follows ATR;
+  * element [0] is multiplied by UniLang.DELTA to obtain the delta added to the
+  * Devanagari-normalized code units, element [1] becomes the current validity mask.
+  */
+ private static final short lookupTable[][] = {
+ { MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */
+ { MaskEnum.ZERO, MaskEnum.ZERO }, /* ROMAN */
+ { UniLang.DEVALANGARI, MaskEnum.DEV_MASK },
+ { UniLang.BENGALI, MaskEnum.BNG_MASK },
+ { UniLang.TAMIL, MaskEnum.TML_MASK },
+ /* the Telugu row uses the Kannada validity mask */
+ { UniLang.TELUGU, MaskEnum.KND_MASK },
+ /* NOTE(review): second Bengali row - presumably the slot of the Assamese script code; confirm */
+ { UniLang.BENGALI, MaskEnum.BNG_MASK },
+ { UniLang.ORIYA, MaskEnum.ORI_MASK },
+ { UniLang.KANNADA, MaskEnum.KND_MASK },
+ { UniLang.MALAYALAM, MaskEnum.MLM_MASK },
+ { UniLang.GUJARATI, MaskEnum.GJR_MASK },
+ { UniLang.GURMUKHI, MaskEnum.PNJ_MASK }
+ };
+
+ /*
+  * Conversion state (current script deltas and masks, context characters).
+  * Created in the constructor and read/written by both inner coder classes.
+  * NOTE(review): the same instance is used by every decoder and encoder created
+  * from this charset object - confirm that coders are never used concurrently.
+  */
+ private UConverterDataISCII extraInfo = null;
+ /* substitution bytes handed to the encoder's superclass constructor */
+ protected byte[] fromUSubstitution = new byte[]{(byte)0x1A};
+
+ public CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ maxBytesPerChar = 4;
+ minBytesPerChar = 1;
+ maxCharsPerByte = 1;
+ //get the version number of the ISCII converter
+ int option = Integer.parseInt(icuCanonicalName.substring(14));
+
+ extraInfo = new UConverterDataISCII(
+ option,
+ new String(ISCII_CNV_PREFIX + (option & UCNV_OPTIONS_VERSION_MASK)) /* name */
+ );
+
+ initializePNJSets();
+ }
+
+ /* Initialize the two UnicodeSets use for proper Gurmukhi conversion if they have not already been created. */
+ private void initializePNJSets() {
+ if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) {
+ return;
+ }
+ PNJ_BINDI_TIPPI_SET = new UnicodeSet();
+ PNJ_CONSONANT_SET = new UnicodeSet();
+
+ PNJ_CONSONANT_SET.add(0x0a15, 0x0a28);
+ PNJ_CONSONANT_SET.add(0x0a2a, 0x0a30);
+ PNJ_CONSONANT_SET.add(0x0a35, 0x0a36);
+ PNJ_CONSONANT_SET.add(0x0a38, 0x0a39);
+
+ PNJ_BINDI_TIPPI_SET.addAll(PNJ_CONSONANT_SET);
+ PNJ_BINDI_TIPPI_SET.add(0x0a05);
+ PNJ_BINDI_TIPPI_SET.add(0x0a07);
+
+ PNJ_BINDI_TIPPI_SET.add(0x0a41, 0x0a42);
+ PNJ_BINDI_TIPPI_SET.add(0x0a3f);
+
+ PNJ_CONSONANT_SET.compact();
+ PNJ_BINDI_TIPPI_SET.compact();
+ }
+
+ /*
+ * Rules for ISCII to Unicode converter
+ * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
+ * which has both precomposed and decomposed forms of characters,
+ * pre-context and post-context need to be considered.
+ *
+ * Post context
+ * i) ATR : Attribute code is used to declare the font and script switching.
+ * Currently we only switch scripts; font codes are consumed without generating an error
+ * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
+ * obsolete characters
+ * Pre context
+ * i) Halant: if preceded by a halant then it is an explicit halant
+ * ii) Nukta:
+ * a) if preceded by a halant then it is a soft halant
+ * b) if preceded by specific consonants and the ligatures have pre-composed
+ * characters in Unicode then convert to pre-composed characters
+ * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
+ */
+ class CharsetDecoderISCII extends CharsetDecoderICU {
+ /* Creates a decoder for the given charset and immediately brings it into its reset state. */
+ public CharsetDecoderISCII(CharsetICU cs) {
+     super(cs);
+     this.implReset();
+ }
+
+ /*
+  * Resets the decoder: lets the superclass reset itself, marks the delayed
+  * code unit as empty (0xFFFF) and re-initializes the charset's ISCII state.
+  */
+ protected void implReset() {
+     super.implReset();
+     toUnicodeStatus = 0xFFFF;
+     extraInfo.initialize();
+ }
+
+ /**
+  * Converts ISCII bytes from source into UTF-16 code units in target.
+  *
+  * Output is delayed by one character: the code unit mapped from the current
+  * byte is parked in toUnicodeStatus and written when the next byte has been
+  * examined, because that byte may combine with it (nukta, vowel sign E, double
+  * danda, Gurmukhi tippi/adhak handling). For Gurmukhi an additional consonant
+  * may be parked in data.prevToUnicodeStatus.
+  *
+  * @param source ISCII input bytes
+  * @param target receives the decoded code units; code units that do not fit
+  *        are stored in charErrorBufferArray
+  * @param offsets if not null, receives one source offset per code unit written
+  * @param flush true if no more input follows; a parked code unit is then written
+  *        and a dangling ATR/EXT/INV byte is recorded in toUBytesArray
+  * @return UNDERFLOW, OVERFLOW, or a malformed/unmappable result of length 1
+  *         (the offending byte is stored in toUBytesArray[0])
+  */
+ @SuppressWarnings("fallthrough")
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+     CoderResult cr = CoderResult.UNDERFLOW;
+     int targetUniChar = 0x0000;
+     short sourceChar = 0x0000;
+     UConverterDataISCII data;
+     boolean gotoCallBack = false;
+     int offset = 0;
+
+     data = extraInfo;
+     //data.contextCharToUnicode; /* contains previous ISCII codepoint visited */
+     //this.toUnicodeStatus; /* contains the mapping to Unicode of the above codepoint */
+
+     while (source.hasRemaining()) {
+         targetUniChar = UConverterConstants.missingCharMarker;
+
+         if (target.hasRemaining()) {
+             sourceChar = (short)((short)source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+             /* look at the post-context perform special processing */
+             if (data.contextCharToUnicode == ATR) {
+                 /* If we have ATR in data.contextCharToUnicode then we need to change our
+                  * state to Indic Script specified by sourceChar
+                  */
+                 /* check if the sourceChar is supported script range */
+                 if (((short)(ISCIILang.PNJ - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (ISCIILang.PNJ - ISCIILang.DEV)) {
+                     data.currentDeltaToUnicode = (short)(lookupTable[sourceChar & 0x0F][0] * UniLang.DELTA);
+                     data.currentMaskToUnicode = lookupTable[sourceChar & 0x0F][1];
+                 } else if (sourceChar == ISCIILang.DEF) {
+                     /* switch back to default */
+                     data.currentDeltaToUnicode = data.defDeltaToUnicode;
+                     data.currentMaskToUnicode = data.defMaskToUnicode;
+                 } else {
+                     if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
+                         /* these are display codes consume and continue */
+                     } else {
+                         cr = CoderResult.malformedForLength(1);
+                         /* reset */
+                         data.contextCharToUnicode = NO_CHAR_MARKER;
+                         gotoCallBack = true;
+                     }
+                 }
+                 /* reset */
+                 if (!gotoCallBack) {
+                     data.contextCharToUnicode = NO_CHAR_MARKER;
+                     continue;
+                 }
+             } else if (data.contextCharToUnicode == EXT) {
+                 /* check if sourceChar is in 0xA1 - 0xEE range */
+                 if (((short)(EXT_RANGE_END - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
+                     /* We currently support only Anudatta and Devanagari abbreviation sign */
+                     if (sourceChar == 0xBF || sourceChar == 0xB8) {
+                         targetUniChar = (sourceChar == 0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
+
+                         /* find out if the mapping is valid in this state */
+                         if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) {
+                             data.contextCharToUnicode = NO_CHAR_MARKER;
+
+                             /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+                             if (data.prevToUnicodeStatus != 0) {
+                                 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
+                                 data.prevToUnicodeStatus = 0x0000;
+                             }
+                             /* write to target */
+                             cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
+
+                             continue;
+                         }
+                     }
+                     /* byte unit is unassigned */
+                     targetUniChar = UConverterConstants.missingCharMarker;
+                     cr = CoderResult.unmappableForLength(1);
+                 } else {
+                     /* only 0xA1 - 0xEE are legal after EXT char */
+                     data.contextCharToUnicode = NO_CHAR_MARKER;
+                     cr = CoderResult.malformedForLength(1);
+                 }
+                 gotoCallBack = true;
+             } else if (data.contextCharToUnicode == ISCII_INV) {
+                 if (sourceChar == ISCII_HALANT) {
+                     targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
+                 } else {
+                     targetUniChar = ZWJ;
+                 }
+
+                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+                 if (data.prevToUnicodeStatus != 0) {
+                     cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
+                     data.prevToUnicodeStatus = 0x0000;
+                 }
+
+                 /* write to target */
+                 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
+                 /* reset */
+                 data.contextCharToUnicode = NO_CHAR_MARKER;
+             }
+
+             /* look at the pre-context and perform special processing */
+             if (!gotoCallBack) {
+                 switch (sourceChar) {
+                 case ISCII_INV:
+                 case EXT: /* falls through */
+                 case ATR:
+                     data.contextCharToUnicode = (char)sourceChar;
+
+                     if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
+                         /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+                         if (data.prevToUnicodeStatus != 0) {
+                             cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
+                             data.prevToUnicodeStatus = 0x0000;
+                         }
+                         cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+                         this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+                     }
+                     continue;
+                 case ISCII_DANDA:
+                     /* handle double danda */
+                     if (data.contextCharToUnicode == ISCII_DANDA) {
+                         targetUniChar = DOUBLE_DANDA;
+                         /* clear the context */
+                         data.contextCharToUnicode = NO_CHAR_MARKER;
+                         this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+                     } else {
+                         targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+                         data.contextCharToUnicode = (char)sourceChar;
+                     }
+                     break;
+                 case ISCII_HALANT:
+                     /* handle explicit halant */
+                     if (data.contextCharToUnicode == ISCII_HALANT) {
+                         targetUniChar = ZWNJ;
+                         /* clear context */
+                         data.contextCharToUnicode = NO_CHAR_MARKER;
+                     } else {
+                         targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+                         data.contextCharToUnicode = (char)sourceChar;
+                     }
+                     break;
+                 case 0x0A:
+                     /* fall through */
+                 case 0x0D:
+                     data.resetToDefaultToUnicode = true;
+                     targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+                     data.contextCharToUnicode = (char)sourceChar;
+                     break;
+                 case ISCII_VOWEL_SIGN_E:
+                     /* find <CHAR> + SIGN_VOWEL_E special mapping */
+                     int n = 1;
+                     boolean find = false;
+                     for (; n < vowelSignESpecialCases[0][0]; n++) {
+                         if (vowelSignESpecialCases[n][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
+                             targetUniChar = vowelSignESpecialCases[n][1];
+                             find = true;
+                             break;
+                         }
+                     }
+                     if (find) {
+                         /* find out if the mapping is valid in this state.
+                          * The to-Unicode mask must be used here: it follows the script selected
+                          * through ATR, whereas currentMaskFromUnicode follows the encoder.
+                          */
+                         if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) {
+                             data.contextCharToUnicode = NO_CHAR_MARKER;
+                             this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+                             break;
+                         }
+                     }
+                     targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+                     data.contextCharToUnicode = (char)sourceChar;
+                     break;
+                 case ISCII_NUKTA:
+                     /* handle soft halant */
+                     if (data.contextCharToUnicode == ISCII_HALANT) {
+                         targetUniChar = ZWJ;
+                         /* clear the context */
+                         data.contextCharToUnicode = NO_CHAR_MARKER;
+                         break;
+                     } else if (data.currentDeltaToUnicode == PNJ_DELTA && data.contextCharToUnicode == 0xc0) {
+                         /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
+                          * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
+                          * WriteToTargetToU is given 0x095c instead of 0xa5c because that method will automatically
+                          * convert the code point given based on the delta provided.
+                          */
+                         cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_RRA, (short)0);
+                         if (!cr.isOverflow()) {
+                             cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_SIGN_VIRAMA, (short)0);
+                             if (!cr.isOverflow()) {
+                                 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_HA, (short)0);
+                             } else {
+                                 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA;
+                             }
+                         } else {
+                             this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_SIGN_VIRAMA;
+                             this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA;
+                         }
+                         this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+                         data.contextCharToUnicode = NO_CHAR_MARKER;
+                         if (!cr.isError()) {
+                             continue;
+                         }
+                         break;
+                     } else {
+                         /* try to handle <CHAR> + ISCII_NUKTA special mappings */
+                         int i = 1;
+                         boolean found = false;
+                         for (; i < nuktaSpecialCases[0][0]; i++) {
+                             if (nuktaSpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
+                                 targetUniChar = nuktaSpecialCases[i][1];
+                                 found = true;
+                                 break;
+                             }
+                         }
+                         if (found) {
+                             /* find out if the mapping is valid in this state */
+                             if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) {
+                                 data.contextCharToUnicode = NO_CHAR_MARKER;
+                                 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+                                 if (data.currentDeltaToUnicode == PNJ_DELTA) {
+                                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+                                     if (data.prevToUnicodeStatus != 0) {
+                                         cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
+                                         data.prevToUnicodeStatus = 0x0000;
+                                     }
+                                     cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
+                                     continue;
+                                 }
+                                 break;
+                             }
+                             /* else fall through to default */
+                         }
+                         /* else fall through to default */
+                     }
+
+                 default:
+                     targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+                     data.contextCharToUnicode = (char)sourceChar;
+                     break;
+                 } //end of switch
+             }//end of CallBack if statement
+
+             if (!gotoCallBack && this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
+                 /* Check to make sure that consonant clusters are handled correctly for Gurmukhi script. */
+                 if (data.currentDeltaToUnicode == PNJ_DELTA && data.prevToUnicodeStatus != 0 && PNJ_CONSONANT_SET.contains(data.prevToUnicodeStatus) &&
+                         (this.toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data.prevToUnicodeStatus) {
+                     if (offsets != null) {
+                         offset = source.position() - 3;
+                     }
+                     cr = WriteToTargetToU(offsets, offset, source, target, PNJ_ADHAK, (short)0);
+                     cr = WriteToTargetToU(offsets, offset, source, target, data.prevToUnicodeStatus, (short)0);
+                     data.prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
+                     toUnicodeStatus = UConverterConstants.missingCharMarker;
+                     continue;
+                 } else {
+                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+                     if (data.prevToUnicodeStatus != 0) {
+                         cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0);
+                         data.prevToUnicodeStatus = 0x0000;
+                     }
+                     /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
+                      * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
+                      */
+                     if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && PNJ_BINDI_TIPPI_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) {
+                         targetUniChar = PNJ_TIPPI - PNJ_DELTA;
+                         cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, PNJ_DELTA);
+                     } else if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && PNJ_CONSONANT_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) {
+                         /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
+                         data.prevToUnicodeStatus = this.toUnicodeStatus + PNJ_DELTA;
+                     } else {
+                         /* write the previously mapped codepoint */
+                         cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+                     }
+                 }
+                 this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+             }
+
+             if (!gotoCallBack && targetUniChar != UConverterConstants.missingCharMarker) {
+                 /* now save the targetUniChar for delayed write */
+                 this.toUnicodeStatus = (char)targetUniChar;
+                 if (data.resetToDefaultToUnicode) {
+                     data.currentDeltaToUnicode = data.defDeltaToUnicode;
+                     data.currentMaskToUnicode = data.defMaskToUnicode;
+                     data.resetToDefaultToUnicode = false;
+                 }
+             } else {
+                 /* we reach here only if targetUniChar == missingCharMarker
+                  * so assign codes to reason and err
+                  */
+                 if (!gotoCallBack) {
+                     cr = CoderResult.unmappableForLength(1);
+                 }
+//CallBack :
+                 toUBytesArray[0] = (byte)sourceChar;
+                 toULength = 1;
+                 gotoCallBack = false;
+                 break;
+             }
+         } else {
+             cr = CoderResult.OVERFLOW;
+             break;
+         }
+
+     } //end of while
+
+     if (cr.isUnderflow() && flush && !source.hasRemaining()) {
+         /*end of the input stream */
+         if (data.contextCharToUnicode == ATR || data.contextCharToUnicode == EXT || data.contextCharToUnicode == ISCII_INV) {
+             /* set toUBytes[] */
+             toUBytesArray[0] = (byte)data.contextCharToUnicode;
+             toULength = 1;
+
+             /* avoid looping on truncated sequences */
+             data.contextCharToUnicode = NO_CHAR_MARKER;
+         } else {
+             toULength = 0;
+         }
+
+         if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
+             /* output a remaining target character */
+             WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+             this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+         }
+     }
+     return cr;
+ }
+
+ private CoderResult WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+ /* add offset to current Indic Block */
+ if (targetUniChar > ASCII_END &&
+ targetUniChar != ZWJ &&
+ targetUniChar != ZWNJ &&
+ targetUniChar != DANDA &&
+ targetUniChar != DOUBLE_DANDA) {
+ targetUniChar += delta;
+ }
+
+ /* now write the targetUniChar */
+ if (target.hasRemaining()) {
+ target.put((char)targetUniChar);
+ if (offsets != null) {
+ offsets.put(offset);
+ }
+ } else {
+ charErrorBufferArray[charErrorBufferLength++] = (char)targetUniChar;
+ cr = CoderResult.OVERFLOW;
+ }
+ return cr;
+ }
+
+ private int GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data) {
+ targetUniChar = toUnicodeTable[sourceChar];
+ /* is the code point valid in current script? */
+ if (sourceChar > ASCII_END &&
+ (validityTable[(short)targetUniChar & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) == 0) {
+ /* Vocallic RR is assigne in ISCII Telugu and Unicode */
+ if (data.currentDeltaToUnicode != (TELUGU_DELTA) || targetUniChar != VOCALLIC_RR) {
+ targetUniChar = UConverterConstants.missingCharMarker;
+ }
+ }
+ return targetUniChar;
+ }
+ }
+
+ /*
+ * Rules:
+ * Explicit Halant :
+ * <HALANT> + <ZWNJ>
+ * Soft Halant :
+ * <HALANT> + <ZWJ>
+ */
+ class CharsetEncoderISCII extends CharsetEncoderICU {
+ /* Creates an encoder for the given charset with the charset's substitution bytes and resets it. */
+ public CharsetEncoderISCII(CharsetICU cs) {
+     super(cs, fromUSubstitution);
+     this.implReset();
+ }
+
+ /* Resets the encoder: the superclass resets itself, then the charset's ISCII state is re-initialized. */
+ protected void implReset() {
+     super.implReset();
+     extraInfo.initialize();
+ }
+
+ /*
+  * Converts UTF-16 code units from source into ISCII bytes in target.
+  *
+  * Indic code units are normalized to the Devanagari block by subtracting the
+  * delta of their script and then mapped through fromUnicodeTable. Whenever the
+  * script block changes (or on the first Indic character) an ATR byte plus the
+  * ISCII language code is written first; the same pair is written after an LF
+  * that is followed by more input.
+  *
+  * source  - UTF-16 input
+  * target  - receives the ISCII bytes (bytes that do not fit go to the error buffer
+  *           via WriteToTargetFromU)
+  * offsets - if not null, receives source offsets for the bytes written
+  * flush   - not examined in this method
+  * returns - UNDERFLOW, OVERFLOW, an unmappable result, or the result of handleSurrogates()
+  */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ int targetByteUnit = 0x0000;
+ int sourceChar = 0x0000;
+ UConverterDataISCII converterData;
+ short newDelta = 0;
+ short range = 0;
+ boolean deltaChanged = false;
+ int tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* initialize data */
+ converterData = extraInfo;
+ newDelta = converterData.currentDeltaFromUnicode;
+ range = (short)(newDelta / UniLang.DELTA);
+
+ /* a non-zero fromUChar32 is a code unit left pending; it is passed to the surrogate handling first */
+ if ((sourceChar = fromUChar32) != 0) {
+ cr = handleSurrogates(source, (char) sourceChar);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+ }
+
+ /* writing the char to the output stream */
+ while (source.hasRemaining()) {
+ if (!target.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+
+ /* Write the language code following LF only if LF is not the last character. */
+ if (fromUnicodeStatus == LF) {
+ targetByteUnit = ATR << 8;
+ targetByteUnit += (byte)lookupInitialData[range].isciiLang;
+ fromUnicodeStatus = 0x0000;
+ /* now append ATR and language code */
+ cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
+ if (cr.isOverflow()) {
+ break;
+ }
+ }
+
+ sourceChar = source.get();
+ /* remember the context as it was before this character (checked for Adhak after the switch) */
+ tempContextFromUnicode = converterData.contextCharFromUnicode;
+
+ targetByteUnit = UConverterConstants.missingCharMarker;
+
+ /* check if input is in ASCII and C0 control codes range */
+ if (sourceChar <= ASCII_END) {
+ fromUnicodeStatus = sourceChar;
+ cr = WriteToTargetFromU(offsets, source, target, sourceChar);
+ if (cr.isOverflow()) {
+ break;
+ }
+ continue;
+ }
+
+ switch (sourceChar) {
+ case ZWNJ:
+ /* contextChar has HALANT */
+ if (converterData.contextCharFromUnicode != 0) {
+ converterData.contextCharFromUnicode = 0x00;
+ targetByteUnit = ISCII_HALANT;
+ } else {
+ /* consume ZWNJ and continue */
+ converterData.contextCharFromUnicode = 0x00;
+ continue;
+ }
+ break;
+ case ZWJ:
+ /* contextChar has HALANT */
+ if (converterData.contextCharFromUnicode != 0) {
+ targetByteUnit = ISCII_NUKTA;
+ } else {
+ targetByteUnit = ISCII_INV;
+ }
+ converterData.contextCharFromUnicode = 0x00;
+ break;
+ default:
+ /* is the sourceChar in the INDIC_RANGE? */
+ /* the char cast wraps a negative difference to a large value, so one comparison tests both bounds */
+ if((char)(INDIC_BLOCK_END - sourceChar) <= INDIC_RANGE) {
+ /* Danda and Doube Danda are valid in Northern scripts.. since Unicode
+ * does not include these codepoints in all Northern scripts we need to
+ * filter them out
+ */
+ if (sourceChar != DANDA && sourceChar != DOUBLE_DANDA) {
+ /* find out to which block the sourceChar belongs */
+ range = (short)((sourceChar - INDIC_BLOCK_BEGIN) / UniLang.DELTA);
+ newDelta = (short)(range * UniLang.DELTA);
+
+ /* Now are we in the same block as previous? */
+ if (newDelta != converterData.currentDeltaFromUnicode || converterData.isFirstBuffer) {
+ converterData.currentDeltaFromUnicode = newDelta;
+ converterData.currentMaskFromUnicode = lookupInitialData[range].maskEnum;
+ deltaChanged = true;
+ converterData.isFirstBuffer = false;
+ }
+ if (converterData.currentDeltaFromUnicode == PNJ_DELTA) {
+ if (sourceChar == PNJ_TIPPI) {
+ /* Make sure Tippi is converterd to Bindi. */
+ sourceChar = PNJ_BINDI;
+ } else if (sourceChar == PNJ_ADHAK) {
+ /* This is for consonant cluster handling. */
+ converterData.contextCharFromUnicode = PNJ_ADHAK;
+ }
+ }
+ /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
+ /* now subtract the new delta from sourceChar */
+ sourceChar -= converterData.currentDeltaFromUnicode;
+ }
+ /* get the target byte unit */
+ targetByteUnit = fromUnicodeTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK];
+
+ /* is the code point valid in current script? */
+ if ((validityTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK] & converterData.currentMaskFromUnicode) == 0) {
+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */
+ if (converterData.currentDeltaFromUnicode != (TELUGU_DELTA) || sourceChar != VOCALLIC_RR) {
+ targetByteUnit = UConverterConstants.missingCharMarker;
+ }
+ }
+
+ if (deltaChanged) {
+ /* we are in a script block which is different than
+ * previous sourceChar's script block write ATR and language codes
+ */
+ char temp = 0;
+ temp = (char)(ATR << 8);
+ temp += (char)(lookupInitialData[range].isciiLang & UConverterConstants.UNSIGNED_BYTE_MASK);
+ /* reset */
+ deltaChanged = false;
+ /* now append ATR and language code */
+ cr = WriteToTargetFromU(offsets, source, target, temp);
+ if (cr.isOverflow()) {
+ break;
+ }
+ }
+ /* Gurmukhi Adhak produces no byte of its own; it was stored as context above and is applied to the next consonant */
+ if (converterData.currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
+ continue;
+ }
+ }
+ /* reset context char */
+ converterData.contextCharFromUnicode = 0x00;
+ break;
+ } //end of switch
+ if (converterData.currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && PNJ_CONSONANT_SET.contains(sourceChar + PNJ_DELTA)) {
+ /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
+ /* reset context char */
+ converterData.contextCharFromUnicode = 0x0000;
+ /* pack three bytes into one int: consonant, halant, consonant (most significant first) */
+ targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
+ /*write targetByteUnit to target */
+ cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
+ if (cr.isOverflow()) {
+ break;
+ }
+ } else if (targetByteUnit != UConverterConstants.missingCharMarker) {
+ /* remember a halant so that a following ZWNJ/ZWJ can be encoded as explicit/soft halant */
+ if (targetByteUnit == ISCII_HALANT) {
+ converterData.contextCharFromUnicode = (char)targetByteUnit;
+ }
+ /*write targetByteUnit to target */
+ cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
+ if (cr.isOverflow()) {
+ break;
+ }
+ } else if (UTF16.isSurrogate((char)sourceChar)) {
+ cr = handleSurrogates(source, (char) sourceChar);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+ } else {
+ return CoderResult.unmappableForLength(1);
+ }
+ } /* end of while */
+
+ /* save the state and return */
+ return cr;
+ }
+
+ private CoderResult WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+ int offset = source.position() - 1;
+ /* write the targetUniChar to target */
+ if (target.hasRemaining()) {
+ if (targetByteUnit <= 0xFF) {
+ target.put((byte)targetByteUnit);
+ if (offsets != null) {
+ offsets.put(offset);
+ }
+ } else {
+ if (targetByteUnit > 0xFFFF) {
+ target.put((byte)(targetByteUnit >> 16));
+ if (offsets != null) {
+ --offset;
+ offsets.put(offset);
+ }
+ }
+ if (!target.hasRemaining()) {
+ errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8);
+ errorBuffer[errorBufferLength++] = (byte)targetByteUnit;
+ cr = CoderResult.OVERFLOW;
+ return cr;
+ }
+ target.put((byte)(targetByteUnit >> 8));
+ if (offsets != null) {
+ offsets.put(offset);
+ }
+ if (target.hasRemaining()) {
+ target.put((byte)targetByteUnit);
+ if (offsets != null) {
+ offsets.put(offset);
+ }
+ } else {
+ errorBuffer[errorBufferLength++] = (byte)targetByteUnit;
+ cr = CoderResult.OVERFLOW;
+ }
+ }
+ } else {
+ if ((targetByteUnit > 0xFFFF)) {
+ errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 16);
+ } else if ((targetByteUnit & 0xFF00) > 0) {
+ errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8);
+ }
+ errorBuffer[errorBufferLength++] = (byte)(targetByteUnit);
+ cr = CoderResult.OVERFLOW;
+ }
+ return cr;
+ }
+ }
+
+ /* Returns a new ISCII decoder bound to this charset. */
+ public CharsetDecoder newDecoder() {
+     return new CharsetDecoderISCII(this);
+ }
+
+ /* Returns a new ISCII encoder bound to this charset. */
+ public CharsetEncoder newEncoder() {
+     return new CharsetEncoderISCII(this);
+ }
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+ int idx,script;
+ char mask;
+
+ setFillIn.add(0,ASCII_END );
+ for(script = UniLang.DEVALANGARI ; script<= UniLang.MALAYALAM ;script++){
+ mask = (char)lookupInitialData[script].maskEnum;
+ for(idx=0; idx < UniLang.DELTA ; idx++){
+ // Special check for telugu character
+ if((validityTable[idx] & mask)!=0 || (script == UniLang.TELUGU && idx==0x31)){
+ setFillIn.add(idx+(script*UniLang.DELTA)+INDIC_BLOCK_BEGIN );
+ }
+ }
+ }
+ setFillIn.add(DANDA);
+ setFillIn.add(DOUBLE_DANDA);
+ setFillIn.add(ZWNJ);
+ setFillIn.add(ZWJ);
+
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java
new file mode 100644
index 00000000000..a48fed00121
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java
@@ -0,0 +1,2998 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.util.Arrays;
+
+import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
+import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+class CharsetISO2022 extends CharsetICU {
+ private UConverterDataISO2022 myConverterData;
+ private int variant; // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}
+
+ private static final byte[] SHIFT_IN_STR = { 0x0f };
+// private static final byte[] SHIFT_OUT_STR = { 0x0e };
+
+ private static final byte CR = 0x0D;
+ private static final byte LF = 0x0A;
+/*
+ private static final byte H_TAB = 0x09;
+ private static final byte SPACE = 0x20;
+*/
+ private static final char HWKANA_START = 0xff61;
+ private static final char HWKANA_END = 0xff9f;
+
+ /*
+ * 94-character sets with native byte values A1..FE are encoded in ISO 2022
+ * as bytes 21..7E. (Subtract 0x80.)
+ * 96-character sets with native bit values A0..FF are encoded in ISO 2022
+ * as bytes 20..7F. (Subtract 0x80.)
+ * Do not encode C1 control codes with native bytes 80..9F
+ * as bytes 00..1F (C0 control codes).
+ */
+/*
+ private static final char GR94_START = 0xa1;
+ private static final char GR94_END = 0xfe;
+*/
+ private static final char GR96_START = 0xa0;
+ private static final char GR96_END = 0xff;
+
+ /* for ISO-2022-JP and -CN implementations */
+ // typedef enum {
+ /* shared values */
+ private static final byte INVALID_STATE = -1;
+ private static final byte ASCII = 0;
+
+ private static final byte SS2_STATE = 0x10;
+ private static final byte SS3_STATE = 0x11;
+
+ /* JP */
+ private static final byte ISO8859_1 = 1;
+ private static final byte ISO8859_7 = 2;
+ private static final byte JISX201 = 3;
+ private static final byte JISX208 = 4;
+ private static final byte JISX212 = 5;
+ private static final byte GB2312 = 6;
+ private static final byte KSC5601 = 7;
+ private static final byte HWKANA_7BIT = 8; /* Halfwidth Katakana 7 bit */
+
+ /* CN */
+ /* the first few enum constants must keep their values because they correspond to indexes in myConverterArray[] */
+ private static final byte GB2312_1 = 1;
+ private static final byte ISO_IR_165= 2;
+ private static final byte CNS_11643 = 3;
+
+ /*
+ * these are used in StateEnum and ISO2022State variables,
+ * but CNS_11643 must be used to index into myConverterArray[]
+ */
+ private static final byte CNS_11643_0 = 0x20;
+ private static final byte CNS_11643_1 = 0x21;
+ private static final byte CNS_11643_2 = 0x22;
+ private static final byte CNS_11643_3 = 0x23;
+ private static final byte CNS_11643_4 = 0x24;
+ private static final byte CNS_11643_5 = 0x25;
+ private static final byte CNS_11643_6 = 0x26;
+ private static final byte CNS_11643_7 = 0x27;
+ // } StateEnum;
+
+
+ public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+
+ myConverterData = new UConverterDataISO2022();
+
+ int versionIndex = icuCanonicalName.indexOf("version=");
+ int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
+
+ myConverterData.version = version;
+
+ if (icuCanonicalName.indexOf("locale=ja") > 0) {
+ ISO2022InitJP(version);
+ } else if (icuCanonicalName.indexOf("locale=zh") > 0) {
+ ISO2022InitCN(version);
+ } else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {
+ ISO2022InitKR(version);
+ }
+
+ myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
+ myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
+ }
+
+ private void ISO2022InitJP(int version) {
+ variant = ISO_2022_JP;
+
+ maxBytesPerChar = 6;
+ minBytesPerChar = 1;
+ maxCharsPerByte = 1;
+ // open the required converters and cache them
+ if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {
+ myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
+ }
+ // myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;
+ myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData;
+ if ((jpCharsetMasks[version]&CSM(JISX212)) != 0) {
+ myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;
+ }
+ if ((jpCharsetMasks[version]&CSM(GB2312)) != 0) {
+ myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
+ }
+ if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {
+ myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
+ }
+
+ // create a generic CharsetMBCS object
+ myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+ }
+
+ private void ISO2022InitCN(int version) {
+ variant = ISO_2022_CN;
+
+ maxBytesPerChar = 8;
+ minBytesPerChar = 1;
+ maxCharsPerByte = 1;
+ // open the required coverters and cache them.
+ myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
+ if (version == 1) {
+ myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
+ }
+ myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;
+
+ // create a generic CharsetMBCS object
+ myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+ }
+
+ private void ISO2022InitKR(int version) {
+ variant = ISO_2022_KR;
+
+ maxBytesPerChar = 3;
+ minBytesPerChar = 1;
+ maxCharsPerByte = 1;
+
+ if (version == 1) {
+ myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+ myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
+ } else {
+ myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
+ }
+
+ myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
+ myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
+ }
+
+ /*
+ * ISO 2022 control codes must not be converted from Unicode
+ * because they would mess up the byte stream.
+ * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
+ * corresponding to SO, SI, and ESC.
+ */
+ private static boolean IS_2022_CONTROL(int c) {
+     /* for c below 0x20, test bit number c of the SO/SI/ESC mask */
+     return (c<0x20) && (((1<<c) & 0x0800c000) != 0);
+ }
+
+ /*
+  * NOTE(review): the end of IS_2022_CONTROL and the header of this method were lost
+  * (the text between a '<' and the next '>' was dropped); both are restored from the
+  * surviving fragments and from the commented-out _2022ToGR94DBCS() - confirm
+  * against the upstream file.
+  *
+  * Accepts a 2-byte value in the range A1A1..FEFE whose low byte is in A1..FE and
+  * returns it shifted down by 0x8080; returns 0 for any other value.
+  */
+ private static int _2022FromGR94DBCS(int value) {
+     if ((value <= 0xfefe && value >= 0xa1a1) &&
+             ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
+         return (value - 0x8080); /* shift down to 21..7e byte range */
+     } else {
+         return 0; /* not valid for ISO 2022 */
+     }
+ }
+
+ /*
+ * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
+ *
+ * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
+ * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
+ * unchanged.
+ *
+ private static int _2022ToGR94DBCS(int value) {
+ int returnValue = value + 0x8080;
+
+ if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) &&
+ ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
+ return returnValue;
+ } else {
+ return value;
+ }
+ }*/
+
+ /* is the StateEnum charset value for a DBCS charset? */
+ private static boolean IS_JP_DBCS(byte cs) {
+ return ((JISX208 <= cs) && (cs <= KSC5601));
+ }
+
+ private static short CSM(short cs) {
+ return (short)(1<= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ return 0;
+ }
+ /* convert the Unicode code point in c into codepage bytes */
+ table = sharedData.mbcs.fromUnicodeTable;
+ /* get the byte for the output */
+ value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+ /* get the byte for the output */
+ retval[0] = value & 0xff;
+ if (value >= 0xf00) {
+ return 1; /* roundtrip */
+ } else if (useFallback ? value>=0x800 : value>=0xc00) {
+ return -1; /* fallback taken */
+ } else {
+ return 0; /* no mapping */
+ }
+ }
+
+ /*
+ * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
+ * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
+ *
+ * Note: The converter uses some leniency:
+ * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
+ * all versions, not just JIS7 and JIS8.
+ * - ICU does not distinguish between different versions of JIS X 0208.
+ */
+ private static final short jpCharsetMasks[] = {
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)),
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)),
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)),
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7))
+ };
+
+/*
+ // typedef enum {
+ private static final byte ASCII1 = 0;
+ private static final byte LATIN1 = 1;
+ private static final byte SBCS = 2;
+ private static final byte DBCS = 3;
+ private static final byte MBCS = 4;
+ private static final byte HWKANA = 5;
+ // } Cnv2002Type;
+*/
+
+ /* Shift/designation state of one conversion direction. */
+ private class ISO2022State {
+     /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
+     private byte []cs = new byte[4];
+     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
+     private byte g;
+     /* g before single shift (SS2 or SS3) */
+     private byte prevG;
+
+     ISO2022State() {
+         /* nothing to do: the four charset slots are allocated by the field initializer */
+     }
+
+     /* Clears every charset slot and returns both g and prevG to 0. */
+     void reset() {
+         for (int slot = 0; slot < cs.length; slot++) {
+             cs[slot] = (byte)0;
+         }
+         prevG = 0;
+         g = 0;
+     }
+ }
+
+// private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;
+ private static final byte UCNV_2022_MAX_CONVERTERS = 10;
+
+ /*
+  * Mutable converter state held by the charset: the cached conversion tables, the
+  * current MBCS converter with its encoder/decoder, the shift state of both
+  * directions, and the key of an escape sequence that is still being parsed.
+  */
+ @SuppressWarnings("unused")
+ private class UConverterDataISO2022 {
+     UConverterSharedData []myConverterArray; /* indexed by StateEnum charset number */
+     CharsetEncoderMBCS currentEncoder;
+     CharsetDecoderMBCS currentDecoder;
+     CharsetMBCS currentConverter;
+     int currentType; // Cnv2022Type;
+     ISO2022State toU2022State;
+     ISO2022State fromU2022State;
+     int key; /* non-zero while an escape sequence is only partially consumed */
+     int version;
+     boolean isEmptySegment;
+
+     UConverterDataISO2022() {
+         myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
+         toU2022State = new ISO2022State();
+         fromU2022State = new ISO2022State();
+         currentType = 0;
+         key = 0;
+         version = 0;
+         isEmptySegment = false;
+     }
+
+     /*
+      * Returns the conversion state to its initial values. The cached tables,
+      * the current converter and the version are left untouched.
+      */
+     void reset() {
+         toU2022State.reset();
+         fromU2022State.reset();
+         /*
+          * Forget any half-parsed escape sequence as well: the decode loop treats a
+          * non-zero key as "continue the pending escape sequence", which must not
+          * survive a reset.
+          */
+         key = 0;
+         isEmptySegment = false;
+     }
+ }
+
+ private static final byte ESC_2022 = 0x1B; /* ESC */
+
+ // typedef enum {
+ private static final byte INVALID_2022 = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
+ private static final byte VALID_NON_TERMINAL_2022 = 0; /* so far corresponds to a valid iso 2022 escape sequence */
+ private static final byte VALID_TERMINAL_2022 = 1; /* corresponds to a valid iso 2022 escape sequence */
+ private static final byte VALID_MAYBE_TERMINAL_2022 = 2; /* so far matches one iso 2022 escape sequence, but by adding
+ more characters might match another escape sequence */
+ // } UCNV_TableStates_2022;
+
+ /*
+ * The way these state transition arrays work is:
+ * ex : ESC$B is the sequence for JISX208
+ * a) First Iteration: char is ESC
+ * i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
+ * int x = normalize_esq_chars_2022[27] which is equal to 1
+ * ii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[0]
+ * iii) Save this index as offset
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022
+ * b) Switch on this state and continue to next char
+ * i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
+ * which is normalize_esq_chars_2022[36] == 4
+ * ii) x is currently 1(from above)
+ * x<<=5 -- x is now 32
+ * x+=normalize_esq_chars_2022[36]
+ * now x is 36
+ * iii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
+ * c) Switch on this state and continue to next char
+ * i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
+ * ii) x is currently 36 (from above)
+ * x<<=5 -- x is now 1152
+ * x+= normalize_esq_chars_2022[66]
+ * now x is 1161
+ * iii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
+ * v) Get the charset state from nextStateToUnicodeJP[24] which is JISX208
+ */
+ /* Below are the 3 arrays depicting a state transition table */
+ private static final byte normalize_esq_chars_2022[] = {
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 4, 7, 29, 0,
+ 2, 24, 26, 27, 0, 3, 23, 6, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 5, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 25, 28,
+ 0, 0, 21, 0, 0, 0, 0, 0, 0, 0,
+ 22, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0
+ };
+
+ private static final short MAX_STATES_2022 = 74;
+ private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ 1, 34, 36, 39, 55, 57, 60, 61, 1093, 1096,
+ 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106,
+ 1109, 1154, 1157, 1160, 1161, 1176, 1178, 1179, 1254, 1257,
+ 1768, 1773, 1957, 35105, 36933, 36936, 36937, 36938, 36939, 36940,
+ 36942, 36943, 36944, 36945, 36946, 36947, 36948, 37640, 37642, 37644,
+ 37646, 37711, 37744, 37745, 37746, 37747, 37748, 40133, 40136, 40138,
+ 40139, 40140, 40141, 1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,
+ 35947631, 35947635, 35947636, 35947638
+ };
+
+ private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {
+ /* 0 1 2 3 4 */
+ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_MAYBE_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022
+ };
+
+ /* Type def for refactoring changeState_2022 code */
+ // typedef enum {
+ private static final byte ISO_2022_JP = 1;
+ private static final byte ISO_2022_KR = 2;
+ private static final byte ISO_2022_CN = 3;
+ // } Variant2022;
+
+ /* const UConverterSharedData _ISO2022Data; */
+ //private UConverterSharedData _ISO2022JPData;
+ //private UConverterSharedData _ISO2022KRData;
+ //private UConverterSharedData _ISO2022CNData;
+
+ /******************** to unicode ********************/
+ /****************************************************
+ * Recognized escape sequences are
+ * (B ASCII
+ * .A ISO-8859-1
+ * .F ISO-8859-7
+ * (J JISX-201
+ * (I JISX-201
+ * $B JISX-208
+ * $@ JISX-208
+ * $(D JISX-212
+ * $A GB2312
+ * $(C KSC5601
+ */
+ private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = {
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ ASCII, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, JISX201, HWKANA_7BIT, JISX201, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, JISX208, GB2312, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ ISO8859_1, ISO8859_7, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, KSC5601, JISX212, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE
+ };
+
+ private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, SS3_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, GB2312_1, INVALID_STATE, ISO_IR_165,
+ CNS_11643_1, CNS_11643_2, CNS_11643_3, CNS_11643_4, CNS_11643_5, CNS_11643_6, CNS_11643_7, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE
+ };
+
+ /* runs through a state machine to determine the escape sequence - codepage correspondence */
+ @SuppressWarnings("fallthrough")
+ private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ boolean DONE = false;
+ byte value;
+ int key[] = {myConverterData.key};
+ int offset[] = {0};
+ int initialToULength = decoder.toULength;
+ byte c;
+ int malformLength = 0;
+
+ value = VALID_NON_TERMINAL_2022;
+ while (source.hasRemaining()) {
+ c = source.get();
+ malformLength++;
+ decoder.toUBytesArray[decoder.toULength++] = c;
+ value = getKey_2022(c, key, offset);
+
+ switch(value) {
+
+ case VALID_NON_TERMINAL_2022:
+ /* continue with the loop */
+ break;
+
+ case VALID_TERMINAL_2022:
+ key[0] = 0;
+ DONE = true;
+ break;
+
+ case INVALID_2022:
+ DONE = true;
+ break;
+
+ case VALID_MAYBE_TERMINAL_2022:
+ /* not ISO_2022 itself, finish here */
+ value = VALID_TERMINAL_2022;
+ key[0] = 0;
+ DONE = true;
+ break;
+ }
+ if (DONE) {
+ break;
+ }
+ }
+// DONE:
+ myConverterData.key = key[0];
+
+ if (value == VALID_NON_TERMINAL_2022) {
+ /* indicate that the escape sequence is incomplete: key !=0 */
+ return err;
+ } else if (value == INVALID_2022) {
+ err = CoderResult.malformedForLength(malformLength);
+ } else /* value == VALID_TERMINAL_2022 */ {
+ switch (var) {
+ case ISO_2022_JP: {
+ byte tempState = nextStateToUnicodeJP[offset[0]];
+ switch (tempState) {
+ case INVALID_STATE:
+ err = CoderResult.malformedForLength(malformLength);
+ break;
+ case SS2_STATE:
+ if (myConverterData.toU2022State.cs[2] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ /* case SS3_STATE: not used in ISO-2022-JP-x */
+ case ISO8859_1:
+ case ISO8859_7:
+ if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
+ err = CoderResult.unmappableForLength(malformLength);
+ } else {
+ /* G2 charset for SS2 */
+ myConverterData.toU2022State.cs[2] = tempState;
+ }
+ break;
+ default:
+ if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
+ err = CoderResult.unmappableForLength(source.position() - 1);
+ } else {
+ /* G0 charset */
+ myConverterData.toU2022State.cs[0] = tempState;
+ }
+ break;
+ } // end of switch
+ break;
+ }
+ case ISO_2022_CN: {
+ byte tempState = nextStateToUnicodeCN[offset[0]];
+ switch (tempState) {
+ case INVALID_STATE:
+ err = CoderResult.unmappableForLength(malformLength);
+ break;
+ case SS2_STATE:
+ if (myConverterData.toU2022State.cs[2] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ case SS3_STATE:
+ if (myConverterData.toU2022State.cs[3] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 3;
+ } else {
+ /* illegal to have SS3 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ case ISO_IR_165:
+ if (myConverterData.version == 0) {
+ err = CoderResult.unmappableForLength(malformLength);
+ break;
+ }
+ /* fall through */
+ case GB2312_1:
+ /* fall through */
+ case CNS_11643_1:
+ myConverterData.toU2022State.cs[1] = tempState;
+ break;
+ case CNS_11643_2:
+ myConverterData.toU2022State.cs[2] = tempState;
+ break;
+ default:
+ /* other CNS 11643 planes */
+ if (myConverterData.version == 0) {
+ err = CoderResult.unmappableForLength(source.position() - 1);
+ } else {
+ myConverterData.toU2022State.cs[3] = tempState;
+ }
+ break;
+ } //end of switch
+ }
+ break;
+ case ISO_2022_KR:
+ if (offset[0] == 0x30) {
+ /* nothing to be done, just accept this one escape sequence */
+ } else {
+ err = CoderResult.unmappableForLength(malformLength);
+ }
+ break;
+ default:
+ err = CoderResult.malformedForLength(malformLength);
+ break;
+ } // end of switch
+ }
+ if (!err.isError()) {
+ decoder.toULength = 0;
+ } else if (err.isMalformed()) {
+ if (decoder.toULength > 1) {
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte (ESC) in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequece before the first one of those.
+ * In escape sequences, all following bytes are "printable", that is,
+ * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
+ * they are valid single/lead bytes.
+ * For simplicity, we always only report the initial ESC byte as the
+ * illegal sequence and back out all other bytes we looked at.
+ */
+ /* Back out some bytes. */
+ int backOutDistance = decoder.toULength - 1;
+ int bytesFromThisBuffer = decoder.toULength - initialToULength;
+ if (backOutDistance <= bytesFromThisBuffer) {
+ /* same as initialToULength<=1 */
+ source.position(source.position() - backOutDistance);
+ } else {
+ /* Back out bytes from the previous buffer: Need to replay them. */
+ decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
+ /* same as -(initalToULength-1) */
+ /* preToULength is negative! */
+ for (int i = 0; i < -(decoder.preToULength); i++) {
+ decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
+ }
+ source.position(source.position() - bytesFromThisBuffer);
+ }
+ decoder.toULength = 1;
+ }
+ }
+
+ return err;
+ }
+
+ private static byte getKey_2022(byte c, int[]key, int[]offset) {
+ int togo;
+ int low = 0;
+ int hi = MAX_STATES_2022;
+ int oldmid = 0;
+
+ togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK];
+
+ if (togo == 0) {
+ /* not a valid character anywhere in an escape sequence */
+ key[0] = 0;
+ offset[0] = 0;
+ return INVALID_2022;
+ }
+ togo = (key[0] << 5) + togo;
+
+ while (hi != low) { /* binary search */
+ int mid = (hi+low) >> 1; /* Finds median */
+
+ if (mid == oldmid) {
+ break;
+ }
+
+ if (escSeqStateTable_Key_2022[mid] > togo) {
+ hi = mid;
+ } else if (escSeqStateTable_Key_2022[mid] < togo) {
+ low = mid;
+ } else /* we found it */ {
+ key[0] = togo;
+ offset[0] = mid;
+ return escSeqStateTable_Value_2022[mid];
+ }
+ oldmid = mid;
+ }
+ return INVALID_2022;
+ }
+
+ /*
+ * To Unicode Callback helper function
+ */
+ private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ if (sourceChar > 0xff) {
+ cnv.toUBytesArray[0] = (byte)(sourceChar>>8);
+ cnv.toUBytesArray[1] = (byte)sourceChar;
+ cnv.toULength = 2;
+ } else {
+ cnv.toUBytesArray[0] = (byte)sourceChar;
+ cnv.toULength = 1;
+ }
+
+ if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
+ err = CoderResult.unmappableForLength(1);
+ } else {
+ err = CoderResult.malformedForLength(1);
+ }
+
+ return err;
+ }
+
+ /****************************ISO-2022-JP************************************/
+ private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
+ /* Creates the decoder for the given charset; all setup is done by the superclass constructor. */
+ public CharsetDecoderISO2022JP(CharsetICU cs) {
+ super(cs);
+ }
+
+ /* Resets the superclass state, then the ISO-2022 state held in the enclosing charset's myConverterData. */
+ protected void implReset() {
+ super.implReset();
+ myConverterData.reset();
+ }
+ /*
+ * Map 00..7F to Unicode according to JIS X 0201.
+ * */
+ private int jisx201ToU(int value) {
+     /* only two positions differ from the identity mapping */
+     switch (value) {
+     case 0x5c:
+         return 0xa5;
+     case 0x7e:
+         return 0x203e;
+     default:
+         return value;
+     }
+ }
+ /*
+ * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS.
+ * If either byte is outside 21..7E make sure that the result is not valid
+ * for Shift-JIS so that the converter catches it.
+ * Some invalid byte values already turn into equally invalid Shift-JIS
+ * byte values and need not be tested explicitly.
+ */
+ private void _2022ToSJIS(char c1, char c2, byte []bytes) {
+ if ((c1&1) > 0) {
+ ++c1;
+ if (c2 <= 0x5f) {
+ c2 += 0x1f;
+ } else if (c2 <= 0x7e) {
+ c2 += 0x20;
+ } else {
+ c2 = 0; /* invalid */
+ }
+ } else {
+ if ((c2 >= 0x21) && (c2 <= 0x7e)) {
+ c2 += 0x7e;
+ } else {
+ c2 = 0; /* invalid */
+ }
+ }
+
+ c1 >>=1;
+ if (c1 <= 0x2f) {
+ c1 += 0x70;
+ } else if (c1 <= 0x3f) {
+ c1 += 0xb0;
+ } else {
+ c1 = 0; /* invalid */
+ }
+ bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c1);
+ bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);
+ }
+
+ @SuppressWarnings("fallthrough")
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ boolean gotoGetTrail = false;
+ boolean gotoEscape = false;
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte []tempBuf = new byte[2];
+ int targetUniChar = 0x0000;
+ int mySourceChar = 0x0000;
+ int mySourceCharTemp = 0x0000; // use for getTrail label call.
+ byte cs; /* StateEnum */
+ byte csTemp= 0; // use for getTrail label call.
+
+ if (myConverterData.key != 0) {
+ /* continue with a partial escape sequence */
+ // goto escape;
+ gotoEscape = true;
+ } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+ /* continue with a partial double-byte character */
+ mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ toULength = 0;
+ cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+ // goto getTrailByte;
+ mySourceCharTemp = 0x99;
+ gotoGetTrail = true;
+ }
+
+ while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
+ // This code is here for the goto escape label call above.
+ if (gotoEscape) {
+ mySourceCharTemp = ESC_2022;
+ }
+
+ targetUniChar = UConverterConstants.missingCharMarker;
+
+ if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
+ if (!gotoEscape && !gotoGetTrail) {
+ mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
+ mySourceCharTemp = mySourceChar;
+ }
+
+ switch (mySourceCharTemp) {
+ case UConverterConstants.SI:
+ if (myConverterData.version == 3) {
+ myConverterData.toU2022State.g = 0;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myConverterData.isEmptySegment = false;
+ break;
+ }
+
+ case UConverterConstants.SO:
+ if (myConverterData.version == 3) {
+ /* JIS7: switch to G1 half-width Katakana */
+ myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
+ myConverterData.toU2022State.g = 1;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myConverterData.isEmptySegment = false; /* reset this, we have a different error */
+ break;
+ }
+
+ case ESC_2022:
+ if (!gotoEscape) {
+ source.position(source.position() - 1);
+ } else {
+ gotoEscape = false;
+ }
+// escape:
+ {
+ int mySourceBefore = source.position();
+ int toULengthBefore = this.toULength;
+
+ err = changeState_2022(this, source, variant);
+
+ /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
+ if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
+ err = CoderResult.malformedForLength(source.position() - mySourceBefore);
+ this.toULength = toULengthBefore + (source.position() - mySourceBefore);
+ }
+ }
+
+ /* invalid or illegal escape sequence */
+ if(err.isError()){
+ myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
+ return err;
+ }
+ /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
+ if(myConverterData.key == 0) {
+ myConverterData.isEmptySegment = true;
+ }
+
+ continue;
+ /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
+ case CR:
+ /* falls through */
+ case LF:
+ /* automatically reset to single-byte mode */
+ if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {
+ myConverterData.toU2022State.cs[0] = ASCII;
+ }
+ myConverterData.toU2022State.cs[2] = 0;
+ myConverterData.toU2022State.g = 0;
+ /* falls through */
+ default :
+ /* convert one or two bytes */
+ myConverterData.isEmptySegment = false;
+ cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+ csTemp = cs;
+ if (gotoGetTrail) {
+ csTemp = (byte)0x99;
+ }
+ if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
+ /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
+ targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
+
+ /* return from a single-shift state to the previous one */
+ if (myConverterData.toU2022State.g >= 2) {
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ }
+ } else {
+ switch(csTemp) {
+ case ASCII:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar;
+ }
+ break;
+ case ISO8859_1:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar + 0x80;
+ }
+ /* return from a single-shift state to the prevous one */
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ break;
+ case ISO8859_7:
+ if (mySourceChar <= 0x7f) {
+ /* convert mySourceChar+0x80 to use a normal 8-bit table */
+ targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,
+ mySourceChar+0x80);
+ }
+ /* return from a single-shift state to the previous one */
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ break;
+ case JISX201:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = jisx201ToU(mySourceChar);
+ }
+ break;
+ case HWKANA_7BIT:
+ if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {
+ /* 7-bit halfwidth Katakana */
+ targetUniChar = mySourceChar + (HWKANA_START - 0x21);
+ break;
+ }
+ default :
+ /* G0 DBCS */
+ if (gotoGetTrail || source.hasRemaining()) {
+// getTrailByte:
+ int tmpSourceChar;
+ gotoGetTrail = false;
+ short trailByte;
+ boolean leadIsOk, trailIsOk;
+
+ trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ *
+ * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+ * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+ * Otherwise we convert or report the pair of bytes.
+ */
+ leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+ trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+ if (leadIsOk && trailIsOk) {
+ source.get();
+ tmpSourceChar = (mySourceChar << 8) | trailByte;
+ if (cs == JISX208) {
+ _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
+ mySourceChar = tmpSourceChar;
+ } else {
+ /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
+ mySourceChar = tmpSourceChar;
+ if (cs == KSC5601) {
+ tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
+ }
+ tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
+ tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
+ }
+ targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
+ } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+ /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+ source.get();
+ /* add another bit so that the code below writes 2 bytes in case of error */
+ mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+ }
+ } else {
+ toUBytesArray[0] = (byte)mySourceChar;
+ toULength = 1;
+ // goto endloop
+ return err;
+ }
+ } /* end of inner switch */
+ }
+ break;
+ } /* end of outer switch */
+
+ if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.put((char)targetUniChar);
+ } else if (targetUniChar > UConverterConstants.missingCharMarker) {
+ /* disassemble the surrogate pair and write to output */
+ targetUniChar -= 0x0010000;
+ target.put((char)(0xd800 + (char)(targetUniChar>>10)));
+ target.position(target.position()-1);
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.get();
+ if (target.hasRemaining()) {
+ target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
+ target.position(target.position()-1);
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.get();
+ } else {
+ charErrorBufferArray[charErrorBufferLength++] =
+ (char)(0xdc00+(char)(targetUniChar&0x3ff));
+ }
+ } else {
+ /* Call the callback function */
+ err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+ break;
+ }
+ } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+//endloop:
+ return err;
+ }
+ } // end of class CharsetDecoderISO2022JP
+
+ /****************************ISO-2022-CN************************************/
+ /**
+  * Decoder for the ISO-2022-CN variant (it drives changeState_2022 with ISO_2022_CN).
+  *
+  * The loop is a port of goto-based code: the flags gotoEscape and gotoGetTrailByte
+  * stand in for jumps to the "escape" and "getTrailByte" labels, so that a partial escape
+  * sequence or a pending lead byte left over from an earlier call can be resumed.
+  */
+ private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
+     public CharsetDecoderISO2022CN(CharsetICU cs) {
+         super(cs);
+     }
+
+     /** Resets the base decoder and the shared ISO-2022 converter data. */
+     protected void implReset() {
+         super.implReset();
+         myConverterData.reset();
+     }
+
+     /**
+      * Converts bytes from source into UTF-16 code units in target.
+      *
+      * @param source  input bytes; consumed as far as conversion gets
+      * @param target  output characters
+      * @param offsets if non-null, receives for each output code unit the source index of
+      *                the byte sequence that produced it (indexed by target position)
+      * @param flush   not read by this method
+      * @return UNDERFLOW when the input is used up (a lone lead byte is parked in
+      *         toUBytesArray), OVERFLOW when target is full, or an error result for
+      *         malformed input / whatever toUnicodeCallback returns
+      */
+     @SuppressWarnings("fallthrough")
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+         CoderResult err = CoderResult.UNDERFLOW;
+         byte[] tempBuf = new byte[3];
+         int targetUniChar = 0x0000;
+         int mySourceChar = 0x0000;
+         int mySourceCharTemp = 0x0000;
+         boolean gotoEscape = false;
+         boolean gotoGetTrailByte = false;
+
+         if (myConverterData.key != 0) {
+             /* continue with a partial escape sequence */
+             // goto escape;
+             gotoEscape = true;
+         } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+             /* continue with a partial double-byte character */
+             mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
+             toULength = 0;
+             targetUniChar = UConverterConstants.missingCharMarker;
+             // goto getTrailByte
+             gotoGetTrailByte = true;
+         }
+
+         while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+             targetUniChar = UConverterConstants.missingCharMarker;
+
+             if (target.hasRemaining() || gotoEscape) {
+                 /* mySourceCharTemp only selects the switch arm; mySourceChar keeps the real byte(s) */
+                 if (gotoEscape) {
+                     mySourceChar = ESC_2022; // goto escape label
+                     mySourceCharTemp = mySourceChar;
+                 } else if (gotoGetTrailByte) {
+                     mySourceCharTemp = 0xff; // goto getTrailByte; set mySourceCharTemp to go to default
+                 } else {
+                     mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
+                     mySourceCharTemp = mySourceChar;
+                 }
+
+                 switch (mySourceCharTemp) {
+                 case UConverterConstants.SI:
+                     myConverterData.toU2022State.g = 0;
+                     if (myConverterData.isEmptySegment) {
+                         myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
+                         err = CoderResult.malformedForLength(1);
+                         this.toUBytesArray[0] = (byte)mySourceChar;
+                         this.toULength = 1;
+                         return err;
+                     }
+                     continue;
+
+                 case UConverterConstants.SO:
+                     if (myConverterData.toU2022State.cs[1] != 0) {
+                         myConverterData.toU2022State.g = 1;
+                         myConverterData.isEmptySegment = true; /* Begin a new segment, empty so far */
+                         continue;
+                     } else {
+                         /* illegal to have SO before a matching designator */
+                         myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */
+                         break;
+                     }
+
+                 case ESC_2022:
+                     if (!gotoEscape) {
+                         /* un-read the ESC so that changeState_2022 sees the whole sequence */
+                         source.position(source.position()-1);
+                     }
+ // escape label
+                     gotoEscape = false;
+                     {
+                         int mySourceBefore = source.position();
+                         int toULengthBefore = this.toULength;
+
+                         err = changeState_2022(this, source, ISO_2022_CN);
+
+                         /* After SO there must be at least one character before a designator (designator error handled separately) */
+                         if(myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
+                             err = CoderResult.malformedForLength(source.position() - mySourceBefore);
+                             this.toULength = toULengthBefore + (source.position() - mySourceBefore);
+                         }
+                     }
+
+                     /* invalid or illegal escape sequence */
+                     if(err.isError()){
+                         myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
+                         return err;
+                     }
+                     continue;
+
+                 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
+                 case CR:
+                     /* falls through */
+                 case LF:
+                     myConverterData.toU2022State.reset();
+                     /* falls through */
+                 default:
+                     /* convert one or two bytes */
+                     myConverterData.isEmptySegment = false;
+                     if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {
+                         if (source.hasRemaining() || gotoGetTrailByte) {
+                             UConverterSharedData cnv;
+                             byte tempState;
+                             int tempBufLen;
+                             boolean leadIsOk, trailIsOk;
+                             short trailByte;
+ // getTrailByte: label
+                             gotoGetTrailByte = false; // reset gotoGetTrailByte
+
+                             /* peek at the trail byte; it is consumed below only if it belongs to this character */
+                             trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                             /*
+                              * Ticket 5691: consistent illegal sequences:
+                              * - We include at least the first byte in the illegal sequence.
+                              * - If any of the non-initial bytes could be the start of a character,
+                              *   we stop the illegal sequence before the first one of those.
+                              *
+                              * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                              * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                              * Otherwise we convert or report the pair of bytes.
+                              */
+                             leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                             trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+                             if (leadIsOk && trailIsOk) {
+                                 source.get();
+                                 tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+                                 if (tempState > CNS_11643_0) {
+                                     /* CNS planes above plane 0: look up a 3-byte sequence whose first byte is 0x80 + plane delta */
+                                     cnv = myConverterData.myConverterArray[CNS_11643];
+                                     tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
+                                     tempBuf[1] = (byte)mySourceChar;
+                                     tempBuf[2] = (byte)trailByte;
+                                     tempBufLen = 3;
+                                 } else {
+                                     cnv = myConverterData.myConverterArray[tempState];
+                                     tempBuf[0] = (byte)mySourceChar;
+                                     tempBuf[1] = (byte)trailByte;
+                                     tempBufLen = 2;
+                                 }
+                                 ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
+                                 tempBuffer.limit(tempBufLen);
+                                 targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
+                                 mySourceChar = (mySourceChar << 8) | trailByte;
+
+                             } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                                 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                                 source.get();
+                                 /* add another bit so that the code below writes 2 bytes in case of error */
+                                 mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                             }
+                             if (myConverterData.toU2022State.g >= 2) {
+                                 /* return from a single-shift state to the previous one */
+                                 myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+                             }
+                         } else {
+                             /* lead byte at the end of the input: park it until the next call */
+                             toUBytesArray[0] = (byte)mySourceChar;
+                             toULength = 1;
+                             // goto endloop;
+                             return err;
+                         }
+                     } else {
+                         if (mySourceChar <= 0x7f) {
+                             targetUniChar = (char)mySourceChar;
+                         }
+                     }
+                     break;
+                 }
+                 if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker-1))) {
+                     if (offsets != null) {
+                         /*
+                          * Source index of the first byte of this character. Uses position(), like the
+                          * surrogate branch below; the earlier remaining()-based value shrank as input
+                          * was consumed and so was not a source index.
+                          */
+                         offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+                     }
+                     target.put((char)targetUniChar);
+                 } else if ((UConverterConstants.UNSIGNED_INT_MASK&targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK&(UConverterConstants.missingCharMarker))) {
+                     /* disassemble the surrogate pair and write to output */
+                     targetUniChar -= 0x0010000;
+                     target.put((char)(0xd800+(char)(targetUniChar>>10)));
+                     if (offsets != null) {
+                         offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+                     }
+                     if (target.hasRemaining()) {
+                         target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
+                         if (offsets != null) {
+                             offsets.array()[target.position()-1] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+                         }
+                     } else {
+                         /* no room for the trail surrogate: keep it in the overflow buffer */
+                         charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));
+                     }
+                 } else {
+                     /* Call the callback function */
+                     err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+                     break;
+                 }
+
+             } else {
+                 err = CoderResult.OVERFLOW;
+                 break;
+             }
+         }
+
+         return err;
+     }
+
+ }
+ /************************ ISO-2022-KR ********************/
+ /**
+  * Decoder for ISO-2022-KR (it drives changeState_2022 with ISO_2022_KR).
+  *
+  * When myConverterData.version is 1 the work is delegated to decodeLoopIBM, which feeds the
+  * input between escape sequences to the MBCS sub-decoder. Otherwise decodeLoop converts
+  * byte by byte; its gotoEscape / gotoGetTrailByte flags stand in for jumps to the "escape"
+  * and "getTrailByte" labels of the goto-based code this was ported from.
+  */
+ private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
+     public CharsetDecoderISO2022KR(CharsetICU cs) {
+         super(cs);
+     }
+
+     /** Resets the base decoder, the KR-specific initial state and the shared converter data. */
+     protected void implReset() {
+         super.implReset();
+         setInitialStateToUnicodeKR();
+         myConverterData.reset();
+     }
+
+     /**
+      * Converts bytes from source into UTF-16 code units in target.
+      *
+      * @param source  input bytes; consumed as far as conversion gets
+      * @param target  output characters
+      * @param offsets if non-null, receives for each output code unit the source index of
+      *                the byte sequence that produced it (indexed by target position)
+      * @param flush   only forwarded to decodeLoopIBM
+      * @return UNDERFLOW when the input is used up (a lone lead byte is parked in
+      *         toUBytesArray), OVERFLOW when target is full, or an error result for
+      *         malformed input / whatever toUnicodeCallback returns
+      */
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+         CoderResult err = CoderResult.UNDERFLOW;
+         int mySourceChar = 0x0000;
+         int targetUniChar = 0x0000;
+         byte[] tempBuf = new byte[2];
+         boolean usingFallback;
+         boolean gotoGetTrailByte = false;
+         boolean gotoEscape = false;
+
+         if (myConverterData.version == 1) {
+             return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
+         }
+
+         /* initialize state */
+         usingFallback = isFallbackUsed();
+
+         if (myConverterData.key != 0) {
+             /* continue with a partial escape sequence */
+             gotoEscape = true;
+         } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+             /* continue with a partial double-byte character */
+             mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
+             toULength = 0;
+             gotoGetTrailByte = true;
+         }
+
+         while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+             if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+                 if (!gotoGetTrailByte && !gotoEscape) {
+                     mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+                 }
+
+                 if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
+                     myConverterData.toU2022State.g = 0;
+                     if (myConverterData.isEmptySegment) {
+                         myConverterData.isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
+                         err = CoderResult.malformedForLength(1);
+                         this.toUBytesArray[0] = (byte)mySourceChar;
+                         this.toULength = 1;
+                         return err;
+                     }
+                     /* consume the source */
+                     continue;
+                 } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {
+                     myConverterData.toU2022State.g = 1;
+                     myConverterData.isEmptySegment = true;
+                     /* consume the source */
+                     continue;
+                 } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {
+                     if (!gotoEscape) {
+                         /* un-read the ESC so that changeState_2022 sees the whole sequence */
+                         source.position(source.position()-1);
+                     }
+ // escape label
+                     gotoEscape = false; // reset gotoEscape flag
+                     myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */
+                     err = changeState_2022(this, source, ISO_2022_KR);
+                     if (err.isError()) {
+                         return err;
+                     }
+                     continue;
+                 }
+                 myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
+                 if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
+                     if (source.hasRemaining() || gotoGetTrailByte) {
+                         boolean leadIsOk, trailIsOk;
+                         short trailByte;
+ // getTrailByte label
+                         gotoGetTrailByte = false; // reset gotoGetTrailByte flag
+
+                         /* peek at the trail byte; it is consumed below only if it belongs to this character */
+                         trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                         targetUniChar = UConverterConstants.missingCharMarker;
+                         /*
+                          * Ticket 5691: consistent illegal sequences:
+                          * - We include at least the first byte in the illegal sequence.
+                          * - If any of the non-initial bytes could be the start of a character,
+                          *   we stop the illegal sequence before the first one of those.
+                          *
+                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                          * Otherwise we convert or report the pair of bytes.
+                          */
+                         leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                         trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+                         if (leadIsOk && trailIsOk) {
+                             source.get();
+                             /* the lookup table is addressed with both bytes moved into the 8-bit range */
+                             tempBuf[0] = (byte)(mySourceChar + 0x80);
+                             tempBuf[1] = (byte)(trailByte + 0x80);
+                             targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
+                             mySourceChar = (char)((mySourceChar << 8) | trailByte);
+                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                             source.get();
+                             /*
+                              * add another bit so that the code below writes 2 bytes in case of error.
+                              * No (char) cast here: it would strip the 0x10000 marker again, and a pair
+                              * with a 0x00 lead byte would then look like a single byte.
+                              */
+                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                         }
+                     } else {
+                         /* lead byte at the end of the input: park it until the next call */
+                         toUBytesArray[0] = (byte)mySourceChar;
+                         toULength = 1;
+                         break;
+                     }
+                 } else if (mySourceChar <= 0x7f) {
+                     /* temporarily narrow source to exactly the byte just read and look it up */
+                     int savedSourceLimit = source.limit();
+                     int savedSourcePosition = source.position();
+                     source.limit(source.position());
+                     source.position(source.position()-1);
+                     targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
+                     source.limit(savedSourceLimit);
+                     source.position(savedSourcePosition);
+                 } else {
+                     targetUniChar = 0xffff;
+                 }
+                 if (targetUniChar < 0xfffe) {
+                     if (offsets != null) {
+                         /*
+                          * Record the offset in the slot of the character about to be written,
+                          * i.e. before put() advances the position; indexing after the put wrote
+                          * one slot too far.
+                          */
+                         offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+                     }
+                     target.put((char)targetUniChar);
+                 } else {
+                     /* Call the callback function */
+                     err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+                     break;
+                 }
+             } else {
+                 err = CoderResult.OVERFLOW;
+                 break;
+             }
+         }
+
+         return err;
+     }
+
+     /**
+      * Conversion path used by decodeLoop when myConverterData.version is 1: the input
+      * up to the boundary reported by getEndOfBuffer_2022 is handed to the MBCS sub-decoder,
+      * then changeState_2022 handles the escape sequence, and so on.
+      *
+      * @param cnv     sub-decoder whose partial-input and overflow buffers are exchanged with
+      *                this decoder (decodeLoop passes myConverterData.currentDecoder)
+      * @param source  input bytes
+      * @param target  output characters
+      * @param offsets optional per-character source offsets, may be null
+      * @param flush   forwarded to the sub-decoder
+      * @return the result of the last sub-conversion or escape-sequence step
+      */
+     protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+         CoderResult err = CoderResult.UNDERFLOW;
+         int sourceStart;
+         int sourceLimit;
+         int argSource;
+         boolean gotoEscape = false;
+         int oldSourceLimit;
+
+         /* remember the original start of the input for offsets */
+         sourceStart = argSource = source.position();
+
+         if (myConverterData.key != 0) {
+             /* continue with a partial escape sequence */
+             gotoEscape = true;
+         }
+
+         while (gotoEscape || (!err.isError() && source.hasRemaining())) {
+             if (!gotoEscape) {
+                 /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
+                 int oldSourcePos = source.position();
+                 sourceLimit = getEndOfBuffer_2022(source);
+                 source.position(oldSourcePos);
+                 if (source.position() != sourceLimit) {
+                     /*
+                      * get the current partial byte sequence
+                      *
+                      * it needs to be moved between the public and the subconverter
+                      * so that the conversion framework, which only sees the public
+                      * converter, can handle truncated and illegal input etc.
+                      */
+                     if (toULength > 0) {
+                         cnv.toUBytesArray = toUBytesArray.clone();
+                     }
+                     cnv.toULength = toULength;
+
+                     /*
+                      * Convert up to the end of the input, or to before the next escape character.
+                      * Does not handle conversion extensions because the preToU[] state etc.
+                      * is not copied.
+                      */
+                     int offsetsStart = (offsets != null) ? offsets.position() : 0;
+                     oldSourceLimit = source.limit(); // save the old source limit change to new one
+                     source.limit(sourceLimit);
+                     err = myConverterData.currentDecoder.cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
+                     source.limit(oldSourceLimit); // restore source limit;
+                     if (offsets != null && sourceStart != argSource) {
+                         /*
+                          * update offsets to base them on the actual start of the input.
+                          *
+                          * Only the entries added by the call above are touched, and they are
+                          * accessed by absolute index so that neither buffer position moves.
+                          * (The earlier loop advanced target with get() while comparing against a
+                          * start index that never changed, so it could not terminate normally.)
+                          *
+                          * NOTE(review): this assumes the sub-decoder appends offsets with relative
+                          * puts; if it does not advance offsets.position() nothing is rebased here.
+                          * NOTE(review): argSource is the position after the previous
+                          * sub-conversion - confirm that bytes consumed by changeState_2022 in
+                          * between do not also need to be included in delta.
+                          */
+                         int delta = argSource - sourceStart;
+                         int offsetsEnd = offsets.position();
+                         for (int i = offsetsStart; i < offsetsEnd; i++) {
+                             int currentOffset = offsets.get(i);
+                             if (currentOffset >= 0) {
+                                 offsets.put(i, currentOffset + delta);
+                             }
+                         }
+                     }
+                     argSource = source.position();
+
+                     /* copy input/error/overflow buffers */
+                     if (cnv.toULength > 0) {
+                         toUBytesArray = cnv.toUBytesArray.clone();
+                     }
+                     toULength = cnv.toULength;
+
+                     if (err.isOverflow()) {
+                         if (cnv.charErrorBufferLength > 0) {
+                             charErrorBufferArray = cnv.charErrorBufferArray.clone();
+                         }
+                         charErrorBufferLength = cnv.charErrorBufferLength;
+                         cnv.charErrorBufferLength = 0;
+                     }
+                 }
+
+                 if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
+                     return err;
+                 }
+             }
+ // escape label
+             gotoEscape = false;
+             err = changeState_2022(this, source, ISO_2022_KR);
+         }
+         return err;
+     }
+ }
+
+ /******************** from unicode **********************/
+ /*
+  * Order in which the JP charsets are tried when encoding,
+  * most preferred first.
+  */
+ private static final byte[] jpCharsetPref = {
+     ASCII, JISX201, ISO8859_1, ISO8859_7,
+     JISX208, JISX212, GB2312, KSC5601,
+     HWKANA_7BIT
+ };
+ /*
+  * Designation escape sequences, one row per charset.
+  * The rows must be in order of the enum constants like JISX201 = 3,
+  * not in order of jpCharsetPref[]!
+  * Every sequence starts with ESC (0x1B); the remaining bytes are written
+  * as the ASCII characters they stand for.
+  */
+ private static final byte[][] escSeqChars = {
+     { 0x1B, '(', 'B' },       /* (B   ASCII       */
+     { 0x1B, '.', 'A' },       /* .A   ISO-8859-1  */
+     { 0x1B, '.', 'F' },       /* .F   ISO-8859-7  */
+     { 0x1B, '(', 'J' },       /* (J   JISX-201    */
+     { 0x1B, '$', 'B' },       /* $B   JISX-208    */
+     { 0x1B, '$', '(', 'D' },  /* $(D  JISX-212    */
+     { 0x1B, '$', 'A' },       /* $A   GB2312      */
+     { 0x1B, '$', '(', 'C' },  /* $(C  KSC5601     */
+     { 0x1B, '(', 'I' }        /* (I   HWKANA_7BIT */
+ };
+ /*
+  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
+  * Katakana.
+  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
+  * because Shift-JIS roundtrips half-width Katakana to single bytes.
+  * These were the only fallbacks in ICU's jisx-208.ucm file.
+  *
+  * One entry per code point, U+FF61 first and U+FF9F last.
+  */
+ private static final char[] hwkana_fb = {
+     /* U+FF61..U+FF6F */
+     0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521, 0x2523,
+     0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
+     /* U+FF70..U+FF7F */
+     0x213C, 0x2522, 0x2524, 0x2526, 0x2528, 0x252A, 0x252B, 0x252D,
+     0x252F, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253B, 0x253D,
+     /* U+FF80..U+FF8F */
+     0x253F, 0x2541, 0x2544, 0x2546, 0x2548, 0x254A, 0x254B, 0x254C,
+     0x254D, 0x254E, 0x254F, 0x2552, 0x2555, 0x2558, 0x255B, 0x255E,
+     /* U+FF90..U+FF9F */
+     0x255F, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
+     0x256A, 0x256B, 0x256C, 0x256D, 0x256F, 0x2573, 0x212B, 0x212C
+ };
+
+ /* Substitution byte sequences: a one-byte entry (0x1A) and a two-byte entry (0x2F 0x7E). */
+ protected byte[][] fromUSubstitutionChar = {
+     { 0x1A },
+     { 0x2F, 0x7E }
+ };
+ /****************************ISO-2022-JP************************************/
+ private class CharsetEncoderISO2022JP extends CharsetEncoderICU {
+ /**
+  * Creates the encoder, using the first (single-byte) entry of
+  * fromUSubstitutionChar as its replacement.
+  *
+  * @param cs the charset this encoder is created for
+  */
+ public CharsetEncoderISO2022JP(CharsetICU cs) {
+     super(cs, fromUSubstitutionChar[0]);
+ }
+
+ /** Resets the base encoder first, then the shared ISO-2022 converter data. */
+ @Override
+ protected void implReset() {
+     super.implReset();
+     myConverterData.reset();
+ }
+ /*
+  * Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable.
+  *
+  * U+00A5 maps to 5C and U+203E maps to 7E; U+005C and U+007E themselves are
+  * unmappable, every other value up to 7F maps to itself.
+  */
+ private int jisx201FromU(int value) {
+     switch (value) {
+     case 0xa5:
+         return 0x5c;
+     case 0x203e:
+         return 0x7e;
+     case 0x5c:
+     case 0x7e:
+         /* these two slots are taken by the mappings above */
+         break;
+     default:
+         if (value <= 0x7f) {
+             return value;
+         }
+         break;
+     }
+     return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);
+ }
+
+ /*
+ * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
+ * to JIS X 0208, and convert it to a pair of 21..7E bytes.
+ * Return 0 if the byte pair is out of range.
+ */
+ private int _2022FromSJIS(int value) {
+ short trail;
+
+ if (value > 0xEFFC) {
+ return 0; /* beyond JIS X 0208 */
+ }
+
+ trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ value &= 0xff00; /* lead byte */
+ if (value <= 0x9f00) {
+ value -= 0x7000;
+ } else { /* 0xe000 <= value <= 0xef00 */
+ value -= 0xb000;
+ }
+
+ value <<= 1;
+
+ if (trail <= 0x9e) {
+ value -= 0x100;
+ if (trail <= 0x7e) {
+ value |= ((trail - 0x1f) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ } else {
+ value |= ((trail - 0x20) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+ } else { /* trail <= 0xfc */
+ value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+
+ return value;
+ }
+ /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte[] buffer = new byte[8];
+ int i = 0;
+ byte[] subchar;
+ subchar = encoder.replacement();
+
+ byte cs;
+ if (myConverterData.fromU2022State.g == 1) {
+ /* JIS7: switch from G1 to G0 */
+ myConverterData.fromU2022State.g = 0;
+ buffer[i++] = UConverterConstants.SI;
+ }
+ cs = myConverterData.fromU2022State.cs[0];
+
+ if (cs != ASCII && cs != JISX201) {
+ /* not in ASCII or JIS X 0201: switch to ASCII */
+ myConverterData.fromU2022State.cs[0] = ASCII;
+ buffer[i++] = 0x1B;
+ buffer[i++] = 0x28;
+ buffer[i++] = 0x42;
+ }
+
+ buffer[i++] = subchar[0];
+
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
+
+ return err;
+ }
+
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ int sourceChar;
+ byte cs, g;
+ int choiceCount;
+ int len, outLen;
+ byte[] choices = new byte[10];
+ int targetValue = 0;
+ boolean usingFallback;
+ byte[] buffer = new byte[8];
+ boolean getTrail = false; // use for getTrail label
+ int oldSourcePos; // for proper error handling
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate */
+ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
+ getTrail = true;
+ }
+
+ while (getTrail || source.hasRemaining()) {
+ if (getTrail || target.hasRemaining()) {
+ oldSourcePos = source.position();
+ if (!getTrail) { /* skip if going to getTrail label */
+ sourceChar = source.get();
+ }
+ /* check if the char is a First surrogate */
+ if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
+ if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
+// getTrail:
+ if (getTrail) {
+ getTrail = false;
+ }
+ /* look ahead to find the trail surrogate */
+ if (source.hasRemaining()) {
+ /* test the following code unit */
+ char trail = source.get();
+ /* go back to the previous position */
+ source.position(source.position()-1);
+ if (UTF16.isTrailSurrogate(trail)) {
+ source.get();
+ sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
+ fromUChar32 = 0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ }
+
+ /* do not convert SO/SI/ESC */
+ if (IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+
+ /* do the conversion */
+
+ if (choiceCount == 0) {
+ char csm;
+ /*
+ * The csm variable keeps track of which charsets are allowed
+ * and not used yet while building the choices[].
+ */
+ csm = (char)jpCharsetMasks[myConverterData.version];
+ choiceCount = 0;
+
+ /* JIS7/8: try single-byte half-width Katakana before JISX208 */
+ if (myConverterData.version == 3 || myConverterData.version == 4) {
+ choices[choiceCount++] = HWKANA_7BIT;
+ }
+ /* Do not try single-bit half-width Katakana for other versions. */
+ csm &= ~CSM(HWKANA_7BIT);
+
+ /* try the current G0 charset */
+ choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];
+ csm &= ~CSM(cs);
+
+ /* try the current G2 charset */
+ if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {
+ choices[choiceCount++] = cs;
+ csm &= ~CSM(cs);
+ }
+
+ /* try all the other charsets */
+ for (int i = 0; i < jpCharsetPref.length; i++) {
+ cs = jpCharsetPref[i];
+ if ((CSM(cs) & csm) != 0) {
+ choices[choiceCount++] = cs;
+ csm &= ~CSM(cs);
+ }
+ }
+ }
+
+ cs = g = 0;
+ /*
+ * len==0: no mapping found yet
+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
+ * len>0: found a roundtrip result, done
+ */
+ len = 0;
+ /*
+ * We will turn off usingFallBack after finding a fallback,
+ * but we still get fallbacks from PUA code points as usual.
+ * Therefore, we will also need to check that we don't overwrite
+ * an early fallback with a later one.
+ */
+ usingFallback = useFallback;
+
+ for (int i = 0; i < choiceCount && len <= 0; i++) {
+ int[] value = new int[1];
+ int len2;
+ byte cs0 = choices[i];
+ switch (cs0) {
+ case ASCII:
+ if (sourceChar <= 0x7f) {
+ targetValue = sourceChar;
+ len = 1;
+ cs = cs0;
+ g = 0;
+ }
+ break;
+ case ISO8859_1:
+ if (GR96_START <= sourceChar && sourceChar <= GR96_END) {
+ targetValue = sourceChar - 0x80;
+ len = 1;
+ cs = cs0;
+ g = 2;
+ }
+ break;
+ case HWKANA_7BIT:
+ if (sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
+ if (myConverterData.version == 3) {
+ /* JIS7: use G1 (SO) */
+ /* Shift U+FF61..U+FF9F to bytes 21..5F. */
+ targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0x21)));
+ len = 1;
+ myConverterData.fromU2022State.cs[1] = cs = cs0; /* do not output an escape sequence */
+ g = 1;
+ } else if (myConverterData.version == 4) {
+ /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
+ /* Shift U+FF61..U+FF9F to bytes A1..DF. */
+ targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));
+ len = 1;
+
+ cs = myConverterData.fromU2022State.cs[0];
+ if (IS_JP_DBCS(cs)) {
+ /* switch from a DBCS charset to JISX201 */
+ cs = JISX201;
+ }
+ /* else stay in the current G0 charset */
+ g = 0;
+ }
+ /* else do not use HWKANA_7BIT with other versions */
+ }
+ break;
+ case JISX201:
+ /* G0 SBCS */
+ value[0] = jisx201FromU(sourceChar);
+ if (value[0] <= 0x7f) {
+ targetValue = value[0];
+ len = 1;
+ cs = cs0;
+ g = 0;
+ usingFallback = false;
+ }
+ break;
+ case JISX208:
+ /* G0 DBCS from JIS table */
+ myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
+ myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
+ len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
+ //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
+ if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len) == 2 */
+ value[0] = _2022FromSJIS(value[0]);
+ if (value[0] != 0) {
+ targetValue = value[0];
+ len = len2;
+ cs = cs0;
+ g = 0;
+ usingFallback = false;
+ }
+ } else if (len == 0 && usingFallback && sourceChar <= HWKANA_END && sourceChar >= HWKANA_START) {
+ targetValue = hwkana_fb[sourceChar - HWKANA_START];
+ len = -2;
+ cs = cs0;
+ g = 0;
+ usingFallback = false;
+ }
+ break;
+ case ISO8859_7:
+ /* G0 SBCS forced to 7-bit output */
+ len2 = MBCSSingleFromUChar32(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback);
+ if (len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value[0] && value[0] <= GR96_END) {
+ targetValue = value[0] - 0x80;
+ len = len2;
+ cs = cs0;
+ g = 2;
+ usingFallback = false;
+ }
+ break;
+ default :
+ /* G0 DBCS */
+ myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
+ myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
+ len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
+ //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0], sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
+ if (len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
+ if (cs0 == KSC5601) {
+ /*
+ * Check for valid bytes for the encoding scheme.
+ * This is necessary because the sub-converter (windows-949)
+ * has a broader encoding scheme than is valid for 2022.
+ */
+ value[0] = _2022FromGR94DBCS(value[0]);
+ if (value[0] == 0) {
+ break;
+ }
+ }
+ targetValue = value[0];
+ len = len2;
+ cs = cs0;
+ g = 0;
+ usingFallback = false;
+ }
+ break;
+ }
+ }
+
+ if (len != 0) {
+ if (len < 0) {
+ len = -len; /* fallback */
+ }
+ outLen = 0;
+
+ /* write SI if necessary (only for JIS7) */
+ if (myConverterData.fromU2022State.g == 1 && g == 0) {
+ buffer[outLen++] = UConverterConstants.SI;
+ myConverterData.fromU2022State.g = 0;
+ }
+
+ /* write the designation sequence if necessary */
+ if (cs != myConverterData.fromU2022State.cs[g]) {
+ for (int i = 0; i < escSeqChars[cs].length; i++) {
+ buffer[outLen++] = escSeqChars[cs][i];
+ }
+ myConverterData.fromU2022State.cs[g] = cs;
+
+ /* invalidate the choices[] */
+ choiceCount = 0;
+ }
+
+ /* write the shift sequence if necessary */
+ if (g != myConverterData.fromU2022State.g) {
+ switch (g) {
+ /* case 0 handled before writing escapes */
+ case 1:
+ buffer[outLen++] = UConverterConstants.SO;
+ myConverterData.fromU2022State.g = 1;
+ break;
+ default : /* case 2 */
+ buffer[outLen++] = 0x1b;
+ buffer[outLen++] = 0x4e;
+ break;
+ /* case 3: no SS3 in ISO-2022-JP-x */
+ }
+ }
+
+ /* write the output bytes */
+ if (len == 1) {
+ buffer[outLen++] = (byte)targetValue;
+ } else { /* len == 2 */
+ buffer[outLen++] = (byte)(targetValue >> 8);
+ buffer[outLen++] = (byte)targetValue;
+ }
+ }else {
+ /*
+ * if we cannot find the character after checking all codepages
+ * then this is an error.
+ */
+ err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
+ fromUChar32 = sourceChar;
+ break;
+ }
+
+ if (sourceChar == CR || sourceChar == LF) {
+ /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
+ myConverterData.fromU2022State.cs[2] = 0;
+ choiceCount = 0;
+ }
+
+ /* output outLen>0 bytes in buffer[] */
+ if (outLen == 1) {
+ target.put(buffer[0]);
+ if (offsets != null) {
+ offsets.put(source.remaining() - 1); /* -1 known to be ASCII */
+ }
+ } else if (outLen == 2 && (target.position() + 2) <= target.limit()) {
+ target.put(buffer[0]);
+ target.put(buffer[1]);
+ if (offsets != null) {
+ int sourceIndex = source.position() - 1;
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ } else {
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, source.position()-1);
+ }
+ } else {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-JP conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * in SO mode or not in ASCII mode
+ * end of input and no truncated input
+ */
+ if (!err.isError() &&
+ (myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&
+ flush && !source.hasRemaining() && fromUChar32 == 0) {
+ int sourceIndex;
+
+ outLen = 0;
+
+ if (myConverterData.fromU2022State.g != 0) {
+ buffer[outLen++] = UConverterConstants.SI;
+ myConverterData.fromU2022State.g = 0;
+ }
+
+ if (myConverterData.fromU2022State.cs[0] != ASCII) {
+ for (int i = 0; i < escSeqChars[ASCII].length; i++) {
+ buffer[outLen++] = escSeqChars[ASCII][i];
+ }
+ myConverterData.fromU2022State.cs[0] = ASCII;
+ }
+
+ /* get the source index of the last input character */
+ sourceIndex = source.position();
+ if (sourceIndex > 0) {
+ --sourceIndex;
+ if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
+ (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
+ --sourceIndex;
+ }
+ } else {
+ sourceIndex = -1;
+ }
+
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);
+ }
+ return err;
+ }
+ }
+ /****************************ISO-2022-CN************************************/
+ /*
+ * Rules for ISO-2022-CN Encoding:
+ * i) The designator sequence must appear once on a line before any instance
+ * of the character set it designates.
+ * ii) If two lines contain characters from the same character set, both lines
+ * must include the designator sequence.
+ * iii) Once the designator sequence is known, a shifting sequence has to be found
+ * to invoke the shifting
+ * iv) All lines start in ASCII and end in ASCII.
+ * v) Four shifting sequences are employed for this purpose:
+ * Sequence ASCII Eq Charsets
+ * --------- --------- --------
+ * SI US-ASCII
+ * SO CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
+ * SS2 N CNS-11643-1992 Plane 2
+ * SS3 O CNS-11643-1992 Planes 3-7
+ * vi)
+ * SOdesignator : ESC "$" ")" finalchar_for_SO
+ * SS2designator : ESC "$" "*" finalchar_for_SS2
+ * SS3designator : ESC "$" "+" finalchar_for_SS3
+ *
+ * ESC $ ) A Indicates the bytes following SO are Chinese
+ * characters as defined in GB 2312-80, until
+ * another SOdesignation appears
+ *
+ * ESC $ ) E Indicates the bytes following SO are as defined
+ * in ISO-IR-165 (for details, see section 2.1),
+ * until another SOdesignation appears
+ *
+ * ESC $ ) G Indicates the bytes following SO are as defined
+ * in CNS 11643-plane-1, until another SOdesignation appears
+ *
+ * ESC $ * H Indicates the two bytes immediately following
+ * SS2 is a Chinese character as defined in CNS
+ * 11643-plane-2, until another SS2designation
+ * appears
+ * (Meaning N must precede every 2 byte sequence.)
+ *
+ * ESC $ + I Indicates the immediate two bytes following SS3
+ * is a Chinese character as defined in CNS
+ * 11643-plane-3, until another SS3designation
+ * appears
+ * (Meaning O must precede every 2 byte sequence.)
+ *
+ * ESC $ + J Indicates the immediate two bytes following SS3
+ * is a Chinese character as defined in CNS
+ * 11643-plane-4, until another SS3designation
+ * appears
+ * (In English: O must precede every 2 byte sequence.)
+ *
+ * ESC $ + K Indicates the immediate two bytes following SS3
+ * is a Chinese character as defined in CNS
+ * 11643-plane-5, until another SS3designation
+ * appears
+ *
+ * ESC $ + L Indicates the immediate two bytes following SS3
+ * is a Chinese character as defined in CNS
+ * 11643-plane-6, until another SS3designation
+ * appears
+ *
+ * ESC $ + M Indicates the immediate two bytes following SS3
+ * is a Chinese character as defined in CNS
+ * 11643-plane-7, until another SS3designation
+ * appears
+ *
+ * As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
+ * has its own designation information before any Chinese characters
+ * appear
+ */
+
+ /* Designator escape sequences, kept as byte arrays so that the data is truly read-only. */
+ private static final byte[] GB_2312_80_STR = { 0x1B, '$', ')', 'A' }; /* ESC $ ) A */
+ private static final byte[] ISO_IR_165_STR = { 0x1B, '$', ')', 'E' }; /* ESC $ ) E */
+ private static final byte[] CNS_11643_1992_Plane_1_STR = { 0x1B, '$', ')', 'G' }; /* ESC $ ) G */
+ private static final byte[] CNS_11643_1992_Plane_2_STR = { 0x1B, '$', '*', 'H' }; /* ESC $ * H */
+ private static final byte[] CNS_11643_1992_Plane_3_STR = { 0x1B, '$', '+', 'I' }; /* ESC $ + I */
+ private static final byte[] CNS_11643_1992_Plane_4_STR = { 0x1B, '$', '+', 'J' }; /* ESC $ + J */
+ private static final byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, '$', '+', 'K' }; /* ESC $ + K */
+ private static final byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, '$', '+', 'L' }; /* ESC $ + L */
+ private static final byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, '$', '+', 'M' }; /* ESC $ + M */
+
+ /************************ ISO2022-CN Data *****************************/
+ /* Lookup table of the sequences above; the CN encoder indexes it to fetch the designator it has to write. */
+ private static final byte[][] escSeqCharsCN = {
+     SHIFT_IN_STR,
+     GB_2312_80_STR,
+     ISO_IR_165_STR,
+     CNS_11643_1992_Plane_1_STR,
+     CNS_11643_1992_Plane_2_STR,
+     CNS_11643_1992_Plane_3_STR,
+     CNS_11643_1992_Plane_4_STR,
+     CNS_11643_1992_Plane_5_STR,
+     CNS_11643_1992_Plane_6_STR,
+     CNS_11643_1992_Plane_7_STR
+ };
+
+ private class CharsetEncoderISO2022CN extends CharsetEncoderICU {
+ public CharsetEncoderISO2022CN(CharsetICU cs) { // Creates the ISO-2022-CN encoder for the charset cs.
+ super(cs, fromUSubstitutionChar[0]); // always entry 0 of fromUSubstitutionChar, independent of myConverterData.version
+ }
+
+ protected void implReset() { // Resets this encoder: first the superclass state, then myConverterData.
+ super.implReset();
+ myConverterData.reset(); // the ISO-2022 state used by encodeLoop (fromU2022State) lives in myConverterData
+ }
+
+ /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte[] buffer = new byte[8];
+ int i = 0;
+ byte[] subchar;
+ subchar = encoder.replacement();
+
+ if (myConverterData.fromU2022State.g != 0) {
+ /* not in ASCII mode: switch to ASCII */
+ myConverterData.fromU2022State.g = 0;
+ buffer[i++] = UConverterConstants.SI;
+ }
+ buffer[i++] = subchar[0];
+
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
+
+ return err;
+ }
+
+ /*
+ * Encodes UTF-16 from source into ISO-2022-CN bytes in target.
+ * myConverterData.version selects the flavour: 0 = ISO-2022-CN, 1 = ISO-2022-CN-EXT,
+ * anything else = ISO-2022-CN-CNS.
+ *
+ * source  - input characters; its position moves past every code unit that was read
+ * target  - output bytes; outputs that are longer than two bytes, or that do not fit, go through fromUWriteBytes
+ * offsets - if not null, receives for every output byte the index at which its source character starts
+ * flush   - true at the end of the input; a final SI is then written if the converter is not in ASCII mode
+ *
+ * Returns the initial UNDERFLOW unless something else happens: OVERFLOW when target has no room left,
+ * a malformed result for an unpaired surrogate or an SO/SI/ESC input character, an unmappable result
+ * when none of the candidate charsets can encode the character, or whatever fromUWriteBytes reports.
+ * On an error, and for a lead surrogate at the end of the input, the code point is kept in fromUChar32.
+ */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ int sourceChar;
+ byte[] buffer = new byte[8];
+ int len;
+ byte[] choices = new byte[3];
+ int choiceCount;
+ int targetValue = 0;
+ boolean usingFallback;
+ boolean gotoGetTrail = false;
+ int oldSourcePos; // For proper error handling
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate */
+ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
+ // goto getTrail label
+ gotoGetTrail = true;
+ }
+
+ while (source.hasRemaining() || gotoGetTrail) {
+ if (target.hasRemaining() || gotoGetTrail) {
+ oldSourcePos = source.position();
+ if (!gotoGetTrail) {
+ sourceChar = source.get();
+ }
+ /* check if the char is a First surrogate */
+ if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
+ if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
+// getTrail label
+ /* reset gotoGetTrail flag*/
+ gotoGetTrail = false;
+
+ /* look ahead to find the trail surrogate */
+ if (source.hasRemaining()) {
+ /* test the following code unit */
+ char trail = source.get();
+ source.position(source.position()-1);
+ if (UTF16.isTrailSurrogate(trail)) {
+ source.get();
+ sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
+ fromUChar32 = 0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* no more input: keep the lead surrogate in fromUChar32 for the next call */
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ }
+
+ /* do the conversion */
+ if (sourceChar <= 0x007f) {
+ /* do not convert SO/SI/ESC */
+ if (IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+
+ /* US-ASCII */
+ if (myConverterData.fromU2022State.g == 0) {
+ buffer[0] = (byte)sourceChar;
+ len = 1;
+ } else {
+ /* leave SO mode first: SI followed by the ASCII byte */
+ buffer[0] = UConverterConstants.SI;
+ buffer[1] = (byte)sourceChar;
+ len = 2;
+ myConverterData.fromU2022State.g = 0;
+ choiceCount = 0;
+ }
+
+ if (sourceChar == CR || sourceChar == LF) {
+ /* reset the state at the end of a line */
+ myConverterData.fromU2022State.reset();
+ choiceCount = 0;
+ }
+ } else {
+ /* convert U+0080..U+10ffff */
+ int i;
+ byte cs, g;
+
+ if (choiceCount == 0) {
+ /* try the current SO/G1 converter first */
+ choices[0] = myConverterData.fromU2022State.cs[1];
+
+ /* default to GB2312_1 if none is designated yet */
+ if (choices[0] == 0) {
+ choices[0] = GB2312_1;
+ }
+ if (myConverterData.version == 0) {
+ /* ISO-2022-CN */
+ /* try other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
+ if (choices[0] == GB2312_1) {
+ choices[1] = CNS_11643_1;
+ } else {
+ choices[1] = GB2312_1;
+ }
+
+ choiceCount = 2;
+ } else if (myConverterData.version == 1) {
+ /* ISO-2022-CN-EXT */
+
+ /* try one of the other converters */
+ switch (choices[0]) {
+ case GB2312_1:
+ choices[1] = CNS_11643_1;
+ choices[2] = ISO_IR_165;
+ break;
+ case ISO_IR_165:
+ choices[1] = GB2312_1;
+ choices[2] = CNS_11643_1;
+ break;
+ default :
+ choices[1] = GB2312_1;
+ choices[2] = ISO_IR_165;
+ break;
+ }
+
+ choiceCount = 3;
+ } else {
+ /* ISO-2022-CN-CNS */
+ choices[0] = CNS_11643_1;
+ choices[1] = GB2312_1;
+
+ choiceCount = 2;
+ }
+ }
+
+ cs = g = 0;
+ /*
+ * len==0: no mapping found yet
+ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
+ * len>0: found a roundtrip result, done
+ */
+ len = 0;
+ /*
+ * We will turn off usingFallback after finding a fallback,
+ * but we still get fallbacks from PUA code points as usual.
+ * Therefore, we will also need to check that we don't overwrite
+ * an early fallback with a later one.
+ */
+ usingFallback = useFallback;
+
+ for (i = 0; i < choiceCount && len <= 0; ++i) {
+ byte cs0 = choices[i];
+ if (cs0 > 0) {
+ int[] value = new int[1];
+ int len2;
+ if (cs0 > CNS_11643_0) {
+ myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[CNS_11643];
+ myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_3;
+ len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
+ //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[CNS_11643],
+ // sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_3);
+ if (len2 == 3 || (len2 == -3 && len == 0)) {
+ targetValue = value[0];
+ /* the top byte of the 3-byte result selects the plane, hence the charset id */
+ cs = (byte)(CNS_11643_0 + (value[0] >> 16) - 0x80);
+ if (len2 >= 0) {
+ len = 2;
+ } else {
+ len = -2;
+ usingFallback = false;
+ }
+ if (cs == CNS_11643_1) {
+ g = 1;
+ } else if (cs == CNS_11643_2) {
+ g = 2;
+ } else if (myConverterData.version == 1) { /* plane 3..7 */
+ g = 3;
+ } else {
+ /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
+ len = 0;
+ }
+ }
+ } else {
+ /* GB2312_1 or ISO-IR-165 */
+ myConverterData.currentConverter.sharedData = myConverterData.myConverterArray[cs0];
+ myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
+ len2 = myConverterData.currentEncoder.fromUChar32(sourceChar, value, usingFallback);
+ //len2 = MBCSFromUChar32_ISO2022(myConverterData.myConverterArray[cs0],
+ // sourceChar, value, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
+ if (len2 == 2 || (len2 == -2 && len == 0)) {
+ targetValue = value[0];
+ len = len2;
+ cs = cs0;
+ g = 1;
+ usingFallback = false;
+ }
+ }
+ }
+ }
+
+ if (len != 0) {
+ len = 0; /* count output bytes; it must have been abs(len) == 2 */
+
+ /* write the designation sequence if necessary */
+ if (cs != myConverterData.fromU2022State.cs[g]) {
+ if (cs < CNS_11643) {
+ for (int n = 0; n < escSeqCharsCN[cs].length; n++) {
+ buffer[n] = escSeqCharsCN[cs][n];
+ }
+ } else {
+ for (int n = 0; n < escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)].length; n++) {
+ buffer[n] = escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)][n];
+ }
+ }
+ len = 4;
+ myConverterData.fromU2022State.cs[g] = cs;
+ if (g == 1) {
+ /* changing the SO/G1 charset invalidates the choices[] */
+ choiceCount = 0;
+ }
+ }
+
+ /* write the shift sequence if necessary */
+ if (g != myConverterData.fromU2022State.g) {
+ switch (g) {
+ case 1:
+ buffer[len++] = UConverterConstants.SO;
+
+ /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
+ myConverterData.fromU2022State.g = 1;
+ break;
+ case 2:
+ /* SS2: ESC N */
+ buffer[len++] = 0x1b;
+ buffer[len++] = 0x4e;
+ break;
+ default: /* case 3 */
+ /* SS3: ESC O */
+ buffer[len++] = 0x1b;
+ buffer[len++] = 0x4f;
+ break;
+ }
+ }
+
+ /* write the two output bytes */
+ buffer[len++] = (byte)(targetValue >> 8);
+ buffer[len++] = (byte)targetValue;
+ } else {
+ /* if we cannot find the character after checking all codepages
+ * then this is an error
+ */
+ err = CoderResult.unmappableForLength(source.position()-oldSourcePos);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ }
+ /* output len>0 bytes in buffer[] */
+ if (len == 1) {
+ target.put(buffer[0]);
+ if (offsets != null) {
+ offsets.put(source.position()-1); /* a single byte is always ASCII, i.e. one code unit */
+ }
+ } else if (len == 2 && (target.remaining() >= 2)) {
+ target.put(buffer[0]);
+ target.put(buffer[1]);
+ if (offsets != null) {
+ /* index at which the character starts (position() is already past it) */
+ int sourceIndex = source.position() - Character.charCount(sourceChar);
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* longer output, or not enough room: report the index of the character's first code unit */
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, len, target, offsets, source.position() - Character.charCount(sourceChar));
+ if (err.isError()) {
+ break;
+ }
+ }
+ } else {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ } /* end while (source.hasRemaining() */
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-CN conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * not in ASCII mode
+ * end of input and no truncated input
+ */
+ if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {
+ int sourceIndex;
+
+ /* we are switching to ASCII */
+ myConverterData.fromU2022State.g = 0;
+
+ /* get the source index of the last input character */
+ sourceIndex = source.position();
+ if (sourceIndex > 0) {
+ --sourceIndex;
+ if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
+ (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
+ --sourceIndex;
+ }
+ } else {
+ sourceIndex = -1;
+ }
+
+ err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
+ }
+
+ return err;
+ }
+ }
+ /******************************** ISO-2022-KR *****************************/
+ /*
+ * Rules for ISO-2022-KR encoding
+ * i) The KSC5601 designator sequence should appear only once in a file,
+ * at the beginning of a line before any KSC5601 characters. This usually
+ * means that it appears by itself on the first line of the file
+ * ii) There are only 2 shifting sequences SO to shift into double byte mode
+ * and SI to shift into single byte mode
+ */
+ private class CharsetEncoderISO2022KR extends CharsetEncoderICU {
+ public CharsetEncoderISO2022KR(CharsetICU cs) { // Creates the ISO-2022-KR encoder for the charset cs.
+ super(cs, fromUSubstitutionChar[myConverterData.version]); // the fromUSubstitutionChar entry is chosen by the converter version
+ }
+
+ protected void implReset() { // Resets this encoder: superclass state, myConverterData, then the KR initial state.
+ super.implReset();
+ myConverterData.reset();
+ setInitialStateFromUnicodeKR(this); // pre-loads the designator ESC $ ) C into errorBuffer when errorBuffer is empty
+ }
+
+ /* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte[] buffer = new byte[8];
+ int length, i = 0;
+ byte[] subchar;
+
+ subchar = encoder.replacement();
+ length = subchar.length;
+
+ if (myConverterData.version == 0) {
+ if (length == 1) {
+ if (encoder.fromUnicodeStatus != 0) {
+ /* in DBCS mode: switch to SBCS */
+ encoder.fromUnicodeStatus = 0;
+ buffer[i++] = UConverterConstants.SI;
+ }
+ buffer[i++] = subchar[0];
+ } else { /* length == 2 */
+ if (encoder.fromUnicodeStatus == 0) {
+ /* in SBCS mode: switch to DBCS */
+ encoder.fromUnicodeStatus = 1;
+ buffer[i++] = UConverterConstants.SO;
+ }
+ buffer[i++] = subchar[0];
+ buffer[i++] = subchar[1];
+ }
+ err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
+ } else {
+ /* save the subvonverter's substitution string */
+ byte[] currentSubChars = myConverterData.currentEncoder.replacement();
+
+ /* set our substitution string into the subconverter */
+ myConverterData.currentEncoder.replaceWith(subchar);
+ myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
+ /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
+ myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;
+ err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);
+ encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;
+
+ /* restore the subconverter's substitution string */
+ myConverterData.currentEncoder.replaceWith(currentSubChars);
+
+ if (err.isOverflow()) {
+ if (myConverterData.currentEncoder.errorBufferLength > 0) {
+ encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
+ }
+ encoder.errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
+ myConverterData.currentEncoder.errorBufferLength = 0;
+ }
+ }
+
+ return err;
+ }
+
+ private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+
+ myConverterData.currentEncoder.fromUChar32 = fromUChar32;
+ err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);
+ fromUChar32 = myConverterData.currentEncoder.fromUChar32;
+
+ if (err.isOverflow()) {
+ if (myConverterData.currentEncoder.errorBufferLength > 0) {
+ errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
+ }
+ errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
+ myConverterData.currentEncoder.errorBufferLength = 0;
+ }
+
+ return err;
+ }
+
+ /*
+ * Encodes UTF-16 from source into ISO-2022-KR bytes in target.
+ * Version 1 is delegated entirely to encodeLoopIBM(). Otherwise every character is looked up in the
+ * current sub-converter: single-byte results (at most 0x7f) are written as they are, double-byte
+ * results are written with 0x80 subtracted from each byte, and SO or SI is written whenever the
+ * output changes between double-byte and single-byte.
+ *
+ * source  - input characters; its position moves past every code unit that was read
+ * target  - output bytes; bytes that no longer fit are stored in errorBuffer
+ * offsets - if not null, receives source.position()-1 for every byte written to target
+ * flush   - true at the end of the input; a final SI is then written if the encoder is in double-byte mode
+ *
+ * Returns UNDERFLOW, OVERFLOW, a malformed result (SO/SI/ESC in the input, unpaired surrogate) or an
+ * unmappable result. The single/double-byte mode is saved in fromUnicodeStatus before returning.
+ */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ int[] targetByteUnit = { 0x0000 };
+ int sourceChar = 0x0000;
+ boolean isTargetByteDBCS;
+ boolean oldIsTargetByteDBCS;
+ boolean usingFallback;
+ int length = 0;
+ boolean gotoGetTrail = false; // for goto getTrail label call
+
+ /*
+ * if the version is 1 then the user is requesting
+ * conversion with ibm-25546 pass the argument to
+ * MBCS converter and return
+ */
+ if (myConverterData.version == 1) {
+ return encodeLoopIBM(source, target, offsets, flush);
+ }
+
+ usingFallback = useFallback;
+ isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;
+ if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
+ gotoGetTrail = true;
+ }
+
+ while (source.hasRemaining() || gotoGetTrail) {
+ targetByteUnit[0] = UConverterConstants.missingCharMarker;
+
+ if (target.hasRemaining() || gotoGetTrail) {
+ if (!gotoGetTrail) {
+ sourceChar = source.get();
+
+ /* do not convert SO/SI/ESC */
+ if (IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ fromUChar32 = sourceChar;
+ break;
+ }
+ myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
+ length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);
+ //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
+ if (length < 0) {
+ length = -length; /* fallback */
+ }
+ /* only DBCS or SBCS characters are expected */
+ /* DB characters with high bit set to 1 are expected */
+ if (length > 2 || length == 0 ||
+ (length == 1 && targetByteUnit[0] > 0x7f) ||
+ (length ==2 &&
+ ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
+ ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
+ targetByteUnit[0] = UConverterConstants.missingCharMarker;
+ }
+ }
+ if (!gotoGetTrail && targetByteUnit[0] != UConverterConstants.missingCharMarker) {
+ oldIsTargetByteDBCS = isTargetByteDBCS;
+ isTargetByteDBCS = (targetByteUnit[0] > 0x00FF);
+ /* append the shift sequence */
+ if (oldIsTargetByteDBCS != isTargetByteDBCS) {
+ if (isTargetByteDBCS) {
+ target.put((byte)UConverterConstants.SO);
+ } else {
+ target.put((byte)UConverterConstants.SI);
+ }
+ if (offsets != null) {
+ offsets.put(source.position()-1);
+ }
+ }
+ /* write the targetUniChar to target */
+ if (targetByteUnit[0] <= 0x00FF) {
+ if (target.hasRemaining()) {
+ target.put((byte)targetByteUnit[0]);
+ if (offsets != null) {
+ offsets.put(source.position()-1);
+ }
+ } else {
+ /* no room left: keep the byte in errorBuffer */
+ errorBuffer[errorBufferLength++] = (byte)targetByteUnit[0];
+ err = CoderResult.OVERFLOW;
+ }
+ } else {
+ if (target.hasRemaining()) {
+ target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80)));
+ if (offsets != null) {
+ offsets.put(source.position()-1);
+ }
+ if (target.hasRemaining()) {
+ target.put((byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80)));
+ if (offsets != null) {
+ offsets.put(source.position()-1);
+ }
+ } else {
+ /* room for the first byte only: keep the second one in errorBuffer */
+ errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));
+ err = CoderResult.OVERFLOW;
+ }
+
+ } else {
+ /* no room at all: keep both bytes in errorBuffer */
+ errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));
+ errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));
+ err = CoderResult.OVERFLOW;
+ }
+ }
+ } else {
+ /* oops.. the code point is unassigned
+ * set the error and reason
+ */
+
+ /* check if the char is a First surrogate */
+ if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
+ if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
+// getTrail label
+ // reset gotoGetTrail flag
+ gotoGetTrail = false;
+
+ /* look ahead to find the trail surrogate */
+ if (source.hasRemaining()) {
+ /* test the following code unit */
+ char trail = source.get();
+ source.position(source.position()-1);
+ if (UTF16.isTrailSurrogate(trail)) {
+ source.get();
+ sourceChar = UCharacter.getCodePoint((char)sourceChar, trail);
+ err = CoderResult.unmappableForLength(2);
+ /* convert this surrogate code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ }
+ } else {
+ /* no more input */
+ err = CoderResult.UNDERFLOW;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate ) */
+ /* callback(illegal) */
+ err = CoderResult.malformedForLength(1);
+ }
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ err = CoderResult.unmappableForLength(1);
+ }
+
+ fromUChar32 = sourceChar;
+ break;
+ }
+ } else {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for ISO-2022-KR conversion
+ * we need to be in ASCII mode at the very end
+ *
+ * conditions:
+ * successful
+ * not in ASCII mode
+ * end of input and no truncated input
+ */
+ if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {
+ int sourceIndex;
+
+ /* we are switching to ASCII */
+ isTargetByteDBCS = false;
+
+ /* get the source index of the last input character */
+ sourceIndex = source.position();
+ if (sourceIndex > 0) {
+ --sourceIndex;
+ /* test sourceIndex == 0 before looking at the unit in front of it, as the CN and JP encoders do */
+ if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
+ (sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
+ --sourceIndex;
+ }
+ } else {
+ sourceIndex = -1;
+ }
+
+ /* keep the result so that an overflow while writing SI is reported to the caller */
+ err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
+ }
+ /*save the state and return */
+ fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;
+
+ return err;
+ }
+ }
+
+ public CharsetDecoder newDecoder() {
+ switch (variant) {
+ case ISO_2022_JP:
+ return new CharsetDecoderISO2022JP(this);
+
+ case ISO_2022_CN:
+ return new CharsetDecoderISO2022CN(this);
+
+ case ISO_2022_KR:
+ setInitialStateToUnicodeKR();
+ return new CharsetDecoderISO2022KR(this);
+
+ default: /* should not happen */
+ return null;
+ }
+ }
+
+ /*
+  * Creates the encoder that matches this charset's ISO-2022 variant.
+  * A KR encoder is additionally passed through setInitialStateFromUnicodeKR().
+  * Returns null for an unknown variant, which should not happen.
+  */
+ public CharsetEncoder newEncoder() {
+     CharsetEncoder encoder = null;
+
+     switch (variant) {
+     case ISO_2022_JP:
+         encoder = new CharsetEncoderISO2022JP(this);
+         break;
+
+     case ISO_2022_CN:
+         encoder = new CharsetEncoderISO2022CN(this);
+         break;
+
+     case ISO_2022_KR: {
+         CharsetEncoderICU krEncoder = new CharsetEncoderISO2022KR(this);
+         setInitialStateFromUnicodeKR(krEncoder);
+         encoder = krEncoder;
+         break;
+     }
+
+     default: /* should not happen; encoder stays null */
+         break;
+     }
+     return encoder;
+ }
+
+ /* Zeroes the to-Unicode state fields of the current sub-decoder; does nothing unless the converter version is 1. */
+ private void setInitialStateToUnicodeKR() {
+     if (myConverterData.version != 1) {
+         return;
+     }
+     myConverterData.currentDecoder.toUnicodeStatus = 0; /* offset */
+     myConverterData.currentDecoder.mode = 0; /* state */
+     myConverterData.currentDecoder.toULength = 0; /* byteIndex */
+ }
+ private void setInitialStateFromUnicodeKR(CharsetEncoderICU cnv) {
+ /* ISO-2022-KR the designator sequence appears only once
+ * in a file so we append it only once
+ */
+ if (cnv.errorBufferLength == 0) {
+ cnv.errorBufferLength = 4;
+ cnv.errorBuffer[0] = 0x1b;
+ cnv.errorBuffer[1] = 0x24;
+ cnv.errorBuffer[2] = 0x29;
+ cnv.errorBuffer[3] = 0x43;
+ }
+ if (myConverterData.version == 1) {
+ ((CharsetMBCS)myConverterData.currentEncoder.charset()).subChar1 = 0x1A;
+ myConverterData.currentEncoder.fromUChar32 = 0;
+ myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */
+ }
+ }
+
+ void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
+ int i;
+ /*open a set and initialize it with code points that are algorithmically round-tripped */
+
+ switch(variant){
+ case ISO_2022_JP:
+ /*include JIS X 0201 which is hardcoded */
+ setFillIn.add(0xa5);
+ setFillIn.add(0x203e);
+ if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){
+ /*include Latin-1 some variants of JP */
+ setFillIn.add(0, 0xff);
+
+ }
+ else {
+ /* include ASCII for JP */
+ setFillIn.add(0, 0x7f);
+ }
+ if(myConverterData.version==3 || myConverterData.version==4 ||which == ROUNDTRIP_AND_FALLBACK_SET){
+ /*
+ * Do not test(jpCharsetMasks[myConverterData.version]&CSM(HWKANA_7BIT))!=0 because the bit
+ * is on for all JP versions although version 3 & 4 (JIS7 and JIS8) use half-width Katakana.
+ * This is because all ISO_2022_JP variant are lenient in that they accept (in toUnicode) half-width
+ * Katakana via ESC.
+ * However, we only emit (fromUnicode) half-width Katakana according to the
+ * definition of each variant.
+ *
+ * When including fallbacks,
+ * we need to include half-width Katakana Unicode code points for all JP variants because
+ * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
+ */
+ /* include half-width Katakana for JP */
+ setFillIn.add(HWKANA_START, HWKANA_END);
+ }
+ break;
+ case ISO_2022_CN:
+ /* Include ASCII for CN */
+ setFillIn.add(0, 0x7f);
+ break;
+ case ISO_2022_KR:
+ /* there is only one converter for KR */
+ myConverterData.currentConverter.getUnicodeSetImpl(setFillIn, which);
+ break;
+ default:
+ break;
+ }
+
+ //TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until
+ for(i=0; i 0x80 in an otherwise double-byte
+ * character set. So, for example, the LMBCS sequence x10 x10 xAE is the
+ * same as '0xAE' in the Japanese code page 943.
+ *
+ * Next, you will notice that the list of group bytes has some gaps.
+ * These are used in various ways.
+ *
+ * We reserve a few special single byte values for common control
+ * characters. These are in the same place as their ANSI equivalents for speed.
+ */
+ private static final short ULMBCS_HT = 0x09; /* Fixed control-char - Horizontal Tab */
+ private static final short ULMBCS_LF = 0x0A; /* Fixed control-char - Line Feed */
+ private static final short ULMBCS_CR = 0x0D; /* Fixed control-char - Carriage Return */
+ /*
+ * Then, 1-2-3 reserved a special single-byte character to put at the
+ * beginning of internal 'system' range names:
+ */
+ private static final short ULMBCS_123SYSTEMRANGE = 0x19;
+ /*
+ * Then we needed a place to put all the other ansi control characters
+ * that must be moved to different values because LMBCS reserves those
+ * values for other purposes. To represent the control characters, we start
+ * with a first byte of 0x0F & add the control character value as the
+ * second byte.
+ */
+ private static final short ULMBCS_GRP_CTRL = 0x0F;
+ /*
+ * For the C0 controls (less than 0x20), we add 0x20 to preserve the
+ * useful doctrine that any byte less than 0x20 in a LMBCS char must be
+ * the first byte of a character:
+ */
+ private static final short ULMBCS_CTRLOFFSET = 0x20;
+ /*
+ * Where to put the characters that aren't part of any of the 12 national
+ * character sets? The first thing that was done, in the earlier years of
+ * LMBCS, was to use up the spaces of the form
+ * [G] D1,
+ * where 'G' was one of the single-byte character groups, and
+ * D1 was less than 0x80. These sequences are gathered together
+ * into a Lotus-invented doublebyte character set to represent a
+ * lot of stray values. Internally, in this implementation, we track this
+ * as group '0', as a place to tuck this exceptions list.
+ */
+ private static final short ULMBCS_GRP_EXCEPT = 0x00;
+ /*
+ * Finally, as the durability and usefulness of UNICODE became clear,
+ * LOTUS added a new group 0x14 to hold Unicode values not otherwise
+ * represented in LMBCS:
+ */
+ private static final short ULMBCS_GRP_UNICODE = 0x14;
+ /*
+ * The two bytes appearing after a 0x14 are interpreted as UTF-16 BE
+ * (Big Endian) characters. The exception comes when UTF16
+ * representation would have a zero as the second byte. In that case,
+ * 'F6' is used in its place, and the bytes are swapped. (This prevents
+ * LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
+ * 0xF6xx is in the middle of the Private Use Area.)
+ */
+ private static char ULMBCS_UNICOMPATZERO = 0x00F6;
+ /*
+ * It is also useful in our code to have a constant for the size of
+ * a LMBCS char that holds a literal Unicode value.
+ */
+ private static final short ULMBCS_UNICODE_SIZE = 3;
+ /*
+ * To squish the LMBCS representation down even further, and to make
+ * translations even faster, sometimes the optimization group byte can be dropped
+ * from a LMBCS character. This is decided on a process-by-process basis. The
+ * group byte that is dropped is called the 'optimization group.'
+ *
+ * For Notes, the optimization group is always 0x1.
+ */
+ //private static final short ULMBCS_DEFAULTOPTGROUP = 0x01;
+ /* For 1-2-3 files, the optimization group is stored in the header of the 1-2-3
+ * file.
+ * In any case, when using ICU, you pass in the optimization group as part
+ * of the name of the converter (LMBCS-1, LMBCS-2, etc.). Using plain 'LMBCS'
+ * as the name of the converter will give you LMBCS-1.
+ */
+
+ /* Implementation strategy */
+ /*
+ * Because of the extensive use of other character sets, the LMBCS converter
+ * keeps a mapping between optimization groups and IBM character sets, so that
+ * ICU converters can be created and used as needed.
+ *
+ * As you can see, even though any byte below 0x20 could be an optimization
+ * byte, only those at 0x13 or below can map to an actual converter. To limit
+ * some loops and searches, we define a value for that last group converter:
+ */
+ private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */
+
+ /*
+ * Code page name for each LMBCS group byte, indexed by the group byte value.
+ * A null entry means there is no code page to load for that byte; the
+ * constructor skips null names when it fills in the per-group converters.
+ */
+ private static final String[] OptGroupByteToCPName = {
+ /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
+ /* 0x0001 */ "ibm-850",
+ /* 0x0002 */ "ibm-851",
+ /* 0x0003 */ "windows-1255",
+ /* 0x0004 */ "windows-1256",
+ /* 0x0005 */ "windows-1251",
+ /* 0x0006 */ "ibm-852",
+ /* 0x0007 */ null, /* Unused */
+ /* 0x0008 */ "windows-1254",
+ /* 0x0009 */ null, /* Control char HT */
+ /* 0x000A */ null, /* Control char LF */
+ /* 0x000B */ "windows-874",
+ /* 0x000C */ null, /* Unused */
+ /* 0x000D */ null, /* Control char CR */
+ /* 0x000E */ null, /* Unused */
+ /* 0x000F */ null, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */
+ /* 0x0010 */ "windows-932",
+ /* 0x0011 */ "windows-949",
+ /* 0x0012 */ "windows-950",
+ /* 0x0013 */ "windows-936",
+ /* The rest are null, including the 0x0014 Unicode compatibility region
+ * and 0x0019, the 1-2-3 system range control char */
+ /* 0x0014 */ null
+ };
+
+ /* That's approximately all the data that's needed for translating
+ * LMBCS to Unicode.
+ *
+ * However, to translate Unicode to LMBCS, we need some more support.
+ *
+ * That's because there is often more than one possible mapping from a Unicode
+ * code point back into LMBCS. The first thing we do is look up into a table
+ * to figure out if there is more than one possible mapping. This table,
+ * arranged by Unicode values (including ranges), either lists which group
+ * to use, or says that it could go into one or more of the SBCS sets, or
+ * into one or more of the DBCS sets. (If the character exists in both DBCS &
+ * SBCS, the table will place it in the SBCS sets, to make the LMBCS code point
+ * length as small as possible.) Here are the special markers we use to indicate
+ * ambiguous mappings:
+ */
+ private static final short ULMBCS_AMBIGUOUS_SBCS = 0x80; /* could fit in more than one
+ LMBCS sbcs native encoding
+ (example: most accented latin) */
+ private static final short ULMBCS_AMBIGUOUS_MBCS = 0x81; /* could fit in more than one
+ LMBCS mbcs native encoding
+ (example: Unihan) */
+ /* ULMBCS_AMBIGUOUS_MATCH accepts every group, single- or double-byte, for this marker */
+ private static final short ULMBCS_AMBIGUOUS_ALL = 0x82;
+
+ /*
+ * Tells whether the concrete group 'xgroup' is a candidate for the ambiguity
+ * marker 'agroup':
+ *   ULMBCS_AMBIGUOUS_ALL  - every group qualifies
+ *   ULMBCS_AMBIGUOUS_SBCS - only groups below ULMBCS_DOUBLEOPTGROUP_START
+ *   ULMBCS_AMBIGUOUS_MBCS - only groups at or above ULMBCS_DOUBLEOPTGROUP_START
+ * Any other 'agroup' value never matches.
+ */
+ private boolean ULMBCS_AMBIGUOUS_MATCH(short agroup, short xgroup) {
+     if (agroup == ULMBCS_AMBIGUOUS_ALL) {
+         return true;
+     }
+
+     boolean isDoubleByteGroup = xgroup >= ULMBCS_DOUBLEOPTGROUP_START;
+     if (agroup == ULMBCS_AMBIGUOUS_SBCS) {
+         return !isDoubleByteGroup;
+     }
+     if (agroup == ULMBCS_AMBIGUOUS_MBCS) {
+         return isDoubleByteGroup;
+     }
+     return false;
+ }
+
+ /* The table & some code to use it: */
+ /*
+ * One row of the Unicode-range table: an inclusive range of UTF-16 code
+ * units and the LMBCS group (or ambiguity marker) assigned to it.
+ * Rows live in a shared static table, so the fields are immutable.
+ */
+ private static class _UniLMBCSGrpMap {
+     final int uniStartRange; /* first code unit of the range (inclusive) */
+     final int uniEndRange;   /* last code unit of the range (inclusive) */
+     final short GrpType;     /* ULMBCS_GRP_* value or ULMBCS_AMBIGUOUS_* marker */
+
+     _UniLMBCSGrpMap(int uniStartRange, int uniEndRange, short GrpType) {
+         this.uniStartRange = uniStartRange;
+         this.uniEndRange = uniEndRange;
+         this.GrpType = GrpType;
+     }
+ }
+
+ /*
+ * Unicode range -> LMBCS group table.
+ *
+ * FindLMBCSUniRange scans the rows from the top and stops at the first row
+ * whose uniEndRange is not below the character, so the rows are listed in
+ * ascending order. A character that falls in a gap between rows is given
+ * ULMBCS_GRP_UNICODE by that lookup. The final row ends at 0xFFFF, which is
+ * what stops the scan for the highest code units.
+ */
+ private static final _UniLMBCSGrpMap[] UniLMBCSGrpMap = {
+ new _UniLMBCSGrpMap(0x0001, 0x001F, ULMBCS_GRP_CTRL),
+ new _UniLMBCSGrpMap(0x0080, 0x009F, ULMBCS_GRP_CTRL),
+ new _UniLMBCSGrpMap(0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x01CE, 0x01CE, ULMBCS_GRP_TW ),
+ new _UniLMBCSGrpMap(0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x02BA, 0x02BA, ULMBCS_GRP_CN),
+ new _UniLMBCSGrpMap(0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x0400, 0x0400, ULMBCS_GRP_RU),
+ new _UniLMBCSGrpMap(0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x0402, 0x040F, ULMBCS_GRP_RU),
+ new _UniLMBCSGrpMap(0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x0432, 0x044E, ULMBCS_GRP_RU),
+ new _UniLMBCSGrpMap(0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x0450, 0x0491, ULMBCS_GRP_RU),
+ new _UniLMBCSGrpMap(0x05B0, 0x05F2, ULMBCS_GRP_HE),
+ new _UniLMBCSGrpMap(0x060C, 0x06AF, ULMBCS_GRP_AR),
+ new _UniLMBCSGrpMap(0x0E01, 0x0E5B, ULMBCS_GRP_TH),
+ new _UniLMBCSGrpMap(0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2027, 0x2027, ULMBCS_GRP_TW),
+ new _UniLMBCSGrpMap(0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x203C, 0x203C, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2074, 0x2074, ULMBCS_GRP_KO),
+ new _UniLMBCSGrpMap(0x207F, 0x207F, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2081, 0x2084, ULMBCS_GRP_KO),
+ new _UniLMBCSGrpMap(0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS),
+ /*zhujin: upgrade, for regression test, spr HKIA4YHTSU*/
+ new _UniLMBCSGrpMap(0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2153, 0x2154, ULMBCS_GRP_KO),
+ new _UniLMBCSGrpMap(0x215B, 0x215E, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2194, 0x2195, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x21B8, 0x21B9, ULMBCS_GRP_CN),
+ new _UniLMBCSGrpMap(0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x21E7, 0x21E7, ULMBCS_GRP_CN),
+ new _UniLMBCSGrpMap(0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2201, 0x2201, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2204, 0x2206, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2209, 0x220A, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2219, 0x2219, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x221B, 0x221C, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x221F, 0x221F, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2245, 0x2248, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x224C, 0x224C, ULMBCS_GRP_TW),
+ new _UniLMBCSGrpMap(0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2262, 0x2265, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2284, 0x2285, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2288, 0x2297, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2310, 0x2310, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2318, 0x2321, ULMBCS_GRP_EXCEPT),
+ /* NOTE(review): same range as the row above; a top-down first-match scan
+ * would never select this row - confirm whether it is intentional */
+ new _UniLMBCSGrpMap(0x2318, 0x2321, ULMBCS_GRP_CN),
+ new _UniLMBCSGrpMap(0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x2504, 0x2505, ULMBCS_GRP_TW),
+ new _UniLMBCSGrpMap(0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x2666, 0x2666, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL),
+ new _UniLMBCSGrpMap(0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x266F, 0x266F, ULMBCS_GRP_JA),
+ new _UniLMBCSGrpMap(0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT),
+ new _UniLMBCSGrpMap(0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS),
+ new _UniLMBCSGrpMap(0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS),
+ new _UniLMBCSGrpMap(0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE)
+ };
+
+ /*
+ * Looks up the group type recorded for a UTF-16 code unit in UniLMBCSGrpMap.
+ * The first row whose end is not below uniChar decides the answer: its
+ * GrpType if uniChar lies inside that row, otherwise ULMBCS_GRP_UNICODE
+ * (uniChar sits in a gap before the row).
+ */
+ static short FindLMBCSUniRange(char uniChar) {
+     for (_UniLMBCSGrpMap range : UniLMBCSGrpMap) {
+         if (uniChar <= range.uniEndRange) {
+             return (uniChar >= range.uniStartRange) ? range.GrpType : ULMBCS_GRP_UNICODE;
+         }
+     }
+     /* not reached: the last table row ends at 0xFFFF, the largest char value */
+     return ULMBCS_GRP_UNICODE;
+ }
+
+ /*
+ * We also ask the creator of a converter to send in a preferred locale
+ * that we can use in resolving ambiguous mappings. They send the locale
+ * in as a string, and we map it, if possible, to one of the
+ * LMBCS groups. We use this table, and the associated code, to
+ * do the lookup:
+ *
+ * This table maps locale ID's to LMBCS opt groups.
+ * The default return is group 0x01. Note that for
+ * performance reasons, the table is sorted in
+ * increasing alphabetic order, with the notable
+ * exception of zhTW. This is to force the check
+ * for Traditional Chinese before dropping back to
+ * Simplified.
+ * Note too that the Latin-1 groups have been
+ * commented out because it's the default, and
+ * this shortens the table, allowing a serial
+ * search to go quickly.
+ */
+ /*
+ * One row of the locale table: a locale ID key and the LMBCS optimization
+ * group to prefer for it. Rows live in a shared static table, so the
+ * fields are immutable. A null LocaleID marks the terminating row.
+ */
+ private static class _LocaleLMBCSGrpMap {
+     final String LocaleID; /* locale key, or null in the terminating row */
+     final short OptGroup;  /* ULMBCS_GRP_* value for this locale */
+
+     _LocaleLMBCSGrpMap(String LocaleID, short OptGroup) {
+         this.LocaleID = LocaleID;
+         this.OptGroup = OptGroup;
+     }
+ }
+ /*
+ * Locale ID -> LMBCS optimization group. The last row has a null LocaleID
+ * and acts as the end marker that FindLMBCSLocale loops up to. The rows for
+ * ULMBCS_GRP_L1 locales are kept only as comments; FindLMBCSLocale returns
+ * ULMBCS_GRP_L1 when no row matches.
+ * NOTE(review): the constructor passes ULocale.getBaseName() to the lookup;
+ * presumably that yields "zh_TW" (with an underscore), which the "zhTW" key
+ * would not match - confirm.
+ */
+ private static final _LocaleLMBCSGrpMap[] LocaleLMBCSGrpMap = {
+ new _LocaleLMBCSGrpMap("ar", ULMBCS_GRP_AR),
+ new _LocaleLMBCSGrpMap("be", ULMBCS_GRP_RU),
+ new _LocaleLMBCSGrpMap("bg", ULMBCS_GRP_L2),
+ // new _LocaleLMBCSGrpMap("ca", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("cs", ULMBCS_GRP_L2),
+ // new _LocaleLMBCSGrpMap("da", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("de", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("el", ULMBCS_GRP_GR),
+ // new _LocaleLMBCSGrpMap("en", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("es", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("et", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("fi", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("fr", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("he", ULMBCS_GRP_HE),
+ new _LocaleLMBCSGrpMap("hu", ULMBCS_GRP_L2),
+ // new _LocaleLMBCSGrpMap("is", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("it", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("iw", ULMBCS_GRP_HE),
+ new _LocaleLMBCSGrpMap("ja", ULMBCS_GRP_JA),
+ new _LocaleLMBCSGrpMap("ko", ULMBCS_GRP_KO),
+ // new _LocaleLMBCSGrpMap("lt", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("lv", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("mk", ULMBCS_GRP_RU),
+ // new _LocaleLMBCSGrpMap("nl", ULMBCS_GRP_L1),
+ // new _LocaleLMBCSGrpMap("no", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("pl", ULMBCS_GRP_L2),
+ // new _LocaleLMBCSGrpMap("pt", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("ro", ULMBCS_GRP_L2),
+ new _LocaleLMBCSGrpMap("ru", ULMBCS_GRP_RU),
+ new _LocaleLMBCSGrpMap("sh", ULMBCS_GRP_L2),
+ new _LocaleLMBCSGrpMap("sk", ULMBCS_GRP_L2),
+ new _LocaleLMBCSGrpMap("sl", ULMBCS_GRP_L2),
+ new _LocaleLMBCSGrpMap("sq", ULMBCS_GRP_L2),
+ new _LocaleLMBCSGrpMap("sr", ULMBCS_GRP_RU),
+ // new _LocaleLMBCSGrpMap("sv", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("th", ULMBCS_GRP_TH),
+ new _LocaleLMBCSGrpMap("tr", ULMBCS_GRP_TR),
+ new _LocaleLMBCSGrpMap("uk", ULMBCS_GRP_RU),
+ // new _LocaleLMBCSGrpMap("vi", ULMBCS_GRP_L1),
+ new _LocaleLMBCSGrpMap("zhTW", ULMBCS_GRP_TW),
+ new _LocaleLMBCSGrpMap("zh", ULMBCS_GRP_CN),
+ new _LocaleLMBCSGrpMap(null, ULMBCS_GRP_L1)
+ };
+ static short FindLMBCSLocale(String LocaleID) {
+ int index = 0;
+
+ if (LocaleID == null) {
+ return 0;
+ }
+
+ while (LocaleLMBCSGrpMap[index].LocaleID != null) {
+ if (LocaleLMBCSGrpMap[index].LocaleID == LocaleID) {
+ return LocaleLMBCSGrpMap[index].OptGroup;
+ } else if (LocaleLMBCSGrpMap[index].LocaleID.compareTo(LocaleID) > 0){
+ break;
+ }
+ index++;
+ }
+ return ULMBCS_GRP_L1;
+ }
+
+ /*
+ * Before we get to the main body of code, here's how we hook up the rest
+ * of ICU. ICU converters are required to define a structure that includes
+ * some function pointers, and some common data, in the style of a C++
+ * vtable. There is also room in there for converter-specific data. LMBCS
+ * uses that converter-specific data to keep track of the 12 subconverters
+ * we use, the optimization group, and the group (if any) that matches the
+ * locale. We have one structure instantiated for each of the 12 possible
+ * optimization groups.
+ */
+ /*
+ * Converter-specific state for one LMBCS charset instance: the shared data
+ * of each group's sub-converter, the optimization and locale groups, and a
+ * scratch MBCS charset with one encoder and one decoder. The scratch
+ * charset's sharedData is re-pointed at a group's sub-converter before the
+ * encoder or decoder is used for that group.
+ */
+ private class UConverterDataLMBCS {
+     /* scratch MBCS charset plus the encoder/decoder created from it */
+     CharsetMBCS charset;
+     CharsetEncoderMBCS encoder;
+     CharsetDecoderMBCS decoder;
+
+     UConverterSharedData[] OptGrpConverter; /* Converter per Opt. grp. */
+     short OptGroup; /* default Opt. grp. for this LMBCS session */
+     short localeConverterIndex; /* reasonable locale match for index */
+
+     UConverterDataLMBCS() {
+         this.OptGrpConverter = new UConverterSharedData[ULMBCS_GRP_LAST + 1];
+         this.charset = (CharsetMBCS) CharsetICU.forNameICU("ibm-850");
+         this.encoder = (CharsetEncoderMBCS) this.charset.newEncoder();
+         this.decoder = (CharsetDecoderMBCS) this.charset.newDecoder();
+     }
+ }
+
+ private UConverterDataLMBCS extraInfo; /* extraInfo in ICU4C implementation */
+
+ /*
+ * Builds an LMBCS charset: sets the byte/char size limits, loads the shared
+ * data of every group that has a code page name, takes the optimization
+ * group number from the text after the sixth character of the ICU canonical
+ * name (presumably the digits following "LMBCS-"), and picks the locale
+ * group from the default ULocale.
+ */
+ public CharsetLMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+     super(icuCanonicalName, javaCanonicalName, aliases);
+     maxBytesPerChar = ULMBCS_CHARSIZE_MAX;
+     minBytesPerChar = 1;
+     maxCharsPerByte = 1;
+
+     extraInfo = new UConverterDataLMBCS();
+
+     /* groups without a code page name keep a null converter slot */
+     for (int group = 0; group <= ULMBCS_GRP_LAST; group++) {
+         String codePageName = OptGroupByteToCPName[group];
+         if (codePageName == null) {
+             continue;
+         }
+         CharsetMBCS groupCharset = (CharsetMBCS) CharsetICU.forNameICU(codePageName);
+         extraInfo.OptGrpConverter[group] = groupCharset.sharedData;
+     }
+
+     /* get the Opt Group number for the LMBCS converter */
+     String optGroupText = icuCanonicalName.substring(6);
+     extraInfo.OptGroup = (short) Integer.parseInt(optGroupText);
+
+     String defaultLocaleName = ULocale.getDefault().getBaseName();
+     extraInfo.localeConverterIndex = FindLMBCSLocale(defaultLocaleName);
+ }
+
+ class CharsetDecoderLMBCS extends CharsetDecoderICU {
+ /* Creates a decoder for the given charset and immediately resets it via implReset(). */
+ public CharsetDecoderLMBCS(CharsetICU cs) {
+ super(cs);
+ implReset();
+ }
+
+ /* Resets the decoder; nothing is reset here beyond what the superclass resets. */
+ protected void implReset() {
+ super.implReset();
+ }
+
+ /*
+ * Called once the Unicode group byte has been seen: consumes the next two
+ * bytes of ppLMBCSin and returns the UTF-16 code unit they encode. Normally
+ * the bytes are high byte then low byte. When the first byte equals
+ * ULMBCS_UNICOMPATZERO the pair is in swapped form: the second byte is the
+ * high byte and the low byte is zero.
+ */
+ private char GetUniFromLMBCSUni(ByteBuffer ppLMBCSin) {
+     int firstByte = ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
+     int secondByte = ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
+
+     if (firstByte == ULMBCS_UNICOMPATZERO) {
+         /* zero-byte in LSB special character */
+         return (char)(secondByte << 8);
+     }
+     return (char)((firstByte << 8) | secondByte);
+ }
+
+ private int LMBCS_SimpleGetNextUChar(UConverterSharedData cnv, ByteBuffer source, int positionOffset, int length) {
+ int uniChar;
+ int oldSourceLimit;
+ int oldSourcePos;
+
+ extraInfo.charset.sharedData = cnv;
+
+ oldSourceLimit = source.limit();
+ oldSourcePos = source.position();
+
+ source.position(oldSourcePos + positionOffset);
+ source.limit(source.position() + length);
+
+ uniChar = extraInfo.decoder.simpleGetNextUChar(source, false);
+
+ source.limit(oldSourceLimit);
+ source.position(oldSourcePos);
+
+ return uniChar;
+ }
+ /*
+ * Return the Unicode representation for the current LMBCS character.
+ *
+ * Reads one LMBCS character from 'source' starting at its current position.
+ * Problems are reported through err[0]. When the character is cut off by the
+ * buffer limit, err[0] is set to OVERFLOW, the position is moved to the
+ * limit, and 0xFFFF is returned.
+ */
+ /*
+ * Note: Because there is no U_TRUNCATED_CHAR_FOUND error code in ICU4J, we
+ * are going to use BufferOverFlow. The error will be handled correctly
+ * by the calling function.
+ */
+ private int LMBCSGetNextUCharWorker(ByteBuffer source, CoderResult[] err) {
+ int uniChar = 0; /* an output Unicode char */
+ short CurByte; /* A byte from the input stream */
+
+ /* error check */
+ if (!source.hasRemaining()) {
+ /* NOTE(review): java.nio's CoderResult.malformedForLength presumably rejects a
+ * non-positive length with IllegalArgumentException - confirm this branch is
+ * unreachable from decodeLoop or use a different result here */
+ err[0] = CoderResult.malformedForLength(0);
+ return 0xffff;
+ }
+ /* Grab first byte & save address for error recovery */
+ CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ /*
+ * at entry of each if clause:
+ * 1. 'CurByte' points at the first byte of a LMBCS character
+ * 2. 'source' points to the next byte of the source stream after 'CurByte'
+ *
+ * the job of each if clause is:
+ * 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte)
+ * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
+ */
+ /* First lets check the simple fixed values. */
+ if ((CurByte > ULMBCS_C0END && CurByte < ULMBCS_C1START) /* ascii range */ ||
+ CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR || CurByte == ULMBCS_LF ||
+ CurByte == ULMBCS_123SYSTEMRANGE) {
+
+ uniChar = CurByte;
+ } else {
+ short group;
+ UConverterSharedData cnv;
+
+ if (CurByte == ULMBCS_GRP_CTRL) { /* Control character group - no opt group update */
+ short C0C1byte;
+ /* CHECK_SOURCE_LIMIT(1) */
+ if (source.position() + 1 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+ C0C1byte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+ /* bytes below ULMBCS_C1START carry the ULMBCS_CTRLOFFSET bias; others are taken as-is */
+ uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte;
+ } else if (CurByte == ULMBCS_GRP_UNICODE) { /* Unicode Compatibility group: Big Endian UTF16 */
+ /* CHECK_SOURCE_LIMIT(2) */
+ if (source.position() + 2 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+
+ /* don't check for error indicators fffe/ffff below */
+ return GetUniFromLMBCSUni(source);
+ } else if (CurByte <= ULMBCS_CTRLOFFSET) {
+ /* explicit group byte */
+ group = CurByte;
+ if (group > ULMBCS_GRP_LAST || (cnv = extraInfo.OptGrpConverter[group]) == null) {
+ /* this is not a valid group byte - no converter */
+ /* uniChar keeps its initial 0 and is returned together with this error */
+ err[0] = CoderResult.unmappableForLength(1);
+ } else if (group >= ULMBCS_DOUBLEOPTGROUP_START) {
+ /* CHECK_SOURCE_LIMIT(2) */
+ if (source.position() + 2 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+
+ /* check for LMBCS doubled-group-byte case */
+ if (source.get(source.position()) == group) {
+ /* single byte */
+ /* skip the repeated group byte, convert the byte after it, then consume that byte
+ * (LMBCS_SimpleGetNextUChar leaves the position where it was) */
+ source.get();
+ uniChar = LMBCS_SimpleGetNextUChar(cnv, source, 0, 1);
+ source.get();
+ } else {
+ /* double byte */
+ /* convert the two bytes in place, then consume both */
+ uniChar = LMBCS_SimpleGetNextUChar(cnv, source, 0, 2);
+ source.get();
+ source.get();
+ }
+ } else { /* single byte conversion */
+ /* CHECK_SOURCE_LIMIT(1) */
+ if (source.position() + 1 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+ CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ if (CurByte >= ULMBCS_C1START) {
+ uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte);
+ } else {
+ /*
+ * The non-optimizable oddballs where there is an explicit byte
+ * AND the second byte is not in the upper ascii range
+ */
+ byte[] bytes = new byte[2];
+
+ cnv = extraInfo.OptGrpConverter[ULMBCS_GRP_EXCEPT];
+
+ /* Lookup value must include opt group */
+ bytes[0] = (byte)group;
+ bytes[1] = (byte)CurByte;
+ uniChar = LMBCS_SimpleGetNextUChar(cnv, ByteBuffer.wrap(bytes), 0, 2);
+ }
+ }
+
+ } else if (CurByte >= ULMBCS_C1START) { /* group byte is implicit */
+ group = extraInfo.OptGroup;
+ cnv = extraInfo.OptGrpConverter[group];
+ if (group >= ULMBCS_DOUBLEOPTGROUP_START) { /* double byte conversion */
+ /* NOTE(review): the condition is annotated isLeadByte, yet this branch converts a
+ * single byte while the else branch converts two - this looks inverted; confirm
+ * against the definition of MBCS_ENTRY_IS_TRANSITION */
+ if (CharsetMBCS.MBCS_ENTRY_IS_TRANSITION(cnv.mbcs.stateTable[0][CurByte]) /* isLeadByte */) {
+ /* CHECK_SOURCE_LIMIT(0) */
+ /* position() never exceeds limit(), so this check cannot fire */
+ if (source.position() + 0 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+
+ /* let the MBCS conversion consume CurByte again */
+ uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 1);
+ } else {
+ /* CHECK_SOURCE_LIMIT(1) */
+ if (source.position() + 1 > source.limit()) {
+ err[0] = CoderResult.OVERFLOW;
+ source.position(source.limit());
+ return 0xFFFF;
+ }
+
+ /* let the MBCS conversion consume CurByte again */
+ uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 2);
+ source.get();
+ }
+ } else {
+ uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte);
+ }
+ }
+ }
+
+ return uniChar;
+ }
+
+ /*
+ * Main LMBCS to Unicode conversion loop.
+ *
+ * Converts one LMBCS character per iteration with LMBCSGetNextUCharWorker
+ * while input and output space remain and no error has been raised. Bytes of
+ * a character that was cut off at the end of the previous call are kept in
+ * toUBytesArray/toULength and are joined with new input in a local buffer
+ * before conversion. A truncated character is reported by the worker as
+ * OVERFLOW; this method saves its bytes and turns the result into UNDERFLOW.
+ * OVERFLOW is returned when the target fills up while input remains.
+ */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult[] err = new CoderResult[1];
+ err[0] = CoderResult.UNDERFLOW;
+ byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size for proper handling in subsequent calls to MBCS functions */
+ char uniChar; /* one output Unicode char */
+ int saveSource; /* beginning of current code point */
+ int errSource = 0; /* index to actual input in case an error occurs */
+ byte savebytes = 0;
+
+ /* Process from source to limit, or until error */
+ while (err[0].isUnderflow() && source.hasRemaining() && target.hasRemaining()) {
+ saveSource = source.position(); /* beginning of current code point */
+ if (toULength > 0) { /* reassemble char from previous call */
+ int size_old = toULength;
+ ByteBuffer tmpSourceBuffer;
+
+ /* limit from source is either remainder of temp buffer, or user limit on source */
+ int size_new_maybe_1 = ULMBCS_CHARSIZE_MAX - size_old;
+ int size_new_maybe_2 = source.remaining();
+ int size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2;
+ savebytes = (byte)(size_old + size_new);
+ /* saved bytes first, then as many fresh source bytes as fit */
+ for (int i = 0; i < savebytes; i++) {
+ if (i < size_old) {
+ LMBCS[i] = toUBytesArray[i];
+ } else {
+ LMBCS[i] = source.get();
+ }
+ }
+ tmpSourceBuffer = ByteBuffer.wrap(LMBCS);
+ tmpSourceBuffer.limit(savebytes);
+ uniChar = (char)LMBCSGetNextUCharWorker(tmpSourceBuffer, err);
+ /* advance the real source only by the new bytes the worker consumed */
+ source.position(saveSource + tmpSourceBuffer.position() - size_old);
+ errSource = saveSource - size_old;
+
+ if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */
+ /* evil special case: source buffers so small a char spans more than 2 buffers */
+ toULength = savebytes;
+ for (int i = 0; i < savebytes; i++) {
+ toUBytesArray[i] = LMBCS[i];
+ }
+ source.position(source.limit());
+ err[0] = CoderResult.UNDERFLOW;
+ return err[0];
+ } else {
+ /* clear the partial-char marker */
+ toULength = 0;
+ }
+ } else {
+ errSource = saveSource;
+ uniChar = (char)LMBCSGetNextUCharWorker(source, err);
+ savebytes = (byte)(source.position() - saveSource);
+ }
+
+ if (err[0].isUnderflow()) {
+ /* 0xfffe and 0xffff are used as unmappable / malformed indicators */
+ if (uniChar < 0x0fffe) {
+ target.put(uniChar);
+ if (offsets != null) {
+ offsets.put(saveSource);
+ }
+ } else if (uniChar == 0xfffe) {
+ err[0] = CoderResult.unmappableForLength(source.position() - saveSource);
+ } else /* if (uniChar == 0xffff) */ {
+ err[0] = CoderResult.malformedForLength(source.position() - saveSource);
+ }
+ }
+ }
+ /* If target ran out before source, return over flow buffer error. */
+ if (err[0].isUnderflow() && source.hasRemaining() && !target.hasRemaining()) {
+ err[0] = CoderResult.OVERFLOW;
+ } else if (!err[0].isUnderflow()) {
+ /* If character incomplete or unmappable/illegal, store it in toUBytesArray[] */
+ toULength = savebytes;
+ if (savebytes > 0) {
+ /* NOTE(review): on the reassembly path errSource is saveSource - size_old, which
+ * can be negative; source.get(errSource + i) would then be out of range - confirm
+ * whether the bytes should come from the local LMBCS buffer in that case */
+ for (int i = 0; i < savebytes; i++) {
+ toUBytesArray[i] = source.get(errSource + i);
+ }
+ }
+ if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */
+ err[0] = CoderResult.UNDERFLOW;
+ }
+ }
+ return err[0];
+ }
+ }
+
+ class CharsetEncoderLMBCS extends CharsetEncoderICU {
+ /*
+ * Creates an encoder for the given charset, handing fromUSubstitution
+ * (presumably the charset's substitution bytes - confirm) to the base
+ * encoder, and immediately resets it via implReset().
+ */
+ public CharsetEncoderLMBCS(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ implReset();
+ }
+
+ /* Resets the encoder; nothing is reset here beyond what the superclass resets. */
+ protected void implReset() {
+ super.implReset();
+ }
+ /*
+ * Basic helper for Unicode to LMBCS conversion when a character is expected
+ * to fit into one particular group. It tries pUniChar against the
+ * sub-converter of 'group' and, on success, writes the LMBCS bytes to the
+ * start of the LMBCS array.
+ *
+ * Returns the number of bytes written. Returns 0 when the sub-converter
+ * produced nothing (the group is then flagged in groups_tried) or when the
+ * result is a single byte below 0x20. lastConverterIndex[0] is set to
+ * 'group' whenever the sub-converter produced output.
+ */
+ private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) {
+     int[] converted = new int[1];
+
+     /* point the scratch encoder at this group's sub-converter and try the character */
+     extraInfo.charset.sharedData = extraInfo.OptGrpConverter[group];
+     int byteCount = extraInfo.encoder.fromUChar32(pUniChar, converted, false);
+
+     if (byteCount <= 0) {
+         /* most common failure mode is an unassigned character */
+         groups_tried[group] = true;
+         return 0;
+     }
+
+     /* the most significant of the converted bytes */
+     int leadByte = (converted[0] >> ((byteCount - 1) * 8)) & UConverterConstants.UNSIGNED_BYTE_MASK;
+
+     lastConverterIndex[0] = group;
+
+     /*
+      * All initial byte values in lower ascii range should have been caught by now,
+      * except with the exception group.
+      */
+
+     /* use converted data: first write 0, 1 or two group bytes */
+     int written = 0;
+     boolean needsGroupByte = (group != ULMBCS_GRP_EXCEPT) && (extraInfo.OptGroup != group);
+     if (needsGroupByte) {
+         LMBCS[written++] = (byte)group;
+         if (byteCount == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) {
+             LMBCS[written++] = (byte)group;
+         }
+     }
+
+     /* don't emit control chars */
+     if (byteCount == 1 && leadByte < 0x20) {
+         return 0;
+     }
+
+     /* then move over the converted data, most significant byte first */
+     if (byteCount <= 4) {
+         for (int shift = (byteCount - 1) * 8; shift >= 0; shift -= 8) {
+             LMBCS[written++] = (byte)(converted[0] >> shift);
+         }
+     }
+
+     return written;
+ }
+ /*
+ * Simpler sibling of the group conversion helper, used when the character is
+ * written with the Unicode group. Fills LMBCS[0..2] with the Unicode group
+ * byte followed by the high and low bytes of uniChar. When the low byte is
+ * zero, ULMBCS_UNICOMPATZERO is written first and the high byte second.
+ * Always returns ULMBCS_UNICODE_SIZE.
+ */
+ private int LMBCSConvertUni(byte[] LMBCS, char uniChar) {
+     int highByte = (uniChar >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK;
+     int lowByte = uniChar & UConverterConstants.UNSIGNED_BYTE_MASK;
+     boolean lowIsZero = (lowByte == 0);
+
+     LMBCS[0] = (byte)ULMBCS_GRP_UNICODE;
+     LMBCS[1] = lowIsZero ? (byte)ULMBCS_UNICOMPATZERO : (byte)highByte;
+     LMBCS[2] = lowIsZero ? (byte)highByte : (byte)lowByte;
+
+     return ULMBCS_UNICODE_SIZE;
+ }
+ /* The main Unicode to LMBCS conversion function */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ short[] lastConverterIndex = new short[1]; /* one-element array so that LMBCSConversionWorker can report the group it used */
+ char uniChar;
+ byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX]; /* scratch buffer holding the bytes of one converted character */
+ byte pLMBCS; /* index into LMBCS, used both while filling it and while copying it out */
+ int bytes_written; /* number of bytes in LMBCS for the current character; 0 means "not converted yet" */
+ boolean[] groups_tried = new boolean[ULMBCS_GRP_LAST+1]; /* groups that LMBCSConversionWorker has already failed with */
+ int sourceIndex = 0; /* counts the chars consumed by this call; recorded in offsets for every output byte */
+ /* Result is UNDERFLOW unless target fills up, in which case it is OVERFLOW; the flush argument is not consulted here. */
+ /*
+ * Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
+ * If that succeeds, see if it will all fit into the target & copy it over
+ * if it does.
+ *
+ * We try conversions in the following order:
+ * 1. Single-byte ascii & special fixed control chars (&null)
+ * 2. Look up group in table & try that (could be
+ * A) Unicode group
+ * B) control group
+ * C) national encoding
+ * or ambiguous SBCS or MBCS group (on to step 3...)
+ * 3. If it's ambiguous, try this order:
+ * A) The optimization group
+ * B) The locale group
+ * C) The last group that succeeded with this string.
+ * D) every other group that's relevant
+ * E) If it's single-byte ambiguous, try the exceptions group
+ * 4. And as a grand fallback: Unicode
+ */
+
+ short OldConverterIndex = 0; /* copy of extraInfo.localeConverterIndex, restored at the end of every iteration */
+
+ while (source.hasRemaining() && err.isUnderflow()) {
+ OldConverterIndex = extraInfo.localeConverterIndex;
+
+ if (!target.hasRemaining()) {
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+
+ uniChar = source.get(source.position()); /* peek only; the char is consumed by source.get() after it has been converted */
+ bytes_written = 0;
+ pLMBCS = 0;
+
+ /* check cases in rough order of how common they are, for speed */
+
+ /* single-byte matches: strategy 1 */
+ if((uniChar>=0x80) && (uniChar<=0xff) && (uniChar!=0xB1) && (uniChar!=0xD7) && (uniChar!=0xF7) &&
+ (uniChar!=0xB0) && (uniChar!=0xB4) && (uniChar!=0xB6) && (uniChar!=0xA7) && (uniChar!=0xA8)) {
+ extraInfo.localeConverterIndex = ULMBCS_GRP_L1; /* temporary override for this character only; undone at the end of the iteration */
+ }
+ if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) ||
+ uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR ||
+ uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE) {
+ LMBCS[pLMBCS++] = (byte)uniChar;
+ bytes_written = 1;
+ }
+
+ if (bytes_written == 0) {
+ /* Check by Unicode range (Strategy 2) */
+ short group = FindLMBCSUniRange(uniChar);
+ if (group == ULMBCS_GRP_UNICODE) { /* (Strategy 2A) */
+ bytes_written = LMBCSConvertUni(LMBCS, uniChar);
+ } else if (group == ULMBCS_GRP_CTRL) { /* (Strategy 2B) */
+ /* Handle control characters here */
+ if (uniChar <= ULMBCS_C0END) {
+ LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
+ LMBCS[pLMBCS++] = (byte)(ULMBCS_CTRLOFFSET + uniChar);
+ } else if (uniChar >= ULMBCS_C1START && uniChar <= (ULMBCS_C1START + ULMBCS_CTRLOFFSET)) {
+ LMBCS[pLMBCS++] = ULMBCS_GRP_CTRL;
+ LMBCS[pLMBCS++] = (byte)uniChar;
+ }
+ bytes_written = pLMBCS; /* still 0 when uniChar fell into neither control range, so Strategy 3 is tried next */
+ } else if (group < ULMBCS_GRP_UNICODE) { /* (Strategy 2C) */
+ /* a specific converter has been identified - use it */
+ bytes_written = LMBCSConversionWorker(group, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ if (bytes_written == 0) { /* the ambiguous group cases (Strategy 3) */
+ groups_tried = new boolean[ULMBCS_GRP_LAST+1]; /* start Strategy 3 with a clean record of failed groups */
+
+ /* check for non-default optimization group (Strategy 3A) */
+ if (extraInfo.OptGroup != 1 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.OptGroup)) {
+ if(extraInfo.localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) {
+ bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried);
+
+ if(bytes_written == 0) {
+ bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ if(bytes_written == 0) {
+ bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ } else {
+ bytes_written = LMBCSConversionWorker (extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ }
+ /* check for locale optimization group (Strategy 3B) */
+ if (bytes_written == 0 && extraInfo.localeConverterIndex > 0 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.localeConverterIndex)) {
+
+ bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ /* check for last optimization group used for this string (Strategy 3C) */
+ if (bytes_written == 0 && lastConverterIndex[0] > 0 && ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex[0])) {
+ bytes_written = LMBCSConversionWorker(lastConverterIndex[0], LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ if (bytes_written == 0) {
+ /* just check every possible matching converter (Strategy 3D) */
+ short grp_start;
+ short grp_end;
+ short grp_ix;
+
+ grp_start = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_DOUBLEOPTGROUP_START : ULMBCS_GRP_L1;
+ grp_end = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_GRP_LAST : ULMBCS_GRP_TH;
+
+ if(group == ULMBCS_AMBIGUOUS_ALL) {
+ grp_start = ULMBCS_GRP_L1;
+ grp_end = ULMBCS_GRP_LAST;
+ }
+
+ for (grp_ix = grp_start; grp_ix <= grp_end && bytes_written == 0; grp_ix++) {
+ if (extraInfo.OptGrpConverter[grp_ix] != null && !groups_tried[grp_ix]) {
+ bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ }
+ /*
+ * a final conversion fallback to the exceptions group if it's likely
+ * to be single byte (Strategy 3E)
+ */
+ if (bytes_written == 0 && grp_start == ULMBCS_GRP_L1) {
+ bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
+ }
+ }
+ /* all of our other strategies failed. Fallback to Unicode. (Strategy 4) */
+ if (bytes_written == 0) {
+ bytes_written = LMBCSConvertUni(LMBCS, uniChar);
+ }
+ }
+ }
+ /* we have a translation. increment source and write as much as possible to target */
+ source.get(); /* consume the char that was only peeked at above */
+ pLMBCS = 0;
+ while (target.hasRemaining() && bytes_written > 0) {
+ bytes_written--;
+ target.put(LMBCS[pLMBCS++]);
+ if (offsets != null) {
+ offsets.put(sourceIndex); /* one offset entry per output byte */
+ }
+ }
+ sourceIndex++;
+ if (bytes_written > 0) {
+ /*
+ * write any bytes that didn't fit in target to the error buffer,
+ * common code will move this to target if we get called back with
+ * enough target room
+ */
+ err = CoderResult.OVERFLOW;
+ errorBufferLength = bytes_written;
+ for (int i = 0; bytes_written > 0; i++, bytes_written--) {
+ errorBuffer[i] = LMBCS[pLMBCS++];
+ }
+ }
+ extraInfo.localeConverterIndex = OldConverterIndex; /* undo the per-character override made for strategy 1 */
+ }
+
+ return err;
+ }
+ }
+ public CharsetDecoder newDecoder() {
+ return new CharsetDecoderLMBCS(this); /* every call creates a new decoder object constructed with this charset */
+ }
+
+ public CharsetEncoder newEncoder() {
+ return new CharsetEncoderLMBCS(this); /* every call creates a new encoder object constructed with this charset */
+ }
+
+ void getUnicodeSetImpl(UnicodeSet setFillIn, int which){
+ getCompleteUnicodeSet(setFillIn); /* the which argument is not consulted: the complete set is filled in for every value */
+ }
+ private byte[] fromUSubstitution = new byte[]{ 0x3F }; /* single byte 0x3F (ASCII '?'); presumably the substitution bytes for unmappable input -- TODO confirm against its users */
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java
new file mode 100644
index 00000000000..d61028e142f
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java
@@ -0,0 +1,5127 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.Buffer;
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.UConverterSharedData.UConverterType;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.InvalidFormatException;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+class CharsetMBCS extends CharsetICU {
+
+ private byte[] fromUSubstitution = null; // the constructor replaces this with a private copy of staticData.subChar (subCharLen bytes)
+ UConverterSharedData sharedData = null; // assigned in the constructor from the result of loadConverter()
+ private static final int MAX_VERSION_LENGTH = 4; // length of the MBCSHeader.version array
+
+ // these variables are used in getUnicodeSet() and may be changed in future
+ // typedef enum UConverterSetFilter {
+ static final int UCNV_SET_FILTER_NONE = 1;
+ static final int UCNV_SET_FILTER_DBCS_ONLY = 2;
+ static final int UCNV_SET_FILTER_2022_CN = 3;
+ static final int UCNV_SET_FILTER_SJIS= 4 ;
+ static final int UCNV_SET_FILTER_GR94DBCS = 5;
+ static final int UCNV_SET_FILTER_HZ = 6;
+ static final int UCNV_SET_FILTER_COUNT = 7;
+ // } UConverterSetFilter;
+
+ /**
+ * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of
+ * this type. They are sorted by offset.
+ */
+ final class MBCSToUFallback {
+ int offset; // the key by which the fallback items are kept sorted
+ int codePoint; // presumably the Unicode code point that the fallback maps to -- TODO confirm against the reader/lookup code
+ }
+
+ /**
+ * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter
+ * data and points into the loaded mapping tables.
+ */
+ static final class UConverterMBCSTable {
+ /* toUnicode */
+ short countStates; /* number of states, i.e. rows of stateTable */
+ byte dbcsOnlyState; /* set by loadConverter() for a DBCS-only extension converter over an SI/SO-stateful base table */
+ boolean stateTableOwned; /* set to true by loadConverter() after it replaces stateTable with a modified copy */
+ int countToUFallbacks;
+
+ int stateTable[/* countStates */][/* 256 */];
+ int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */
+ char unicodeCodeUnits[/* countUnicodeResults */];
+ MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
+
+ /* fromUnicode */
+ char fromUnicodeTable[];
+ byte fromUnicodeBytes[];
+ byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */
+ int fromUBytesLength;
+ short outputType, unicodeMask;
+
+ /* converter name for swaplfnl */
+ String swapLFNLName;
+
+ /* extension data */
+ UConverterSharedData baseSharedData; /* non-null only for extension-only converters (set by loadConverter()) */
+ // int extIndexes[];
+ ByteBuffer extIndexes; // create int[] view etc. as needed
+
+ CharBuffer mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
+ char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
+ boolean utf8Friendly; /* for utf8Friendly data */
+ char maxFastUChar; /* for utf8Friendly data */
+
+ /* roundtrips */
+ long asciiRoundtrips; /* bit set with one bit per 4 ASCII characters, computed by loadConverter() */
+
+ UConverterMBCSTable() {
+ utf8Friendly = false;
+ mbcsIndex = null;
+ sbcsIndex = new char[SBCS_FAST_LIMIT>>6]; /* always allocated; only filled in by loadConverter() for utf8Friendly single-state tables */
+ }
+
+ /*
+ * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState;
+ * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
+ * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
+ * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
+ * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType =
+ * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
+ * extIndexes = t.extIndexes; }
+ */
+ }
+
+ /* Constants used in MBCS data header */
+ // enum {
+ static final int MBCS_OPT_LENGTH_MASK=0x3f;
+ static final int MBCS_OPT_NO_FROM_U=0x40; /* when set in MBCSHeader.options, loadConverter() rebuilds the fromUnicode data via reconstituteData() */
+ /*
+ * If any of the following options bits are set,
+ * then the file must be rejected.
+ */
+ static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0;
+ /*
+ * Remove bits from this mask as more options are recognized
+ * by all implementations that use this constant.
+ */
+ static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80; /* loadConverter() rejects a version 5 header that has any of these bits set */
+ // };
+ /* Constants for fast and UTF-8-friendly conversion. */
+ // enum {
+ static final int SBCS_FAST_MAX=0x0fff; /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
+ static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1; /* =0x1000 */
+ static final int MBCS_FAST_MAX=0xd7ff; /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
+ static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1; /* =0xd800 */
+ // };
+ /**
+ * MBCS data header. See data format description above.
+ */
+ final class MBCSHeader {
+ byte version[/* U_MAX_VERSION_LENGTH */]; /* loadConverter() accepts major version 4, or major version 5 with minor version >= 3 */
+ int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes;
+ int flags; /* low byte: output type; remaining bits (flags >>> 8): offset of the extension data, 0 if there is none */
+ int fromUBytesLength;
+
+ /* new and required in version 5 */
+ int options;
+
+ /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
+ int fullStage2Length; /* number of 32-bit units */
+
+ MBCSHeader() {
+ version = new byte[MAX_VERSION_LENGTH]; /* the numeric fields keep their default value 0 until a reader fills them in */
+ }
+ }
+
+ public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath,
+         ClassLoader loader) throws InvalidFormatException {
+     super(icuCanonicalName, javaCanonicalName, aliases);
+
+     /* A swap-LFNL option embedded in the ICU name is recorded in options and cut off the name before loading. */
+     final int optionStart = icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING);
+     if (optionStart >= 0) {
+         options = UConverterConstants.OPTION_SWAP_LFNL;
+         icuCanonicalName = icuCanonicalName.substring(0, optionStart);
+         super.icuCanonicalName = icuCanonicalName;
+     }
+
+     /* load the converter data, then publish the properties stored in its static data */
+     sharedData = loadConverter(1, icuCanonicalName, classPath, loader);
+     final UConverterStaticData loaded = sharedData.staticData;
+     maxBytesPerChar = loaded.maxBytesPerChar;
+     minBytesPerChar = loaded.minBytesPerChar;
+     maxCharsPerByte = 1;
+     subChar = loaded.subChar;
+     subCharLen = loaded.subCharLen;
+     subChar1 = loaded.subChar1;
+     /* the substitution bytes are kept as a private copy, trimmed to subCharLen bytes */
+     fromUSubstitution = new byte[loaded.subCharLen];
+     System.arraycopy(loaded.subChar, 0, fromUSubstitution, 0, loaded.subCharLen);
+     initializeConverter(options);
+ }
+
+ public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases)
+ throws InvalidFormatException {
+ this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null); /* data is looked up under ICU_BUNDLE, without an explicit class loader */
+ }
+
+ private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)
+         throws InvalidFormatException {
+     boolean noFromU = false; /* becomes true when the header carries MBCS_OPT_NO_FROM_U */
+     // Loads the resource classPath/myName.<DATA_TYPE>; nestedLoads is 1 for a top-level load and 2 for the base table of an extension.
+     UConverterStaticData staticData = new UConverterStaticData();
+     UConverterDataReader reader = null;
+     try {
+         String resourceName = classPath + "/" + myName + "." + UConverterSharedData.DATA_TYPE;
+         InputStream i;
+
+         if (loader != null) {
+             i = ICUData.getRequiredStream(loader, resourceName);
+         } else {
+             i = ICUData.getRequiredStream(resourceName);
+         }
+         BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);
+         reader = new UConverterDataReader(b);
+         reader.readStaticData(staticData);
+     } catch (IOException e) {
+         throw new InvalidFormatException();
+     } catch (Exception e) {
+         throw new InvalidFormatException();
+     }
+     /* every failure in this method is reported to the caller as an InvalidFormatException */
+     UConverterSharedData data = null;
+     int type = staticData.conversionType;
+
+     if (type != UConverterSharedData.UConverterType.MBCS
+             || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) {
+         throw new InvalidFormatException();
+     }
+
+     data = new UConverterSharedData(1, null, false, 0);
+     data.dataReader = reader;
+     data.staticData = staticData;
+     data.sharedDataCached = false;
+
+     // Load data
+     UConverterMBCSTable mbcsTable = data.mbcs;
+     MBCSHeader header = new MBCSHeader();
+     try {
+         reader.readMBCSHeader(header);
+     } catch (IOException e) {
+         throw new InvalidFormatException();
+     }
+
+     int offset;
+     // int[] extIndexesArray = null;
+     String baseNameString = null;
+     int[][] stateTableArray = null;
+     MBCSToUFallback[] toUFallbacksArray = null;
+     char[] unicodeCodeUnitsArray = null;
+     char[] fromUnicodeTableArray = null;
+     byte[] fromUnicodeBytesArray = null;
+     /* accepted headers: version 5.3+ without unknown incompatible option bits, or any version 4 */
+     if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
+         noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
+     } else if (header.version[0] != 4) {
+         throw new InvalidFormatException();
+     }
+
+     mbcsTable.outputType = (byte) header.flags;
+
+     /* extension data, header version 4.2 and higher */
+     offset = header.flags >>> 8;
+     // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
+     if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
+         try {
+             baseNameString = reader.readBaseTableName();
+             if (offset != 0) {
+                 // agljport:comment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
+                 // terminator byte all already read;
+                 mbcsTable.extIndexes = reader.readExtIndexes(offset
+                         - (reader.bytesRead - reader.staticDataBytesRead));
+             }
+         } catch (IOException e) {
+             throw new InvalidFormatException();
+         }
+     }
+
+     // agljport:add this would be unnecessary if extIndexes were memory mapped
+     /*
+      * if(mbcsTable.extIndexes != null) {
+      *
+      * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 +
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes =
+      * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes);
+      * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught
+      * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } }
+      */
+     if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
+         UConverterSharedData baseSharedData = null;
+         ByteBuffer extIndexes;
+         String baseName;
+
+         /* extension-only file, load the base table and set values appropriately */
+         extIndexes = mbcsTable.extIndexes;
+         if (extIndexes == null) {
+             /* extension-only file without extension */
+             throw new InvalidFormatException();
+         }
+
+         if (nestedLoads != 1) {
+             /* an extension table must not be loaded as a base table */
+             throw new InvalidFormatException();
+         }
+
+         /* load the base table */
+         baseName = baseNameString;
+         if (baseName.equals(staticData.name)) {
+             /* forbid loading this same extension-only file */
+             throw new InvalidFormatException();
+         }
+
+         // agljport:fix args.size=sizeof(UConverterLoadArgs);
+         baseSharedData = loadConverter(2, baseName, classPath, loader);
+
+         if (baseSharedData.staticData.conversionType != UConverterType.MBCS
+                 || baseSharedData.mbcs.baseSharedData != null) {
+             // agljport:fix ucnv_unload(baseSharedData);
+             throw new InvalidFormatException();
+         }
+
+         /* copy the base table data */
+         // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't
+         // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object
+         mbcsTable = data.mbcs = baseSharedData.mbcs;
+
+         /* overwrite values with relevant ones for the extension converter */
+         mbcsTable.baseSharedData = baseSharedData;
+         mbcsTable.extIndexes = extIndexes;
+
+         /*
+          * It would be possible to share the swapLFNL data with a base converter, but the generated name would have
+          * to be different, and the memory would have to be free'd only once. It is easier to just create the data
+          * for the extension converter separately when it is requested.
+          */
+         mbcsTable.swapLFNLStateTable = null;
+         mbcsTable.swapLFNLFromUnicodeBytes = null;
+         mbcsTable.swapLFNLName = null;
+
+         /*
+          * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter
+          * that also maps single bytes.
+          */
+         if (staticData.conversionType == UConverterType.DBCS
+                 || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) {
+
+             if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
+                 /* the base converter is SI/SO-stateful */
+                 int entry;
+
+                 /* get the dbcs state from the state table entry for SO=0x0e */
+                 entry = mbcsTable.stateTable[0][0xe];
+                 if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY
+                         && MBCS_ENTRY_FINAL_STATE(entry) != 0) {
+                     mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry);
+
+                     mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
+                 }
+             } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS
+                     && baseSharedData.staticData.minBytesPerChar == 1
+                     && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) {
+
+                 /* non-stateful base converter, need to modify the state table */
+                 int newStateTable[][/* 256 */];
+                 int state[]; // this works because java 2-D array is array of references and we can have state =
+                 // newStateTable[i];
+                 int i, count;
+
+                 /* allocate a new state table (one row per base state plus one new row) and copy the base state table contents */
+                 count = mbcsTable.countStates;
+                 newStateTable = new int[count + 1][256];
+
+                 for (i = 0; i < mbcsTable.stateTable.length; ++i)
+                     System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0,
+                             mbcsTable.stateTable[i].length);
+
+                 /* change all final single-byte entries to go to a new all-illegal state */
+                 state = newStateTable[0];
+                 for (i = 0; i < 256; ++i) {
+                     if (MBCS_ENTRY_IS_FINAL(state[i])) {
+                         state[i] = MBCS_ENTRY_TRANSITION(count, 0);
+                     }
+                 }
+
+                 /* build the new all-illegal state */
+                 state = newStateTable[count];
+                 for (i = 0; i < 256; ++i) {
+                     state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
+                 }
+                 mbcsTable.stateTable = newStateTable;
+                 mbcsTable.countStates = (short) (count + 1); /* countStates is a short; a byte cast would turn 128 into -128 */
+                 mbcsTable.stateTableOwned = true;
+
+                 mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
+             }
+         }
+
+         /*
+          * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the
+          * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data
+          * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
+          */
+     } else {
+         /* conversion file with a base table; an additional extension table is optional */
+         /* make sure that the output type is known */
+         switch (mbcsTable.outputType) {
+         case MBCS_OUTPUT_1:
+         case MBCS_OUTPUT_2:
+         case MBCS_OUTPUT_3:
+         case MBCS_OUTPUT_4:
+         case MBCS_OUTPUT_3_EUC:
+         case MBCS_OUTPUT_4_EUC:
+         case MBCS_OUTPUT_2_SISO:
+             /* OK */
+             break;
+         default:
+             throw new InvalidFormatException();
+         }
+         /* size the runtime arrays from the header, then let the reader fill them in */
+         stateTableArray = new int[header.countStates][256];
+         toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks];
+         for (int i = 0; i < toUFallbacksArray.length; ++i)
+             toUFallbacksArray[i] = new MBCSToUFallback();
+         unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2];
+         fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2];
+         fromUnicodeBytesArray = new byte[header.fromUBytesLength];
+         try {
+             reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray,
+                     fromUnicodeBytesArray);
+         } catch (IOException e) {
+             throw new InvalidFormatException();
+         }
+         /* countStates is a short; a byte cast would turn a state count of 128 into -128 */
+         mbcsTable.countStates = (short) header.countStates;
+         mbcsTable.countToUFallbacks = header.countToUFallbacks;
+         mbcsTable.stateTable = stateTableArray;
+         mbcsTable.toUFallbacks = toUFallbacksArray;
+         mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray;
+
+         mbcsTable.fromUnicodeTable = fromUnicodeTableArray;
+         mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray;
+         mbcsTable.fromUBytesLength = header.fromUBytesLength;
+
+         /*
+          * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
+          * function implementations
+          */
+         // agljport:fix info.size=sizeof(UDataInfo);
+         // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
+         // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
+         /* mask off possible future extensions to be safe */
+         mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
+         // agljport:fix } else {
+         /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
+         // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
+         // agljport:fix }
+         if (offset != 0) {
+             try {
+                 // agljport:comment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
+                 // terminator byte all already read;
+                 // int namelen = baseNameString != null? baseNameString.length() + 1: 0;
+                 mbcsTable.extIndexes = reader.readExtIndexes(offset
+                         - (reader.bytesRead - reader.staticDataBytesRead));
+             } catch (IOException e) {
+                 throw new InvalidFormatException();
+             }
+         }
+
+         if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 &&
+                 (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) {
+             mbcsTable.utf8Friendly = true;
+
+             if (mbcsTable.countStates == 1) {
+                 /*
+                  * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
+                  * Build a table with indexes to each block, to be used instead of
+                  * the regular stage 1/2 table.
+                  */
+                 for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
+                     mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
+                 }
+                 /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
+                 mbcsTable.maxFastUChar = SBCS_FAST_MAX;
+             } else {
+                 /*
+                  * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
+                  * The .cnv file is prebuilt with an additional stage table with indexes to each block.
+                  */
+                 if (noFromU) {
+                     mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
+                 }
+                 mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
+             }
+         }
+         /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
+         {
+             long asciiRoundtrips = 0xffffffff;
+             for (int i = 0; i < 0x80; ++i) {
+                 if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
+                     asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK;
+                 }
+             }
+             mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK;
+         }
+         /* a table stored without fromUnicode data gets that data rebuilt from the toUnicode side */
+         if (noFromU) {
+             int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
+             int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
+             reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length);
+         }
+         if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) {
+             /*
+              * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
+              * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
+              */
+             mbcsTable.asciiRoundtrips = 0;
+         }
+     }
+     return data;
+ }
+
+ private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
+     char[] table;
+     byte[] bytes;
+     int stage2;
+     int p;
+     int c;
+     int i, st3;
+     long temp;
+     /* Stores roundtrip mappings for up to 32 consecutive byte values: codePoints[i] (skipped when negative) maps to value+i. Always returns true. */
+     table = mbcsTable.fromUnicodeTable;
+     bytes = mbcsTable.fromUnicodeBytes;
+
+     /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
+     switch(mbcsTable.outputType) {
+     case MBCS_OUTPUT_3_EUC:
+         if(value<=0xffff) {
+             /* short sequences are stored directly */
+             /* code set 0 or 1 */
+         } else if(value<=0x8effff) {
+             /* code set 2 */
+             value&=0x7fff;
+         } else /* first byte is 0x8f */ {
+             /* code set 3 */
+             value&=0xff7f;
+         }
+         break;
+     case MBCS_OUTPUT_4_EUC:
+         if(value<=0xffffff) {
+             /* short sequences are stored directly */
+             /* code set 0 or 1 */
+         } else if(value<=0x8effffffL) { /* long literal: as an int, 0x8effffff is negative and this branch could never be taken */
+             /* code set 2 */
+             value&=0x7fffff;
+         } else /* first byte is 0x8f */ {
+             /* code set 3 */
+             value&=0xff7fff;
+         }
+         break;
+     default:
+         break;
+     }
+     /* one iteration per code point; value advances together with the index */
+     for(i=0; i<=0x1f; ++value, ++i) {
+         c=codePoints[i];
+         if(c<0) {
+             continue;
+         }
+
+         /* locate the stage 2 & 3 data */
+         stage2 = table[c>>10] + ((c>>4)&0x3f);
+         /* a stage 2 entry is two chars: roundtrip flags in the first, stage 3 block index in the second; scale without truncating */
+         st3 = table[stage2*2 + 1] * 16 + (c&0xf);
+
+         /* write the codepage bytes into stage 3 */
+         switch(mbcsTable.outputType) {
+         case MBCS_OUTPUT_3:
+         case MBCS_OUTPUT_4_EUC:
+             p = st3*3;
+             bytes[p] = (byte)(value>>16);
+             bytes[p+1] = (byte)(value>>8);
+             bytes[p+2] = (byte)value;
+             break;
+         case MBCS_OUTPUT_4:
+             bytes[st3*4] = (byte)(value >> 24);
+             bytes[st3*4 + 1] = (byte)(value >> 16);
+             bytes[st3*4 + 2] = (byte)(value >> 8);
+             bytes[st3*4 + 3] = (byte)value;
+             break;
+         default:
+             /* 2 bytes per character */
+             bytes[st3*2] = (byte)(value >> 8);
+             bytes[st3*2 + 1] = (byte)value;
+             break;
+         }
+
+         /* set the roundtrip flag */
+         temp = (1L<<(16+(c&0xf)));
+         table[stage2*2] |= (char)(temp>>16);
+         table[stage2*2 + 1] |= (char)temp;
+     }
+     return true;
+ }
+
+ private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) {
+     int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength;
+     int offset = 0;
+     byte[] stage = new byte[datalength]; /* big-endian image: stage 1, then the full stage 2, then room for the result bytes */
+     /* copy stage 1 (stage1Length 16-bit units) to the start of the image */
+     for (int i = 0; i < stage1Length; ++i) {
+         stage[i*2] = (byte)(mbcsTable.fromUnicodeTable[i]>>8);
+         stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]);
+     }
+     /* the stored stage 2 entries (two chars each) become the last stage2Length entries of the full stage 2 */
+     offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2);
+     for (int i = 0; i < stage2Length; ++i) {
+         stage[offset + i*4] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8);
+         stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]);
+         stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8);
+         stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]);
+     }
+
+     /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
+
+     /* reconstitute the initial part of stage 2 from the mbcsIndex */
+     {
+         int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
+         int stageUTF8Index=0;
+         int st1, st2, st3, i;
+         /* both bytes of a stage 1 entry are masked so that values >= 0x80 are not sign-extended */
+         for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
+             st2 = ((stage[2*st1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (stage[2*st1+1] & UConverterConstants.UNSIGNED_BYTE_MASK);
+             if (st2 != stage1Length/2) {
+                 /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
+                 for (i = 0; i < 16; ++i) {
+                     st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++);
+                     if (st3 != 0) {
+                         /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
+                         st3>>=4;
+                         /*
+                          * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
+                          * allocated together as a single 64-block for access from the mbcsIndex
+                          */
+                         for (int block = 0; block < 4; ++block, ++st2, ++st3) { /* st2 advances by 4 in total, like the skip branch below */
+                             stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16);
+                             stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3);
+                         }
+                     } else {
+                         /* no stage 3 block, skip */
+                         st2+=4;
+                     }
+                 }
+             } else {
+                 /* no stage 2 block, skip */
+                 stageUTF8Index+=16;
+             }
+         }
+     }
+     /* convert the byte image back into the char table and the (still empty) fromUnicode byte array */
+     char[] stage1 = new char[stage.length/2];
+     for (int i = 0; i < stage1.length; ++i) {
+         stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+     }
+     byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))];
+     System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length);
+
+     mbcsTable.fromUnicodeTable = stage1;
+     mbcsTable.fromUnicodeBytes = stage2;
+
+     /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
+     MBCSEnumToUnicode(mbcsTable);
+ }
+
+ /*
+  * Internal function enumerating the toUnicode data of an MBCS converter.
+  * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
+  * table, but could also be used for a future getUnicodeSet() option
+  * that includes reverse fallbacks (after updating this function's implementation).
+  * Currently only handles roundtrip mappings.
+  * Does not currently handle extensions.
+  *
+  * The state properties are computed once via getStateProp(), then every
+  * direct (initial) state is enumerated with enumToU().
+  */
+ private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) {
+     /*
+      * Properties for each state, to speed up the enumeration.
+      * Ignorable actions are unassigned/illegal/state-change-only:
+      * They do not lead to mappings.
+      *
+      * Bits 7..6
+      * 1 direct/initial state (stateful converters have multiple)
+      * 0 non-initial state with transitions or with nonignorable result actions
+      * -1 final state with only ignorable actions
+      *
+      * Bits 5..3
+      * The lowest byte value with non-ignorable actions is
+      * value<<5 (rounded down).
+      *
+      * Bits 2..0:
+      * The highest byte value with non-ignorable actions is
+      * (value<<5)|0x1f (rounded up).
+      */
+     byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT];
+     int state;
+
+     /*
+      * getStateProp() uses -1 as the "not computed yet" marker and only
+      * recurses into states that still carry it. A freshly allocated Java
+      * array is all zeros, so the marker has to be set explicitly;
+      * otherwise no state other than 0 would ever be analyzed.
+      */
+     java.util.Arrays.fill(stateProps, (byte) -1);
+
+     /* recurse from state 0 and set all stateProps */
+     getStateProp(mbcsTable.stateTable, stateProps, 0);
+
+     for (state = 0; state < mbcsTable.countStates; ++state) {
+         if (stateProps[state] >= 0x40) {
+             /* start from each direct state */
+             enumToU(mbcsTable, stateProps, state, 0, 0);
+         }
+     }
+ }
+
+ /*
+ * Recursively enumerates the toUnicode mappings reachable from one state.
+ *
+ * Walks the byte range recorded in stateProps[state]. Transition entries are
+ * followed recursively (with the transition offset added to offset and the
+ * current byte appended to value); final entries are decoded to a code point
+ * or U_SENTINEL. Code points are collected in blocks of 32 consecutive bytes;
+ * each block that contains at least one mapping is handed to
+ * writeStage3Roundtrip().
+ *
+ * state: index of the state table row to enumerate
+ * offset: accumulated offset into mbcsTable.unicodeCodeUnits
+ * value: the bytes consumed so far, one byte per 8 bits
+ * Returns false as soon as writeStage3Roundtrip() or a recursive call
+ * returns false, otherwise true.
+ */
+ private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) {
+ int[] codePoints = new int[32];
+ int[] row;
+ char[] unicodeCodeUnits;
+ int anyCodePoints;
+ int b, limit;
+
+ row = mbcsTable.stateTable[state];
+ unicodeCodeUnits = mbcsTable.unicodeCodeUnits;
+
+ value<<=8;
+ anyCodePoints = -1; /* becomes non-negative if there is a mapping */
+
+ b = (stateProps[state]&0x38)<<2;
+ if (b == 0 && stateProps[state] >= 0x40) {
+ /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
+ codePoints[0] = UConverterConstants.U_SENTINEL;
+ b = 1;
+ }
+ limit = ((stateProps[state]&7)+1)<<5;
+ while (b < limit) {
+ int entry = row[b];
+ if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+ int nextState = MBCS_ENTRY_TRANSITION_STATE(entry);
+ if (stateProps[nextState] >= 0) {
+ /* recurse to a state with non-ignorable actions */
+ if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) {
+ return false;
+ }
+ }
+ codePoints[b&0x1f] = UConverterConstants.U_SENTINEL;
+ } else {
+ int c;
+ int action;
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action = MBCS_ENTRY_FINAL_ACTION(entry);
+ if (action == MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ c = MBCS_ENTRY_FINAL_VALUE_16(entry);
+ } else if (action == MBCS_STATE_VALID_16) {
+ int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[finalOffset];
+ if (c < 0xfffe) {
+ /* output BMP code point */
+ } else {
+ c = UConverterConstants.U_SENTINEL;
+ }
+ } else if (action == MBCS_STATE_VALID_16_PAIR) {
+ int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[finalOffset++];
+ if (c < 0xd800) {
+ /* output BMP code point below 0xd800 */
+ } else if (c <= 0xdbff) {
+ /* output roundtrip or fallback supplementary code point */
+ c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
+ } else if (c == 0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ c = unicodeCodeUnits[finalOffset];
+ } else {
+ c = UConverterConstants.U_SENTINEL;
+ }
+ } else if (action == MBCS_STATE_VALID_DIRECT_20) {
+ /* output supplementary code point */
+ c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000;
+ } else {
+ c = UConverterConstants.U_SENTINEL;
+ }
+
+ codePoints[b&0x1f] = c;
+ /* anyCodePoints stays negative only while every c so far was negative */
+ anyCodePoints&=c;
+ }
+ /* at the end of each 32-byte block, flush it if it holds any mapping */
+ if (((++b)&0x1f) == 0) {
+ if(anyCodePoints>=0) {
+ if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) {
+ return false;
+ }
+ anyCodePoints=-1;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ /*
+  * Computes and stores the property byte of one state (see the bit layout
+  * described in MBCSEnumToUnicode()) and returns it.
+  *
+  * Only called if stateProps[state]==-1.
+  * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
+  * MBCS_STATE_CHANGE_ONLY.
+  *
+  * An entry is "non-ignorable" when it is a transition to a state whose
+  * property is non-negative, or a final entry whose action is below
+  * MBCS_STATE_UNASSIGNED.
+  */
+ private static byte getStateProp(int stateTable[][], byte stateProps[], int state) {
+     int[] row;
+     int min, max, entry, nextState;
+
+     row = stateTable[state];
+     /* mark as visited before recursing so that cycles terminate */
+     stateProps[state] = 0;
+
+     /* find first non-ignorable state */
+     for (min = 0;; ++min) {
+         entry = row[min];
+         nextState = MBCS_ENTRY_STATE(entry);
+         if (stateProps[nextState] == -1) {
+             getStateProp(stateTable, stateProps, nextState);
+         }
+         if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+             /* ">= 0", matching the test in enumToU(); the former ">- 0" meant "> 0" */
+             if (stateProps[nextState] >= 0) {
+                 break;
+             }
+         } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
+             break;
+         }
+         if (min == 0xff) {
+             /* all 256 entries are ignorable */
+             stateProps[state] = -0x40; /* (byte)0xc0 */
+             return stateProps[state];
+         }
+     }
+     stateProps[state] |= (byte) ((min >> 5) << 3);
+
+     /* find last non-ignorable state */
+     for (max = 0xff; min < max; --max) {
+         entry = row[max];
+         nextState = MBCS_ENTRY_STATE(entry);
+         if (stateProps[nextState] == -1) {
+             getStateProp(stateTable, stateProps, nextState);
+         }
+         if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+             if (stateProps[nextState] >= 0) {
+                 break;
+             }
+         } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
+             break;
+         }
+     }
+     stateProps[state] |= (byte) (max >> 5);
+
+     /* recurse further and collect direct-state information */
+     while (min <= max) {
+         entry = row[min];
+         nextState = MBCS_ENTRY_STATE(entry);
+         if (stateProps[nextState] == -1) {
+             getStateProp(stateTable, stateProps, nextState);
+         }
+         /*
+          * Only final entries carry an action. The state a final entry
+          * returns to is a direct state, and a state that itself produces
+          * single-entry (direct) results is a direct state as well.
+          */
+         if (MBCS_ENTRY_IS_FINAL(entry)) {
+             stateProps[nextState] |= 0x40;
+             if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) {
+                 stateProps[state] |= 0x40;
+             }
+         }
+         ++min;
+     }
+     return stateProps[state];
+ }
+
+ /*
+  * Finishes converter setup after the shared MBCS data has been loaded:
+  * resolves the swap-LF/NL option, derives the name-based special-handling
+  * option flags, and adjusts maxBytesPerChar for SI/SO output and for
+  * extension data.
+  *
+  * myOptions: the requested converter option bits
+  */
+ protected void initializeConverter(int myOptions) {
+     UConverterMBCSTable mbcsTable = sharedData.mbcs;
+     short outputType = mbcsTable.outputType;
+     ByteBuffer extIndexes;
+     byte maxBytesPerUChar;
+
+     if (outputType == MBCS_OUTPUT_DBCS_ONLY) {
+         /* the swaplfnl option does not apply, remove it */
+         this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;
+     }
+
+     if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+         /* do this because double-checked locking is broken */
+         boolean isCached;
+
+         // agljport:todo umtx_lock(NULL);
+         isCached = mbcsTable.swapLFNLStateTable != null;
+         // agljport:todo umtx_unlock(NULL);
+
+         if (!isCached) {
+             try {
+                 if (!EBCDICSwapLFNL()) {
+                     /* this option does not apply, remove it */
+                     this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;
+                 }
+             } catch (Exception e) {
+                 /* something went wrong; the rest of the initialization is skipped */
+                 return;
+             }
+         }
+     }
+
+     /*
+      * Lower-case once, with a fixed locale: the default-locale variant maps
+      * 'I' to a dotless i under Turkish/Azeri locales, which would make the
+      * "keis"/"jips" checks miss upper-case converter names.
+      */
+     String lowerName = icuCanonicalName.toLowerCase(java.util.Locale.ENGLISH);
+     if (lowerName.indexOf("gb18030") >= 0) {
+         /* set a flag for GB 18030 mode, which changes the callback behavior */
+         this.options |= MBCS_OPTION_GB18030;
+     } else if (lowerName.indexOf("keis") >= 0) {
+         this.options |= MBCS_OPTION_KEIS;
+     } else if (lowerName.indexOf("jef") >= 0) {
+         this.options |= MBCS_OPTION_JEF;
+     } else if (lowerName.indexOf("jips") >= 0) {
+         this.options |= MBCS_OPTION_JIPS;
+     }
+
+     /* fix maxBytesPerUChar depending on outputType and options etc. */
+     if (outputType == MBCS_OUTPUT_2_SISO) {
+         maxBytesPerChar = 3; /* SO+DBCS */
+     }
+
+     extIndexes = mbcsTable.extIndexes;
+     if (extIndexes != null) {
+         maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes);
+         if (outputType == MBCS_OUTPUT_2_SISO) {
+             ++maxBytesPerUChar; /* SO + multiple DBCS */
+         }
+
+         /* extension mappings may need more bytes per char than the base table */
+         if (maxBytesPerUChar > maxBytesPerChar) {
+             maxBytesPerChar = maxBytesPerUChar;
+         }
+     }
+ }
+ /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/
+ /*
+ * This code modifies a standard EBCDIC<->Unicode mapping table for
+ * OS/390 (z/OS) Unix System Services (Open Edition).
+ * The difference is in the mapping of Line Feed and New Line control codes:
+ * Standard EBCDIC maps
+ *
+ * <U000A> \x25 |0
+ * <U0085> \x15 |0
+ *
+ * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
+ * mapping
+ *
+ * <U000A> \x15 |0
+ * <U0085> \x25 |0
+ *
+ * This code modifies a loaded standard EBCDIC<->Unicode mapping table
+ * by copying it into allocated memory and swapping the LF and NL values.
+ * This makes it possible to support the same EBCDIC charset in both versions
+ * without duplicating the entire installed table.
+ */
+ /* standard EBCDIC codes (single-byte values for Line Feed and New Line) */
+ private static final short EBCDIC_LF = 0x0025;
+ private static final short EBCDIC_NL = 0x0015;
+
+ /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
+ /* (the same byte values as above, with 0x0f in the upper byte) */
+ private static final short EBCDIC_RT_LF = 0x0f25;
+ private static final short EBCDIC_RT_NL = 0x0f15;
+
+ /* Unicode code points (U+000A and U+0085) */
+ private static final short U_LF = 0x000A;
+ private static final short U_NL = 0x0085;
+
+ /*
+  * Builds the LF<->NL-swapped variant of the loaded EBCDIC table and caches
+  * it on the shared MBCS table (swapLFNLStateTable, swapLFNLFromUnicodeBytes,
+  * swapLFNLName).
+  *
+  * Returns false if the table is not an SBCS or SI/SO EBCDIC table with the
+  * standard LF and NL mappings, in which case the option does not apply;
+  * returns true once the swapped tables are available.
+  * Throws Exception ("U_INVALID_FORMAT_ERROR") if the length of the
+  * fromUnicode bytes is not known.
+  */
+ private boolean EBCDICSwapLFNL() throws Exception {
+     UConverterMBCSTable mbcsTable = sharedData.mbcs;
+
+     char[] table = mbcsTable.fromUnicodeTable;
+     byte[] bytes = mbcsTable.fromUnicodeBytes;
+     byte[] results = bytes;
+
+     int[][] newStateTable;
+     byte[] newResults;
+     String newName;
+
+     int stage2Entry;
+     int sizeofFromUBytes;
+
+     /*
+      * Check that this is an EBCDIC table with SBCS portion -
+      * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
+      *
+      * If not, ignore the option. Options are always ignored if they do not apply.
+      */
+     if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
+             mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
+             mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) {
+         return false;
+     }
+
+     /* the fromUnicode direction must hold the standard roundtrip mappings, too */
+     if (mbcsTable.outputType == MBCS_OUTPUT_1) {
+         if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
+                 EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) {
+             return false;
+         }
+     } else /* MBCS_OUTPUT_2_SISO */ {
+         stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
+                 EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) {
+             return false;
+         }
+
+         stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
+                 EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) {
+             return false;
+         }
+     }
+
+     if (mbcsTable.fromUBytesLength > 0) {
+         /*
+          * We _know_ the number of bytes in the fromUnicodeBytes array
+          * starting with header.version 4.1.
+          */
+         sizeofFromUBytes = mbcsTable.fromUBytesLength;
+     } else {
+         /*
+          * Otherwise:
+          * There used to be code to enumerate the fromUnicode
+          * trie and find the highest entry, but it was removed in ICU 3.2
+          * because it was not tested and caused a low code coverage number.
+          */
+         throw new Exception("U_INVALID_FORMAT_ERROR");
+     }
+
+     /*
+      * The table has an appropriate format.
+      * Allocate and build
+      * - a modified to-Unicode state table
+      * - a modified from-Unicode output array
+      * - a converter name string with the swap option appended
+      */
+
+     /* copy and modify the to-Unicode state table */
+     newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length];
+     for (int i = 0; i < newStateTable.length; i++) {
+         System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length);
+     }
+
+     newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
+     newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
+
+     /* copy and modify the from-Unicode result table */
+     newResults = new byte[sizeofFromUBytes];
+     System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes);
+     if (mbcsTable.outputType == MBCS_OUTPUT_1) {
+         MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
+         MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
+     } else /* MBCS_OUTPUT_2_SISO */ {
+         stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+         MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
+
+         stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+         MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
+     }
+
+     /*
+      * Set the canonical converter name. String is immutable, so the result
+      * of concat() must be kept; discarding it would drop the option suffix.
+      */
+     newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
+
+     /* publish only if no other caller has cached the swapped tables meanwhile */
+     if (mbcsTable.swapLFNLStateTable == null) {
+         mbcsTable.swapLFNLStateTable = newStateTable;
+         mbcsTable.swapLFNLFromUnicodeBytes = newResults;
+         mbcsTable.swapLFNLName = newName;
+     }
+     return true;
+ }
+
+ /**
+ * MBCS output types for conversions from Unicode. These per-converter types determine the storage method in stage 3
+ * of the lookup table, mostly how many bytes are stored per entry.
+ */
+ static final int MBCS_OUTPUT_1 = 0; /* 0 */
+ static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */
+ static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */
+ static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */
+ static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */
+ static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */
+ /* SI/SO stateful output; initializeConverter() budgets an extra byte per char for the shift code */
+ static final int MBCS_OUTPUT_2_SISO = 12; /* c */
+ static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */
+ static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */
+ // static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;
+ /* the swap-LF/NL option is removed for this type in initializeConverter() */
+ static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */
+
+ /* GB 18030 data ------------------------------------------------------------ */
+
+ /* helper macros for linear values for GB 18030 four-byte sequences */
+ private static long LINEAR_18030(long a, long b, long c, long d) {
+ return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff));
+ }
+
+ private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
+
+ private static long LINEAR(long x) {
+ return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
+ }
+
+ /*
+ * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are
+ * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB
+ * codes.
+ *
+ * Each row is { first Unicode code point, last Unicode code point, LINEAR(first GB sequence),
+ * LINEAR(last GB sequence) }; the GB values are stored in their linearized form.
+ *
+ * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
+ */
+ private static final long gb18030Ranges[][] = new long[/* 14 */][/* 4 */] {
+ { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) },
+ { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) },
+ { 0x0452L, 0x1E3EL, LINEAR(0x8130D330L), LINEAR(0x8135F436L) },
+ { 0x1E40L, 0x200FL, LINEAR(0x8135F438L), LINEAR(0x8136A531L) },
+ { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) },
+ { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) },
+ { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) },
+ { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) },
+ { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) },
+ { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) },
+ { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) },
+ { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) },
+ { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) },
+ { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } };
+
+ /* bit flag for UConverter.options indicating GB 18030 special handling */
+ private static final int MBCS_OPTION_GB18030 = 0x8000;
+
+ /*
+ * bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling;
+ * initializeConverter() sets them from the converter name, getSISOBytes() reads them
+ */
+ private static final int MBCS_OPTION_KEIS = 0x01000;
+ private static final int MBCS_OPTION_JEF = 0x02000;
+ private static final int MBCS_OPTION_JIPS = 0x04000;
+
+ /* selects which shift sequence getSISOBytes() produces */
+ private static enum SISO_Option {
+ SI,
+ SO
+ }
+
+ /* converter-specific shift sequences used by getSISOBytes(); KEIS and JIPS use two bytes, JEF one */
+ private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 };
+ private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 };
+ private static final byte JEF_SO_CHAR = 0x28;
+ private static final byte JEF_SI_CHAR = 0x29;
+ private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 };
+ private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 };
+
+ private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) {
+ int SISOLength = 0;
+
+ switch (option) {
+ case SI:
+ if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
+ value[0] = KEIS_SI_CHAR[0];
+ value[1] = KEIS_SI_CHAR[1];
+ SISOLength = 2;
+ } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
+ value[0] = JEF_SI_CHAR;
+ SISOLength = 1;
+ } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
+ value[0] = JIPS_SI_CHAR[0];
+ value[1] = JIPS_SI_CHAR[1];
+ SISOLength = 2;
+ } else {
+ value[0] = UConverterConstants.SI;
+ SISOLength = 1;
+ }
+ break;
+ case SO:
+ if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
+ value[0] = KEIS_SO_CHAR[0];
+ value[1] = KEIS_SO_CHAR[1];
+ SISOLength = 2;
+ } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
+ value[0] = JEF_SO_CHAR;
+ SISOLength = 1;
+ } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
+ value[0] = JIPS_SO_CHAR[0];
+ value[1] = JIPS_SO_CHAR[1];
+ SISOLength = 2;
+ } else {
+ value[0] = UConverterConstants.SO;
+ SISOLength = 1;
+ }
+ break;
+ default:
+ /* Should never happen. */
+ break;
+ }
+
+ return SISOLength;
+ }
+ // enum {
+ /* size of the per-state property array allocated in MBCSEnumToUnicode() */
+ static final int MBCS_MAX_STATE_COUNT = 128;
+ // };
+ /**
+ * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
+ * Actions from MBCS_STATE_UNASSIGNED upwards produce no mapping; getStateProp() treats them as ignorable.
+ */
+ static final int MBCS_STATE_VALID_DIRECT_16 = 0;
+ static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
+ static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
+ static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
+ static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
+ static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
+ static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
+ static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
+ static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
+
+ static int MBCS_ENTRY_SET_STATE(int entry, int state) {
+ return (entry&0x80ffffff)|(state<<24L);
+ }
+
+ static int MBCS_ENTRY_STATE(int entry) {
+ return (((entry)>>24)&0x7f);
+ }
+
+ /* Methods for state table entries */
+ static int MBCS_ENTRY_TRANSITION(int state, int offset) {
+ return (state << 24L) | offset;
+ }
+
+ static int MBCS_ENTRY_FINAL(int state, int action, int value) {
+ return 0x80000000 | (state << 24L) | (action << 20L) | value;
+ }
+
+ static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
+ return (entry) >= 0;
+ }
+
+ static boolean MBCS_ENTRY_IS_FINAL(int entry) {
+ return (entry) < 0;
+ }
+
+ static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
+ return ((entry) >>> 24);
+ }
+
+ static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
+ return ((entry) & 0xffffff);
+ }
+
+ static int MBCS_ENTRY_FINAL_STATE(int entry) {
+ return ((entry) >>> 24) & 0x7f;
+ }
+
+ static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
+ return ((entry) < 0x80100000);
+ }
+
+ static int MBCS_ENTRY_FINAL_ACTION(int entry) {
+ return ((entry) >>> 20) & 0xf;
+ }
+
+ static int MBCS_ENTRY_FINAL_VALUE(int entry) {
+ return ((entry) & 0xfffff);
+ }
+
+ static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
+ return (char) (entry);
+ }
+
+ static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) {
+ return (((asciiRoundtrips) & (1<<((b)>>2)))!=0);
+ }
+
+ /**
+ * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte,
+ * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
+ */
+ static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
+ return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]);
+ }
+
+ /* single-byte fromUnicode: get the 16-bit result word */
+ static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
+ int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
+ int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
+ return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+ }
+
+ /* single-byte fromUnicode: set the 16-bit result word with newValue*/
+ static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) {
+ int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
+ int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
+ results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ results[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+
+ /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
+ static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
+ int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as
+ // int[] array
+ return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)
+ | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK);
+ }
+
+ private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
+ return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
+ }
+
+ static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
+ int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
+ return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+ }
+
+ static void MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) {
+ int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
+ bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+
+ private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
+ int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
+ return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24)
+ | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
+ | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
+ | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+
+ static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
+ return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
+ }
+
+ // ------------UConverterExt-------------------------------------------------------
+
+ /*
+ * Positions in the extension indexes[] table. These count 32-bit entries, not bytes:
+ * ARRAY() reads entry `index` at byte offset index << 2.
+ */
+ static final int EXT_INDEXES_LENGTH = 0; /* 0 */
+
+ static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
+ static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
+ static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
+ static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
+
+ static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
+ static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
+ static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
+ static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
+ static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
+
+ static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
+ static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
+ static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
+ static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
+ static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
+ static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
+ static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
+
+ /* entry whose low byte GET_MAX_BYTES_PER_UCHAR() returns */
+ private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
+ // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
+ // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1;
+ //
+ // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */
+ //
+ // private static final int EXT_SIZE=31;
+ // private static final int EXT_INDEXES_MIN_LENGTH=32;
+
+ static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
+
+ /* toUnicode helpers -------------------------------------------------------- */
+
+ /* a toUnicode table word holds a byte in bits 31..24 and a value in bits 23..0 */
+ private static final int TO_U_BYTE_SHIFT = 24;
+ private static final int TO_U_VALUE_MASK = 0xffffff;
+ /* values below TO_U_MIN_CODE_POINT are partial-match indexes; values up to */
+ /* TO_U_MAX_CODE_POINT are code points offset by TO_U_MIN_CODE_POINT */
+ private static final int TO_U_MIN_CODE_POINT = 0x1f0000;
+ private static final int TO_U_MAX_CODE_POINT = 0x2fffff;
+ private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23);
+ /* other values combine an index (low 18 bits) with a length stored above it */
+ private static final int TO_U_INDEX_MASK = 0x3ffff;
+ private static final int TO_U_LENGTH_SHIFT = 18;
+ private static final int TO_U_LENGTH_OFFSET = 12;
+
+ /* maximum number of indexed UChars */
+ static final int MAX_UCHARS = 19;
+
+ static int TO_U_GET_BYTE(int word) {
+ return word >>> TO_U_BYTE_SHIFT;
+ }
+
+ static int TO_U_GET_VALUE(int word) {
+ return word & TO_U_VALUE_MASK;
+ }
+
+ static boolean TO_U_IS_ROUNDTRIP(int value) {
+ return (value & TO_U_ROUNDTRIP_FLAG) != 0;
+ }
+
+ static boolean TO_U_IS_PARTIAL(int value) {
+ return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;
+ }
+
+ static int TO_U_GET_PARTIAL_INDEX(int value) {
+ return value;
+ }
+
+ static int TO_U_MASK_ROUNDTRIP(int value) {
+ return value & ~TO_U_ROUNDTRIP_FLAG;
+ }
+
+ private static int TO_U_MAKE_WORD(byte b, int value) {
+ return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value;
+ }
+
+ /* use after masking off the roundtrip flag */
+ static boolean TO_U_IS_CODE_POINT(int value) {
+ return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;
+ }
+
+ static int TO_U_GET_CODE_POINT(int value) {
+ return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
+ }
+
+ private static int TO_U_GET_INDEX(int value) {
+ return value & TO_U_INDEX_MASK;
+ }
+
+ private static int TO_U_GET_LENGTH(int value) {
+ return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET;
+ }
+
+ /* fromUnicode helpers ------------------------------------------------------ */
+
+ /* most trie constants are shared with ucnvmbcs.h */
+ private static final int STAGE_2_LEFT_SHIFT = 2;
+
+ // private static final int STAGE_3_GRANULARITY = 4;
+
+ /*
+  * trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10
+  *
+  * stage12 is read twice: at s1Index for the start of the stage 2 block,
+  * then inside that block at (c>>>4)&0x3f. The stage 2 value, shifted left
+  * by STAGE_2_LEFT_SHIFT, plus c&0xf is the position read from stage3.
+  */
+ static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
+     final int stage2Block = stage12.get(s1Index);
+     final int stage2Value = (int) stage12.get(stage2Block + ((c >>> 4) & 0x3f));
+     final int stage3Index = (stage2Value << STAGE_2_LEFT_SHIFT) + (c & 0xf);
+     return stage3.get(stage3Index);
+ }
+
+ /*
+  * Layout of a fromUnicode value: bit 31 is the roundtrip flag, the length
+  * sits above bit 24 (5 bits are kept by FROM_U_GET_LENGTH), and the low
+  * 24 bits hold the data.
+  */
+ private static final int FROM_U_LENGTH_SHIFT = 24;
+ private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
+ static final int FROM_U_RESERVED_MASK = 0x60000000;
+ private static final int FROM_U_DATA_MASK = 0xffffff;
+
+ /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
+ static final int FROM_U_SUBCHAR1 = 0x80000001;
+
+ /* at most 3 bytes in the lower part of the value */
+ private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
+
+ /* maximum number of indexed bytes */
+ static final int MAX_BYTES = 0x1f;
+
+ /** True if all bits from FROM_U_LENGTH_SHIFT upwards are zero. */
+ static boolean FROM_U_IS_PARTIAL(int value) {
+     final int upperBits = value >>> FROM_U_LENGTH_SHIFT;
+     return upperBits == 0;
+ }
+
+ /** A partial value is itself the index; returned unchanged. */
+ static int FROM_U_GET_PARTIAL_INDEX(int value) {
+     return value;
+ }
+
+ /** True if the roundtrip flag (bit 31) is set. */
+ static boolean FROM_U_IS_ROUNDTRIP(int value) {
+     final int flag = value & FROM_U_ROUNDTRIP_FLAG;
+     return flag != 0;
+ }
+
+ /* returns value with the roundtrip flag cleared */
+ private static int FROM_U_MASK_ROUNDTRIP(int value) {
+     final int keepMask = ~FROM_U_ROUNDTRIP_FLAG;
+     return value & keepMask;
+ }
+
+ /* use after masking off the roundtrip flag */
+ /** Returns the length field, limited to MAX_BYTES. */
+ static int FROM_U_GET_LENGTH(int value) {
+     final int upperBits = value >>> FROM_U_LENGTH_SHIFT;
+     return upperBits & MAX_BYTES;
+ }
+
+ /* get bytes or bytes index */
+ static int FROM_U_GET_DATA(int value) {
+     final int dataBits = value & FROM_U_DATA_MASK;
+     return dataBits;
+ }
+
+ /* get the pointer to an extension array from indexes[index] */
+ static Buffer ARRAY(ByteBuffer indexes, int index, Class> itemType) {
+ int oldpos = indexes.position();
+ Buffer b;
+
+ indexes.position(indexes.getInt(index << 2));
+ if (itemType == int.class)
+ b = indexes.asIntBuffer();
+ else if (itemType == char.class)
+ b = indexes.asCharBuffer();
+ else if (itemType == short.class)
+ b = indexes.asShortBuffer();
+ else
+ // default or (itemType == byte.class)
+ b = indexes.slice();
+ indexes.position(oldpos);
+ return b;
+ }
+
+ /*
+  * Returns the low byte of indexes[EXT_COUNT_BYTES].
+  *
+  * EXT_COUNT_BYTES counts 32-bit entries, while ByteBuffer.getInt(int)
+  * takes a byte offset, so the index is scaled by 4, exactly as ARRAY()
+  * does for the same table. Note: the buffer position is reset to 0 as a
+  * side effect, as before.
+  */
+ private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) {
+     indexes.position(0);
+     return indexes.getInt(EXT_COUNT_BYTES << 2) & 0xff;
+ }
+
+ /*
+ * @return index of the UChar, if found; else <0
+ */
+ static int findFromU(CharBuffer fromUSection, int length, char u) {
+ int i, start, limit;
+
+ /* binary search */
+ start = 0;
+ limit = length;
+ for (;;) {
+ i = limit - start;
+ if (i <= 1) {
+ break; /* done */
+ }
+ /* startmode==0 is equivalent to firstLength==1.
+ */
+ private static int SISO_STATE(UConverterSharedData sharedData, int mode) {
+ return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode
+ : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
+ }
+
+ class CharsetDecoderMBCS extends CharsetDecoderICU {
+
+ /* Creates a decoder for the given charset; all setup is delegated to the superclass. */
+ CharsetDecoderMBCS(CharsetICU cs) {
+ super(cs);
+ }
+
+ /*
+ * Decodes bytes from source into target by delegating to cnvMBCSToUnicodeWithOffsets()
+ * with the same arguments, and returns its result.
+ */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */
+ return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
+ }
+
+ /*
+ * continue partial match with new input never called for simple, single-character conversion
+ *
+ * Re-runs matchToU() on the buffered bytes in preToUArray plus the new source input and
+ * handles the three outcomes:
+ * match > 0: full match; the consumed input is accounted for and the result is written
+ * with writeToU()
+ * match < 0: still a partial match; the newly consumed input is appended to preToUArray
+ * match == 0: no match; the first buffered character is reported as unmappable and the
+ * rest is kept for replay
+ * A negative preToULength marks the buffered bytes for replay.
+ * Returns UNDERFLOW, the result of writeToU(), or an unmappable result.
+ */
+ private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex,
+ boolean flush) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* one-element array used as an output parameter of matchToU() */
+ int[] value = new int[1];
+ int match, length;
+
+ match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source,
+ value, isToUUseFallback(), flush);
+
+ if (match > 0) {
+ if (match >= preToULength) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position() + match - preToULength);
+ preToULength = 0;
+ } else {
+ /* the match did not use all of preToU[] - keep the rest for replay */
+ length = preToULength - match;
+ System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length);
+ preToULength = (byte) -length;
+ }
+
+ /* write result */
+ cr = writeToU(value[0], target, offsets, srcIndex);
+ } else if (match < 0) {
+ /* save state for partial match */
+ int j, sArrayIndex;
+
+ /* just _append_ the newly consumed input to preToU[] */
+ sArrayIndex = source.position();
+ match = -match;
+ for (j = preToULength; j < match; ++j) {
+ preToUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+ preToULength = (byte) match;
+ } else /* match==0 */{
+ /*
+ * no match
+ *
+ * We need to split the previous input into two parts:
+ *
+ * 1. The first codepage character is unmappable - that's how we got into trying the extension data in
+ * the first place. We need to move it from the preToU buffer to the error buffer, set an error code,
+ * and prepare the rest of the previous input for 2.
+ *
+ * 2. The rest of the previous input must be converted once we come back from the callback for the first
+ * character. At that time, we have to try again from scratch to convert these input characters. The
+ * replay will be handled by the ucnv.c conversion code.
+ */
+
+ /* move the first codepage character to the error field */
+ System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
+ toULength = preToUFirstLength;
+
+ /* move the rest up inside the buffer */
+ length = preToULength - preToUFirstLength;
+ if (length > 0) {
+ System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length);
+ }
+
+ /* mark preToU for replay */
+ preToULength = (byte) -length;
+
+ /* set the error code for unassigned */
+ cr = CoderResult.unmappableForLength(preToUFirstLength);
+ }
+ return cr;
+ }
+
+ /*
+ * Matches preArray[preArrayBegin..preArrayBegin+preLength[ followed by the unread bytes of source (which may be null) against the toU extension table. This works like matchFromU() except - the first character is in pre - no trie is used - the returned
+ * matchLength is not offset by 2. Returns >0 for a full match of that many bytes (its value is stored in pMatchValue[0]), 0 for no match or no extension data, <0 for a partial match of -result bytes that needs more input. The position of source is not moved.
+ */
+ private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source,
+ int[] pMatchValue, boolean isUseFallback, boolean flush) {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+ IntBuffer toUTable, toUSection;
+
+ int value, matchValue, srcLength = 0;
+ int i, j, index, length, matchLength;
+ short b;
+
+ if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* initialize: start at section 0 of the toU table, nothing matched yet */
+ toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
+ index = 0;
+
+ matchValue = 0;
+ i = j = matchLength = 0; /* i counts bytes taken from preArray, j counts bytes taken from source */
+ if (source != null) {
+ srcLength = source.remaining();
+ }
+
+ if (sisoState == 0) {
+ /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
+ if (preLength > 1) {
+ return 0; /* no match of a DBCS sequence in SBCS mode */
+ } else if (preLength == 1) {
+ srcLength = 0;
+ } else /* preLength==0 */{
+ if (srcLength > 1) {
+ srcLength = 1;
+ }
+ }
+ flush = true; /* makes the "all input consumed" case below stop instead of reporting a partial match */
+ }
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for (;;) {
+ /* go to the next section: slice the table at index and restore the table's own position afterwards */
+ int oldpos = toUTable.position();
+ toUSection = ((IntBuffer) toUTable.position(index)).slice();
+ toUTable.position(oldpos);
+
+ /* read first pair of the section (relative get: toUSection now points behind that pair) */
+ value = toUSection.get();
+ length = TO_U_GET_BYTE(value);
+ value = TO_U_GET_VALUE(value);
+ if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback))
+ && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
+ /* remember longest match so far */
+ matchValue = value;
+ matchLength = i + j;
+ }
+
+ /* match pre[] then src[]; source is read with absolute gets so its position stays untouched */
+ if (i < preLength) {
+ b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ } else if (j < srcLength) {
+ b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ } else {
+ /* all input consumed, partial match; note that length is reassigned to i + j only when flush is false */
+ if (flush || (length = (i + j)) > MAX_BYTES) {
+ /*
+ * end of the entire input stream, stop with the longest match so far or: partial match must not
+ * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time */
+ return -length;
+ }
+ }
+
+ /* search for the current byte b among the length entries of this section */
+ value = findToU(toUSection, length, b);
+ if (value == 0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ if (TO_U_IS_PARTIAL(value)) {
+ /* partial match, continue with the section that the entry points to */
+ index = TO_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
+ /* full match, stop with result */
+ matchValue = value;
+ matchLength = i + j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if (matchLength == 0) {
+ /* no match at all */
+ return 0;
+ }
+
+ /* return result: the value goes out through pMatchValue[0], the consumed length is the return value */
+ pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+ }
+
+ private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+ /* output the result */
+ if (TO_U_IS_CODE_POINT(value)) {
+ /* output a single code point */
+ return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);
+ } else {
+ /* output a string - with correct data we have resultLength>0 */
+
+ char[] a = new char[TO_U_GET_LENGTH(value)];
+ CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));
+ cb.position(TO_U_GET_INDEX(value));
+ cb.get(a, 0, a.length);
+ return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);
+ }
+ }
+
+ private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+ int tBeginIndex = target.position();
+
+ if (target.hasRemaining()) {
+ if (c <= 0xffff) {
+ target.put((char) c);
+ c = UConverterConstants.U_SENTINEL;
+ } else /* c is a supplementary code point */{
+ target.put(UTF16.getLeadSurrogate(c));
+ c = UTF16.getTrailSurrogate(c);
+ if (target.hasRemaining()) {
+ target.put((char) c);
+ c = UConverterConstants.U_SENTINEL;
+ }
+ }
+
+ /* write offsets */
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ if ((tBeginIndex + 1) < target.position()) {
+ offsets.put(sourceIndex);
+ }
+ }
+ }
+
+ /* write overflow from c */
+ if (c >= 0) {
+ charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
+ cr = CoderResult.OVERFLOW;
+ }
+
+ return cr;
+ }
+
+ /*
+ * Input sequence: cnv->toUBytes[0..length[ @return if(U_FAILURE) return the length (toULength, byteIndex) for
+ * the input else return 0 after output has been written to the target
+ */
+ private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex,
+ boolean flush, CoderResult[] cr) {
+ // ByteBuffer cx;
+
+ if (sharedData.mbcs.extIndexes != null
+ && initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {
+ return 0; /* an extension mapping handled the input */
+ }
+
+ /* GB 18030 */
+ if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
+ long[] range;
+ long linear;
+ int i;
+
+ linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
+ for (i = 0; i < gb18030Ranges.length; ++i) {
+ range = gb18030Ranges[i];
+ if (range[2] <= linear && linear <= range[3]) {
+ /* found the sequence, output the Unicode code point for it */
+ cr[0] = CoderResult.UNDERFLOW;
+
+ /* add the linear difference between the input and start sequences to the start code point */
+ linear = range[0] + (linear - range[2]);
+
+ /* output this code point */
+ cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex);
+
+ return 0;
+ }
+ }
+ }
+
+ /* no mapping */
+ cr[0] = CoderResult.unmappableForLength(length);
+ return length;
+ }
+
+ /*
+ * First attempt to map an unassigned input sequence through the extension table. The sequence is
+ * toUBytesArray[toUBytesBegin..toUBytesBegin+firstLength[, optionally continued by unread bytes of source.
+ *
+ * - full match: the extra bytes taken from source are consumed and the result is written with writeToU();
+ * cr[0] receives its result (which reports a full target as overflow)
+ * - partial match: all matched bytes are saved in preToUArray for the next call and source is consumed
+ * - no match: nothing is changed
+ *
+ * @return true if the input was handled (full or partial match), false if there is no match
+ */
+ private boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ int srcIndex, boolean flush, CoderResult[] cr) {
+ int[] value = new int[1];
+ int match;
+
+ /* try to match */
+ match = matchToU((byte) SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source,
+ value, isToUUseFallback(), flush);
+ if (match > 0) {
+ /* advance src pointer for the consumed input (the first firstLength bytes were consumed earlier) */
+ source.position(source.position() + match - firstLength);
+
+ /* write result to target */
+ cr[0] = writeToU(value[0], target, offsets, srcIndex);
+ return true;
+ } else if (match < 0) {
+ /* save state for partial match */
+ byte[] sArray;
+ int sArrayIndex;
+ int j;
+
+ /* copy the first code point */
+ sArray = toUBytesArray;
+ sArrayIndex = toUBytesBegin;
+ preToUFirstLength = (byte) firstLength;
+ for (j = 0; j < firstLength; ++j) {
+ preToUArray[j] = sArray[sArrayIndex++];
+ }
+
+ /* now copy the newly consumed input; -match is the total length of the partial match */
+ sArrayIndex = source.position();
+ match = -match;
+ for (; j < match; ++j) {
+ preToUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex);
+ preToULength = (byte) match;
+ return true;
+ } else /* match==0 no match */{
+ return false;
+ }
+ }
+
+ private int simpleMatchToU(ByteBuffer source, boolean useFallback) {
+ int[] value = new int[1];
+ int match;
+
+ if (source.remaining() <= 0) {
+ return 0xffff;
+ }
+
+ /* try to match */
+ byte[] sourceArray;
+ int sourcePosition, sourceLimit;
+ if (source.isReadOnly()) {
+ // source.array() would throw an exception
+ sourcePosition = source.position(); // relative to source.array()
+ sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)];
+ source.get(sourceArray).position(sourcePosition);
+ sourcePosition = 0; // relative to sourceArray
+ sourceLimit = sourceArray.length;
+ } else {
+ sourceArray = source.array();
+ sourcePosition = source.position();
+ sourceLimit = source.limit();
+ }
+ match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLimit, null, value, useFallback, true);
+
+ if (match == source.remaining()) {
+ /* write result for simple, single-character conversion */
+ if (TO_U_IS_CODE_POINT(value[0])) {
+ return TO_U_GET_CODE_POINT(value[0]);
+ }
+ }
+
+ /*
+ * return no match because - match>0 && value points to string: simple conversion cannot handle multiple
+ * code points - match>0 && match!=length: not all input consumed, forbidden for this function - match==0:
+ * no match found in the first place - match<0: partial match, not supported for simple conversion (and
+ * flush==TRUE)
+ */
+ return 0xfffe;
+ }
+
+ CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { /* Converts bytes from source into UTF-16 code units in target via the MBCS state table, writing one source index per output unit into offsets when it is not null; converter state is read from and written back to toUnicodeStatus, mode, toULength and toUBytesArray. Returns UNDERFLOW, OVERFLOW, or a malformed/unmappable result. */
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+ int sourceArrayIndex, sourceArrayIndexStart;
+ int stateTable[][/* 256 */];
+ char[] unicodeCodeUnits;
+
+ int offset;
+ byte state;
+ int byteIndex;
+ byte[] bytes;
+
+ int sourceIndex, nextSourceIndex;
+
+ int entry = 0;
+ char c;
+ byte action;
+
+ if (preToULength > 0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
+ * continuous offsets
+ */
+ cr[0] = continueMatchToU(source, target, offsets, -1, flush);
+
+ if (cr[0].isError() || preToULength < 0) { /* error, or continueMatchToU left input marked for replay (negative length) */
+ return cr[0];
+ }
+ }
+
+ if (sharedData.mbcs.countStates == 1) { /* single-state codepage: hand over to the single-byte loops */
+ if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
+ } else {
+ cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
+ }
+ return cr[0];
+ }
+
+ /* set up the local pointers */
+ sourceArrayIndex = sourceArrayIndexStart = source.position();
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ stateTable = sharedData.mbcs.swapLFNLStateTable;
+ } else {
+ stateTable = sharedData.mbcs.stateTable;
+ }
+ unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
+
+ /* get the converter state from UConverter */
+ offset = toUnicodeStatus;
+ byteIndex = toULength;
+ bytes = toUBytesArray;
+
+ /*
+ * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
+ * (dbcsOnlyState==0 if it is not a DBCS-only converter)
+ */
+ state = (byte)mode;
+ if (state == 0) {
+ state = sharedData.mbcs.dbcsOnlyState;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = byteIndex == 0 ? 0 : -1;
+ nextSourceIndex = 0;
+
+ /* conversion loop */
+ while (sourceArrayIndex < source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output. It does not catch output
+ * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
+ * last source byte. Therefore, those situations also test for overflows and will then break the loop,
+ * too.
+ */
+ if (!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ if (byteIndex == 0) {
+ /* optimized loop for 1/2-byte input and BMP output */
+ // agljport:todo see ucnvmbcs.c for deleted block
+ do {
+ entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
+ if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
+ ++sourceArrayIndex;
+ if (sourceArrayIndex < source.limit()
+ && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
+ && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
+ && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
+ ++sourceArrayIndex;
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ sourceIndex = (nextSourceIndex += 2); /* lead byte plus trail byte were consumed for this code unit */
+ }
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ offset = 0;
+ } else {
+ /* set the state and leave the optimized loop */
+ ++nextSourceIndex;
+ bytes[0] = source.get(sourceArrayIndex - 1);
+ byteIndex = 1;
+ break;
+ }
+ } else {
+ if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ ++sourceArrayIndex;
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ sourceIndex = ++nextSourceIndex;
+ }
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ } else {
+ /* leave the optimized loop */
+ break;
+ }
+ }
+ } while (sourceArrayIndex < source.limit() && target.hasRemaining());
+ /*
+ * these tests and break statements could be put inside the loop if C had "break outerLoop" like
+ * Java
+ */
+ if (sourceArrayIndex >= source.limit()) {
+ break;
+ }
+ if (!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ ++nextSourceIndex;
+ bytes[byteIndex++] = source.get(sourceArrayIndex++);
+ } else /* byteIndex>0 */{
+ ++nextSourceIndex;
+ entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
+ & UConverterConstants.UNSIGNED_BYTE_MASK];
+ }
+
+ if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
+ continue;
+ }
+
+ /* save the previous state for proper extension mapping with SI/SO-stateful converters */
+ mode = state;
+
+ /* set the next state early so that we can reuse the entry variable */
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+
+ /*
+ * An if-else-if chain provides more reliable performance for the most common cases compared to a
+ * switch.
+ */
+ action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
+ if (action == MBCS_STATE_VALID_16) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset];
+ if (c < 0xfffe) {
+ /* output BMP code point */
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ } else if (c == 0xfffe) { /* unassigned unless a fallback is found; byteIndex stays >0 when none is written */
+ if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) {
+ /* output fallback BMP code point */
+ target.put((char)entry);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ } else {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ } else if (action == MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ } else if (action == MBCS_STATE_VALID_16_PAIR) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset++];
+ if (c < 0xd800) {
+ /* output BMP code point below 0xd800 */
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
+ /* output roundtrip or fallback surrogate pair */
+ target.put((char)(c & 0xdbff)); /* the mask clears bit 0x0400, turning a dc00..dfff unit into d800..dbff */
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ if (target.hasRemaining()) {
+ target.put(unicodeCodeUnits[offset]);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* target overflow: keep the second code unit in the error buffer */
+ charErrorBufferArray[0] = unicodeCodeUnits[offset];
+ charErrorBufferLength = 1;
+ cr[0] = CoderResult.OVERFLOW;
+
+ offset = 0;
+ break;
+ }
+ } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ target.put(unicodeCodeUnits[offset]);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ } else if (c == 0xffff) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ } else if (action == MBCS_STATE_VALID_DIRECT_20
+ || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
+ entry = MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ target.put((char)(0xd800 | (char)(entry >> 10)));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ c = (char)(0xdc00 | (char)(entry & 0x3ff));
+ if (target.hasRemaining()) {
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* target overflow: keep the trail surrogate in the error buffer */
+ charErrorBufferArray[0] = c;
+ charErrorBufferLength = 1;
+ cr[0] = CoderResult.OVERFLOW;
+
+ offset = 0;
+ break;
+ }
+ } else if (action == MBCS_STATE_CHANGE_ONLY) {
+ /*
+ * This serves as a state change without any output. It is useful for reading simple stateful
+ * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
+ * for more sophisticated state transitions.
+ */
+ if (sharedData.mbcs.dbcsOnlyState == 0) {
+ byteIndex = 0;
+ } else {
+ /* SI/SO are illegal for DBCS-only conversion */
+ state = (byte)(mode); /* restore the previous state */
+
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
+ if (isFallbackUsed()) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ } else if (action == MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if (action == MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ } else {
+ /* reserved, must never occur */
+ byteIndex = 0;
+ }
+
+ /* end of action codes: prepare for a new character */
+ offset = 0;
+
+ if (byteIndex == 0) {
+ sourceIndex = nextSourceIndex;
+ } else if (cr[0].isError()) {
+ /* callback(illegal) */
+ if (byteIndex > 1) {
+ /*
+ * Ticket 5691: consistent illegal sequences:
+ * - We include at least the first byte in the illegal sequence.
+ * - If any of the non-initial bytes could be the start of a character,
+ * we stop the illegal sequence before the first one of those.
+ */
+ boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
+ byte i;
+ for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
+ if (i < byteIndex) {
+ byte backOutDistance = (byte)(byteIndex - i);
+ int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
+ byteIndex = i; /* length of reported illegal byte sequence */
+ if (backOutDistance <= bytesFromThisBuffer) {
+ sourceArrayIndex -= backOutDistance;
+ } else {
+ /* Back out bytes from the previous buffer: Need to replay them. */
+ this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
+ /* preToULength is negative! */
+ for (int n = 0; n < -this.preToULength; n++) {
+ this.preToUArray[n] = bytes[i+n];
+ }
+ sourceArrayIndex = sourceArrayIndexStart;
+ }
+ }
+ }
+ break;
+ } else /* unassigned sequences indicated with byteIndex>0 */{
+ /* try an extension mapping; toU() may consume more bytes, so resync the local index from source afterwards */
+ int sourceBeginIndex = sourceArrayIndex;
+ source.position(sourceArrayIndex);
+ byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex);
+
+ if (cr[0].isError() || cr[0].isOverflow()) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ toUnicodeStatus = offset;
+ mode = state;
+ toULength = byteIndex;
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+ /*
+ * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that
+ * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much
+ * easier: offsets for directly converted bytes are written in batches, before an extension lookup and at the end. Only row 0 of the state table is used. Returns UNDERFLOW, OVERFLOW, or a malformed/unmappable result.
+ */
+ private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ boolean flush) {
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+ int sourceArrayIndex, lastSource;
+ int targetCapacity, length;
+ int[][] stateTable;
+
+ int sourceIndex;
+
+ int entry;
+ byte action;
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+ targetCapacity = target.remaining();
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ stateTable = sharedData.mbcs.swapLFNLStateTable;
+ } else {
+ stateTable = sharedData.mbcs.stateTable;
+ }
+
+ /* offsets count from the start of this call's input; sourceIndex is never set to -1 in this method */
+ sourceIndex = 0;
+ lastSource = sourceArrayIndex;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
+ * sourceLength and targetCapacity
+ */
+ length = source.remaining();
+ if (length < targetCapacity) {
+ targetCapacity = length;
+ }
+
+ /* conversion loop */
+ while (targetCapacity > 0 && sourceArrayIndex < source.limit()) {
+ entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* test the most common case first */
+ if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ --targetCapacity;
+ continue;
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for the most common cases compared to a
+ * switch.
+ */
+ action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
+ if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
+ if (isFallbackUsed()) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ --targetCapacity;
+ continue;
+ }
+ } else if (action == MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if (action == MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource);
+ } else {
+ /* reserved, must never occur */
+ continue;
+ }
+
+ /* set offsets since the start or the last extension */
+ if (offsets != null) {
+ int count = sourceArrayIndex - lastSource;
+
+ /* predecrement: do not set the offset for the callback-causing character */
+ while (--count > 0) {
+ offsets.put(sourceIndex++);
+ }
+ /* offset and sourceIndex are now set for the current character */
+ }
+
+ if (cr[0].isError()) {
+ /* callback(illegal) */
+ break;
+ } else /* unassigned byte, or a fallback that is not used: no error has been set yet */{
+ /* try an extension mapping for the one byte just read */
+ lastSource = sourceArrayIndex;
+ toUBytesArray[0] = source.get(sourceArrayIndex - 1);
+ source.position(sourceArrayIndex);
+ toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex += 1 + (sourceArrayIndex - lastSource);
+
+ if (cr[0].isError()) {
+ /* not mappable; an overflow result is not an error here and ends the loop through the recalculated targetCapacity */
+ break;
+ }
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity = target.remaining();
+ length = source.remaining();
+ if (length < targetCapacity) {
+ targetCapacity = length;
+ }
+ }
+ }
+
+ if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ }
+
+ /* set offsets since the start or the last callback */
+ if (offsets != null) {
+ int count = sourceArrayIndex - lastSource;
+ while (count > 0) {
+ offsets.put(sourceIndex++);
+ --count;
+ }
+ }
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages; unlike the BMP-only variant it can also write surrogate pairs. Only row 0 of the state table is used. Returns UNDERFLOW, OVERFLOW, or a malformed/unmappable result. */
+ private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ boolean flush) {
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+ int sourceArrayIndex;
+ int[][] stateTable;
+
+ int sourceIndex;
+
+ int entry;
+ char c;
+ byte action;
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ stateTable = sharedData.mbcs.swapLFNLStateTable;
+ } else {
+ stateTable = sharedData.mbcs.stateTable;
+ }
+
+ /* offsets count from the start of this call's input; sourceIndex is never set to -1 in this method */
+ sourceIndex = 0;
+
+ /* conversion loop */
+ while (sourceArrayIndex < source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output. It does not catch output
+ * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
+ * last source byte. Therefore, those situations also test for overflows and will then break the loop,
+ * too.
+ */
+ if (!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* test the most common case first */
+ if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+
+ /* normal end of action codes: prepare for a new character */
+ ++sourceIndex;
+ continue;
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for the most common cases compared to a
+ * switch.
+ */
+ action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
+ if (action == MBCS_STATE_VALID_DIRECT_20
+ || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
+
+ entry = MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ target.put((char) (0xd800 | (char) (entry >>> 10)));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ c = (char) (0xdc00 | (char) (entry & 0x3ff));
+ if (target.hasRemaining()) {
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ } else {
+ /* target overflow: keep the trail surrogate in the error buffer */
+ charErrorBufferArray[0] = c;
+ charErrorBufferLength = 1;
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ ++sourceIndex;
+ continue;
+ } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
+ if (isFallbackUsed()) {
+ /* output BMP code point */
+ target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+
+ ++sourceIndex;
+ continue;
+ }
+ } else if (action == MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ } else if (action == MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ } else {
+ /* reserved, must never occur */
+ ++sourceIndex;
+ continue;
+ }
+
+ if (cr[0].isError()) {
+ /* callback(illegal) */
+ break;
+ } else /* unassigned byte, or a fallback that is not used: no error has been set yet */{
+ /* try an extension mapping for the one byte just read */
+ int sourceBeginIndex = sourceArrayIndex;
+ toUBytesArray[0] = source.get(sourceArrayIndex - 1);
+ source.position(sourceArrayIndex);
+ toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex);
+
+ if (cr[0].isError()) {
+ /* not mappable; an overflow result is not an error here, so the loop goes on to its own checks */
+ break;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ private int getFallback(UConverterMBCSTable mbcsTable, int offset) {
+ MBCSToUFallback[] toUFallbacks;
+ int i, start, limit;
+
+ limit = mbcsTable.countToUFallbacks;
+ if (limit > 0) {
+ /* do a binary search for the fallback mapping */
+ toUFallbacks = mbcsTable.toUFallbacks;
+ start = 0;
+ while (start < limit - 1) {
+ i = (start + limit) / 2;
+ if (offset < toUFallbacks[i].offset) {
+ limit = i;
+ } else {
+ start = i;
+ }
+ }
+
+ /* did we really find it? */
+ if (offset == toUFallbacks[start].offset) {
+ return toUFallbacks[start].codePoint;
+ }
+ }
+
+ return 0xfffe;
+ }
+
+ /**
+ * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only
+ * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor
+ * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion
+ * extensions but not GB 18030. The bytes from source.position() up to source.limit() are read with absolute gets, so the position is not advanced here.
+ *
+ * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point
+ */
+ int simpleGetNextUChar(ByteBuffer source, boolean useFallback) {
+
+ // #if 0
+ // /*
+ // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
+ // * TODO In future releases, verify that this function is never called for SBCS
+ // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
+ // * Removal improves code coverage.
+ // */
+ // /* use optimized function if possible */
+ // if(sharedData->mbcs.countStates==1) {
+ // if(length==1) {
+ // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
+ // } else {
+ // return 0xffff; /* illegal: more than a single byte for an SBCS converter */
+ // }
+ // }
+ // #endif
+
+ /* set up the local pointers */
+ int[][] stateTable = sharedData.mbcs.stateTable;
+ char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
+
+ /* converter state: always starts fresh, nothing is taken from or saved to the converter */
+ int offset = 0;
+ int state = sharedData.mbcs.dbcsOnlyState;
+
+ int action;
+ int entry;
+ int c;
+ int i = source.position();
+ int length = source.limit() - i;
+
+ /* conversion loop: ends at the first final entry, or with 0xffff if the input ends inside a character */
+ while (true) {
+ // entry=stateTable[state][(uint8_t)source[i++]];
+ entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];
+
+ if (MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ if (i == source.limit()) {
+ return 0xffff; /* truncated character */
+ }
+ } else {
+ /*
+ * An if-else-if chain provides more reliable performance for the most common cases compared to a
+ * switch.
+ */
+ action = MBCS_ENTRY_FINAL_ACTION(entry);
+ if (action == MBCS_STATE_VALID_16) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset];
+ if (c != 0xfffe) {
+ /* done */
+ } else if (isToUUseFallback()) {
+ c = getFallback(sharedData.mbcs, offset);
+ }
+ /* else done with 0xfffe */
+ } else if (action == MBCS_STATE_VALID_DIRECT_16) {
+ // /* output BMP code point */
+ c = MBCS_ENTRY_FINAL_VALUE_16(entry);
+ } else if (action == MBCS_STATE_VALID_16_PAIR) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset++];
+ if (c < 0xd800) {
+ /* output BMP code point below 0xd800 */
+ } else if (isToUUseFallback() ? c <= 0xdfff : c <= 0xdbff) {
+ /* output roundtrip or fallback supplementary code point */
+ c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));
+ } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ c = unicodeCodeUnits[offset];
+ } else if (c == 0xffff) {
+ return 0xffff;
+ } else {
+ c = 0xfffe;
+ }
+ } else if (action == MBCS_STATE_VALID_DIRECT_20) {
+ /* output supplementary code point */
+ c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
+ } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
+ if (!isToUUseFallback(useFallback)) {
+ c = 0xfffe;
+ } else {
+ /* output BMP code point */
+ c = MBCS_ENTRY_FINAL_VALUE_16(entry);
+ }
+ } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {
+ if (!isToUUseFallback(useFallback)) {
+ c = 0xfffe;
+ } else {
+ /* output supplementary code point */
+ c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
+ }
+ } else if (action == MBCS_STATE_UNASSIGNED) {
+ c = 0xfffe;
+ } else {
+ /*
+ * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action
+ * codes
+ */
+ return 0xffff;
+ }
+ break;
+ }
+ }
+
+ if (i != source.limit()) {
+ /* illegal for this function: not all input consumed */
+ return 0xffff;
+ }
+
+ if (c == 0xfffe) {
+ /* try an extension mapping */
+ if (sharedData.mbcs.extIndexes != null) {
+ /* Increase the limit for proper handling. Used in LMBCS. NOTE(review): as written this can only lower the limit (to i + length), and i already equals the limit here - confirm the intent against the C code. */
+ if (source.limit() > i + length) {
+ source.limit(i + length);
+ }
+ return simpleMatchToU(source, useFallback);
+ }
+ }
+
+ return c;
+ }
+ private boolean hasValidTrailBytes(int[][] stateTable, short state) {
+ int[] row = stateTable[state];
+ int b, entry;
+ /* First test for final entries in this state for some commonly valid byte values. */
+ entry = row[0xa1];
+ if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
+ return true;
+ }
+ entry = row[0x41];
+ if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
+ return true;
+ }
+ /* Then test for final entries in this state. */
+ for (b = 0; b <= 0xff; b++) {
+ entry = row[b];
+ if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
+ return true;
+ }
+ }
+ /* Then recurse for transition entries. */
+ for (b = 0; b <= 0xff; b++) {
+ entry = row[b];
+ if (MBCS_ENTRY_IS_TRANSITION(entry) &&
+ hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
+ int[] row = stateTable[state];
+ int entry = row[b];
+ if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
+ return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK));
+ } else {
+ short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
+ return false; /* SI/SO are illegal for DBCS-only conversion */
+ } else {
+ return (action != MBCS_STATE_ILLEGAL);
+ }
+ }
+ }
+
+
+ }
+
+ class CharsetEncoderMBCS extends CharsetEncoderICU {
+ private boolean allowReplacementChanges = false;
+
+ CharsetEncoderMBCS(CharsetICU cs) { // creates the MBCS encoder for the charset cs
+ super(cs, fromUSubstitution); // forwards cs and fromUSubstitution to the CharsetEncoderICU constructor
+ allowReplacementChanges = true; // allow changes in implReplaceWith
+ implReset(); // start from the reset state: no partial extension match pending (see implReset)
+ }
+
+ protected void implReset() { // resets the inherited encoder state plus the MBCS-specific partial-match marker
+ super.implReset();
+ preFromUFirstCP = UConverterConstants.U_SENTINEL; // encodeLoop only resumes a partial match when this is >= 0
+ }
+
+ /**
+ * Converts UTF-16 input from source into codepage bytes written to target.
+ *
+ * Processing order: (1) when not flushing and a partial extension match is pending (preFromUFirstCP >= 0), it
+ * is continued first via continueMatchFromU(); (2) MBCS_OUTPUT_1 tables that do not map surrogates and
+ * MBCS_OUTPUT_2 tables are delegated to the cnvMBCSSingle.../cnvMBCSDouble... methods; (3) all other output
+ * types run through the general loop below.
+ *
+ * Output bytes that do not fit into target are stored in errorBuffer/errorBufferLength and
+ * CoderResult.OVERFLOW is returned. On the general path the pending code point and the SI/SO mode are saved
+ * in fromUChar32 and fromUnicodeStatus, and the position of source is advanced to the first unconsumed unit.
+ *
+ * @param source
+ * UTF-16 input; read with absolute gets, its position is set once at the end of the general path
+ * @param target
+ * receives the codepage bytes
+ * @param offsets
+ * if not null, receives one source index per byte written
+ * @param flush
+ * true if this is the end of the input; needed to emit the final SI for MBCS_OUTPUT_2_SISO
+ * @return UNDERFLOW, OVERFLOW, or the malformed/unmappable result produced by this loop or its helpers
+ */
+ @SuppressWarnings("fallthrough")
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+ // if (!source.hasRemaining() && fromUChar32 == 0)
+ // return cr[0];
+
+ int sourceArrayIndex;
+ char[] table;
+ byte[] pArray, bytes;
+ int pArrayIndex, outputType, c;
+ int prevSourceIndex, sourceIndex, nextSourceIndex;
+ int stage2Entry = 0, value = 0, length = 0, prevLength;
+ short uniMask;
+ // long asciiRoundtrips;
+
+ byte[] si_value = new byte[2];
+ byte[] so_value = new byte[2];
+ int si_value_length = 0, so_value_length = 0;
+
+ boolean gotoUnassigned = false;
+
+ try {
+
+ if (!flush && preFromUFirstCP >= 0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change
+ * with continuous offsets
+ */
+ cr[0] = continueMatchFromU(source, target, offsets, flush, -1);
+
+ if (cr[0].isError() || preFromULength < 0) {
+ return cr[0];
+ }
+ }
+
+ /* use optimized function if possible */
+ outputType = sharedData.mbcs.outputType;
+ uniMask = sharedData.mbcs.unicodeMask;
+ if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
+ if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);
+ } else {
+ cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);
+ }
+ return cr[0];
+ } else if (outputType == MBCS_OUTPUT_2) {
+ cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);
+ return cr[0];
+ }
+
+ table = sharedData.mbcs.fromUnicodeTable;
+ sourceArrayIndex = source.position();
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ bytes = sharedData.mbcs.fromUnicodeBytes;
+ }
+
+ // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
+
+ /* get the converter state from UConverter */
+ c = fromUChar32;
+
+ if (outputType == MBCS_OUTPUT_2_SISO) {
+ prevLength = fromUnicodeStatus;
+ if (prevLength == 0) {
+ /* set the real value */
+ prevLength = 1;
+ }
+ } else {
+ /* prevent fromUnicodeStatus from being set to something non-0 */
+ prevLength = 0;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ prevSourceIndex = -1;
+ sourceIndex = c == 0 ? 0 : -1;
+ nextSourceIndex = 0;
+
+ /* Get the SI/SO character for the converter */
+ si_value_length = getSISOBytes(SISO_Option.SI, options, si_value);
+ so_value_length = getSISOBytes(SISO_Option.SO, options, so_value);
+
+ /* conversion loop */
+ /*
+ * This is another piece of ugly code: A goto into the loop if the converter state contains a first
+ * surrogate from the previous function call. It saves me to check in each loop iteration a check of
+ * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could
+ * not find any other way to get around this other than using a function call for the conversion and
+ * callback, which would be even more inefficient.
+ *
+ * Markus Scherer 2000-jul-19
+ */
+ boolean doloop = true;
+ boolean doread = true;
+ if (c != 0 && target.hasRemaining()) {
+ if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
+ // c is a lead surrogate, read another input
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
+ prevSourceIndex, prevLength);
+ doloop = getTrail(source, target, uniMask, x, flush, cr);
+ doread = x.doread;
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+ prevLength = x.prevLength;
+ } else {
+ // c is not a lead surrogate, do not read another input
+ doread = false;
+ }
+ }
+
+ if (doloop) {
+ while (!doread || sourceArrayIndex < source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output. It does not catch
+ * output of more than one byte that overflows as a result of a multi-byte character or callback
+ * output from the last source character. Therefore, those situations also test for overflows
+ * and will then break the loop, too.
+ */
+ if (target.hasRemaining()) {
+ /*
+ * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
+ * surrogate pair for a "supplementary code point".
+ */
+
+ if (doread) {
+ // doread might be false only on the first looping
+
+ c = source.get(sourceArrayIndex++);
+ ++nextSourceIndex;
+
+ /*
+ * This also tests if the codepage maps single surrogates. If it does, then surrogates
+ * are not paired but mapped separately. Note that in this case unmatched surrogates are
+ * not detected.
+ */
+ if (UTF16.isSurrogate((char) c)
+ && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
+ if (UTF16.isLeadSurrogate((char) c)) {
+ // getTrail:
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
+ nextSourceIndex, prevSourceIndex, prevLength);
+ doloop = getTrail(source, target, uniMask, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+
+ if (x.doread) {
+ if (doloop)
+ continue;
+ else
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ break;
+ }
+ }
+ } else {
+ doread = true;
+ }
+ /* convert the Unicode code point in c into codepage bytes */
+
+ /*
+ * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
+ * beginning of this file.
+ *
+ * Single-byte codepages are handled with a different data structure by _MBCSSingle...
+ * functions.
+ *
+ * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
+ * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
+ * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
+ * flags for which of the 16 characters in the block are roundtrip-assigned.
+ *
+ * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
+ * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
+ * big-endian order.
+ *
+ * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
+ * byte sequences, the first two bytes in this third stage indicate with their 7th bits
+ * whether these bytes are to be written directly or actually need to be preceeded by one of
+ * the two Single-Shift codes. With this, the third stage stores one byte fewer per
+ * character than the actual maximum length of EUC byte sequences.
+ *
+ * Other than that, leading zero bytes are removed and the other bytes output. A single zero
+ * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
+ * support zero byte output as a fallback, and also does not allow output of leading zeros.
+ */
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ switch (outputType) {
+ /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */
+ /* case MBCS_OUTPUT_2:
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ length = 1;
+ } else {
+ length = 2;
+ }
+ break; */
+ case MBCS_OUTPUT_2_SISO:
+ /* 1/2-byte stateful with Shift-In/Shift-Out */
+ /*
+ * Save the old state in the converter object right here, then change the local
+ * prevLength state variable if necessary. Then, if this character turns out to be
+ * unassigned or a fallback that is not taken, the callback code must not save the new
+ * state in the converter because the new state is for a character that is not output.
+ * However, the callback must still restore the state from the converter in case the
+ * callback function changed it for its output.
+ */
+ fromUnicodeStatus = prevLength; /* save the old state */
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
+ /* no mapping, leave value==0 */
+ length = 0;
+ } else if (prevLength <= 1) {
+ length = 1;
+ } else {
+ /* change from double-byte mode to single-byte */
+ if (si_value_length == 1) {
+ value|=si_value[0]<<8;
+ length = 2;
+ } else if (si_value_length == 2) {
+ value|=si_value[1]<<8;
+ value|=si_value[0]<<16;
+ length = 3;
+ }
+ prevLength = 1;
+ }
+ } else {
+ if (prevLength == 2) {
+ length = 2;
+ } else {
+ /* change from single-byte mode to double-byte */
+ if (so_value_length == 1) {
+ value|=so_value[0]<<16;
+ length = 3;
+ } else if (so_value_length == 2) {
+ value|=so_value[1]<<16;
+ value|=so_value[0]<<24;
+ length = 4;
+ }
+ prevLength = 2;
+ }
+ }
+ break;
+ case MBCS_OUTPUT_DBCS_ONLY:
+ /* table with single-byte results, but only DBCS mappings used */
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ /* no mapping or SBCS result, not taken for DBCS-only */
+ value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
+ length = 0;
+ } else {
+ length = 2;
+ }
+ break;
+ case MBCS_OUTPUT_3:
+ pArray = bytes;
+ pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
+ | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
+ | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ length = 1;
+ } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ length = 2;
+ } else {
+ length = 3;
+ }
+ break;
+ case MBCS_OUTPUT_4:
+ value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ length = 1;
+ } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ length = 2;
+ } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
+ length = 3;
+ } else {
+ length = 4;
+ }
+ break;
+ case MBCS_OUTPUT_3_EUC:
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ /* EUC 16-bit fixed-length representation */
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ length = 1;
+ } else if ((value & 0x8000) == 0) {
+ value |= 0x8e8000;
+ length = 3;
+ } else if ((value & 0x80) == 0) {
+ value |= 0x8f0080;
+ length = 3;
+ } else {
+ length = 2;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ pArray = bytes;
+ pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
+ | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
+ | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ length = 1;
+ } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ length = 2;
+ } else if ((value & 0x800000) == 0) {
+ value |= 0x8e800000;
+ length = 4;
+ } else if ((value & 0x8000) == 0) {
+ value |= 0x8f008000;
+ length = 4;
+ } else {
+ length = 3;
+ }
+ break;
+ default:
+ /* must not occur */
+ /*
+ * To avoid compiler warnings that value & length may be used without having been
+ * initialized, we set them here. In reality, this is unreachable code. Not having a
+ * default branch also causes warnings with some compilers.
+ */
+ value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
+ length = 0;
+ break;
+ }
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) {
+ gotoUnassigned = false;
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
+ * with this data structure for fallback output to be a zero byte.
+ */
+
+ // unassigned:
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
+ prevSourceIndex, prevLength);
+ doloop = unassigned(source, target, offsets, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+ prevLength = x.prevLength;
+ if (doloop)
+ continue;
+ else
+ break;
+ }
+
+ /* write the output character bytes from value and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if (length <= target.remaining()) {
+ switch (length) {
+ /* each branch falls through to the next one */
+ case 4:
+ target.put((byte) (value >>> 24));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ case 3:
+ target.put((byte) (value >>> 16));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ case 2:
+ target.put((byte) (value >>> 8));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ case 1:
+ target.put((byte) value);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ default:
+ /* will never occur */
+ break;
+ }
+ } else {
+ int errorBufferArrayIndex;
+
+ /*
+ * We actually do this backwards here: In order to save an intermediate variable, we
+ * output first to the overflow buffer what does not fit into the regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ /* from here on, length is the number of bytes that go to the overflow buffer */
+ length -= target.remaining();
+
+ errorBufferArrayIndex = 0;
+ switch (length) {
+ /* each branch falls through to the next one */
+ case 3:
+ errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 16);
+ case 2:
+ errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8);
+ case 1:
+ errorBuffer[errorBufferArrayIndex] = (byte) value;
+ default:
+ /* will never occur */
+ break;
+ }
+ errorBufferLength = (byte) length;
+
+ /* now output what fits into the regular target */
+ value >>>= 8 * length; /* length was reduced by targetCapacity */
+ switch (target.remaining()) {
+ /* each branch falls through to the next one */
+ case 3:
+ target.put((byte) (value >>> 16));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ case 2:
+ target.put((byte) (value >>> 8));
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ case 1:
+ target.put((byte) value);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ cr[0] = CoderResult.OVERFLOW;
+ c = 0;
+ break;
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c = 0;
+ if (offsets != null) {
+ prevSourceIndex = sourceIndex;
+ sourceIndex = nextSourceIndex;
+ }
+ continue;
+ } else {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ }
+
+ /*
+ * the end of the input stream and detection of truncated input are handled by the framework, but for
+ * EBCDIC_STATEFUL conversion we need to emit an SI at the very end
+ *
+ * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input
+ */
+ if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit()
+ && c == 0) {
+
+ /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
+ if (target.hasRemaining()) {
+ target.put(si_value[0]);
+ if (si_value_length == 2) {
+ if (target.remaining() > 0) {
+ target.put(si_value[1]);
+ } else {
+ errorBuffer[0] = si_value[1];
+ errorBufferLength = 1;
+ cr[0] = CoderResult.OVERFLOW;
+ }
+ }
+ if (offsets != null) {
+ /* set the last source character's index (sourceIndex points at sourceLimit now) */
+ offsets.put(prevSourceIndex);
+ }
+ } else {
+ /* target is full */
+ errorBuffer[0] = si_value[0];
+ if (si_value_length == 2) {
+ errorBuffer[1] = si_value[1];
+ }
+ errorBufferLength = si_value_length;
+ cr[0] = CoderResult.OVERFLOW;
+ }
+ prevLength = 1; /* we switched into SBCS */
+ }
+
+ /* set the converter state back into UConverter */
+ fromUChar32 = c;
+ fromUnicodeStatus = prevLength;
+
+ source.position(sourceArrayIndex);
+ } catch (BufferOverflowException ex) {
+ /* a relative put() on target or offsets ran out of space: report it as a plain overflow */
+ cr[0] = CoderResult.OVERFLOW;
+ }
+
+ return cr[0];
+ }
+
+ /*
+ * This is another simple conversion function for internal use by other conversion implementations. It does not
+ * use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in
+ * UConverter). It handles conversion extensions but not GB 18030.
+ *
+ * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value. The function
+ * returns the number of bytes in *pValue: 1..4 the number of bytes in *pValue 0 unassigned (*pValue undefined)
+ * -1 illegal (currently not used, *pValue undefined)
+ *
+ * *pValue will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in bits
+ * 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff.
+ */
+ int fromUChar32(int c, int[] pValue, boolean isUseFallback) {
+ // #if 0
+ // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
+ // const uint8_t *p;
+ // #endif
+
+ char[] table;
+ int stage2Entry;
+ int value;
+ int length;
+ int p;
+
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) {
+ table = sharedData.mbcs.fromUnicodeTable;
+
+ /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
+ if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
+ value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+ /* is this code point assigned, or do we use fallbacks? */
+ if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
+ pValue[0] = value & 0xff;
+ return 1;
+ }
+ } else /* outputType!=MBCS_OUTPUT_1 */{
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ switch (sharedData.mbcs.outputType) {
+ case MBCS_OUTPUT_2:
+ value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);
+ if (value <= 0xff) {
+ length = 1;
+ } else {
+ length = 2;
+ }
+ break;
+ // #if 0
+ // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
+ // case MBCS_OUTPUT_DBCS_ONLY:
+ // /* table with single-byte results, but only DBCS mappings used */
+ // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ // if(value<=0xff) {
+ // /* no mapping or SBCS result, not taken for DBCS-only */
+ // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ // length=0;
+ // } else {
+ // length=2;
+ // }
+ // break;
+ case MBCS_OUTPUT_3:
+ byte[] bytes = sharedData.mbcs.fromUnicodeBytes;
+ p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ value = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |
+ ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |
+ (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ if (value <= 0xff) {
+ length = 1;
+ } else if (value <= 0xffff) {
+ length = 2;
+ } else {
+ length = 3;
+ }
+ break;
+ // case MBCS_OUTPUT_4:
+ // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ // if(value<=0xff) {
+ // length=1;
+ // } else if(value<=0xffff) {
+ // length=2;
+ // } else if(value<=0xffffff) {
+ // length=3;
+ // } else {
+ // length=4;
+ // }
+ // break;
+ // case MBCS_OUTPUT_3_EUC:
+ // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ // /* EUC 16-bit fixed-length representation */
+ // if(value<=0xff) {
+ // length=1;
+ // } else if((value&0x8000)==0) {
+ // value|=0x8e8000;
+ // length=3;
+ // } else if((value&0x80)==0) {
+ // value|=0x8f0080;
+ // length=3;
+ // } else {
+ // length=2;
+ // }
+ // break;
+ // case MBCS_OUTPUT_4_EUC:
+ // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ // /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ // if(value<=0xff) {
+ // length=1;
+ // } else if(value<=0xffff) {
+ // length=2;
+ // } else if((value&0x800000)==0) {
+ // value|=0x8e800000;
+ // length=4;
+ // } else if((value&0x8000)==0) {
+ // value|=0x8f008000;
+ // length=4;
+ // } else {
+ // length=3;
+ // }
+ // break;
+ // #endif
+ default:
+ /* must not occur */
+ return -1;
+ }
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
+ || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with
+ * this data structure for fallback output to be a zero byte.
+ */
+ /* assigned */
+ pValue[0] = value;
+ return length;
+ }
+ }
+ }
+
+ if (sharedData.mbcs.extIndexes != null) {
+ length = simpleMatchFromU(c, pValue, isUseFallback);
+ return length >= 0 ? length : -length; /* return abs(length); */
+ }
+
+ /* unassigned */
+ return 0;
+ }
+
+ /*
+ * continue partial match with new input, requires cnv->preFromUFirstCP>=0 never called for simple,
+ * single-character conversion
+ */
+ private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush,
+ int srcIndex) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+ int[] value = new int[1];
+ int match;
+
+ match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush);
+ if (match >= 2) {
+ match -= 2; /* remove 2 for the initial code point */
+
+ if (match >= preFromULength) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position() + match - preFromULength);
+ preFromULength = 0;
+ } else {
+ /* the match did not use all of preFromU[] - keep the rest for replay */
+ int length = preFromULength - match;
+ System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length);
+ preFromULength = (byte) -length;
+ }
+
+ /* finish the partial match */
+ preFromUFirstCP = UConverterConstants.U_SENTINEL;
+
+ /* write result */
+ writeFromU(value[0], target, offsets, srcIndex);
+ } else if (match < 0) {
+ /* save state for partial match */
+ int sArrayIndex;
+ int j;
+
+ /* just _append_ the newly consumed input to preFromU[] */
+ sArrayIndex = source.position();
+ match = -match - 2; /* remove 2 for the initial code point */
+ for (j = preFromULength; j < match; ++j) {
+ preFromUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+ preFromULength = (byte) match;
+ } else { /* match==0 or 1 */
+ /*
+ * no match
+ *
+ * We need to split the previous input into two parts:
+ *
+ * 1. The first code point is unmappable - that's how we got into trying the extension data in the first
+ * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and
+ * prepare the rest of the previous input for 2.
+ *
+ * 2. The rest of the previous input must be converted once we come back from the callback for the first
+ * code point. At that time, we have to try again from scratch to convert these input characters. The
+ * replay will be handled by the ucnv.c conversion code.
+ */
+
+ if (match == 1) {
+ /* matched, no mapping but request for */
+ useSubChar1 = true;
+ }
+
+ /* move the first code point to the error field */
+ fromUChar32 = preFromUFirstCP;
+ preFromUFirstCP = UConverterConstants.U_SENTINEL;
+
+ /* mark preFromU for replay */
+ preFromULength = (byte) -preFromULength;
+
+ /* set the error code for unassigned */
+ // TODO: figure out what the unmappable length really should be
+ cr = CoderResult.unmappableForLength(1);
+ }
+ return cr;
+ }
+
+ /**
+ * Finds the longest from-Unicode extension mapping that starts with firstCP and continues with the units of
+ * preArray and then of source. The tables come from sharedData.mbcs.extIndexes; if that is null, returns 0.
+ * @param firstCP
+ * the first code point before all the other UChars
+ * @param preArray
+ * UChars that must match before any unit of source is looked at
+ * @param preArrayBegin
+ * index in preArray of the first UChar to match
+ * @param preLength
+ * number of UChars to match from preArray, >=0
+ * @param source
+ * UChars that can be used to complete a match; read from its position without moving it; may be null
+ * @param pMatchValue
+ * [out] pMatchValue[0] receives the result value for the match from the data structure
+ * @param isUseFallback
+ * "use fallback" flag, passed to isFromUUseFallback() together with firstCP
+ * @param flush
+ * true if the end of the input stream is reached
+ * @return >1: matched, return value=total match length (number of input units matched) 1: matched, no mapping
+ * but request for subchar1 (only for the first code point) 0: no match <0: partial match, return
+ * value=negative total match length (partial matches are never returned for flush==true) (partial
+ * matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) the matchLength is 2 if only
+ * firstCP matched, and >2 if firstCP and further code units matched
+ */
+ // C original: static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength,
+ // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)
+ private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source,
+ int[] pMatchValue, boolean isUseFallback, boolean flush) {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+
+ CharBuffer stage12, stage3;
+ IntBuffer stage3b;
+
+ CharBuffer fromUTableUChars, fromUSectionUChars;
+ IntBuffer fromUTableValues, fromUSectionValues;
+
+ int value, matchValue;
+ int i, j, index, length, matchLength; // i counts units taken from preArray, j units taken from source
+ char c;
+
+ if (cx == null) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* trie lookup of firstCP */
+ index = firstCP >>> 10; /* stage 1 index */
+ if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {
+ return 0; /* the first code point is outside the trie */
+ }
+
+ stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
+ stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
+ index = FROM_U(stage12, stage3, index, firstCP);
+
+ stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
+ value = stage3b.get(stage3b.position() + index);
+ if (value == 0) {
+ return 0;
+ }
+
+ if (TO_U_IS_PARTIAL(value)) { // NOTE(review): to-Unicode test on a from-Unicode value; the loop below uses FROM_U_IS_PARTIAL - confirm intended
+ /* partial match, enter the loop below */
+ index = FROM_U_GET_PARTIAL_INDEX(value);
+
+ /* initialize */
+ fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
+ fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
+
+ matchValue = 0;
+ i = j = matchLength = 0;
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for (;;) {
+ /* go to the next section: slice both tables at index, then put the table positions back */
+ int oldpos = fromUTableUChars.position();
+ fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice();
+ fromUTableUChars.position(oldpos);
+ oldpos = fromUTableValues.position();
+ fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice();
+ fromUTableValues.position(oldpos);
+
+ /* read first pair of the section: the UChar slot is used as the section length, the value slot as the result for the input matched so far */
+ length = fromUSectionUChars.get();
+ value = fromUSectionValues.get();
+ if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) {
+ /* remember longest match so far */
+ matchValue = value;
+ matchLength = 2 + i + j;
+ }
+
+ /* match pre[] then src[] */
+ if (i < preLength) {
+ c = preArray[preArrayBegin + i++];
+ } else if (source != null && j < source.remaining()) {
+ c = source.get(source.position() + j++); // absolute get: the position of source is not changed here
+ } else {
+ /* all input consumed, partial match */
+ if (flush || (length = (i + j)) > MAX_UCHARS) {
+ /*
+ * end of the entire input stream, stop with the longest match so far or: partial match must
+ * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time; length was set to i + j in the condition above */
+ return -(2 + length);
+ }
+ }
+
+ /* search for the current UChar */
+ index = findFromU(fromUSectionUChars, length, c);
+ if (index < 0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ value = fromUSectionValues.get(fromUSectionValues.position() + index);
+ if (FROM_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ index = FROM_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
+ /* full match, stop with result */
+ matchValue = value;
+ matchLength = 2 + i + j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if (matchLength == 0) {
+ /* no match at all */
+ return 0;
+ }
+ } else /* result from firstCP trie lookup */{
+ if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
+ /* full match, stop with result */
+ matchValue = value;
+ matchLength = 2;
+ } else {
+ /* fallback not taken */
+ return 0;
+ }
+ }
+
+ if ((matchValue & FROM_U_RESERVED_MASK) != 0) {
+ /* do not interpret values with reserved bits used, for forward compatibility */
+ return 0;
+ }
+
+ /* return result */
+ if (matchValue == FROM_U_SUBCHAR1) {
+ return 1; /* assert matchLength==2 */
+ }
+
+ pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+ }
+
+ private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) {
+ int[] value = new int[1];
+ int match; // signed
+
+ /* try to match */
+ match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true);
+ if (match >= 2) {
+ /* write result for simple, single-character conversion */
+ int length;
+ boolean isRoundtrip;
+
+ isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);
+ length = FROM_U_GET_LENGTH(value[0]);
+ value[0] = FROM_U_GET_DATA(value[0]);
+
+ if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {
+ pValue[0] = value[0];
+ return isRoundtrip ? length : -length;
+ // #if 0 /* not currently used */
+ // } else if(length==4) {
+ // /* de-serialize a 4-byte result */
+ // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
+ // *pValue=
+ // ((uint32_t)result[0]<<24)|
+ // ((uint32_t)result[1]<<16)|
+ // ((uint32_t)result[2]<<8)|
+ // result[3];
+ // return isRoundtrip ? 4 : -4;
+ // #endif
+ }
+ }
+
+ /*
+ * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no
+ * match found, preferred - match==0: no match found in the first place - match<0: partial
+ * match, not supported for simple conversion (and flush==TRUE)
+ */
+ return 0;
+ }
+
+ @SuppressWarnings("fallthrough")
+ private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+
+ byte bufferArray[] = new byte[1 + MAX_BYTES];
+ int bufferArrayIndex = 0;
+ byte[] resultArray;
+ int resultArrayIndex;
+ int length, prevLength;
+
+ length = FROM_U_GET_LENGTH(value);
+ value = FROM_U_GET_DATA(value);
+
+ /* output the result */
+ if (length <= FROM_U_MAX_DIRECT_LENGTH) {
+ /*
+ * Generate a byte array and then write it below. This is not the fastest possible way, but it should be
+ * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once
+ * this way.
+ */
+ int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */
+ switch (length) {
+ case 3:
+ bufferArray[p++] = (byte) (value >>> 16);
+ case 2:
+ bufferArray[p++] = (byte) (value >>> 8);
+ case 1:
+ bufferArray[p++] = (byte) value;
+ default:
+ break; /* will never occur */
+ }
+ resultArray = bufferArray;
+ resultArrayIndex = bufferArrayIndex + 1;
+ } else {
+ byte[] slice = new byte[length];
+
+ ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));
+ bb.position(value);
+ bb.get(slice, 0, slice.length);
+
+ resultArray = slice;
+ resultArrayIndex = 0;
+ }
+
+ /* with correct data we have length>0 */
+
+ if ((prevLength = fromUnicodeStatus) != 0) {
+ /* handle SI/SO stateful output */
+ byte shiftByte;
+
+ if (prevLength > 1 && length == 1) {
+ /* change from double-byte mode to single-byte */
+ shiftByte = (byte) UConverterConstants.SI;
+ fromUnicodeStatus = 1;
+ } else if (prevLength == 1 && length > 1) {
+ /* change from single-byte mode to double-byte */
+ shiftByte = (byte) UConverterConstants.SO;
+ fromUnicodeStatus = 2;
+ } else {
+ shiftByte = 0;
+ }
+
+ if (shiftByte != 0) {
+ /* prepend the shift byte to the result bytes */
+ bufferArray[0] = shiftByte;
+ if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) {
+ System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length);
+ }
+ resultArray = bufferArray;
+ resultArrayIndex = bufferArrayIndex;
+ ++length;
+ }
+ }
+
+ return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);
+ }
+
+ /*
+ * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
+ * to the target
+ */
+ private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
+ int length, boolean flush, CoderResult[] cr) {
+ // ByteBuffer cx;
+ long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK;
+
+ useSubChar1 = false;
+
+ if (sharedData.mbcs.extIndexes != null
+ && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) {
+ return 0; /* an extension mapping handled the input */
+ }
+
+ /* GB 18030 */
+ if ((options & MBCS_OPTION_GB18030) != 0) {
+ long[] range;
+ int i;
+
+ for (i = 0; i < gb18030Ranges.length; ++i) {
+ range = gb18030Ranges[i];
+ if (range[0] <= cp && cp <= range[1]) {
+ /* found the Unicode code point, output the four-byte sequence for it */
+ long linear;
+ byte bytes[] = new byte[4];
+
+ /* get the linear value of the first GB 18030 code in this range */
+ linear = range[2] - LINEAR_18030_BASE;
+
+ /* add the offset from the beginning of the range */
+ linear += (cp - range[0]);
+
+ bytes[3] = (byte) (0x30 + linear % 10);
+ linear /= 10;
+ bytes[2] = (byte) (0x81 + linear % 126);
+ linear /= 126;
+ bytes[1] = (byte) (0x30 + linear % 10);
+ linear /= 10;
+ bytes[0] = (byte) (0x81 + linear);
+
+ /* output this sequence */
+ cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);
+ return 0;
+ }
+ }
+ }
+
+ /* no mapping */
+ cr[0] = CoderResult.unmappableForLength(length);
+ return (int) cp;
+ }
+
+ /*
+ * NOTE(review): text appears to be missing from this copy between the start of this comment and the
+ * condition below - the method header and the statements that assign "match" and "value" are not present
+ * here. Presumably this is the body of initialMatchFromU(), which fromU() calls with
+ * (cp, source, target, offsets, srcIndex, flush, cr) - restore it from the upstream source and confirm.
+ * target= 2
+ && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position() + match - 2); /* remove 2 for the initial code point */
+
+ /* write result to target */
+ cr[0] = writeFromU(value[0], target, offsets, srcIndex);
+ return true;
+ } else if (match < 0) {
+ /* save state for partial match: a negative match carries the negated number of matched units */
+ int sArrayIndex;
+ int j;
+
+ /* copy the first code point */
+ preFromUFirstCP = cp;
+
+ /* now copy the newly consumed input */
+ sArrayIndex = source.position();
+ match = -match - 2; /* remove 2 for the initial code point */
+ for (j = 0; j < match; ++j) {
+ preFromUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+ preFromULength = (byte) match;
+ return true; /* input consumed; cr[0] is left as the caller set it */
+ } else if (match == 1) {
+ /* matched, no mapping but request for subchar1 */
+ useSubChar1 = true;
+ return false;
+ } else /* match==0 no match */{
+ return false;
+ }
+ }
+
+ CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ // Just call encodeLoop to remove duplicate code.
+ return encodeLoop(source, target, offsets, flush);
+ }
+
+ /*
+ * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the
+ * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier.
+ *
+ * Reads UTF-16 code units from source, starting at its position, and writes one byte to target for every
+ * code unit that has a usable result in the base table. Code points without one are handed to fromU().
+ * Offsets are not written per byte; they are filled in as runs, whenever an unmapped code point is reached
+ * and once more at the end.
+ *
+ * A code point that is still pending is carried between calls in fromUChar32. On return the position of
+ * source is set behind the consumed input. The return value is the content of cr[0]: UNDERFLOW unless the
+ * loop set OVERFLOW, a malformed-input result or an unmappable-character result.
+ */
+ private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ boolean flush) {
+
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+ int sourceArrayIndex, lastSource;
+ int targetCapacity, length;
+ char[] table;
+ byte[] results;
+
+ int c, sourceIndex;
+ char value, minValue;
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+ targetCapacity = target.remaining();
+ table = sharedData.mbcs.fromUnicodeTable;
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
+ // be a ByteBuffer so results can be a 16-bit view
+ // of it?
+ } else {
+ results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
+ // ByteBuffer so results can be a 16-bit view of it?
+ }
+
+ if (useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue = 0x800;
+ } else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue = 0xc00;
+ }
+
+ /* get the converter state from UConverter */
+ c = fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = c == 0 ? 0 : -1;
+ lastSource = sourceArrayIndex; /* start of the run of input whose offsets have not been written yet */
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
+ * sourceLength and targetCapacity
+ */
+ length = source.limit() - sourceArrayIndex;
+ if (length < targetCapacity) {
+ targetCapacity = length;
+ }
+
+ boolean doloop = true;
+ if (c != 0 && targetCapacity > 0) {
+ /* a pending code point is treated as a lead surrogate that still needs its trail unit */
+ SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
+ doloop = getTrailSingleBMP(source, x, cr); /* false on every path of getTrailSingleBMP() */
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ }
+
+ if (doloop) {
+ while (targetCapacity > 0) {
+ /*
+ * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair
+ * for a "supplementary code point".
+ */
+ c = source.get(sourceArrayIndex++);
+ /*
+ * Do not immediately check for single surrogates: Assume that they are unassigned and check for
+ * them in that case. This speeds up the conversion of assigned characters.
+ */
+ /* convert the Unicode code point in c into codepage bytes */
+ value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if (value >= minValue) {
+ /* assigned, write the output character bytes from value and length */
+ /* length==1 */
+ /* this is easy because we know that there is enough space */
+ target.put((byte) value);
+ --targetCapacity;
+
+ /* normal end of conversion: prepare for a new character */
+ c = 0;
+ continue;
+ } else if (!UTF16.isSurrogate((char) c)) {
+ /* normal, unassigned BMP character */
+ } else if (UTF16.isLeadSurrogate((char) c)) {
+ // getTrail:
+ SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
+ doloop = getTrailSingleBMP(source, x, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ if (!doloop)
+ break;
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ break;
+ }
+
+ /* c does not have a mapping */
+
+ /* get the number of code units for c to correctly advance sourceIndex */
+ length = UTF16.getCharCount(c);
+
+ /* set offsets since the start or the last extension */
+ if (offsets != null) {
+ int count = sourceArrayIndex - lastSource;
+
+ /* do not set the offset for this character */
+ count -= length;
+
+ while (count > 0) {
+ offsets.put(sourceIndex++);
+ --count;
+ }
+ /* offsets and sourceIndex are now set for the current character */
+ }
+
+ /* try an extension mapping */
+ lastSource = sourceArrayIndex;
+ source.position(sourceArrayIndex); /* fromU() reads and advances the position of source */
+ c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex += length + (sourceArrayIndex - lastSource); /* c itself plus input consumed by fromU() */
+ lastSource = sourceArrayIndex;
+
+ if (cr[0].isError()) {
+ /*
+ * malformed or unmappable input
+ * NOTE(review): java.nio's CoderResult.isError() is documented as false for OVERFLOW, so an
+ * overflow reported by fromU() appears to take the else branch below - confirm.
+ */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity = target.remaining();
+ length = source.limit() - sourceArrayIndex;
+ if (length < targetCapacity) {
+ targetCapacity = length;
+ }
+ }
+ }
+ }
+
+ if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ }
+
+ /* set offsets since the start or the last callback */
+ if (offsets != null) {
+ int count = sourceArrayIndex - lastSource;
+ while (count > 0) {
+ offsets.put(sourceIndex++);
+ --count;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ fromUChar32 = c;
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ /*
+ * This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages.
+ *
+ * Reads UTF-16 code units from source, starting at its position. Surrogate pairs are combined by
+ * getTrailDouble(). A code point with a usable result in the base table produces one byte in target (and one
+ * entry in offsets when offsets is not null); any other code point is handed to unassignedDouble().
+ *
+ * A code point that is still pending is carried between calls in fromUChar32. On return the position of
+ * source is set behind the consumed input. The return value is the content of cr[0]: UNDERFLOW unless the
+ * loop or one of the helpers set another result.
+ */
+ private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
+ IntBuffer offsets, boolean flush) {
+
+ CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+ int sourceArrayIndex;
+
+ char[] table;
+ byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array
+
+ int c;
+ int sourceIndex, nextSourceIndex;
+
+ char value, minValue;
+
+ /* set up the local pointers */
+ short uniMask;
+ sourceArrayIndex = source.position();
+
+ table = sharedData.mbcs.fromUnicodeTable;
+
+ if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+ results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
+ // be a ByteBuffer so results can be a 16-bit view
+ // of it?
+ } else {
+ results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
+ // ByteBuffer so results can be a 16-bit view of it?
+ }
+
+ if (useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue = 0x800;
+ } else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue = 0xc00;
+ }
+ // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation
+ uniMask = sharedData.mbcs.unicodeMask;
+
+ /* get the converter state from UConverter */
+ c = fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = c == 0 ? 0 : -1;
+ nextSourceIndex = 0;
+
+ /*
+ * doloop: false once a helper has reported that the conversion loop must not run (or must stop).
+ * doread: false while c already holds a complete code point that still has to be converted.
+ */
+ boolean doloop = true;
+ boolean doread = true;
+ if (c != 0 && target.hasRemaining()) {
+ if (UTF16.isLeadSurrogate((char) c)) {
+ SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+ doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
+ doread = x.doread;
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ } else {
+ doread = false; /* convert the pending code point before reading more input */
+ }
+ }
+
+ if (doloop) {
+ while (!doread || sourceArrayIndex < source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output. It does not catch
+ * output of more than one byte that overflows as a result of a multi-byte character or callback
+ * output from the last source character. Therefore, those situations also test for overflows and
+ * will then break the loop, too.
+ */
+ if (target.hasRemaining()) {
+ /*
+ * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate
+ * pair for a "supplementary code point".
+ */
+
+ if (doread) {
+ c = source.get(sourceArrayIndex++);
+ ++nextSourceIndex;
+ if (UTF16.isSurrogate((char) c)) {
+ if (UTF16.isLeadSurrogate((char) c)) {
+ // getTrail:
+ SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
+ nextSourceIndex);
+ doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ if (x.doread) { /* the helper finished with c; only a cleared doread falls through */
+ if (doloop)
+ continue;
+ else
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ break;
+ }
+ }
+ } else {
+ doread = true; /* c was already available; read again on the next iteration */
+ }
+
+ /* convert the Unicode code point in c into codepage bytes */
+ value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if (value >= minValue) {
+ /* assigned, write the output character bytes from value and length */
+ /* length==1 */
+ /* this is easy because we know that there is enough space */
+ target.put((byte) value);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c = 0;
+ sourceIndex = nextSourceIndex;
+ } else { /* unassigned */
+ /* try an extension mapping */
+ SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
+ nextSourceIndex);
+ doloop = unassignedDouble(source, target, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ if (!doloop)
+ break;
+ }
+ } else {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ fromUChar32 = c;
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ /*
+  * This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages.
+  *
+  * Reads UTF-16 code units from source, starting at its position. Unless the codepage maps single
+  * surrogates, surrogate pairs are combined by getTrailDouble(). Every code point is looked up through
+  * stage 2 of the from-Unicode table; a usable result is written as one byte (value <= 0xff) or as two
+  * bytes, with one entry in offsets per byte written when offsets is not null. Code points without a usable
+  * result are handed to unassignedDouble().
+  *
+  * If only the first byte of a two-byte result fits, the second byte is stored in errorBuffer and OVERFLOW
+  * is reported. A code point that is still pending is carried between calls in fromUChar32. On return the
+  * position of source is set behind the consumed input. The return value is the content of cr[0].
+  */
+ private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
+         IntBuffer offsets, boolean flush) {
+     CoderResult[] cr = { CoderResult.UNDERFLOW };
+
+     int sourceArrayIndex;
+
+     char[] table;
+     byte[] bytes;
+
+     int c, sourceIndex, nextSourceIndex;
+
+     int stage2Entry;
+     int value;
+     int length;
+     short uniMask;
+
+     /* flags that tell whether the codepage maps single surrogates and supplementary code points */
+     uniMask = sharedData.mbcs.unicodeMask;
+
+     /* set up the local pointers */
+     sourceArrayIndex = source.position();
+
+     table = sharedData.mbcs.fromUnicodeTable;
+
+     if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+         bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+     } else {
+         bytes = sharedData.mbcs.fromUnicodeBytes;
+     }
+
+     /* get the converter state from UConverter */
+     c = fromUChar32;
+
+     /* sourceIndex=-1 if the current character began in the previous buffer */
+     sourceIndex = c == 0 ? 0 : -1;
+     nextSourceIndex = 0;
+
+     /*
+      * conversion loop
+      * doloop: false once a helper has reported that the loop must not run (or must stop).
+      * doread: false while c already holds a complete code point that still has to be converted.
+      */
+     boolean doloop = true;
+     boolean doread = true;
+     if (c != 0 && target.hasRemaining()) {
+         if (UTF16.isLeadSurrogate((char) c)) {
+             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+             doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
+             doread = x.doread;
+             c = x.c;
+             sourceArrayIndex = x.sourceArrayIndex;
+             sourceIndex = x.sourceIndex;
+             nextSourceIndex = x.nextSourceIndex;
+         } else {
+             /* convert the pending code point before reading more input */
+             doread = false;
+         }
+     }
+
+     if (doloop) {
+         while (!doread || sourceArrayIndex < source.limit()) {
+             /*
+              * This following test is to see if available input would overflow the output. It does not catch
+              * output of more than one byte that overflows as a result of a multi-byte character or callback
+              * output from the last source character. Therefore, those situations also test for overflows and
+              * will then break the loop, too.
+              */
+             if (target.hasRemaining()) {
+                 if (doread) {
+                     /*
+                      * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
+                      * surrogate pair for a "supplementary code point".
+                      */
+                     c = source.get(sourceArrayIndex++);
+                     ++nextSourceIndex;
+                     /*
+                      * This also tests if the codepage maps single surrogates. If it does, then surrogates are
+                      * not paired but mapped separately. Note that in this case unmatched surrogates are not
+                      * detected.
+                      */
+                     if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
+                         if (UTF16.isLeadSurrogate((char) c)) {
+                             // getTrail:
+                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
+                                     nextSourceIndex);
+                             doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
+                             c = x.c;
+                             sourceArrayIndex = x.sourceArrayIndex;
+                             sourceIndex = x.sourceIndex;
+                             nextSourceIndex = x.nextSourceIndex;
+
+                             /* the helper finished with c unless it cleared doread */
+                             if (x.doread) {
+                                 if (doloop)
+                                     continue;
+                                 else
+                                     break;
+                             }
+                         } else {
+                             /* this is an unmatched trail code unit (2nd surrogate) */
+                             /* callback(illegal) */
+                             cr[0] = CoderResult.malformedForLength(1);
+                             break;
+                         }
+                     }
+                 } else {
+                     /* c was already available; read again on the next iteration */
+                     doread = true;
+                 }
+
+                 /* convert the Unicode code point in c into codepage bytes */
+                 stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+
+                 /* get the bytes and the length for the output */
+                 /* MBCS_OUTPUT_2 */
+                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                     length = 1;
+                 } else {
+                     length = 2;
+                 }
+
+                 /* is this code point assigned, or do we use fallbacks? */
+                 if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) {
+                     /*
+                      * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
+                      * with this data structure for fallback output to be a zero byte.
+                      */
+
+                     // unassigned:
+                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
+                             nextSourceIndex);
+
+                     doloop = unassignedDouble(source, target, x, flush, cr);
+                     c = x.c;
+                     sourceArrayIndex = x.sourceArrayIndex;
+                     sourceIndex = x.sourceIndex;
+                     nextSourceIndex = x.nextSourceIndex;
+                     if (doloop)
+                         continue;
+                     else
+                         break;
+                 }
+
+                 /* write the output character bytes from value and length */
+                 /* from the first if in the loop we know that targetCapacity>0 */
+                 if (length == 1) {
+                     /* this is easy because we know that there is enough space */
+                     target.put((byte) value);
+                     if (offsets != null) {
+                         offsets.put(sourceIndex);
+                     }
+                 } else /* length==2 */{
+                     target.put((byte) (value >>> 8));
+                     /*
+                      * The lead byte has been written already, so only one more byte of space is needed.
+                      * (Asking for two remaining bytes here would report OVERFLOW although the trail byte
+                      * still fits.)
+                      */
+                     if (target.hasRemaining()) {
+                         target.put((byte) value);
+                         if (offsets != null) {
+                             offsets.put(sourceIndex);
+                             offsets.put(sourceIndex);
+                         }
+                     } else {
+                         if (offsets != null) {
+                             offsets.put(sourceIndex);
+                         }
+                         /* keep the trail byte for later output */
+                         errorBuffer[0] = (byte) value;
+                         errorBufferLength = 1;
+
+                         /* target overflow */
+                         cr[0] = CoderResult.OVERFLOW;
+                         c = 0;
+                         break;
+                     }
+                 }
+
+                 /* normal end of conversion: prepare for a new character */
+                 c = 0;
+                 sourceIndex = nextSourceIndex;
+                 continue;
+             } else {
+                 /* target is full */
+                 cr[0] = CoderResult.OVERFLOW;
+                 break;
+             }
+         }
+     }
+
+     /* set the converter state back into UConverter */
+     fromUChar32 = c;
+
+     /* write back the updated pointers */
+     source.position(sourceArrayIndex);
+
+     return cr[0];
+ }
+
+ /*
+  * Mutable pair of values that getTrailSingleBMP() may change for its caller: the current code point and
+  * the read index into the source buffer.
+  */
+ private final class SideEffectsSingleBMP {
+     int c;
+     int sourceArrayIndex;
+
+     SideEffectsSingleBMP(int codePoint, int readIndex) {
+         this.c = codePoint;
+         this.sourceArrayIndex = readIndex;
+     }
+ }
+
+ // Function made out of the block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets.
+ // Assumes that x.c is a lead surrogate. Looks at the code unit at x.sourceArrayIndex without reading
+ // past source.limit():
+ // - no input left: nothing is changed, so the lead surrogate stays pending;
+ // - a trail surrogate: it is consumed, x.c becomes the supplementary code point and cr[0] is set to
+ // unmappable with length 2, because this kind of codepage does not map supplementary code points;
+ // - anything else: cr[0] is set to malformed with length 1.
+ // The return value is false in all three cases, which tells the caller not to (continue to) loop.
+ private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {
+     if (x.sourceArrayIndex >= source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if (!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         cr[0] = CoderResult.malformedForLength(1);
+         return false;
+     }
+
+     ++x.sourceArrayIndex;
+     x.c = UCharacter.getCodePoint((char) x.c, next);
+     /* this codepage does not map supplementary code points */
+     /* callback(unassigned) */
+     cr[0] = CoderResult.unmappableForLength(2);
+     return false;
+ }
+
+ /*
+  * Mutable set of loop variables that getTrail() and unassigned() may change for their caller. doread
+  * starts out as true.
+  */
+ private final class SideEffects {
+     int c;
+     int sourceArrayIndex;
+     int sourceIndex;
+     int nextSourceIndex;
+     int prevSourceIndex;
+     int prevLength;
+     boolean doread = true;
+
+     SideEffects(int codePoint, int readIndex, int srcIndex, int nextSrcIndex, int prevSrcIndex,
+             int previousLength) {
+         this.c = codePoint;
+         this.sourceArrayIndex = readIndex;
+         this.sourceIndex = srcIndex;
+         this.nextSourceIndex = nextSrcIndex;
+         this.prevSourceIndex = prevSrcIndex;
+         this.prevLength = previousLength;
+     }
+ }
+
+ // Function made out of the block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets.
+ // Assumes that x.c is a lead surrogate. Looks at the code unit at x.sourceArrayIndex:
+ // - no input left: returns false and changes nothing;
+ // - not a trail surrogate: sets cr[0] to malformed with length 1 and returns false;
+ // - a trail surrogate: consumes it and stores the supplementary code point in x.c. If the codepage has
+ // supplementary mappings, x.doread is cleared and true is returned so that the caller converts x.c.
+ // Otherwise fromUnicodeStatus is restored from x.prevLength, x.doread is set and the result of
+ // unassigned() is returned.
+ private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x,
+         boolean flush, CoderResult[] cr) {
+     if (x.sourceArrayIndex >= source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if (!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         cr[0] = CoderResult.malformedForLength(1);
+         return false;
+     }
+
+     ++x.sourceArrayIndex;
+     ++x.nextSourceIndex;
+     /* convert this supplementary code point */
+     x.c = UCharacter.getCodePoint((char) x.c, next);
+
+     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0) {
+         /* the caller looks up x.c itself */
+         x.doread = false;
+         return true;
+     }
+
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     fromUnicodeStatus = x.prevLength; /* save the old state */
+     /* callback(unassigned) */
+     x.doread = true;
+     return unassigned(source, target, null, x, flush, cr);
+ }
+
+ // Function made out of the block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets.
+ // Hands x.c to fromU() (always without an offsets buffer), then updates x: c gets the return value of
+ // fromU(), sourceArrayIndex follows the position of source, nextSourceIndex grows by the input that
+ // fromU() consumed and prevLength is refreshed from fromUnicodeStatus.
+ // Returns false if cr[0] is an error afterwards, true otherwise. Only when offsets is not null are
+ // x.prevSourceIndex and x.sourceIndex moved on to the next character.
+ // NOTE(review): x.nextSourceIndex is passed as the "length" argument of fromU(), which uses it for the
+ // unmappable result - confirm that this is intended.
+ private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x,
+         boolean flush, CoderResult[] cr) {
+     /* try an extension mapping */
+     final int before = x.sourceArrayIndex;
+     source.position(before);
+     x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
+
+     final int after = source.position();
+     x.sourceArrayIndex = after;
+     x.nextSourceIndex += after - before;
+     x.prevLength = fromUnicodeStatus;
+
+     if (cr[0].isError()) {
+         /* malformed or unmappable: stop the conversion loop */
+         return false;
+     }
+
+     /* the input was handled, continue: prepare for a new character */
+     if (offsets != null) {
+         x.prevSourceIndex = x.sourceIndex;
+         x.sourceIndex = x.nextSourceIndex;
+     }
+     return true;
+ }
+
+ /*
+  * Mutable set of loop variables that getTrailDouble() and unassignedDouble() may change for their caller.
+  * doread starts out as true.
+  */
+ private final class SideEffectsDouble {
+     int c;
+     int sourceArrayIndex;
+     int sourceIndex;
+     int nextSourceIndex;
+     boolean doread = true;
+
+     SideEffectsDouble(int codePoint, int readIndex, int srcIndex, int nextSrcIndex) {
+         this.c = codePoint;
+         this.sourceArrayIndex = readIndex;
+         this.sourceIndex = srcIndex;
+         this.nextSourceIndex = nextSrcIndex;
+     }
+ }
+
+ // Function made out of the block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets.
+ // Assumes that x.c is a lead surrogate. Looks at the code unit at x.sourceArrayIndex:
+ // - no input left: returns false and changes nothing;
+ // - not a trail surrogate: sets cr[0] to malformed with length 1 and returns false;
+ // - a trail surrogate: consumes it and stores the supplementary code point in x.c. If the codepage has
+ // supplementary mappings, x.doread is cleared and true is returned so that the caller converts x.c.
+ // Otherwise x.doread is set and the result of unassignedDouble() is returned.
+ private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,
+         SideEffectsDouble x, boolean flush, CoderResult[] cr) {
+     if (x.sourceArrayIndex >= source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if (!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         cr[0] = CoderResult.malformedForLength(1);
+         return false;
+     }
+
+     ++x.sourceArrayIndex;
+     ++x.nextSourceIndex;
+     /* convert this supplementary code point */
+     x.c = UCharacter.getCodePoint((char) x.c, next);
+
+     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0) {
+         /* the caller looks up x.c itself */
+         x.doread = false;
+         return true;
+     }
+
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     /* callback(unassigned) */
+     x.doread = true;
+     return unassignedDouble(source, target, x, flush, cr);
+ }
+
+ // Function made out of the block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets.
+ // Hands x.c to fromU() (always without an offsets buffer), then updates x: c gets the return value of
+ // fromU(), sourceArrayIndex follows the position of source and nextSourceIndex grows by the input that
+ // fromU() consumed.
+ // Returns false if cr[0] is an error afterwards; otherwise moves x.sourceIndex on to the next character
+ // and returns true.
+ // NOTE(review): x.nextSourceIndex is passed as the "length" argument of fromU(), which uses it for the
+ // unmappable result - confirm that this is intended.
+ private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x,
+         boolean flush, CoderResult[] cr) {
+     /* try an extension mapping */
+     final int before = x.sourceArrayIndex;
+     source.position(before);
+     x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
+
+     final int after = source.position();
+     x.sourceArrayIndex = after;
+     x.nextSourceIndex += after - before;
+
+     if (cr[0].isError()) {
+         /* malformed or unmappable: stop the conversion loop */
+         return false;
+     }
+
+     /* the input was handled, continue: prepare for a new character */
+     x.sourceIndex = x.nextSourceIndex;
+     return true;
+ }
+
+ /**
+ * Overrides super class method
+ *
+ * @param encoder
+ * @param source
+ * @param target
+ * @param offsets
+ * @return
+ */
+ protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target,
+ IntBuffer offsets) {
+ CharsetMBCS cs = (CharsetMBCS) encoder.charset();
+ byte[] subchar;
+ int length;
+
+ if (cs.subChar1 != 0
+ && (cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1
+ : (encoder.invalidUCharBuffer[0] <= 0xff))) {
+ /*
+ * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS
+ * behavior)
+ */
+ subchar = new byte[] { cs.subChar1 };
+ length = 1;
+ } else {
+ /* select subChar in all other cases */
+ subchar = cs.subChar;
+ length = cs.subCharLen;
+ }
+
+ /* reset the selector for the next code point */
+ encoder.useSubChar1 = false;
+
+ if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
+ byte[] buffer = new byte[4];
+ int i = 0;
+
+ /* fromUnicodeStatus contains prevLength */
+ switch (length) {
+ case 1:
+ if (encoder.fromUnicodeStatus == 2) {
+ /* DBCS mode and SBCS sub char: change to SBCS */
+ encoder.fromUnicodeStatus = 1;
+ buffer[i++] = UConverterConstants.SI;
+ }
+ buffer[i++] = subchar[0];
+ break;
+ case 2:
+ if (encoder.fromUnicodeStatus <= 1) {
+ /* SBCS mode and DBCS sub char: change to DBCS */
+ encoder.fromUnicodeStatus = 2;
+ buffer[i++] = UConverterConstants.SO;
+ }
+ buffer[i++] = subchar[0];
+ buffer[i++] = subchar[1];
+ break;
+ default:
+ throw new IllegalArgumentException();
+ }
+
+ subchar = buffer;
+ length = i;
+ }
+ return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position());
+ }
+
+ /**
+ * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and
+ * subChar1 to be modified outside construction (since replaceWith is called once during construction).
+ *
+ * @param replacement
+ * The replacement for subchar.
+ */
+ protected void implReplaceWith(byte[] replacement) {
+ if (allowReplacementChanges) {
+ CharsetMBCS cs = (CharsetMBCS) this.charset();
+
+ System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length);
+ cs.subCharLen = (byte) replacement.length;
+ cs.subChar1 = 0;
+ }
+ }
+ }
+
+ /* Creates a new MBCS decoder that is bound to this charset. */
+ public CharsetDecoder newDecoder() {
+     final CharsetDecoder decoder = new CharsetDecoderMBCS(this);
+     return decoder;
+ }
+
+ /* Creates a new MBCS encoder that is bound to this charset. */
+ public CharsetEncoder newEncoder() {
+     final CharsetEncoder encoder = new CharsetEncoderMBCS(this);
+     return encoder;
+ }
+
+ @SuppressWarnings("fallthrough")
+ void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){
+ UConverterMBCSTable mbcsTable;
+ char[] table;
+ char st1,maxStage1, st2;
+ int st3;
+ int c ;
+
+ mbcsTable = data.mbcs;
+ table = mbcsTable.fromUnicodeTable;
+ if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){
+ maxStage1 = 0x440;
+ }
+ else{
+ maxStage1 = 0x40;
+ }
+ c=0; /* keep track of current code point while enumerating */
+
+ if(mbcsTable.outputType==MBCS_OUTPUT_1){
+ char stage2, stage3;
+ char minValue;
+ CharBuffer results;
+ results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
+
+ if(which==ROUNDTRIP_SET) {
+ /* use only roundtrips */
+ minValue=0xf00;
+ } else {
+ /* use all roundtrip and fallback results */
+ minValue=0x800;
+ }
+ for(st1=0;st1maxStage1){
+ stage2 = st2;
+ for(st2=0; st2<64; ++st2){
+ st3 = table[stage2 + st2];
+ if(st3!=0){
+ /*read the stage 3 block */
+ stage3 = (char)st3;
+ do {
+ if(results.get(stage3++)>=minValue){
+ setFillIn.add(c);
+ }
+
+ }while((++c&0xf) !=0);
+ } else {
+ c+= 16; /*empty stage 2 block */
+ }
+ }
+ } else {
+ c+=1024; /* empty stage 2 block */
+ }
+ }
+ } else {
+ int stage2,stage3;
+ byte[] bytes;
+ int st3Multiplier;
+ int value;
+ boolean useFallBack;
+ bytes = mbcsTable.fromUnicodeBytes;
+ useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
+ switch(mbcsTable.outputType) {
+ case MBCS_OUTPUT_3:
+ case MBCS_OUTPUT_4_EUC:
+ st3Multiplier = 3;
+ break;
+ case MBCS_OUTPUT_4:
+ st3Multiplier =4;
+ break;
+ default:
+ st3Multiplier =2;
+ break;
+ }
+ //ByteBuffer buffer = (ByteBuffer)charTobyte(table);
+
+ for(st1=0;st1(maxStage1>>1)){
+ stage2 = st2 ;
+ for(st2=0;st2<128;++st2){
+ /*read the stage 3 block */
+ st3 = table[stage2*2 + st2]<<16;
+ st3+=table[stage2*2 + ++st2];
+ if(st3!=0){
+ //if((st3=table[stage2+st2])!=0){
+ stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
+
+ /* get the roundtrip flags for the stage 3 block */
+ st3>>=16;
+ st3 &= UConverterConstants.UNSIGNED_SHORT_MASK;
+ switch(filter) {
+ case UCNV_SET_FILTER_NONE:
+ do {
+
+ if((st3&1)!=0){
+ setFillIn.add(c);
+ stage3+=st3Multiplier;
+ }else if (useFallBack) {
+
+ char b =0;
+ switch(st3Multiplier) {
+ case 4 :
+
+ b|= ByteBuffer.wrap(bytes).getChar(stage3++);
+
+ case 3 :
+
+ b|= ByteBuffer.wrap(bytes).getChar(stage3++);
+
+ case 2 :
+
+ b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1);
+ stage3+=2;
+ default:
+ break;
+ }
+ if(b!=0) {
+ setFillIn.add(c);
+ }
+ }
+ st3>>=1;
+ }while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_DBCS_ONLY:
+ /* Ignore single bytes results (<0x100). */
+ do {
+ if(((st3&1) != 0 || useFallBack) &&
+ (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){
+ setFillIn.add(c);
+ }
+ st3>>=1;
+ stage3+=2;
+ }while((++c&0xf) != 0);
+ break;
+ case UCNV_SET_FILTER_2022_CN :
+ /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2022-CN. */
+ do {
+ if(((st3&1) != 0 || useFallBack) &&
+ ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){
+ setFillIn.add(c);
+ }
+ st3>>=1;
+ stage3+=3;
+ }while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_SJIS:
+ /* only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
+ do{
+
+ if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){
+ setFillIn.add(c);
+ }
+ st3>>=1;
+ stage3+=2;
+ }while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_GR94DBCS:
+ /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/
+ do {
+ if(((st3&1) != 0 || useFallBack) &&
+ (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) &&
+ (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
+ setFillIn.add(c);
+ }
+ st3>>=1;
+ stage3+=2;
+ }while((++c&0xf)!=0);
+ break;
+ case UCNV_SET_FILTER_HZ:
+ /*Only add code points that are suitable for HZ DBCS*/
+ do {
+ if( ((st3&1) != 0 || useFallBack) &&
+ (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) &&
+ (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
+ setFillIn.add(c);
+ }
+ st3>>=1;
+ stage3+=2;
+ }while((++c&0xf) != 0);
+ break;
+ default:
+ return;
+ }
+ } else {
+ c+=16; /* empty stage 3 block */
+ }
+ }
+ } else {
+ c+=1024; /*empty stage2 block */
+ }
+ }
+ }
+ extGetUnicodeSet(setFillIn, which, filter, data);
+ }
+
+ static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback,
+ int minLength, int c, char s[],int length,int sectionIndex){
+ CharBuffer fromUSectionUChar;
+ IntBuffer fromUSectionValues;
+ fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class );
+ fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class );
+ int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex;
+ int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex;
+ int value, i, count;
+
+ /* read first pair of the section */
+ count = fromUSectionUChar.get(fromUSectionUCharIndex++);
+ value = fromUSectionValues.get(fromUSectionValuesIndex++);
+ if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) {
+ if(c>=0){
+ setFillIn.add(c);
+ } else {
+ String normalizedString=""; // String for composite characters
+ for(int j=0; j=minLength) {
+ String normalizedString=""; // String for composite characters
+ for(int j=0; j<(length+1);j++){
+ normalizedString+=s[j];
+ }
+ setFillIn.add(normalizedString);
+ }
+ }
+
+ }
+
+
+ static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){
+ int st1, stage1Length, st2, st3, minLength;
+ int ps2, ps3;
+
+ CharBuffer stage12, stage3;
+ int value, length;
+ IntBuffer stage3b;
+ boolean useFallback;
+ char s[] = new char[MAX_UCHARS];
+ int c;
+ ByteBuffer cx = Data.mbcs.extIndexes;
+ if(cx == null){
+ return;
+ }
+ stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class );
+ stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class );
+ stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class );
+
+ stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH);
+ useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET);
+
+ c = 0;
+ if(filter == UCNV_SET_FILTER_2022_CN) {
+ minLength = 3;
+ } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) {
+ /* DBCS-only, ignore single-byte results */
+ minLength = 2;
+ } else {
+ minLength = 1;
+ }
+
+ for(st1=0; st1< stage1Length; ++st1){
+ st2 = stage12.get(st1);
+ if(st2>stage1Length) {
+ ps2 = st2;
+ for(st2=0;st2<64;++st2){
+ st3=((int) stage12.get(ps2+st2))<=minLength){
+
+ switch(filter) {
+ case UCNV_SET_FILTER_2022_CN:
+ if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_SJIS:
+ if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_GR94DBCS:
+ if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1)
+ && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
+
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_HZ:
+ if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1)
+ && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
+ continue;
+ }
+ break;
+ default:
+ /*
+ * UCNV_SET_FILTER_NONE,
+ * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
+ */
+ break;
+ }
+ setFillIn.add(c);
+
+ }
+ }while((++c&0xf) != 0);
+
+ } else {
+ c+=16; /* empty stage3 block */
+ }
+ }
+ } else {
+ c+=1024; /* empty stage 2 block*/
+ }
+ }
+ }
+
+    /**
+     * Adds to {@code setFillIn} the code points selected by
+     * {@code MBCSGetFilteredUnicodeSetForUnicode} for the given shared data.
+     * The filter is derived from this charset's own {@code sharedData} (not from the
+     * {@code data} argument): DBCS-only tables use {@code UCNV_SET_FILTER_DBCS_ONLY},
+     * every other output type is left unfiltered.
+     * @param data shared converter data handed on to the filtered variant
+     * @param setFillIn set that receives the code points
+     * @param which set selector, handed on unchanged
+     */
+    void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){
+        final int filter;
+        if(this.sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY){
+            filter = UCNV_SET_FILTER_DBCS_ONLY;
+        }else{
+            filter = UCNV_SET_FILTER_NONE;
+        }
+        MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which, filter);
+    }
+
+    /**
+     * Fills {@code setFillIn} with the code points of this converter.
+     * With the GB 18030 option set, the ranges 0..0xd7ff and 0xe000..0x10ffff are
+     * added directly (everything except 0xd800..0xdfff); otherwise the work is
+     * delegated to {@code MBCSGetUnicodeSetForUnicode} using this charset's shared data.
+     * @param setFillIn set that receives the code points
+     * @param which set selector, handed on unchanged
+     */
+    void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+        final boolean isGB18030 = (options & MBCS_OPTION_GB18030) != 0;
+        if(!isGB18030){
+            this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which);
+            return;
+        }
+        /* below the surrogate range, then above it */
+        setFillIn.add(0, 0xd7ff);
+        setFillIn.add(0xe000, 0x10ffff);
+    }
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java
new file mode 100644
index 00000000000..8c227765e3d
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetProviderICU.java
@@ -0,0 +1,340 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import com.ibm.icu.impl.InvalidFormatException;
+
+
+/**
+ * A concrete subclass of CharsetProvider for loading and providing charset converters
+ * in ICU.
+ * @stable ICU 3.6
+ */
+public final class CharsetProviderICU extends CharsetProvider{
+ private static String optionsString = null;
+ private static boolean gettingJavaCanonicalName = false;
+
+ /**
+ * Default constructor
+ * @stable ICU 3.6
+ */
+ public CharsetProviderICU() {
+ }
+
+    /**
+     * Looks up a charset by name.
+     * Implements the abstract method of the super class. The name is first passed
+     * through {@code processOptions}, then resolved with {@code getICUCanonicalName};
+     * when that yields no name, the processed name itself is tried.
+     * @param charsetName charset name
+     * @return charset object for the given charset name, null if unsupported
+     * @stable ICU 3.6
+     */
+    public final Charset charsetForName(String charsetName){
+        try{
+            // extract the options from the charset name
+            final String strippedName = processOptions(charsetName);
+            // get the canonical name
+            final String icuCanonicalName = getICUCanonicalName(strippedName);
+            final boolean resolved = icuCanonicalName != null && icuCanonicalName.length() != 0;
+
+            // An unresolved name is still tried as given: it may be something added and
+            // not in the alias table. It fails with an exception if it doesn't work.
+            return getCharset(resolved ? icuCanonicalName : strippedName);
+        }catch(UnsupportedCharsetException ex){
+            // unsupported names are reported as null
+            return null;
+        }catch(IOException ex){
+            // lookup failures are reported as null as well
+            return null;
+        }
+    }
+
+ /**
+ * Constructs a charset for the given ICU conversion table from the specified class path.
+ * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
+ * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
+ * Conversion tables can be made with ICU4C's makeconv tool.
+ * This function allows you to load user defined conversion
+ * tables that are outside of ICU's core data.
+ * It delegates to the three-argument overload, passing null as the class loader.
+ * @param charsetName The name of the charset conversion table.
+ * @param classPath The class path that contains the conversion table.
+ * @return charset object for the given charset name, null if unsupported
+ * @stable ICU 3.8
+ */
+ public final Charset charsetForName(String charsetName, String classPath) {
+ return charsetForName(charsetName, classPath, null);
+ }
+
+    /**
+     * Constructs a charset for the given ICU conversion table from the specified class path.
+     * This function is similar to {@link #charsetForName(String, String)}; the charset
+     * name is used both as the ICU name and as the Java name, with no aliases.
+     * @param charsetName The name of the charset conversion table.
+     * @param classPath The class path that contains the conversion table.
+     * @param loader the class loader from which to load the charset conversion table
+     * @return charset object for the given charset name, null if unsupported
+     * @stable ICU 3.8
+     */
+    public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
+        try {
+            return new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
+        } catch (InvalidFormatException e) {
+            // construction failed with an InvalidFormatException: reported as unsupported
+            return null;
+        }
+    }
+
+    /**
+     * Gets the canonical name of the converter as defined by ICU.
+     * The lookups are tried in this order, and the first non-null result wins:
+     * {@code UConverterAlias.getCanonicalName} with the "MIME" tag, the same with the
+     * "IANA" tag, the first alias of {@code enc}, and - only for names starting with
+     * "x-" or "X-" - the first alias of the name without that prefix.
+     * @param enc converter name
+     * @return the canonical name; an empty string if no lookup produced a name;
+     *         null if {@code enc} is null
+     * @throws UnsupportedCharsetException if a lookup fails with an IOException
+     *         (the IOException is attached as the cause)
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    public static final String getICUCanonicalName(String enc)
+            throws UnsupportedCharsetException{
+        if(enc==null){
+            return null;
+        }
+        try{
+            String canonicalName = UConverterAlias.getCanonicalName(enc, "MIME");
+            if(canonicalName==null){
+                canonicalName = UConverterAlias.getCanonicalName(enc, "IANA");
+            }
+            if(canonicalName==null){
+                /* we have some aliases in the form x-blah .. match those */
+                canonicalName = UConverterAlias.getAlias(enc, 0);
+            }
+            if(canonicalName==null && (enc.startsWith("x-") || enc.startsWith("X-"))){
+                /* TODO: Match with getJavaCanonicalName method */
+                // Remove the 'x-' and get the ICU canonical name
+                canonicalName = UConverterAlias.getAlias(enc.substring(2), 0);
+            }
+            /* an unsupported encoding is reported as an empty string */
+            return canonicalName!=null ? canonicalName : "";
+        }catch(IOException ex){
+            // UnsupportedCharsetException has no cause-taking constructor
+            UnsupportedCharsetException unsupported = new UnsupportedCharsetException(enc);
+            unsupported.initCause(ex);
+            throw unsupported;
+        }
+    }
+ /**
+ * Creates the charset for an ICU canonical name via {@code CharsetICU.getCharset},
+ * supplying the aliases and the Java canonical name looked up for that ICU name.
+ * A pending {@code optionsString} is appended to the ICU name and then cleared,
+ * unless {@code gettingJavaCanonicalName} is set.
+ * NOTE(review): optionsString and gettingJavaCanonicalName are static fields that are
+ * read and written here without any synchronization - confirm how callers are threaded.
+ * @param icuCanonicalName ICU canonical converter name
+ * @return the charset returned by CharsetICU.getCharset
+ * @throws IOException declared by getAliases, which is called first
+ */
+ private static final Charset getCharset(String icuCanonicalName) throws IOException{
+ String[] aliases = getAliases(icuCanonicalName);
+ String canonicalName = getJavaCanonicalName(icuCanonicalName);
+
+ /* Concat the option string to the icuCanonicalName so that the options can be handled properly
+ * by the actual charset.
+ * Note: getJavaCanonicalName() may eventually call this method so skip the concatenation part
+ * during getJavaCanonicalName() call.
+ */
+ if (!gettingJavaCanonicalName && optionsString != null) {
+ icuCanonicalName = icuCanonicalName.concat(optionsString);
+ optionsString = null;
+ }
+
+ return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases));
+ }
+ /**
+ * Gets the canonical name of the converter as defined by Java
+ * @param charsetName converter name
+ * @return canonical name of the converter
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static String getJavaCanonicalName(String charsetName){
+ /*
+ If a charset listed in the IANA Charset Registry is supported by an implementation
+ of the Java platform then its canonical name must be the name listed in the registry.
+ Many charsets are given more than one name in the registry, in which case the registry
+ identifies one of the names as MIME-preferred. If a charset has more than one registry
+ name then its canonical name must be the MIME-preferred name and the other names in
+ the registry must be valid aliases. If a supported charset is not listed in the IANA
+ registry then its canonical name must begin with one of the strings "X-" or "x-".
+ */
+ if(charsetName==null ){
+ return null;
+ }
+ try{
+ String cName = null;
+ /* find out the alias with MIME tag */
+ if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
+ /* find out the alias with IANA tag */
+ }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
+ }else {
+ /*
+ check to see if an alias already exists with x- prefix, if yes then
+ make that the canonical name
+ */
+ int aliasNum = UConverterAlias.countAliases(charsetName);
+ String name;
+ for(int i=0;i 0) {
+ cName = testName;
+ }
+ }
+ }
+ gettingJavaCanonicalName = false;
+ }
+ }
+ return cName;
+ }catch (IOException ex){
+
+ }
+ return null;
+ }
+
+ /**
+ * Gets the aliases associated with the converter name
+ * @param encName converter name
+ * @return converter names as elements in an object array
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ private static final String[] getAliases(String encName)throws IOException{
+ String[] ret = null;
+ int aliasNum = 0;
+ int i=0;
+ int j=0;
+ String aliasArray[/*50*/] = new String[50];
+
+ if(encName != null){
+ aliasNum = UConverterAlias.countAliases(encName);
+ for(i=0,j=0;i=0;) {
+ ret[j] = aliasArray[j];
+ }
+
+ }
+ return (ret);
+
+ }
+
+ private static final void putCharsets(Map map){
+ int num = UConverterAlias.countAvailable();
+ for(int i=0;i charsets(){
+ HashMap map = new HashMap();
+ putCharsets(map);
+ return map.keySet().iterator();
+ }
+
+    /**
+     * Gets the canonical names of available converters, i.e. the values that
+     * {@code putCharsets} stores into a fresh map.
+     * @return array of available converter names
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    public static final String[] getAvailableNames(){
+        // The map must be typed: on a raw HashMap, values().toArray(new String[0]) is
+        // typed Object[] and cannot be returned as String[].
+        HashMap<String, String> map = new HashMap<String, String>();
+        putCharsets(map);
+        return map.values().toArray(new String[0]);
+    }
+
+ /**
+ * Return all names available
+ * @return String[] an array of all available names
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public static final String[] getAllNames(){
+ int num = UConverterAlias.countAvailable();
+ String[] names = new String[num];
+ for(int i=0;i -1) {
+ /* Remove and save the swap lfnl option string portion of the charset name. */
+ optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
+
+ charsetName = charsetName.substring(0, charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
+ }
+
+ return charsetName;
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java
new file mode 100644
index 00000000000..8a9717aa1be
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetSCSU.java
@@ -0,0 +1,1267 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author krajwade
+ *
+ */
+class CharsetSCSU extends CharsetICU{
+ /* SCSU definitions --------------------------------------------------------- */
+
+ /* SCSU command byte values */
+ //enum {
+ private static final short SQ0=0x01; /* Quote from window pair 0 */
+ private static final short SQ7=0x08; /* Quote from window pair 7 */
+ private static final short SDX=0x0B; /* Define a window as extended */
+ //private static final short Srs=0x0C; /* reserved */
+ private static final short SQU=0x0E; /* Quote a single Unicode character */
+ private static final short SCU=0x0F; /* Change to Unicode mode */
+ private static final short SC0=0x10; /* Select window 0 */
+ private static final short SC7=0x17; /* Select window 7 */
+ private static final short SD0=0x18; /* Define and select window 0 */
+ //private static final short SD7=0x1F; /* Define and select window 7 */
+
+ private static final short UC0=0xE0; /* Select window 0 */
+ private static final short UC7=0xE7; /* Select window 7 */
+ private static final short UD0=0xE8; /* Define and select window 0 */
+ private static final short UD7=0xEF; /* Define and select window 7 */
+ private static final short UQU=0xF0; /* Quote a single Unicode character */
+ private static final short UDX=0xF1; /* Define a Window as extended */
+ private static final short Urs=0xF2; /* reserved */
+ // };
+
+ // enum {
+ /*
+ * Unicode code points from 3400 to E000 are not addressable by
+ * a dynamic window, since in these areas no short run alphabets are
+ * found. Therefore add gapOffset to all values from gapThreshold.
+ */
+ private static final int gapThreshold=0x68;
+ private static final int gapOffset = 0xAC00 ;
+ /* values between reservedStart and fixedThreshold are reserved */
+ private static final int reservedStart=0xA8;
+ /* use table of predefined fixed offsets for values from fixedThreshold */
+ private static final int fixedThreshold=0xF9;
+ //};
+
+ protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD};
+
+ /* constant offsets for the 8 static windows */
+ private static final int staticOffsets[]={
+ 0x0000, /* ASCII for quoted tags */
+ 0x0080, /* Latin - 1 Supplement (for access to punctuation) */
+ 0x0100, /* Latin Extended-A */
+ 0x0300, /* Combining Diacritical Marks */
+ 0x2000, /* General Punctuation */
+ 0x2080, /* Currency Symbols */
+ 0x2100, /* Letterlike Symbols and Number Forms */
+ 0x3000 /* CJK Symbols and punctuation */
+ };
+
+ /* initial offsets for the 8 dynamic (sliding) windows */
+ private static final int initialDynamicOffsets[]={
+ 0x0080, /* Latin-1 */
+ 0x00C0, /* Latin Extended A */
+ 0x0400, /* Cyrillic */
+ 0x0600, /* Arabic */
+ 0x0900, /* Devanagari */
+ 0x3040, /* Hiragana */
+ 0x30A0, /* Katakana */
+ 0xFF00 /* Fullwidth ASCII */
+ };
+
+ /* Table of fixed predefined Offsets */
+ private static final int fixedOffsets[]={
+ /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
+ /* 0xFA */ 0x0250, /* IPA extensions */
+ /* 0xFB */ 0x0370, /* Greek */
+ /* 0xFC */ 0x0530, /* Armenian */
+ /* 0xFD */ 0x3040, /* Hiragana */
+ /* 0xFE */ 0x30A0, /* Katakana */
+ /* 0xFF */ 0xFF60 /* Halfwidth Katakana */
+ };
+
+ /* state values */
+ //enum {
+ private static final int readCommand=0;
+ private static final int quotePairOne=1;
+ private static final int quotePairTwo=2;
+ private static final int quoteOne=3;
+ private static final int definePairOne=4;
+ private static final int definePairTwo=5;
+ private static final int defineOne=6;
+ // };
+
+    /**
+     * Mutable SCSU state: dynamic window offsets and state-machine fields for both
+     * conversion directions, plus the usage order of the dynamic windows.
+     * A new instance starts out in the state set up by {@link #initialize()}.
+     */
+    @SuppressWarnings("unused")
+    private final class SCSUData{
+        /* dynamic window offsets, initialized to the default values from initialDynamicOffsets */
+        int toUDynamicOffsets[] = new int[8] ;
+        int fromUDynamicOffsets[] = new int[8] ;
+
+        /* state machine state - toUnicode */
+        boolean toUIsSingleByteMode;
+        short toUState;
+        byte toUQuoteWindow, toUDynamicWindow;
+        short toUByteOne;
+        short toUPadding[];
+
+        /* state machine state - fromUnicode */
+        boolean fromUIsSingleByteMode;
+        byte fromUDynamicWindow;
+
+        /* never assigned in this class; only read by the switch this rewrite removed */
+        byte locale;
+
+        /*
+         * windowUse[] keeps track of the use of the dynamic windows:
+         * At nextWindowUseIndex there is the least recently used window,
+         * and the following windows (in a wrapping manner) are more and more
+         * recently used.
+         * At nextWindowUseIndex-1 there is the most recently used window.
+         */
+        byte nextWindowUseIndex;
+        byte windowUse[] = new byte[8];
+
+        SCSUData(){
+            initialize();
+        }
+
+        /**
+         * Puts every field back to its initial value: default dynamic offsets,
+         * single-byte mode and window 0 in both directions, and the generic
+         * window usage order.
+         */
+        void initialize(){
+            /* both directions start from the same default dynamic window offsets */
+            System.arraycopy(initialDynamicOffsets, 0, toUDynamicOffsets, 0, 8);
+            System.arraycopy(initialDynamicOffsets, 0, fromUDynamicOffsets, 0, 8);
+
+            /* toUnicode state machine */
+            toUIsSingleByteMode = true;
+            toUState = readCommand;
+            toUQuoteWindow = 0;
+            toUDynamicWindow = 0;
+            toUByteOne = 0;
+
+            /* fromUnicode state machine */
+            fromUIsSingleByteMode = true;
+            fromUDynamicWindow = 0;
+
+            /*
+             * Window usage order. A locale specific order (l_ja with
+             * initialWindowUse_ja) is not being used right now because
+             * "SCSU,locale=ja" does not work in ICU4J, so the generic order
+             * is installed whatever the value of locale.
+             */
+            nextWindowUseIndex = 0;
+            System.arraycopy(initialWindowUse, 0, windowUse, 0, 8);
+        }
+    }
+
+ static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 };
+ /* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */
+ // static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 };
+
+ //enum {
+ //private static final int lGeneric = 0;
+ /* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */
+ // private static final int l_ja = 1;
+ //};
+
+ private SCSUData extraInfo = null;
+
+    /**
+     * Creates the SCSU charset and its conversion state.
+     * Byte/char ratios are set to 1..3 bytes per char and 1 char per byte.
+     * @param icuCanonicalName name passed to the super class as the ICU name
+     * @param javaCanonicalName name passed to the super class as the Java name
+     * @param aliases aliases passed to the super class
+     */
+    public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+
+        /* state used by the decoders and encoders created from this charset */
+        this.extraInfo = new SCSUData();
+
+        /* size ratios */
+        this.minBytesPerChar = 1;
+        this.maxBytesPerChar = 3;
+        this.maxCharsPerByte = 1;
+    }
+
+ class CharsetDecoderSCSU extends CharsetDecoderICU {
+ /* label values for supporting behavior similar to goto in C */
+ private static final int FastSingle=0;
+ private static final int SingleByteMode=1;
+ private static final int EndLoop=2;
+
+ /* Mode Type */
+ private static final int ByteMode = 0;
+ private static final int UnicodeMode =1;
+
+ /**
+ * Creates a decoder for the given charset and immediately resets it,
+ * which also re-initializes the enclosing charset's extraInfo state (see implReset).
+ * @param cs charset passed on to the super class constructor
+ */
+ public CharsetDecoderSCSU(CharsetICU cs) {
+ super(cs);
+ implReset();
+ }
+
+ //private SCSUData data ;
+ /**
+ * Resets the decoder: runs the super class reset, clears toULength and
+ * re-initializes extraInfo.
+ * NOTE(review): extraInfo is a field of the enclosing CharsetSCSU and initialize()
+ * resets the fromUnicode fields as well, so this also affects encoders of the same
+ * charset instance - confirm that this sharing is intended.
+ */
+ protected void implReset(){
+ super.implReset();
+ toULength = 0;
+ extraInfo.initialize();
+ }
+
+ short b;
+
+ //Get the state machine state
+ private boolean isSingleByteMode ;
+ private short state ;
+ private byte quoteWindow ;
+ private byte dynamicWindow ;
+ private short byteOne;
+
+
+ //sourceIndex=-1 if the current character began in the previous buffer
+ private int sourceIndex ;
+ private int nextSourceIndex ;
+
+ CoderResult cr;
+ SCSUData data ;
+ private boolean LabelLoop;// used to break the while loop
+
+        /**
+         * Drives the SCSU decoding state machine over {@code source}.
+         * The C-style labels of the algorithm are emulated with {@code labelType}:
+         * each helper returns the label to continue with, and {@code endLoop}
+         * saves the state and stops the loop.
+         * @param source bytes to decode
+         * @param target receives the decoded chars
+         * @param offsets receives source indexes for the chars written, may be null
+         * @param flush not read by this method
+         * @return the value left in {@code cr} by the helpers (UNDERFLOW unless changed)
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                boolean flush){
+            data = extraInfo;
+
+            /* restore the state machine from the saved converter state */
+            isSingleByteMode = data.toUIsSingleByteMode;
+            state = data.toUState;
+            quoteWindow = data.toUQuoteWindow;
+            dynamicWindow = data.toUDynamicWindow;
+            byteOne = data.toUByteOne;
+
+            /* sourceIndex=-1 if the current character began in the previous buffer */
+            sourceIndex = data.toUState == readCommand ? 0: -1 ;
+            nextSourceIndex = 0;
+
+            cr = CoderResult.UNDERFLOW;
+            LabelLoop = true;
+
+            int labelType = FastSingle;
+            while(LabelLoop){
+                /* the mode can change between two steps, so it is re-read every time */
+                final int modeType = isSingleByteMode ? ByteMode : UnicodeMode;
+                switch(labelType){
+                case FastSingle:
+                    /* fast path of the current mode */
+                    labelType = fastSingle(source, target, offsets, modeType);
+                    break;
+                case SingleByteMode:
+                    /* normal state machine of the current mode, minus what the fast path covers */
+                    labelType = singleByteMode(source, target, offsets, modeType);
+                    break;
+                case EndLoop:
+                    endLoop(source, target, offsets);
+                    break;
+                }
+            }
+            return cr;
+        }
+
+ private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
+ int label = 0;
+ if(modeType==ByteMode){
+
+ if(state==readCommand){
+ while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){
+ source.position(source.position()+1);
+ ++nextSourceIndex;
+ if(b <= 0x7f){
+ /*Write US graphic character or DEL*/
+ target.put((char)b);
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ }else{
+ /*Write from dynamic window*/
+ int c = data.toUDynamicOffsets[dynamicWindow] + (b&0x7f);
+ if(c <= 0xffff){
+ target.put((char)c);
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ }else{
+ /*Output surrogate pair */
+ target.put((char)(0xd7c0 + (c>>10)));
+ if(target.hasRemaining()){
+ target.put((char)(0xdc00 | (c&0x3ff)));
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ }else{
+ /* target overflow */
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff));
+ charErrorBufferLength = 1;
+ label = EndLoop;
+ cr = CoderResult.OVERFLOW;
+ return label;
+ }
+ }
+ }
+ sourceIndex = nextSourceIndex;
+ }
+ // label = SingleByteMode;
+ }
+ }else if(modeType==UnicodeMode){
+ /* fast path for unicode mode */
+ if(state == readCommand){
+ while((source.position()+1)(Urs-UC0)){
+ target.put((char)((b<<8)|(source.get(source.position()+1)&UConverterConstants.UNSIGNED_BYTE_MASK)));
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ sourceIndex = nextSourceIndex;
+ nextSourceIndex+=2;
+ source.position(source.position()+2);
+ }
+ }
+ }
+ label = SingleByteMode;
+ return label;
+ }
+
+ private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
+ int label = SingleByteMode;
+ if(modeType == ByteMode){
+ while(source.hasRemaining()){
+ if(!target.hasRemaining()){
+ cr = CoderResult.OVERFLOW;
+ label = EndLoop;
+ return label;
+ }
+ b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+ ++nextSourceIndex;
+ switch(state){
+ case readCommand:
+ /*redundant conditions are commented out */
+ if(((1L<>10)));
+ if(target.hasRemaining()){
+ target.put((char)(0xdc00 | (c&0x3ff)));
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ }else {
+ /* target overflow */
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ charErrorBufferArray[0] = (char)(0xdc00 | (c&0x3ff));
+ charErrorBufferLength = 1;
+ label = EndLoop;
+ cr = CoderResult.OVERFLOW;
+ LabelLoop = false;
+ return label;
+ }
+ }
+ }
+ sourceIndex = nextSourceIndex;
+ state = readCommand;
+ label = FastSingle;
+ return label;
+ case definePairOne:
+ dynamicWindow = (byte)((b>>5)&7);
+ byteOne = (byte)(b&0x1f);
+ toUBytesArray[1] = (byte)b;
+ toULength = 2;
+ state = definePairTwo;
+ break;
+ case definePairTwo:
+ data.toUDynamicOffsets[dynamicWindow] = 0x10000 + (byteOne<<15L | b<<7L);
+ sourceIndex = nextSourceIndex;
+ state = readCommand;
+ label = FastSingle;
+ return label;
+ case defineOne:
+ if(b==0){
+ /*callback (illegal)*/
+ toUBytesArray[1] = (byte)b;
+ toULength =2;
+ label = EndLoop;
+ return label;
+ }else if(b=fixedThreshold){
+ data.toUDynamicOffsets[dynamicWindow] = fixedOffsets[b-fixedThreshold];
+ }else{
+ /*callback (illegal)*/
+ toUBytesArray[1] = (byte)b;
+ toULength =2;
+ label = EndLoop;
+ return label;
+ }
+ sourceIndex = nextSourceIndex;
+ state = readCommand;
+ label = FastSingle;
+ return label;
+ }
+ }
+
+ }else if(modeType==UnicodeMode){
+ while(source.hasRemaining()){
+ if(!target.hasRemaining()){
+ cr = CoderResult.OVERFLOW;
+ label = EndLoop;
+ return label;
+ }
+ b = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+ ++nextSourceIndex;
+ switch(state){
+ case readCommand:
+ if((short)((b -UC0)&UConverterConstants.UNSIGNED_BYTE_MASK)>(Urs - UC0)){
+ byteOne = b;
+ toUBytesArray[0] = (byte)b;
+ toULength = 1;
+ state = quotePairTwo;
+ }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UC7){
+ dynamicWindow = (byte)(b - UC0);
+ sourceIndex = nextSourceIndex;
+ isSingleByteMode = true;
+ label = FastSingle;
+ return label;
+ }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) <= UD7){
+ dynamicWindow = (byte)(b - UD0);
+ isSingleByteMode = true;
+ toUBytesArray[0] = (byte)b;
+ toULength = 1;
+ state = defineOne;
+ label = SingleByteMode;
+ return label;
+ }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UDX){
+ isSingleByteMode = true;
+ toUBytesArray[0] = (byte)b;
+ toULength = 1;
+ state = definePairOne;
+ label = SingleByteMode;
+ return label;
+ }else if((b&UConverterConstants.UNSIGNED_BYTE_MASK) == UQU){
+ toUBytesArray[0] = (byte)b;
+ toULength = 1;
+ state = quotePairOne;
+ }else {
+ /* callback (illegal)*/
+ cr = CoderResult.malformedForLength(1);
+ toUBytesArray[0] = (byte)b;
+ toULength = 1;
+ label = EndLoop;
+ return label;
+ }
+ break;
+ case quotePairOne:
+ byteOne = b;
+ toUBytesArray[1] = (byte)b;
+ toULength = 2;
+ state = quotePairTwo;
+ break;
+ case quotePairTwo:
+ target.put((char)((byteOne<<8) | b));
+ if(offsets != null){
+ offsets.put(sourceIndex);
+ }
+ sourceIndex = nextSourceIndex;
+ state = readCommand;
+ label = FastSingle;
+ return label;
+ }
+ }
+ }
+ label = EndLoop;
+ return label;
+ }
+
+        /**
+         * Final step of the decoding loop: adjusts the state for the way the loop
+         * ended, saves the state machine into {@code data} and stops the loop.
+         * None of the three parameters is read.
+         */
+        private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
+            /* this is the only place that ends the while loop in decodeLoop */
+            LabelLoop = false;
+
+            final boolean overflowed = (cr==CoderResult.OVERFLOW);
+            if(overflowed){
+                state = readCommand;
+            }else if(state == readCommand){
+                /* no pending bytes when a new command is expected */
+                toULength = 0;
+            }
+
+            /* save the state machine (after the adjustment above) */
+            data.toUState = state;
+            data.toUIsSingleByteMode = isSingleByteMode;
+            data.toUByteOne = byteOne;
+            data.toUQuoteWindow = quoteWindow;
+            data.toUDynamicWindow = dynamicWindow;
+        }
+ }
+
+ class CharsetEncoderSCSU extends CharsetEncoderICU{
+ /**
+ * Creates an encoder for the given charset, using fromUSubstitution as the
+ * substitution bytes, and immediately resets it (see implReset).
+ * @param cs charset passed on to the super class constructor
+ */
+ public CharsetEncoderSCSU(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ implReset();
+ }
+
+ //private SCSUData data;
+ /**
+ * Resets the encoder: runs the super class reset and re-initializes extraInfo.
+ * NOTE(review): extraInfo is a field of the enclosing CharsetSCSU and initialize()
+ * resets the toUnicode fields as well, so this also affects decoders of the same
+ * charset instance - confirm that this sharing is intended.
+ */
+ protected void implReset() {
+ super.implReset();
+ extraInfo.initialize();
+ }
+
+ /* label values for supporting behavior similar to goto in C */
+ private static final int Loop=0;
+ private static final int GetTrailUnicode=1;
+ private static final int OutputBytes=2;
+ private static final int EndLoop =3;
+
+ private int delta;
+ private int length;
+
+ ///variables of compression heuristics
+ private int offset;
+ private char lead, trail;
+ private int code;
+ private byte window;
+
+ //Get the state machine state
+ private boolean isSingleByteMode;
+ private byte dynamicWindow ;
+ private int currentOffset;
+ int c;
+
+ SCSUData data ;
+
+ //sourceIndex=-1 if the current character began in the previous buffer
+ private int sourceIndex ;
+ private int nextSourceIndex;
+ private int targetCapacity;
+
+ private boolean LabelLoop;//used to break the while loop
+ private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle:
+ private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before getTrailUnicode:
+
+ CoderResult cr;
+
+        /**
+         * Drives the SCSU encoding state machine over {@code source}.
+         * The C-style labels of the algorithm are emulated with {@code labelType}:
+         * each helper returns the label to continue with, and {@code endLoop}
+         * is the step expected to stop the loop.
+         * @param source chars to encode
+         * @param target receives the encoded bytes
+         * @param offsets passed on to the helpers
+         * @param flush not read by this method
+         * @return the value left in {@code cr} by the helpers (UNDERFLOW unless changed)
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+            data = extraInfo;
+            cr = CoderResult.UNDERFLOW;
+
+            //Get the state machine state
+            isSingleByteMode = data.fromUIsSingleByteMode;
+            dynamicWindow = data.fromUDynamicWindow;
+            currentOffset = data.fromUDynamicOffsets[dynamicWindow];
+            c = fromUChar32;
+
+            targetCapacity = target.limit()-target.position();
+
+            //sourceIndex=-1 if the current character began in the previous buffer
+            //(these two fields used to be initialized twice with the same values)
+            sourceIndex = c== 0 ? 0: -1 ;
+            nextSourceIndex = 0;
+
+            int labelType = Loop; // set to Loop so that the code starts from loop:
+            LabelLoop = true;
+            AfterGetTrail = false;
+            AfterGetTrailUnicode = false;
+
+            while(LabelLoop){
+                switch(labelType){
+                case Loop:
+                    labelType = loop(source, target, offsets);
+                    break;
+                case GetTrailUnicode:
+                    labelType = getTrailUnicode(source, target, offsets);
+                    break;
+                case OutputBytes:
+                    labelType = outputBytes(source, target, offsets);
+                    break;
+                case EndLoop:
+                    endLoop(source, target, offsets);
+                    break;
+                }
+            }
+            return cr;
+        }
+
+ /**
+  * Looks through entries 0..7 of a window offset table for the first window
+  * that contains the current character c, i.e. for which c minus the window
+  * offset, taken as an unsigned value, is at most 0x7f.
+  *
+  * @param offsets table of window start offsets; at least eight entries are read
+  *                unless a match is found earlier
+  * @return the index of the first matching window, or -1 if none of the eight matches
+  */
+ private byte getWindow(int[] offsets){
+     byte match = -1;
+     for (int w = 0; w < 8 && match < 0; w++) {
+         long distance = (c - offsets[w]) & UConverterConstants.UNSIGNED_INT_MASK;
+         if (distance <= 0x7f) {
+             match = (byte) w;
+         }
+     }
+     return match;
+ }
+
+ private boolean isInOffsetWindowOrDirect(int offsetValue, int a){
+ return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f &
+ ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) ||
+ ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20
+ || ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0)));
+ }
+
+ private byte getNextDynamicWindow(){
+ byte windowValue = data.windowUse[data.nextWindowUseIndex];
+ if(++data.nextWindowUseIndex==8){
+ data.nextWindowUseIndex=0;
+ }
+ return windowValue;
+ }
+
+ /**
+  * Moves windowValue to the most recently used position of the circular
+  * window-use list data.windowUse (8 entries). The most recently used
+  * position is the one just before data.nextWindowUseIndex.
+  *
+  * The list is searched backwards from data.nextWindowUseIndex; the search
+  * only ends when windowValue is found, so the value must be in the list.
+  *
+  * @param windowValue the window number that has just been used
+  */
+ private void useDynamicWindow(byte windowValue){
+     /* walk backwards (with wrap-around) until the window's slot is found */
+     int slot = data.nextWindowUseIndex;
+     do {
+         slot = (slot <= 0) ? 7 : slot - 1;
+     } while (data.windowUse[slot] != windowValue);
+
+     /* close the gap: shift every later entry one slot towards the found position */
+     int successor = (slot + 1 == 8) ? 0 : slot + 1;
+     while (successor != data.nextWindowUseIndex) {
+         data.windowUse[slot] = data.windowUse[successor];
+         slot = successor;
+         successor = (successor + 1 == 8) ? 0 : successor + 1;
+     }
+
+     /* the freed last slot is the most recently used position */
+     data.windowUse[slot] = windowValue;
+ }
+
+
+ private int getDynamicOffset(){
+ int i;
+ for(i=0;i<7;++i){
+ if(((c-fixedOffsets[i])&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){
+ offset = fixedOffsets[i];
+ return 0xf9+i;
+ }
+ }
+ if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){
+ /*No dynamic window for US-ASCII*/
+ return -1;
+ }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) ||
+ ((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){
+ /*This character is in the code range for a "small", i.e, reasonably windowable, script*/
+ offset = c&0x7fffff80;
+ return (c>>7);
+ }else if(0xe000<=(c&UConverterConstants.UNSIGNED_INT_MASK) && (c&UConverterConstants.UNSIGNED_INT_MASK)!=0xfeff && (c&UConverterConstants.UNSIGNED_INT_MASK) < 0xfff0){
+ /*for these characters we need to take the gapOffset into account*/
+ offset=(c)&0x7fffff80;
+ return ((c-gapOffset)>>7);
+ }else{
+ return -1;
+ }
+ }
+
+ /**
+ * Main state handler (the "loop:" label): consumes characters from source
+ * and encodes them, in single-byte mode or in Unicode mode depending on
+ * isSingleByteMode.
+ *
+ * Characters that need a single byte (single-byte mode) or a plain two-byte
+ * unit with enough room (Unicode mode) are written to target right here.
+ * For every other sequence the bytes are packed into c, their count is
+ * stored in length, and OutputBytes is returned so that outputBytes()
+ * writes them.
+ *
+ * AfterGetTrail / AfterGetTrailUnicode are set by getTrail() and
+ * getTrailUnicode(); while one of them is true the code that fetches and
+ * classifies a new character is skipped for the next pass.
+ *
+ * @param source characters to encode
+ * @param target buffer receiving the encoded bytes
+ * @param offsets if non-null, receives sourceIndex for every byte written here
+ * @return the label encodeLoop should dispatch to next: Loop,
+ * GetTrailUnicode, OutputBytes or EndLoop
+ */
+ private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ int label = 0;
+ if(isSingleByteMode){
+ /*c!=0: a character is still pending (c was loaded from fromUChar32), continue with its trail unit*/
+ if(c!=0 && targetCapacity>0 && !AfterGetTrail){
+ label = getTrail(source, target, offsets);
+ return label;
+ }
+ /*state machine for single byte mode*/
+ while(AfterGetTrail || source.hasRemaining()){
+ if(targetCapacity<=0 && !AfterGetTrail){
+ /*target is full*/
+ cr = CoderResult.OVERFLOW;
+ label = EndLoop;
+ return label;
+ }
+ if(!AfterGetTrail){
+ c = source.get();
+ ++nextSourceIndex;
+
+ }
+ if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){
+ /*pass US-ASCII graphic character through*/
+ target.put((byte)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ --targetCapacity;
+ }else if((c & UConverterConstants.UNSIGNED_INT_MASK)<0x20 && !AfterGetTrail){
+ /*0x2601 has bits 0, 9, 10 and 13 set*/
+ if(((1L<<(c & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0){
+ /*CR/LF/TAB/NUL*/
+ target.put((byte)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ --targetCapacity;
+ } else {
+ /*quote c0 control character*/
+ c|=SQ0<<8;
+ length = 2;
+ label = OutputBytes;
+ return label;
+ }
+ } else if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && !AfterGetTrail){
+ /*use the current dynamic window*/
+ target.put((byte)(delta|0x80));
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ --targetCapacity;
+ } else if(AfterGetTrail || UTF16.isSurrogate((char)c)){
+ if(!AfterGetTrail){
+ if(UTF16.isLeadSurrogate((char)c)){
+ label = getTrail(source, target, offsets);
+ if(label==EndLoop){
+ return label;
+ }
+ } else {
+ /*this is an unmatched trail code unit (2nd Surrogate)*/
+ /*callback(illegal)*/
+ cr = CoderResult.malformedForLength(1);
+ label = EndLoop;
+ return label;
+ }
+ }
+
+
+ if(AfterGetTrail){
+ AfterGetTrail = false;
+ }
+
+ /*Compress supplementary character U+10000...U+10ffff */
+ if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){
+ /*use the current dynamic window*/
+ target.put((byte)(delta|0x80));
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ --targetCapacity;
+ } else if((window=getWindow(data.fromUDynamicOffsets))>=0){
+ /*there is a dynamic window that contains this character, change to it*/
+ dynamicWindow = window;
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(dynamicWindow);
+ c = ((SC0+dynamicWindow)<<8 | (c-currentOffset)|0x80);
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else if((code=getDynamicOffset())>=0){
+ /*might check if there are more characters in this window to come */
+ /*define an extended window with this character*/
+ code-=0x200;
+ dynamicWindow=getNextDynamicWindow();
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(dynamicWindow);
+ c = ((SDX<<24) | (dynamicWindow<<21)|
+ (code<<8)| (c- currentOffset) |0x80);
+ // c = (((SDX)<<25) | (dynamicWindow<<21)|
+ // (code<<8)| (c- currentOffset) |0x80 );
+ length = 4;
+ label = OutputBytes;
+ return label;
+ } else {
+ /*change to unicode mode and output this (lead, trail) pair*/
+ isSingleByteMode = false;
+ target.put((byte)SCU);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ }
+ --targetCapacity;
+ c = (lead<<16)|trail;
+ length = 4;
+ label = OutputBytes;
+ return label;
+ }
+ } else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0xa0){
+ /*quote C1 control character*/
+ c = (c&0x7f) | (SQ0+1)<<8; /*SQ0+1 == SQ1*/
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else if((c&UConverterConstants.UNSIGNED_INT_MASK)==0xfeff || (c&UConverterConstants.UNSIGNED_INT_MASK)>= 0xfff0){
+ /*quote signature character = byte order mark and specials*/
+ c |= SQU<<16;
+ length = 3;
+ label = OutputBytes;
+ return label;
+ } else {
+ /*compress all other BMP characters*/
+ if((window=getWindow(data.fromUDynamicOffsets))>=0){
+ /*there is a window defined that contains this character - switch to it or quote from it*/
+ /*switch if this is the last character or the next one (peeked, not consumed) also fits the window*/
+ if(source.position()>=source.limit() || isInOffsetWindowOrDirect(data.fromUDynamicOffsets[window], source.get(source.position()))){
+ /*change to dynamic window*/
+ dynamicWindow = window;
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(dynamicWindow);
+ c = ((SC0+window)<<8) | (c- currentOffset) | 0x80;
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else {
+ /*quote from dynamic window*/
+ c = ((SQ0+window)<<8) | (c - data.fromUDynamicOffsets[window]) |
+ 0x80;
+ length = 2;
+ label = OutputBytes;
+ return label;
+ }
+ } else if((window = getWindow(staticOffsets))>=0){
+ /*quote from static window*/
+ c = ((SQ0+window)<<8) | (c - staticOffsets[window]);
+ length = 2;
+ label = OutputBytes;
+ return label;
+ }else if((code=getDynamicOffset())>=0){
+ /*define a dynamic window with this character*/
+ dynamicWindow = getNextDynamicWindow();
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(dynamicWindow);
+ c = ((SD0+dynamicWindow)<<16) | (code<<8)|
+ (c - currentOffset) | 0x80;
+ length = 3;
+ label = OutputBytes;
+ return label;
+ } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() ||
+ ((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){
+
+ /*
+ * this character is not compressible (a BMP ideograph or similar)
+ * switch to Unicode mode if this is the last character in the block
+ * or there is at least one more ideograph following immediately
+ */
+ isSingleByteMode = false;
+ c|=SCU<<16;
+ length =3;
+ label = OutputBytes;
+ return label;
+ } else {
+ /*quote Unicode*/
+ c|=SQU<<16;
+ length = 3;
+ label = OutputBytes;
+ return label;
+ }
+ }
+ /*normal end of conversion : prepare for new character */
+ c = 0;
+ sourceIndex = nextSourceIndex;
+ }
+ } else {
+ /*c!=0: a character is still pending (c was loaded from fromUChar32), continue at getTrailUnicode*/
+ if(c!=0 && targetCapacity>0 && !AfterGetTrailUnicode){
+ label = GetTrailUnicode;
+ return label;
+ }
+
+ /*state machine for Unicode*/
+ /*unicodeByteMode*/
+ while(AfterGetTrailUnicode || source.hasRemaining()){
+ if(targetCapacity<=0 && !AfterGetTrailUnicode){
+ /*target is full*/
+ /*
+ * NOTE(review): unlike the single-byte branch above, this path clears
+ * LabelLoop directly instead of returning EndLoop to a running dispatch
+ * loop, so endLoop() is not called here and isSingleByteMode,
+ * dynamicWindow and c are not written back - confirm this is intended.
+ */
+ cr = CoderResult.OVERFLOW;
+ LabelLoop = false;
+ break;
+ }
+ if(!AfterGetTrailUnicode){
+ c = source.get();
+ ++nextSourceIndex;
+ }
+
+ if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){
+ /*not compressible, write character directly */
+ if(targetCapacity>=2){
+ target.put((byte)(c>>8));
+ target.put((byte)c);
+ if(offsets!=null){
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ }
+ targetCapacity-=2;
+ } else {
+ /*only one byte of room left: let outputBytes() split the two bytes*/
+ length =2;
+ label = OutputBytes;
+ return label;
+ }
+ } else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){
+ /*compress BMP character if the following one is not an uncompressible ideograph*/
+ if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){
+ if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26
+ || (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){
+ /*ASCII digit or letter*/
+ isSingleByteMode = true;
+ c |=((UC0+dynamicWindow)<<8)|c;
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else if((window=getWindow(data.fromUDynamicOffsets))>=0){
+ /*there is a dynamic window that contains this character, change to it*/
+ isSingleByteMode = true;
+ dynamicWindow = window;
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow];
+ useDynamicWindow(dynamicWindow);
+ c = ((UC0+dynamicWindow)<<8) | (c- currentOffset) | 0x80;
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else if((code=getDynamicOffset())>=0){
+ /*define a dynamic window with this character*/
+ isSingleByteMode = true;
+ dynamicWindow = getNextDynamicWindow();
+ currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
+ useDynamicWindow(dynamicWindow);
+ c = ((UD0+dynamicWindow)<<16) | (code<<8)
+ |(c - currentOffset) | 0x80;
+ length = 3;
+ label = OutputBytes;
+ return label;
+ }
+ }
+
+ /*don't know how to compress this character, just write it directly*/
+ length = 2;
+ label = OutputBytes;
+ return label;
+ } else if(c<0xe000 && !AfterGetTrailUnicode){
+ /*remaining values below 0xe000 are surrogates (0xd800..0xdfff)*/
+ label = GetTrailUnicode;
+ return label;
+ } else if (!AfterGetTrailUnicode){
+ /*quote to avoid SCSU tags*/
+ c|=UQU<<16;
+ length = 3;
+ label = OutputBytes;
+ return label;
+ }
+
+ if(AfterGetTrailUnicode){
+ AfterGetTrailUnicode = false;
+ }
+ /*normal end of conversion, prepare for a new character*/
+ c = 0;
+ sourceIndex = nextSourceIndex;
+ }
+ }
+ /*source exhausted (or Unicode-mode overflow): finish*/
+ label = EndLoop;
+ return label;
+ }
+
+ /**
+  * Single-byte-mode helper (the "getTrailSingle:" label): treats c as a lead
+  * surrogate and tries to combine it with the next source code unit.
+  *
+  * If the next unit is a trail surrogate it is consumed and c becomes the
+  * supplementary code point. Otherwise cr is set to a malformed-input
+  * result of length 1. With no input left, c is left unchanged. In every
+  * case AfterGetTrail is set to true before returning.
+  *
+  * @param source  characters to encode; at most one unit is consumed
+  * @param target  not used by this method
+  * @param offsets not used by this method
+  * @return Loop when a surrogate pair was combined, EndLoop otherwise
+  */
+ private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+     lead = (char)c;
+     int nextLabel;
+     if(!source.hasRemaining()){
+         /*no more input*/
+         nextLabel = EndLoop;
+     } else {
+         /*peek at the following code unit without consuming it*/
+         trail = source.get(source.position());
+         if(!UTF16.isTrailSurrogate(trail)){
+             /*this is unmatched lead code unit (1st Surrogate)*/
+             /*callback(illegal)*/
+             cr = CoderResult.malformedForLength(1);
+             nextLabel = EndLoop;
+         } else {
+             source.position(source.position()+1);
+             ++nextSourceIndex;
+             c = UCharacter.getCodePoint((char)c, trail);
+             nextLabel = Loop;
+         }
+     }
+     AfterGetTrail = true;
+     return nextLabel;
+ }
+
+ /**
+  * Unicode-mode handler for a surrogate code unit held in c (the
+  * "getTrailUnicode:" label).
+  *
+  * A lead surrogate is combined with the following trail surrogate into a
+  * supplementary code point, which is then packed into c/length in one of
+  * three ways: switch to an existing dynamic window, define an extended
+  * window, or write the surrogate pair as is. Unpaired surrogates set cr to
+  * a malformed-input result of length 1. AfterGetTrailUnicode is always set
+  * to true.
+  *
+  * @param source  characters to encode; at most one unit is consumed
+  * @param target  not used by this method
+  * @param offsets not used by this method
+  * @return OutputBytes when bytes were prepared in c/length, EndLoop on
+  *         malformed input or when the trail unit is not available yet
+  */
+ private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+     AfterGetTrailUnicode = true;
+
+     /*c is surrogate*/
+     if(!UTF16.isLeadSurrogate((char)c)){
+         /*this is an unmatched trail code point (2nd surrogate)*/
+         /*callback (illegal)*/
+         cr = CoderResult.malformedForLength(1);
+         return EndLoop;
+     }
+
+     lead = (char)c;
+     if(!source.hasRemaining()){
+         /*no more input*/
+         return EndLoop;
+     }
+
+     /*test the following code unit*/
+     trail = source.get(source.position());
+     if(!UTF16.isTrailSurrogate(trail)){
+         /*this is unmatched lead code unit(1st surrogate)*/
+         /*callback(illegal)*/
+         cr = CoderResult.malformedForLength(1);
+         return EndLoop;
+     }
+
+     /*consume the trail unit and convert this surrogate code point*/
+     source.get();
+     ++nextSourceIndex;
+     c = UCharacter.getCodePoint(lead, trail);
+
+     /*compress supplementary character*/
+     window = getWindow(data.fromUDynamicOffsets);
+     if(window>=0 &&
+             (!source.hasRemaining() || ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) >=
+             (0xd800 - 0x3400))){
+         /*
+          * there is a dynamic window that contains this character and the
+          * following character is not uncompressible: change to the window
+          */
+         isSingleByteMode = true;
+         dynamicWindow = window;
+         currentOffset = data.fromUDynamicOffsets[dynamicWindow];
+         useDynamicWindow(dynamicWindow);
+         c = ((UC0+dynamicWindow)<<8 | (c-currentOffset) | 0x80);
+         length = 2;
+     } else if(source.hasRemaining() && lead == source.get(source.position()) && (code=getDynamicOffset())>=0){
+         /*two supplementary characters in (probably) the same window - define an extended one*/
+         isSingleByteMode = true;
+         dynamicWindow = getNextDynamicWindow();
+         currentOffset = data.fromUDynamicOffsets[dynamicWindow] = offset;
+         useDynamicWindow(dynamicWindow);
+         c = (UDX<<24) | (dynamicWindow<<21) |(code<<8) |(c - currentOffset) | 0x80;
+         length = 4;
+     } else {
+         /*don't know how to compress this character, just write it directly*/
+         c = (lead<<16)|trail;
+         length = 4;
+     }
+     return OutputBytes;
+ }
+
+ /**
+ * Final state handler (the "endloop:" label): stores isSingleByteMode and
+ * dynamicWindow back into data, stores the pending character c into
+ * fromUChar32, and clears LabelLoop so that encodeLoop stops dispatching.
+ *
+ * @param source not used by this method
+ * @param target not used by this method
+ * @param offsets not used by this method
+ */
+ private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+ /*set the converter state back to UConverter*/
+ data.fromUIsSingleByteMode = isSingleByteMode;
+ data.fromUDynamicWindow = dynamicWindow;
+ fromUChar32 = c;
+ LabelLoop = false;
+ }
+
+ /**
+  * State handler for the "outputBytes:" label: writes the low-order
+  * length bytes of c to target, most significant byte first.
+  *
+  * If all bytes fit, c is cleared, sourceIndex advances to nextSourceIndex
+  * and Loop is returned. If they do not fit, the bytes that have no room
+  * are stored in errorBuffer (their count in errorBufferLength), the
+  * leading bytes that do fit are written to target, cr is set to OVERFLOW
+  * and EndLoop is returned.
+  *
+  * @param source  not used by this method
+  * @param target  buffer receiving the bytes
+  * @param offsets if non-null, receives sourceIndex for every byte written to target
+  * @return Loop after a complete write, EndLoop after a target overflow
+  */
+ @SuppressWarnings("fallthrough")
+ private int outputBytes(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+     int label;
+     //int targetCapacity = target.limit()-target.position();
+     /*write the output character byte from c and length*/
+     /*from the first if in the loop we know that targetCapacity>0*/
+     if(length<=targetCapacity){
+         switch(length){
+             /*each branch falls through the next one*/
+             case 4:
+                 target.put((byte)(c>>24));
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             case 3:
+                 target.put((byte)(c>>16));
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             case 2:
+                 target.put((byte)(c>>8));
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             case 1:
+                 target.put((byte)c);
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             default:
+                 /*will never occur*/
+                 break;
+         }
+         targetCapacity-=length;
+
+         /*normal end of conversion: prepare for a new character*/
+         c = 0;
+         sourceIndex = nextSourceIndex;
+         label = Loop;
+         return label;
+     } else {
+         ByteBuffer p = ByteBuffer.wrap(errorBuffer);
+         /*
+          * We actually do this backwards here:
+          * In order to save an intermediate variable, we output
+          * first to the overflow buffer what does not fit into the
+          * regular target
+          */
+         /* we know that 0<=targetCapacity<length<=4 */
+         /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity */
+         length -= targetCapacity;
+         /* length is now the number of trailing bytes that overflow */
+         switch(length){
+             /*each branch falls through the next one*/
+             case 4:
+                 p.put((byte)(c>>24));
+             case 3:
+                 p.put((byte)(c>>16));
+             case 2:
+                 p.put((byte)(c>>8));
+             case 1:
+                 p.put((byte)c);
+             default:
+                 /*will never occur*/
+                 break;
+         }
+         errorBufferLength = length;
+
+         /*now output what fits into the regular target*/
+         c>>=8*length; //length was reduced by targetCapacity
+         switch(targetCapacity){
+             /*each branch falls through the next one*/
+             case 3:
+                 target.put((byte)(c>>16));
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             case 2:
+                 target.put((byte)(c>>8));
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             case 1:
+                 target.put((byte)c);
+                 if(offsets!=null){
+                     offsets.put(sourceIndex);
+                 }
+             default:
+                 break;
+         }
+
+         /*target overflow*/
+         targetCapacity = 0;
+         cr = CoderResult.OVERFLOW;
+         c = 0;
+         label = EndLoop;
+         return label;
+     }
+ }
+
+ }
+
+ /**
+ * Creates a new SCSU decoder bound to this charset.
+ *
+ * @return a new CharsetDecoderSCSU instance
+ */
+ public CharsetDecoder newDecoder() {
+ return new CharsetDecoderSCSU(this);
+ }
+
+ /**
+ * Creates a new SCSU encoder bound to this charset.
+ *
+ * @return a new CharsetEncoderSCSU instance
+ */
+ public CharsetEncoder newEncoder() {
+ return new CharsetEncoderSCSU(this);
+ }
+
+ /**
+ * Fills setFillIn by delegating to CharsetICU.getCompleteUnicodeSet.
+ *
+ * @param setFillIn the set to fill
+ * @param which not consulted by this implementation
+ */
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+ CharsetICU.getCompleteUnicodeSet(setFillIn);
+ }
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java
new file mode 100644
index 00000000000..b9bf1c4cfa9
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetSelector.java
@@ -0,0 +1,215 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ ******************************************************************************
+ */
+
+/*
+ * This is a port of the C++ class UConverterSelector.
+ *
+ * Methods related to serialization are not ported in this version. In addition,
+ * the selectForUTF8 method is not going to be ported, as UTF8 is seldom used
+ * in Java.
+ *
+ * @author Shaopeng Jia
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.ibm.icu.impl.IntTrie;
+import com.ibm.icu.impl.PropsVectors;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Charset Selector
+ *
+ * A charset selector is built with a list of charset names and given an input
+ * CharSequence returns the list of names the corresponding charsets which can
+ * convert the CharSequence.
+ *
+ * @stable ICU 4.2
+ */
+public final class CharsetSelector {
+ private IntTrie trie;
+ private int[] pv; // table of bits
+ private String[] encodings; // encodings users ask to use
+
+ private void generateSelectorData(PropsVectors pvec,
+ UnicodeSet excludedCodePoints, int mappingTypes) {
+ int columns = (encodings.length + 31) / 32;
+
+ // set errorValue to all-ones
+ for (int col = 0; col < columns; ++col) {
+ pvec.setValue(PropsVectors.ERROR_VALUE_CP,
+ PropsVectors.ERROR_VALUE_CP, col, ~0, ~0);
+ }
+
+ for (int i = 0; i < encodings.length; ++i) {
+ Charset testCharset = CharsetICU.forNameICU(encodings[i]);
+ UnicodeSet unicodePointSet = new UnicodeSet(); // empty set
+ ((CharsetICU) testCharset).getUnicodeSet(unicodePointSet,
+ mappingTypes);
+ int column = i / 32;
+ int mask = 1 << (i % 32);
+ // now iterate over intervals on set i
+ int itemCount = unicodePointSet.getRangeCount();
+ for (int j = 0; j < itemCount; ++j) {
+ int startChar = unicodePointSet.getRangeStart(j);
+ int endChar = unicodePointSet.getRangeEnd(j);
+ pvec.setValue(startChar, endChar, column, ~0, mask);
+ }
+ }
+
+ // handle excluded encodings
+ // Simply set their values to all 1's in the pvec
+ if (!excludedCodePoints.isEmpty()) {
+ int itemCount = excludedCodePoints.getRangeCount();
+ for (int j = 0; j < itemCount; ++j) {
+ int startChar = excludedCodePoints.getRangeStart(j);
+ int endChar = excludedCodePoints.getRangeEnd(j);
+ for (int col = 0; col < columns; col++) {
+ pvec.setValue(startChar, endChar, col, ~0, ~0);
+ }
+ }
+ }
+
+ trie = pvec.compactToTrieWithRowIndexes();
+ pv = pvec.getCompactedArray();
+ }
+
+ // internal function to intersect two sets of masks
+ // returns whether the mask has reduced to all zeros. The
+ // second set of mask consists of len elements in pv starting from
+ // pvIndex
+ private boolean intersectMasks(int[] dest, int pvIndex, int len) {
+ int oredDest = 0;
+ for (int i = 0; i < len; ++i) {
+ oredDest |= (dest[i] &= pv[pvIndex + i]);
+ }
+ return oredDest == 0;
+ }
+
+ // internal function
+ private List selectForMask(int[] mask) {
+ // this is the context we will use. Store a table of indices to which
+ // encodings are legit
+
+ List result = new ArrayList();
+ int columns = (encodings.length + 31) / 32;
+ int numOnes = countOnes(mask, columns);
+
+ // now we know the exact space we need to index
+ if (numOnes > 0) {
+ int k = 0;
+ for (int j = 0; j < columns; j++) {
+ int v = mask[j];
+ for (int i = 0; i < 32 && k < encodings.length; i++, k++) {
+ if ((v & 1) != 0) {
+ result.add(encodings[k]);
+ }
+ v >>= 1;
+ }
+ }
+ }
+
+ // otherwise, index will remain NULL
+ return result;
+ }
+
+ // internal function to count how many 1's are there in a mask
+ // algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
+ private int countOnes(int[] mask, int len) {
+ int totalOnes = 0;
+ for (int i = 0; i < len; ++i) {
+ int ent = mask[i];
+ for (; ent != 0; totalOnes++) {
+ ent &= ent - 1; // clear the least significant bit set
+ }
+ }
+ return totalOnes;
+ }
+
+ /**
+ * Construct a CharsetSelector from a list of charset names.
+ *
+ * @param charsetList
+ * a list of charset names in the form of strings. If charsetList
+ * is empty, a selector for all available charset is constructed.
+ * @param excludedCodePoints
+ * a set of code points to be excluded from consideration.
+ * Excluded code points appearing in the input CharSequence do
+ * not change the selection result. It could be empty when no
+ * code point should be excluded.
+ * @param mappingTypes
+ * an int which determines whether to consider only roundtrip
+ * mappings or also fallbacks, e.g. CharsetICU.ROUNDTRIP_SET. See
+ * CharsetICU.java for the constants that are currently
+ * supported.
+ * @throws IllegalArgumentException
+ * if the parameters is invalid.
+ * @throws IllegalCharsetNameException
+ * If the given charset name is illegal.
+ * @throws UnsupportedCharsetException
+ * If no support for the named charset is available in this
+ * instance of the Java virtual machine.
+ * @stable ICU 4.2
+ */
+ public CharsetSelector(List charsetList, UnicodeSet excludedCodePoints,
+ int mappingTypes) {
+ if (mappingTypes != CharsetICU.ROUNDTRIP_AND_FALLBACK_SET
+ && mappingTypes != CharsetICU.ROUNDTRIP_SET) {
+ throw new IllegalArgumentException("Unsupported mappingTypes");
+ }
+
+ int encodingCount = charsetList.size();
+ if (encodingCount > 0) {
+ encodings = charsetList.toArray(new String[0]);
+ } else {
+ encodings = CharsetProviderICU.getAvailableNames();
+ encodingCount = encodings.length;
+ }
+
+ PropsVectors pvec = new PropsVectors((encodingCount + 31) / 32);
+ generateSelectorData(pvec, excludedCodePoints, mappingTypes);
+ }
+
+ /**
+ * Select charsets that can map all characters in a CharSequence, ignoring
+ * the excluded code points.
+ *
+ * @param unicodeText
+ * a CharSequence. It could be empty.
+ * @return a list that contains charset names in the form of strings. The
+ * returned encoding names and their order will be the same as
+ * supplied when building the selector.
+ *
+ * @stable ICU 4.2
+ */
+ public List selectForString(CharSequence unicodeText) {
+ int columns = (encodings.length + 31) / 32;
+ int[] mask = new int[columns];
+ for (int i = 0; i < columns; i++) {
+ mask[i] = - 1; // set each bit to 1
+ // Note: All integers are signed in Java, assigning
+ // 2 ^ 32 -1 to mask is wrong!
+ }
+ int index = 0;
+ while (index < unicodeText.length()) {
+ int c = UTF16.charAt(unicodeText, index);
+ int pvIndex = trie.getCodePointValue(c);
+ index += UTF16.getCharCount(c);
+ if (intersectMasks(mask, pvIndex, columns)) {
+ break;
+ }
+ }
+ return selectForMask(mask);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java
new file mode 100644
index 00000000000..f8963fc7143
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16.java
@@ -0,0 +1,327 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ * @author Niti Hantaweepant
+ */
+class CharsetUTF16 extends CharsetICU {
+
+ private static final int SIGNATURE_LENGTH = 2;
+ private static final byte[] fromUSubstitution_BE = { (byte) 0xff, (byte) 0xfd };
+ private static final byte[] fromUSubstitution_LE = { (byte) 0xfd, (byte) 0xff };
+ private static final byte[] BOM_BE = { (byte) 0xfe, (byte) 0xff };
+ private static final byte[] BOM_LE = { (byte) 0xff, (byte) 0xfe };
+ private static final int ENDIAN_XOR_BE = 0;
+ private static final int ENDIAN_XOR_LE = 1;
+ private static final int NEED_TO_WRITE_BOM = 1;
+
+ private boolean isEndianSpecified;
+ private boolean isBigEndian;
+ private int endianXOR;
+ private byte[] bom;
+ private byte[] fromUSubstitution;
+
+ private int version;
+
+ public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+
+ /* Get the version number (e.g. UTF-16LE,version=1) */
+ int versionIndex = icuCanonicalName.indexOf("version=");
+ if (versionIndex > 0) {
+ version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
+ } else {
+ version = 0;
+ }
+
+ this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE);
+ this.isBigEndian = !(this instanceof CharsetUTF16LE);
+
+ if (isBigEndian) {
+ this.bom = BOM_BE;
+ this.fromUSubstitution = fromUSubstitution_BE;
+ this.endianXOR = ENDIAN_XOR_BE;
+ } else {
+ this.bom = BOM_LE;
+ this.fromUSubstitution = fromUSubstitution_LE;
+ this.endianXOR = ENDIAN_XOR_LE;
+ }
+
+ /* UnicodeBig and UnicodeLittle requires maxBytesPerChar set to 4 in Java 5 or less */
+ if ((VersionInfo.javaVersion().getMajor() == 1 && VersionInfo.javaVersion().getMinor() <= 5)
+ && (isEndianSpecified && version == 1)) {
+ maxBytesPerChar = 4;
+ } else {
+ maxBytesPerChar = 2;
+ }
+
+ minBytesPerChar = 2;
+ maxCharsPerByte = 1;
+ }
+
+ class CharsetDecoderUTF16 extends CharsetDecoderICU {
+
+ private boolean isBOMReadYet;
+ private int actualEndianXOR;
+ private byte[] actualBOM;
+
+ /**
+ * Creates a UTF-16 decoder for the given charset.
+ *
+ * @param cs the charset, passed to the superclass constructor
+ */
+ public CharsetDecoderUTF16(CharsetICU cs) {
+ super(cs);
+ }
+
+ /**
+ * Resets the decoder: runs the superclass reset and forgets any BOM
+ * decision, so that the next decodeLoop call looks for a BOM again.
+ */
+ protected void implReset() {
+ super.implReset();
+ isBOMReadYet = false;
+ actualBOM = null;
+ }
+
+ /**
+  * Decodes UTF-16 bytes into chars.
+  *
+  * Until a BOM decision has been made (isBOMReadYet is false) the first
+  * bytes are inspected for a byte order mark, which fixes actualEndianXOR.
+  * A detected BOM is dropped; bytes that turn out not to be a BOM stay in
+  * toUBytesArray and are decoded as ordinary input. For endian-specified
+  * charsets with version 1, a BOM of the charset's own byte order is
+  * skipped wherever it occurs, and a byte-swapped BOM is reported as
+  * malformed input of length 2.
+  *
+  * @param source  bytes to decode
+  * @param target  buffer receiving the decoded chars
+  * @param offsets only passed on to decodeTrail
+  * @param flush   not consulted by this method
+  * @return UNDERFLOW when source is exhausted, OVERFLOW when target is
+  *         full, or a malformed-input result
+  */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+     if (!isBOMReadYet) {
+         while (true) {
+             if (!source.hasRemaining())
+                 return CoderResult.UNDERFLOW;
+
+             toUBytesArray[toULength++] = source.get();
+
+             if (toULength == 1) {
+                 // on the first byte, we haven't decided whether or not it's bigEndian yet
+                 if ((!isEndianSpecified || isBigEndian)
+                         && toUBytesArray[toULength - 1] == BOM_BE[toULength - 1]) {
+                     actualBOM = BOM_BE;
+                     actualEndianXOR = ENDIAN_XOR_BE;
+                 } else if ((!isEndianSpecified || !isBigEndian)
+                         && toUBytesArray[toULength - 1] == BOM_LE[toULength - 1]) {
+                     actualBOM = BOM_LE;
+                     actualEndianXOR = ENDIAN_XOR_LE;
+                 } else {
+                     // we do not have a BOM (and we have toULength==1 bytes)
+                     if (isEndianSpecified && version == 1) {
+                         actualBOM = isBigEndian ? CharsetUTF16.BOM_BE : CharsetUTF16.BOM_LE;
+                         actualEndianXOR = isBigEndian ? CharsetUTF16.ENDIAN_XOR_BE : CharsetUTF16.ENDIAN_XOR_LE;
+                     } else {
+                         actualBOM = null;
+                         actualEndianXOR = endianXOR;
+                     }
+                     break;
+                 }
+             } else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
+                 // byte-swapped BOM
+                 return CoderResult.malformedForLength(2);
+             } else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
+                 // we found a BOM! at last!
+                 // too bad we have to get ignore it now (like it was unwanted or something)
+                 toULength = 0;
+                 break;
+             } else if (isEndianSpecified || toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) {
+                 // we do not have a BOM (and we have toULength bytes)
+                 // Endian-specified version 1 charsets keep actualBOM (it is the
+                 // charset's own BOM here) because the decoding loop below
+                 // dereferences it for every code unit; clearing it would cause
+                 // a NullPointerException for input such as FE 00.
+                 if (!(isEndianSpecified && version == 1)) {
+                     actualBOM = null;
+                 }
+                 actualEndianXOR = endianXOR;
+                 break;
+             } else if (toULength == SIGNATURE_LENGTH) {
+                 // we found a BOM! at last!
+                 // too bad we have to get ignore it now (like it was unwanted or something)
+                 toULength = 0;
+                 break;
+             }
+         }
+
+         isBOMReadYet = true;
+     }
+
+     // now that we no longer need to look for a BOM, let's do some work
+
+     // if we have unfinished business (a lead surrogate saved by decodeTrail)
+     if (toUnicodeStatus != 0) {
+         CoderResult cr = decodeTrail(source, target, offsets, (char) toUnicodeStatus);
+         if (cr != null)
+             return cr;
+     }
+
+     char char16;
+
+     while (true) {
+         // collect the two bytes of the next code unit
+         while (toULength < 2) {
+             if (!source.hasRemaining())
+                 return CoderResult.UNDERFLOW;
+             toUBytesArray[toULength++] = source.get();
+         }
+
+         if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
+             // byte-swapped BOM
+             return CoderResult.malformedForLength(2);
+         } else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
+             // we found a BOM! at last!
+             // too bad we have to get ignore it now (like it was unwanted or something)
+             toULength = 0;
+             continue;
+         }
+
+         if (!target.hasRemaining())
+             return CoderResult.OVERFLOW;
+
+         char16 = (char) (((toUBytesArray[0 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | ((toUBytesArray[1 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK)));
+
+         if (!UTF16.isSurrogate(char16)) {
+             toULength = 0;
+             target.put(char16);
+         } else {
+             CoderResult cr = decodeTrail(source, target, offsets, char16);
+             if (cr != null)
+                 return cr;
+         }
+     }
+ }
+
+ /**
+  * Completes a surrogate pair whose first code unit (lead) has already been
+  * read into toUBytesArray[0..1].
+  *
+  * @param source  bytes to decode; up to two more bytes are consumed
+  * @param target  buffer receiving the lead and trail chars
+  * @param offsets not used by this method
+  * @param lead    the surrogate code unit that was decoded first
+  * @return null when both chars were written; UNDERFLOW when the trail
+  *         bytes are not available yet (lead is saved in toUnicodeStatus);
+  *         OVERFLOW when only the lead fit (trail goes to
+  *         charErrorBufferArray); a malformed-input result of length 2 when
+  *         lead is not a lead surrogate or is not followed by a trail surrogate
+  */
+ private final CoderResult decodeTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets, char lead) {
+     if (!UTF16.isLeadSurrogate(lead)) {
+         // 2 bytes, lead malformed
+         toUnicodeStatus = 0;
+         return CoderResult.malformedForLength(2);
+     }
+
+     // gather bytes 2 and 3 (the trail code unit)
+     for (; toULength < 4; toULength++) {
+         if (!source.hasRemaining()) {
+             // let this be unfinished business
+             toUnicodeStatus = lead;
+             return CoderResult.UNDERFLOW;
+         }
+         toUBytesArray[toULength] = source.get();
+     }
+
+     final int highByte = toUBytesArray[2 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int lowByte = toUBytesArray[3 ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final char trail = (char) ((highByte << 8) | lowByte);
+
+     if (!UTF16.isTrailSurrogate(trail)) {
+         // pretend like we didnt read the last 2 bytes
+         toULength = 2;
+         source.position(source.position() - 2);
+
+         // 2 bytes, lead malformed
+         toUnicodeStatus = 0;
+         return CoderResult.malformedForLength(2);
+     }
+
+     toUnicodeStatus = 0;
+     toULength = 0;
+
+     target.put(lead);
+
+     if (!target.hasRemaining()) {
+         /* Put in overflow buffer (not handled here) */
+         charErrorBufferArray[0] = trail;
+         charErrorBufferLength = 1;
+         return CoderResult.OVERFLOW;
+     }
+     target.put(trail);
+     return null;
+ }
+ }
+
+ class CharsetEncoderUTF16 extends CharsetEncoderICU {
+ private final byte[] temp = new byte[4];
+
+ /**
+ * Creates a UTF-16 encoder for the given charset. A BOM is scheduled for
+ * output (NEED_TO_WRITE_BOM) unless the charset has a specified endianness
+ * and a version other than 1.
+ *
+ * @param cs the charset, passed to the superclass constructor together with fromUSubstitution
+ */
+ public CharsetEncoderUTF16(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
+ }
+
+ /**
+ * Resets the encoder: runs the superclass reset and re-arms the BOM flag
+ * with the same rule the constructor uses.
+ */
+ protected void implReset() {
+ super.implReset();
+ fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
+ }
+
+ /**
+  * Encodes chars into UTF-16 bytes.
+  *
+  * Writes the BOM first when fromUnicodeStatus asks for it, then finishes a
+  * character left pending in fromUChar32, then encodes source one code
+  * unit at a time through encodeChar.
+  *
+  * @param source  characters to encode
+  * @param target  buffer receiving the bytes
+  * @param offsets passed on to fromUWriteBytes
+  * @param flush   not consulted by this method
+  * @return UNDERFLOW when source is exhausted, OVERFLOW when target has no
+  *         room, or whatever non-null result encodeChar reports
+  */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+     CoderResult result;
+
+     /* write the BOM if necessary */
+     if (fromUnicodeStatus == NEED_TO_WRITE_BOM) {
+         if (!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+         fromUnicodeStatus = 0;
+         result = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
+         if (result.isOverflow()) {
+             return result;
+         }
+     }
+
+     /* finish a character that was started in a previous call */
+     if (fromUChar32 != 0) {
+         if (!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+         // a note: fromUChar32 will either be 0 or a lead surrogate
+         result = encodeChar(source, target, offsets, (char) fromUChar32);
+         if (result != null) {
+             return result;
+         }
+     }
+
+     while (source.hasRemaining()) {
+         if (!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+         result = encodeChar(source, target, offsets, source.get());
+         if (result != null) {
+             return result;
+         }
+     }
+     return CoderResult.UNDERFLOW;
+ }
+
+ /**
+  * Encodes one code unit, or one surrogate pair starting with it, into
+  * UTF-16 bytes in the charset's byte order.
+  *
+  * For a surrogate, handleSurrogates is asked to complete the pair; if it
+  * returns a result, that result is returned unchanged. Otherwise the trail
+  * surrogate is taken from fromUChar32, which is then cleared.
+  *
+  * @param source  the source buffer; its position minus one is used as the source index
+  * @param target  buffer receiving the bytes
+  * @param offsets passed on to fromUWriteBytes
+  * @param ch      the code unit to encode
+  * @return null when the bytes were written with an underflow result,
+  *         otherwise the result to report to the caller
+  */
+ private final CoderResult encodeChar(CharBuffer source, ByteBuffer target, IntBuffer offsets, char ch) {
+     final int sourceIndex = source.position() - 1;
+     int byteCount = 2;
+
+     if (UTF16.isSurrogate(ch)) {
+         final CoderResult surrogateResult = handleSurrogates(source, ch);
+         if (surrogateResult != null) {
+             return surrogateResult;
+         }
+
+         final char trail = UTF16.getTrailSurrogate(fromUChar32);
+         fromUChar32 = 0;
+
+         // bytes 2 and 3 hold the trail surrogate
+         temp[2 ^ endianXOR] = (byte) (trail >>> 8);
+         temp[3 ^ endianXOR] = (byte) (trail);
+         byteCount = 4;
+     }
+
+     // bytes 0 and 1 hold ch itself
+     temp[0 ^ endianXOR] = (byte) (ch >>> 8);
+     temp[1 ^ endianXOR] = (byte) (ch);
+
+     final CoderResult written = fromUWriteBytes(this, temp, 0, byteCount, target, offsets, sourceIndex);
+     if (written.isUnderflow()) {
+         return null;
+     }
+     return written;
+ }
+ }
+
+ /**
+ * Creates a new UTF-16 decoder bound to this charset.
+ *
+ * @return a new CharsetDecoderUTF16 instance
+ */
+ public CharsetDecoder newDecoder() {
+ return new CharsetDecoderUTF16(this);
+ }
+
+ /**
+ * Creates a new UTF-16 encoder bound to this charset.
+ *
+ * @return a new CharsetEncoderUTF16 instance
+ */
+ public CharsetEncoder newEncoder() {
+ return new CharsetEncoderUTF16(this);
+ }
+
+ /**
+ * Fills setFillIn by delegating to getNonSurrogateUnicodeSet.
+ *
+ * @param setFillIn the set to fill
+ * @param which not consulted by this implementation
+ */
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+ getNonSurrogateUnicodeSet(setFillIn);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java
new file mode 100644
index 00000000000..b1bb374d8cc
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16BE.java
@@ -0,0 +1,17 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Big-endian UTF-16 variant. This class adds no logic of its own and only forwards its constructor arguments;
+ * isBigEndian/isEndianSpecified are presumably derived by the super class from this type -- confirm in CharsetUTF16.
+ */
+class CharsetUTF16BE extends CharsetUTF16 {
+ public CharsetUTF16BE(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java
new file mode 100644
index 00000000000..07607a0156d
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF16LE.java
@@ -0,0 +1,17 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Little-endian UTF-16 variant. This class adds no logic of its own and only forwards its constructor arguments;
+ * isBigEndian/isEndianSpecified are presumably derived by the super class from this type -- confirm in CharsetUTF16.
+ */
+class CharsetUTF16LE extends CharsetUTF16 {
+ public CharsetUTF16LE(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java
new file mode 100644
index 00000000000..d2a8a5f2898
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32.java
@@ -0,0 +1,251 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author Niti Hantaweepant
+ */
+class CharsetUTF32 extends CharsetICU {
+
+ private static final int SIGNATURE_LENGTH = 4; // number of bytes in BOM_BE / BOM_LE
+ private static final byte[] fromUSubstitution_BE = { (byte) 0, (byte) 0, (byte) 0xff, (byte) 0xfd }; // U+FFFD, most significant byte first
+ private static final byte[] fromUSubstitution_LE = { (byte) 0xfd, (byte) 0xff, (byte) 0, (byte) 0 }; // U+FFFD, least significant byte first
+ private static final byte[] BOM_BE = { 0, 0, (byte) 0xfe, (byte) 0xff }; // U+FEFF, most significant byte first
+ private static final byte[] BOM_LE = { (byte) 0xff, (byte) 0xfe, 0, 0 }; // U+FEFF, least significant byte first
+ private static final int ENDIAN_XOR_BE = 0; // XORed into byte indices 0..3: 0 leaves the order unchanged
+ private static final int ENDIAN_XOR_LE = 3; // i ^ 3 maps 0..3 to 3..0, i.e. reverses the byte order
+ private static final int NEED_TO_WRITE_BOM = 1; // fromUnicodeStatus value meaning the encoder has not written its BOM yet
+
+ private boolean isEndianSpecified; // true for the BE/LE subclasses (set in the constructor)
+ private boolean isBigEndian; // false only for CharsetUTF32LE (set in the constructor)
+ private int endianXOR; // ENDIAN_XOR_BE or ENDIAN_XOR_LE, matching isBigEndian
+ private byte[] bom; // BOM_BE or BOM_LE, matching isBigEndian
+ private byte[] fromUSubstitution; // substitution bytes handed to the encoder's super constructor
+
+ public CharsetUTF32(String icuCanonicalName, String javaCanonicalName, String[] aliases) { // derives the byte order from the concrete subclass
+ super(icuCanonicalName, javaCanonicalName, aliases);
+
+ this.isEndianSpecified = (this instanceof CharsetUTF32BE || this instanceof CharsetUTF32LE);
+ this.isBigEndian = !(this instanceof CharsetUTF32LE); // plain CharsetUTF32 is treated as big-endian
+
+ if (isBigEndian) {
+ this.bom = BOM_BE;
+ this.fromUSubstitution = fromUSubstitution_BE;
+ this.endianXOR = ENDIAN_XOR_BE;
+ } else {
+ this.bom = BOM_LE;
+ this.fromUSubstitution = fromUSubstitution_LE;
+ this.endianXOR = ENDIAN_XOR_LE;
+ }
+
+ maxBytesPerChar = 4; // fields not declared in this class; presumably inherited from CharsetICU
+ minBytesPerChar = 4;
+ maxCharsPerByte = 1;
+ }
+
+ class CharsetDecoderUTF32 extends CharsetDecoderICU { // decodes UTF-32 bytes into UTF-16 chars, checking for a BOM first
+
+ private boolean isBOMReadYet; // true once the signature check in decodeLoop has finished
+ private int actualEndianXOR; // byte-order XOR used for decoding, chosen by the signature check
+ private byte[] actualBOM; // BOM candidate being matched byte by byte; null when there is none
+
+ public CharsetDecoderUTF32(CharsetICU cs) {
+ super(cs);
+ }
+
+ protected void implReset() {
+ super.implReset();
+ isBOMReadYet = false; // the next decodeLoop call runs the signature check again
+ actualBOM = null;
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ /*
+ * Until the signature check is finished, collect incoming bytes in toUBytesArray and compare them with
+ * BOM_BE/BOM_LE. A complete 4-byte BOM is dropped (toULength reset to 0); on the first mismatch the bytes
+ * read so far stay in toUBytesArray and are decoded as ordinary input by the main loop below.
+ */
+ if (!isBOMReadYet) {
+ while (true) {
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW; // isBOMReadYet stays false, so the check resumes on the next call
+
+ toUBytesArray[toULength++] = source.get();
+
+ if (toULength == 1) {
+ // on the first byte, we haven't decided whether or not it's bigEndian yet
+ if ((!isEndianSpecified || isBigEndian)
+ && toUBytesArray[toULength - 1] == BOM_BE[toULength - 1]) {
+ actualBOM = BOM_BE;
+ actualEndianXOR = ENDIAN_XOR_BE;
+ } else if ((!isEndianSpecified || !isBigEndian)
+ && toUBytesArray[toULength - 1] == BOM_LE[toULength - 1]) {
+ actualBOM = BOM_LE;
+ actualEndianXOR = ENDIAN_XOR_LE;
+ } else {
+ // we do not have a BOM (and we have toULength==1 bytes)
+ actualBOM = null;
+ actualEndianXOR = endianXOR; // fall back to the charset's own byte order
+ break;
+ }
+ } else if (toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) {
+ // we do not have a BOM (and we have toULength bytes)
+ actualBOM = null;
+ actualEndianXOR = endianXOR;
+ break;
+ } else if (toULength == SIGNATURE_LENGTH) {
+ // we found a BOM! at last!
+ // too bad we have to ignore it now (like it was unwanted or something)
+ toULength = 0;
+ break;
+ }
+ }
+
+ isBOMReadYet = true;
+ }
+
+ // now that we no longer need to look for a BOM, let's do some work
+ int char32;
+
+ while (true) {
+ while (toULength < 4) { // gather the 4 bytes of the next code point (some may be left from the BOM check)
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ toUBytesArray[toULength++] = source.get();
+ }
+
+ if (!target.hasRemaining())
+ return CoderResult.OVERFLOW; // the 4 collected bytes stay in toUBytesArray
+
+ char32 = 0;
+ for (int i = 0; i < 4; i++) // assemble the value, reading the bytes in the order given by actualEndianXOR
+ char32 = (char32 << 8)
+ | (toUBytesArray[i ^ actualEndianXOR] & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ if (0 <= char32 && char32 <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(char32)) {
+ toULength = 0;
+ if (char32 <= UConverterConstants.MAXIMUM_UCS2) {
+ /* fits in 16 bits */
+ target.put((char) char32);
+ } else {
+ /* write out the surrogates */
+ target.put(UTF16.getLeadSurrogate(char32));
+ char32 = UTF16.getTrailSurrogate(char32);
+ if (target.hasRemaining()) {
+ target.put((char) char32);
+ } else {
+ /* Put in overflow buffer (not handled here) */
+ charErrorBufferArray[0] = (char) char32;
+ charErrorBufferLength = 1;
+ return CoderResult.OVERFLOW;
+ }
+ }
+ } else {
+ return CoderResult.malformedForLength(toULength); // negative, too large, or a surrogate value; toULength is 4 here
+ }
+ }
+ }
+ }
+
+ class CharsetEncoderUTF32 extends CharsetEncoderICU { // encodes UTF-16 chars as 4-byte UTF-32 units, with an optional leading BOM
+ private final byte[] temp = new byte[4]; // scratch for one code point; the index 0 ^ endianXOR is never written and stays 0
+
+ public CharsetEncoderUTF32(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM; // a BOM is only scheduled when no byte order was specified
+ }
+
+ protected void implReset() {
+ super.implReset();
+ fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM; // same initial state as in the constructor
+ }
+
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult cr;
+
+ /* write the BOM if necessary */
+ if (fromUnicodeStatus == NEED_TO_WRITE_BOM) {
+ if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ fromUnicodeStatus = 0; // cleared before writing, so the BOM is requested only once
+ cr = fromUWriteBytes(this, bom, 0, bom.length, target, offsets, -1);
+ if (cr.isOverflow())
+ return cr;
+ }
+
+ if (fromUChar32 != 0) { // a value is pending from an earlier call; encode it first
+ if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ // a note: fromUChar32 will either be 0 or a lead surrogate
+ cr = encodeChar(source, target, offsets, (char) fromUChar32);
+ if (cr != null)
+ return cr;
+ }
+
+ while (true) { // one source char per iteration until source is empty or target is full
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ cr = encodeChar(source, target, offsets, source.get());
+ if (cr != null)
+ return cr;
+ }
+ }
+
+ private final CoderResult encodeChar(CharBuffer source, ByteBuffer target, IntBuffer offsets, char ch) { // returns null when the caller may keep looping
+ int sourceIndex = source.position() - 1; // offset passed to fromUWriteBytes for the bytes of ch
+ CoderResult cr;
+ int char32;
+
+ if (UTF16.isSurrogate(ch)) {
+ cr = handleSurrogates(source, ch); // defined outside this view; presumably leaves the full code point in fromUChar32 -- confirm
+ if (cr != null)
+ return cr;
+
+ char32 = fromUChar32;
+ fromUChar32 = 0; // the pending code point is consumed here
+ } else {
+ char32 = ch;
+ }
+
+ /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
+ // temp[0 ^ endianXOR] = (byte) (char32 >>> 24); // (always 0)
+ temp[1 ^ endianXOR] = (byte) (char32 >>> 16); // same as (byte)((char32 >>> 16) & 0x1f)
+ temp[2 ^ endianXOR] = (byte) (char32 >>> 8);
+ temp[3 ^ endianXOR] = (byte) (char32);
+ cr = fromUWriteBytes(this, temp, 0, 4, target, offsets, sourceIndex);
+ return (cr.isUnderflow() ? null : cr); // underflow from the write is mapped to null; anything else is returned as-is
+ }
+ }
+
+ public CharsetDecoder newDecoder() { // each call creates a new CharsetDecoderUTF32 bound to this charset
+ return new CharsetDecoderUTF32(this);
+ }
+
+ public CharsetEncoder newEncoder() { // each call creates a new CharsetEncoderUTF32 bound to this charset
+ return new CharsetEncoderUTF32(this);
+ }
+
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ // 'which' is not consulted here
+ getNonSurrogateUnicodeSet(setFillIn); // helper defined outside this view; fills setFillIn
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java
new file mode 100644
index 00000000000..177b1f7eeb6
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32BE.java
@@ -0,0 +1,17 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Marker subclass for big-endian UTF-32: the CharsetUTF32 constructor tests the instance's class (instanceof) to set
+ * isEndianSpecified and isBigEndian, so this class adds no logic and only forwards its constructor arguments.
+ */
+class CharsetUTF32BE extends CharsetUTF32 {
+ public CharsetUTF32BE(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java
new file mode 100644
index 00000000000..beb8303f22e
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF32LE.java
@@ -0,0 +1,17 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Marker subclass for little-endian UTF-32: the CharsetUTF32 constructor tests the instance's class (instanceof) to
+ * set isEndianSpecified and clear isBigEndian, so this class adds no logic and only forwards its constructor arguments.
+ */
+class CharsetUTF32LE extends CharsetUTF32 {
+ public CharsetUTF32LE(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java
new file mode 100644
index 00000000000..2652c769a17
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF7.java
@@ -0,0 +1,758 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2007-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author Michael Ow
+ *
+ */
+class CharsetUTF7 extends CharsetICU {
+ private final String IMAP_NAME="IMAP-mailbox-name"; // ICU canonical name that switches this charset to the IMAP variant
+ private boolean useIMAP; // true: '&' is the shift character and ',' replaces '/' in base64; false: plain UTF-7 with '+'
+ protected byte[] fromUSubstitution=new byte[]{0x3F}; // '?'; handed to the encoder's super constructor
+
+ public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) { // selects plain UTF-7 or the IMAP variant by ICU name
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64); NOTE(review): the value set is 4, not 3 -- confirm */
+ minBytesPerChar=1;
+ maxCharsPerByte=1;
+
+ useIMAP=false;
+
+ if (icuCanonicalName.equals(IMAP_NAME)) { // exact, case-sensitive match on "IMAP-mailbox-name"
+ useIMAP=true;
+ }
+ }
+
+ //private static boolean inSetD(char c) {
+ // return (
+ // (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */
+ // (char)(c - 48) < 10 || /* digits */
+ // (char)(c - 39) < 3 || /* ' () */
+ // (char)(c - 44) < 4 || /* ,-./ */
+ // (c==58) || (c==63) /* :? */
+ // );
+ //}
+
+ //private static boolean inSetO(char c) {
+ // return (
+ // (char)(c - 33) < 6 || /* !"#$%& */
+ // (char)(c - 59) < 4 || /* ;<=> */
+ // (char)(c - 93) < 4 || /* ]^_` */
+ // (char)(c - 123) < 3 || /* {|} */
+ // (c==58) || (c==63) /* *@[ */
+ // );
+ //}
+
+ private static boolean isCRLFTAB(char c) { // true for carriage return, line feed and horizontal tab only
+ return (
+ (c==13) || (c==10) || (c==9)
+ );
+ }
+
+ //private static boolean isCRLFSPTAB(char c) {
+ // return (
+ // (c==32) || (c==13) || (c==10) || (c==9)
+ // );
+ //}
+
+ private static final byte PLUS=43; // '+': shift character of plain UTF-7
+ private static final byte MINUS=45; // '-': terminates a base64 run
+ private static final byte BACKSLASH=92; // backslash: rejected by isLegal in plain UTF-7
+ //private static final byte TILDE=126;
+ private static final byte AMPERSAND=0x26; // '&': shift character of the IMAP variant
+ private static final byte COMMA=0x2c; // ',': base64 digit 63 in the IMAP variant
+ private static final byte SLASH=0x2f; // '/': base64 digit 63 in plain UTF-7, not a digit in the IMAP variant
+
+ // legal byte values: IMAP 0x20..0x7e; plain UTF-7 0x20..0x7d except backslash, plus CR LF TAB
+ private static boolean isLegal(char c, boolean useIMAP) {
+ if (useIMAP) {
+ return (
+ (0x20 <= c) && (c <= 0x7e)
+ );
+ } else {
+ return (
+ ((char)(c - 32) < 94 && (c != BACKSLASH)) || isCRLFTAB(c) // the char cast wraps c < 32 to a large value, so one compare covers 32..125
+ );
+ }
+ }
+
+ // IMAP variant: directly encode all of printable ASCII 0x20..0x7e except '&' 0x26
+ private static boolean inSetDIMAP(char c) {
+ return (
+ (isLegal(c, true) && c != AMPERSAND)
+ );
+ }
+
+ private static byte TO_BASE64_IMAP(int n) { // like TO_BASE_64[n], but any n >= 63 yields ',' (callers pass 6-bit values)
+ return (n < 63 ? TO_BASE_64[n] : COMMA);
+ }
+
+ private static byte FROM_BASE64_IMAP(char c) { // like FROM_BASE_64[c], but ',' maps to 63 and '/' to -1; c must be a valid table index
+ return (c==COMMA ? 63 : c==SLASH ? -1 : FROM_BASE_64[c]);
+ }
+
+ /* encode directly sets D and O and CR LF SP TAB */
+ private static final byte ENCODE_DIRECTLY_MAXIMUM[] = // indexed by ASCII code 0..127; 1 = the encoder writes the char as-is
+ {
+ /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
+ };
+
+ /* encode directly set D and CR LF SP TAB but not set O */
+ private static final byte ENCODE_DIRECTLY_RESTRICTED[] = // indexed by ASCII code 0..127; 1 = the encoder writes the char as-is
+ {
+ /*0 1 2 3 4 5 6 7 8 9 a b c d e f*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
+ };
+
+ private static final byte TO_BASE_64[] = // maps a 6-bit value 0..63 to its base64 ASCII byte
+ {
+ /* A-Z */
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ /* a-z */
+ 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
+ /* 0-9 */
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ /* +/ */
+ 43, 47
+ };
+
+ private static final byte FROM_BASE_64[] = // indexed by ASCII code 0..127; >=0 base64 digit, -1 other legal char, -2 '-', -3 illegal
+ {
+ /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
+ -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
+ -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+ /* general punctuation with + and / and a special value (-2) for - */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
+ /* digits */
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+ /* A-Z */
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
+ /* a-z*/
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
+ };
+
+ class CharsetDecoderUTF7 extends CharsetDecoderICU { // decodes UTF-7 (or the IMAP mailbox-name variant when useIMAP is set)
+ public CharsetDecoderUTF7(CharsetICU cs) {
+ super(cs);
+ implReset();
+ }
+
+ protected void implReset() {
+ super.implReset();
+ toUnicodeStatus=(toUnicodeStatus & 0xf0000000) | 0x1000000; // keep the top 4 bits, set bit 24 (inDirectMode), clear counter and bits
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult cr=CoderResult.UNDERFLOW;
+ byte base64Value;
+ byte base64Counter;
+ byte inDirectMode;
+ char bits;
+ int byteIndex;
+ int sourceIndex, nextSourceIndex;
+
+ int length;
+
+ char b;
+ char c;
+
+ int sourceArrayIndex=source.position();
+
+ //get the state of the machine state
+ {
+ int status=toUnicodeStatus; // layout: bit 24 = inDirectMode, bits 16..23 = base64Counter, bits 0..15 = bits
+ inDirectMode=(byte)((status >> 24) & 1);
+ base64Counter=(byte)(status >> 16);
+ bits=(char)status;
+ }
+ byteIndex=toULength;
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ directMode: while (true) {
+ if (inDirectMode==1) {
+ /*
+ * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
+ * with their US-ASCII byte values.
+ * Backslash and Tilde and most control characters are not allowed in UTF-7.
+ * A plus sign starts Unicode (or "escape") Mode.
+ * An ampersand starts Unicode Mode for IMAP.
+ *
+ * In Direct Mode, only the sourceIndex is used.
+ */
+ byteIndex=0;
+ length=source.remaining();
+ //targetCapacity=target.remaining();
+ //Commented out because length of source may be larger than target when it comes to bytes
+ /*if (useIMAP && length > targetCapacity) {
+ length=targetCapacity;
+ }*/
+ while (length > 0) {
+ b=(char)(source.get()); // bytes >= 0x80 sign-extend to chars >= 0xff80 and fail isLegal below
+ sourceArrayIndex++;
+ if (!isLegal(b, useIMAP)) {
+ toUBytesArray[0]=(byte)b;
+ byteIndex=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break;
+ } else if ((!useIMAP && b!=PLUS) || (useIMAP && b!=AMPERSAND)) {
+ // write directly encoded character
+ if (target.hasRemaining()) { // Check to make sure that there is room in target.
+ target.put(b);
+ if (offsets!= null) {
+ offsets.put(sourceIndex++);
+ }
+ } else { // Get out and set the CoderResult.
+ charErrorBufferArray[charErrorBufferLength++] = b;
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ } else { /* PLUS or (AMPERSAND in IMAP)*/
+ /* switch to Unicode mode */
+ nextSourceIndex=++sourceIndex;
+ inDirectMode=0;
+ byteIndex=0;
+ bits=0;
+ base64Counter=-1;
+ continue directMode;
+ }
+ --length;
+ }//end of while
+ if (source.hasRemaining() && target.position() >= target.limit()) {
+ /* target is full */
+ cr=CoderResult.OVERFLOW;
+ }
+ break directMode;
+ } else { /* Unicode Mode*/
+ /*
+ * In Unicode Mode, UTF-16BE is base64-encoded.
+ * The base64 sequence ends with any character that is not in the base64 alphabet.
+ * A terminating minus sign is consumed.
+ *
+ * In Unicode Mode, the sourceIndex has the index to the start of the current
+ * base64 bytes, while nextSourceIndex is precisely parallel to source,
+ * keeping the index to the following byte.
+ */
+ while(source.hasRemaining()) {
+ if (target.hasRemaining()) {
+ b=(char)source.get();
+ sourceArrayIndex++;
+ toUBytesArray[byteIndex++]=(byte)b;
+ if ((!useIMAP && b>=126) || (useIMAP && b>0x7e)) {
+ /* illegal - test other illegal US-ASCII values by base64Value==-3 */
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break directMode;
+ } else if ((base64Value=(useIMAP ? FROM_BASE64_IMAP(b) : FROM_BASE_64[b]))>=0) {
+ /* collect base64 bytes (one table lookup only, so ',' keeps its -1 in plain UTF-7) */
+ switch (base64Counter) {
+ case -1: /* -1 is immediately after the + */
+ case 0:
+ bits=(char)base64Value;
+ base64Counter=1;
+ break;
+ case 1:
+ case 3:
+ case 4:
+ case 6:
+ bits=(char)((bits<<6) | base64Value);
+ ++base64Counter;
+ break;
+ case 2:
+ c=(char)((bits<<4) | (base64Value>>2));
+ if (useIMAP && isLegal(c, useIMAP)) {
+ // illegal
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ // goto endloop;
+ break directMode;
+ }
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ sourceIndex=nextSourceIndex - 1;
+ }
+ toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(char)(base64Value&3);
+ base64Counter=3;
+ break;
+ case 5:
+ c=(char)((bits<<2) | (base64Value>>4));
+ if(useIMAP && isLegal(c, useIMAP)) {
+ // illegal
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ // goto endloop;
+ break directMode;
+ }
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ sourceIndex=nextSourceIndex - 1;
+ }
+ toUBytesArray[0]=(byte)b; /* keep this byte in case an error occurs */
+ byteIndex=1;
+ bits=(char)(base64Value&15);
+ base64Counter=6;
+ break;
+ case 7:
+ c=(char)((bits<<6) | base64Value);
+ if (useIMAP && isLegal(c, useIMAP)) {
+ // illegal
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ // goto endloop;
+ break directMode;
+ }
+ target.put(c);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ sourceIndex=nextSourceIndex;
+ }
+ byteIndex=0;
+ bits=0;
+ base64Counter=0;
+ break;
+ //default:
+ /* will never occur */
+ //break;
+ }//end of switch
+ } else if (base64Value==-2) {
+ /* minus sign terminates the base64 sequence */
+ inDirectMode=1;
+ if (base64Counter==-1) {
+ /* +- i.e. a minus immediately following a plus */
+ target.put(useIMAP ? (char)AMPERSAND : (char)PLUS);
+ if (offsets != null) {
+ offsets.put(sourceIndex - 1);
+ }
+ } else {
+ /* absorb the minus and leave the Unicode Mode */
+ if (bits!=0 || (useIMAP && base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
+ /*bits are illegally left over, a unicode character is incomplete */
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ continue directMode;
+ } else if (!useIMAP && base64Value==-1) { /* for any legal character except base64 and minus sign */
+ /* leave the Unicode Mode */
+ inDirectMode=1;
+ if (base64Counter==-1) {
+ /* illegal: + immediately followed by something other than base64 minus sign */
+ /* include the plus sign in the reported sequence */
+ --sourceIndex;
+ toUBytesArray[0]=PLUS;
+ toUBytesArray[1]=(byte)b;
+ byteIndex=2;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break;
+ } else if (bits==0) {
+ /* un-read the character in case it is a plus sign */
+ source.position(--sourceArrayIndex);
+ sourceIndex=nextSourceIndex - 1;
+ continue directMode;
+ } else {
+ /* bits are illegally left over, a unicode character is incomplete */
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break;
+ }
+ } else {
+ if (useIMAP && base64Counter==-1) {
+ // illegal: & immediately followed by something other than base64 or minus sign
+ // include the ampersand in the reported sequence
+ --sourceIndex;
+ toUBytesArray[0]=AMPERSAND;
+ toUBytesArray[1]=(byte)b;
+ byteIndex=2;
+ }
+ /* base64Value==-3 for illegal characters */
+ /* illegal */
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceArrayIndex);
+ break;
+ }
+ } else {
+ /* target is full */
+ cr=CoderResult.OVERFLOW;
+ break;
+ }
+ } //end of while
+ break directMode;
+ }
+ }//end of direct mode label
+ if (useIMAP) {
+ if (!cr.isError() && inDirectMode==0 && flush && byteIndex==0 && !source.hasRemaining()) {
+ if (base64Counter==-1) {
+ /* & at the very end of the input */
+ /* make the ampersand the reported sequence */
+ toUBytesArray[0]=AMPERSAND;
+ byteIndex=1;
+ }
+ /* else if (base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
+ inDirectMode=1;
+ cr=CoderResult.malformedForLength(sourceIndex); // NOTE(review): sourceIndex may be 0 or -1 if this call began in Unicode mode; malformedForLength needs a positive length -- confirm
+ }
+
+ } else {
+ if (!cr.isError() && flush && !source.hasRemaining() && bits ==0) {
+ /*
+ * if we are in Unicode Mode, then the byteIndex might not be 0,
+ * but that is ok if bits == 0
+ * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
+ * (not true for IMAP-mailbox-name where we must end in direct mode)
+ */
+ if (!cr.isOverflow()) {
+ byteIndex=0;
+ }
+ }
+ }
+ /* set the converter state */
+ toUnicodeStatus=(inDirectMode<<24 | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (int)bits);
+ toULength=byteIndex;
+
+ return cr;
+ }
+ }
+
+ class CharsetEncoderUTF7 extends CharsetEncoderICU { // encodes UTF-16 chars as UTF-7 (or the IMAP mailbox-name variant when useIMAP is set)
+ public CharsetEncoderUTF7(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ implReset();
+ }
+
+ protected void implReset() {
+ super.implReset();
+ fromUnicodeStatus=(fromUnicodeStatus & 0xf0000000) | 0x1000000; // keep the top 4 bits, set bit 24 (inDirectMode), clear counter and bits
+ }
+
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult cr=CoderResult.UNDERFLOW;
+ byte inDirectMode;
+ byte encodeDirectly[];
+ int status;
+
+ int length, targetCapacity, sourceIndex;
+
+ byte base64Counter;
+ char bits;
+ char c;
+ char b;
+ /* get the state machine state */
+ {
+ status=fromUnicodeStatus; // layout: top 4 bits pick the table, bit 24 = inDirectMode, bits 16..23 = base64Counter, low byte = bits
+ encodeDirectly=(((long)status) < 0x10000000) ? ENCODE_DIRECTLY_MAXIMUM : ENCODE_DIRECTLY_RESTRICTED;
+ inDirectMode=(byte)((status >> 24) & 1);
+ base64Counter=(byte)(status >> 16);
+ bits=(char)((byte)status);
+ }
+ /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
+ sourceIndex=0;
+
+ directMode: while(true) {
+ if(inDirectMode==1) {
+ length=source.remaining();
+ targetCapacity=target.remaining();
+ if(length > targetCapacity) { // never read more chars than there is room for single output bytes
+ length=targetCapacity;
+ }
+ while (length > 0) {
+ c=source.get();
+ /* UTF7: currently always encode CR LF SP TAB directly */
+ /* IMAP: encode 0x20..0x7e except '&' directly */
+ if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && inSetDIMAP(c))) {
+ /* encode directly */
+ target.put((byte)c);
+ if (offsets != null) {
+ offsets.put(sourceIndex++);
+ }
+ } else if ((!useIMAP && c==PLUS) || (useIMAP && c==AMPERSAND)) {
+ /* IMAP: output &- for & */
+ /* UTF-7: output +- for + */
+ target.put(useIMAP ? AMPERSAND : PLUS);
+ if (target.hasRemaining()) {
+ target.put(MINUS);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ /* realign length and targetCapacity */
+ continue directMode;
+ } else {
+ if (offsets != null) {
+ offsets.put(sourceIndex++);
+ }
+ errorBuffer[0]=MINUS; // the pending '-' is stored in errorBuffer instead of target
+ errorBufferLength=1;
+ cr=CoderResult.OVERFLOW;
+ break;
+ }
+ } else {
+ /* un-read this character and switch to unicode mode */
+ source.position(source.position() - 1);
+ target.put(useIMAP ? AMPERSAND : PLUS);
+ if (offsets != null) {
+ offsets.put(sourceIndex);
+ }
+ inDirectMode=0;
+ base64Counter=0;
+ continue directMode;
+ }
+ --length;
+ } //end of while
+ if (source.hasRemaining() && !target.hasRemaining()) {
+ /* target is full */
+ cr=CoderResult.OVERFLOW;
+ }
+ break directMode;
+ } else {
+ /* Unicode Mode */
+ while (source.hasRemaining()) {
+ if (target.hasRemaining()) {
+ c=source.get();
+ if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) {
+ /* encode directly */
+ inDirectMode=1;
+
+ /* trick: back out this character to make this easier */
+ source.position(source.position() - 1);
+
+ /* terminate the base64 sequence */
+ if (base64Counter!=0) {
+ /* write remaining bits for the previous character */
+ target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);
+ if (offsets!=null) {
+ offsets.put(sourceIndex-1);
+ }
+ }
+ if (FROM_BASE_64[c]!=-1 || useIMAP) {
+ /* need to terminate with a minus */
+ if (target.hasRemaining()) {
+ target.put(MINUS);
+ if (offsets!=null) {
+ offsets.put(sourceIndex-1);
+ }
+ } else {
+ errorBuffer[0]=MINUS;
+ errorBufferLength=1;
+ cr=CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ continue directMode;
+ } else {
+ /*
+ * base64 this character:
+ * Output 2 or 3 base64 bytes for the remaining bits of the previous character
+ * and the bits of this character, each implicitly in UTF-16BE.
+ *
+ * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
+ * character to the next. The actual 2 or 4 bits are shifted to the left edge
+ * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
+ */
+ switch (base64Counter) { // 0: no bits pending, 1: 4 bits pending, 2: 2 bits pending
+ case 0:
+ b=(char)(c>>10);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (target.hasRemaining()) {
+ b=(char)((c>>4)&0x3f);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (offsets!=null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ } else {
+ if (offsets!=null) {
+ offsets.put(sourceIndex++);
+ }
+ b=(char)((c>>4)&0x3f);
+ errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ errorBufferLength=1;
+ cr=CoderResult.OVERFLOW;
+ }
+ bits=(char)((c&15)<<2);
+ base64Counter=1;
+ break;
+ case 1:
+ b=(char)(bits|(c>>14));
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (target.hasRemaining()) {
+ b=(char)((c>>8)&0x3f);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (target.hasRemaining()) {
+ b=(char)((c>>2)&0x3f);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (offsets!=null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ } else {
+ if (offsets!=null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ b=(char)((c>>2)&0x3f);
+ errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ errorBufferLength=1;
+ cr=CoderResult.OVERFLOW;
+ }
+ } else {
+ if (offsets!=null) {
+ offsets.put(sourceIndex++);
+ }
+ b=(char)((c>>8)&0x3f);
+ errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ b=(char)((c>>2)&0x3f);
+ errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ errorBufferLength=2;
+ cr=CoderResult.OVERFLOW;
+ }
+ bits=(char)((c&3)<<4);
+ base64Counter=2;
+ break;
+ case 2:
+ b=(char)(bits|(c>>12));
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (target.hasRemaining()) {
+ b=(char)((c>>6)&0x3f);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (target.hasRemaining()) {
+ b=(char)(c&0x3f);
+ target.put(useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b]);
+ if (offsets!=null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ } else {
+ if (offsets!=null) {
+ offsets.put(sourceIndex);
+ offsets.put(sourceIndex++);
+ }
+ b=(char)(c&0x3f);
+ errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ errorBufferLength=1;
+ cr=CoderResult.OVERFLOW;
+ }
+ } else {
+ if (offsets!=null) {
+ offsets.put(sourceIndex++);
+ }
+ b=(char)((c>>6)&0x3f);
+ errorBuffer[0]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ b=(char)(c&0x3f);
+ errorBuffer[1]=useIMAP ? TO_BASE64_IMAP(b) : TO_BASE_64[b];
+ errorBufferLength=2;
+ cr=CoderResult.OVERFLOW;
+ }
+ bits=0;
+ base64Counter=0;
+ break;
+ //default:
+ /* will never occur */
+ //break;
+ } //end of switch
+ }
+ } else {
+ /* target is full */
+ cr=CoderResult.OVERFLOW;
+ break;
+ }
+ } //end of while
+ break directMode;
+ }
+ } //end of directMode label
+
+ if (flush && !source.hasRemaining()) {
+ /* flush remaining bits to the target */
+ if (inDirectMode==0) {
+ if (base64Counter!=0) {
+ if (target.hasRemaining()) {
+ target.put(useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits]);
+ if (offsets!=null) {
+ offsets.put(sourceIndex - 1);
+ }
+ } else {
+ errorBuffer[errorBufferLength++]=useIMAP ? TO_BASE64_IMAP(bits) : TO_BASE_64[bits];
+ cr=CoderResult.OVERFLOW;
+ }
+ }
+ if (useIMAP) {
+ /* IMAP: need to terminate with a minus */
+ if (target.hasRemaining()) {
+ target.put(MINUS);
+ if (offsets!=null) {
+ offsets.put(sourceIndex - 1);
+ }
+ } else {
+ errorBuffer[errorBufferLength++]=MINUS;
+ cr=CoderResult.OVERFLOW;
+ }
+ }
+ }
+ /*reset the state for the next conversion */
+ fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */
+ } else {
+ /* set the converter state back */
+ fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | ((int)bits));
+ }
+
+ return cr;
+ }
+ }
+
+ public CharsetDecoder newDecoder() { // each call creates a new CharsetDecoderUTF7 bound to this charset
+ return new CharsetDecoderUTF7(this);
+ }
+
+ public CharsetEncoder newEncoder() { // each call creates a new CharsetEncoderUTF7 bound to this charset
+ return new CharsetEncoderUTF7(this);
+ }
+
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ // 'which' is not consulted here
+ getCompleteUnicodeSet(setFillIn); // helper defined outside this view; fills setFillIn
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java
new file mode 100644
index 00000000000..ab8b5da701a
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/CharsetUTF8.java
@@ -0,0 +1,694 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author Niti Hantaweepant
+ */
+class CharsetUTF8 extends CharsetICU {
+
+ private static final byte[] fromUSubstitution = new byte[] { (byte) 0xef, (byte) 0xbf, (byte) 0xbd };
+
+ /**
+  * Creates the charset and records its size ratios.
+  *
+  * @param icuCanonicalName  name forwarded unchanged to the superclass constructor
+  * @param javaCanonicalName name forwarded unchanged to the superclass constructor
+  * @param aliases           aliases forwarded unchanged to the superclass constructor
+  */
+ public CharsetUTF8(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+     super(icuCanonicalName, javaCanonicalName, aliases);
+
+     maxCharsPerByte = 1;
+     minBytesPerChar = 1;
+     /* a single UTF-16 code unit takes at most 3 bytes; a surrogate _pair_ takes 4 bytes in total */
+     maxBytesPerChar = 3;
+ }
+
+ /*
+  * Indexed by the sequence length taken from BYTES_FROM_UTF8: the mask that is ANDed onto a
+  * lead byte to strip the bits that only announce the length. Index 0 (illegal lead byte)
+  * keeps every bit.
+  */
+ private static final int BITMASK_FROM_UTF8[] = { -1, 0x7f, 0x1f, 0xf, 0x7, 0x3, 0x1 };
+
+ /*
+  * Indexed by the unsigned value of a lead byte (0x00..0xff, 32 entries per row): the total
+  * number of bytes in the sequence that the byte starts. 0 marks bytes that cannot start a
+  * sequence (0x80..0xbf, 0xfe and 0xff).
+  */
+ private static final byte BYTES_FROM_UTF8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
+ };
+
+ /*
+  * Starting with Unicode 3.0.1: UTF-8 byte sequences of length N _must_ encode code points of or
+  * above UTF8_MIN_CHAR32[N]; byte sequences with more than 4 bytes are illegal in UTF-8, which is
+  * tested with impossible values for them (Integer.MAX_VALUE can never be <= 0x10ffff).
+  */
+ private static final int UTF8_MIN_CHAR32[] = { 0, 0, 0x80, 0x800, 0x10000,
+ Integer.MAX_VALUE, Integer.MAX_VALUE };
+
+ /* true when this instance is a CharsetCESU8; consulted by both the decoder and the encoder */
+ private final boolean isCESU8 = this instanceof CharsetCESU8;
+
+ class CharsetDecoderUTF8 extends CharsetDecoderICU {
+
+     public CharsetDecoderUTF8(CharsetICU cs) {
+         super(cs);
+     }
+
+     /**
+      * Decides whether a value assembled from a complete byte sequence may be output.
+      *
+      * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
+      * - use only trail bytes after a lead byte (checked by the caller)
+      * - use the right number of trail bytes for a given lead byte (checked by the caller)
+      * - encode a code point <= U+10ffff
+      * - use the fewest possible number of bytes for their code points
+      * - use at most 4 bytes (for 5 and 6 bytes the minimum in UTF8_MIN_CHAR32 exceeds 0x10ffff)
+      *
+      * In plain UTF-8 surrogate code points are rejected; in CESU-8 any sequence of at most
+      * 3 bytes is accepted instead.
+      *
+      * @param char32        the value assembled from the sequence
+      * @param bytesExpected the sequence length announced by the lead byte (index into UTF8_MIN_CHAR32)
+      * @return true if the value is acceptable
+      */
+     private boolean isLegalSequence(int char32, int bytesExpected) {
+         if (char32 < UTF8_MIN_CHAR32[bytesExpected] || char32 > 0x10ffff) {
+             return false;
+         }
+         if (isCESU8) {
+             return bytesExpected <= 3;
+         }
+         /*
+          * Test the full 32-bit value. Casting to char first would truncate supplementary code
+          * points such as U+1D800 to 0xD800 and wrongly reject them as surrogates.
+          */
+         return char32 < 0xd800 || char32 > 0xdfff;
+     }
+
+     /**
+      * Decodes bytes from {@code source} into UTF-16 code units in {@code target}.
+      *
+      * A sequence that is cut off by the end of the input is remembered in
+      * {@code toUnicodeStatus}, {@code mode} (expected length) and {@code toULength} (bytes read
+      * so far) and resumed on the next call. A low surrogate that no longer fits into the
+      * target is stored in {@code charErrorBufferArray}. Both buffer positions are updated
+      * before returning. {@code offsets} and {@code flush} are not used by this implementation.
+      *
+      * @return UNDERFLOW when the input is exhausted, OVERFLOW when the target is full, or a
+      *         malformed-input result whose length is the number of bytes read for the bad sequence
+      */
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+             boolean flush) {
+         if (!source.hasRemaining()) {
+             /* no input, nothing to do */
+             return CoderResult.UNDERFLOW;
+         }
+         if (!target.hasRemaining()) {
+             /* no output available, can't do anything */
+             return CoderResult.OVERFLOW;
+         }
+
+         if (source.hasArray() && target.hasArray()) {
+             /* source and target are backed by arrays, so use the arrays for optimal performance */
+             byte[] sourceArray = source.array();
+             int sourceIndex = source.arrayOffset() + source.position();
+             int sourceLimit = source.arrayOffset() + source.limit();
+             char[] targetArray = target.array();
+             int targetIndex = target.arrayOffset() + target.position();
+             int targetLimit = target.arrayOffset() + target.limit();
+
+             byte ch;
+             int char32, bytesExpected, bytesSoFar;
+             CoderResult cr;
+
+             if (mode == 0) {
+                 /* nothing is stored in toUnicodeStatus, read a byte as input */
+                 char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff;
+                 bytesExpected = BYTES_FROM_UTF8[char32];
+                 char32 &= BITMASK_FROM_UTF8[bytesExpected];
+                 bytesSoFar = 1;
+             } else {
+                 /* a partially or fully built code point is stored in toUnicodeStatus */
+                 char32 = toUnicodeStatus;
+                 bytesExpected = mode;
+                 bytesSoFar = toULength;
+
+                 toUnicodeStatus = 0;
+                 mode = 0;
+                 toULength = 0;
+             }
+
+             outer: while (true) {
+                 if (bytesSoFar < bytesExpected) {
+                     /* read a trail byte and insert its relevant bits into char32 */
+                     if (sourceIndex >= sourceLimit) {
+                         /* no source left, save the state for later and break out of the loop */
+                         toUnicodeStatus = char32;
+                         mode = bytesExpected;
+                         toULength = bytesSoFar;
+                         cr = CoderResult.UNDERFLOW;
+                         break;
+                     }
+                     if (((ch = toUBytesArray[bytesSoFar] = sourceArray[sourceIndex++]) & 0xc0) != 0x80) {
+                         /* not a trail byte (is not of the form 10xxxxxx); leave it unread */
+                         sourceIndex--;
+                         toULength = bytesSoFar;
+                         cr = CoderResult.malformedForLength(bytesSoFar);
+                         break;
+                     }
+                     char32 = (char32 << 6) | (ch & 0x3f);
+                     bytesSoFar++;
+                 } else if (bytesSoFar == bytesExpected && isLegalSequence(char32, bytesExpected)) {
+                     /*
+                      * char32 is a valid code point and is composed of the correct number of
+                      * bytes ... we now need to output it in UTF-16
+                      */
+
+                     if (char32 <= UConverterConstants.MAXIMUM_UCS2) {
+                         /* fits in 16 bits */
+                         targetArray[targetIndex++] = (char) char32;
+                     } else {
+                         /* fit char32 into 20 bits */
+                         char32 -= UConverterConstants.HALF_BASE;
+
+                         /* write out the surrogates */
+                         targetArray[targetIndex++] = (char) ((char32 >>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START);
+
+                         if (targetIndex >= targetLimit) {
+                             /* the low surrogate does not fit: keep it in the overflow buffer */
+                             charErrorBufferArray[charErrorBufferLength++] = (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START);
+                             cr = CoderResult.OVERFLOW;
+                             break;
+                         }
+                         targetArray[targetIndex++] = (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START);
+                     }
+
+                     /*
+                      * we're finished outputting, so now we need to read in the first byte of the
+                      * next byte sequence that could form a code point
+                      */
+
+                     if (sourceIndex >= sourceLimit) {
+                         cr = CoderResult.UNDERFLOW;
+                         break;
+                     }
+                     if (targetIndex >= targetLimit) {
+                         cr = CoderResult.OVERFLOW;
+                         break;
+                     }
+
+                     /* keep reading the next input (and writing it) while bytes == 1 */
+                     while ((bytesExpected = BYTES_FROM_UTF8[char32 = (toUBytesArray[0] = sourceArray[sourceIndex++]) & 0xff]) == 1) {
+                         targetArray[targetIndex++] = (char) char32;
+                         if (sourceIndex >= sourceLimit) {
+                             cr = CoderResult.UNDERFLOW;
+                             break outer;
+                         }
+                         if (targetIndex >= targetLimit) {
+                             cr = CoderResult.OVERFLOW;
+                             break outer;
+                         }
+                     }
+
+                     /* remove the bits that indicate the number of bytes */
+                     char32 &= BITMASK_FROM_UTF8[bytesExpected];
+                     bytesSoFar = 1;
+                 } else {
+                     /*
+                      * either the lead byte in the code sequence is invalid (bytes == 0) or the
+                      * lead byte combined with all the trail chars does not form a valid code
+                      * point
+                      */
+                     toULength = bytesSoFar;
+                     cr = CoderResult.malformedForLength(bytesSoFar);
+                     break;
+                 }
+             }
+
+             source.position(sourceIndex - source.arrayOffset());
+             target.position(targetIndex - target.arrayOffset());
+             return cr;
+
+         } else {
+             /* same algorithm as above, using absolute get/put on the buffers */
+
+             int sourceIndex = source.position();
+             int sourceLimit = source.limit();
+             int targetIndex = target.position();
+             int targetLimit = target.limit();
+
+             byte ch;
+             int char32, bytesExpected, bytesSoFar;
+             CoderResult cr;
+
+             if (mode == 0) {
+                 /* nothing is stored in toUnicodeStatus, read a byte as input */
+                 char32 = (toUBytesArray[0] = source.get(sourceIndex++)) & 0xff;
+                 bytesExpected = BYTES_FROM_UTF8[char32];
+                 char32 &= BITMASK_FROM_UTF8[bytesExpected];
+                 bytesSoFar = 1;
+             } else {
+                 /* a partially or fully built code point is stored in toUnicodeStatus */
+                 char32 = toUnicodeStatus;
+                 bytesExpected = mode;
+                 bytesSoFar = toULength;
+
+                 toUnicodeStatus = 0;
+                 mode = 0;
+                 toULength = 0;
+             }
+
+             outer: while (true) {
+                 if (bytesSoFar < bytesExpected) {
+                     /* read a trail byte and insert its relevant bits into char32 */
+                     if (sourceIndex >= sourceLimit) {
+                         /* no source left, save the state for later and break out of the loop */
+                         toUnicodeStatus = char32;
+                         mode = bytesExpected;
+                         toULength = bytesSoFar;
+                         cr = CoderResult.UNDERFLOW;
+                         break;
+                     }
+                     if (((ch = toUBytesArray[bytesSoFar] = source.get(sourceIndex++)) & 0xc0) != 0x80) {
+                         /* not a trail byte (is not of the form 10xxxxxx); leave it unread */
+                         sourceIndex--;
+                         toULength = bytesSoFar;
+                         cr = CoderResult.malformedForLength(bytesSoFar);
+                         break;
+                     }
+                     char32 = (char32 << 6) | (ch & 0x3f);
+                     bytesSoFar++;
+                 } else if (bytesSoFar == bytesExpected && isLegalSequence(char32, bytesExpected)) {
+                     /*
+                      * char32 is a valid code point and is composed of the correct number of
+                      * bytes ... we now need to output it in UTF-16
+                      */
+
+                     if (char32 <= UConverterConstants.MAXIMUM_UCS2) {
+                         /* fits in 16 bits */
+                         target.put(targetIndex++, (char) char32);
+                     } else {
+                         /* fit char32 into 20 bits */
+                         char32 -= UConverterConstants.HALF_BASE;
+
+                         /* write out the surrogates */
+                         target.put(
+                                 targetIndex++,
+                                 (char) ((char32 >>> UConverterConstants.HALF_SHIFT) + UConverterConstants.SURROGATE_HIGH_START));
+
+                         if (targetIndex >= targetLimit) {
+                             /* the low surrogate does not fit: keep it in the overflow buffer */
+                             charErrorBufferArray[charErrorBufferLength++] = (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START);
+                             cr = CoderResult.OVERFLOW;
+                             break;
+                         }
+                         target.put(
+                                 targetIndex++,
+                                 (char) ((char32 & UConverterConstants.HALF_MASK) + UConverterConstants.SURROGATE_LOW_START));
+                     }
+
+                     /*
+                      * we're finished outputting, so now we need to read in the first byte of the
+                      * next byte sequence that could form a code point
+                      */
+
+                     if (sourceIndex >= sourceLimit) {
+                         cr = CoderResult.UNDERFLOW;
+                         break;
+                     }
+                     if (targetIndex >= targetLimit) {
+                         cr = CoderResult.OVERFLOW;
+                         break;
+                     }
+
+                     /* keep reading the next input (and writing it) while bytes == 1 */
+                     while ((bytesExpected = BYTES_FROM_UTF8[char32 = (toUBytesArray[0] = source.get(sourceIndex++)) & 0xff]) == 1) {
+                         target.put(targetIndex++, (char) char32);
+                         if (sourceIndex >= sourceLimit) {
+                             cr = CoderResult.UNDERFLOW;
+                             break outer;
+                         }
+                         if (targetIndex >= targetLimit) {
+                             cr = CoderResult.OVERFLOW;
+                             break outer;
+                         }
+                     }
+
+                     /* remove the bits that indicate the number of bytes */
+                     char32 &= BITMASK_FROM_UTF8[bytesExpected];
+                     bytesSoFar = 1;
+                 } else {
+                     /*
+                      * either the lead byte in the code sequence is invalid (bytes == 0) or the
+                      * lead byte combined with all the trail chars does not form a valid code
+                      * point
+                      */
+                     toULength = bytesSoFar;
+                     cr = CoderResult.malformedForLength(bytesSoFar);
+                     break;
+                 }
+             }
+
+             source.position(sourceIndex);
+             target.position(targetIndex);
+             return cr;
+         }
+     }
+
+ }
+
+ class CharsetEncoderUTF8 extends CharsetEncoderICU {
+
+ /**
+  * Creates the encoder, passing the charset and the {@code fromUSubstitution} bytes
+  * (0xEF 0xBF 0xBD) to the superclass, and then resets the encoder state.
+  */
+ public CharsetEncoderUTF8(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ implReset();
+ }
+
+ /** Resets the encoder; this class adds nothing to the superclass reset. */
+ protected void implReset() {
+ super.implReset();
+ }
+
+ /**
+  * Encodes UTF-16 code units from {@code source} into UTF-8 bytes in {@code target}.
+  *
+  * Code units up to 0x7f produce one byte, up to 0x7ff two bytes, and all other
+  * non-surrogates (or, when {@code isCESU8} is set, surrogates as well) three bytes.
+  * Surrogates are otherwise handed to {@code encodeFourBytes}. Bytes of a sequence that no
+  * longer fit into the target are appended to {@code errorBuffer} and OVERFLOW is returned.
+  * {@code offsets} and {@code flush} are not used by this implementation.
+  *
+  * @return UNDERFLOW when the input is exhausted, OVERFLOW when the target is full, or the
+  *         non-null result reported by {@code encodeFourBytes}
+  */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ boolean flush) {
+ if (!source.hasRemaining()) {
+ /* no input, nothing to do */
+ return CoderResult.UNDERFLOW;
+ }
+ if (!target.hasRemaining()) {
+ /* no output available, can't do anything */
+ return CoderResult.OVERFLOW;
+ }
+
+ if (source.hasArray() && target.hasArray()) {
+ /* source and target are backed by arrays, so use the arrays for optimal performance */
+ char[] sourceArray = source.array();
+ int srcIdx = source.arrayOffset() + source.position();
+ int sourceLimit = source.arrayOffset() + source.limit();
+ byte[] targetArray = target.array();
+ int tgtIdx = target.arrayOffset() + target.position();
+ int targetLimit = target.arrayOffset() + target.limit();
+
+ int char32;
+ CoderResult cr;
+
+ /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
+ if (fromUChar32 != 0) {
+ /* 4 bytes to encode from char32 and a following char in source */
+
+ /* the array variant of encodeFourBytes reads and advances the sourceIndex/targetIndex fields */
+ sourceIndex = srcIdx;
+ targetIndex = tgtIdx;
+ cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
+ fromUChar32);
+ srcIdx = sourceIndex;
+ tgtIdx = targetIndex;
+ if (cr != null) {
+ source.position(srcIdx - source.arrayOffset());
+ target.position(tgtIdx - target.arrayOffset());
+ return cr;
+ }
+ }
+
+ while (true) {
+ if (srcIdx >= sourceLimit) {
+ /* nothing left to read */
+ cr = CoderResult.UNDERFLOW;
+ break;
+ }
+ if (tgtIdx >= targetLimit) {
+ /* no space left to write */
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+
+ /* read the next char into char32 */
+ char32 = sourceArray[srcIdx++];
+
+ if (char32 <= 0x7f) {
+ /* 1 byte to encode from char32 */
+
+ targetArray[tgtIdx++] = encodeHeadOf1(char32);
+
+ } else if (char32 <= 0x7ff) {
+ /* 2 bytes to encode from char32 */
+
+ targetArray[tgtIdx++] = encodeHeadOf2(char32);
+
+ if (tgtIdx >= targetLimit) {
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ targetArray[tgtIdx++] = encodeLastTail(char32);
+
+ } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
+ /* 3 bytes to encode from char32 */
+
+ targetArray[tgtIdx++] = encodeHeadOf3(char32);
+
+ if (tgtIdx >= targetLimit) {
+ errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ targetArray[tgtIdx++] = encodeSecondToLastTail(char32);
+
+ if (tgtIdx >= targetLimit) {
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ targetArray[tgtIdx++] = encodeLastTail(char32);
+
+ } else {
+ /* 4 bytes to encode from char32 and a following char in source */
+
+ sourceIndex = srcIdx;
+ targetIndex = tgtIdx;
+ cr = encodeFourBytes(sourceArray, targetArray, sourceLimit, targetLimit,
+ char32);
+ srcIdx = sourceIndex;
+ tgtIdx = targetIndex;
+ /* a null result means the four bytes were written; keep looping */
+ if (cr != null)
+ break;
+ }
+ }
+
+ /* set the new source and target positions and return the CoderResult stored in cr */
+ source.position(srcIdx - source.arrayOffset());
+ target.position(tgtIdx - target.arrayOffset());
+ return cr;
+
+ } else {
+ /* same algorithm, using relative get/put so the buffers track their own positions */
+ int char32;
+ CoderResult cr;
+
+ /* take care of the special condition of fromUChar32 not being 0 (it is a surrogate) */
+ if (fromUChar32 != 0) {
+ /* 4 bytes to encode from char32 and a following char in source */
+
+ cr = encodeFourBytes(source, target, fromUChar32);
+ if (cr != null)
+ return cr;
+ }
+
+ while (true) {
+ if (!source.hasRemaining()) {
+ /* nothing left to read */
+ cr = CoderResult.UNDERFLOW;
+ break;
+ }
+ if (!target.hasRemaining()) {
+ /* no space left to write */
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+
+ /* read the next char into char32 */
+ char32 = source.get();
+
+ if (char32 <= 0x7f) {
+ /* 1 byte to encode from char32 */
+
+ target.put(encodeHeadOf1(char32));
+
+ } else if (char32 <= 0x7ff) {
+ /* 2 bytes to encode from char32 */
+
+ target.put(encodeHeadOf2(char32));
+
+ if (!target.hasRemaining()) {
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ target.put(encodeLastTail(char32));
+
+ } else if (!UTF16.isSurrogate((char) char32) || isCESU8) {
+ /* 3 bytes to encode from char32 */
+
+ target.put(encodeHeadOf3(char32));
+
+ if (!target.hasRemaining()) {
+ errorBuffer[errorBufferLength++] = encodeSecondToLastTail(char32);
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ target.put(encodeSecondToLastTail(char32));
+
+ if (!target.hasRemaining()) {
+ errorBuffer[errorBufferLength++] = encodeLastTail(char32);
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+ target.put(encodeLastTail(char32));
+
+ } else {
+ /* 4 bytes to encode from char32 and a following char in source */
+
+ cr = encodeFourBytes(source, target, char32);
+ if (cr != null)
+ break;
+ }
+ }
+
+ /* the relative get/put calls already advanced both buffers; just return the CoderResult stored in cr */
+ return cr;
+ }
+ }
+
+ /**
+  * Array variant: encodes a surrogate pair as four bytes, working on the
+  * {@code sourceIndex} and {@code targetIndex} fields of this encoder.
+  *
+  * @param char32 the surrogate that has already been read
+  * @return null on success, the non-null result of {@code handleSurrogates}, or OVERFLOW
+  *         after the bytes that did not fit were appended to {@code errorBuffer}
+  */
+ private final CoderResult encodeFourBytes(char[] sourceArray, byte[] targetArray,
+         int sourceLimit, int targetLimit, int char32) {
+
+     /* another char is needed to match up the surrogate in char32; stop on any non-null result */
+     final CoderResult surrogateResult = handleSurrogates(sourceArray, sourceIndex, sourceLimit, (char) char32);
+     if (surrogateResult != null) {
+         return surrogateResult;
+     }
+
+     sourceIndex++;
+     /* the value to encode is taken from fromUChar32, which is cleared afterwards */
+     final int codePoint = fromUChar32;
+     fromUChar32 = 0;
+
+     targetArray[targetIndex++] = encodeHeadOf4(codePoint);
+
+     /* the three trail bytes carry bits 17..12, 11..6 and 5..0, in that order */
+     for (int shift = 12; shift >= 0; shift -= 6) {
+         if (targetIndex >= targetLimit) {
+             /* target is full: park this and every later trail byte in errorBuffer */
+             for (int pending = shift; pending >= 0; pending -= 6) {
+                 errorBuffer[errorBufferLength++] = (byte) (0x80 | ((codePoint >>> pending) & 0x3f));
+             }
+             return CoderResult.OVERFLOW;
+         }
+         targetArray[targetIndex++] = (byte) (0x80 | ((codePoint >>> shift) & 0x3f));
+     }
+
+     /* null signals success */
+     return null;
+ }
+
+ /**
+  * Buffer variant: encodes a surrogate pair as four bytes using relative puts on
+  * {@code target}.
+  *
+  * @param char32 the surrogate that has already been read
+  * @return null on success, the non-null result of {@code handleSurrogates}, or OVERFLOW
+  *         after the bytes that did not fit were appended to {@code errorBuffer}
+  */
+ private final CoderResult encodeFourBytes(CharBuffer source, ByteBuffer target, int char32) {
+
+     /* match up the surrogate in char32; stop on any non-null result */
+     final CoderResult surrogateResult = handleSurrogates(source, (char) char32);
+     if (surrogateResult != null) {
+         return surrogateResult;
+     }
+
+     /* the value to encode is taken from fromUChar32, which is cleared afterwards */
+     final int codePoint = fromUChar32;
+     fromUChar32 = 0;
+
+     target.put(encodeHeadOf4(codePoint));
+
+     /* the three trail bytes carry bits 17..12, 11..6 and 5..0, in that order */
+     for (int shift = 12; shift >= 0; shift -= 6) {
+         if (!target.hasRemaining()) {
+             /* target is full: park this and every later trail byte in errorBuffer */
+             for (int pending = shift; pending >= 0; pending -= 6) {
+                 errorBuffer[errorBufferLength++] = (byte) (0x80 | ((codePoint >>> pending) & 0x3f));
+             }
+             return CoderResult.OVERFLOW;
+         }
+         target.put((byte) (0x80 | ((codePoint >>> shift) & 0x3f)));
+     }
+
+     /* null signals success */
+     return null;
+ }
+
+ private int sourceIndex;
+
+ private int targetIndex;
+
+ }
+
+ /** Returns the single byte of a one-byte sequence: the low 8 bits of {@code char32}. */
+ private static final byte encodeHeadOf1(int char32) {
+     final byte only = (byte) char32;
+     return only;
+ }
+
+ /** Returns the lead byte of a two-byte sequence: 0xc0 combined with the bits above bit 5. */
+ private static final byte encodeHeadOf2(int char32) {
+     final int upperBits = char32 >>> 6;
+     return (byte) (upperBits | 0xc0);
+ }
+
+ /** Returns the lead byte of a three-byte sequence: 0xe0 combined with the bits above bit 11. */
+ private static final byte encodeHeadOf3(int char32) {
+     final int upperBits = char32 >>> 12;
+     return (byte) (upperBits | 0xe0);
+ }
+
+ /** Returns the lead byte of a four-byte sequence: 0xf0 combined with the bits above bit 17. */
+ private static final byte encodeHeadOf4(int char32) {
+     final int upperBits = char32 >>> 18;
+     return (byte) (upperBits | 0xf0);
+ }
+
+ /** Returns the trail byte 10xxxxxx that carries bits 17..12 of {@code char32}. */
+ private static final byte encodeThirdToLastTail(int char32) {
+     final int sixBits = (char32 >>> 12) & 0x3f;
+     return (byte) (sixBits | 0x80);
+ }
+
+ /** Returns the trail byte 10xxxxxx that carries bits 11..6 of {@code char32}. */
+ private static final byte encodeSecondToLastTail(int char32) {
+     final int sixBits = (char32 >>> 6) & 0x3f;
+     return (byte) (sixBits | 0x80);
+ }
+
+ /** Returns the trail byte 10xxxxxx that carries bits 5..0 of {@code char32}. */
+ private static final byte encodeLastTail(int char32) {
+     final int sixBits = char32 & 0x3f;
+     return (byte) (sixBits | 0x80);
+ }
+
+ /* single-code point definitions -------------------------------------------- */
+
+ /*
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ */
+ // static final boolean isSingle(byte c) {return (((c)&0x80)==0);}
+ /*
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ */
+ // static final boolean isLead(byte c) {return ((((c)-0xc0) &
+ // UConverterConstants.UNSIGNED_BYTE_MASK)<0x3e);}
+ /*
+ * Is this code unit (byte) a UTF-8 trail byte?
+ *
+ * @param c
+ * 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ */
+ /*private static final boolean isTrail(byte c) {
+ return (((c) & 0xc0) == 0x80);
+ }*/
+
+ /**
+  * Creates a new decoder for this charset.
+  *
+  * @return a freshly constructed {@code CharsetDecoderUTF8} that was handed this charset
+  */
+ public CharsetDecoder newDecoder() {
+     final CharsetDecoder utf8Decoder = new CharsetDecoderUTF8(this);
+     return utf8Decoder;
+ }
+
+ /**
+  * Creates a new encoder for this charset.
+  *
+  * @return a freshly constructed {@code CharsetEncoderUTF8} that was handed this charset
+  */
+ public CharsetEncoder newEncoder() {
+     final CharsetEncoder utf8Encoder = new CharsetEncoderUTF8(this);
+     return utf8Encoder;
+ }
+
+
+ /**
+  * Fills {@code setFillIn} by delegating to {@code getNonSurrogateUnicodeSet}.
+  * The {@code which} argument is not consulted by this implementation.
+  *
+  * @param setFillIn the set that receives the result
+  * @param which     selector passed by the caller; ignored here
+  */
+ void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
+ getNonSurrogateUnicodeSet(setFillIn);
+ }
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java
new file mode 100644
index 00000000000..a93f5360369
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java
@@ -0,0 +1,839 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+
+final class UConverterAlias {
+ static final int UNNORMALIZED = 0;
+
+ static final int STD_NORMALIZED = 1;
+
+ static final int AMBIGUOUS_ALIAS_MAP_BIT = 0x8000;
+
+ static final int CONTAINS_OPTION_BIT = 0x4000;
+
+ static final int CONVERTER_INDEX_MASK = 0xFFF;
+
+ static final int NUM_RESERVED_TAGS = 2;
+
+ static final int NUM_HIDDEN_TAGS = 1;
+
+ static int[] gConverterList = null;
+
+ static int[] gTagList = null;
+
+ static int[] gAliasList = null;
+
+ static int[] gUntaggedConvArray = null;
+
+ static int[] gTaggedAliasArray = null;
+
+ static int[] gTaggedAliasLists = null;
+
+ static int[] gOptionTable = null;
+
+ static byte[] gStringTable = null;
+
+ static byte[] gNormalizedStringTable = null;
+
+ /**
+  * Returns the zero-terminated string that starts at byte offset {@code 2 * idx}
+  * of {@code gStringTable}.
+  */
+ static final String GET_STRING(int idx) {
+     final int byteOffset = idx * 2;
+     return extractString(gStringTable, byteOffset);
+ }
+
+ /**
+  * Returns the zero-terminated string that starts at byte offset {@code 2 * idx}
+  * of {@code gNormalizedStringTable}.
+  */
+ private static final String GET_NORMALIZED_STRING(int idx) {
+     final int byteOffset = idx * 2;
+     return extractString(gNormalizedStringTable, byteOffset);
+ }
+
+ /**
+  * Builds a String from the bytes that start at {@code sBegin}; the length is determined by
+  * {@code strlen}. Each byte becomes one char with the same unsigned value.
+  */
+ private static final String extractString(byte[] sArray, int sBegin) {
+     final int length = strlen(sArray, sBegin);
+     final char[] chars = new char[length];
+     int src = sBegin;
+     for (int dst = 0; dst < length; dst++, src++) {
+         /* mask to avoid sign extension of bytes >= 0x80 */
+         chars[dst] = (char) (sArray[src] & 0xff);
+     }
+     return String.valueOf(chars);
+ }
+
+ /**
+  * Returns the number of bytes from {@code sBegin} up to, but not including, the next zero
+  * byte. If no zero byte follows, the count runs to the end of the array, so an unterminated
+  * string keeps its last byte. Returns 0 when {@code sBegin} is at or past the end of the array.
+  *
+  * @param sArray the byte table to scan
+  * @param sBegin index of the first byte of the string
+  * @return the length in bytes, never negative
+  */
+ private static final int strlen(byte[] sArray, int sBegin)
+ {
+     int end = sBegin;
+     /* advance only while the current byte is a real, non-zero byte */
+     while (end < sArray.length && sArray[end] != 0) {
+         end++;
+     }
+     return Math.max(0, end - sBegin);
+ }
+
+ /*private*/ static final int tocLengthIndex = 0;
+
+ private static final int converterListIndex = 1;
+
+ private static final int tagListIndex = 2;
+
+ private static final int aliasListIndex = 3;
+
+ private static final int untaggedConvArrayIndex = 4;
+
+ private static final int taggedAliasArrayIndex = 5;
+
+ private static final int taggedAliasListsIndex = 6;
+
+ private static final int optionTableIndex = 7;
+
+ private static final int stringTableIndex = 8;
+
+ private static final int normalizedStringTableIndex = 9;
+
+ private static final int minTocLength = 9; /*
+ * min. tocLength in the file,
+ * does not count the
+ * tocLengthIndex!
+ */
+
+ private static final int offsetsCount = minTocLength + 1; /*
+ * length of the
+ * swapper's
+ * temporary
+ * offsets[]
+ */
+
+ static ByteBuffer gAliasData = null;
+
+ /**
+  * Tells whether {@code alias} is usable as an alias, i.e. is not empty.
+  *
+  * @throws IllegalArgumentException if {@code alias} is null
+  */
+ private static final boolean isAlias(String alias) {
+     if (alias != null) {
+         return alias.length() > 0;
+     }
+     throw new IllegalArgumentException("Alias param is null!");
+ }
+
+ private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
+
+ /**
+ * Default buffer size of datafile
+ */
+ private static final int CNVALIAS_DATA_BUFFER_SIZE = 25000;
+
+ /**
+  * Loads the converter alias tables from {@code CNVALIAS_DATA_FILE_NAME} on first use.
+  *
+  * The method is synchronized, and {@code gAliasData} is only set after the tables were read
+  * and validated, so a failed load is retried on the next call. The input stream is always
+  * closed before the method returns.
+  *
+  * @return always true; failures are reported as exceptions
+  * @throws IOException if the data cannot be read, the table of contents is too short, or
+  *         the alias normalization is not {@code STD_NORMALIZED}
+  */
+ private static final synchronized boolean haveAliasData()
+         throws IOException {
+     if (gAliasData != null) {
+         /* already loaded */
+         return true;
+     }
+
+     InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
+     BufferedInputStream b = new BufferedInputStream(i, CNVALIAS_DATA_BUFFER_SIZE);
+     try {
+         UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
+         int[] tableArray = reader.readToc(offsetsCount);
+
+         int tableStart = tableArray[0];
+         if (tableStart < minTocLength) {
+             throw new IOException("Invalid data format.");
+         }
+         gConverterList = new int[tableArray[converterListIndex]];
+         gTagList = new int[tableArray[tagListIndex]];
+         gAliasList = new int[tableArray[aliasListIndex]];
+         gUntaggedConvArray = new int[tableArray[untaggedConvArrayIndex]];
+         gTaggedAliasArray = new int[tableArray[taggedAliasArrayIndex]];
+         gTaggedAliasLists = new int[tableArray[taggedAliasListsIndex]];
+         gOptionTable = new int[tableArray[optionTableIndex]];
+         /* the two string tables are allocated at twice the size given in the table of contents */
+         gStringTable = new byte[tableArray[stringTableIndex]*2];
+         gNormalizedStringTable = new byte[tableArray[normalizedStringTableIndex]*2];
+
+         reader.read(gConverterList, gTagList,
+                 gAliasList, gUntaggedConvArray,
+                 gTaggedAliasArray, gTaggedAliasLists,
+                 gOptionTable, gStringTable, gNormalizedStringTable);
+     } finally {
+         /* closing the buffered stream also closes the underlying resource stream */
+         b.close();
+     }
+
+     if (gOptionTable[0] != STD_NORMALIZED) {
+         throw new IOException("Unsupported alias normalization");
+     }
+
+     /* dummy object standing in for a memory-mapped data object; marks the tables as loaded */
+     gAliasData = ByteBuffer.allocate(0);
+     return true;
+ }
+
+ // U_CFUNC const char * io_getConverterName(const char *alias, UErrorCode
+ // *pErrorCode)
+// public static final String io_getConverterName(String alias)
+// throws IOException{
+// if (haveAliasData() && isAlias(alias)) {
+// boolean[] isAmbigous = new boolean[1];
+// int convNum = findConverter(alias, isAmbigous);
+// if (convNum < gConverterList.length) {
+// return GET_STRING(gConverterList[(int) convNum]);
+// }
+// /* else converter not found */
+// }
+// return null;
+// }
+
+ /*
+ * search for an alias return the converter number index for gConverterList
+ */
+ // static U_INLINE uint32_t findConverter(const char *alias, UErrorCode
+ // *pErrorCode)
+ private static final int findConverter(String alias, boolean[] isAmbigous) {
+ int mid, start, limit;
+ int lastMid;
+ int result;
+ StringBuilder strippedName = new StringBuilder();
+ String aliasToCompare;
+
+ stripForCompare(strippedName, alias);
+ alias = strippedName.toString();
+
+ /* do a binary search for the alias */
+ start = 0;
+ limit = gUntaggedConvArray.length;
+ mid = limit;
+ lastMid = Integer.MAX_VALUE;
+
+ for (;;) {
+ mid = (start + limit) / 2;
+ if (lastMid == mid) { /* Have we moved? */
+ break; /* We haven't moved, and it wasn't found. */
+ }
+ lastMid = mid;
+ aliasToCompare = GET_NORMALIZED_STRING(gAliasList[mid]);
+ result = alias.compareTo(aliasToCompare);
+
+ if (result < 0) {
+ limit = mid;
+ } else if (result > 0) {
+ start = mid;
+ } else {
+ /*
+ * Since the gencnval tool folds duplicates into one entry, this
+ * alias in gAliasList is unique, but different standards may
+ * map an alias to different converters.
+ */
+ if ((gUntaggedConvArray[mid] & AMBIGUOUS_ALIAS_MAP_BIT) != 0) {
+ isAmbigous[0]=true;
+ }
+ /* State whether the canonical converter name contains an option.
+ This information is contained in this list in order to maintain backward & forward compatibility. */
+ /*if (containsOption) {
+ UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
+ *containsOption = (UBool)((containsCnvOptionInfo
+ && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
+ || !containsCnvOptionInfo);
+ }*/
+ return gUntaggedConvArray[mid] & CONVERTER_INDEX_MASK;
+ }
+ }
+ return Integer.MAX_VALUE;
+ }
+
+ /**
+  * stripForCompare Appends a normalized form of the name to {@code dst} by delegating to
+  * {@code io_stripASCIIForCompare}: every character that is not an ASCII letter or digit
+  * (this includes underscores, dashes and spaces) is dropped, ASCII letters are converted
+  * to lower case, and a zero that directly precedes another digit is dropped unless it
+  * follows a non-zero digit.
+  *
+  * @param dst The destination buffer; the result is appended to it.
+  * @param name The alias to strip
+  * @return the destination buffer.
+  */
+ public static final StringBuilder stripForCompare(StringBuilder dst, String name) {
+ return io_stripASCIIForCompare(dst, name);
+ }
+
+ /* character classes stored in asciiTypes */
+ // enum {
+ private static final byte IGNORE = 0; /* neither an ASCII letter nor an ASCII digit */
+ private static final byte ZERO = 1; /* the digit '0' */
+ private static final byte NONZERO = 2; /* the digits '1'..'9' */
+ static final byte MINLETTER = 3; /* any values from here on are lowercase letter mappings */
+ // }
+
+ /*
+  * character types for ASCII 00..7F, 16 entries per row; letters (upper and lower case)
+  * map to their lowercase code 0x61..0x7a
+  */
+ static final byte asciiTypes[] = new byte[] {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
+ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
+ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
+ };
+
+ /**
+  * Returns the {@code asciiTypes} entry for {@code c}, or {@code IGNORE} for any character
+  * beyond the end of that table.
+  */
+ private static final char GET_CHAR_TYPE(char c) {
+     if (c >= asciiTypes.length) {
+         return (char) IGNORE;
+     }
+     return (char) asciiTypes[c];
+ }
+
+ /**
+  * Appends the normalized form of {@code name} to {@code dst}: characters of type IGNORE are
+  * dropped, letters are replaced by their lowercase mapping, and a zero that directly
+  * precedes another digit is dropped unless it follows a non-zero digit.
+  *
+  * @see UConverterAlias#compareNames
+  */
+ private static final StringBuilder io_stripASCIIForCompare(StringBuilder dst, String name) {
+     final int length = name.length();
+     boolean afterDigit = false;
+
+     for (int pos = 0; pos < length; pos++) {
+         char out = name.charAt(pos);
+         final char kind = GET_CHAR_TYPE(out);
+
+         if (kind == IGNORE) {
+             /* ignore all but letters and digits */
+             afterDigit = false;
+             continue;
+         }
+
+         if (kind == ZERO) {
+             if (!afterDigit && pos + 1 < length) {
+                 final char following = GET_CHAR_TYPE(name.charAt(pos + 1));
+                 if (following == ZERO || following == NONZERO) {
+                     /* ignore leading zero before another digit */
+                     continue;
+                 }
+             }
+         } else if (kind == NONZERO) {
+             afterDigit = true;
+         } else {
+             /* the type value of a letter is its lowercase form */
+             out = kind;
+             afterDigit = false;
+         }
+         dst.append(out);
+     }
+     return dst;
+ }
+
+ /**
+  * Do a fuzzy compare of two converter/alias names. The comparison is
+  * case-insensitive for ASCII letters. It ignores every character that is not an
+  * ASCII letter or digit (for example '-', '_', and ' '), and it ignores a zero
+  * that directly precedes another digit unless the zero follows a non-zero digit.
+  * Thus the strings "UTF-8", "utf_8", "Utf 8" and "utf08" are exactly equivalent.
+  *
+  * The names are compared character by character on their normalized forms; a
+  * name that ends first compares lower.
+  *
+  * @param name1
+  *            a converter name or alias
+  * @param name2
+  *            a converter name or alias
+  * @return 0 if the names match, or a negative value if the name1 lexically
+  *         precedes name2, or a positive value if the name1 lexically
+  *         follows name2.
+  *
+  * @see UConverterAlias#stripForCompare
+  */
+ static int compareNames(String name1, String name2){
+     int rc, name1Index = 0, name2Index = 0;
+     char type, nextType;
+     char c1, c2;
+     boolean afterDigit1 = false, afterDigit2 = false;
+
+     for (;;) {
+         /* 0 stands for "name1 is exhausted"; it is only overwritten when a character is delivered */
+         c1 = 0;
+         while (name1Index < name1.length()) {
+             char ch = name1.charAt(name1Index++);
+             type = GET_CHAR_TYPE(ch);
+             switch (type) {
+             case IGNORE:
+                 afterDigit1 = false;
+                 continue; /* ignore all but letters and digits */
+             case ZERO:
+                 if (!afterDigit1 && name1Index < name1.length()) {
+                     nextType = GET_CHAR_TYPE(name1.charAt(name1Index));
+                     if (nextType == ZERO || nextType == NONZERO) {
+                         continue; /* ignore leading zero before another digit */
+                     }
+                 }
+                 break;
+             case NONZERO:
+                 afterDigit1 = true;
+                 break;
+             default:
+                 ch = type; /* lowercased letter */
+                 afterDigit1 = false;
+                 break;
+             }
+             c1 = ch; /* deliver c1 */
+             break;
+         }
+
+         /* same scan for name2, bounded by name2's own index and length */
+         c2 = 0;
+         while (name2Index < name2.length()) {
+             char ch = name2.charAt(name2Index++);
+             type = GET_CHAR_TYPE(ch);
+             switch (type) {
+             case IGNORE:
+                 afterDigit2 = false;
+                 continue; /* ignore all but letters and digits */
+             case ZERO:
+                 if (!afterDigit2 && name2Index < name2.length()) {
+                     nextType = GET_CHAR_TYPE(name2.charAt(name2Index));
+                     if (nextType == ZERO || nextType == NONZERO) {
+                         continue; /* ignore leading zero before another digit */
+                     }
+                 }
+                 break;
+             case NONZERO:
+                 afterDigit2 = true;
+                 break;
+             default:
+                 ch = type; /* lowercased letter */
+                 afterDigit2 = false;
+                 break;
+             }
+             c2 = ch; /* deliver c2 */
+             break;
+         }
+
+         /* If neither name delivered a character, both are exhausted and they match */
+         if (c1 == 0 && c2 == 0) {
+             return 0;
+         }
+
+         /* Case-insensitive comparison; an exhausted name (0) sorts before any character */
+         rc = (int)c1 - (int)c2;
+         if (rc != 0) {
+             return rc;
+         }
+     }
+ }
+
+    /**
+     * Counts the aliases recorded for the converter that the given alias
+     * maps to, using the row of the last tag in gTagList (the "ALL" tag).
+     *
+     * @param alias a converter name or alias
+     * @return the size stored at the head of the converter's alias list, or
+     *         0 when alias data is unavailable, the alias is rejected by
+     *         isAlias, the converter is not found, or no list is recorded
+     * @throws IOException propagated from loading the alias data
+     */
+    static int io_countAliases(String alias) throws IOException {
+        if (!haveAliasData() || !isAlias(alias)) {
+            return 0;
+        }
+
+        boolean[] ambiguityFlag = new boolean[1];
+        int converterIndex = findConverter(alias, ambiguityFlag);
+        if (converterIndex >= gConverterList.length) {
+            /* converter not found */
+            return 0;
+        }
+
+        /* tagListNum - 1 is the ALL tag */
+        int allTagRowStart = (gTagList.length - 1) * gConverterList.length;
+        int offset = gTaggedAliasArray[allTagRowStart + converterIndex];
+
+        /* offset 0 shouldn't happen here; it would be an internal program error */
+        return offset != 0 ? gTaggedAliasLists[offset] : 0;
+    }
+
+ /**
+ * Return the number of all aliases (and converter names).
+ *
+ * @return the number of all aliases
+ */
+ // U_CFUNC uint16_t io_countTotalAliases(UErrorCode *pErrorCode);
+// static int io_countTotalAliases() throws IOException{
+// if (haveAliasData()) {
+// return (int) gAliasList.length;
+// }
+// return 0;
+// }
+
+    // U_CFUNC const char * io_getAlias(const char *alias, uint16_t n,
+    // UErrorCode *pErrorCode)
+    /**
+     * Returns the n-th alias of the converter that the given alias maps to,
+     * taken from the row of the last tag in gTagList (the "ALL" tag).
+     *
+     * @param alias a converter name or alias
+     * @param n zero-based position inside the converter's alias list
+     * @return the alias string, or null when alias data is unavailable, the
+     *         alias is rejected by isAlias, the converter is not found, no
+     *         list is recorded, or n lies outside the list
+     * @throws IOException propagated from loading the alias data
+     */
+    static String io_getAlias(String alias, int n) throws IOException {
+        if (haveAliasData() && isAlias(alias)) {
+            boolean[] isAmbigous = new boolean[1];
+            int convNum = findConverter(alias, isAmbigous);
+            if (convNum < gConverterList.length) {
+                /* tagListNum - 1 is the ALL tag */
+                int listOffset = gTaggedAliasArray[(gTagList.length - 1)
+                        * gConverterList.length + convNum];
+
+                if (listOffset != 0) {
+                    /* the list starts with its own size */
+                    int listCount = gTaggedAliasLists[listOffset];
+
+                    /*
+                     * Without this range check an out-of-range n would read
+                     * an entry belonging to a neighbouring list (or run off
+                     * the end of the table).
+                     */
+                    if (0 <= n && n < listCount) {
+                        /* +1 to skip listCount */
+                        return GET_STRING(gTaggedAliasLists[listOffset + 1 + n]);
+                    }
+                    /* else n is out of bounds */
+                }
+                /* else this shouldn't happen. internal program error */
+            }
+            /* else converter not found */
+        }
+        return null;
+    }
+
+ // U_CFUNC uint16_t io_countStandards(UErrorCode *pErrorCode) {
+// static int io_countStandards() throws IOException{
+// if (haveAliasData()) {
+// return (int) (gTagList.length - NUM_HIDDEN_TAGS);
+// }
+// return 0;
+// }
+
+ // U_CAPI const char * U_EXPORT2getStandard(uint16_t n, UErrorCode
+ // *pErrorCode)
+// static String getStandard(int n) throws IOException{
+// if (haveAliasData()) {
+// return GET_STRING(gTagList[n]);
+// }
+// return null;
+// }
+
+    // U_CAPI const char * U_EXPORT2 getStandardName(const char *alias, const
+    // char *standard, UErrorCode *pErrorCode)
+    /**
+     * Returns the first entry of the alias list that the given standard
+     * (tag) records for the converter identified by alias.
+     *
+     * @param alias a converter name or alias
+     * @param standard the name of the standard (tag) to look in
+     * @return the first name of the matching list, or null when alias data
+     *         is unavailable, the alias is rejected by isAlias, no list is
+     *         found, or the list's first entry is empty (0)
+     * @throws IOException propagated from loading the alias data
+     */
+    static final String getStandardName(String alias, String standard) throws IOException {
+        if (haveAliasData() && isAlias(alias)) {
+            int listOffset = findTaggedAliasListsOffset(alias, standard);
+
+            /*
+             * The entry read below sits at listOffset + 1, so the offset has
+             * to stop one short of the table end. Written as a comparison
+             * against length - 1 so that Integer.MAX_VALUE ("not found")
+             * cannot overflow.
+             */
+            if (0 < listOffset && listOffset < gTaggedAliasLists.length - 1) {
+                /* +1 to skip listCount */
+                int firstEntry = gTaggedAliasLists[listOffset + 1];
+
+                /* test the first entry of THIS list, not element 0 of the table */
+                if (firstEntry != 0) {
+                    return GET_STRING(firstEntry);
+                }
+            }
+        }
+        return null;
+    }
+
+ // U_CAPI uint16_t U_EXPORT2 countAliases(const char *alias, UErrorCode
+ // *pErrorCode)
+ /**
+ * Returns the number of aliases for the given converter name or alias.
+ * Pure delegate: the result of io_countAliases(alias) is returned unchanged.
+ *
+ * @param alias a converter name or alias
+ * @return whatever io_countAliases reports for alias
+ * @throws IOException propagated from io_countAliases
+ */
+ static int countAliases(String alias) throws IOException{
+ return io_countAliases(alias);
+ }
+
+ // U_CAPI const char* U_EXPORT2 getAlias(const char *alias, uint16_t n,
+ // UErrorCode *pErrorCode)
+ /**
+ * Returns the n-th alias for the given converter name or alias.
+ * Pure delegate: the result of io_getAlias(alias, n) is returned unchanged.
+ *
+ * @param alias a converter name or alias
+ * @param n position of the requested alias, passed through to io_getAlias
+ * @return whatever io_getAlias reports for (alias, n)
+ * @throws IOException propagated from io_getAlias
+ */
+ static String getAlias(String alias, int n) throws IOException{
+ return io_getAlias(alias, n);
+ }
+
+ // U_CFUNC uint16_t countStandards(void)
+// static int countStandards()throws IOException{
+// return io_countStandards();
+// }
+
+    /**
+     * Returns a single name from the list of available converters.
+     *
+     * @param n index of the requested name; only 0..0xffff is looked up
+     * @return the name reported by bld_getAvailableConverter, or null when n
+     *         is outside 0..0xffff or the lookup fails with an IOException
+     */
+    static String getAvailableName(int n) {
+        if (n < 0 || n > 0xffff) {
+            return null;
+        }
+        try {
+            return bld_getAvailableConverter(n);
+        } catch (IOException ex) {
+            // the exception is deliberately thrown away; null signals "no name"
+            return null;
+        }
+    }
+    // U_CAPI const char * U_EXPORT2 getCanonicalName(const char *alias, const
+    // char *standard, UErrorCode *pErrorCode) {
+    /**
+     * Returns the converter-list name of the converter that the given
+     * standard (tag) associates with alias.
+     *
+     * @param alias a converter name or alias
+     * @param standard the name of the standard (tag) to look in
+     * @return the converter name, or null when alias data is unavailable,
+     *         the alias is rejected by isAlias, or findTaggedConverterNum
+     *         yields an index outside gConverterList
+     * @throws IOException propagated from loading the alias data
+     */
+    static String getCanonicalName(String alias, String standard) throws IOException {
+        if (!haveAliasData() || !isAlias(alias)) {
+            return null;
+        }
+
+        int converterIndex = findTaggedConverterNum(alias, standard);
+        if (converterIndex >= gConverterList.length) {
+            return null;
+        }
+        return GET_STRING(gConverterList[converterIndex]);
+    }
+    /**
+     * Returns the number of available converters.
+     *
+     * @return the count reported by bld_countAvailableConverters, or -1 when
+     *         that call fails with an IOException
+     */
+    static int countAvailable() {
+        int count;
+        try {
+            count = bld_countAvailableConverters();
+        } catch (IOException ex) {
+            // the exception is deliberately thrown away; -1 signals failure
+            count = -1;
+        }
+        return count;
+    }
+
+ // U_CAPI UEnumeration * U_EXPORT2 openStandardNames(const char *convName,
+ // const char *standard, UErrorCode *pErrorCode)
+/* static final UConverterAliasesEnumeration openStandardNames(String convName, String standard)throws IOException {
+ UConverterAliasesEnumeration aliasEnum = null;
+ if (haveAliasData() && isAlias(convName)) {
+ int listOffset = findTaggedAliasListsOffset(convName, standard);
+
+
+ * When listOffset == 0, we want to acknowledge that the converter
+ * name and standard are okay, but there is nothing to enumerate.
+
+ if (listOffset < gTaggedAliasLists.length) {
+
+ UConverterAliasesEnumeration.UAliasContext context = new UConverterAliasesEnumeration.UAliasContext(listOffset, 0);
+ aliasEnum = new UConverterAliasesEnumeration();
+ aliasEnum.setContext(context);
+ }
+ else converter or tag not found
+ }
+ return aliasEnum;
+ }*/
+
+    // static uint32_t getTagNumber(const char *tagname)
+    /**
+     * Looks up the position of a tag (standard) name in gTagList.
+     *
+     * The comparison uses String.equals, i.e. it is exact and
+     * case-sensitive.
+     *
+     * @param tagName the tag name to search for
+     * @return the index of the first matching tag, or Integer.MAX_VALUE when
+     *         gTagList is null or holds no matching entry
+     */
+    private static int getTagNumber(String tagName) {
+        if (gTagList == null) {
+            return Integer.MAX_VALUE;
+        }
+
+        int position = 0;
+        while (position < gTagList.length) {
+            String candidate = GET_STRING(gTagList[position]);
+            if (tagName.equals(candidate)) {
+                return position;
+            }
+            position++;
+        }
+        return Integer.MAX_VALUE;
+    }
+
+ // static uint32_t findTaggedAliasListsOffset(const char *alias, const char
+ // *standard, UErrorCode *pErrorCode)
+ private static int findTaggedAliasListsOffset(String alias, String standard) {
+ int idx;
+ int listOffset;
+ int convNum;
+ int tagNum = getTagNumber(standard);
+ boolean[] isAmbigous = new boolean[1];
+ /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+ convNum = findConverter(alias, isAmbigous);
+
+ if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS)
+ && convNum < gConverterList.length) {
+ listOffset = gTaggedAliasArray[tagNum
+ * gConverterList.length + convNum];
+ if (listOffset != 0
+ && gTaggedAliasLists[listOffset + 1] != 0) {
+ return listOffset;
+ }
+ if (isAmbigous[0]==true) {
+ /*
+ * Uh Oh! They used an ambiguous alias. We have to search the
+ * whole swiss cheese starting at the highest standard affinity.
+ * This may take a while.
+ */
+
+ for (idx = 0; idx < gTaggedAliasArray.length; idx++) {
+ listOffset = gTaggedAliasArray[idx];
+ if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+ int currTagNum = idx / gConverterList.length;
+ int currConvNum = (idx - currTagNum
+ * gConverterList.length);
+ int tempListOffset = gTaggedAliasArray[tagNum
+ * gConverterList.length + currConvNum];
+ if (tempListOffset != 0
+ && gTaggedAliasLists[tempListOffset + 1] != 0) {
+ return tempListOffset;
+ }
+ /*
+ * else keep on looking We could speed this up by
+ * starting on the next row because an alias is unique
+ * per row, right now. This would change if alias
+ * versioning appears.
+ */
+ }
+ }
+ /* The standard doesn't know about the alias */
+ }
+ /* else no default name */
+ return 0;
+ }
+ /* else converter or tag not found */
+
+ return Integer.MAX_VALUE;
+ }
+
+ /* Return the canonical name */
+ // static uint32_t findTaggedConverterNum(const char *alias, const char
+ // *standard, UErrorCode *pErrorCode)
+ private static int findTaggedConverterNum(String alias, String standard) {
+ int idx;
+ int listOffset;
+ int convNum;
+ int tagNum = getTagNumber(standard);
+ boolean[] isAmbigous = new boolean[1];
+
+ /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
+ convNum = findConverter(alias, isAmbigous);
+
+ if (tagNum < (gTagList.length - NUM_HIDDEN_TAGS)
+ && convNum < gConverterList.length) {
+ listOffset = gTaggedAliasArray[tagNum
+ * gConverterList.length + convNum];
+ if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+ return convNum;
+ }
+ if (isAmbigous[0] == true) {
+ /*
+ * Uh Oh! They used an ambiguous alias. We have to search one
+ * slice of the swiss cheese. We search only in the requested
+ * tag, not the whole thing. This may take a while.
+ */
+ int convStart = (tagNum) * gConverterList.length;
+ int convLimit = (tagNum + 1) * gConverterList.length;
+ for (idx = convStart; idx < convLimit; idx++) {
+ listOffset = gTaggedAliasArray[idx];
+ if (listOffset != 0 && isAliasInList(alias, listOffset)) {
+ return idx - convStart;
+ }
+ }
+ /* The standard doesn't know about the alias */
+ }
+ /* else no canonical name */
+ }
+ /* else converter or tag not found */
+
+ return Integer.MAX_VALUE;
+ }
+
+ // static U_INLINE UBool isAliasInList(const char *alias, uint32_t
+ // listOffset)
+ private static boolean isAliasInList(String alias, int listOffset) {
+ if (listOffset != 0) {
+ int currAlias;
+ int listCount = gTaggedAliasLists[listOffset];
+ /* +1 to skip listCount */
+ int[] currList = gTaggedAliasLists;
+ int currListArrayIndex = listOffset + 1;
+ for (currAlias = 0; currAlias < listCount; currAlias++) {
+ if (currList[currAlias + currListArrayIndex] != 0
+ && compareNames(
+ alias,
+ GET_STRING(currList[currAlias + currListArrayIndex])) == 0) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ // begin bld.c
+ static String[] gAvailableConverters = null;
+
+ static int gAvailableConverterCount = 0;
+
+ static byte[] gDefaultConverterNameBuffer; // [MAX_CONVERTER_NAME_LENGTH +
+ // 1]; /* +1 for NULL */
+
+ static String gDefaultConverterName = null;
+
+ // static UBool haveAvailableConverterList(UErrorCode *pErrorCode)
+ static boolean haveAvailableConverterList() throws IOException{
+ if (gAvailableConverters == null) {
+ int idx;
+ int localConverterCount;
+ String converterName;
+ String[] localConverterList;
+
+ if (!haveAliasData()) {
+ return false;
+ }
+
+ /* We can't have more than "*converterTable" converters to open */
+ localConverterList = new String[gConverterList.length];
+
+ localConverterCount = 0;
+
+ for (idx = 0; idx < gConverterList.length; idx++) {
+ converterName = GET_STRING(gConverterList[idx]);
+ //UConverter cnv = UConverter.open(converterName);
+ //TODO: Fix me
+ localConverterList[localConverterCount++] = converterName;
+
+ }
+
+ // agljport:todo umtx_lock(NULL);
+ if (gAvailableConverters == null) {
+ gAvailableConverters = localConverterList;
+ gAvailableConverterCount = localConverterCount;
+ /* haveData should have already registered the cleanup function */
+ } else {
+ // agljport:todo free((char **)localConverterList);
+ }
+ // agljport:todo umtx_unlock(NULL);
+ }
+ return true;
+ }
+
+    // U_CFUNC uint16_t bld_countAvailableConverters(UErrorCode *pErrorCode)
+    /**
+     * Returns the number of entries in the available-converter list.
+     *
+     * @return gAvailableConverterCount, or 0 when the list cannot be built
+     * @throws IOException propagated from haveAvailableConverterList
+     */
+    static int bld_countAvailableConverters() throws IOException {
+        return haveAvailableConverterList() ? gAvailableConverterCount : 0;
+    }
+
+    // U_CFUNC const char * bld_getAvailableConverter(uint16_t n, UErrorCode
+    // *pErrorCode)
+    /**
+     * Returns the n-th entry of the available-converter list.
+     *
+     * @param n zero-based index into the list
+     * @return the converter name, or null when the list cannot be built or
+     *         n is outside 0..gAvailableConverterCount-1
+     * @throws IOException propagated from haveAvailableConverterList
+     */
+    static String bld_getAvailableConverter(int n) throws IOException {
+        if (haveAvailableConverterList()) {
+            /*
+             * The C original takes an unsigned index; in Java a negative n
+             * has to be rejected explicitly or the array access would throw.
+             */
+            if (0 <= n && n < gAvailableConverterCount) {
+                return gAvailableConverters[n];
+            }
+        }
+        return null;
+    }
+
+ /* default converter name --------------------------------------------------- */
+
+ /*
+ * In order to be really thread-safe, the get function would have to take
+ * a buffer parameter and copy the current string inside a mutex block.
+ * This implementation only tries to be really thread-safe while
+ * setting the name.
+ * It assumes that setting a pointer is atomic.
+ */
+
+ // U_CFUNC const char * getDefaultName()
+// static final synchronized String getDefaultName() {
+// /* local variable to be thread-safe */
+// String name;
+//
+// //agljport:todo umtx_lock(null);
+// name = gDefaultConverterName;
+// //agljport:todo umtx_unlock(null);
+//
+// if (name == null) {
+// //UConverter cnv = null;
+// int length = 0;
+//
+// name = CharsetICU.getDefaultCharsetName();
+//
+// /* if the name is there, test it out and get the canonical name with options */
+// if (name != null) {
+// // cnv = UConverter.open(name);
+// // name = cnv.getName(cnv);
+// // TODO: fix me
+// }
+//
+// if (name == null || name.length() == 0 ||/* cnv == null ||*/
+// length >= gDefaultConverterNameBuffer.length) {
+// /* Panic time, let's use a fallback. */
+// name = new String("US-ASCII");
+// }
+//
+// //length=(int32_t)(strlen(name));
+//
+// /* Copy the name before we close the converter. */
+// name = gDefaultConverterName;
+// }
+//
+// return name;
+// }
+
+ //end bld.c
+}
\ No newline at end of file
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java
new file mode 100644
index 00000000000..c4134b8c6ad
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java
@@ -0,0 +1,221 @@
+/*
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.ibm.icu.impl.ICUBinary;
+
+
+/* Format of cnvalias.icu -----------------------------------------------------
+ *
+ * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
+ * This binary form contains several tables. All indexes are to uint16_t
+ * units, and not to the bytes (uint8_t units). Addressing everything on
+ * 16-bit boundaries allows us to store more information with small index
+ * numbers, which are also 16-bit in size. The majority of the table (except
+ * the string table) are 16-bit numbers.
+ *
+ * First there is the size of the Table of Contents (TOC). The TOC
+ * entries contain the size of each section. In order to find the offset
+ * you just need to sum up the previous offsets.
+ * The TOC length and entries are an array of uint32_t values.
+ * The first section after the TOC starts immediately after the TOC.
+ *
+ * 1) This section contains a list of converters. This list contains indexes
+ * into the string table for the converter name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is not sorted.
+ *
+ * 2) This section contains a list of tags. This list contains indexes
+ * into the string table for the tag name. The index of this list is
+ * also used by other sections, which are mentioned later on.
+ * This list is in priority order of standards.
+ *
+ * 3) This section contains a list of sorted unique aliases. This
+ * list contains indexes into the string table for the alias name. The
+ * index of this list is also used by other sections, like the 4th section.
+ * The index for the 3rd and 4th section is used to get the
+ * alias -> converter name mapping. Section 3 and 4 form a two column table.
+ *
+ * 4) This section contains a list of mapped converter names. Consider this
+ * as a table that maps the 3rd section to the 1st section. This list contains
+ * indexes into the 1st section. The index of this list is the same index in
+ * the 3rd section. There is also some extra information in the high bits of
+ * each converter index in this table. Currently it's only used to say that
+ * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
+ * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
+ * the predigested form of the 5th section so that an alias lookup can be fast.
+ *
+ * 5) This section contains a 2D array with indexes to the 6th section. This
+ * section is the full form of all alias mappings. The column index is the
+ * index into the converter list (column header). The row index is the index
+ * to the tag list (row header). This 2D array is the top part of a 3D array. The
+ * third dimension is in the 6th section.
+ *
+ * 6) This is a blob of variable-length arrays. Each array starts with a size,
+ * and is followed by indexes to alias names in the string table. This is
+ * the third dimension to the section 5. No other section should be referencing
+ * this section.
+ *
+ * 7) Reserved at this time (There is no information). This _usually_ has a
+ * size of 0. Future versions may add more information here.
+ *
+ * 8) This is the string table. All strings are indexed on an even address.
+ * There are two reasons for this. First many chip architectures locate strings
+ * faster on even address boundaries. Second, since all indexes are 16-bit
+ * numbers, this string table can be 128KB in size instead of 64KB when we
+ * only have strings starting on an even address.
+ *
+ *
+ * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
+ * has a unique alias among all converters. That same alias can
+ * be mentioned in other standards on different converters,
+ * but only one alias per tag can be unique.
+ *
+ *
+ * Converter Names (Usually in TR22 form)
+ * -------------------------------------------.
+ * T / /|
+ * a / / |
+ * g / / |
+ * s / / |
+ * / / |
+ * ------------------------------------------/ |
+ * A | | |
+ * l | | |
+ * i | | /
+ * a | | /
+ * s | | /
+ * e | | /
+ * s | |/
+ * -------------------------------------------
+ *
+ *
+ *
+ * Here is what it really looks like. It's like swiss cheese.
+ * There are holes. Some converters aren't recognized by
+ * a standard, or they are really old converters that the
+ * standard doesn't recognize anymore.
+ *
+ * Converter Names (Usually in TR22 form)
+ * -------------------------------------------.
+ * T /##########################################/|
+ * a / # # /#
+ * g / # ## ## ### # ### ### ### #/
+ * s / # ##### #### ## ## #/#
+ * / ### # # ## # # # ### # # #/##
+ * ------------------------------------------/# #
+ * A |### # # ## # # # ### # # #|# #
+ * l |# # # # # ## # #|# #
+ * i |# # # # # # #|#
+ * a |# #|#
+ * s | #|#
+ * e
+ * s
+ *
+ */
+
+/**
+ * Reads the sections of a converter alias data file (data format "CvAl")
+ * from an input stream, after validating the ICU binary header.
+ */
+final class UConverterAliasDataReader implements ICUBinary.Authenticate {
+
+    // private data members -------------------------------------------------
+
+    // DATA_FORMAT_ID values taken from icu4c isAcceptable (ucnv_io.c)
+    /** Expected data format identifier: "CvAl". */
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x43, (byte)0x76, (byte)0x41, (byte)0x6c};
+
+    /**
+     * File format version that this class understands; the first three
+     * version bytes of a data file must match exactly.
+     */
+    private static final byte DATA_FORMAT_VERSION[] = {3, 0, 1};
+
+    /**
+     * Mask applied to every TOC entry. As an int constant it has all bits
+     * set, so the masking leaves the value as read.
+     */
+    private static final int UNSIGNED_INT_MASK = 0xffffffff;
+
+    /**
+     * ICU data file input stream, positioned behind the header once the
+     * constructor has run.
+     */
+    private DataInputStream dataInputStream;
+
+    // construction ---------------------------------------------------------
+
+    /**
+     * Protected constructor. Consumes and checks the ICU binary header of
+     * the stream and keeps the stream for the subsequent read calls.
+     *
+     * @param inputStream alias data file input stream, positioned at its start
+     * @exception IOException thrown if the data file fails authentication
+     *            or the header cannot be read
+     */
+    protected UConverterAliasDataReader(InputStream inputStream) throws IOException {
+        ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+        dataInputStream = new DataInputStream(inputStream);
+    }
+
+    // protected methods ----------------------------------------------------
+
+    /**
+     * Reads n consecutive 32-bit table-of-contents entries.
+     *
+     * @param n number of entries to read
+     * @return the entries in file order
+     * @throws IOException if the stream ends early or cannot be read
+     */
+    protected int[] readToc(int n) throws IOException {
+        final int[] entries = new int[n];
+        int filled = 0;
+        while (filled < n) {
+            entries[filled] = dataInputStream.readInt() & UNSIGNED_INT_MASK;
+            ++filled;
+        }
+        return entries;
+    }
+
+    /**
+     * Fills the given arrays from the stream, in parameter order. The seven
+     * int arrays receive one unsigned 16-bit value per element; the two byte
+     * arrays are filled completely with raw bytes. The array lengths decide
+     * how much is read for each section.
+     *
+     * @throws IOException if the stream ends early or cannot be read
+     */
+    protected void read(int[] convList, int[] tagList, int[] aliasList, int[]untaggedConvArray, int[] taggedAliasArray, int[] taggedAliasLists, int[] optionTable, byte[] stringTable, byte[] normalizedStringTable) throws IOException {
+        fillUnsignedShorts(convList);
+        fillUnsignedShorts(tagList);
+        fillUnsignedShorts(aliasList);
+        fillUnsignedShorts(untaggedConvArray);
+        fillUnsignedShorts(taggedAliasArray);
+        fillUnsignedShorts(taggedAliasLists);
+        fillUnsignedShorts(optionTable);
+
+        dataInputStream.readFully(stringTable);
+        dataInputStream.readFully(normalizedStringTable);
+    }
+
+    /** Reads one unsigned 16-bit value from the stream into each element of target. */
+    private void fillUnsignedShorts(int[] target) throws IOException {
+        for (int slot = 0; slot < target.length; ++slot) {
+            target[slot] = dataInputStream.readUnsignedShort();
+        }
+    }
+
+    /**
+     * Accepts a data file when its version has at least as many bytes as
+     * DATA_FORMAT_VERSION and agrees with it in each of those bytes.
+     *
+     * @param version the format version bytes found in the file header
+     * @return true if this reader understands the version
+     */
+    public boolean isDataVersionAcceptable(byte version[]) {
+        if (version.length < DATA_FORMAT_VERSION.length) {
+            return false;
+        }
+        for (int i = 0; i < DATA_FORMAT_VERSION.length; ++i) {
+            if (version[i] != DATA_FORMAT_VERSION[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java
new file mode 100644
index 00000000000..2fdf5070e47
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterConstants.java
@@ -0,0 +1,169 @@
+/*
+*******************************************************************************
+* Copyright (C) 2006-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+/**
+ * Constants shared by the converter implementation classes. All fields of
+ * an interface are implicitly public, static and final, and nested
+ * interfaces are implicitly static, so those modifiers are not spelled out.
+ */
+interface UConverterConstants {
+
+    short UNSIGNED_BYTE_MASK = 0xff;
+    int UNSIGNED_SHORT_MASK = 0xffff;
+    long UNSIGNED_INT_MASK = 0xffffffffL;
+
+    int U_IS_BIG_ENDIAN = 0;
+
+    /**
+     * Useful constant for the maximum size of the whole locale ID
+     * (including the terminating NULL).
+     */
+    int ULOC_FULLNAME_CAPACITY = 56;
+
+    /**
+     * This value is intended for sentinel values for APIs that
+     * (take or) return single code points (UChar32).
+     * It is outside of the Unicode code point range 0..0x10ffff.
+     *
+     * For example, a "done" or "error" value in a new API
+     * could be indicated with U_SENTINEL.
+     *
+     * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+     * values, mostly 0xffff.
+     * Those may need to be distinguished from
+     * actual U+ffff text contents by calling functions like
+     * CharacterIterator::hasNext() or UnicodeString::length().
+     */
+    int U_SENTINEL = -1;
+
+    //end utf.h
+
+    //begin ucnv.h
+    /**
+     * Character that separates converter names from options and options from each other.
+     * @see CharsetICU#forNameICU(String)
+     */
+    byte OPTION_SEP_CHAR = ',';
+
+    /** Maximum length of a converter name including the terminating NULL */
+    int MAX_CONVERTER_NAME_LENGTH = 60;
+    /** Maximum length of a converter name including path and terminating NULL */
+    int MAX_FULL_FILE_NAME_LENGTH = (600+MAX_CONVERTER_NAME_LENGTH);
+
+    /** Shift in for EBCDIC_STATEFUL and iso2022 states */
+    int SI = 0x0F;
+    /** Shift out for EBCDIC_STATEFUL and iso2022 states */
+    int SO = 0x0E;
+
+    //end ucnv.h
+
+    // begin bld.h
+    /* size of the overflow buffers in UConverter, enough for escaping callbacks */
+    //#define ERROR_BUFFER_LENGTH 32
+    int ERROR_BUFFER_LENGTH = 32;
+
+    /* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
+    int MAX_SUBCHAR_LEN = 4;
+
+    /* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
+    int MAX_CHAR_LEN = 8;
+
+    /* converter options bits */
+    int OPTION_VERSION = 0xf;
+    int OPTION_SWAP_LFNL = 0x10;
+    int OPTION_MAC = 0x20; //agljport:comment added for Mac ISCII encodings
+
+    String OPTION_SWAP_LFNL_STRING = ",swaplfnl";
+
+    /** values for the unicodeMask */
+    int HAS_SUPPLEMENTARY = 1;
+    int HAS_SURROGATES = 2;
+    // end bld.h
+
+    // begin cnv.h
+    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
+    int missingCharMarker = 0xFFFF;
+
+    /**
+     * Choices for which direction(s) of a converter to reset.
+     *
+     * @author ram
+     */
+    interface UConverterResetChoice {
+        int RESET_BOTH = 0;
+        int RESET_TO_UNICODE = RESET_BOTH + 1;
+        int RESET_FROM_UNICODE = RESET_TO_UNICODE + 1;
+    }
+
+    // begin utf16.h
+    /**
+     * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+     */
+    int U16_MAX_LENGTH = 2;
+    // end utf16.h
+
+    // begin err.h
+    /**
+     * FROM_U, TO_U context options for sub callback
+     */
+    byte[] SUB_STOP_ON_ILLEGAL = {'i'};
+
+    /**
+     * FROM_U, TO_U context options for skip callback
+     */
+    byte[] SKIP_STOP_ON_ILLEGAL = {'i'};
+
+    /**
+     * The process condition code to be used with the callbacks.
+     * Codes which are greater than IRREGULAR should be
+     * passed on to any chained callbacks.
+     */
+    interface UConverterCallbackReason {
+        /**
+         * The code point is unassigned.
+         * The error code U_INVALID_CHAR_FOUND will be set.
+         */
+        int UNASSIGNED = 0;
+
+        /**
+         * The code point is illegal. For example,
+         * \\x81\\x2E is illegal in SJIS because \\x2E
+         * is not a valid trail byte for the \\x81
+         * lead byte.
+         * Also, starting with Unicode 3.0.1, non-shortest byte sequences
+         * in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+         * are also illegal, not just irregular.
+         * The error code U_ILLEGAL_CHAR_FOUND will be set.
+         */
+        int ILLEGAL = 1;
+
+        /**
+         * The codepoint is not a regular sequence in
+         * the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+         * are irregular UTF-8 byte sequences for single surrogate
+         * code points.
+         * The error code U_INVALID_CHAR_FOUND will be set.
+         */
+        int IRREGULAR = 2;
+
+        /**
+         * The callback is called with this reason when a
+         * 'reset' has occurred. Callback should reset all
+         * state.
+         */
+        int RESET = 3;
+
+        /**
+         * Called when the converter is closed. The
+         * callback should release any allocated memory.
+         */
+        int CLOSE = 4;
+
+        /**
+         * Called when safeClone() is called on the
+         * converter. The pointer available as the
+         * 'context' is an alias to the original converter's
+         * context pointer. If the context must be owned
+         * by the new converter, the callback must clone
+         * the data and call setFromUCallback
+         * (or setToUCallback) with the correct pointer.
+         */
+        int CLONE = 5;
+    }
+    //end err.h
+
+
+    String DATA_TYPE = "cnv";
+    int CNV_DATA_BUFFER_SIZE = 25000;
+    int SIZE_OF_UCONVERTER_SHARED_DATA = 100;
+
+    int MAXIMUM_UCS2 = 0x0000FFFF;
+    int MAXIMUM_UTF = 0x0010FFFF;
+    //int MAXIMUM_UCS4 = 0x7FFFFFFF;
+    int HALF_SHIFT = 10;
+    int HALF_BASE = 0x0010000;
+    int HALF_MASK = 0x3FF;
+    int SURROGATE_HIGH_START = 0xD800;
+    int SURROGATE_HIGH_END = 0xDBFF;
+    int SURROGATE_LOW_START = 0xDC00;
+    int SURROGATE_LOW_END = 0xDFFF;
+
+    /* -SURROGATE_LOW_START + HALF_BASE */
+    int SURROGATE_LOW_BASE = 9216;
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java
new file mode 100644
index 00000000000..e37671a9d4f
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java
@@ -0,0 +1,612 @@
+/*
+*******************************************************************************
+* Copyright (C) 2006-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.impl.ICUBinary;
+
+/**
+ * ucnvmbcs.h
+ *
+ * ICU conversion (.cnv) data file structure, following the usual UDataInfo
+ * header.
+ *
+ * Format version: 6.2
+ *
+ * struct UConverterStaticData -- struct containing the converter name, IBM CCSID,
+ * min/max bytes per character, etc.
+ * see ucnv_bld.h
+ *
+ * --------------------
+ *
+ * The static data is followed by conversionType-specific data structures.
+ * At the moment, there are only variations of MBCS converters. They all have
+ * the same toUnicode structures, while the fromUnicode structures for SBCS
+ * differ from those for other MBCS-style converters.
+ *
+ * _MBCSHeader.version 4.2 adds an optional conversion extension data structure.
+ * If it is present, then an ICU version reading header versions 4.0 or 4.1
+ * will be able to use the base table and ignore the extension.
+ *
+ * The unicodeMask in the static data is part of the base table data structure.
+ * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the
+ * fromUnicode stage 1 array.
+ * The static data unicodeMask refers only to the base table's properties if
+ * a base table is included.
+ * In an extension-only file, the static data unicodeMask is 0.
+ * The extension data indexes have a separate field with the unicodeMask flags.
+ *
+ * MBCS-style data structure following the static data.
+ * Offsets are counted in bytes from the beginning of the MBCS header structure.
+ * Details about usage in comments in ucnvmbcs.c.
+ *
+ * struct _MBCSHeader (see the definition in this header file below)
+ * contains 32-bit fields as follows:
+ * 8 values:
+ * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.2.0.0)
+ * 1 uint32_t countStates
+ * 2 uint32_t countToUFallbacks
+ * 3 uint32_t offsetToUCodeUnits
+ * 4 uint32_t offsetFromUTable
+ * 5 uint32_t offsetFromUBytes
+ * 6 uint32_t flags, bits:
+ * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher
+ * 0 for older versions and if
+ * there is no extension structure
+ * 7.. 0 outputType
+ * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher
+ * counts bytes in fromUBytes[]
+ *
+ * if(outputType==MBCS_OUTPUT_EXT_ONLY) {
+ * -- base table name for extension-only table
+ * char baseTableName[variable]; -- with NUL plus padding for 4-alignment
+ *
+ * -- all _MBCSHeader fields except for version and flags are 0
+ * } else {
+ * -- normal base table with optional extension
+ *
+ * int32_t stateTable[countStates][256];
+ *
+ * struct _MBCSToUFallback { (fallbacks are sorted by offset)
+ * uint32_t offset;
+ * UChar32 codePoint;
+ * } toUFallbacks[countToUFallbacks];
+ *
+ * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
+ * (padded to an even number of units)
+ *
+ * -- stage 1 tables
+ * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
+ * -- stage 1 table for all of Unicode
+ * uint16_t fromUTable[0x440]; (32-bit-aligned)
+ * } else {
+ * -- BMP-only tables have a smaller stage 1 table
+ * uint16_t fromUTable[0x40]; (32-bit-aligned)
+ * }
+ *
+ * -- stage 2 tables
+ * length determined by top of stage 1 and bottom of stage 3 tables
+ * if(outputType==MBCS_OUTPUT_1) {
+ * -- SBCS: pure indexes
+ * uint16_t stage 2 indexes[?];
+ * } else {
+ * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
+ * uint32_t stage 2 flags and indexes[?];
+ * }
+ *
+ * -- stage 3 tables with byte results
+ * if(outputType==MBCS_OUTPUT_1) {
+ * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
+ * uint16_t fromUBytes[fromUBytesLength/2];
+ * } else {
+ * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c
+ * uint8_t fromUBytes[fromUBytesLength]; or
+ * uint16_t fromUBytes[fromUBytesLength/2]; or
+ * uint32_t fromUBytes[fromUBytesLength/4];
+ * }
+ * }
+ *
+ * -- extension table, details see ucnv_ext.h
+ * int32_t indexes[>=32]; ...
+ */
+/*
+ * ucnv_ext.h
+ *
+ * See icuhtml/design/conversion/conversion_extensions.html
+ *
+ * Conversion extensions serve two purposes:
+ * 1. They support m:n mappings.
+ * 2. They support extension-only conversion files that are used together
+ * with the regular conversion data in base files.
+ *
+ * A base file may contain an extension table (explicitly requested or
+ * implicitly generated for m:n mappings), but its extension table is not
+ * used when an extension-only file is used.
+ *
+ * It is an error if a base file contains any regular (not extension) mapping
+ * from the same sequence as a mapping in the extension file
+ * because the base mapping would hide the extension mapping.
+ *
+ *
+ * Data for conversion extensions:
+ *
+ * One set of data structures per conversion direction (to/from Unicode).
+ * The data structures are sorted by input units to allow for binary search.
+ * Input sequences of more than one unit are handled like contraction tables
+ * in collation:
+ * The lookup value of a unit points to another table that is to be searched
+ * for the next unit, recursively.
+ *
+ * For conversion from Unicode, the initial code point is looked up in
+ * a 3-stage trie for speed,
+ * with an additional table of unique results to save space.
+ *
+ * Long output strings are stored in separate arrays, with length and index
+ * in the lookup tables.
+ * Output results also include a flag distinguishing roundtrip from
+ * (reverse) fallback mappings.
+ *
+ * Input Unicode strings must not begin or end with unpaired surrogates
+ * to avoid problems with matches on parts of surrogate pairs.
+ *
+ * Mappings from multiple characters (code points or codepage state
+ * table sequences) must be searched preferring the longest match.
+ * For this to work and be efficient, the variable-width table must contain
+ * all mappings that contain prefixes of the multiple characters.
+ * If an extension table is built on top of a base table in another file
+ * and a base table entry is a prefix of a multi-character mapping, then
+ * this is an error.
+ *
+ *
+ * Implementation note:
+ *
+ * Currently, the parser and several checks in the code limit the number
+ * of UChars or bytes in a mapping to
+ * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively,
+ * which are output value limits in the data structure.
+ *
+ * For input, this is not strictly necessary - it is a hard limit only for the
+ * buffers in UConverter that are used to store partial matches.
+ *
+ * Input sequences could otherwise be arbitrarily long if partial matches
+ * need not be stored (i.e., if a sequence does not span several buffers with too
+ * many units before the last buffer), although then results would differ
+ * depending on whether partial matches exceed the limits or not,
+ * which depends on the pattern of buffer sizes.
+ *
+ *
+ * Data structure:
+ *
+ * int32_t indexes[>=32];
+ *
+ * Array of indexes and lengths etc. The length of the array is at least 32.
+ * The actual length is stored in indexes[0] to be forward compatible.
+ *
+ * Each index to another array is the number of bytes from indexes[].
+ * Each length of an array is the number of array base units in that array.
+ *
+ * Some of the structures may not be present, in which case their indexes
+ * and lengths are 0.
+ *
+ * Usage of indexes[i]:
+ * [0] length of indexes[]
+ *
+ * // to Unicode table
+ * [1] index of toUTable[] (array of uint32_t)
+ * [2] length of toUTable[]
+ * [3] index of toUUChars[] (array of UChar)
+ * [4] length of toUUChars[]
+ *
+ * // from Unicode table, not for the initial code point
+ * [5] index of fromUTableUChars[] (array of UChar)
+ * [6] index of fromUTableValues[] (array of uint32_t)
+ * [7] length of fromUTableUChars[] and fromUTableValues[]
+ * [8] index of fromUBytes[] (array of char)
+ * [9] length of fromUBytes[]
+ *
+ * // from Unicode trie for initial-code point lookup
+ * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2)
+ * [11] length of stage 1 portion of fromUStage12[]
+ * [12] length of fromUStage12[]
+ * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[])
+ * [14] length of fromUStage3[]
+ * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[])
+ * [16] length of fromUStage3b[]
+ *
+ * [17] Bit field containing numbers of bytes:
+ * 31..24 reserved, 0
+ * 23..16 maximum input bytes
+ * 15.. 8 maximum output bytes
+ * 7.. 0 maximum bytes per UChar
+ *
+ * [18] Bit field containing numbers of UChars:
+ * 31..24 reserved, 0
+ * 23..16 maximum input UChars
+ * 15.. 8 maximum output UChars
+ * 7.. 0 maximum UChars per byte
+ *
+ * [19] Bit field containing flags:
+ * (extension table unicodeMask)
+ * 1 UCNV_HAS_SURROGATES flag for the extension table
+ * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table
+ *
+ * [20]..[30] reserved, 0
+ * [31] number of bytes for the entire extension structure
+ * [>31] reserved; there are indexes[0] indexes
+ *
+ *
+ * uint32_t toUTable[];
+ *
+ * Array of byte/value pairs for lookups for toUnicode conversion.
+ * The array is partitioned into sections like collation contraction tables.
+ * Each section contains one word with the number of following words and
+ * a default value for when the lookup in this section yields no match.
+ *
+ * A section is sorted in ascending order of input bytes,
+ * allowing for fast linear or binary searches.
+ * The builder may store entries for a contiguous range of byte values
+ * (compare difference between the first and last one with count),
+ * which then allows for direct array access.
+ * The builder should always do this for the initial table section.
+ *
+ * Entries may have 0 values, see below.
+ * No two entries in a section have the same byte values.
+ *
+ * Each uint32_t contains an input byte value in bits 31..24 and the
+ * corresponding lookup value in bits 23..0.
+ * Interpret the value as follows:
+ * if(value==0) {
+ * no match, see below
+ * } else if(value<0x1f0000) {
+ * partial match - use value as index to the next toUTable section
+ * and match the next unit; (value indexes toUTable[value])
+ * } else {
+ * if(bit 23 set) {
+ * roundtrip;
+ * } else {
+ * fallback;
+ * }
+ * unset value bit 23;
+ * if(value<=0x2fffff) {
+ * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff)
+ * } else {
+ * bits 17..0 (value&0x3ffff) is an index to
+ * the result UChars in toUUChars[]; (0 indexes toUUChars[0])
+ * length of the result=((value>>18)-12); (length=0..19)
+ * }
+ * }
+ *
+ * The first word in a section contains the number of following words in the
+ * input byte position (bits 31..24, number=1..0xff).
+ * The value of the initial word is used when the current byte is not found
+ * in this section.
+ * If the value is not 0, then it represents a result as above.
+ * If the value is 0, then the search has to return a shorter match with an
+ * earlier default value as the result, or result in "unmappable" even for the
+ * initial bytes.
+ * If the value is 0 for the initial toUTable entry, then the initial byte
+ * does not start any mapping input.
+ *
+ *
+ * UChar toUUChars[];
+ *
+ * Contains toUnicode mapping results, stored as sequences of UChars.
+ * Indexes and lengths stored in the toUTable[].
+ *
+ *
+ * UChar fromUTableUChars[];
+ * uint32_t fromUTableValues[];
+ *
+ * The fromUTable is split into two arrays, but works otherwise much like
+ * the toUTable. The array is partitioned into sections like collation
+ * contraction tables and toUTable.
+ * A row in the table consists of same-index entries in fromUTableUChars[]
+ * and fromUTableValues[].
+ *
+ * Interpret a value as follows:
+ * if(value==0) {
+ * no match, see below
+ * } else if(value<=0xffffff) { (bits 31..24 are 0)
+ * partial match - use value as index to the next fromUTable section
+ * and match the next unit; (value indexes fromUTable[value])
+ * } else {
+ * if(value==0x80000001) {
+ * return no mapping, but request for <subchar1>;
+ * }
+ * if(bit 31 set) {
+ * roundtrip;
+ * } else {
+ * fallback;
+ * }
+ * // bits 30..29 reserved, 0
+ * length=(value>>24)&0x1f; (bits 28..24)
+ * if(length==1..3) {
+ * bits 23..0 contain 1..3 bytes, padded with 00s on the left;
+ * } else {
+ * bits 23..0 (value&0xffffff) is an index to
+ * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0])
+ * }
+ * }
+ *
+ * The first pair in a section contains the number of following pairs in the
+ * UChar position (16 bits, number=1..0xffff).
+ * The value of the initial pair is used when the current UChar is not found
+ * in this section.
+ * If the value is not 0, then it represents a result as above.
+ * If the value is 0, then the search has to return a shorter match with an
+ * earlier default value as the result, or result in "unmappable" even for the
+ * initial UChars.
+ *
+ * If the from Unicode trie is present, then the from Unicode search tables
+ * are not used for initial code points.
+ * In this case, the first entries (index 0) in the tables are not used
+ * (reserved, set to 0) because a value of 0 is used in trie results
+ * to indicate no mapping.
+ *
+ *
+ * uint16_t fromUStage12[];
+ *
+ * Stages 1 & 2 of a trie that maps an initial code point.
+ * Indexes in stage 1 are all offset by the length of stage 1 so that the
+ * same array pointer can be used for both stages.
+ * If (c>>10)>=(length of stage 1) then c does not start any mapping.
+ * Same bit distribution as for regular conversion tries.
+ *
+ *
+ * uint16_t fromUStage3[];
+ * uint32_t fromUStage3b[];
+ *
+ * Stage 3 of the trie. The first array simply contains indexes to the second,
+ * which contains words in the same format as fromUTableValues[].
+ * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries,
+ * and 16-bit entries in stage 3 allow for 64k stage 3b entries.
+ * The stage 3 granularity means that the stage 2 entry needs to be left-shifted.
+ *
+ * Two arrays are used because it is expected that more than half of the stage 3
+ * entries will be zero. The 16-bit index stage 3 array saves space even
+ * considering storing a total of 6 bytes per non-zero entry in both arrays
+ * together.
+ * Using a stage 3 granularity of >1 diminishes the compactability in that stage
+ * but provides a larger effective addressing space in stage 2.
+ * All but the final result stage use 16-bit entries to save space.
+ *
+ * fromUStage3b[] contains a zero for "no mapping" at its index 0,
+ * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping"
+ * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>),
+ * and all other items are unique non-zero results.
+ *
+ * The default value of a fromUTableValues[] section that is referenced
+ * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1,
+ * but this value must not occur anywhere else in fromUTableValues[]
+ * because "no mapping" is always a property of a single code point,
+ * never of multiple.
+ *
+ *
+ * char fromUBytes[];
+ *
+ * Contains fromUnicode mapping results, stored as sequences of chars.
+ * Indexes and lengths stored in the fromUTableValues[].
+ */
+
+final class UConverterDataReader implements ICUBinary.Authenticate {
+ //private final static boolean debug = ICUDebug.enabled("UConverterDataReader");
+
+ /*
+ * UConverterDataReader(UConverterDataReader r)
+ {
+ dataInputStream = new DataInputStream(r.dataInputStream);
+ unicodeVersion = r.unicodeVersion;
+ }
+ */
+ /* the number of bytes read from the stream */
+ int bytesRead = 0;
+ /* the number of bytes read for static data */
+ int staticDataBytesRead = 0;
+    /**
+     * Creates a reader for ICU converter (.cnv) data.
+     * <p>The ICU data header is read from the stream and checked through
+     * <code>ICUBinary.readHeader</code> against <code>DATA_FORMAT_ID</code>, with
+     * this reader passed as the authentication callback. The stream is then
+     * wrapped in a <code>DataInputStream</code> that all read methods of this
+     * class use.
+     *
+     * @param inputStream ICU converter data input stream, starting at the data header
+     * @exception IOException thrown if the data file fails authentication
+     */
+    protected UConverterDataReader(InputStream inputStream)
+                                        throws IOException{
+        // Consume and authenticate the header; its return value is not needed here.
+        ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+
+        // Everything after the header is read through this wrapper.
+        this.dataInputStream = new DataInputStream(inputStream);
+    }
+
+ // protected methods -------------------------------------------------
+
+    /**
+     * Reads the fixed-layout static converter data (the UConverterStaticData
+     * structure, see ucnv_bld.h) from the stream into <code>sd</code>.
+     * <p>The number of bytes consumed by this call is stored in
+     * <code>staticDataBytesRead</code> and added to <code>bytesRead</code>.
+     *
+     * @param sd object that receives the values; its <code>subChar</code> and
+     *           <code>reserved</code> arrays must already be allocated because
+     *           they are filled completely from the stream
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readStaticData(UConverterStaticData sd) throws IOException
+    {
+        int bRead = 0;
+        sd.structSize = dataInputStream.readInt();
+        bRead +=4;
+        byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH];
+        dataInputStream.readFully(name);
+        bRead +=name.length;
+        // Decode with an explicit charset: the name field holds ASCII bytes, and
+        // new String(byte[], int, int) would use the platform default charset,
+        // which garbles the name on JVMs whose default is not ASCII-compatible.
+        // The whole fixed-size field is converted, exactly as before.
+        sd.name = new String(name, 0, name.length, "US-ASCII");
+        sd.codepage = dataInputStream.readInt();
+        bRead +=4;
+        sd.platform = dataInputStream.readByte();
+        bRead++;
+        sd.conversionType = dataInputStream.readByte();
+        bRead++;
+        sd.minBytesPerChar = dataInputStream.readByte();
+        bRead++;
+        sd.maxBytesPerChar = dataInputStream.readByte();
+        bRead++;
+        dataInputStream.readFully(sd.subChar);
+        bRead += sd.subChar.length;
+        sd.subCharLen = dataInputStream.readByte();
+        bRead++;
+        sd.hasToUnicodeFallback = dataInputStream.readByte();
+        bRead++;
+        sd.hasFromUnicodeFallback = dataInputStream.readByte();
+        bRead++;
+        // the mask is stored as an unsigned byte, so read it without sign extension
+        sd.unicodeMask = (short)dataInputStream.readUnsignedByte();
+        bRead++;
+        sd.subChar1 = dataInputStream.readByte();
+        bRead++;
+        dataInputStream.readFully(sd.reserved);
+        bRead += sd.reserved.length;
+        // remember the size of the static data separately from the running total
+        staticDataBytesRead = bRead;
+        bytesRead += bRead;
+    }
+
+    /**
+     * Reads the _MBCSHeader fields from the stream into <code>h</code>, adding
+     * every consumed byte to <code>bytesRead</code>.
+     * <p>The version bytes and seven 32-bit fields are always read. For header
+     * version 5.3 and higher an options word follows, and if that word has
+     * <code>MBCS_OPT_NO_FROM_U</code> set, the full stage 2 length follows as well.
+     *
+     * @param h header object to fill; <code>h.version</code> must be allocated
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException
+    {
+        dataInputStream.readFully(h.version);
+        bytesRead += h.version.length;
+
+        h.countStates = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.countToUFallbacks = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.offsetToUCodeUnits = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.offsetFromUTable = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.offsetFromUBytes = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.flags = dataInputStream.readInt();
+        bytesRead += 4;
+
+        h.fromUBytesLength = dataInputStream.readInt();
+        bytesRead += 4;
+
+        // Only header versions 5.3 and higher carry the options word.
+        boolean hasOptions = h.version[0] == 5 && h.version[1] >= 3;
+        if (!hasOptions) {
+            return;
+        }
+        h.options = dataInputStream.readInt();
+        bytesRead += 4;
+
+        // The full stage 2 length is stored only when MBCS_OPT_NO_FROM_U is set.
+        if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) == 0) {
+            return;
+        }
+        h.fullStage2Length = dataInputStream.readInt();
+        bytesRead += 4;
+    }
+
+    /**
+     * Fills the given pre-sized MBCS table arrays from the stream, in this order:
+     * state table rows, toUnicode fallbacks, Unicode code units, fromUnicode
+     * table, fromUnicode bytes. The array lengths determine how much is read;
+     * every consumed byte is added to <code>bytesRead</code>.
+     *
+     * @param stateTableArray state table rows, each filled with 32-bit values
+     * @param toUFallbacksArray existing fallback objects whose offset and
+     *                          code point are set
+     * @param unicodeCodeUnitsArray filled with 16-bit units
+     * @param fromUnicodeTableArray filled with 16-bit units
+     * @param fromUnicodeBytesArray filled with bytes
+     * @exception IOException if the stream ends early or cannot be read
+     */
+    protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException
+    {
+        for (int[] stateRow : stateTableArray) {
+            for (int col = 0; col < stateRow.length; ++col) {
+                stateRow[col] = dataInputStream.readInt();
+                bytesRead += 4;
+            }
+        }
+
+        // each fallback is stored as an (offset, code point) pair of 32-bit values
+        for (CharsetMBCS.MBCSToUFallback fallback : toUFallbacksArray) {
+            fallback.offset = dataInputStream.readInt();
+            bytesRead += 4;
+            fallback.codePoint = dataInputStream.readInt();
+            bytesRead += 4;
+        }
+
+        for (int unit = 0; unit < unicodeCodeUnitsArray.length; ++unit) {
+            unicodeCodeUnitsArray[unit] = dataInputStream.readChar();
+            bytesRead += 2;
+        }
+
+        for (int entry = 0; entry < fromUnicodeTableArray.length; ++entry) {
+            fromUnicodeTableArray[entry] = dataInputStream.readChar();
+            bytesRead += 2;
+        }
+
+        for (int pos = 0; pos < fromUnicodeBytesArray.length; ++pos) {
+            fromUnicodeBytesArray[pos] = dataInputStream.readByte();
+            bytesRead++;
+        }
+    }
+
+    /**
+     * Reads a NUL-terminated base table name from the stream, one byte per
+     * character. The terminating NUL is consumed but not returned, and every
+     * consumed byte (terminator included) is added to <code>bytesRead</code>.
+     * No bytes beyond the terminator are read here.
+     *
+     * @return the name without its terminator
+     * @exception IOException if the stream ends before the terminator or cannot be read
+     */
+    protected String readBaseTableName() throws IOException
+    {
+        StringBuilder tableName = new StringBuilder();
+        for (;;) {
+            char ch = (char)dataInputStream.readByte();
+            // count the byte whether it is a name character or the terminator
+            bytesRead++;
+            if (ch == 0) {
+                return tableName.toString();
+            }
+            tableName.append(ch);
+        }
+    }
+
+ //protected int[] readExtIndexes(int skip) throws IOException
+    /**
+     * Skips to the conversion extension structure and reads all of it into a
+     * buffer.
+     * <p>The structure starts with <code>indexes[]</code>: its first value is the
+     * number of indexes (at least 32) and <code>indexes[31]</code> is the byte
+     * size of the entire extension structure. The returned buffer has that size
+     * and holds the indexes followed by the rest of the structure; its position
+     * is left just after the indexes.
+     * <p>The bytes read (but not the skipped bytes) are added to
+     * <code>bytesRead</code>.
+     *
+     * @param skip number of bytes to skip before the extension structure
+     * @return buffer containing the complete extension structure
+     * @exception IOException if the bytes cannot be skipped, the index values are
+     *            inconsistent, or the stream ends before the structure is complete
+     */
+    protected ByteBuffer readExtIndexes(int skip) throws IOException
+    {
+        int skipped = dataInputStream.skipBytes(skip);
+        if(skipped != skip){
+            throw new IOException("could not skip "+ skip +" bytes");
+        }
+        int n = dataInputStream.readInt();
+        bytesRead+=4;
+        // indexes[31] is needed below, so fewer than 32 indexes (or a negative
+        // count) means corrupt data; report it instead of failing on the array.
+        if(n < 32){
+            throw new IOException("invalid number of extension indexes: " + n);
+        }
+        int[] indexes = new int[n];
+        indexes[0] = n;
+        for(int i = 1; i < n; ++i) {
+            indexes[i] = dataInputStream.readInt();
+            bytesRead+=4;
+        }
+
+        // The total size must at least cover the indexes that are copied into it.
+        int totalSize = indexes[31];
+        if(totalSize / 4 < n){
+            throw new IOException("invalid extension structure size: " + totalSize);
+        }
+        ByteBuffer b = ByteBuffer.allocate(totalSize);
+        for(int i = 0; i < n; ++i) {
+            b.putInt(indexes[i]);
+        }
+        // A single read() may deliver fewer bytes than requested; readFully()
+        // loops until the rest of the structure is complete or throws at EOF.
+        int remaining = b.remaining();
+        dataInputStream.readFully(b.array(), b.position(), remaining);
+        bytesRead += remaining;
+        return b;
+    }
+
+ /*protected byte[] readExtTables(int n) throws IOException
+ {
+ byte[] tables = new byte[n];
+ int len =dataInputStream.read(tables);
+ if(len==-1){
+ throw new IOException("Read failed");
+ }
+ bytesRead += len;
+ return tables;
+ }*/
+
+    /**
+     * Returns the data format version this reader accepts.
+     * The internal array itself is returned, not a copy.
+     *
+     * @return the <code>DATA_FORMAT_VERSION</code> array
+     */
+    byte[] getDataFormatVersion() {
+        return DATA_FORMAT_VERSION;
+    }
+ /**
+ * Inherited method
+ */
+ public boolean isDataVersionAcceptable(byte version[]){
+ return version[0] == DATA_FORMAT_VERSION[0];
+ }
+
+/* byte[] getUnicodeVersion(){
+ return unicodeVersion;
+ }*/
+ // private data members -------------------------------------------------
+
+ /**
+ * ICU data file input stream
+ */
+ DataInputStream dataInputStream;
+
+// private byte[] unicodeVersion;
+
+ /**
+ * File format version that this class understands.
+ * No guarantees are made if an older version is used
+ * see store.c of gennorm for more information and values
+ */
+ // DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c)
+ private static final byte DATA_FORMAT_ID[] = {(byte)0x63, (byte)0x6e, (byte)0x76, (byte)0x74}; // dataFormat="cnvt"
+ private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
+
+}
+
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java
new file mode 100644
index 00000000000..e69f40a490c
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterSharedData.java
@@ -0,0 +1,448 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Defines the UConverterSharedData struct, the immutable, shared part of
+ * UConverter.
+ */
+final class UConverterSharedData {
+ // uint32_t structSize; /* Size of this structure */
+ // int structSize; /* Size of this structure */
+ /**
+ * used to count number of clients, 0xffffffff for static SharedData
+ */
+ int referenceCounter;
+
+ // agljport:todo const void *dataMemory; /* from udata_openChoice() - for cleanup */
+ // agljport:todo void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
+
+ // const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
+ /**
+ * pointer to the static (non changing)
+ * data.
+ */
+ UConverterStaticData staticData;
+
+ // UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy
+ // on close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to
+ // clean it up if the ref is 0 */
+
+ /**
+ * TRUE: shared data is in cache, don't destroy
+ * on close() if 0 ref. FALSE: shared data isn't
+ * in the cache, do attempt to clean it up if
+ * the ref is 0
+ */
+ boolean sharedDataCached;
+
+ /*
+ * UBool staticDataOwned; TRUE if static data owned by shared data & should
+ * be freed with it, NEVER true for udata() loaded statics. This ignored
+ * variable was removed to make space for sharedDataCached.
+ */
+
+ // const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
+ // UConverterImpl impl; /* vtable-style struct of mostly function pointers */
+ /** initial values of some members of the mutable part of object */
+ long toUnicodeStatus;
+
+ /**
+ * Shared data structures currently come in two flavors:
+ * - readonly for built-in algorithmic converters
+ * - allocated for MBCS, with a pointer to an allocated UConverterTable
+ * which always has a UConverterMBCSTable
+ *
+ * To eliminate one allocation, I am making the UConverterMBCSTable a member
+ * of the shared data. It is the last member so that static definitions of
+ * UConverterSharedData work as before. The table field above also remains
+ * to avoid updating all static definitions, but is now unused.
+ *
+ */
+ CharsetMBCS.UConverterMBCSTable mbcs;
+
+    /**
+     * Creates shared data with a fresh, empty MBCS table; all other fields
+     * keep their default values.
+     */
+    UConverterSharedData() {
+        this.mbcs = new CharsetMBCS.UConverterMBCSTable();
+    }
+
+    /**
+     * Creates shared data with a fresh MBCS table and the given initial values.
+     *
+     * @param referenceCounter_ initial client count
+     * @param staticData_ the static (non changing) converter data
+     * @param sharedDataCached_ whether the shared data is in the cache
+     * @param toUnicodeStatus_ initial toUnicode status value
+     */
+    UConverterSharedData(int referenceCounter_, UConverterStaticData staticData_, boolean sharedDataCached_, long toUnicodeStatus_)
+    {
+        // the no-argument constructor allocates the MBCS table
+        this();
+        this.referenceCounter = referenceCounter_;
+        this.staticData = staticData_;
+        this.sharedDataCached = sharedDataCached_;
+        // there is no impl member to initialize in this port
+        this.toUnicodeStatus = toUnicodeStatus_;
+    }
+
+ /**
+ * UConverterImpl contains all the data and functions for a converter type.
+ * Its function pointers work much like a C++ vtable. Many converter types
+ * need to define only a subset of the functions; when a function pointer is
+ * NULL, then a default action will be performed.
+ *
+ * Every converter type must implement toUnicode, fromUnicode, and
+ * getNextUChar, otherwise the converter may crash. Every converter type
+ * that has variable-length codepage sequences should also implement
+ * toUnicodeWithOffsets and fromUnicodeWithOffsets for correct offset
+ * handling. All other functions may or may not be implemented - it depends
+ * only on whether the converter type needs them.
+ *
+ * When open() fails, then close() will be called, if present.
+ */
+/* class UConverterImpl {
+ UConverterType type;
+ UConverterToUnicode toUnicode;
+ protected void doToUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ }
+
+ final void toUnicode(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ doToUnicode(args, pErrorCode);
+ }
+
+ //UConverterFromUnicode fromUnicode;
+ protected void doFromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+ {
+ }
+
+ final void fromUnicode(UConverterFromUnicodeArgs args, int[] pErrorCode)
+ {
+ doFromUnicode(args, pErrorCode);
+ }
+
+ protected int doGetNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ return 0;
+ }
+
+ //UConverterGetNextUChar getNextUChar;
+ final int getNextUChar(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ return doGetNextUChar(args, pErrorCode);
+ }
+
+ // interface UConverterImplLoadable extends UConverterImpl
+ protected void doLoad(UConverterLoadArgs pArgs, short[] raw, int[] pErrorCode)
+ {
+ }
+
+ protected void doUnload()
+ {
+ }
+
+ // interface UConverterImplOpenable extends UConverterImpl
+ protected void doOpen(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+ {
+ }
+
+ //UConverterOpen open;
+ final void open(UConverter cnv, String name, String locale, long options, int[] pErrorCode)
+ {
+ doOpen(cnv, name, locale, options, pErrorCode);
+ }
+
+ protected void doClose(UConverter cnv)
+ {
+ }
+
+ //UConverterClose close;
+ final void close(UConverter cnv)
+ {
+ doClose(cnv);
+ }
+
+ protected void doReset(UConverter cnv, int choice)
+ {
+ }
+
+ //typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice);
+ //UConverterReset reset;
+ final void reset(UConverter cnv, int choice)
+ {
+ doReset(cnv, choice);
+ }
+
+ // interface UConverterImplVariableLength extends UConverterImpl
+ protected void doToUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ }
+
+ //UConverterToUnicode toUnicodeWithOffsets;
+ final void toUnicodeWithOffsets(UConverterToUnicodeArgs args, int[] pErrorCode)
+ {
+ doToUnicodeWithOffsets(args, pErrorCode);
+ }
+
+ protected void doFromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+ {
+ }
+
+ //UConverterFromUnicode fromUnicodeWithOffsets;
+ final void fromUnicodeWithOffsets(UConverterFromUnicodeArgs args, int[] pErrorCode)
+ {
+ doFromUnicodeWithOffsets(args, pErrorCode);
+ }
+
+ // interface UConverterImplMisc extends UConverterImpl
+ protected void doGetStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+ {
+ }
+
+ //UConverterGetStarters getStarters;
+ final void getStarters(UConverter converter, boolean starters[], int[] pErrorCode)
+ {
+ doGetStarters(converter, starters, pErrorCode);
+ }
+
+ protected String doGetName(UConverter cnv)
+ {
+ return "";
+ }
+
+ //UConverterGetName getName;
+ final String getName(UConverter cnv)
+ {
+ return doGetName(cnv);
+ }
+
+ protected void doWriteSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+ {
+ }
+
+ //UConverterWriteSub writeSub;
+ final void writeSub(UConverterFromUnicodeArgs pArgs, long offsetIndex, int[] pErrorCode)
+ {
+ doWriteSub(pArgs, offsetIndex, pErrorCode);
+ }
+
+ protected UConverter doSafeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+ {
+ return new UConverter();
+ }
+
+ //UConverterSafeClone safeClone;
+ final UConverter safeClone(UConverter cnv, byte[] stackBuffer, int[] pBufferSize, int[] status)
+ {
+ return doSafeClone(cnv, stackBuffer, pBufferSize, status);
+ }
+
+ protected void doGetUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+ {
+ }
+
+ //UConverterGetUnicodeSet getUnicodeSet;
+ // final void getUnicodeSet(UConverter cnv, UnicodeSet /*USetAdder* / sa, int /*UConverterUnicodeSet* / which, int[] pErrorCode)
+ //{
+ // doGetUnicodeSet(cnv, sa, which, pErrorCode);
+ //}
+
+ //}
+
+ static final String DATA_TYPE = "cnv";
+ private static final int CNV_DATA_BUFFER_SIZE = 25000;
+ static final int sizeofUConverterSharedData = 100;
+
+ //static UDataMemoryIsAcceptable isCnvAcceptable;
+
+ /**
+ * Load a non-algorithmic converter.
+ * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
+
+ // UConverterSharedData * load(UConverterLoadArgs *pArgs, UErrorCode *err)
+ static final UConverterSharedData load(UConverterLoadArgs pArgs, int[] err)
+ {
+ UConverterSharedData mySharedConverterData = null;
+
+ if(err == null || ErrorCode.isFailure(err[0])) {
+ return null;
+ }
+
+ if(pArgs.pkg != null && pArgs.pkg.length() != 0) {
+ application-provided converters are not currently cached
+ return UConverterSharedData.createConverterFromFile(pArgs, err);
+ }
+
+ //agljport:fix mySharedConverterData = getSharedConverterData(pArgs.name);
+ if (mySharedConverterData == null)
+ {
+ Not cached, we need to stream it in from file
+ mySharedConverterData = UConverterSharedData.createConverterFromFile(pArgs, err);
+ if (ErrorCode.isFailure(err[0]) || (mySharedConverterData == null))
+ {
+ return null;
+ }
+ else
+ {
+ share it with other library clients
+ //agljport:fix shareConverterData(mySharedConverterData);
+ }
+ }
+ else
+ {
+ The data for this converter was already in the cache.
+ Update the reference counter on the shared data: one more client
+ mySharedConverterData.referenceCounter++;
+ }
+
+ return mySharedConverterData;
+ }
+
+ Takes an alias name gets an actual converter file name
+ *goes to disk and opens it.
+ *allocates the memory and returns a new UConverter object
+
+ //static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
+ static final UConverterSharedData createConverterFromFile(UConverterLoadArgs pArgs, int[] err)
+ {
+ UDataMemory data = null;
+ UConverterSharedData sharedData = null;
+
+ //agljport:todo UTRACE_ENTRY_OC(UTRACE_LOAD);
+
+ if (err == null || ErrorCode.isFailure(err[0])) {
+ //agljport:todo UTRACE_EXIT_STATUS(*err);
+ return null;
+ }
+
+ //agljport:todo UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
+
+ //agljport:fix data = udata_openChoice(pArgs.pkgArray, DATA_TYPE.getBytes(), pArgs.name, isCnvAcceptable, null, err);
+ if(ErrorCode.isFailure(err[0]))
+ {
+ //agljport:todo UTRACE_EXIT_STATUS(*err);
+ return null;
+ }
+
+ sharedData = data_unFlattenClone(pArgs, data, err);
+ if(ErrorCode.isFailure(err[0]))
+ {
+ //agljport:fix udata_close(data);
+ //agljport:todo UTRACE_EXIT_STATUS(*err);
+ return null;
+ }
+
+
+ * TODO Store pkg in a field in the shared data so that delta-only converters
+ * can load base converters from the same package.
+ * If the pkg name is longer than the field, then either do not load the converter
+ * in the first place, or just set the pkg field to "".
+
+
+ return sharedData;
+ }
+*/
+ UConverterDataReader dataReader = null; // starts out null; NOTE(review): the code that assigns it is not visible in this part of the file
+
+ /*
+ * returns a converter type from a string
+ */
+ /* static final UConverterSharedData getAlgorithmicTypeFromName(String realName)
+ {
+ long mid, start, limit;
+ long lastMid;
+ int result;
+ StringBuffer strippedName = new StringBuffer(UConverterConstants.MAX_CONVERTER_NAME_LENGTH);
+
+ // Lower case and remove ignorable characters.
+ UConverterAlias.stripForCompare(strippedName, realName);
+
+ // do a binary search for the alias
+ start = 0;
+ limit = cnvNameType.length;
+ mid = limit;
+ lastMid = -1;
+
+ for (;;) {
+ mid = (long)((start + limit) / 2);
+ if (lastMid == mid) { // Have we moved?
+ break; // We haven't moved, and it wasn't found.
+ }
+ lastMid = mid;
+ result = strippedName.substring(0).compareTo(cnvNameType[(int)mid].name);
+
+ if (result < 0) {
+ limit = mid;
+ } else if (result > 0) {
+ start = mid;
+ } else {
+ return converterData[cnvNameType[(int)mid].type];
+ }
+ }
+
+ return null;
+ }*/
+
+ /*
+ * Enum-style int constants naming the basic types of converters.
+ */
+ static final class UConverterType {
+ static final int UNSUPPORTED_CONVERTER = -1; // the only negative value; the listed types are numbered from 0
+ static final int SBCS = 0;
+ static final int DBCS = 1;
+ static final int MBCS = 2;
+ static final int LATIN_1 = 3;
+ static final int UTF8 = 4;
+ static final int UTF16_BigEndian = 5;
+ static final int UTF16_LittleEndian = 6;
+ static final int UTF32_BigEndian = 7;
+ static final int UTF32_LittleEndian = 8;
+ static final int EBCDIC_STATEFUL = 9;
+ static final int ISO_2022 = 10;
+ static final int LMBCS_1 = 11; // from here on each value is derived from its predecessor
+ static final int LMBCS_2 = LMBCS_1 + 1; // 12
+ static final int LMBCS_3 = LMBCS_2 + 1; // 13
+ static final int LMBCS_4 = LMBCS_3 + 1; // 14
+ static final int LMBCS_5 = LMBCS_4 + 1; // 15
+ static final int LMBCS_6 = LMBCS_5 + 1; // 16
+ static final int LMBCS_8 = LMBCS_6 + 1; // 17 (no LMBCS_7 is defined)
+ static final int LMBCS_11 = LMBCS_8 + 1; // 18 (no LMBCS_9 or LMBCS_10 is defined)
+ static final int LMBCS_16 = LMBCS_11 + 1; // 19 (no LMBCS_12 through LMBCS_15 are defined)
+ static final int LMBCS_17 = LMBCS_16 + 1; // 20
+ static final int LMBCS_18 = LMBCS_17 + 1; // 21
+ static final int LMBCS_19 = LMBCS_18 + 1; // 22
+ static final int LMBCS_LAST = LMBCS_19; // 22, same value as LMBCS_19 (not a separate type)
+ static final int HZ = LMBCS_LAST + 1; // 23
+ static final int SCSU = HZ + 1; // 24
+ static final int ISCII = SCSU + 1; // 25
+ static final int US_ASCII = ISCII + 1; // 26
+ static final int UTF7 = US_ASCII + 1; // 27
+ static final int BOCU1 = UTF7 + 1; // 28
+ static final int UTF16 = BOCU1 + 1; // 29
+ static final int UTF32 = UTF16 + 1; // 30
+ static final int CESU8 = UTF32 + 1; // 31
+ static final int IMAP_MAILBOX = CESU8 + 1; // 32
+
+ // Number of converter types for which we have conversion routines (one past the highest ID, IMAP_MAILBOX).
+ static final int NUMBER_OF_SUPPORTED_CONVERTER_TYPES = IMAP_MAILBOX + 1; // 33
+ }
+
+ /**
+ * Enum-style int constants specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See openCCSID().
+ */
+ static final class UConverterPlatform {
+ static final int UNKNOWN = -1; // negative value, distinct from every concrete platform ID
+ static final int IBM = 0; // the only concrete platform constant defined in this class
+ }
+
+ // static UConverterSharedData[] converterData;
+ /* static class cnvNameTypeClass {
+ String name;
+ int type;
+ cnvNameTypeClass(String name_, int type_) { name = name_; type = type_; }
+ }
+
+ static cnvNameTypeClass cnvNameType[];*/
+
+
+ static final String DATA_TYPE = "cnv"; // passed as DATA_TYPE.getBytes() in the commented-out udata_openChoice call in this file
+ //static final int CNV_DATA_BUFFER_SIZE = 25000;
+ //static final int SIZE_OF_UCONVERTER_SHARED_DATA = 228;
+
+}
diff --git a/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java b/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java
new file mode 100644
index 00000000000..0ccd49b0c7b
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/UConverterStaticData.java
@@ -0,0 +1,61 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2007, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+final class UConverterStaticData { /* each field comment reads "+offset: size", in bytes */
+ int structSize; /* +0: 4 Size of this structure */
+
+ String name; /* +4: 60 internal name of the converter - invariant chars */
+
+ int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
+
+ byte platform; /* +68: 1 platform of the converter (only IBM now) */
+ byte conversionType; /* +69: 1 conversion type */
+
+ byte minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
+ byte maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
+
+ byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary]; allocated in the constructor */
+ byte subCharLen; /* +76: 1 */
+
+ byte hasToUnicodeFallback; /* +77: 1 flag held in a byte (the original note refers to the C type UBool) */
+ byte hasFromUnicodeFallback; /* +78: 1 */
+ short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates (declared short although the layout slot is 1 byte) */
+ byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
+ byte reserved[/*19*/]; /* +81: 19 to round out the structure */
+ /* total size: 100 (the listed sizes add up to SIZE_OF_UCONVERTER_STATIC_DATA) */
+ public UConverterStaticData() // allocates the two byte arrays; every other field keeps its Java default value
+ {
+ subChar = new byte[UConverterConstants.MAX_SUBCHAR_LEN];
+ reserved = new byte[19]; // same length as the 19-byte "reserved" slot in the layout comments
+ }
+
+/* public UConverterStaticData(int structSize_, String name_, int codepage_, byte platform_, byte conversionType_, byte minBytesPerChar_, byte maxBytesPerChar_, byte[] subChar_, byte subCharLen_, byte hasToUnicodeFallback_, byte hasFromUnicodeFallback_, short unicodeMask_, byte subChar1_, byte[] reserved_)
+ {
+ structSize = structSize_;
+ name = name_;
+ codepage = codepage_;
+ platform = platform_;
+ conversionType = conversionType_;
+ minBytesPerChar = minBytesPerChar_;
+ maxBytesPerChar = maxBytesPerChar_;
+ subChar = new byte[UConverterConstants.MAX_SUBCHAR_LEN];
+ System.arraycopy(subChar_, 0, subChar, 0, (subChar.length < subChar_.length? subChar.length : subChar_.length));
+ subCharLen = subCharLen_;
+ hasToUnicodeFallback = hasToUnicodeFallback_;
+ hasFromUnicodeFallback = hasFromUnicodeFallback_;
+ unicodeMask = unicodeMask_;
+ subChar1 = subChar1_;
+ reserved = new byte[19];
+ System.arraycopy(reserved_, 0, reserved, 0, (reserved.length < reserved_.length? reserved.length : reserved_.length));
+ }*/
+
+ public static final int SIZE_OF_UCONVERTER_STATIC_DATA = 100; // equals the "total size" given in the field layout comments
+}
+
diff --git a/main/classes/charset/src/com/ibm/icu/charset/package.html b/main/classes/charset/src/com/ibm/icu/charset/package.html
new file mode 100644
index 00000000000..a9e87ef9e28
--- /dev/null
+++ b/main/classes/charset/src/com/ibm/icu/charset/package.html
@@ -0,0 +1,15 @@
+
+
+
+
+C:ICU4J .charset Package Overview
+
+
+
+
+Enhanced charset conversion support.
+CharsetICU, CharsetProviderICU, CharsetEncoderICU and CharsetDecoderICU provide conversion services for many charsets.
+
+
diff --git a/main/classes/collate/.classpath b/main/classes/collate/.classpath
new file mode 100644
index 00000000000..b0d608f2d91
--- /dev/null
+++ b/main/classes/collate/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/main/classes/collate/.externalToolBuilders/copy-data-collate.launch b/main/classes/collate/.externalToolBuilders/copy-data-collate.launch
new file mode 100644
index 00000000000..17542f20e95
--- /dev/null
+++ b/main/classes/collate/.externalToolBuilders/copy-data-collate.launch
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/collate/.project b/main/classes/collate/.project
new file mode 100644
index 00000000000..72983f29537
--- /dev/null
+++ b/main/classes/collate/.project
@@ -0,0 +1,29 @@
+
+
+ icu4j-collate
+
+
+ icu4j-core
+ icu4j-shared
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.ui.externaltools.ExternalToolBuilder
+ full,incremental,
+
+
+ LaunchConfigHandle
+ <project>/.externalToolBuilders/copy-data-collate.launch
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/main/classes/collate/.settings/org.eclipse.core.resources.prefs b/main/classes/collate/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 00000000000..67c501d32e3
--- /dev/null
+++ b/main/classes/collate/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+#Fri Nov 05 14:17:35 EDT 2010
+eclipse.preferences.version=1
+encoding//src/com/ibm/icu/text/AlphabeticIndex.java=UTF-8
+encoding/=UTF-8
diff --git a/main/classes/collate/.settings/org.eclipse.jdt.core.prefs b/main/classes/collate/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..d11b39f392e
--- /dev/null
+++ b/main/classes/collate/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,345 @@
+#Thu Aug 27 17:46:56 EDT 2009
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.doc.comment.support=enabled
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=warning
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags
+org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_assignment=0
+org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
+org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
+org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
+org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_after_package=1
+org.eclipse.jdt.core.formatter.blank_lines_before_field=0
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
+org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
+org.eclipse.jdt.core.formatter.blank_lines_before_method=1
+org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0
+org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
+org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
+org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_header=false
+org.eclipse.jdt.core.formatter.comment.format_html=true
+org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
+org.eclipse.jdt.core.formatter.comment.format_line_comments=true
+org.eclipse.jdt.core.formatter.comment.format_source_code=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
+org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.line_length=120
+org.eclipse.jdt.core.formatter.compact_else_if=true
+org.eclipse.jdt.core.formatter.continuation_indentation=2
+org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
+org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_empty_lines=false
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false
+org.eclipse.jdt.core.formatter.indentation.size=4
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
+org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.join_lines_in_comments=true
+org.eclipse.jdt.core.formatter.join_wrapped_lines=true
+org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.lineSplit=120
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
+org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
+org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
+org.eclipse.jdt.core.formatter.tabulation.char=space
+org.eclipse.jdt.core.formatter.tabulation.size=4
+org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
+org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
diff --git a/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs b/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..977a256edd2
--- /dev/null
+++ b/main/classes/collate/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,10 @@
+#Wed Jul 08 12:14:39 EDT 2009
+eclipse.preferences.version=1
+formatter_profile=_ICU4J Standard
+formatter_settings_version=11
+org.eclipse.jdt.ui.ignorelowercasenames=true
+org.eclipse.jdt.ui.importorder=java;javax;org;com;
+org.eclipse.jdt.ui.javadoc=true
+org.eclipse.jdt.ui.ondemandthreshold=99
+org.eclipse.jdt.ui.staticondemandthreshold=99
+org.eclipse.jdt.ui.text.custom_code_templates=/**\r\n * @return the ${bare_field_name}\r\n */ /**\r\n * @param ${param} the ${bare_field_name} to set\r\n */ /**\r\n * ${tags}\r\n */ /*\r\n *******************************************************************************\r\n * Copyright (C) ${year}, International Business Machines Corporation and *\r\n * others. All Rights Reserved. *\r\n *******************************************************************************\r\n */ /**\r\n * @author ${user}\r\n *\r\n * ${tags}\r\n */ /**\r\n * \r\n */ /**\r\n * ${tags}\r\n */ /* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */ /**\r\n * ${tags}\r\n * ${see_to_target}\r\n */ ${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration} \r\n \r\n \r\n \r\n // ${todo} Auto-generated catch block\r\n${exception_var}.printStackTrace(); // ${todo} Auto-generated method stub\r\n${body_statement} ${body_statement}\r\n// ${todo} Auto-generated constructor stub return ${field}; ${field} \= ${param};
diff --git a/main/classes/collate/build.properties b/main/classes/collate/build.properties
new file mode 100644
index 00000000000..a21fb196196
--- /dev/null
+++ b/main/classes/collate/build.properties
@@ -0,0 +1,6 @@
+#*******************************************************************************
+#* Copyright (C) 2009, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+shared.dir = ../../shared
+javac.compilerarg = -Xlint:all,-deprecation,-dep-ann
diff --git a/main/classes/collate/build.xml b/main/classes/collate/build.xml
new file mode 100644
index 00000000000..70cce1ee044
--- /dev/null
+++ b/main/classes/collate/build.xml
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/collate/collate-build.launch b/main/classes/collate/collate-build.launch
new file mode 100644
index 00000000000..5b723b1f894
--- /dev/null
+++ b/main/classes/collate/collate-build.launch
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/collate/manifest.stub b/main/classes/collate/manifest.stub
new file mode 100644
index 00000000000..20790b5efa0
--- /dev/null
+++ b/main/classes/collate/manifest.stub
@@ -0,0 +1,16 @@
+Manifest-Version: 1.0
+Specification-Title: International Components for Unicode for Java (collate)
+Specification-Version: @SPECVERSION@
+Specification-Vendor: icu-project.org
+Implementation-Title: International Components for Unicode for Java (collate)
+Implementation-Version: @IMPLVERSION@
+Implementation-Vendor: IBM Corporation
+Implementation-Vendor-Id: com.ibm
+Bundle-ManifestVersion: 2
+Bundle-Name: ICU4J collate
+Bundle-Description: International Components for Unicode for Java (collate)
+Bundle-SymbolicName: com.ibm.icu.collate
+Bundle-Version: @IMPLVERSION@
+Bundle-Vendor: IBM Corporation
+Bundle-Copyright: @COPYRIGHT@
+Bundle-RequiredExecutionEnvironment: @EXECENV@
diff --git a/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java b/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
new file mode 100644
index 00000000000..7688b097ffe
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
@@ -0,0 +1,1188 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2011, Google Inc, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.MultiComparator;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.AlphabeticIndex.Bucket;
+import com.ibm.icu.util.LocaleData;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * AlphabeticIndex supports the creation of a UI index appropriate for a given language. It can support either direct
+ * use, or use with a client that doesn't support localized collation. The following is an example of what an index
+ * might look like in a UI:
+ *
+ *
+ * ... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ...
+ *
+ * A
+ * Addison
+ * Albertson
+ * Azensky
+ * B
+ * Baecker
+ * ...
+ *
+ *
+ * The class can generate a list of labels for use as a UI "index", that is, a list of clickable characters (or
+ * character sequences) that allow the user to see a segment (bucket) of a larger "target" list. That is, each label
+ * corresponds to a bucket in the target list, where everything in the bucket is greater than or equal to the character
+ * (according to the locale's collation). Strings can be added to the index; they will be in sorted order in the right
+ * bucket.
+ *
+ * The class also supports having buckets for strings before the first (underflow), after the last (overflow), and
+ * between scripts (inflow). For example, if the index is constructed with labels for Russian and English, Greek
+ * characters would fall into an inflow bucket between the other two scripts.
+ *
+ * Note: If you expect to have a lot of ASCII or Latin characters as well as characters from the user's language, then it is a good idea to call addLabels(ULocale.ENGLISH).
+ *
+ * Direct Use
+ * The following shows an example of building an index directly.
+ * The "show..." methods below are just to illustrate usage.
+ *
+ *
+ * // Create a simple index where the values for the strings are Integers, and add the strings
+ *
+ * AlphabeticIndex index = new AlphabeticIndex(desiredLocale).addLabels(additionalLocale);
+ * int counter = 0;
+ * for (String item : test) {
+ * index.addRecord(item, counter++);
+ * }
+ * ...
+ * // Show index at top. We could skip or gray out empty buckets
+ *
+ * for (AlphabeticIndex.Bucket bucket : index) {
+ * if (showAll || bucket.size() != 0) {
+ * showLabelAtTop(UI, bucket.getLabel());
+ * }
+ * }
+ * ...
+ * // Show the buckets with their contents, skipping empty buckets
+ *
+ * for (AlphabeticIndex.Bucket bucket : index) {
+ * if (bucket.size() != 0) {
+ * showLabelInList(UI, bucket.getLabel());
+ * for (AlphabeticIndex.Record item : bucket) {
+ * showIndexedItem(UI, item.getName(), item.getData());
+ * }
+ *
+ *
+ * The caller can build different UIs using this class. For example, an index character could be omitted or grayed-out
+ * if its bucket is empty. Small buckets could also be combined based on size, such as:
+ *
+ *
+ * ... A-F G-N O-Z ...
+ *
+ *
+ * Client Support
+ *
+ * Callers can also use the AlphabeticIndex to support sorting on a client that doesn't support collation.
+ *
+ *
+ *
+ * Notes:
+ *
+ * Additional collation parameters can be passed in as part of the locale name. For example, German plus numeric
+ * sorting would be "de@kn-true".
+ *
+ * @author markdavis
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public final class AlphabeticIndex implements Iterable> {
+
+ /**
+ * Internals
+ */
+ static final boolean HACK_CODED_FIRSTS = true;
+
+ private static UnicodeSet UNIHAN = new UnicodeSet("[:script=Hani:]");
+
+ private static final char CGJ = '\u034F';
+ private static final UnicodeSet ALPHABETIC = new UnicodeSet("[[:alphabetic:]-[:mark:]]");
+ private static final UnicodeSet HANGUL = new UnicodeSet(
+ "[\uAC00 \uB098 \uB2E4 \uB77C \uB9C8 \uBC14 \uC0AC \uC544 \uC790 \uCC28 \uCE74 \uD0C0 \uD30C \uD558]");
+ private static final UnicodeSet ETHIOPIC = new UnicodeSet("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
+ private static final UnicodeSet CORE_LATIN = new UnicodeSet("[a-z]");
+
+ private final RuleBasedCollator collatorOriginal;
+ private final RuleBasedCollator collatorPrimaryOnly;
+ private RuleBasedCollator collatorExternal;
+
+ // for testing
+ private final LinkedHashMap> alreadyIn = new LinkedHashMap>();
+ private final List noDistinctSorting = new ArrayList();
+ private final List notAlphabetic = new ArrayList();
+
+ // We accumulate these as we build up the input parameters
+
+ private final UnicodeSet initialLabels = new UnicodeSet();
+ private final Collection> inputList = new ArrayList>();
+
+ // Lazy evaluated: null means that we have not built yet.
+
+ private BucketList buckets;
+
+ private String overflowLabel = "\u2026";
+ private String underflowLabel = "\u2026";
+ private String inflowLabel = "\u2026";
+ private LangType langType;
+
+ /**
+ * Create the index object.
+ *
+ * @param locale
+ * The locale for the index.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex(ULocale locale) {
+ this(locale, null, getIndexExemplars(locale));
+ }
+
+ /**
+ * Create the index object.
+ *
+ * @param locale
+ * The locale for the index.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex(Locale locale) {
+ this(ULocale.forLocale(locale));
+ }
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ public enum LangType {
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ NORMAL,
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ SIMPLIFIED,
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ TRADITIONAL;
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ public static LangType fromLocale(ULocale locale) {
+ String lang = locale.getLanguage();
+ if (lang.equals("zh")) {
+ if ("Hant".equals(locale.getScript()) || "TW".equals(locale.getCountry())) {
+ return TRADITIONAL;
+ }
+ return SIMPLIFIED;
+ }
+ return NORMAL;
+ }
+ }
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only, for testing purposes and use with CLDR.
+ */
+ public AlphabeticIndex(ULocale locale, RuleBasedCollator collator, UnicodeSet exemplarChars) {
+ langType = LangType.fromLocale(locale);
+ // HACK because we have to know the type of the collation for Chinese
+ if (langType != LangType.NORMAL) {
+ locale = locale.setKeywordValue("collation", langType == LangType.TRADITIONAL ? "stroke" : "pinyin");
+ }
+ collatorOriginal = collator != null ? collator : (RuleBasedCollator) Collator.getInstance(locale);
+ try {
+ collatorPrimaryOnly = (RuleBasedCollator) (collatorOriginal.clone());
+ } catch (Exception e) {
+ // should never happen
+ throw new IllegalStateException("Collator cannot be cloned", e);
+ }
+ collatorPrimaryOnly.setStrength(Collator.PRIMARY);
+ addLabels(exemplarChars);
+ }
+
+ /**
+ * Add more index characters (aside from what are in the locale)
+ * @param additions additional characters to add to the index, such as A-Z.
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex addLabels(UnicodeSet additions) {
+ initialLabels.addAll(additions);
+ buckets = null;
+ return this;
+ }
+
+ /**
+ * Add more index characters (aside from what are in the locale)
+ * @param additions additional characters to add to the index, such as those in Swedish.
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex addLabels(ULocale... additions) {
+ for (ULocale addition : additions) {
+ initialLabels.addAll(getIndexExemplars(addition));
+ }
+ buckets = null;
+ return this;
+ }
+
+ /**
+ * Add more index characters (aside from what are in the locale)
+ * @param additions additional characters to add to the index, such as those in Swedish.
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex addLabels(Locale... additions) {
+ for (Locale addition : additions) {
+ initialLabels.addAll(getIndexExemplars(ULocale.forLocale(addition)));
+ }
+ buckets = null;
+ return this;
+ }
+
+ /**
+ * Set the overflow label
+ * @param overflowLabel see class description
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex setOverflowLabel(String overflowLabel) {
+ this.overflowLabel = overflowLabel;
+ return this;
+ }
+
+ /**
+ * Get the default label used in the IndexCharacters' locale for underflow, eg the first item in: ... A B C
+ *
+ * @return underflow label
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getUnderflowLabel() {
+ return underflowLabel; // TODO get localized version
+ }
+
+
+ /**
+ * Set the underflow label
+ * @param underflowLabel see class description
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex setUnderflowLabel(String underflowLabel) {
+ this.underflowLabel = underflowLabel;
+ return this;
+ }
+
+ /**
+ * Get the default label used in the IndexCharacters' locale for overflow, eg the last item in: X Y Z ...
+ *
+ * @return overflow label
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getOverflowLabel() {
+ return overflowLabel; // TODO get localized version
+ }
+
+
+ /**
+ * Set the inflow label
+ * @param inflowLabel see class description
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex setInflowLabel(String inflowLabel) {
+ this.inflowLabel = inflowLabel;
+ return this;
+ }
+
+ /**
+ * Get the default label used for abbreviated buckets between other labels. For example, consider the case where
+ * the labels for Latin and Greek are used: X Y Z ... Α Β Γ.
+ *
+ * @return inflow label
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getInflowLabel() {
+ return inflowLabel; // TODO get localized version
+ }
+
+
+ /**
+ * Get the limit on the number of labels in the index. The number of buckets can be slightly larger: see getBucketCount().
+ *
+ * @return maxLabelCount maximum number of labels.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getMaxLabelCount() {
+ return maxLabelCount;
+ }
+
+ /**
+ * Set a limit on the number of labels in the index. The number of buckets can be slightly larger: see
+ * getBucketCount().
+ *
+ * @param maxLabelCount the maximum number of labels. Currently, if the number is exceeded, then every nth item
+ * is removed to bring the count down. A more sophisticated mechanism may be available in the future.
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex setMaxLabelCount(int maxLabelCount) {
+ this.maxLabelCount = maxLabelCount;
+ return this;
+ }
+
+ /**
+ * Determine the best labels to use. This is based on the exemplars, but we also process to make sure that they are unique,
+ * and sort differently, and that the overall list is small enough.
+ * @return the filtered labels, in the order defined by the primary-strength collator
+ */
+ private ArrayList initLabels() {
+ UnicodeSet exemplars = new UnicodeSet(initialLabels);
+
+ // First sort them, with a "best" ordering among items that are the same according
+ // to the collator.
+ // Re the warning: the JDK inexplicably didn't make Collators be Comparator!
+ @SuppressWarnings("unchecked")
+ Set preferenceSorting = new TreeSet(new MultiComparator(collatorPrimaryOnly, PREFERENCE_COMPARATOR));
+ exemplars.addAllTo(preferenceSorting);
+
+ TreeSet indexCharacterSet = new TreeSet(collatorPrimaryOnly);
+
+ // We now make a sorted array of elements
+ // Some of the input may, however, be redundant.
+ // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
+ // So we make a pass through, filtering out those cases.
+
+ for (String item : preferenceSorting) {
+ if (indexCharacterSet.contains(item)) {
+ for (String itemAlreadyIn : indexCharacterSet) {
+ if (collatorPrimaryOnly.compare(item, itemAlreadyIn) == 0) {
+ Set targets = alreadyIn.get(itemAlreadyIn);
+ if (targets == null) {
+ alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet());
+ }
+ targets.add(item);
+ break;
+ }
+ }
+ } else if (UTF16.countCodePoint(item) > 1 && collatorPrimaryOnly.compare(item, separated(item)) == 0) {
+ noDistinctSorting.add(item);
+ } else if (!ALPHABETIC.containsSome(item)) {
+ notAlphabetic.add(item);
+ } else {
+ indexCharacterSet.add(item);
+ }
+ }
+
+ // if the result is still too large, cut down to maxCount elements, by removing every nth element
+
+ final int size = indexCharacterSet.size() - 1;
+ if (size > maxLabelCount) {
+ int count = 0;
+ int old = -1;
+ for (Iterator it = indexCharacterSet.iterator(); it.hasNext();) {
+ ++count;
+ it.next();
+ final int bump = count * maxLabelCount / size;
+ if (bump == old) {
+ it.remove();
+ } else {
+ old = bump;
+ }
+ }
+ }
+
+ return new ArrayList(indexCharacterSet);
+ }
+
+ /**
+ * This method is called to get the index exemplars. Normally these come from the locale directly,
+ * but if they aren't available, we have to synthesize them.
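+ * @param locale the locale whose index exemplar characters are wanted
+ * @return the locale's index exemplars, or an uppercased set synthesized from its standard exemplars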
+ */
+ private static UnicodeSet getIndexExemplars(ULocale locale) {
+ UnicodeSet exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_INDEX);
+
+ if (exemplars != null) {
+ return exemplars;
+ }
+
+ // Synthesize the index exemplars
+
+ exemplars = LocaleData.getExemplarSet(locale, 0, LocaleData.ES_STANDARD);
+
+ // get the exemplars, and handle special cases
+
+ exemplars = exemplars.cloneAsThawed();
+ // question: should we add auxiliary exemplars?
+ if (exemplars.containsSome(CORE_LATIN) || exemplars.size() == 0) {
+ exemplars.addAll(CORE_LATIN);
+ }
+ if (exemplars.containsSome(HANGUL)) {
+ // cut down to small list
+ exemplars.removeAll(new UnicodeSet("[:block=hangul_syllables:]")).addAll(HANGUL);
+ }
+ if (exemplars.containsSome(ETHIOPIC)) {
+ // cut down to small list
+ // make use of the fact that Ethiopic is allocated in 8's, where
+ // the base is 0 mod 8.
+ for (UnicodeSetIterator it = new UnicodeSetIterator(ETHIOPIC); it.next();) {
+ if ((it.codepoint & 0x7) != 0) {
+ exemplars.remove(it.codepoint);
+ }
+ }
+ }
+
+ UnicodeSet uppercased = new UnicodeSet();
+ for (String item : exemplars) {
+ uppercased.add(UCharacter.toUpperCase(locale, item));
+ }
+
+ return uppercased;
+ }
+
+ /**
+ * Return the string with interspersed CGJs. Input must not be empty; callers pass strings of 2 or more code points.
+ * This is used to test whether contractions sort differently from their components.
+ */
+ private String separated(String item) {
+ StringBuilder result = new StringBuilder();
+ // add a CGJ except within surrogates
+ char last = item.charAt(0);
+ result.append(last);
+ for (int i = 1; i < item.length(); ++i) {
+ char ch = item.charAt(i);
+ if (!UCharacter.isHighSurrogate(last) || !UCharacter.isLowSurrogate(ch)) {
+ result.append(CGJ);
+ }
+ result.append(ch);
+ last = ch;
+ }
+ return result.toString();
+ }
+
+ /**
+ * Get the labels.
+ *
+ * @return A collection listing the labels, after processing.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public List getBucketLabels() {
+ if (buckets == null) {
+ initBuckets();
+ }
+ ArrayList result = new ArrayList();
+ for (Bucket bucket : buckets) {
+ result.add(bucket.getLabel());
+ }
+ return result;
+ }
+
+ /**
+ * Get a clone of the collator used internally. Note that for performance reasons, the clone is only done once, and
+ * then stored. The next time it is accessed, the same instance is returned.
+ *
+ * Don't use this method across threads if you are changing the settings on the collator, at least not without
+ * synchronizing.
+ *
+ * @return a clone of the collator used internally
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public RuleBasedCollator getCollator() {
+ if (collatorExternal == null) {
+ try {
+ collatorExternal = (RuleBasedCollator) (collatorOriginal.clone());
+ } catch (Exception e) {
+ // should never happen
+ throw new IllegalStateException("Collator cannot be cloned", e);
+ }
+ }
+ return collatorExternal;
+ }
+
+ /**
+ * Add a record (name and data) to the index. The name will be used to sort the items into buckets, and to sort
+ * within the bucket. Two records may have the same name. When they do, the sort order is according to the order added:
+ * the first added comes first.
+ *
+ * @param name
+ * Name, such as a name
+ * @param data
+ * Data, such as an address or link
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex addRecord(CharSequence name, V data) {
+ // TODO instead of invalidating, just add to unprocessed list.
+ buckets = null; // invalidate old bucketlist
+ inputList.add(new Record(name, data, inputList.size()));
+ return this;
+ }
+
+ /**
+ * Get the bucket number for the given name. This routine permits callers to implement their own bucket handling
+ * mechanisms, including client-server handling. For example, when a new name is created on the client, it can ask
+ * the server for the bucket for that name, and the sortkey (using getCollator). Once the client has that
+ * information, it can put the name into the right bucket, and sort it within that bucket, without having access to
+ * the index or collator.
+ *
+ * Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if
+ * those are changed, then the bucket number and sort key must be regenerated.
+ *
+ * @param name
+ * Name to find the bucket for
+ * @return the bucket number for the name
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getBucketIndex(CharSequence name) {
+ if (buckets == null) {
+ initBuckets();
+ }
+ if (langType == LangType.SIMPLIFIED) {
+ String hackPrefix = hackName(name, collatorPrimaryOnly);
+ if (hackPrefix != null) {
+ name = hackPrefix + name;
+ }
+ }
+ return rawGetBucketIndex(name);
+ }
+
+ private int rawGetBucketIndex(CharSequence name) {
+ // TODO use a binary search
+ int result = -1;
+ for (Bucket bucket : buckets) {
+ if (bucket.lowerBoundary == null) { // last bucket
+ return result;
+ }
+ int comp = collatorPrimaryOnly.compare(name, bucket.lowerBoundary);
+ if (comp < 0) { // the first boundary is always "", and so -1 will never be returned
+ return result;
+ } else if (comp == 0) {
+ return result + 1;
+ }
+ result++;
+ }
+ return result;
+ }
+
+ /**
+ * Clear the index.
+ *
+ * @return this, for chaining
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public AlphabeticIndex clearRecords() {
+ buckets = null;
+ inputList.clear();
+ return this;
+ }
+
+ /**
+ * Return the number of buckets in the index. This will be the same as the number of labels, plus buckets for the underflow, overflow, and inflow(s).
+ *
+ * @return number of buckets
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getBucketCount() {
+ if (buckets == null) {
+ initBuckets();
+ }
+ return buckets.bucketList.size();
+ }
+
+ /**
+ * Return the number of records in the index: that is, the total number of distinct pairs added with addRecord(...), over all the buckets.
+ *
+ * @return total number of records in buckets
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int getRecordCount() {
+ return inputList.size();
+ }
+
+ /**
+ * Return an iterator over the buckets.
+ *
+ * @return iterator over buckets.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Iterator> iterator() {
+ if (buckets == null) {
+ initBuckets();
+ }
+ return buckets.iterator();
+ }
+
+ /**
+ * Convenience routine to bucket a list of input strings according to the index.
+ * Warning: if a UI suppresses buckets that are empty, this may result in the special buckets (underflow, overflow,
+ * inflow) being adjacent. In that case, the application may want to combine them.
+ *
+ * The input is the list of records accumulated by addRecord(...), which is sorted and bucketed
+ * according to the labels.
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ private void initBuckets() {
+ buckets = new BucketList();
+
+ // Make a collator for records. Do this so that the Records can be static classes, and not know about the collators.
+ // TODO make this a member of the class.
+ Comparator> fullComparator = new Comparator>() {
+ public int compare(Record o1, Record o2) {
+ int result = collatorOriginal.compare(o1.name, o2.name);
+ if (result != 0) {
+ return result;
+ }
+ return o1.counter - o2.counter;
+ }
+ };
+
+ // If we have Pinyin, then we have a special hack to bucket items with ASCII.
+ if (langType == LangType.SIMPLIFIED) {
+ Map> rebucketMap = new HashMap>();
+ for (Record name : inputList) {
+ String key = hackName(name.name, collatorOriginal);
+ if (key == null) continue;
+ Bucket bucket = rebucketMap.get(key);
+ if (bucket == null) {
+ int index = rawGetBucketIndex(key);
+ bucket = buckets.bucketList.get(index);
+ }
+ rebucketMap.put(key, bucket);
+ name.rebucket = bucket;
+ }
+ }
+
+ // Set up a sorted list of the input
+ TreeSet> sortedInput = new TreeSet>(fullComparator);
+ sortedInput.addAll(inputList);
+
+ // Now, we traverse all of the input, which is now sorted.
+ // If the item doesn't go in the current bucket, we find the next bucket that contains it.
+ // This makes the process order n*log(n), since we just sort the list and then do a linear process.
+ // However, if the user adds item at a time and then gets the buckets, this isn't efficient, so
+ // we need to improve it for that case.
+
+ Iterator> bucketIterator = buckets.iterator();
+ Bucket currentBucket = bucketIterator.next();
+ Bucket nextBucket = bucketIterator.next();
+ String upperBoundary = nextBucket.lowerBoundary; // there is always at least one bucket, so this is safe
+ boolean atEnd = false;
+ for (Record s : sortedInput) {
+ // special hack for pinyin
+ if (s.rebucket != null) {
+ s.rebucket.records.add(s);
+ continue;
+ }
+ // if the current bucket isn't the right one, find the one that is
+ // We have a special flag for the last bucket so that we don't look any further
+ while (!atEnd && collatorPrimaryOnly.compare(s.name, upperBoundary) >= 0) {
+ currentBucket = nextBucket;
+ // now reset the boundary that we compare against
+ if (bucketIterator.hasNext()) {
+ nextBucket = bucketIterator.next();
+ upperBoundary = nextBucket.lowerBoundary;
+ if (upperBoundary == null) {
+ atEnd = true;
+ }
+ } else {
+ atEnd = true;
+ }
+ }
+ // now put the record into the bucket.
+ currentBucket.records.add(s);
+ }
+ }
+
+ /**
+ * Get the Unicode character (or tailored string) that defines an overflow bucket; that is anything greater than or
+ * equal to that string should go in that bucket, instead of with the last character. Normally that is the first
+ * character of the script after lowerLimit. Thus in X Y Z ... Devanagari-ka , the overflow character for Z
+ * would be the Greek-alpha .
+ *
+ * @param lowerLimit
+ * The character below the overflow (or inflow) bucket
+ * @return string that defines top of the overflow buck for lowerLimit, or null if there is none
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public String getOverflowComparisonString(String lowerLimit) {
+ // TODO Use collator method instead of this hack
+ for (String s : HACK_FIRST_CHARS_IN_SCRIPTS) {
+ if (collatorPrimaryOnly.compare(s, lowerLimit) > 0) {
+ return s;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Return a list of the first character in each script, in collation order. Only exposed for testing.
+ * The result is a read-only view: the backing list is a static constant shared by all instances, and it is
+ * scanned by getOverflowComparisonString(), so callers must not be able to replace its elements.
+ *
+ * @return unmodifiable list of first characters in each script
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public List<String> getFirstScriptCharacters() {
+     // Fully qualified so that no additional import is needed.
+     return java.util.Collections.unmodifiableList(HACK_FIRST_CHARS_IN_SCRIPTS);
+ }
+
+ /**
+ * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
+ * intended for debugging.
+ * The map returned is the index's own alreadyIn field, not a copy.
+ *
+ * @return the internal alreadyIn map
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public Map> getAlreadyIn() {
+ return alreadyIn;
+ }
+
+ /**
+ * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
+ * intended for debugging.
+ * The list returned is the index's own noDistinctSorting field, not a copy.
+ *
+ * @return the internal noDistinctSorting list
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public List getNoDistinctSorting() {
+ return noDistinctSorting;
+ }
+
+ /**
+ * As the index is built, strings may be discarded from the exemplars. This contains some of the discards, and is
+ * intended for debugging.
+ * The list returned is the index's own notAlphabetic field, not a copy.
+ *
+ * @return the internal notAlphabetic list
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ public List getNotAlphabetic() {
+ return notAlphabetic;
+ }
+
+ private static UnicodeSet getScriptSet(String codePoint) {
+ return new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, UScript.getScript(codePoint.codePointAt(0)));
+ }
+
+ private static final UnicodeSet IGNORE_SCRIPTS = new UnicodeSet(
+ "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]").freeze(); // removed from every script set computed in BucketList; frozen because it is a shared constant
+
+ private static final PreferenceComparator PREFERENCE_COMPARATOR = new PreferenceComparator(); // single shared instance of the comparator declared below
+ private int maxLabelCount = 99; // NOTE(review): presumably the cap on the number of index labels - confirm against its users
+
+ /**
+ * Comparator that puts "better" strings first. A string is better when its compatibility decomposition
+ * (Normalizer.decompose(s, true)) is shorter; ties are broken by the binary order of the decompositions,
+ * and finally by the binary order of the original strings.
+ */
+ private static class PreferenceComparator implements Comparator<Object> {
+     static final Comparator<String> binary = new UTF16.StringComparator(true, false, 0);
+
+     public int compare(Object o1, Object o2) {
+         return compare((String) o1, (String) o2);
+     }
+
+     public int compare(String s1, String s2) {
+         // Identical references need no normalization work.
+         if (s1 == s2) {
+             return 0;
+         }
+         final String decomposed1 = Normalizer.decompose(s1, true);
+         final String decomposed2 = Normalizer.decompose(s2, true);
+         int order = decomposed1.length() - decomposed2.length();
+         if (order == 0) {
+             order = binary.compare(decomposed1, decomposed2);
+         }
+         if (order == 0) {
+             order = binary.compare(s1, s2);
+         }
+         return order;
+     }
+ }
+
+ /**
+ * A record to be sorted into buckets with getIndexBucketCharacters.
+ *
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static class Record {
+ private Bucket rebucket = null; // special hack for Pinyin
+ private CharSequence name;
+ private V data;
+ private int counter;
+
+ private Record(CharSequence name, V data, int counter) {
+ this.name = name;
+ this.data = data;
+ this.counter = counter;
+ }
+
+ /**
+ * Get the name
+ *
+ * @return the name
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CharSequence getName() {
+ return name;
+ }
+
+ /**
+ * Get the data
+ *
+ * @return the data
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public V getData() {
+ return data;
+ }
+
+ /**
+ * Standard toString()
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String toString() {
+ return name + "=" + data + (rebucket == null ? "" : "{" + rebucket.label + "}");
+ }
+ }
+
+ /**
+ * A "bucket": a label together with the records that were sorted under it. Buckets are created by BucketList.
+ * A bucket is read through getLabel(), getLabelType(), size() and by iterating over its records.
+ * See com.ibm.icu.dev.test.collator.IndexCharactersTest for an example.
+ *
+ * @param <V>
+ * Data type
+ * @draft ICU 4.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static class Bucket<V> implements Iterable<Record<V>> {
+     /**
+      * Type of the label
+      *
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     public enum LabelType {
+         NORMAL, UNDERFLOW, INFLOW, OVERFLOW
+     }
+
+     private final String label;
+     private final String lowerBoundary;
+     private final LabelType labelType;
+     private final List<Record<V>> records = new ArrayList<Record<V>>();
+
+     /**
+      * Set up the bucket.
+      *
+      * @param label
+      * label for the bucket
+      * @param lowerBoundary
+      * string that initBuckets() compares record names against to decide where this bucket starts
+      * @param labelType
+      * is a normal, underflow, overflow, or inflow bucket
+      */
+     private Bucket(String label, String lowerBoundary, LabelType labelType) {
+         this.labelType = labelType;
+         this.lowerBoundary = lowerBoundary;
+         this.label = label;
+     }
+
+     /**
+      * Get the label
+      *
+      * @return label for the bucket
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     public String getLabel() {
+         return label;
+     }
+
+     /**
+      * Is a normal, underflow, overflow, or inflow bucket
+      *
+      * @return the kind of label this bucket carries
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     public LabelType getLabelType() {
+         return labelType;
+     }
+
+     /**
+      * Get the number of records in the bucket.
+      *
+      * @return number of records in bucket
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     public int size() {
+         return records.size();
+     }
+
+     /**
+      * Iterator over the records in the bucket
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     public Iterator<Record<V>> iterator() {
+         return records.iterator();
+     }
+
+     /**
+      * Standard toString()
+      * @draft ICU 4.6
+      * @provisional This API might change or be removed in a future release.
+      */
+     @Override
+     public String toString() {
+         final StringBuilder text = new StringBuilder("{");
+         text.append("labelType=").append(labelType);
+         text.append(", ").append("lowerBoundary=").append(lowerBoundary);
+         text.append(", ").append("label=").append(label);
+         return text.append("}").toString();
+     }
+ }
+
+ /**
+ * The ordered list of buckets of the enclosing index: an underflow bucket, one NORMAL bucket per label
+ * returned by initLabels(), INFLOW buckets between labels of non-adjacent scripts, and a final overflow bucket.
+ */
+ private class BucketList implements Iterable<Bucket<V>> {
+     private ArrayList<Bucket<V>> bucketList = new ArrayList<Bucket<V>>();
+
+     BucketList() {
+         // initialize indexCharacters;
+         List<String> indexCharacters = initLabels();
+
+         // underflow bucket
+         bucketList.add(new Bucket<V>(getUnderflowLabel(), "", Bucket.LabelType.UNDERFLOW));
+
+         // The first label never needs an inflow bucket in front of it.
+         String last = indexCharacters.get(0);
+         bucketList.add(new Bucket<V>(last, last, Bucket.LabelType.NORMAL));
+         UnicodeSet lastSet = getScriptSet(last).removeAll(IGNORE_SCRIPTS);
+
+         for (int i = 1; i < indexCharacters.size(); ++i) {
+             String current = indexCharacters.get(i);
+             UnicodeSet set = getScriptSet(current).removeAll(IGNORE_SCRIPTS);
+             if (lastSet.containsNone(set)) {
+                 // The script changed. If another script starts between the two labels, the scripts are
+                 // not adjacent and an inflow bucket is inserted to catch what sorts in the gap.
+                 String overflowComparisonString = getOverflowComparisonString(last);
+                 // A null result means no script starts after 'last', so there is no gap to cover.
+                 if (overflowComparisonString != null
+                         && collatorPrimaryOnly.compare(overflowComparisonString, current) < 0) {
+                     bucketList.add(new Bucket<V>(getInflowLabel(), overflowComparisonString,
+                             Bucket.LabelType.INFLOW));
+                     // The loop index must NOT be advanced here: the inflow bucket is added to bucketList,
+                     // not to indexCharacters, so advancing would silently drop the label after 'current'.
+                 }
+             }
+             bucketList.add(new Bucket<V>(current, current, Bucket.LabelType.NORMAL));
+             last = current;
+             lastSet = set;
+         }
+         // overflow bucket; its lower boundary may be null when no script starts after the last label
+         String limitString = getOverflowComparisonString(last);
+         bucketList.add(new Bucket<V>(getOverflowLabel(), limitString, Bucket.LabelType.OVERFLOW)); // final,
+     }
+
+     /**
+      * @return iterator over the buckets in index order
+      */
+     public Iterator<Bucket<V>> iterator() {
+         return bucketList.iterator();
+     }
+ }
+
+ /*
+ * HACKS
+ */
+
+ /**
+ * Only gets called for simplified Chinese. Uses further hack to distinguish long from short pinyin table.
+ * Maps a name whose first code point is in UNIHAN to the one-character pinyin label it belongs under.
+ *
+ * @param name
+ * the record name to classify; may be empty
+ * @param comparator
+ * collator used both to pick the long/short table (once) and to search it
+ * @return the one-character label, or null if the name is empty or does not start with a UNIHAN character
+ */
+ private String hackName(CharSequence name, RuleBasedCollator comparator) {
+     // An empty name has no first code point; Character.codePointAt would throw for it.
+     if (name.length() == 0 || !UNIHAN.contains(Character.codePointAt(name, 0))) {
+         return null;
+     }
+     // The table choice is made once, by the first collator seen, and cached in static fields.
+     synchronized (PINYIN_LOWER_BOUNDS_LONG) {
+         if (PINYIN_LOWER_BOUNDS == null) {
+             if (comparator.getTailoredSet().contains(probeCharInLong)) {
+                 PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG;
+                 HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_LONG;
+             } else {
+                 PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT;
+                 HACK_PINYIN_LOOKUP = HACK_PINYIN_LOOKUP_SHORT;
+             }
+         }
+     }
+     int index = Arrays.binarySearch(HACK_PINYIN_LOOKUP, name, comparator);
+     if (index < 0) {
+         // Not an exact boundary: binarySearch returned -(insertionPoint) - 1, and the entry just
+         // before the insertion point is the boundary the name falls under.
+         index = -index - 2;
+     }
+     return PINYIN_LOWER_BOUNDS.substring(index, index + 1);
+ }
+
+ private static String PINYIN_LOWER_BOUNDS; // null until hackName() selects the LONG or SHORT variant below
+
+ private static String[] HACK_PINYIN_LOOKUP; // assigned together with PINYIN_LOWER_BOUNDS in hackName()
+
+
+ /**
+ * HACKS
+ * Generated with org.unicode.draft.GenerateUnihanCollator.
+ * Each lookup array below has one entry per character of its LOWER_BOUNDS string: hackName() binary-searches
+ * the array and uses the resulting position to pick the label character from the string.
+ */
+
+ private int probeCharInLong = 0x28EAD; // code point whose presence in the collator's tailored set selects the LONG tables (see hackName)
+
+ private static String PINYIN_LOWER_BOUNDS_LONG = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; // also used as the lock object in hackName()
+
+ private static String[] HACK_PINYIN_LOOKUP_LONG = {
+ "", // A
+ "\u516B", // b : \u516B [b\u0101]
+ "\uD863\uDEAD", // c : \U00028EAD [c\u0101]
+ "\uD844\uDE51", // d : \U00021251 [d\u0101]
+ "\u59B8", // e : \u59B8 [\u0113]
+ "\u53D1", // f : \u53D1 [f\u0101]
+ "\uD844\uDE45", // g : \U00021245 [g\u0101]
+ "\u54C8", // h : \u54C8 [h\u0101]
+ "\u4E0C", // j : \u4E0C [j\u012B]
+ "\u5494", // k : \u5494 [k\u0101]
+ "\u3547", // l : \u3547 [l\u0101]
+ "\u5452", // m : \u5452 [\u1E3F]
+ "\u5514", // n : \u5514 [\u0144]
+ "\u5594", // o : \u5594 [\u014D]
+ "\uD84F\uDC7A", // p : \U00023C7A [p\u0101]
+ "\u4E03", // q : \u4E03 [q\u012B]
+ "\u513F", // r : \u513F [r]
+ "\u4EE8", // s : \u4EE8 [s\u0101]
+ "\u4ED6", // t : \u4ED6 [t\u0101]
+ "\u7A75", // w : \u7A75 [w\u0101]
+ "\u5915", // x : \u5915 [x\u012B]
+ "\u4E2B", // y : \u4E2B [y\u0101]
+ "\u5E00", // z : \u5E00 [z\u0101]
+ };
+
+ private static String PINYIN_LOWER_BOUNDS_SHORT = "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"; // same label characters as the LONG variant
+
+ private static String[] HACK_PINYIN_LOOKUP_SHORT = {
+ "", // A
+ "\u516B", // b : \u516B [b\u0101]
+ "\u5693", // c : \u5693 [c\u0101]
+ "\u5491", // d : \u5491 [d\u0101]
+ "\u59B8", // e : \u59B8 [\u0113]
+ "\u53D1", // f : \u53D1 [f\u0101]
+ "\u65EE", // g : \u65EE [g\u0101]
+ "\u54C8", // h : \u54C8 [h\u0101]
+ "\u4E0C", // j : \u4E0C [j\u012B]
+ "\u5494", // k : \u5494 [k\u0101]
+ "\u3547", // l : \u3547 [l\u0101]
+ "\u5452", // m : \u5452 [\u1E3F]
+ "\u5514", // n : \u5514 [\u0144]
+ "\u5594", // o : \u5594 [\u014D]
+ "\u5991", // p : \u5991 [p\u0101]
+ "\u4E03", // q : \u4E03 [q\u012B]
+ "\u513F", // r : \u513F [r]
+ "\u4EE8", // s : \u4EE8 [s\u0101]
+ "\u4ED6", // t : \u4ED6 [t\u0101]
+ "\u7A75", // w : \u7A75 [w\u0101]
+ "\u5915", // x : \u5915 [x\u012B]
+ "\u4E2B", // y : \u4E2B [y\u0101]
+ "\u5E00", // z : \u5E00 [z\u0101]
+ };
+
+ /**
+ * HACKS
+ * First character of each script. getOverflowComparisonString() scans this list from the front and returns
+ * the first entry that sorts after its argument, so the order of the entries matters.
+ * Note that Arrays.asList produces a fixed-size list whose elements can still be replaced with set().
+ */
+ private static final List HACK_FIRST_CHARS_IN_SCRIPTS =
+ Arrays.asList(new String[] {
+ "a", "\u03B1", "\u2C81", "\u0430", "\u2C30", "\u10D0", "\u0561", "\u05D0", "\uD802\uDD00", "\u0800", "\u0621",
+ "\u0710", // Syriac
+ "\u0840", // Mandaic
+ "\u0780", "\u07CA", "\u2D30", "\u1200", "\u0950", "\u0985", "\u0A74", "\u0AD0", "\u0B05", "\u0BD0",
+ "\u0C05", "\u0C85", "\u0D05", "\u0D85", "\uABC0", "\uA800", "\uA882", "\uD804\uDC83",
+ "\u1B83", // Sundanese
+ "\uD804\uDC05", // Brahmi (U+11005)
+ "\uD802\uDE00", "\u0E01", "\u0E81", "\uAA80", "\u0F40", "\u1C00", "\uA840", "\u1900", "\u1700", "\u1720", "\u1740", "\u1760",
+ "\u1A00", // Buginese
+ "\u1BC0", // Batak
+ "\uA930", "\uA90A", "\u1000", "\u1780", "\u1950", "\u1980", "\u1A20", "\uAA00", "\u1B05", "\uA984", "\u1880", "\u1C5A", "\u13A0", "\u1401", "\u1681", "\u16A0", "\uD803\uDC00", "\uA500", "\uA6A0", "\u1100",
+ "\u3041", "\u30A1", "\u3105", "\uA000", "\uA4F8", "\uD800\uDE80", "\uD800\uDEA0", "\uD802\uDD20", "\uD800\uDF00", "\uD800\uDF30", "\uD801\uDC28", "\uD801\uDC50", "\uD801\uDC80", "\uD800\uDC00", "\uD802\uDC00", "\uD802\uDE60", "\uD802\uDF00", "\uD802\uDC40",
+ "\uD802\uDF40", "\uD802\uDF60", "\uD800\uDF80", "\uD800\uDFA0", "\uD808\uDC00", "\uD80C\uDC00", "\u4E00"
+ });
+
+ /**
+ * Only for testing...
+ * Returns a read-only view of the static list of first characters in each script, so that callers cannot
+ * replace elements of the shared constant.
+ *
+ * @return unmodifiable list of the first characters in each script
+ * @internal
+ * @deprecated only for internal testing
+ */
+ public static List<String> getFirstCharactersInScripts() {
+     // Fully qualified so that no additional import is needed.
+     return java.util.Collections.unmodifiableList(HACK_FIRST_CHARS_IN_SCRIPTS);
+ }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
new file mode 100644
index 00000000000..4cde4dc02ea
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollationElementIterator.java
@@ -0,0 +1,2803 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+/***
+ * import java.text.StringCharacterIterator;
+ * import java.text.CharacterIterator;
+ */
+import java.text.CharacterIterator;
+import java.util.MissingResourceException;
+
+import com.ibm.icu.impl.CharacterIteratorWrapper;
+import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.StringUCharacterIterator;
+import com.ibm.icu.impl.UCharacterProperty;
+import com.ibm.icu.lang.UCharacter;
+
+/**
+ * CollationElementIterator is an iterator created by
+ * a RuleBasedCollator to walk through a string. The return result of
+ * each iteration is a 32-bit collation element that defines the
+ * ordering priority of the next character or sequence of characters
+ * in the source string.
+ *
+ * For illustration, consider the following in Spanish:
+ *
+ *
+ * "ca" -> the first collation element is collation_element('c') and second
+ * collation element is collation_element('a').
+ *
+ * Since "ch" in Spanish sorts as one entity, the below example returns one
+ * collation element for the two characters 'c' and 'h'
+ *
+ * "cha" -> the first collation element is collation_element('ch') and second
+ * collation element is collation_element('a').
+ *
+ *
+ * And in German,
+ *
+ *
+ * Since the character 'æ' is a composed character of 'a' and 'e', the
+ * iterator returns two collation elements for the single character 'æ'
+ *
+ * "æb" -> the first collation element is collation_element('a'), the
+ * second collation element is collation_element('e'), and the
+ * third collation element is collation_element('b').
+ *
+ *
+ *
+ *
+ * For collation ordering comparison, the collation element results
+ * cannot be compared simply by using basic arithmetic operators,
+ * e.g. <, == or >, further processing has to be done. Details
+ * can be found in the ICU
+ *
+ * user guide . An example of using the CollationElementIterator
+ * for collation ordering comparison is the class
+ * com.ibm.icu.text.StringSearch .
+ *
+ * To construct a CollationElementIterator object, users
+ * call the method getCollationElementIterator() on a
+ * RuleBasedCollator that defines the desired sorting order.
+ *
+ * Example:
+ *
+ *
+ * String testString = "This is a test";
+ * RuleBasedCollator rbc = new RuleBasedCollator("&a<b");
+ * CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
+ * int primaryOrder = iterator.IGNORABLE;
+ * while (primaryOrder != iterator.NULLORDER) {
+ * int order = iterator.next();
+ * if (order != iterator.IGNORABLE &&
+ * order != iterator.NULLORDER) {
+ * // order is valid, not ignorable and we have not passed the end
+ * // of the iteration, we do something
+ * primaryOrder = CollationElementIterator.primaryOrder(order);
+ * System.out.println("Next primary order 0x" +
+ * Integer.toHexString(primaryOrder));
+ * }
+ * }
+ *
+ *
+ *
+ *
+ * This class is not subclassable
+ *
+ * @see Collator
+ * @see RuleBasedCollator
+ * @see StringSearch
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class CollationElementIterator
+{
+
+
+ // public data members --------------------------------------------------
+
+ /**
+ * This constant is returned by the iterator in the methods
+ * next() and previous() when the end or the beginning of the
+ * source string has been reached, and there are no more valid
+ * collation elements to return.
+ *
+ * See class documentation for an example of use.
+ * @stable ICU 2.8
+ * @see #next
+ * @see #previous */
+ public final static int NULLORDER = 0xffffffff; // all 32 bits set, i.e. -1 as an int
+
+ /**
+ * This constant is returned by the iterator in the methods
+ * next() and previous() when a collation element result is to be
+ * ignored.
+ *
+ * See class documentation for an example of use.
+ * @stable ICU 2.8
+ * @see #next
+ * @see #previous */
+ public static final int IGNORABLE = 0; // next() and previous() also test for this value in their loop conditions
+
+ // public methods -------------------------------------------------------
+
+ // public getters -------------------------------------------------------
+
+ /**
+ * Returns the character offset in the source string
+ * corresponding to the next collation element. I.e., getOffset()
+ * returns the position in the source string corresponding to the
+ * collation element that will be returned by the next call to
+ * next(). This value could be any of:
+ *
+ * The index of the first character corresponding to
+ * the next collation element. (This means that if
+ * setOffset(offset) sets the index in the middle of
+ * a contraction, getOffset() returns the index of
+ * the first character in the contraction, which may not be equal
+ * to the original offset that was set. Hence calling getOffset()
+ * immediately after setOffset(offset) does not guarantee that the
+ * original offset set will be returned.)
+ * If normalization is on, the index of the immediate
+ * subsequent character, or composite character with the first
+ * character, having a combining class of 0.
+ * The length of the source string, if iteration has reached
+ * the end.
+ *
+ *
+ * @return The character offset in the source string corresponding to the
+ * collation element that will be returned by the next call to
+ * next().
+ * @stable ICU 2.8
+ */
+ public int getOffset()
+ {
+     // NOTE(review): a buffer offset of -1 presumably means no internal text buffer is active - confirm.
+     // In that case the source iterator's own index is the answer.
+     if (m_bufferOffset_ == -1) {
+         return m_source_.getIndex();
+     }
+     // Otherwise report the FCD boundary that lies in the direction of iteration.
+     return m_isForwards_ ? m_FCDLimit_ : m_FCDStart_;
+ }
+
+
+ /**
+ * Returns the maximum length of any expansion sequence that ends with
+ * the specified collation element. If there is no expansion with this
+ * collation element as the last element, returns 1.
+ *
+ * @param ce a collation element returned by previous() or next().
+ * @return the maximum length of any expansion sequence ending
+ * with the specified collation element.
+ * @stable ICU 2.8
+ */
+ public int getMaxExpansion(int ce)
+ {
+ int start = 0;
+ int limit = m_collator_.m_expansionEndCE_.length;
+ long unsignedce = ce & 0xFFFFFFFFl;
+ while (start < limit - 1) {
+ int mid = start + ((limit - start) >> 1);
+ long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl;
+ if (unsignedce <= midce) {
+ limit = mid;
+ }
+ else {
+ start = mid;
+ }
+ }
+ int result = 1;
+ if (m_collator_.m_expansionEndCE_[start] == ce) {
+ result = m_collator_.m_expansionEndCEMaxSize_[start];
+ }
+ else if (limit < m_collator_.m_expansionEndCE_.length &&
+ m_collator_.m_expansionEndCE_[limit] == ce) {
+ result = m_collator_.m_expansionEndCEMaxSize_[limit];
+ }
+ else if ((ce & 0xFFFF) == 0x00C0) {
+ result = 2;
+ }
+ return result;
+ }
+
+ // public other methods -------------------------------------------------
+
+ /**
+ * Resets the cursor to the beginning of the string. The next
+ * call to next() or previous() will return the first and last
+ * collation element in the string, respectively.
+ *
+ * If the RuleBasedCollator used by this iterator has had its
+ * attributes changed, calling reset() will reinitialize the
+ * iterator to use the new attributes.
+ *
+ * @stable ICU 2.8
+ */
+ public void reset()
+ {
+ m_source_.setToStart(); // rewind the source text iterator first
+ updateInternalState(); // NOTE(review): presumably where changed collator attributes are picked up, as the Javadoc promises - confirm
+ }
+
+ /**
+ * Get the next collation element in the source string.
+ *
+ * This iterator iterates over a sequence of collation elements
+ * that were built from the string. Because there isn't
+ * necessarily a one-to-one mapping from characters to collation
+ * elements, this doesn't mean the same thing as "return the
+ * collation element [or ordering priority] of the next character
+ * in the string".
+ *
+ * This function returns the collation element that the
+ * iterator is currently pointing to, and then updates the
+ * internal pointer to point to the next element. Previous()
+ * updates the pointer first, and then returns the element. This
+ * means that when you change direction while iterating (i.e.,
+ * call next() and then call previous(), or call previous() and
+ * then call next()), you'll get back the same element twice.
+ *
+ * @return the next collation element or NULLORDER if the end of the
+ * iteration has been reached.
+ * @stable ICU 2.8
+ */
+ public int next()
+ {
+ m_isForwards_ = true; // record the direction of iteration; getOffset() reads this flag
+ if (m_CEBufferSize_ > 0) { // collation elements buffered by an earlier call are handed out first
+ if (m_CEBufferOffset_ < m_CEBufferSize_) {
+ // if there are expansions left in the buffer, we return it
+ return m_CEBuffer_[m_CEBufferOffset_ ++];
+ }
+ m_CEBufferSize_ = 0; // buffer exhausted: empty it before reading more text
+ m_CEBufferOffset_ = 0;
+ }
+
+ int result = NULLORDER;
+ char ch = 0;
+ do {
+ int ch_int = nextChar();
+ if (ch_int == UCharacterIterator.DONE) {
+ return NULLORDER; // end of the source text
+ }
+ ch = (char)ch_int;
+ if (m_collator_.m_isHiragana4_) {
+ /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
+ * based on whether the previous codepoint was Hiragana or Katakana.
+ */
+ m_isCodePointHiragana_ = (m_isCodePointHiragana_ && (ch >= 0x3099 && ch <= 0x309C)) ||
+ ((ch >= 0x3040 && ch <= 0x309e) && !(ch > 0x3094 && ch < 0x309d));
+ }
+
+ if (ch <= 0xFF) {
+ // For latin-1 characters we never need to fall back to the UCA
+ // table because all of the UCA data is replicated in the
+ // latinOneMapping array.
+ // Except: Special CEs can result in CE_NOT_FOUND_,
+ // for example if the default entry for a prefix-special is "not found",
+ // and we do need to fall back to the UCA in such a case.
+ // TODO: It would be better if tailoring specials never resulted in "not found"
+ // unless the corresponding UCA result is also "not found".
+ // That would require a change in the ICU4J collator-from-rule builder.
+ result = m_collator_.m_trie_.getLatin1LinearValue(ch);
+ } else {
+ result = m_collator_.m_trie_.getLeadValue(ch);
+ }
+ if (!RuleBasedCollator.isSpecial(result)) {
+ return result; // ordinary collation element: no further processing needed
+ }
+ if (result != CE_NOT_FOUND_) {
+ result = nextSpecial(m_collator_, result, ch); // may itself yield CE_NOT_FOUND_, handled just below
+ }
+ if (result == CE_NOT_FOUND_) {
+ // couldn't find a good CE in the tailoring
+ if (RuleBasedCollator.UCA_ != null) {
+ result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
+ if (RuleBasedCollator.isSpecial(result)) {
+ // UCA also gives us a special CE
+ result = nextSpecial(RuleBasedCollator.UCA_, result, ch);
+ }
+ }
+ if(result == CE_NOT_FOUND_) {
+ // maybe there is no UCA, unlikely in Java, but ported for consistency
+ result = nextImplicit(ch);
+ }
+ }
+ } while (result == IGNORABLE && ch >= 0xAC00 && ch <= 0xD7AF); // an ignorable result for a char in U+AC00..U+D7AF (Hangul syllables) is skipped and the next char is read
+
+ return result;
+ }
+
+ /**
+ * Get the previous collation element in the source string.
+ *
+ * This iterator iterates over a sequence of collation elements
+ * that were built from the string. Because there isn't
+ * necessarily a one-to-one mapping from characters to collation
+ * elements, this doesn't mean the same thing as "return the
+ * collation element [or ordering priority] of the previous
+ * character in the string".
+ *
+ * This function updates the iterator's internal pointer to
+ * point to the collation element preceding the one it's currently
+ * pointing to and then returns that element, while next() returns
+ * the current element and then updates the pointer. This means
+ * that when you change direction while iterating (i.e., call
+ * next() and then call previous(), or call previous() and then
+ * call next()), you'll get back the same element twice.
+ *
+ * @return the previous collation element, or NULLORDER when the start of
+ * the iteration has been reached.
+ * @stable ICU 2.8
+ */
+ public int previous()
+ {
+ if (m_source_.getIndex() <= 0 && m_isForwards_) {
+ // if iterator is new or reset, we can immediate perform backwards
+ // iteration even when the offset is not right.
+ m_source_.setToLimit();
+ updateInternalState();
+ }
+ m_isForwards_ = false; // record the direction of iteration; getOffset() reads this flag
+ if (m_CEBufferSize_ > 0) { // buffered collation elements are handed out first, walking the buffer backwards
+ if (m_CEBufferOffset_ > 0) {
+ return m_CEBuffer_[-- m_CEBufferOffset_];
+ }
+ m_CEBufferSize_ = 0; // buffer exhausted: empty it before reading more text
+ m_CEBufferOffset_ = 0;
+ }
+
+ int result = NULLORDER;
+ char ch = 0;
+ do {
+ int ch_int = previousChar();
+ if (ch_int == UCharacterIterator.DONE) {
+ return NULLORDER; // start of the source text
+ }
+ ch = (char)ch_int;
+ if (m_collator_.m_isHiragana4_) {
+ m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f);
+ }
+ if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) {
+ result = previousSpecial(m_collator_, CE_CONTRACTION_, ch); // ch can end a contraction and there is text before it
+ }
+ else {
+ if (ch <= 0xFF) {
+ result = m_collator_.m_trie_.getLatin1LinearValue(ch);
+ }
+ else {
+ result = m_collator_.m_trie_.getLeadValue(ch);
+ }
+ if (RuleBasedCollator.isSpecial(result)) {
+ result = previousSpecial(m_collator_, result, ch);
+ }
+ if (result == CE_NOT_FOUND_) { // nothing usable in the tailoring: try the contraction marker or the UCA table
+ if (!isBackwardsStart()
+ && m_collator_.isContractionEnd(ch)) {
+ result = CE_CONTRACTION_;
+ }
+ else {
+ if(RuleBasedCollator.UCA_ != null) {
+ result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch);
+ }
+ }
+
+ if (RuleBasedCollator.isSpecial(result)) {
+ if(RuleBasedCollator.UCA_ != null) {
+ result = previousSpecial(RuleBasedCollator.UCA_, result, ch);
+ }
+ }
+ }
+ }
+ } while (result == IGNORABLE && ch >= 0xAC00 && ch <= 0xD7AF); // an ignorable result for a char in U+AC00..U+D7AF (Hangul syllables) is skipped and the preceding char is read
+ if(result == CE_NOT_FOUND_) {
+ result = previousImplicit(ch); // still unresolved after all lookups: fall back to previousImplicit
+ }
+ return result;
+ }
+
+ /**
+ * Return the primary order of the specified collation element,
+ * i.e. the first 16 bits. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 16 bits primary order.
+ * @stable ICU 2.8
+ */
+ public final static int primaryOrder(int ce)
+ {
+     // Isolate the primary bits, then shift them down without sign extension.
+     final int primaryBits = ce & RuleBasedCollator.CE_PRIMARY_MASK_;
+     return primaryBits >>> RuleBasedCollator.CE_PRIMARY_SHIFT_;
+ }
+ /**
+ * Return the secondary order of the specified collation element,
+ * i.e. the 16th to 23rd bits, inclusive. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 8 bits secondary order
+ * @stable ICU 2.8
+ */
+ public final static int secondaryOrder(int ce)
+ {
+     // Isolate the secondary bits, then shift them down to the low end.
+     final int secondaryBits = ce & RuleBasedCollator.CE_SECONDARY_MASK_;
+     return secondaryBits >> RuleBasedCollator.CE_SECONDARY_SHIFT_;
+ }
+
+ /**
+ * Return the tertiary order of the specified collation element, i.e. the last
+ * 8 bits. This value is unsigned.
+ * @param ce the collation element
+ * @return the element's 8 bits tertiary order
+ * @stable ICU 2.8
+ */
+ public final static int tertiaryOrder(int ce)
+ {
+     // The tertiary bits need masking only; no shift is applied.
+     final int tertiaryBits = ce & RuleBasedCollator.CE_TERTIARY_MASK_;
+     return tertiaryBits;
+ }
+
+ /**
+ * Sets the iterator to point to the collation element
+ * corresponding to the character at the specified offset. The
+ * value returned by the next call to next() will be the collation
+ * element corresponding to the characters at offset.
+ *
+ * If offset is in the middle of a contracting character
+ * sequence, the iterator is adjusted to the start of the
+ * contracting sequence. This means that getOffset() is not
+ * guaranteed to return the same value set by this method.
+ *
+ * If the decomposition mode is on, and offset is in the middle
+ * of a decomposable range of source text, the iterator may not
+ * return a correct result for the next forwards or backwards
+ * iteration. The user must ensure that the offset is not in the
+ * middle of a decomposable range.
+ *
+ * @param offset the character offset into the original source string to
+ * set. Note that this is not an offset into the corresponding
+ * sequence of collation elements.
+ * @stable ICU 2.8
+ */
+ public void setOffset(int offset)
+ {
+ m_source_.setIndex(offset);
+ int ch_int = m_source_.current();
+ char ch = (char)ch_int;
+ if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) {
+ // if it is unsafe we need to check if it is part of a contraction
+ // or a surrogate character
+ if (UTF16.isTrailSurrogate(ch)) {
+ // if it is a surrogate pair we move up one character
+ char prevch = (char)m_source_.previous();
+ if (!UTF16.isLeadSurrogate(prevch)) {
+ m_source_.setIndex(offset); // go back to the same index
+ }
+ }
+ else {
+ // could be part of a contraction
+ // backup to a safe point and iterate till we pass offset
+ while (m_source_.getIndex() > 0) {
+ if (!m_collator_.isUnsafe(ch)) {
+ break;
+ }
+ ch = (char)m_source_.previous();
+ }
+ updateInternalState();
+ int prevoffset = 0;
+ while (m_source_.getIndex() <= offset) { // step forward element by element, remembering where the last element started
+ prevoffset = m_source_.getIndex();
+ next();
+ }
+ m_source_.setIndex(prevoffset); // start of the element that contains (or begins at) the requested offset
+ }
+ }
+ updateInternalState();
+ // direction code to prevent next and previous from returning a
+ // character if we are already at the ends
+ offset = m_source_.getIndex();
+ if (offset == 0/* m_source_.getBeginIndex() */) {
+ // preventing previous() from returning characters from the end of
+ // the string again if we are at the beginning
+ m_isForwards_ = false;
+ }
+ else if (offset == m_source_.getLength()) {
+ // preventing next() from returning characters from the start of
+ // the string again if we are at the end
+ m_isForwards_ = true;
+ }
+ }
+
+ /**
+ * Set a new source string for iteration, and reset the offset
+ * to the beginning of the text.
+ *
+ * @param source the new source string for iteration.
+ * @stable ICU 2.8
+ */
+ public void setText(String source) {
+ final StringUCharacterIterator reusable = m_srcUtilIter_;
+ reusable.setText(source);
+ m_source_ = reusable;
+ updateInternalState();
+ }
+
+ /**
+ * Set a new source for iteration from the text of the given iterator,
+ * and reset the offset to the beginning of the text.
+ *
+ * Only the text returned by source.getText() is handed to this
+ * iterator's own string iterator; source itself is not stored.
+ * @param source the new source string iterator for iteration.
+ * @stable ICU 2.8
+ */
+ public void setText(UCharacterIterator source) {
+ final String text = source.getText();
+ m_srcUtilIter_.setText(text);
+ m_source_ = m_srcUtilIter_;
+ updateInternalState();
+ }
+
+ /**
+ * Set a new source string iterator for iteration, and reset the
+ * offset to the beginning of the text. The iterator is wrapped in a
+ * CharacterIteratorWrapper, which is then moved to its start.
+ * @param source the new source string iterator for iteration.
+ * @stable ICU 2.8
+ */
+ public void setText(CharacterIterator source) {
+ final UCharacterIterator wrapped = new CharacterIteratorWrapper(source);
+ m_source_ = wrapped;
+ wrapped.setToStart();
+ updateInternalState();
+ }
+
+ // public miscellaneous methods -----------------------------------------
+
+ /**
+ * Tests whether the argument is equal to this CollationElementIterator.
+ * Two iterators are equal when they use equal RuleBasedCollators, iterate
+ * over the same source text and are at the same index in that text.
+ * @param that object to compare with this CollationElementIterator
+ * @stable ICU 2.8
+ */
+ public boolean equals(Object that) {
+ if (this == that) {
+ return true;
+ }
+ if (!(that instanceof CollationElementIterator)) {
+ return false;
+ }
+ final CollationElementIterator other = (CollationElementIterator)that;
+ if (!m_collator_.equals(other.m_collator_)) {
+ return false;
+ }
+ // same collator: the position must match, and then the text
+ final UCharacterIterator mine = m_source_;
+ final UCharacterIterator theirs = other.m_source_;
+ if (mine.getIndex() != theirs.getIndex()) {
+ return false;
+ }
+ return mine.getText().equals(theirs.getText());
+ }
+
+ // package private constructors ------------------------------------------
+
+ private CollationElementIterator(RuleBasedCollator collator) {
+ m_collator_ = collator; // m_source_ is set by the delegating constructor
+ m_buffer_ = new StringBuilder();
+ m_utilStringBuffer_ = new StringBuilder();
+ m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_];
+ m_utilSpecialBackUp_ = new Backup();
+ if (Collator.NO_DECOMPOSITION != collator.getDecomposition()) {
+ m_nfcImpl_.getFCDTrie(); // ensure the FCD data is initialized
+ }
+ }
+
+ /**
+ * CollationElementIterator constructor. This takes a source
+ * string and a RuleBasedCollator. The iterator will walk through
+ * the source string based on the rules defined by the
+ * collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().
+ *
+ * @param source the source string.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(String source, RuleBasedCollator collator) {
+ this(collator);
+ m_srcUtilIter_ = new StringUCharacterIterator(source);
+ m_source_ = m_srcUtilIter_;
+ updateInternalState();
+ }
+
+ /**
+ * CollationElementIterator constructor. This takes a source
+ * character iterator and a RuleBasedCollator. The iterator will
+ * walk through the source string based on the rules defined by
+ * the collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().
+ *
+ * @param source the source string iterator.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
+ this(collator);
+ // the string iterator is created without text here; the source is
+ // read through a CharacterIteratorWrapper instead
+ m_srcUtilIter_ = new StringUCharacterIterator();
+ m_source_ = new CharacterIteratorWrapper(source);
+ updateInternalState();
+ }
+
+ /**
+ * CollationElementIterator constructor. This takes a source
+ * character iterator and a RuleBasedCollator. The iterator will
+ * walk through the source string based on the rules defined by
+ * the collator. If the source string is empty, NULLORDER will be
+ * returned on the first call to next().
+ *
+ * @param source the source string iterator.
+ * @param collator the RuleBasedCollator
+ * @stable ICU 2.8
+ */
+ CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
+ this(collator);
+ // iterate over the text obtained from source, not over source itself
+ final StringUCharacterIterator copy = new StringUCharacterIterator();
+ copy.setText(source.getText());
+ m_srcUtilIter_ = copy;
+ m_source_ = copy;
+ updateInternalState();
+ }
+
+ // package private data members -----------------------------------------
+
+ /**
+ * true if current codepoint was Hiragana; cleared by updateInternalState()
+ */
+ boolean m_isCodePointHiragana_;
+ /**
+ * Position in the original string that starts with a non-FCD sequence
+ */
+ int m_FCDStart_;
+ /**
+ * Index into the CE buffer of the CE that should be returned.
+ * Initial value is 0.
+ * Forwards iteration will end with m_CEBufferOffset_ == m_CEBufferSize_,
+ * backwards will end with m_CEBufferOffset_ == 0.
+ * The next/previous after we reach the end/beginning of the m_CEBuffer_
+ * will cause this value to be reset to 0.
+ */
+ int m_CEBufferOffset_;
+
+ /**
+ * This is the position to which we have stored processed CEs.
+ * Initial value is 0.
+ * The next/previous after we reach the end/beginning of the m_CEBuffer_
+ * will cause this value to be reset to 0.
+ */
+ int m_CEBufferSize_;
+ static final int CE_NOT_FOUND_ = 0xF0000000; // returned when a CE lookup or prefix search fails
+ static final int CE_EXPANSION_TAG_ = 1; // presumably the tag of expansion CEs -- confirm at use sites
+ static final int CE_CONTRACTION_TAG_ = 2; // tag tested by isContractionTag(int)
+ /**
+ * Collate Digits As Numbers (CODAN) implementation
+ */
+ static final int CE_DIGIT_TAG_ = 13;
+
+ // package private methods ----------------------------------------------
+
+ /**
+ * Sets the collator used.
+ * Internal use; after the collator is replaced the iteration state is
+ * reset via updateInternalState().
+ * @param collator to set
+ */
+ void setCollator(RuleBasedCollator collator) {
+ this.m_collator_ = collator;
+ updateInternalState();
+ }
+
+ /**
+ * Sets the iterator to point to the collation element corresponding to
+ * the specified character (the parameter is a CHARACTER offset in the
+ * original string, not an offset into its corresponding sequence of
+ * collation elements). The value returned by the next call to next()
+ * will be the collation element corresponding to the specified position
+ * in the text. Unlike the public method setOffset(int), this method does
+ * not try to readjust the offset to the start of a contracting sequence:
+ * the source index is set to exactly the given offset and the internal
+ * iteration state is then reset.
+ *
+ * @param offset new character offset into the original text to set.
+ */
+ void setExactOffset(int offset) {
+ this.m_source_.setIndex(offset);
+ this.updateInternalState();
+ }
+
+ /**
+ * Checks if iterator is in the buffer zone, i.e. whether the buffer read
+ * position is greater than zero (a position of exactly 0 is reported as
+ * not being in the buffer).
+ * @return true if iterator is in buffer zone, false otherwise
+ */
+ boolean isInBuffer() {
+ return 0 < m_bufferOffset_;
+ }
+
+
+ /**
+ * Sets a new source text, taken from the given iterator, and positions
+ * this iterator at the given CHARACTER offset in that text (an offset
+ * into the original string, not into its corresponding sequence of
+ * collation elements).
+ * The text returned by source.getText() is handed to this iterator's
+ * own string iterator, the source index is set to exactly the given
+ * offset, and the internal iteration state is reset. Like
+ * setExactOffset(int), and unlike the public method setOffset(int), this
+ * method does not try to readjust the offset to the start of a
+ * contracting sequence.
+ * @param source the new source string iterator for iteration.
+ * @param offset to the source
+ */
+ void setText(UCharacterIterator source, int offset) {
+ final StringUCharacterIterator reusable = m_srcUtilIter_;
+ reusable.setText(source.getText());
+ m_source_ = reusable;
+ m_source_.setIndex(offset);
+ updateInternalState();
+ }
+
+ // private inner class --------------------------------------------------
+
+ /**
+ * Snapshot of the iterator position and normalization-buffer state, filled
+ * by backupInternalState(Backup) and restored by updateInternalState(Backup).
+ */
+ private static final class Backup {
+ // protected data members -------------------------------------------
+
+ /**
+ * Saved source iterator offset
+ */
+ protected int m_offset_;
+ /**
+ * Saved non FCD sequence start
+ */
+ protected int m_FCDStart_;
+ /**
+ * Saved non FCD sequence limit
+ */
+ protected int m_FCDLimit_;
+ /**
+ * Saved flag: previous code point is Hiragana (quaternary)
+ */
+ protected boolean m_isCodePointHiragana_;
+ /**
+ * Saved buffer position
+ */
+ protected int m_bufferOffset_;
+ /**
+ * Saved buffer contents; allocated once per Backup instance
+ */
+ protected StringBuffer m_buffer_ = new StringBuffer();
+
+ // protected constructor --------------------------------------------
+
+ /**
+ * Empty constructor; the numeric fields keep their default values
+ * until the first backupInternalState(Backup) call.
+ */
+ protected Backup() {
+ // nothing to do: m_buffer_ is created by its field initializer
+ }
+ }
+ // end inner class ------------------------------------------------------
+
+ /**
+ * Direction of travel; updateInternalState() resets it to true (forwards)
+ */
+ private boolean m_isForwards_;
+ /**
+ * Source string iterator
+ */
+ private UCharacterIterator m_source_;
+ /**
+ * Read position within m_buffer_, -1 if iterator is not in m_buffer_
+ */
+ private int m_bufferOffset_;
+ /**
+ * Buffer for temporary storage of normalized characters, discontiguous
+ * characters and Thai characters
+ */
+ private StringBuilder m_buffer_;
+ /**
+ * Position in the original string to continue forward FCD check from.
+ */
+ private int m_FCDLimit_;
+ /**
+ * The collator this iterator is based on
+ */
+ private RuleBasedCollator m_collator_;
+ /**
+ * (disabled field) true if Hiragana quaternary is on
+ */
+ //private boolean m_isHiragana4_;
+ /**
+ * CE buffer
+ */
+ private int m_CEBuffer_[];
+ /**
+ * Initial size of m_CEBuffer_. In reality we should not have to deal with
+ * expansion sequences longer than 16. However this value can be changed if
+ * a bigger buffer is needed. Note, if the size is changed to too small a
+ * number, BIG trouble. Reasonable small value is around 10, if there's no
+ * Arabic or other funky collations that have long expansion sequences.
+ * This is the longest expansion sequence this can handle without bombing out.
+ */
+ private static final int CE_BUFFER_INIT_SIZE_ = 512;
+ /**
+ * Backup storage for special processing inner cases
+ */
+ private Backup m_utilSpecialBackUp_;
+ /**
+ * Backup storage in special processing entry state
+ */
+ private Backup m_utilSpecialEntryBackUp_;
+ /**
+ * Backup storage in special processing discontiguous state
+ */
+ private Backup m_utilSpecialDiscontiguousBackUp_;
+ /**
+ * Reusable string iterator that receives text in the constructors and setText
+ */
+ private StringUCharacterIterator m_srcUtilIter_;
+ private StringBuilder m_utilStringBuffer_;
+ private StringBuilder m_utilSkippedBuffer_; // skipped characters, created lazily by nextDiscontiguous
+ private CollationElementIterator m_utilColEIter_;
+ private static final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
+ private StringBuilder m_unnormalized_; // scratch input for normalize(), created lazily
+ private Normalizer2Impl.ReorderingBuffer m_n2Buffer_; // writes into m_buffer_, created by normalize()
+ /**
+ * Code units below this limit take the fast FCD-safe path in nextChar()
+ */
+ private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0;
+ /**
+ * Code units below this limit are treated as starting with a base
+ * character (see nextChar(), previousChar() and getCombiningClass())
+ */
+ private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300;
+ /**
+ * Mask for the last byte (trailing combining class of an FCD value)
+ */
+ private static final int LAST_BYTE_MASK_ = 0xFF;
+ /**
+ * Shift value for the second last byte (leading combining class)
+ */
+ private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
+
+ // special ce values and tags -------------------------------------------
+
+// private static final int CE_EXPANSION_ = 0xF1000000;
+ private static final int CE_CONTRACTION_ = 0xF2000000;
+ /**
+ * Indicates the last ce has been consumed. Compare with NULLORDER.
+ * NULLORDER is returned if error occurs.
+ */
+/* private static final int CE_NO_MORE_CES_ = 0x00010101;
+ private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000;
+ private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100;
+ private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001;
+*/
+ private static final int CE_NOT_FOUND_TAG_ = 0;
+ /**
+ * Charset processing, not yet implemented
+ */
+ private static final int CE_CHARSET_TAG_ = 4;
+ /**
+ * Hangul syllables, AC00-D7AF
+ */
+ private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
+ /**
+ * Lead surrogates, D800-DBFF
+ */
+ private static final int CE_LEAD_SURROGATE_TAG_ = 7;
+ /**
+ * Trail surrogates, DC00-DFFF
+ */
+ private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
+ /**
+ * CJK ranges 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
+ */
+ private static final int CE_CJK_IMPLICIT_TAG_ = 9;
+ private static final int CE_IMPLICIT_TAG_ = 10;
+ static final int CE_SPEC_PROC_TAG_ = 11; // tag tested by isSpecialPrefixTag(int)
+ /**
+ * This is a 3 byte primary with starting secondaries and tertiaries.
+ * It fits in a single 32 bit CE and is used instead of expansion to save
+ * space without affecting the performance (hopefully).
+ */
+ private static final int CE_LONG_PRIMARY_TAG_ = 12;
+
+// private static final int CE_CE_TAGS_COUNT = 14;
+ private static final int CE_BYTE_COMMON_ = 0x05;
+
+ // end special ce values and tags ---------------------------------------
+
+ private static final int HANGUL_SBASE_ = 0xAC00; // first precomposed Hangul syllable
+ private static final int HANGUL_LBASE_ = 0x1100; // first leading consonant jamo
+ private static final int HANGUL_VBASE_ = 0x1161; // first vowel jamo
+ private static final int HANGUL_TBASE_ = 0x11A7; // one before the first trailing consonant jamo
+ private static final int HANGUL_VCOUNT_ = 21; // number of vowel jamo
+ private static final int HANGUL_TCOUNT_ = 28; // trailing jamo count incl. "no trailing consonant"
+
+ // CJK stuff ------------------------------------------------------------
+
+/* private static final int CJK_BASE_ = 0x4E00;
+ private static final int CJK_LIMIT_ = 0x9FFF+1;
+ private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E;
+ private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1;
+ private static final int CJK_A_BASE_ = 0x3400;
+ private static final int CJK_A_LIMIT_ = 0x4DBF + 1;
+ private static final int CJK_B_BASE_ = 0x20000;
+ private static final int CJK_B_LIMIT_ = 0x2A6DF + 1;
+ private static final int NON_CJK_OFFSET_ = 0x110000;
+*/
+ private static final boolean DEBUG = ICUDebug.enabled("collator"); // read once at class load
+
+ // private methods ------------------------------------------------------
+
+ /**
+ * Reset the iterator internally: clears the normalization buffer and the
+ * CE buffer bookkeeping, forgets any FCD range and switches back to
+ * forwards iteration. The source iterator's index is not touched.
+ */
+ private void updateInternalState() {
+ m_isCodePointHiragana_ = false;
+ m_buffer_.setLength(0);
+ // not reading from the buffer, and no FCD-checked range yet
+ m_bufferOffset_ = m_FCDLimit_ = -1;
+ // the CE buffer is empty
+ m_CEBufferOffset_ = m_CEBufferSize_ = 0;
+ m_FCDStart_ = m_source_.getLength();
+ m_isForwards_ = true;
+ }
+
+ /**
+ * Saves the source index, FCD range, Hiragana flag and buffer position;
+ * the buffer text is copied only while reading from the buffer.
+ * @param backup object to store the data
+ */
+ private void backupInternalState(Backup backup) {
+ backup.m_offset_ = m_source_.getIndex();
+ backup.m_isCodePointHiragana_ = m_isCodePointHiragana_;
+ backup.m_FCDStart_ = m_FCDStart_;
+ backup.m_FCDLimit_ = m_FCDLimit_;
+ backup.m_bufferOffset_ = m_bufferOffset_;
+ backup.m_buffer_.setLength(0);
+ if (m_bufferOffset_ >= 0) {
+ backup.m_buffer_.append(m_buffer_);
+ }
+ }
+
+ /**
+ * Restores the source index, FCD range, Hiragana flag and buffer position
+ * from a backup; buffer text is restored only for an in-buffer position.
+ * @param backup object that stored the data
+ */
+ private void updateInternalState(Backup backup) {
+ m_source_.setIndex(backup.m_offset_);
+ m_FCDStart_ = backup.m_FCDStart_;
+ m_FCDLimit_ = backup.m_FCDLimit_;
+ m_isCodePointHiragana_ = backup.m_isCodePointHiragana_;
+ m_bufferOffset_ = backup.m_bufferOffset_;
+ m_buffer_.setLength(0);
+ if (backup.m_bufferOffset_ >= 0) {
+ m_buffer_.append(backup.m_buffer_);
+ }
+ }
+
+ /**
+ * A fast combining class retrieval system. The normalization data is
+ * consulted only for supplementary code points and for code units at or
+ * above LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ that the collator reports
+ * as unsafe; every other input is given combining class 0.
+ * @param ch UTF16 character
+ * @return combining class of ch
+ */
+ private int getCombiningClass(int ch) {
+ final boolean needsLookup =
+ (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ && m_collator_.isUnsafe((char)ch))
+ || ch > 0xFFFF;
+ return needsLookup ? m_nfcImpl_.getCC(m_nfcImpl_.getNorm16(ch)) : 0;
+ }
+
+ /**
+ * Incremental normalization, this is an essential optimization.
+ * Assuming FCD checks have been done, reads the source code units in
+ * [m_FCDStart_, m_FCDLimit_) and decomposes them into m_buffer_ through
+ * a reordering buffer. The scratch StringBuilder and the reordering
+ * buffer are created on first use and reused afterwards.
+ */
+ private void normalize() {
+ if (m_unnormalized_ != null) {
+ // reuse the scratch objects from an earlier call
+ m_unnormalized_.setLength(0);
+ m_n2Buffer_.remove();
+ } else {
+ m_unnormalized_ = new StringBuilder();
+ m_n2Buffer_ = new Normalizer2Impl.ReorderingBuffer(m_nfcImpl_, m_buffer_, 10);
+ }
+ final int size = m_FCDLimit_ - m_FCDStart_;
+ m_source_.setIndex(m_FCDStart_);
+ for (int remaining = size; remaining > 0; remaining --) {
+ m_unnormalized_.append((char)m_source_.next());
+ }
+ m_nfcImpl_.decomposeShort(m_unnormalized_, 0, size, m_n2Buffer_);
+ }
+
+ /**
+ * Incremental FCD check. Gets the next base character position and
+ * determines if the in-between characters need normalization.
+ *
+ * When entering, the state is known to be this:
+ *
+ * We are working on source string, not the buffer.
+ * The leading combining class from the current character is 0 or the
+ * trailing combining class of the previous char was zero.
+ *
+ * On return m_FCDStart_ is offset - 1, m_FCDLimit_ is the end of the
+ * checked range, and the source index is back at m_FCDStart_ + 1.
+ *
+ * @param ch current character (lead unit)
+ * @param offset offset of ch +1
+ * @return true if FCDCheck passes, false otherwise
+ */
+ private boolean FCDCheck(int ch, int offset)
+ {
+ boolean result = true;
+
+ // Get the trailing combining class of the current character.
+ // If it's zero, we are OK.
+ m_FCDStart_ = offset - 1;
+ m_source_.setIndex(offset);
+ // trie access
+ int fcd = m_nfcImpl_.getFCD16FromSingleLead((char)ch);
+ if (fcd != 0 && Character.isHighSurrogate((char)ch)) {
+ int c2 = m_source_.next(); // look for the trail surrogate
+ if (c2 < 0) {
+ fcd = 0; // end of input
+ } else if (Character.isLowSurrogate((char)c2)) {
+ fcd = m_nfcImpl_.getFCD16(Character.toCodePoint((char)ch, (char)c2));
+ } else {
+ m_source_.moveIndex(-1); // unpaired lead: un-read the unit just taken
+ fcd = 0;
+ }
+ }
+
+ int prevTrailCC = fcd & LAST_BYTE_MASK_;
+
+ if (prevTrailCC == 0) {
+ offset = m_source_.getIndex();
+ } else {
+ // The current char has a non-zero trailing CC. Scan forward until
+ // we find a char with a leading cc of zero.
+ while (true) {
+ ch = m_source_.nextCodePoint();
+ if (ch < 0) {
+ offset = m_source_.getIndex(); // ran off the end of the source
+ break;
+ }
+ // trie access
+ fcd = m_nfcImpl_.getFCD16(ch);
+ int leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_;
+ if (leadCC == 0) {
+ // this is a base character, we stop the FCD checks
+ offset = m_source_.getIndex() - Character.charCount(ch);
+ break;
+ }
+
+ if (leadCC < prevTrailCC) {
+ result = false; // combining classes out of order; keep scanning to find the limit
+ }
+
+ prevTrailCC = fcd & LAST_BYTE_MASK_;
+ }
+ }
+ m_FCDLimit_ = offset;
+ m_source_.setIndex(m_FCDStart_ + 1);
+ return result;
+ }
+
+ /**
+ * Method tries to fetch the next character that is in fcd form.
+ * Normalization is done if required.
+ * Offsets are returned at the next character.
+ * @return next fcd character
+ */
+ private int nextChar()
+ {
+ int result;
+
+ // Handles the next character whether it is in the buffer or not.
+ if (m_bufferOffset_ < 0) {
+ // we're working on the source and not normalizing. fast path.
+ // note Thai pre-vowel reordering uses buffer too
+ result = m_source_.next();
+ }
+ else {
+ // we are in the buffer (offset is 0 only after setDiscontiguous())
+ if (m_bufferOffset_ >= m_buffer_.length()) {
+ // End of buffer reached: revert to the source string at
+ // m_FCDLimit_ and recurse to fetch the character from there.
+ m_source_.setIndex(m_FCDLimit_);
+ m_bufferOffset_ = -1;
+ m_buffer_.setLength(0);
+ return nextChar();
+ }
+ return m_buffer_.charAt(m_bufferOffset_ ++);
+ }
+ int startoffset = m_source_.getIndex(); // index just after the unit read above
+ if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_
+ // Fast fcd safe path. trail combining class == 0.
+ || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
+ || m_bufferOffset_ >= 0 || m_FCDLimit_ >= startoffset) {
+ // skip the fcd checks
+ return result;
+ }
+
+ if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
+ // We need to peek at the next character in order to tell if we are
+ // FCD
+ int next = m_source_.current();
+ if (next == UCharacterIterator.DONE
+ || next < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
+ return result; // end of source string and if next character
+ // starts with a base character is always fcd.
+ }
+ }
+
+ // Need a more complete FCD check and possible normalization.
+ if (!FCDCheck(result, startoffset)) {
+ normalize();
+ result = m_buffer_.charAt(0);
+ m_bufferOffset_ = 1; // the first buffered unit is being returned now
+ }
+ return result;
+ }
+
+ /**
+ * Incremental normalization for backwards iteration.
+ * Runs normalize() and then places the buffer read position at the end
+ * of the normalized text, so that previousChar() reads it back to front.
+ * Assumes the FCD checks have been done, as normalize() does.
+ * This is an essential optimization.
+ */
+ private void normalizeBackwards() {
+ this.normalize();
+ this.m_bufferOffset_ = this.m_buffer_.length();
+ }
+
+ /**
+ * Incremental backwards FCD check. Gets the previous base character
+ * position and determines if the in-between characters need
+ * normalization.
+ *
+ * When entering, the state is known to be this:
+ *
+ * We are working on source string, not the buffer.
+ * The trailing combining class from the current character is 0 or the
+ * leading combining class of the next char was zero.
+ *
+ * On return m_FCDLimit_ is the incoming offset + 1, m_FCDStart_ is the
+ * start of the checked range, and the source index is at m_FCDLimit_.
+ *
+ * @param ch current character
+ * @param offset current character offset
+ * @return true if FCDCheck passes, false otherwise
+ */
+ private boolean FCDCheckBackwards(int ch, int offset)
+ {
+ int fcd;
+ m_FCDLimit_ = offset + 1;
+ m_source_.setIndex(offset);
+ if (!UTF16.isSurrogate((char)ch)) {
+ fcd = m_nfcImpl_.getFCD16FromSingleLead((char)ch);
+ } else {
+ fcd = 0; // stays 0 unless a complete surrogate pair is found
+ if (!Normalizer2Impl.UTF16Plus.isSurrogateLead(ch)) {
+ int c2 = m_source_.previous(); // look for the lead surrogate
+ if (c2 < 0) {
+ // start of input
+ } else if (Character.isHighSurrogate((char)c2)) {
+ ch = Character.toCodePoint((char)c2, (char)ch);
+ fcd = m_nfcImpl_.getFCD16(ch);
+ --offset;
+ } else {
+ m_source_.moveIndex(1); // unpaired trail: undo the previous() above
+ }
+ }
+ }
+
+ // Scan backward until we find a char with a leading cc of zero.
+ boolean result = true;
+ if (fcd != 0) {
+ int leadCC;
+ for (;;) {
+ leadCC = fcd >> SECOND_LAST_BYTE_SHIFT_;
+ if (leadCC == 0 || (ch = m_source_.previousCodePoint()) < 0) {
+ offset = m_source_.getIndex();
+ break;
+ }
+ fcd = m_nfcImpl_.getFCD16(ch);
+ int prevTrailCC = fcd & LAST_BYTE_MASK_;
+ if (leadCC < prevTrailCC) {
+ result = false; // combining classes out of order; keep scanning for the start
+ } else if (fcd == 0) {
+ offset = m_source_.getIndex() + Character.charCount(ch);
+ break;
+ }
+ }
+ }
+
+ // storing character with 0 lead fcd or the 1st accent with a base
+ // character before it
+ m_FCDStart_ = offset;
+ m_source_.setIndex(m_FCDLimit_);
+ return result;
+ }
+
+ /**
+ * Method tries to fetch the previous character that is in fcd form.
+ * Normalization is done if required.
+ * Offsets are returned at the current character.
+ * @return previous fcd character, or UCharacterIterator.DONE at the start
+ */
+ private int previousChar()
+ {
+ if (m_bufferOffset_ >= 0) {
+ m_bufferOffset_ --;
+ if (m_bufferOffset_ >= 0) {
+ return m_buffer_.charAt(m_bufferOffset_);
+ }
+ else {
+ // At the start of buffer, route back to string.
+ m_buffer_.setLength(0);
+ if (m_FCDStart_ == 0) {
+ m_FCDStart_ = -1; // buffered range began at index 0: nothing precedes it
+ m_source_.setIndex(0);
+ return UCharacterIterator.DONE;
+ }
+ else {
+ m_FCDLimit_ = m_FCDStart_;
+ m_source_.setIndex(m_FCDStart_);
+ return previousChar(); // continue in the source before the buffered range
+ }
+ }
+ }
+ int result = m_source_.previous();
+ int startoffset = m_source_.getIndex(); // index of the unit just read
+ if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_
+ || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION
+ || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) {
+ return result; // fast path: no backwards FCD check is made
+ }
+ int ch = m_source_.previous(); // peek at the unit before result
+ if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) {
+ // if previous character is FCD
+ m_source_.next(); // undo the peek
+ return result;
+ }
+ // Need a more complete FCD check and possible normalization.
+ if (!FCDCheckBackwards(result, startoffset)) {
+ normalizeBackwards();
+ m_bufferOffset_ --;
+ result = m_buffer_.charAt(m_bufferOffset_);
+ }
+ else {
+ // fcd checks always reset m_source_ to the limit of the FCD
+ m_source_.setIndex(startoffset);
+ }
+ return result;
+ }
+
+ /**
+ * Determines if it is at the start of source iteration: at source index 0
+ * when not in the buffer, else at buffer position 0 with m_FCDStart_ <= 0.
+ * @return true if iterator at the start, false otherwise
+ */
+ private final boolean isBackwardsStart() {
+ return m_bufferOffset_ < 0 ? m_source_.getIndex() == 0
+ : (m_bufferOffset_ == 0 && m_FCDStart_ <= 0);
+ }
+
+ /**
+ * Checks if iterator is at the end of its source string. While reading
+ * from the buffer this requires both that the buffer is exhausted and
+ * that the buffered range ends at the end of the source.
+ * @return true if it is at the end, false otherwise
+ */
+ private final boolean isEnd() {
+ if (m_bufferOffset_ < 0) {
+ // reading directly from the source
+ return m_source_.getLength() == m_source_.getIndex();
+ }
+ if (m_bufferOffset_ != m_buffer_.length()) {
+ return false; // buffered characters remain
+ }
+ // at end of buffer. check if fcd is at the end
+ return m_FCDLimit_ == m_source_.getLength();
+ }
+
+ /**
+ * Special CE management for surrogates.
+ * Lead surrogate is encountered. CE to be retrieved by using the
+ * following code unit. If the next code unit is a trail surrogate, both
+ * units will be combined to retrieve the CE from the collator's trie,
+ * otherwise we treat it like an unassigned code point.
+ * Whenever CE_NOT_FOUND_ is the outcome, the iterator state is restored
+ * from m_utilSpecialBackUp_ before returning.
+ * @param collator collator to use
+ * @param ce current CE
+ * @param trail character
+ * @return next CE for the surrogate characters
+ */
+ private final int nextSurrogate(RuleBasedCollator collator, int ce, char trail) {
+ int result = CE_NOT_FOUND_;
+ if (UTF16.isTrailSurrogate(trail)) {
+ // TODO: CE contain the data from the previous CE + the mask.
+ // It should at least be unmasked
+ result = collator.m_trie_.getTrailValue(ce, trail);
+ }
+ if (result == CE_NOT_FOUND_) {
+ // unpaired lead or no value in the trie: rewind to the saved state
+ updateInternalState(m_utilSpecialBackUp_);
+ }
+ return result;
+ }
+
+ /**
+ * Gets the CE expansion offset: bits 4..23 of the CE, rebased by the
+ * collator's expansion offset.
+ * @param collator current collator
+ * @param ce ce to test
+ * @return expansion offset
+ */
+ private int getExpansionOffset(RuleBasedCollator collator, int ce) {
+ final int tableIndex = (ce & 0xFFFFF0) >> 4;
+ return tableIndex - collator.m_expansionOffset_;
+ }
+
+
+ /**
+ * Gets the contraction ce offset: the low 24 bits of the CE, rebased by
+ * the collator's contraction offset.
+ * @param collator current collator
+ * @param ce current ce
+ * @return contraction offset
+ */
+ private int getContractionOffset(RuleBasedCollator collator, int ce) {
+ final int tableIndex = ce & 0xFFFFFF;
+ return tableIndex - collator.m_contractionOffset_;
+ }
+
+ /**
+ * Checks if CE is a special CE carrying the special-processing
+ * (prefix) tag CE_SPEC_PROC_TAG_.
+ * @param ce to check
+ * @return true if CE is a special prefix tag CE, false otherwise
+ */
+ private boolean isSpecialPrefixTag(int ce) {
+ final boolean special = RuleBasedCollator.isSpecial(ce);
+ return special && RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_;
+ }
+
+ /**
+ * Special processing getting a CE that is preceded by a certain
+ * prefix.
+ * Used for optimizing Japanese length and iteration marks. When a
+ * special processing tag is encountered, iterate backwards to see if
+ * there's a match.
+ * Contraction tables are used, prefix data is stored backwards in the
+ * table.
+ * @param collator collator to use
+ * @param ce current ce
+ * @param entrybackup entry backup iterator status
+ * @return next collation element
+ */
+ private int nextSpecialPrefix(RuleBasedCollator collator, int ce,
+ Backup entrybackup)
+ {
+ backupInternalState(m_utilSpecialBackUp_);
+ updateInternalState(entrybackup);
+ previousChar();
+ // We want to look at the character where we entered
+
+ while (true) {
+ // This loop will run once per source string character, for as
+ // long as we are matching a potential contraction sequence
+ // First we position ourselves at the beginning of contraction
+ // sequence
+ int entryoffset = getContractionOffset(collator, ce);
+ int offset = entryoffset;
+ if (isBackwardsStart()) {
+ ce = collator.m_contractionCE_[offset]; // nothing precedes: use the entry's own CE
+ break;
+ }
+ char previous = (char)previousChar();
+ while (previous > collator.m_contractionIndex_[offset]) {
+ // contraction characters are ordered, skip smaller characters
+ offset ++;
+ }
+
+ if (previous == collator.m_contractionIndex_[offset]) {
+ // Found the source string char in the table.
+ // Pick up the corresponding CE from the table.
+ ce = collator.m_contractionCE_[offset];
+ }
+ else {
+ // Source string char was not in the table, prefix not found
+ ce = collator.m_contractionCE_[entryoffset];
+ }
+
+ if (!isSpecialPrefixTag(ce)) {
+ // The source string char was in the contraction table, and
+ // the corresponding CE is not a prefix CE. We found the
+ // prefix, break out of loop, this CE will end up being
+ // returned. This is the normal way out of prefix handling
+ // when the source actually contained the prefix.
+ break;
+ }
+ }
+ if (ce != CE_NOT_FOUND_) {
+ // we found something and we can merrily continue
+ updateInternalState(m_utilSpecialBackUp_);
+ }
+ else { // prefix search was a failure, we have to backup all the way to
+ // the start
+ updateInternalState(entrybackup);
+ }
+ return ce;
+ }
+
+ /**
+ * Checks if the ce is a special CE carrying the contraction tag
+ * CE_CONTRACTION_TAG_.
+ * @param ce ce to check
+ * @return true if ce is a contraction tag, false otherwise
+ */
+ private boolean isContractionTag(int ce) {
+ final boolean special = RuleBasedCollator.isSpecial(ce);
+ return special && RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_;
+ }
+
+ /**
+ * Method to copy skipped characters into the buffer and set the fcd
+ * position. To ensure that the skipped characters are considered later,
+ * they are placed at the front of the buffer and the buffer read
+ * position is pointed at the start of the buffer.
+ * Simple case, iterator is reading from the source string: the FCD
+ * limit becomes the current source index and the buffer holds exactly
+ * the skipped characters.
+ * Iterator is already in the normalization buffer: the part of the
+ * buffer before the current position is replaced by the skipped
+ * characters.
+ * This keeps the rest of the CE retrieval code free of special cases.
+ * @param skipped character buffer
+ */
+ private void setDiscontiguous(StringBuilder skipped) {
+ if (m_bufferOffset_ < 0) {
+ // reading from the source: once the buffer is used up, nextChar()
+ // resumes in the source at the index recorded here
+ m_FCDLimit_ = m_source_.getIndex();
+ m_buffer_.setLength(0);
+ m_buffer_.append(skipped.toString());
+ } else {
+ // already in the buffer: the consumed prefix is replaced
+ m_buffer_.replace(0, m_bufferOffset_, skipped.toString());
+ }
+ m_bufferOffset_ = 0;
+ }
+
+ /**
+ * Returns the current character for forward iteration, i.e. the code
+ * point just before the read position in the buffer or the source.
+ * @return current character
+ */
+ private int currentChar() {
+ if (m_bufferOffset_ >= 0) {
+ // m_bufferOffset_ is never 0 in normal circumstances except after a
+ // discontiguous contraction since it is always returned and moved
+ // by 1 when we do nextChar()
+ return UTF16.charAt(m_buffer_, m_bufferOffset_ - 1);
+ }
+ // step back and re-read the code point before the source index
+ m_source_.previousCodePoint();
+ return m_source_.nextCodePoint();
+ }
+
+ /**
+ * Method to get the discontiguous collation element within the source.
+ * Note this function will set the position to the appropriate places.
+ * Passed in character offset points to the second combining character
+ * after the start character.
+ * @param collator current collator used
+ * @param entryoffset index to the start character in the contraction table
+ * @return the CE found, or the CE stored at entryoffset when nothing matched
+ */
+ private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset)
+ {
+ int offset = entryoffset;
+ boolean multicontraction = false;
+ // skipped characters are collected here and later re-read via the buffer
+ if (m_utilSkippedBuffer_ == null) {
+ m_utilSkippedBuffer_ = new StringBuilder();
+ }
+ else {
+ m_utilSkippedBuffer_.setLength(0);
+ }
+ int ch = currentChar();
+ m_utilSkippedBuffer_.appendCodePoint(ch);
+ int prevCC = 0;
+ int cc = getCombiningClass(ch);
+ // accent after the first character
+ if (m_utilSpecialDiscontiguousBackUp_ == null) {
+ m_utilSpecialDiscontiguousBackUp_ = new Backup();
+ }
+ backupInternalState(m_utilSpecialDiscontiguousBackUp_);
+ boolean prevWasLead = false;
+ while (true) {
+ // We read code units for contraction table matching
+ // but have to get combining classes for code points
+ // to figure out where to stop with discontiguous contraction.
+ int ch_int = nextChar();
+ char nextch = (char)ch_int;
+ if (UTF16.isSurrogate(nextch)) {
+ if (prevWasLead) {
+ // trail surrogate of surrogate pair, keep previous and current cc
+ prevWasLead = false;
+ } else {
+ prevCC = cc;
+ cc = 0; // default cc for an unpaired surrogate
+ prevWasLead = false;
+ if (Character.isHighSurrogate(nextch)) {
+ int trail = nextChar();
+ if (Character.isLowSurrogate((char)trail)) {
+ cc = getCombiningClass(Character.toCodePoint(nextch, (char)trail));
+ prevWasLead = true;
+ }
+ if (trail >= 0) {
+ previousChar(); // restore index after having peeked at the next code unit
+ }
+ }
+ }
+ } else {
+ prevCC = cc;
+ cc = getCombiningClass(ch_int);
+ prevWasLead = false;
+ }
+ if (ch_int < 0 || cc == 0) {
+ // if there are no more accents to move around
+ // we don't have to shift previousChar, since we are resetting
+ // the offset later
+ if (multicontraction) {
+ if (ch_int >= 0) {
+ previousChar(); // backtrack
+ }
+ setDiscontiguous(m_utilSkippedBuffer_);
+ return collator.m_contractionCE_[offset];
+ }
+ break;
+ }
+
+ offset ++; // skip the combining class offset
+ while ((offset < collator.m_contractionIndex_.length) &&
+ (nextch > collator.m_contractionIndex_[offset])) {
+ offset ++;
+ }
+
+ int ce = CE_NOT_FOUND_;
+ if ( offset >= collator.m_contractionIndex_.length) {
+ break; // ran off the end of the contraction index table
+ }
+ if (nextch != collator.m_contractionIndex_[offset] || cc == prevCC) {
+ // unmatched or blocked character
+ if ( (m_utilSkippedBuffer_.length()!= 1) ||
+ ((m_utilSkippedBuffer_.charAt(0)!= nextch) &&
+ (m_bufferOffset_<0) )) { // avoid push to skipped buffer twice
+ m_utilSkippedBuffer_.append(nextch);
+ }
+ offset = entryoffset; // Restore the offset before checking next character.
+ continue;
+ }
+ else {
+ ce = collator.m_contractionCE_[offset];
+ }
+
+ if (ce == CE_NOT_FOUND_) {
+ break;
+ }
+ else if (isContractionTag(ce)) {
+ // this is a multi-contraction
+ offset = getContractionOffset(collator, ce);
+ if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) {
+ multicontraction = true;
+ backupInternalState(m_utilSpecialDiscontiguousBackUp_);
+ }
+ }
+ else {
+ setDiscontiguous(m_utilSkippedBuffer_);
+ return ce;
+ }
+ }
+
+ updateInternalState(m_utilSpecialDiscontiguousBackUp_);
+ // backup is one forward of the base character, we need to move back
+ // one more
+ previousChar();
+ return collator.m_contractionCE_[entryoffset];
+ }
+
+ /**
+ * Gets the next contraction ce.
+ * <p>
+ * The iterator state is saved in m_utilSpecialBackUp_ and the ce stored at
+ * the start of the contraction table entry is kept as the fallback result
+ * (entryce). Each round reads one code unit with nextChar() and looks it up
+ * in the ordered contraction index of the current table:
+ * <ul>
+ * <li>at the end of the input the ce stored at the table start is used, or
+ * the fallback after restoring the saved state if that ce is
+ * CE_NOT_FOUND_;</li>
+ * <li>on a hit the ce stored for that code unit is taken;</li>
+ * <li>on a miss the combining class of the character is compared with the
+ * maximum combining class (low byte) and the "all same" flag (high byte)
+ * held in the first index entry of the table. If a discontiguous match is
+ * ruled out the position is moved back and the ce at the table start is
+ * used, otherwise nextDiscontiguous() is consulted.</li>
+ * </ul>
+ * The loop repeats while the resulting ce is itself a contraction ce. When
+ * the table just processed has a ce at its start, that ce becomes the new
+ * fallback and the state is saved again, positioned one character back.
+ * <p>
+ * NOTE(review): after a lead surrogate miss the next code unit is combined
+ * into a supplementary value without verifying that it is a trail
+ * surrogate or that the input has not ended - confirm the callers
+ * guarantee this. The index scan also has no bounds check, unlike the one
+ * in nextDiscontiguous(); presumably each table ends with a sentinel
+ * entry - verify against the table builder.
+ * @param collator collator to use
+ * @param ce current ce
+ * @return ce of the next contraction
+ */
+ private int nextContraction(RuleBasedCollator collator, int ce)
+ {
+ backupInternalState(m_utilSpecialBackUp_);
+ int entryce = collator.m_contractionCE_[getContractionOffset(collator, ce)]; //CE_NOT_FOUND_;
+ while (true) {
+ int entryoffset = getContractionOffset(collator, ce);
+ int offset = entryoffset;
+
+ if (isEnd()) {
+ ce = collator.m_contractionCE_[offset];
+ if (ce == CE_NOT_FOUND_) {
+ // back up the source over all the chars we scanned going
+ // into this contraction.
+ ce = entryce;
+ updateInternalState(m_utilSpecialBackUp_);
+ }
+ break;
+ }
+
+ // get the discontiguous maximum combining class
+ int maxCC = (collator.m_contractionIndex_[offset] & 0xFF);
+ // checks if all characters have the same combining class
+ byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8);
+ char ch = (char)nextChar();
+ offset ++;
+ while (ch > collator.m_contractionIndex_[offset]) {
+ // contraction characters are ordered, skip all smaller
+ offset ++;
+ }
+
+ if (ch == collator.m_contractionIndex_[offset]) {
+ // Found the source string char in the contraction table.
+ // Pick up the corresponding CE from the table.
+ ce = collator.m_contractionCE_[offset];
+ }
+ else {
+ // Source string char was not in contraction table.
+ // Unless it is a discontiguous contraction, we are done
+ int miss = ch;
+ if(UTF16.isLeadSurrogate(ch)) { // in order to do the proper detection, we
+ // need to see if we're dealing with a supplementary
+ miss = UCharacterProperty.getRawSupplementary(ch, (char) nextChar());
+ }
+ int sCC;
+ if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0
+ || sCC > maxCC || (allSame != 0 && sCC == maxCC) ||
+ isEnd()) {
+ // Contraction can not be discontiguous, back up by one
+ previousChar();
+ if(miss > 0xFFFF) {
+ previousChar(); // supplementary: two code units were consumed
+ }
+ ce = collator.m_contractionCE_[entryoffset];
+ }
+ else {
+ // Contraction is possibly discontiguous.
+ // find the next character if ch is not a base character
+ int ch_int = nextChar();
+ if (ch_int != UCharacterIterator.DONE) {
+ previousChar(); // only peeking, undo the read
+ }
+ char nextch = (char)ch_int;
+ if (getCombiningClass(nextch) == 0) {
+ previousChar();
+ if(miss > 0xFFFF) {
+ previousChar();
+ }
+ // base character not part of discontiguous contraction
+ ce = collator.m_contractionCE_[entryoffset];
+ }
+ else {
+ ce = nextDiscontiguous(collator, entryoffset);
+ }
+ }
+ }
+
+ if (ce == CE_NOT_FOUND_) {
+ // source did not match the contraction, revert back original
+ updateInternalState(m_utilSpecialBackUp_);
+ ce = entryce;
+ break;
+ }
+
+ // source was a contraction
+ if (!isContractionTag(ce)) {
+ break;
+ }
+
+ // continue looping to check for the remaining contraction.
+ if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) {
+ // there are further contractions to be performed, so we store
+ // the so-far completed ce, so that if we fail in the next
+ // round we just return this one.
+ entryce = collator.m_contractionCE_[entryoffset];
+ backupInternalState(m_utilSpecialBackUp_);
+ if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) {
+ m_utilSpecialBackUp_.m_bufferOffset_ --;
+ }
+ else {
+ m_utilSpecialBackUp_.m_offset_ --;
+ }
+ }
+ }
+ return ce;
+ }
+
+ /**
+ * Gets the next ce for long primaries, stuffs the rest of the collation
+ * elements into the ce buffer
+ * @param ce current ce
+ * @return next ce
+ */
+ private int nextLongPrimary(int ce)
+ {
+ m_CEBuffer_[1] = ((ce & 0xFF) << 24)
+ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+ m_CEBufferOffset_ = 1;
+ m_CEBufferSize_ = 2;
+ m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) |
+ CE_BYTE_COMMON_;
+ return m_CEBuffer_[0];
+ }
+
+ /**
+ * Gets the number of expansion
+ * @param ce current ce
+ * @return number of expansion
+ */
+ private int getExpansionCount(int ce)
+ {
+ return ce & 0xF;
+ }
+
+ /**
+  * Gets the next expansion ce and stuffs the rest of the collation elements
+  * into the ce buffer.
+  * <p>
+  * When the count stored in ce is non-zero that many elements are copied
+  * from the expansion table. A count of zero marks a zero-terminated
+  * expansion: elements are copied up to, but not including, the
+  * terminating zero. This matches previousExpansion(), so forward and
+  * backward iteration see the same elements.
+  * @param collator current collator
+  * @param ce current ce
+  * @return next expansion ce
+  */
+ private int nextExpansion(RuleBasedCollator collator, int ce)
+ {
+     // NOTE: we can encounter both continuations and expansions in an
+     // expansion!
+     // I have to decide where continuations are going to be dealt with
+     int offset = getExpansionOffset(collator, ce);
+     m_CEBufferSize_ = getExpansionCount(ce);
+     m_CEBufferOffset_ = 1;
+     m_CEBuffer_[0] = collator.m_expansion_[offset];
+     if (m_CEBufferSize_ != 0) {
+         // if there are less than 16 elements in expansion
+         for (int i = 1; i < m_CEBufferSize_; i ++) {
+             m_CEBuffer_[i] = collator.m_expansion_[offset + i];
+         }
+     }
+     else {
+         // ce are zero terminated; the first element is already buffered.
+         // Test the element about to be copied, so that the terminator
+         // itself is neither stored nor counted.
+         m_CEBufferSize_ = 1;
+         while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
+             m_CEBuffer_[m_CEBufferSize_] =
+                 collator.m_expansion_[offset + m_CEBufferSize_];
+             m_CEBufferSize_ ++;
+         }
+     }
+     // in case of one element expansion, we
+     // want to immediately return CEpos
+     if (m_CEBufferSize_ == 1) {
+         m_CEBufferSize_ = 0;
+         m_CEBufferOffset_ = 0;
+     }
+     return m_CEBuffer_[0];
+ }
+
+ /**
+ * Gets the next digit ce.
+ * <p>
+ * When numeric collation is off for m_collator_, the ce stored in the
+ * expansion table for this digit is returned unchanged.
+ * <p>
+ * Otherwise the whole run of digits starting at cp is consumed. Leading
+ * zeroes are skipped and the remaining digits are packed two at a time
+ * into base 100 values, each stored in m_utilStringBuffer_ (from index 2
+ * on) as (value * 2) + 6. Trailing zero pairs are dropped and 1 is
+ * subtracted from the last stored value. Slot 0 of the buffer receives
+ * CODAN_PLACEHOLDER and slot 1 receives 0x80 plus the number of digit
+ * pairs. The first ce is built from slots 0 and 1 with common secondary
+ * and tertiary weights; the remaining slots are packed two per
+ * continuation ce into the ce buffer.
+ * <p>
+ * NOTE(review): the ce buffer is filled here without a capacity check -
+ * confirm it is large enough for the longest supported digit run.
+ * @param collator current collator
+ * @param ce current collation element
+ * @param cp current codepoint
+ * @return next digit ce
+ */
+ private int nextDigit(RuleBasedCollator collator, int ce, int cp)
+ {
+ // We do a check to see if we want to collate digits as numbers;
+ // if so we generate a custom collation key. Otherwise we pull out
+ // the value stored in the expansion table.
+
+ if (m_collator_.m_isNumericCollation_){
+ int collateVal = 0;
+ int trailingZeroIndex = 0;
+ boolean nonZeroValReached = false;
+
+ // I just need a temporary place to store my generated CEs.
+ // icu4c uses an unsigned byte array, I'll use a string buffer here
+ // to avoid dealing with the sign problems and array allocation
+ // clear and set initial string buffer length
+ m_utilStringBuffer_.setLength(3);
+
+ // We parse the source string until we hit a char that's NOT a
+ // digit.
+ // Use this u_charDigitValue. This might be slow because we have
+ // to handle surrogates...
+ int digVal = UCharacter.digit(cp);
+ // if we have arrived here, we have already processed possible
+ // supplementaries that triggered the digit tag -
+ // all supplementaries are marked in the UCA.
+ // We pad a zero in front of the first element anyways.
+ // This takes care of the (probably) most common case where
+ // people are sorting things followed by a single digit
+ int digIndx = 1;
+ for (;;) {
+ // Make sure we have enough space.
+ if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
+ m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
+ << 1);
+ }
+ // Skipping over leading zeroes.
+ if (digVal != 0 || nonZeroValReached) {
+ if (digVal != 0 && !nonZeroValReached) {
+ nonZeroValReached = true;
+ }
+ // We parse the digit string into base 100 numbers
+ // (this fits into a byte).
+ // We only add to the buffer in twos, thus if we are
+ // parsing an odd character, that serves as the
+ // 'tens' digit while the if we are parsing an even
+ // one, that is the 'ones' digit. We dumped the
+ // parsed base 100 value (collateVal) into a buffer.
+ // We multiply each collateVal by 2 (to give us room)
+ // and add 6 (to avoid overlapping magic CE byte
+ // values). The last byte we subtract 1 to ensure it is
+ // less than all the other bytes.
+ if (digIndx % 2 == 1) {
+ collateVal += digVal;
+ // This removes trailing zeroes.
+ if (collateVal == 0 && trailingZeroIndex == 0) {
+ trailingZeroIndex = ((digIndx - 1) >>> 1) + 2;
+ }
+ else if (trailingZeroIndex != 0) {
+ trailingZeroIndex = 0;
+ }
+ m_utilStringBuffer_.setCharAt(
+ ((digIndx - 1) >>> 1) + 2,
+ (char)((collateVal << 1) + 6));
+ collateVal = 0;
+ }
+ else {
+ // We drop the collation value into the buffer so if
+ // we need to do a "front patch" we don't have to
+ // check to see if we're hitting the last element.
+ collateVal = digVal * 10;
+ m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
+ (char)((collateVal << 1) + 6));
+ }
+ digIndx ++;
+ }
+
+ // Get next character.
+ if (!isEnd()){
+ backupInternalState(m_utilSpecialBackUp_);
+ int char32 = nextChar();
+ char ch = (char)char32;
+ if (UTF16.isLeadSurrogate(ch)){
+ if (!isEnd()) {
+ char trail = (char)nextChar();
+ if (UTF16.isTrailSurrogate(trail)) {
+ char32 = UCharacterProperty.getRawSupplementary(
+ ch, trail);
+ }
+ else {
+ goBackOne(); // not a pair, un-read the code unit
+ }
+ }
+ }
+
+ digVal = UCharacter.digit(char32);
+ if (digVal == -1) {
+ // Resetting position to point to the next unprocessed
+ // char. We overshot it when doing our test/set for
+ // numbers.
+ updateInternalState(m_utilSpecialBackUp_);
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+
+ if (nonZeroValReached == false){
+ digIndx = 2; // the run consisted of zeroes only
+ m_utilStringBuffer_.setCharAt(2, (char)6);
+ }
+
+ int endIndex = trailingZeroIndex != 0 ? trailingZeroIndex
+ : (digIndx >>> 1) + 2;
+ if (digIndx % 2 != 0){
+ // We missed a value. Since digIndx isn't even, stuck too many
+ // values into the buffer (this is what we get for padding the
+ // first byte with a zero). "Front-patch" now by pushing all
+ // nybbles forward.
+ // Doing it this way ensures that at least 50% of the time
+ // (statistically speaking) we'll only be doing a single pass
+ // and optimizes for strings with single digits. I'm just
+ // assuming that's the more common case.
+ for (int i = 2; i < endIndex; i ++){
+ m_utilStringBuffer_.setCharAt(i,
+ (char)((((((m_utilStringBuffer_.charAt(i) - 6) >>> 1)
+ % 10) * 10)
+ + (((m_utilStringBuffer_.charAt(i + 1) - 6)
+ >>> 1) / 10) << 1) + 6));
+ }
+ -- digIndx;
+ }
+
+ // Subtract one off of the last byte.
+ m_utilStringBuffer_.setCharAt(endIndex - 1,
+ (char)(m_utilStringBuffer_.charAt(endIndex - 1) - 1));
+
+ // We want to skip over the first two slots in the buffer.
+ // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
+ // The second slot is for the sign/exponent byte:
+ // 0x80 + (decimalPos/2) & 7f.
+ m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
+ m_utilStringBuffer_.setCharAt(1,
+ (char)(0x80 + ((digIndx >>> 1) & 0x7F)));
+
+ // Now transfer the collation key to our collIterate struct.
+ // The total size for our collation key is endIndex bumped up to the next largest even value divided by two.
+ ce = (((m_utilStringBuffer_.charAt(0) << 8)
+ // Primary weight
+ | m_utilStringBuffer_.charAt(1))
+ << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+ // Secondary weight
+ | (RuleBasedCollator.BYTE_COMMON_
+ << RuleBasedCollator.CE_SECONDARY_SHIFT_)
+ | RuleBasedCollator.BYTE_COMMON_; // Tertiary weight.
+ int i = 2; // Reset the index into the buffer.
+
+ m_CEBuffer_[0] = ce;
+ m_CEBufferSize_ = 1;
+ m_CEBufferOffset_ = 1;
+ while (i < endIndex)
+ {
+ int primWeight = m_utilStringBuffer_.charAt(i ++) << 8;
+ if (i < endIndex) {
+ primWeight |= m_utilStringBuffer_.charAt(i ++);
+ }
+ m_CEBuffer_[m_CEBufferSize_ ++]
+ = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+ }
+ return ce;
+ }
+
+ // no numeric mode, we'll just switch to whatever we stashed and
+ // continue
+ // find the offset to expansion table
+ return collator.m_expansion_[getExpansionOffset(collator, ce)];
+ }
+
+ /**
+ * Gets the next implicit ce for codepoints
+ * @param codepoint current codepoint
+ * @return implicit ce
+ */
+ private int nextImplicit(int codepoint)
+ {
+ int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
+ m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_)
+ | 0x00000505;
+ m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0;
+ m_CEBufferOffset_ = 1;
+ m_CEBufferSize_ = 2;
+ return m_CEBuffer_[0];
+ }
+
+ /**
+  * Returns the next ce associated with the following surrogate characters.
+  * <p>
+  * Reads the code unit following the lead surrogate ch. If it is a trail
+  * surrogate, the implicit ces of the resulting supplementary code point
+  * are generated. Otherwise the code unit is pushed back and the lead
+  * surrogate is treated like an unassigned character.
+  * @param ch current character, a lead surrogate
+  * @return first implicit ce of the supplementary code point, or
+  *         CE_NOT_FOUND_ if ch is not followed by a trail surrogate
+  */
+ private int nextSurrogate(char ch)
+ {
+     int ch_int = nextChar();
+     // End of input is signalled by the int sentinel. It must be tested on
+     // the int value: once narrowed to char it cannot be told apart from a
+     // real U+FFFF in the text, which would then not be pushed back.
+     if (ch_int != UCharacterIterator.DONE) {
+         char nextch = (char)ch_int;
+         if (UTF16.isTrailSurrogate(nextch)) {
+             int codepoint =
+                 UCharacterProperty.getRawSupplementary(ch, nextch);
+             return nextImplicit(codepoint);
+         }
+         previousChar(); // reverts back to the original position
+     }
+     return CE_NOT_FOUND_; // treat like unassigned
+ }
+
+ /**
+ * Returns the next ce for a hangul character, this is an implicit
+ * calculation
+ * @param collator current collator
+ * @param ch current character
+ * @return hangul ce
+ */
+ private int nextHangul(RuleBasedCollator collator, char ch)
+ {
+ char L = (char)(ch - HANGUL_SBASE_);
+
+ // divide into pieces
+ // do it in this order since some compilers can do % and / in one
+ // operation
+ char T = (char)(L % HANGUL_TCOUNT_);
+ L /= HANGUL_TCOUNT_;
+ char V = (char)(L % HANGUL_VCOUNT_);
+ L /= HANGUL_VCOUNT_;
+
+ // offset them
+ L += HANGUL_LBASE_;
+ V += HANGUL_VBASE_;
+ T += HANGUL_TBASE_;
+
+ // return the first CE, but first put the rest into the expansion
+ // buffer
+ m_CEBufferSize_ = 0;
+ if (!m_collator_.m_isJamoSpecial_) { // FAST PATH
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(L);
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(V);
+
+ if (T != HANGUL_TBASE_) {
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(T);
+ }
+ m_CEBufferOffset_ = 1;
+ return m_CEBuffer_[0];
+ }
+ else {
+ // Jamo is Special
+ // Since Hanguls pass the FCD check, it is guaranteed that we
+ // won't be in the normalization buffer if something like this
+ // happens
+ // Move Jamos into normalization buffer
+ m_buffer_.append(L);
+ m_buffer_.append(V);
+ if (T != HANGUL_TBASE_) {
+ m_buffer_.append(T);
+ }
+ m_bufferOffset_ = 0;
+ m_FCDLimit_ = m_source_.getIndex();
+ m_FCDStart_ = m_FCDLimit_ - 1;
+ // Indicate where to continue in main input string after
+ // exhausting the buffer
+ return IGNORABLE;
+ }
+ }
+
+ /**
+ * Special CE management. Expansions, contractions etc...
+ * <p>
+ * Dispatches on the tag of ce. Long primary, expansion, implicit,
+ * surrogate and hangul tags produce their result directly and return.
+ * Surrogate (tailored lead), special prefix, contraction and digit tags
+ * compute a new ce and loop again as long as that ce is itself special.
+ * Unknown tags yield IGNORABLE.
+ * <p>
+ * The iterator state on entry is saved in a Backup object. The shared
+ * m_utilSpecialEntryBackUp_ field is set to null while it is in use, so a
+ * nested call allocates its own Backup, and is assigned again in the
+ * finally clause.
+ * @param collator can be plain UCA
+ * @param ce current ce
+ * @param ch current character
+ * @return next special ce
+ */
+ private int nextSpecial(RuleBasedCollator collator, int ce, char ch)
+ {
+ int codepoint = ch;
+ Backup entrybackup = m_utilSpecialEntryBackUp_;
+ // this is to handle recursive looping
+ if (entrybackup != null) {
+ m_utilSpecialEntryBackUp_ = null;
+ }
+ else {
+ entrybackup = new Backup();
+ }
+ backupInternalState(entrybackup);
+ try { // forces it to assign m_utilSpecialEntryBackup_
+ while (true) {
+ // This loop will repeat only in the case of contractions,
+ // surrogate
+ switch(RuleBasedCollator.getTag(ce)) {
+ case CE_NOT_FOUND_TAG_:
+ // impossible case for icu4j
+ return ce;
+ case RuleBasedCollator.CE_SURROGATE_TAG_:
+ if (isEnd()) {
+ return CE_NOT_FOUND_;
+ }
+ backupInternalState(m_utilSpecialBackUp_);
+ char trail = (char)nextChar();
+ ce = nextSurrogate(collator, ce, trail);
+ // calculate the supplementary code point value,
+ // if surrogate was not tailored we go one more round
+ codepoint =
+ UCharacterProperty.getRawSupplementary(ch, trail);
+ break;
+ case CE_SPEC_PROC_TAG_:
+ ce = nextSpecialPrefix(collator, ce, entrybackup);
+ break;
+ case CE_CONTRACTION_TAG_:
+ ce = nextContraction(collator, ce);
+ break;
+ case CE_LONG_PRIMARY_TAG_:
+ return nextLongPrimary(ce);
+ case CE_EXPANSION_TAG_:
+ return nextExpansion(collator, ce);
+ case CE_DIGIT_TAG_:
+ ce = nextDigit(collator, ce, codepoint);
+ break;
+ // various implicits optimization
+ case CE_CJK_IMPLICIT_TAG_:
+ // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
+ return nextImplicit(codepoint);
+ case CE_IMPLICIT_TAG_: // everything that is not defined
+ return nextImplicit(codepoint);
+ case CE_TRAIL_SURROGATE_TAG_:
+ return CE_NOT_FOUND_; // DC00-DFFF broken surrogate, treat like unassigned
+ case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
+ return nextSurrogate(ch);
+ case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
+ return nextHangul(collator, ch);
+ case CE_CHARSET_TAG_:
+ // not yet implemented probably after 1.8
+ return CE_NOT_FOUND_;
+ default:
+ ce = IGNORABLE;
+ // synwee todo, throw exception or something here.
+ }
+ if (!RuleBasedCollator.isSpecial(ce)) {
+ break; // a plain ce was reached, stop looping
+ }
+ }
+ }
+ finally {
+ m_utilSpecialEntryBackUp_ = entrybackup; // make the Backup object available for reuse
+ }
+ return ce;
+ }
+
+ /**
+ * Special processing is getting a CE that is preceded by a certain prefix.
+ * Currently this is only needed for optimizing Japanese length and
+ * iteration marks. When we encounter a special processing tag, we go
+ * backwards and try to see if we have a match. Contraction tables are used
+ * - so the whole process is not unlike contraction. prefix data is stored
+ * backwards in the table.
+ * <p>
+ * The iterator state is saved on entry and always restored before
+ * returning, so this method does not move the iterator. Characters that
+ * are not in the table but map to a zero ce (including lone surrogates)
+ * are skipped as if they were not present.
+ * <p>
+ * NOTE(review): the scan of the contraction index has no bounds check;
+ * presumably each table ends with a sentinel entry - verify against the
+ * table builder.
+ * @param collator current collator
+ * @param ce current ce
+ * @return previous ce
+ */
+ private int previousSpecialPrefix(RuleBasedCollator collator, int ce)
+ {
+ backupInternalState(m_utilSpecialBackUp_);
+ while (true) {
+ // position ourselves at the beginning of contraction sequence
+ int offset = getContractionOffset(collator, ce);
+ int entryoffset = offset;
+ if (isBackwardsStart()) {
+ ce = collator.m_contractionCE_[offset];
+ break;
+ }
+ char prevch = (char)previousChar();
+ while (prevch > collator.m_contractionIndex_[offset]) {
+ // since contraction codepoints are ordered, we skip all that
+ // are smaller
+ offset ++;
+ }
+ if (prevch == collator.m_contractionIndex_[offset]) {
+ ce = collator.m_contractionCE_[offset];
+ }
+ else {
+ // if there is a completely ignorable code point in the middle
+ // of a prefix, we need to act as if it's not there assumption:
+ // 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to
+ // zero)
+ // lone surrogates cannot be set to zero as it would break
+ // other processing
+ int isZeroCE = collator.m_trie_.getLeadValue(prevch);
+ // it's easy for BMP code points
+ if (isZeroCE == 0) {
+ continue;
+ }
+ else if (UTF16.isTrailSurrogate(prevch)
+ || UTF16.isLeadSurrogate(prevch)) {
+ // for supplementary code points, we have to check the next one
+ // situations where we are going to ignore
+ // 1. beginning of the string: schar is a lone surrogate
+ // 2. schar is a lone surrogate
+ // 3. schar is a trail surrogate in a valid surrogate
+ // sequence that is explicitly set to zero.
+ if (!isBackwardsStart()) {
+ char lead = (char)previousChar();
+ if (UTF16.isLeadSurrogate(lead)) {
+ isZeroCE = collator.m_trie_.getLeadValue(lead);
+ if (RuleBasedCollator.getTag(isZeroCE)
+ == RuleBasedCollator.CE_SURROGATE_TAG_) {
+ int finalCE = collator.m_trie_.getTrailValue(
+ isZeroCE,
+ prevch);
+ if (finalCE == 0) {
+ // this is a real, assigned completely
+ // ignorable code point
+ continue;
+ }
+ }
+ }
+ else {
+ nextChar(); // revert to original offset
+ // lone surrogate, completely ignorable
+ continue;
+ }
+ nextChar(); // revert to original offset
+ }
+ else {
+ // lone surrogate at the beginning, completely ignorable
+ continue;
+ }
+ }
+
+ // char was not in the table. prefix not found
+ ce = collator.m_contractionCE_[entryoffset];
+ }
+
+ if (!isSpecialPrefixTag(ce)) {
+ // char was in the contraction table, and the corresponding ce
+ // is not a prefix ce. We found the prefix, break out of loop,
+ // this ce will end up being returned.
+ break;
+ }
+ }
+ updateInternalState(m_utilSpecialBackUp_);
+ return ce;
+ }
+
+ /**
+  * Retrieves the previous contraction ce. To ensure that the backwards and
+  * forwards iteration matches, we take the current region of most possible
+  * match and pass it through the forward iteration. This will ensure that
+  * the obstinate problem of overlapping contractions will not occur.
+  * <p>
+  * The region is collected by walking backwards while the current
+  * character is unsafe according to the collator. A helper iterator then
+  * runs forward over that region, with decomposition switched off on the
+  * collator, and all of its ces are stored in the ce buffer, which grows
+  * as needed. The collator's decomposition mode is restored in a finally
+  * clause, so it is reset even if the forward iteration fails.
+  * @param collator current collator
+  * @param ce current ce
+  * @param ch current character
+  * @return previous contraction ce, i.e. the last ce of the region
+  */
+ private int previousContraction(RuleBasedCollator collator, int ce, char ch)
+ {
+     m_utilStringBuffer_.setLength(0);
+     // since we might encounter normalized characters (from the thai
+     // processing) we can't use peekCharacter() here.
+     char prevch = (char)previousChar();
+     boolean atStart = false;
+     // TODO: address the comment above - maybe now we *can* use peekCharacter
+     //while (collator.isUnsafe(ch) || isThaiPreVowel(prevch)) {
+     while (collator.isUnsafe(ch)) {
+         m_utilStringBuffer_.insert(0, ch);
+         ch = prevch;
+         if (isBackwardsStart()) {
+             atStart = true;
+             break;
+         }
+         prevch = (char)previousChar();
+     }
+     if (!atStart) {
+         // undo the previousChar() if we didn't reach the beginning
+         nextChar();
+     }
+     // adds the initial base character to the string
+     m_utilStringBuffer_.insert(0, ch);
+
+     // a new collation element iterator is used to simplify things, since
+     // using the current collation element iterator will mean that the
+     // forward and backwards iteration will share and change the same
+     // buffers. it is going to be painful.
+     int originaldecomp = collator.getDecomposition();
+     // for faster access, since string would have been normalized above
+     collator.setDecomposition(Collator.NO_DECOMPOSITION);
+     try {
+         if (m_utilColEIter_ == null) {
+             m_utilColEIter_ = new CollationElementIterator(
+                                         m_utilStringBuffer_.toString(),
+                                         collator);
+         }
+         else {
+             m_utilColEIter_.m_collator_ = collator;
+             m_utilColEIter_.setText(m_utilStringBuffer_.toString());
+         }
+         ce = m_utilColEIter_.next();
+         m_CEBufferSize_ = 0;
+         while (ce != NULLORDER) {
+             if (m_CEBufferSize_ == m_CEBuffer_.length) {
+                 // increasing cebuffer size
+                 int tempbuffer[] = new int[m_CEBuffer_.length + 50];
+                 System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0,
+                                  m_CEBuffer_.length);
+                 m_CEBuffer_ = tempbuffer;
+             }
+             m_CEBuffer_[m_CEBufferSize_ ++] = ce;
+             ce = m_utilColEIter_.next();
+         }
+     }
+     finally {
+         // the collator is shared with the caller, never leave it with
+         // decomposition switched off
+         collator.setDecomposition(originaldecomp);
+     }
+     m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+     return m_CEBuffer_[m_CEBufferOffset_];
+ }
+
+ /**
+ * Returns the previous long primary ces
+ * @param ce long primary ce
+ * @return previous long primary ces
+ */
+ private int previousLongPrimary(int ce)
+ {
+ m_CEBufferSize_ = 0;
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_;
+ m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24)
+ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+ m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+ return m_CEBuffer_[m_CEBufferOffset_];
+ }
+
+ /**
+ * Returns the previous expansion ces
+ * @param collator current collator
+ * @param ce current ce
+ * @return previous expansion ce
+ */
+ private int previousExpansion(RuleBasedCollator collator, int ce)
+ {
+ // find the offset to expansion table
+ int offset = getExpansionOffset(collator, ce);
+ m_CEBufferSize_ = getExpansionCount(ce);
+ if (m_CEBufferSize_ != 0) {
+ // less than 16 elements in expansion
+ for (int i = 0; i < m_CEBufferSize_; i ++) {
+ m_CEBuffer_[i] = collator.m_expansion_[offset + i];
+ }
+
+ }
+ else {
+ // null terminated ces
+ while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) {
+ m_CEBuffer_[m_CEBufferSize_] =
+ collator.m_expansion_[offset + m_CEBufferSize_];
+ m_CEBufferSize_ ++;
+ }
+ }
+ m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+ return m_CEBuffer_[m_CEBufferOffset_];
+ }
+
+ /**
+  * Getting the digit collation elements while iterating backwards.
+  * <p>
+  * When numeric collation is off for m_collator_, the ce stored in the
+  * expansion table for this digit is returned unchanged.
+  * <p>
+  * Otherwise the whole run of digits ending at ch is consumed from right
+  * to left. Digits are packed two at a time into base 100 values, each
+  * stored in m_utilStringBuffer_ (from index 2 on) as (value * 2) + 6.
+  * The first digit read fills the ones place and the second the tens
+  * place. Leading zero pairs of the number are dropped. The resulting ces
+  * are written to the ce buffer and the last one is returned.
+  * <p>
+  * Each code unit read in the loop is itself checked for being a trail
+  * surrogate and, if so, combined with the lead surrogate before it, so
+  * that runs of supplementary digits are recognized.
+  * @param collator current collator
+  * @param ce current collation element
+  * @param ch current code unit; if it is a trail surrogate, the lead
+  *        surrogate before it is read to form the code point
+  * @return digit collation element
+  */
+ private int previousDigit(RuleBasedCollator collator, int ce, char ch)
+ {
+     // We do a check to see if we want to collate digits as numbers; if so
+     // we generate a custom collation key. Otherwise we pull out the value
+     // stored in the expansion table.
+     if (!m_collator_.m_isNumericCollation_) {
+         return collator.m_expansion_[getExpansionOffset(collator, ce)];
+     }
+
+     int leadingZeroIndex = 0;
+     int collateVal = 0;
+     boolean nonZeroValReached = false;
+
+     // clear and set initial string buffer length
+     m_utilStringBuffer_.setLength(3);
+
+     // We parse the source string until we hit a char that's NOT a digit
+     // Use this u_charDigitValue. This might be slow because we have to
+     // handle surrogates...
+     int char32 = ch;
+     if (UTF16.isTrailSurrogate(ch)) {
+         if (!isBackwardsStart()){
+             char lead = (char)previousChar();
+             if (UTF16.isLeadSurrogate(lead)) {
+                 char32 = UCharacterProperty.getRawSupplementary(lead, ch);
+             }
+             else {
+                 goForwardOne();
+             }
+         }
+     }
+     int digVal = UCharacter.digit(char32);
+     int digIndx = 0;
+     for (;;) {
+         // Make sure we have enough space.
+         if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) {
+             m_utilStringBuffer_.setLength(m_utilStringBuffer_.length()
+                                           << 1);
+         }
+         // Skipping over "trailing" zeroes but we still add to digIndx.
+         if (digVal != 0 || nonZeroValReached) {
+             if (digVal != 0 && !nonZeroValReached) {
+                 nonZeroValReached = true;
+             }
+
+             // We parse the digit string into base 100 numbers (this
+             // fits into a byte).
+             // We only add to the buffer in twos, thus if we are
+             // parsing an odd character, that serves as the 'tens'
+             // digit while the if we are parsing an even one, that is
+             // the 'ones' digit. We dumped the parsed base 100 value
+             // (collateVal) into a buffer. We multiply each collateVal
+             // by 2 (to give us room) and add 6 (to avoid overlapping
+             // magic CE byte values). The last byte we subtract 1 to
+             // ensure it is less than all the other bytes.
+             // Since we're doing in this reverse we want to put the
+             // first digit encountered into the ones place and the
+             // second digit encountered into the tens place.
+
+             if (digIndx % 2 == 1){
+                 collateVal += digVal * 10;
+
+                 // This removes leading zeroes.
+                 if (collateVal == 0 && leadingZeroIndex == 0) {
+                     leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
+                 }
+                 else if (leadingZeroIndex != 0) {
+                     leadingZeroIndex = 0;
+                 }
+
+                 m_utilStringBuffer_.setCharAt(((digIndx - 1) >>> 1) + 2,
+                                         (char)((collateVal << 1) + 6));
+                 collateVal = 0;
+             }
+             else {
+                 collateVal = digVal;
+             }
+         }
+         digIndx ++;
+
+         if (!isBackwardsStart()){
+             backupInternalState(m_utilSpecialBackUp_);
+             char32 = previousChar();
+             // examine the code unit just read, not the one the method
+             // was entered with
+             char prevch = (char)char32;
+             if (UTF16.isTrailSurrogate(prevch)){
+                 if (!isBackwardsStart()) {
+                     char lead = (char)previousChar();
+                     if (UTF16.isLeadSurrogate(lead)) {
+                         char32
+                             = UCharacterProperty.getRawSupplementary(
+                                                         lead, prevch);
+                     }
+                     else {
+                         updateInternalState(m_utilSpecialBackUp_);
+                     }
+                 }
+             }
+
+             digVal = UCharacter.digit(char32);
+             if (digVal == -1) {
+                 // not a digit, step back to the last digit consumed
+                 updateInternalState(m_utilSpecialBackUp_);
+                 break;
+             }
+         }
+         else {
+             break;
+         }
+     }
+
+     if (!nonZeroValReached) {
+         // the run consisted of zeroes only
+         digIndx = 2;
+         m_utilStringBuffer_.setCharAt(2, (char)6);
+     }
+
+     if (digIndx % 2 != 0) {
+         if (collateVal == 0 && leadingZeroIndex == 0) {
+             // This removes the leading 0 in a odd number sequence of
+             // numbers e.g. avery001
+             leadingZeroIndex = ((digIndx - 1) >>> 1) + 2;
+         }
+         else {
+             // this is not a leading 0, we add it in
+             m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2,
+                                         (char)((collateVal << 1) + 6));
+             digIndx ++;
+         }
+     }
+
+     int endIndex = leadingZeroIndex != 0 ? leadingZeroIndex
+                                          : ((digIndx >>> 1) + 2) ;
+     digIndx = ((endIndex - 2) << 1) + 1; // removing initial zeros
+     // Subtract one off of the last byte.
+     // Really the first byte here, but it's reversed...
+     m_utilStringBuffer_.setCharAt(2,
+                             (char)(m_utilStringBuffer_.charAt(2) - 1));
+     // We want to skip over the first two slots in the buffer.
+     // The first slot is reserved for the header byte CODAN_PLACEHOLDER.
+     // The second slot is for the sign/exponent byte:
+     // 0x80 + (decimalPos/2) & 7f.
+     m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER);
+     m_utilStringBuffer_.setCharAt(1,
+                             (char)(0x80 + ((digIndx >>> 1) & 0x7F)));
+
+     // Now transfer the collation key to our collIterate struct.
+     // The total size for our collation key is endIndex bumped up to the
+     // next largest even value divided by two.
+     m_CEBufferSize_ = 0;
+     m_CEBuffer_[m_CEBufferSize_ ++]
+         = (((m_utilStringBuffer_.charAt(0) << 8)
+             // Primary weight
+             | m_utilStringBuffer_.charAt(1))
+            << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+           // Secondary weight
+           | (RuleBasedCollator.BYTE_COMMON_
+              << RuleBasedCollator.CE_SECONDARY_SHIFT_)
+           // Tertiary weight.
+           | RuleBasedCollator.BYTE_COMMON_;
+     int i = endIndex - 1; // Reset the index into the buffer.
+     while (i >= 2) {
+         int primWeight = m_utilStringBuffer_.charAt(i --) << 8;
+         if (i >= 2) {
+             primWeight |= m_utilStringBuffer_.charAt(i --);
+         }
+         m_CEBuffer_[m_CEBufferSize_ ++]
+             = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_)
+               | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+     }
+     m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+     return m_CEBuffer_[m_CEBufferOffset_];
+ }
+
+ /**
+ * Returns previous hangul ces
+ * @param collator current collator
+ * @param ch current character
+ * @return previous hangul ce
+ */
+ private int previousHangul(RuleBasedCollator collator, char ch)
+ {
+ char L = (char)(ch - HANGUL_SBASE_);
+ // we do it in this order since some compilers can do % and / in one
+ // operation
+ char T = (char)(L % HANGUL_TCOUNT_);
+ L /= HANGUL_TCOUNT_;
+ char V = (char)(L % HANGUL_VCOUNT_);
+ L /= HANGUL_VCOUNT_;
+
+ // offset them
+ L += HANGUL_LBASE_;
+ V += HANGUL_VBASE_;
+ T += HANGUL_TBASE_;
+
+ m_CEBufferSize_ = 0;
+ if (!m_collator_.m_isJamoSpecial_) {
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(L);
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(V);
+ if (T != HANGUL_TBASE_) {
+ m_CEBuffer_[m_CEBufferSize_ ++] =
+ collator.m_trie_.getLeadValue(T);
+ }
+ m_CEBufferOffset_ = m_CEBufferSize_ - 1;
+ return m_CEBuffer_[m_CEBufferOffset_];
+ }
+ else {
+ // Since Hanguls pass the FCD check, it is guaranteed that we won't
+ // be in the normalization buffer if something like this happens
+ // Move Jamos into normalization buffer
+ m_buffer_.append(L);
+ m_buffer_.append(V);
+ if (T != HANGUL_TBASE_) {
+ m_buffer_.append(T);
+ }
+ m_bufferOffset_ = m_buffer_.length();
+ m_FCDStart_ = m_source_.getIndex();
+ m_FCDLimit_ = m_FCDStart_ + 1;
+ return IGNORABLE;
+ }
+ }
+
+    /**
+     * Builds the two implicit collation elements for a code point and stores
+     * them in the CE buffer, positioned on the second (continuation) element
+     * because iteration is running backwards.
+     * @param codepoint current codepoint
+     * @return the continuation CE, i.e. the last of the two implicit CEs
+     */
+    private int previousImplicit(int codepoint)
+    {
+        int implicit
+              = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint);
+        // upper part of the implicit weight, with 05 secondary and tertiary
+        int firstCE = (implicit & RuleBasedCollator.CE_PRIMARY_MASK_)
+                      | 0x00000505;
+        // lower 16 bits moved into the primary position, low byte set to C0
+        int continuationCE = ((implicit & 0x0000FFFF) << 16) | 0x000000C0;
+
+        m_CEBufferSize_ = 2;
+        m_CEBufferOffset_ = 1;
+        m_CEBuffer_[0] = firstCE;
+        m_CEBuffer_[1] = continuationCE;
+        return m_CEBuffer_[1];
+    }
+
+    /**
+     * Gets the CE for a trail surrogate met while iterating backwards.
+     * If the preceding character is a lead surrogate, the pair is combined
+     * and given implicit CEs; otherwise the trail surrogate is unpaired.
+     * @param ch current character, the trail surrogate
+     * @return the implicit CE of the supplementary code point, or
+     *         CE_NOT_FOUND_ for an unpaired surrogate
+     */
+    private int previousSurrogate(char ch)
+    {
+        if (!isBackwardsStart()) {
+            char lead = (char)previousChar();
+            if (UTF16.isLeadSurrogate(lead)) {
+                // Handles Han and Supplementary characters here.
+                int supplementary
+                        = UCharacterProperty.getRawSupplementary(lead, ch);
+                return previousImplicit(supplementary);
+            }
+            if (lead != CharacterIterator.DONE) {
+                // not a lead surrogate, undo the step back
+                nextChar();
+            }
+        }
+        // start of the string or broken pair: treat like unassigned
+        return CE_NOT_FOUND_;
+    }
+
+ /**
+ * Special CE management while iterating backwards. Dispatches on the tag
+ * of the special CE (expansions, contractions, digits, Hangul, surrogates,
+ * implicits etc...) and keeps looping for as long as the handler hands
+ * back another special CE.
+ * @param collator can be plain UCA
+ * @param ce current ce
+ * @param ch current character
+ * @return the resolved previous ce
+ */
+ private int previousSpecial(RuleBasedCollator collator, int ce, char ch)
+ {
+ while(true) {
+ // the only tags that can loop are special prefix, contraction (at
+ // the start of the string) and digit; all other tags return directly
+ switch (RuleBasedCollator.getTag(ce)) {
+ case CE_NOT_FOUND_TAG_: // this tag always returns
+ return ce;
+ case RuleBasedCollator.CE_SURROGATE_TAG_: // unpaired lead surrogate
+ return CE_NOT_FOUND_;
+ case CE_SPEC_PROC_TAG_:
+ ce = previousSpecialPrefix(collator, ce);
+ break;
+ case CE_CONTRACTION_TAG_:
+ // may loop for first character e.g. "0x0f71" for english
+ if (isBackwardsStart()) {
+ // start of string or this is not the end of any contraction
+ ce = collator.m_contractionCE_[
+ getContractionOffset(collator, ce)];
+ break;
+ }
+ return previousContraction(collator, ce, ch); // else
+ case CE_LONG_PRIMARY_TAG_:
+ return previousLongPrimary(ce);
+ case CE_EXPANSION_TAG_: // always returns
+ return previousExpansion(collator, ce);
+ case CE_DIGIT_TAG_:
+ ce = previousDigit(collator, ce, ch);
+ break;
+ case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF
+ return previousHangul(collator, ch);
+ case CE_LEAD_SURROGATE_TAG_: // D800-DBFF
+ return CE_NOT_FOUND_; // broken surrogate sequence, treat like unassigned
+ case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF
+ return previousSurrogate(ch);
+ case CE_CJK_IMPLICIT_TAG_:
+ // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
+ return previousImplicit(ch);
+ case CE_IMPLICIT_TAG_: // everything that is not defined
+ // UCA is filled with these. Tailorings are NOT_FOUND
+ return previousImplicit(ch);
+ case CE_CHARSET_TAG_: // this tag always returns
+ return CE_NOT_FOUND_;
+ default: // any other tag is mapped to IGNORABLE
+ ce = IGNORABLE;
+ }
+ if (!RuleBasedCollator.isSpecial(ce)) {
+ break;
+ }
+ }
+ return ce;
+ }
+
+// /**
+// * Gets a character from the source string at a given offset.
+// * Handles both normal and iterative cases.
+// * No error checking and does not access the normalization buffer
+// * - caller beware!
+// * @param offset offset from current position which character is to be
+// * retrieved
+// * @return character at current position + offset
+// */
+// private char peekCharacter(int offset)
+// {
+// if (offset != 0) {
+// int currentoffset = m_source_.getIndex();
+// m_source_.setIndex(currentoffset + offset);
+// char result = (char)m_source_.current();
+// m_source_.setIndex(currentoffset);
+// return result;
+// }
+// else {
+// return (char)m_source_.current();
+// }
+// }
+
+    /**
+     * Steps back one position, either in the normalization buffer (when it
+     * is in use) or in the source text. Unlike previousChar() no
+     * normalization takes place and no boundary checks are performed.
+     * The caller must make sure that stepping back one position stays within
+     * the source limits.
+     * Use only with nextChar() and never call this API twice in a row without
+     * nextChar() in the middle.
+     */
+    private void goBackOne()
+    {
+        if (m_bufferOffset_ < 0) {
+            // not in the buffer, move the source index
+            m_source_.setIndex(m_source_.getIndex() - 1);
+            return;
+        }
+        m_bufferOffset_ --;
+    }
+
+    /**
+     * Steps forward one position, either in the normalization buffer (when
+     * it is in use) or in the source text. Unlike nextChar() no
+     * normalization takes place and no boundary checks are performed.
+     * The caller must make sure that stepping forward one position stays
+     * within the source limits.
+     * Use only with previousChar() and never call this API twice in a row
+     * without previousChar() in the middle.
+     */
+    private void goForwardOne()
+    {
+        if (m_bufferOffset_ >= 0) {
+            // we are in the buffer, buffer offset will never be 0 here
+            m_bufferOffset_ ++;
+            return;
+        }
+        // we're working on the source and not normalizing. fast path.
+        // note Thai pre-vowel reordering uses buffer too
+        m_source_.setIndex(m_source_.getIndex() + 1);
+    }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationKey.java b/main/classes/collate/src/com/ibm/icu/text/CollationKey.java
new file mode 100644
index 00000000000..fddd61cb0a1
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollationKey.java
@@ -0,0 +1,624 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+/**
+ * <p>A <code>CollationKey</code> represents a <code>String</code>
+ * under the rules of a specific <code>Collator</code>
+ * object. Comparing two <code>CollationKey</code>s returns the
+ * relative order of the <code>String</code>s they represent.</p>
+ *
+ * <p>Since the rule set of <code>Collator</code>s can differ, the
+ * sort orders of the same string under two different
+ * <code>Collator</code>s might differ. Hence comparing
+ * <code>CollationKey</code>s generated from different
+ * <code>Collator</code>s can give incorrect results.</p>
+ *
+ * <p>Both the method
+ * <code>CollationKey.compareTo(CollationKey)</code> and the method
+ * <code>Collator.compare(String, String)</code> compare two strings
+ * and return their relative order. The performance characteristics
+ * of these two approaches can differ.</p>
+ *
+ * <p>During the construction of a <code>CollationKey</code>, the
+ * entire source string is examined and processed into a series of
+ * bits terminated by a null, that are stored in the
+ * <code>CollationKey</code>.
+ * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
+ * performs bitwise comparison on the bit sequences. This can incur
+ * startup cost when creating the <code>CollationKey</code>, but once
+ * the key is created, binary comparisons are fast. This approach is
+ * recommended when the same strings are to be compared over and over
+ * again.</p>
+ *
+ * <p>On the other hand, implementations of
+ * <code>Collator.compare(String, String)</code> can examine and
+ * process the strings only until the first characters differing in
+ * order. This approach is recommended if the strings are to be
+ * compared only once.</p>
+ *
+ * <p>More information about the composition of the bit sequence can
+ * be found in the user guide.</p>
+ *
+ * <p>The following example shows how <code>CollationKey</code>s can be
+ * used to sort a list of <code>String</code>s.</p>
+ * <blockquote>
+ * <pre>
+ * // Create an array of CollationKeys for the Strings to be sorted.
+ * Collator myCollator = Collator.getInstance();
+ * CollationKey[] keys = new CollationKey[3];
+ * keys[0] = myCollator.getCollationKey("Tom");
+ * keys[1] = myCollator.getCollationKey("Dick");
+ * keys[2] = myCollator.getCollationKey("Harry");
+ * sort( keys );
+ *
+ * //...
+ *
+ * // Inside body of sort routine, compare keys this way
+ * if( keys[i].compareTo( keys[j] ) &gt; 0 )
+ *    // swap keys[i] and keys[j]
+ *
+ * //...
+ *
+ * // Finally, when we've returned from sort.
+ * System.out.println( keys[0].getSourceString() );
+ * System.out.println( keys[1].getSourceString() );
+ * System.out.println( keys[2].getSourceString() );
+ * </pre>
+ * </blockquote>
+ *
+ * <p>This class is not subclassable.</p>
+ *
+ * @see Collator
+ * @see RuleBasedCollator
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class CollationKey implements Comparable
+{
+ // public inner classes -------------------------------------------------
+
+ /**
+ * Options used by the API CollationKey.getBound() to select which kind
+ * of bound CollationKey is produced.
+ * @stable ICU 2.6
+ */
+ public static final class BoundMode
+ {
+ /*
+ * Do not change the values assigned to the members of this enum.
+ * CollationKey.getBound() uses the value as the number of extra
+ * bytes the bound type needs.
+ */
+
+ /**
+ * Lower bound
+ * @stable ICU 2.6
+ */
+ public static final int LOWER = 0;
+
+ /**
+ * Upper bound that will match strings of exact size
+ * @stable ICU 2.6
+ */
+ public static final int UPPER = 1;
+
+ /**
+ * Upper bound that will match all the strings that have the same
+ * initial substring as the given string
+ * @stable ICU 2.6
+ */
+ public static final int UPPER_LONG = 2;
+
+ /**
+ * Number of bound modes
+ * @stable ICU 2.6
+ */
+ public static final int COUNT = 3;
+
+ /**
+ * Private constructor, this class only holds constants and is never
+ * instantiated
+ */
+ ///CLOVER:OFF
+ private BoundMode(){}
+ ///CLOVER:ON
+ }
+
+ // public constructor ---------------------------------------------------
+
+    /**
+     * CollationKey constructor.
+     * This constructor is given public access, unlike the JDK version, to
+     * allow access to users extending the Collator class. See
+     * {@link Collator#getCollationKey(String)}.
+     * The byte array is stored as is, it is not copied.
+     * @param source string this CollationKey is to represent
+     * @param key array of bytes that represent the collation order of argument
+     *            source terminated by a null
+     * @see Collator
+     * @stable ICU 2.8
+     */
+    public CollationKey(String source, byte key[])
+    {
+        this.m_source_ = source;
+        this.m_key_ = key;
+        // hash code and length are computed on first use
+        this.m_length_ = -1;
+        this.m_hashCode_ = 0;
+    }
+
+    /**
+     * CollationKey constructor that adopts the byte array obtained from
+     * key.releaseBytes(). Per the original contract, key will have a null
+     * byte array after this construction.
+     * @param source string this CollationKey is to represent
+     * @param key RawCollationKey object that represents the collation order of
+     *            argument source.
+     * @see Collator
+     * @see RawCollationKey
+     * @stable ICU 2.8
+     */
+    public CollationKey(String source, RawCollationKey key)
+    {
+        this(source, key.releaseBytes());
+    }
+
+ // public getters -------------------------------------------------------
+
+    /**
+     * Returns the source string this CollationKey was created with.
+     * @return source string that this CollationKey represents, may be null
+     *         for keys created by getBound() or merge()
+     * @stable ICU 2.8
+     */
+    public String getSourceString()
+    {
+        return this.m_source_;
+    }
+
+    /**
+     * <p>Duplicates and returns the value of this CollationKey as a sequence
+     * of big-endian bytes terminated by a null. Only the bytes up to and
+     * including the first null are copied.</p>
+     *
+     * <p>If two CollationKeys can be legitimately compared, then one can
+     * compare the byte arrays of each to obtain the same result, e.g.</p>
+     * <pre>
+     * byte key1[] = collationkey1.toByteArray();
+     * byte key2[] = collationkey2.toByteArray();
+     * int key, targetkey;
+     * int i = 0;
+     * do {
+     *     key = key1[i] &amp; 0xFF;
+     *     targetkey = key2[i] &amp; 0xFF;
+     *     if (key &lt; targetkey) {
+     *         System.out.println("String 1 is less than string 2");
+     *         return;
+     *     }
+     *     if (targetkey &lt; key) {
+     *         System.out.println("String 1 is more than string 2");
+     *         return;
+     *     }
+     *     i ++;
+     * } while (key != 0 &amp;&amp; targetkey != 0);
+     *
+     * System.out.println("Strings are equal.");
+     * </pre>
+     *
+     * @return CollationKey value in a sequence of big-endian bytes
+     *         terminated by a null.
+     * @stable ICU 2.8
+     */
+    public byte[] toByteArray()
+    {
+        // locate the terminating null
+        int terminator = 0;
+        while (m_key_[terminator] != 0) {
+            terminator ++;
+        }
+        // the copy includes the terminator
+        int size = terminator + 1;
+        byte copy[] = new byte[size];
+        System.arraycopy(m_key_, 0, copy, 0, size);
+        return copy;
+    }
+
+ // public other methods -------------------------------------------------
+
+    /**
+     * <p>Compare this CollationKey to another CollationKey. The keys are
+     * compared byte by byte as unsigned values up to the terminating null,
+     * which reflects the collation rules of the Collator that created
+     * them.</p>
+     *
+     * <p><strong>Note:</strong> Comparison between CollationKeys
+     * created by different Collators might return incorrect
+     * results. See class documentation.</p>
+     *
+     * @param target target CollationKey
+     * @return -1 if this CollationKey is less than target, 0 if they are
+     *         equal, and 1 if this CollationKey is greater than target.
+     * @exception NullPointerException is thrown if argument is null.
+     * @see Collator#compare(String, String)
+     * @stable ICU 2.8
+     */
+    public int compareTo(CollationKey target)
+    {
+        int index = 0;
+        while (true) {
+            int mine = m_key_[index] & 0xff;
+            int theirs = target.m_key_[index] & 0xff;
+            if (mine != theirs) {
+                return mine < theirs ? -1 : 1;
+            }
+            if (mine == 0) {
+                // both keys ended at the same position
+                return 0;
+            }
+            ++ index;
+        }
+    }
+
+    /**
+     * <p>Compare this CollationKey and the specified Object for
+     * equality. The collation rules of the Collator that created
+     * this key are applied.</p>
+     *
+     * <p>See note in compareTo(CollationKey) for warnings about
+     * possible incorrect results.</p>
+     *
+     * @param target the object to compare to.
+     * @return true if target is a CollationKey and the two keys compare as
+     *         equal, false otherwise (including when target is null or of
+     *         another type).
+     * @see #compareTo(CollationKey)
+     * @stable ICU 2.8
+     */
+    public boolean equals(Object target)
+    {
+        return target instanceof CollationKey
+               && equals((CollationKey)target);
+    }
+
+    /**
+     * <p>
+     * Compare this CollationKey and the argument target CollationKey for
+     * equality. The keys are equal when their bytes match up to and
+     * including the terminating null.
+     * The collation
+     * rules of the Collator object which created these objects are applied.
+     * </p>
+     * <p>
+     * See note in compareTo(CollationKey) for warnings of incorrect results
+     * </p>
+     * @param target the CollationKey to compare to.
+     * @return true if two objects are equal, false otherwise (including
+     *         when target is null).
+     * @stable ICU 2.8
+     */
+    public boolean equals(CollationKey target)
+    {
+        if (this == target) {
+            return true;
+        }
+        if (target == null) {
+            return false;
+        }
+        // walk both keys in step until they differ or the null is reached
+        for (int index = 0; m_key_[index] == target.m_key_[index]; index ++) {
+            if (m_key_[index] == 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * <p>Returns a hash code for this CollationKey. The hash value is
+     * calculated on the key itself, not the String from which the key was
+     * created. Thus if x and y are CollationKeys, then
+     * x.hashCode() == y.hashCode() if x.equals(y) is true. This allows
+     * language-sensitive comparison in a hash table.</p>
+     *
+     * <p>The value is computed on the first call and cached. Only the bytes
+     * before the terminating null contribute to it.</p>
+     *
+     * @return the hash value.
+     * @stable ICU 2.8
+     */
+    public int hashCode()
+    {
+        if (m_hashCode_ == 0) {
+            if (m_key_ == null) {
+                m_hashCode_ = 1;
+            }
+            else {
+                // pack the key bytes pairwise into chars and reuse the
+                // String hash
+                int size = m_key_.length >> 1;
+                StringBuilder key = new StringBuilder(size);
+                int i = 0;
+                while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
+                    // Mask the low byte: a byte >= 0x80 is sign-extended
+                    // when promoted to int and would otherwise overwrite
+                    // the high byte with 0xFF.
+                    key.append((char)((m_key_[i] << 8)
+                                      | (m_key_[i + 1] & 0xff)));
+                    i += 2;
+                }
+                if (m_key_[i] != 0) {
+                    // odd number of bytes, the last one stands alone
+                    key.append((char)(m_key_[i] << 8));
+                }
+                m_hashCode_ = key.toString().hashCode();
+            }
+        }
+        return m_hashCode_;
+    }
+
+    /**
+     * <p>
+     * Produce a bound for the sort order of a given collation key and a
+     * strength level. This API does not attempt to find a bound for the
+     * CollationKey String representation, hence null will be returned in its
+     * place.
+     * </p>
+     * <p>
+     * Resulting bounds can be used to produce a range of strings that are
+     * between upper and lower bounds. For example, if bounds are produced
+     * for a sortkey of string "smith", strings between upper and lower
+     * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
+     * </p>
+     * <p>
+     * There are two upper bounds that can be produced. If BoundMode.UPPER
+     * is produced, strings matched would be as above. However, if a bound
+     * is produced using BoundMode.UPPER_LONG, the above example will
+     * also match "Smithsonian" and similar.
+     * </p>
+     * <p>
+     * For more on usage, see example in test procedure
+     * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
+     * </p>
+     * <p>
+     * Collation keys produced may be compared using the compare API.
+     * </p>
+     * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
+     *              produces a lower inclusive bound, BoundMode.UPPER, that
+     *              produces upper bound that matches strings of the same
+     *              length or BoundMode.UPPER_LONG that matches strings that
+     *              have the same starting substring as the source string.
+     * @param noOfLevels Strength levels required in the resulting bound
+     *                 (for most uses, the recommended value is PRIMARY). This
+     *                 strength should be less than the maximum strength of
+     *                 this CollationKey.
+     *                 See users guide for explanation on the strength levels a
+     *                 collation key can have.
+     * @return the result bounded CollationKey with a valid sort order but
+     *         a null String representation.
+     * @exception IllegalArgumentException thrown when the strength level
+     *            requested is higher than or equal to the strength in this
+     *            CollationKey, or when boundType is not one of the
+     *            BoundMode values.
+     *            In the case of a strength error, information
+     *            about the maximum strength to use will be returned in the
+     *            Exception. The user can then call getBound() again with the
+     *            appropriate strength.
+     * @see CollationKey
+     * @see CollationKey.BoundMode
+     * @see Collator#PRIMARY
+     * @see Collator#SECONDARY
+     * @see Collator#TERTIARY
+     * @see Collator#QUATERNARY
+     * @see Collator#IDENTICAL
+     * @stable ICU 2.6
+     */
+    public CollationKey getBound(int boundType, int noOfLevels)
+    {
+        // Scan the string until we skip enough of the key OR reach the end of
+        // the key
+        int offset = 0;
+        int keystrength = Collator.PRIMARY;
+
+        if (noOfLevels > Collator.PRIMARY) {
+            while (offset < m_key_.length && m_key_[offset] != 0) {
+                if (m_key_[offset ++]
+                        == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
+                    keystrength ++;
+                    noOfLevels --;
+                    if (noOfLevels == Collator.PRIMARY
+                        || offset == m_key_.length || m_key_[offset] == 0) {
+                        // step back onto the level terminator, it is not
+                        // part of the bound
+                        offset --;
+                        break;
+                    }
+                }
+            }
+        }
+
+        if (noOfLevels > 0) {
+            throw new IllegalArgumentException(
+                                  "Source collation key has only "
+                                  + keystrength
+                                  + " strength level. Call getBound() again "
+                                  + " with noOfLevels < " + keystrength);
+        }
+
+        // Reject unknown bound types before the value is used to size the
+        // result. A negative value would otherwise surface as a
+        // NegativeArraySizeException or ArrayIndexOutOfBoundsException
+        // instead of the documented IllegalArgumentException.
+        if (boundType < BoundMode.LOWER || boundType >= BoundMode.COUNT) {
+            throw new IllegalArgumentException(
+                                  "Illegal boundType argument");
+        }
+
+        // READ ME: this code assumes that the values for BoundMode variables
+        // will not change. They are set so that the enum value corresponds to
+        // the number of extra bytes each bound type needs.
+        byte resultkey[] = new byte[offset + boundType + 1];
+        System.arraycopy(m_key_, 0, resultkey, 0, offset);
+        switch (boundType) {
+            case BoundMode.LOWER: // = 0
+                // Lower bound just gets terminated. No extra bytes
+                break;
+            case BoundMode.UPPER: // = 1
+                // Upper bound needs one extra byte
+                resultkey[offset ++] = 2;
+                break;
+            case BoundMode.UPPER_LONG: // = 2
+                // Upper long bound needs two extra bytes
+                resultkey[offset ++] = (byte)0xFF;
+                resultkey[offset ++] = (byte)0xFF;
+                break;
+            default:
+                // not reachable after the check above, kept as a safeguard
+                throw new IllegalArgumentException(
+                                                "Illegal boundType argument");
+        }
+        resultkey[offset ++] = 0;
+        return new CollationKey(null, resultkey);
+    }
+
+
+
+ /**
+ * <p>
+ * Merges this CollationKey with another. Only the sorting order of the
+ * CollationKeys will be merged. This API does not attempt to merge the
+ * String representations of the CollationKeys, hence null will be returned
+ * as the String representation.
+ * </p>
+ * <p>
+ * The strength levels are merged with their corresponding counterparts
+ * (PRIMARIES with PRIMARIES, SECONDARIES with SECONDARIES etc.).
+ * </p>
+ * <p>
+ * If one key has more levels than the other, the content of its remaining
+ * levels is appended to the result unchanged.
+ * </p>
+ * <p>
+ * Between the values from the same level a separator (02) is inserted.
+ * example (uncompressed):
+ * <pre>
+ * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ * will be merged as
+ * 191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
+ * </pre>
+ * </p>
+ * <p>
+ * This allows for concatenating of first and last names for sorting, among
+ * other things.
+ * </p>
+ *
+ * @param source CollationKey to merge with
+ * @return a CollationKey that contains the valid merged sorting order
+ * with a null String representation,
+ * i.e. new CollationKey(null, merge_sort_order)
+ * @exception IllegalArgumentException thrown if source CollationKey
+ * argument is null or of 0 length.
+ * @stable ICU 2.6
+ */
+ public CollationKey merge(CollationKey source)
+ {
+ // check arguments
+ if (source == null || source.getLength() == 0) {
+ throw new IllegalArgumentException(
+ "CollationKey argument can not be null or of 0 length");
+ }
+
+ getLength(); // gets the length of this sort key, caches it in m_length_
+ int sourcelength = source.getLength();
+ // 1 extra for the last strength that has no separators, 1 for the null
+ byte result[] = new byte[m_length_ + sourcelength + 2];
+
+ // merge the sort keys with the same number of levels
+ int rindex = 0;
+ int index = 0;
+ int sourceindex = 0;
+ while (true) {
+ // while both have another level
+ // copy level from src1 not including 00 or 01
+ // bytes are signed: negative values are the unsigned bytes >= 0x80
+ while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) {
+ result[rindex ++] = m_key_[index ++];
+ }
+
+ // add a 02 merge separator
+ result[rindex ++] = MERGE_SEPERATOR_;
+
+ // copy level from src2 not including 00 or 01
+ while (source.m_key_[sourceindex] < 0
+ || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) {
+ result[rindex ++] = source.m_key_[sourceindex ++];
+ }
+
+ // if both sort keys have another level, then add a 01 level
+ // separator and continue
+ if (m_key_[index] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_
+ && source.m_key_[sourceindex]
+ == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
+ ++ index;
+ ++ sourceindex;
+ result[rindex ++] = RuleBasedCollator.SORT_LEVEL_TERMINATOR_;
+ }
+ else {
+ break;
+ }
+ }
+
+ // here, at least one sort key is finished now, but the other one
+ // might have some contents left from containing more levels;
+ // that contents is just appended to the result
+ if (m_key_[index] != 0) {
+ System.arraycopy(m_key_, index, result, rindex,
+ m_length_ - index);
+ }
+ else if (source.m_key_[sourceindex] != 0) {
+ System.arraycopy(source.m_key_, sourceindex, result, rindex,
+ source.m_length_ - sourceindex);
+ }
+ result[result.length - 1] = 0;
+
+ // trust that neither sort key contained illegally embedded zero bytes
+ return new CollationKey(null, result);
+ }
+
+ // private data members -------------------------------------------------
+
+ /**
+ * Sequence of bytes that represents the sort key, terminated by a null
+ */
+ private byte m_key_[];
+
+ /**
+ * Source string this CollationKey represents, null for keys produced by
+ * getBound() and merge()
+ */
+ private String m_source_;
+
+ /**
+ * Cached hash code for the key, 0 until hashCode() has computed it
+ */
+ private int m_hashCode_;
+ /**
+ * Cached length of the key in bytes, not counting the terminating null.
+ * It is -1 until getLength() has computed it.
+ */
+ private int m_length_;
+ /**
+ * Collation key merge separator
+ */
+ private static final int MERGE_SEPERATOR_ = 2;
+
+ // private methods ------------------------------------------------------
+
+    /**
+     * Gets the length of the CollationKey, i.e. the number of bytes before
+     * the first null, or the whole array length when there is no null.
+     * The value is computed once and cached in m_length_.
+     * @return length of the CollationKey
+     */
+    private int getLength()
+    {
+        if (m_length_ < 0) {
+            int end = 0;
+            while (end < m_key_.length && m_key_[end] != 0) {
+                end ++;
+            }
+            m_length_ = end;
+        }
+        return m_length_;
+    }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java b/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
new file mode 100644
index 00000000000..edc9a2a9a03
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
@@ -0,0 +1,4247 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.ibm.icu.impl.IntTrieBuilder;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.impl.TrieBuilder;
+import com.ibm.icu.impl.TrieIterator;
+import com.ibm.icu.impl.UCharacterProperty;
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UCharacterCategory;
+import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ * Class for building a collator from a list of collation rules. This class is
+ * uses CollationRuleParser
+ *
+ * @author Syn Wee Quek
+ * @since release 2.2, June 11 2002
+ */
+final class CollationParsedRuleBuilder {
+ // package private constructors ------------------------------------------
+
+ /**
+ * Constructor. Parses the argument rules with a CollationRuleParser,
+ * assembles the parser's token list and creates a utility collation
+ * element iterator on the UCA collator.
+ *
+ * @param rules
+ * collation rules
+ * @exception ParseException
+ * thrown when argument rules have an invalid syntax
+ */
+ CollationParsedRuleBuilder(String rules) throws ParseException {
+ m_nfcImpl_.getFCDTrie(); // initialize the optional FCD trie
+ m_parser_ = new CollationRuleParser(rules);
+ m_parser_.assembleTokenList();
+ m_utilColEIter_ = RuleBasedCollator.UCA_
+ .getCollationElementIterator("");
+ }
+
+ // package private inner classes -----------------------------------------
+
+ /**
+ * Inverse UCA wrapper
+ */
+ static class InverseUCA {
+ // package private constructor ---------------------------------------
+
+ InverseUCA() {
+ }
+
+ // package private data member ---------------------------------------
+
+ /**
+ * Inverse UCA table, stored as consecutive groups of 3 ints. The first
+ * int of a group is a CE and the second its continuation CE; the
+ * third is not read by the methods visible in this part of the class.
+ * findInverseCE() runs a binary search over it, so the groups are
+ * presumably sorted by CE - TODO confirm against the table loader.
+ */
+ int m_table_[];
+ /**
+ * Array list of continuation characters
+ */
+ char m_continuations_[];
+
+ /**
+ * UCA version of inverse UCA table
+ */
+ VersionInfo m_UCA_version_;
+
+ // package private method --------------------------------------------
+
+        /**
+         * Finds the entry of the inverse table that precedes the argument
+         * ces at the given strength. Starting at the position found by
+         * findInverseCE(), the table is walked backwards until an entry
+         * differs from the argument ces under the strength mask, or the
+         * start of the table is reached.
+         *
+         * @param ce
+         *            ce to test
+         * @param contce
+         *            continuation ce to test
+         * @param strength
+         *            collation strength
+         * @param prevresult
+         *            an array to store the return results previous inverse ce
+         *            and previous inverse continuation ce
+         * @return position in the inverse table of the previous ce, or -1
+         *         if the lookup position is negative
+         */
+        final int getInversePrevCE(int ce, int contce, int strength,
+                int prevresult[]) {
+            int position = findInverseCE(ce, contce);
+            if (position < 0) {
+                prevresult[0] = CollationElementIterator.NULLORDER;
+                return -1;
+            }
+
+            final int mask = STRENGTH_MASK_[strength];
+            final int maskedCE = ce & mask;
+            final int maskedContCE = contce & mask;
+
+            prevresult[0] = maskedCE;
+            prevresult[1] = maskedContCE;
+
+            // the position check prevents falling off the start of the
+            // table; there we end up in a singularity - zero
+            while (position > 0
+                    && (prevresult[0] & mask) == maskedCE
+                    && (prevresult[1] & mask) == maskedContCE) {
+                position--;
+                prevresult[0] = m_table_[3 * position];
+                prevresult[1] = m_table_[3 * position + 1];
+            }
+            return position;
+        }
+
+        /**
+         * Finds the strength at which two ces (with their continuations)
+         * are the same under the strength mask, starting at TERTIARY and
+         * going down.
+         *
+         * @param CE
+         *            ce to compare
+         * @param contCE
+         *            continuation of the ce to compare
+         * @param prevCE
+         *            ce to compare against
+         * @param prevContCE
+         *            continuation of the ce to compare against
+         * @return the first strength, counting down from TERTIARY, at
+         *         which the masked ces are equal; 0 if none is found
+         */
+        final int getCEStrengthDifference(int CE, int contCE, int prevCE,
+                int prevContCE) {
+            int strength = Collator.TERTIARY;
+            while (strength != 0) {
+                int mask = STRENGTH_MASK_[strength];
+                boolean sameCE = (prevCE & mask) == (CE & mask);
+                boolean sameContCE = (prevContCE & mask) == (contCE & mask);
+                if (sameCE && sameContCE) {
+                    break;
+                }
+                strength--;
+            }
+            return strength;
+        }
+
+        /**
+         * Compares two ces together with their continuations, level by
+         * level: the upper 16 bits first, then the bits under 0xFF00, then
+         * the low byte. The second int of each pair only takes part when
+         * it is a continuation ce.
+         *
+         * @param source0
+         *            source ce
+         * @param source1
+         *            candidate continuation of the source ce
+         * @param target0
+         *            target ce
+         * @param target1
+         *            candidate continuation of the target ce
+         * @return 0 if both pairs are identical, otherwise the result of
+         *         the unsigned comparison at the first level that differs
+         */
+        private int compareCEs(int source0, int source1, int target0,
+                int target1) {
+            int sourceCont = RuleBasedCollator.isContinuation(source1)
+                    ? source1 : 0;
+            int targetCont = RuleBasedCollator.isContinuation(target1)
+                    ? target1 : 0;
+
+            if (source0 == target0 && sourceCont == targetCont) {
+                return 0;
+            }
+
+            // primary weights
+            int s = (source0 & 0xFFFF0000) | ((sourceCont & 0xFFFF0000) >>> 16);
+            int t = (target0 & 0xFFFF0000) | ((targetCont & 0xFFFF0000) >>> 16);
+            if (s == t) {
+                // secondary weights
+                s = (source0 & 0x0000FF00) | ((sourceCont & 0x0000FF00) >> 8);
+                t = (target0 & 0x0000FF00) | ((targetCont & 0x0000FF00) >> 8);
+                if (s == t) {
+                    // tertiary weights
+                    s = ((source0 & 0x000000FF) << 8)
+                            | (sourceCont & 0x000000FF);
+                    t = ((target0 & 0x000000FF) << 8)
+                            | (targetCont & 0x000000FF);
+                }
+            }
+            return Utility.compareUnsigned(s, t);
+        }
+
+        /**
+         * Binary search for the argument CEs in the inverse table.
+         *
+         * @param ce
+         *            CE to be tested
+         * @param contce
+         *            continuation CE
+         * @return position of the matching entry in the inverse table; if
+         *         there is no exact match, the last position that was
+         *         probed (0 if nothing was probed)
+         */
+        int findInverseCE(int ce, int contce) {
+            int low = 0;
+            int high = m_table_.length / 3;
+            int probe = 0;
+
+            while (low < high - 1) {
+                probe = (high + low) >> 1;
+                int tableCE = m_table_[3 * probe];
+                int tableContCE = m_table_[3 * probe + 1];
+                int order = compareCEs(tableCE, tableContCE, ce, contce);
+                if (order == 0) {
+                    return probe;
+                }
+                if (order > 0) {
+                    high = probe;
+                } else {
+                    low = probe;
+                }
+            }
+
+            return probe;
+        }
+
+ /**
+ * Getting gap offsets in the inverse UCA. Resets the gap bookkeeping
+ * of the token list (m_gapsHi_, m_gapsLo_, m_numStr_, m_fStrToken_,
+ * m_lStrToken_, m_pos_) and then fills in the low and high gap
+ * boundaries. Three cases are handled: a base CE whose top byte lies
+ * in the implicit primary range, an indirect list header with a next
+ * CE, and the general case where the boundaries are looked up in the
+ * inverse table per strength.
+ *
+ * @param listheader
+ * parsed token lists
+ * @exception Exception
+ * thrown when error occurs while finding the collation
+ * gaps, i.e. when getInverseNext() returns a negative
+ * position for a token strength
+ */
+ void getInverseGapPositions(
+ CollationRuleParser.TokenListHeader listheader)
+ throws Exception {
+ // reset all the gaps
+ CollationRuleParser.Token token = listheader.m_first_;
+ int tokenstrength = token.m_strength_;
+
+ for (int i = 0; i < 3; i++) {
+ listheader.m_gapsHi_[3 * i] = 0;
+ listheader.m_gapsHi_[3 * i + 1] = 0;
+ listheader.m_gapsHi_[3 * i + 2] = 0;
+ listheader.m_gapsLo_[3 * i] = 0;
+ listheader.m_gapsLo_[3 * i + 1] = 0;
+ listheader.m_gapsLo_[3 * i + 2] = 0;
+ listheader.m_numStr_[i] = 0;
+ listheader.m_fStrToken_[i] = null;
+ listheader.m_lStrToken_[i] = null;
+ listheader.m_pos_[i] = -1;
+ }
+
+ if ((listheader.m_baseCE_ >>> 24) >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_
+ && (listheader.m_baseCE_ >>> 24) <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) {
+ // implicits - the high boundary is the implicit following the base
+ listheader.m_pos_[0] = 0;
+ int t1 = listheader.m_baseCE_;
+ int t2 = listheader.m_baseContCE_;
+ listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
+ listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
+ listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
+ int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_
+ | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16;
+ primaryCE = RuleBasedCollator.impCEGen_
+ .getImplicitFromRaw(RuleBasedCollator.impCEGen_
+ .getRawFromImplicit(primaryCE) + 1);
+
+ t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
+ t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_
+ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+
+ // if (listheader.m_baseCE_ < 0xEF000000) {
+ // // first implicits have three byte primaries, with a gap of
+ // // one so we esentially need to add 2 to the top byte in
+ // // listheader.m_baseContCE_
+ // t2 += 0x02000000;
+ // }
+ // else {
+ // // second implicits have four byte primaries, with a gap of
+ // // IMPLICIT_LAST2_MULTIPLIER_
+ // // Now, this guy is not really accessible here, so until we
+ // // find a better way to pass it around, assume that the gap
+ // is 1
+ // t2 += 0x00020000;
+ // }
+ listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
+ listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
+ listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
+ } else if (listheader.m_indirect_ == true
+ && listheader.m_nextCE_ != 0) {
+ listheader.m_pos_[0] = 0;
+ int t1 = listheader.m_baseCE_;
+ int t2 = listheader.m_baseContCE_;
+ listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
+ listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
+ listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
+ t1 = listheader.m_nextCE_;
+ t2 = listheader.m_nextContCE_;
+ listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
+ listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
+ listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
+ } else {
+ while (true) {
+ if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
+ listheader.m_pos_[tokenstrength] = getInverseNext(
+ listheader, tokenstrength);
+ if (listheader.m_pos_[tokenstrength] >= 0) {
+ listheader.m_fStrToken_[tokenstrength] = token;
+ } else {
+ // The CE must be implicit, since it's not in the
+ // table
+ // Error
+ throw new Exception("Internal program error");
+ }
+ }
+
+ while (token != null && token.m_strength_ >= tokenstrength) {
+ if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
+ listheader.m_lStrToken_[tokenstrength] = token;
+ }
+ token = token.m_next_;
+ }
+ if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) {
+ // check if previous interval is the same and merge the
+ // intervals if it is so
+ if (listheader.m_pos_[tokenstrength] == listheader.m_pos_[tokenstrength + 1]) {
+ listheader.m_fStrToken_[tokenstrength] = listheader.m_fStrToken_[tokenstrength + 1];
+ listheader.m_fStrToken_[tokenstrength + 1] = null;
+ listheader.m_lStrToken_[tokenstrength + 1] = null;
+ listheader.m_pos_[tokenstrength + 1] = -1;
+ }
+ }
+ if (token != null) {
+ tokenstrength = token.m_strength_;
+ } else {
+ break;
+ }
+ }
+ for (int st = 0; st < 3; st++) {
+ int pos = listheader.m_pos_[st];
+ if (pos >= 0) {
+ int t1 = m_table_[3 * pos];
+ int t2 = m_table_[3 * pos + 1];
+ listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2,
+ Collator.PRIMARY);
+ listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2,
+ Collator.SECONDARY);
+ listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24
+ | (t2 & 0x3f) << 16;
+ // pos --;
+ // t1 = m_table_[3 * pos];
+ // t2 = m_table_[3 * pos + 1];
+ t1 = listheader.m_baseCE_;
+ t2 = listheader.m_baseContCE_;
+
+ listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2,
+ Collator.PRIMARY);
+ listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2,
+ Collator.SECONDARY);
+ listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24
+ | (t2 & 0x3f) << 16;
+ }
+ }
+ }
+ }
+
+ /**
+  * Walks the inverse table forward from the position of the list header's
+  * base CE pair until an entry is reached that differs from the base when
+  * both are masked with the strength mask, and records that entry in the
+  * list header as the next CE pair.
+  *
+  * @param listheader
+  *            token list header; m_baseCE_ and m_baseContCE_ are read,
+  *            m_nextCE_ and m_nextContCE_ are written
+  * @param strength
+  *            collation strength, used as index into STRENGTH_MASK_
+  * @return position of the next ce in the inverse table, or -1 if
+  *         findInverseCE reports a negative position for the base pair
+  */
+ private final int getInverseNext(
+         CollationRuleParser.TokenListHeader listheader, int strength) {
+     final int baseCE = listheader.m_baseCE_;
+     final int baseContCE = listheader.m_baseContCE_;
+
+     int index = findInverseCE(baseCE, baseContCE);
+     if (index < 0) {
+         return -1;
+     }
+
+     final int mask = STRENGTH_MASK_[strength];
+     final int maskedBase = baseCE & mask;
+     final int maskedBaseCont = baseContCE & mask;
+
+     // the search always advances at least one slot past the found position
+     int candidate;
+     int candidateCont;
+     do {
+         index++;
+         candidate = m_table_[3 * index];
+         candidateCont = m_table_[3 * index + 1];
+     } while ((candidate & mask) == maskedBase
+             && (candidateCont & mask) == maskedBaseCont);
+
+     listheader.m_nextCE_ = candidate;
+     listheader.m_nextContCE_ = candidateCont;
+     return index;
+ }
+ }
+
+ // package private data members ------------------------------------------
+
+ /**
+  * Inverse UCA, loaded once by the static initializer of this class
+  */
+ static final InverseUCA INVERSE_UCA_;
+
+ /**
+  * Message used when the UCA and the inverse UCA versions do not match
+  */
+ private static final String INV_UCA_VERSION_MISMATCH_ = "UCA versions of UCA and inverse UCA should match";
+
+ /**
+  * Message used when the UCA or the inverse UCA is not available
+  */
+ private static final String UCA_NOT_INSTANTIATED_ = "UCA is not instantiated!";
+
+ /**
+  * Initializing the inverse UCA. Fails with a RuntimeException when the
+  * inverse UCA cannot be read, when the UCA itself is missing, or when the
+  * two versions differ.
+  */
+ static {
+     InverseUCA temp = null;
+     IOException loadFailure = null;
+     try {
+         temp = CollatorReader.getInverseUCA();
+     } catch (IOException e) {
+         // keep the failure so it is reported as the cause below instead
+         // of being lost
+         loadFailure = e;
+     }
+
+     if (temp == null || RuleBasedCollator.UCA_ == null) {
+         // a null cause is permitted when the read itself did not fail
+         throw new RuntimeException(UCA_NOT_INSTANTIATED_, loadFailure);
+     }
+     if (!temp.m_UCA_version_
+             .equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
+         throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
+     }
+
+     INVERSE_UCA_ = temp;
+ }
+
+ // package private methods -----------------------------------------------
+
+ /**
+  * Fills the argument collator from the parsed rules and then applies the
+  * parsed default options to it.
+  *
+  * @param collator
+  *            to set
+  * @exception Exception
+  *                thrown when internal program error occurs
+  */
+ void setRules(RuleBasedCollator collator) throws Exception {
+     boolean onlyOptions = m_parser_.m_resultLength_ <= 0
+             && m_parser_.m_removeSet_ == null;
+     if (onlyOptions) {
+         // no rules, but no error either: init the collator from UCA
+         collator.setWithUCATables();
+     } else {
+         // there is a set of rules, build the tailoring from it
+         assembleTailoringTable(collator);
+     }
+     // in both cases the options are set last
+     m_parser_.setDefaultOptionsInCollator(collator);
+ }
+
+ /**
+  * Adds to the build table every code point of the inclusive range that
+  * is either unmapped (CE_NOT_FOUND_) or mapped to a contraction table
+  * element whose first CE is CE_NOT_FOUND_. The CEs are taken from the
+  * utility collation element iterator.
+  *
+  * @param t
+  *            build table to add to
+  * @param start
+  *            first code point of the range
+  * @param end
+  *            last code point of the range
+  */
+ private void copyRangeFromUCA(BuildTable t, int start, int end) {
+     for (int cp = start; cp <= end; cp++) {
+         int ce = t.m_mapping_.getValue(cp);
+         boolean missing = ce == CE_NOT_FOUND_;
+         if (!missing) {
+             // contractions that are missing the starting element also
+             // count as missing. Looks like latin-1 should be done before
+             // assembling the table, even if it results in more false
+             // closure elements
+             missing = isContractionTableElement(ce)
+                     && getCE(t.m_contractions_, ce, 0) == CE_NOT_FOUND_;
+         }
+         if (!missing) {
+             continue;
+         }
+
+         String text = UCharacter.toString(cp);
+         m_utilElement_.m_uchars_ = text;
+         m_utilElement_.m_cPoints_ = text;
+         m_utilElement_.m_prefixChars_ = null;
+         m_utilElement_.m_prefix_ = 0;
+         m_utilElement_.m_CELength_ = 0;
+
+         m_utilColEIter_.setText(text);
+         while (ce != CollationElementIterator.NULLORDER) {
+             ce = m_utilColEIter_.next();
+             if (ce == CollationElementIterator.NULLORDER) {
+                 break;
+             }
+             m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce;
+         }
+         addAnElement(t, m_utilElement_);
+     }
+ }
+
+ /**
+ * Assembles the tailoring table for the collator from the parsed rules. In
+ * order, this method: initializes the CE buffers of every non-empty token
+ * list, stores the variable top value and unlinks the variable top token
+ * from its list, creates the elements of every token list, copies Latin-1,
+ * the copy set and the UCA contractions that are not tailored from the
+ * UCA, adds the completely ignorable elements, runs the canonical closure
+ * and finally assembles the table.
+ * <p>
+ * Design notes for the CE assignment:
+ * <p>
+ * 2. Eliminate the negative lists by doing the following for each non-null
+ * negative list: o if previousCE(baseCE, strongestN) != some ListHeader X's
+ * baseCE, create new ListHeader X o reverse the list, add to the end of X's
+ * positive list. Reset the strength of the first item you add, based on the
+ * stronger strength levels of the two lists.
+ * <p>
+ * 3. For each ListHeader with a non-null positive list: o Find all
+ * character strings with CEs between the baseCE and the next/previous CE,
+ * at the strength of the first token. Add these to the tailoring. - That
+ * is, if UCA has ... x <<< X << x' <<< X' < y ..., and the tailoring has &
+ * x < z... - Then we change the tailoring to & x <<< X << x' <<< X' < z ...
+ * <p>
+ * It is possible that this part should be done even while constructing list
+ * The problem is that it is unknown what is going to be the strongest
+ * weight. So we might as well do it here o Allocate CEs for each token in
+ * the list, based on the total number N of the largest level difference,
+ * and the gap G between baseCE and nextCE at that level. The relation *
+ * between the last item and nextCE is the same as the strongest strength. o
+ * Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) - There are 3
+ * primary items: a, d, e. Fit them into the primary gap. Then fit b and c
+ * into the secondary gap between a and d, then fit q into the tertiary gap
+ * between b and c. o Example: baseCE << b <<< q << c * nextCE(X,2) - There
+ * are 2 secondary items: b, c. Fit them into the secondary gap. Then fit q
+ * into the tertiary gap between b and c. o When incrementing primary
+ * values, we will not cross high byte boundaries except where there is only
+ * a single-byte primary. That is to ensure that the script reordering will
+ * continue to work.
+ *
+ * @param collator
+ * the rule based collator to update
+ * @exception Exception
+ * thrown when internal program error occurs
+ */
+ void assembleTailoringTable(RuleBasedCollator collator) throws Exception {
+
+ for (int i = 0; i < m_parser_.m_resultLength_; i++) {
+ // now we need to generate the CEs
+ // We stuff the initial value in the buffers, and increase the
+ // appropriate buffer according to strength
+ if (m_parser_.m_listHeader_[i].m_first_ != null) {
+ // if there are any elements
+ // due to the way parser works, subsequent tailorings
+ // may remove all the elements from a sequence, therefore
+ // leaving an empty tailoring sequence.
+ initBuffers(m_parser_.m_listHeader_[i]);
+ }
+ }
+
+ if (m_parser_.m_variableTop_ != null) {
+ // stuff the variable top value
+ m_parser_.m_options_.m_variableTopValue_ = m_parser_.m_variableTop_.m_CE_[0] >>> 16; // upper 16 bits of the token's first CE
+ // remove it from the list
+ if (m_parser_.m_variableTop_.m_listHeader_.m_first_ == m_parser_.m_variableTop_) { // first
+ // in
+ // list
+ m_parser_.m_variableTop_.m_listHeader_.m_first_ = m_parser_.m_variableTop_.m_next_;
+ }
+ if (m_parser_.m_variableTop_.m_listHeader_.m_last_ == m_parser_.m_variableTop_) {
+ // last in list
+ m_parser_.m_variableTop_.m_listHeader_.m_last_ = m_parser_.m_variableTop_.m_previous_;
+ }
+ if (m_parser_.m_variableTop_.m_next_ != null) {
+ m_parser_.m_variableTop_.m_next_.m_previous_ = m_parser_.m_variableTop_.m_previous_;
+ }
+ if (m_parser_.m_variableTop_.m_previous_ != null) {
+ m_parser_.m_variableTop_.m_previous_.m_next_ = m_parser_.m_variableTop_.m_next_;
+ }
+ }
+
+ BuildTable t = new BuildTable(m_parser_);
+
+ // After this, we have assigned CE values to all regular CEs now we
+ // will go through list once more and resolve expansions, make
+ // UCAElements structs and add them to table
+ for (int i = 0; i < m_parser_.m_resultLength_; i++) {
+ // second pass over the token lists: the CE values were assigned
+ // by the first loop above, here each list is handed to
+ // createElements together with the build table
+ createElements(t, m_parser_.m_listHeader_[i]);
+ }
+
+ m_utilElement_.clear();
+ StringBuilder str = new StringBuilder();
+
+ // add latin-1 stuff
+ copyRangeFromUCA(t, 0, 0xFF);
+
+ // add stuff for copying
+ if (m_parser_.m_copySet_ != null) {
+ int i = 0;
+ for (i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) {
+ copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i),
+ m_parser_.m_copySet_.getRangeEnd(i));
+ }
+ }
+
+ // copy contractions from the UCA - this is felt mostly for cyrillic
+ char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
+ int offset = 0;
+ while (conts[offset] != 0) { // entries are read 3 chars at a time; a 0 in the first slot ends the loop
+ // tailoredCE = ucmpe32_get(t.m_mapping, *conts);
+ int tailoredCE = t.m_mapping_.getValue(conts[offset]);
+ Elements prefixElm = null;
+ if (tailoredCE != CE_NOT_FOUND_) {
+ boolean needToAdd = true;
+ if (isContractionTableElement(tailoredCE)) {
+ if (isTailored(t.m_contractions_, tailoredCE, conts,
+ offset + 1) == true) {
+ needToAdd = false;
+ }
+ }
+ if (!needToAdd && isPrefix(tailoredCE)
+ && conts[offset + 1] == 0) {
+ // pre-context character in UCA
+ // The format for pre-context character is
+ // conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
+ Elements elm = new Elements();
+ elm.m_cPoints_ = m_utilElement_.m_uchars_; // NOTE(review): lookup key comes from the util element's current m_uchars_, not from conts - confirm intended
+ elm.m_CELength_ = 0;
+ elm.m_uchars_ = UCharacter.toString(conts[offset]);
+ elm.m_prefixChars_ = UCharacter.toString(conts[offset + 2]);
+ elm.m_prefix_ = 0; // TODO(claireho) : confirm!
+ prefixElm = t.m_prefixLookup_.get(elm);
+ if ((prefixElm == null)
+ || (prefixElm.m_prefixChars_.charAt(0) != conts[offset + 2])) {
+ needToAdd = true;
+ }
+ }
+ if (m_parser_.m_removeSet_ != null
+ && m_parser_.m_removeSet_.contains(conts[offset])) {
+ needToAdd = false;
+ }
+
+ if (needToAdd == true) {
+ // we need to add if this contraction is not tailored.
+ if (conts[offset + 1] != 0) { // not precontext
+ m_utilElement_.m_prefix_ = 0;
+ m_utilElement_.m_prefixChars_ = null;
+ m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
+ str.delete(0, str.length());
+ str.append(conts[offset]);
+ str.append(conts[offset + 1]);
+ if (conts[offset + 2] != 0) {
+ str.append(conts[offset + 2]);
+ }
+ m_utilElement_.m_uchars_ = str.toString();
+ m_utilElement_.m_CELength_ = 0;
+ m_utilColEIter_.setText(m_utilElement_.m_uchars_);
+ } else { // add a pre-context element
+ int preKeyLen = 0;
+ str.delete(0, str.length()); // clean up
+ m_utilElement_.m_cPoints_ = UCharacter
+ .toString(conts[offset]);
+ m_utilElement_.m_CELength_ = 0;
+ m_utilElement_.m_uchars_ = UCharacter
+ .toString(conts[offset]);
+ m_utilElement_.m_prefixChars_ = UCharacter
+ .toString(conts[offset + 2]);
+ if (prefixElm == null) {
+ m_utilElement_.m_prefix_ = 0;
+ } else { // TODO (claireho): confirm!
+ m_utilElement_.m_prefix_ = m_utilElement_.m_prefix_; // NOTE(review): self-assignment, leaves m_prefix_ unchanged
+ // m_utilElement_.m_prefix_= prefixElm.m_prefix_;
+ }
+ m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
+ while (m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
+ // count number of keys for pre-context char.
+ preKeyLen++;
+ }
+ str.append(conts[offset + 2]);
+ str.append(conts[offset]);
+ m_utilColEIter_.setText(str.toString());
+ // Skip the keys for prefix character, then copy the
+ // rest to el.
+ while ((preKeyLen-- > 0)
+ && m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
+ continue;
+ }
+
+ }
+ while (true) { // copy the remaining CEs of the iterator into the util element
+ int CE = m_utilColEIter_.next();
+ if (CE != CollationElementIterator.NULLORDER) {
+ m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
+ } else {
+ break;
+ }
+ }
+ addAnElement(t, m_utilElement_);
+ }
+ } else if (m_parser_.m_removeSet_ != null
+ && m_parser_.m_removeSet_.contains(conts[offset])) {
+ copyRangeFromUCA(t, conts[offset], conts[offset]);
+ }
+
+ offset += 3;
+ }
+
+ // Add completely ignorable elements
+ processUCACompleteIgnorables(t);
+
+ // canonical closure
+ canonicalClosure(t);
+
+ // still need to produce compatibility closure
+ assembleTable(t, collator);
+ }
+
+ // private inner classes -------------------------------------------------
+
+ @SuppressWarnings("unused")
+ private static class CEGenerator {
+     // package private data members --------------------------------------
+
+     /**
+      * Weight ranges of this generator, every slot is preallocated by the
+      * constructor
+      */
+     WeightRange m_ranges_[];
+     int m_rangesLength_;
+     int m_byteSize_;
+     int m_start_;
+     int m_limit_;
+     int m_maxCount_;
+     int m_count_;
+     int m_current_;
+     /** forbidden Low */
+     int m_fLow_;
+     /** forbidden High */
+     int m_fHigh_;
+
+     // package private constructor ---------------------------------------
+
+     /**
+      * Creates a generator holding seven empty weight ranges
+      */
+     CEGenerator() {
+         m_ranges_ = new WeightRange[7];
+         for (int slot = 0; slot < m_ranges_.length; slot++) {
+             m_ranges_[slot] = new WeightRange();
+         }
+     }
+ }
+
+ /**
+  * A range of weights, ordered by the unsigned value of its start.
+  */
+ private static class WeightRange implements Comparable<WeightRange> {
+     // public methods ----------------------------------------------------
+
+     /**
+      * Compares this object with target by the unsigned value of m_start_.
+      * The interface is parameterized so that this method really is the
+      * implementation of Comparable.compareTo.
+      *
+      * @param target object to compare with
+      * @return 0 if equals, 1 if this is > target, -1 otherwise
+      */
+     public int compareTo(WeightRange target) {
+         return Utility.compareUnsigned(m_start_, target.m_start_);
+     }
+
+     /**
+      * Initialize, sets every member back to 0
+      */
+     public void clear() {
+         m_start_ = 0;
+         m_end_ = 0;
+         m_length_ = 0;
+         m_count_ = 0;
+         m_length2_ = 0;
+         m_count2_ = 0;
+     }
+
+     // package private data members --------------------------------------
+
+     int m_start_;
+     int m_end_;
+     int m_length_;
+     int m_count_;
+     int m_length2_;
+     int m_count2_;
+
+     // package private constructor ---------------------------------------
+
+     /**
+      * Creates a cleared range
+      */
+     WeightRange() {
+         clear();
+     }
+
+     /**
+      * Copy constructor. Cloneable is troublesome, needs to check for
+      * exception
+      *
+      * @param source
+      *            to clone
+      */
+     WeightRange(WeightRange source) {
+         m_start_ = source.m_start_;
+         m_end_ = source.m_end_;
+         m_length_ = source.m_length_;
+         m_count_ = source.m_count_;
+         m_length2_ = source.m_length2_;
+         m_count2_ = source.m_count2_;
+     }
+ }
+
+ /**
+  * Table of the end expansion CEs for Jamo together with a flag per entry
+  * and the maximum L, V and T sizes. Both lists start with one initial
+  * entry (0 and FALSE).
+  */
+ private static class MaxJamoExpansionTable {
+     // package private data members --------------------------------------
+
+     List<Integer> m_endExpansionCE_;
+     // vector of booleans
+     List<Boolean> m_isV_;
+     byte m_maxLSize_;
+     byte m_maxVSize_;
+     byte m_maxTSize_;
+
+     // package private constructor ---------------------------------------
+
+     /**
+      * Creates a table holding only the initial entry, all maximum sizes
+      * are 1
+      */
+     MaxJamoExpansionTable() {
+         m_endExpansionCE_ = new ArrayList<Integer>();
+         m_isV_ = new ArrayList<Boolean>();
+         // valueOf instead of the boxing constructor, no new object needed
+         m_endExpansionCE_.add(Integer.valueOf(0));
+         m_isV_.add(Boolean.FALSE);
+         m_maxLSize_ = 1;
+         m_maxVSize_ = 1;
+         m_maxTSize_ = 1;
+     }
+
+     /**
+      * Copy constructor, the lists are copied into new lists
+      *
+      * @param table
+      *            to copy
+      */
+     MaxJamoExpansionTable(MaxJamoExpansionTable table) {
+         m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
+         m_isV_ = new ArrayList<Boolean>(table.m_isV_);
+         m_maxLSize_ = table.m_maxLSize_;
+         m_maxVSize_ = table.m_maxVSize_;
+         m_maxTSize_ = table.m_maxTSize_;
+     }
+ }
+
+ /**
+  * Table of end expansion CEs and, in a parallel list, their expansion
+  * sizes. Both lists start with one initial 0 entry.
+  */
+ private static class MaxExpansionTable {
+     // package private constructor --------------------------------------
+
+     /**
+      * Creates a table holding only the initial entry
+      */
+     MaxExpansionTable() {
+         m_endExpansionCE_ = new ArrayList<Integer>();
+         m_expansionCESize_ = new ArrayList<Byte>();
+         // valueOf instead of the boxing constructors
+         m_endExpansionCE_.add(Integer.valueOf(0));
+         m_expansionCESize_.add(Byte.valueOf((byte) 0));
+     }
+
+     /**
+      * Copy constructor, the lists are copied into new lists
+      *
+      * @param table
+      *            to copy
+      */
+     MaxExpansionTable(MaxExpansionTable table) {
+         m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
+         m_expansionCESize_ = new ArrayList<Byte>(table.m_expansionCESize_);
+     }
+
+     // package private data member --------------------------------------
+
+     List<Integer> m_endExpansionCE_;
+     List<Byte> m_expansionCESize_;
+ }
+
+ /**
+  * Holder of a code point buffer and a list of CEs, both empty after
+  * construction.
+  */
+ private static class BasicContractionTable {
+     // package private data members -------------------------------------
+
+     StringBuilder m_codePoints_;
+     List m_CEs_;
+
+     // package private constructors -------------------------------------
+
+     /**
+      * Creates an empty table
+      */
+     BasicContractionTable() {
+         m_codePoints_ = new StringBuilder();
+         m_CEs_ = new ArrayList();
+     }
+ }
+
+ /**
+  * Contraction table of the build: a list of BasicContractionTable
+  * elements plus the flattened code points, CEs and offsets, bound to the
+  * trie builder it was created for.
+  */
+ private static class ContractionTable {
+     // package private data members ------------------------------------
+
+     /**
+      * Vector of BasicContractionTable
+      */
+     List m_elements_;
+     IntTrieBuilder m_mapping_;
+     StringBuilder m_codePoints_;
+     List m_CEs_;
+     List m_offsets_;
+     int m_currentTag_;
+
+     // package private constructor --------------------------------------
+
+     /**
+      * Builds an empty contraction table whose current tag is the not
+      * found tag
+      *
+      * @param mapping
+      *            trie builder this table refers to
+      */
+     ContractionTable(IntTrieBuilder mapping) {
+         m_mapping_ = mapping;
+         m_currentTag_ = CE_NOT_FOUND_TAG_;
+         m_codePoints_ = new StringBuilder();
+         m_elements_ = new ArrayList();
+         m_CEs_ = new ArrayList();
+         m_offsets_ = new ArrayList();
+     }
+
+     /**
+      * Copies a contraction table. Not all data will be copied into their
+      * own object: the lists and the code point buffer are new objects,
+      * the mapping and the list entries are shared with the source.
+      *
+      * @param table
+      *            to copy
+      */
+     ContractionTable(ContractionTable table) {
+         m_mapping_ = table.m_mapping_;
+         m_currentTag_ = table.m_currentTag_;
+         m_codePoints_ = new StringBuilder(table.m_codePoints_);
+         m_elements_ = new ArrayList(table.m_elements_);
+         m_CEs_ = new ArrayList(table.m_CEs_);
+         m_offsets_ = new ArrayList(table.m_offsets_);
+     }
+ }
+
+ /**
+  * Private class for combining mark table. The table is indexed by the class
+  * value(0-255).
+  */
+ @SuppressWarnings("unused")
+ private static class CombinClassTable {
+     /**
+      * accumulated numbers of combining marks.
+      */
+     int[] index = new int[256];
+
+     /**
+      * code point array for combining marks.
+      */
+     char[] cPoints;
+
+     /**
+      * size of cPoints.
+      */
+     int size;
+
+     // constructor
+     CombinClassTable() {
+         cPoints = null;
+         size = 0;
+         pos = 0;
+         curClass = 1;
+     }
+
+     /**
+      * Copy the combining mark table from cps and ccIndex in compact way.
+      *
+      * @param cps
+      *            : code point array, the j-th mark of class i is read
+      *            from cps[(i << 8) + j]
+      * @param numOfCM
+      *            : length of the compacted code point array to allocate
+      * @param ccIndex
+      *            : number of combining marks per combining class (0-255)
+      */
+     void generate(char[] cps, int numOfCM, int[] ccIndex) {
+         int count = 0;
+
+         cPoints = new char[numOfCM];
+         for (int i = 0; i < 256; i++) {
+             for (int j = 0; j < ccIndex[i]; j++) {
+                 cPoints[count++] = cps[(i << 8) + j];
+             }
+             index[i] = count;
+         }
+         size = count;
+     }
+
+     /**
+      * Get first CM(combining mark) with the combining class value cClass.
+      *
+      * @param cClass
+      *            : combining class value.
+      * @return combining mark codepoint or 0 if no combining make with class
+      *         value cClass
+      */
+     char GetFirstCM(int cClass) {
+         curClass = cClass;
+         // restart the iteration so that no position of a previously
+         // visited class is left behind
+         pos = 0;
+         if (cPoints == null || cClass == 0
+                 || index[cClass] == index[cClass - 1]) {
+             return 0;
+         }
+         pos = 1;
+         return cPoints[index[cClass - 1]];
+     }
+
+     /**
+      * Get next CM(combining mark) with the combining class value cClass.
+      * Return combining mark codepoint or 0 if no next CM. Class 0 has no
+      * preceding index slot and never yields a mark.
+      */
+     char GetNextCM() {
+         if (cPoints == null || curClass == 0) {
+             return 0;
+         }
+         int next = index[curClass - 1] + pos;
+         // >= rather than ==, so that the marks of the following class
+         // are never returned once the current class is exhausted
+         if (next >= index[curClass]) {
+             return 0;
+         }
+         pos++;
+         return cPoints[next];
+     }
+
+     // private data members
+     int pos;
+     int curClass;
+ }
+
+ /**
+  * Working state of one table build: the UCA based collator, the trie
+  * builder for the mapping, the expansion and contraction tables, the
+  * options and the unsafe / contraction end code point bit tables.
+  */
+ private static final class BuildTable implements TrieBuilder.DataManipulate {
+     // package private methods ------------------------------------------
+
+     /**
+      * For construction of the Trie tables. Has to be labeled public.
+      * Scans the 0x400 code points starting at cp and reports a surrogate
+      * tagged special CE carrying offset as soon as one of them holds a
+      * value that is not an implicit or not found special.
+      *
+      * @param cp The value of the code point.
+      * @param offset The value of the offset.
+      * @return data offset or 0
+      */
+     public int getFoldedValue(int cp, int offset) {
+         final int limit = cp + 0x400;
+         int current = cp;
+         while (current < limit) {
+             int value = m_mapping_.getValue(current);
+             boolean inBlockZero = m_mapping_.isInZeroBlock(current);
+             int tag = getCETag(value);
+             if (inBlockZero) {
+                 // nothing in this whole data block, jump over it
+                 current += TrieBuilder.DATA_BLOCK_LENGTH;
+                 continue;
+             }
+             // These are values that are starting in either UCA
+             // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG).
+             // Presence of these tags means that there is nothing in
+             // this position and that it should be skipped.
+             boolean nothingHere = isSpecial(value)
+                     && (tag == CE_IMPLICIT_TAG_ || tag == CE_NOT_FOUND_TAG_);
+             if (!nothingHere) {
+                 return RuleBasedCollator.CE_SPECIAL_FLAG_
+                         | (CE_SURROGATE_TAG_ << 24) | offset;
+             }
+             current++;
+         }
+         return 0;
+     }
+
+     // package private constructor --------------------------------------
+
+     /**
+      * Creates the build table for the parsed rules. The maximum
+      * expansion table starts out with the initial entry followed by the
+      * expansion end CEs and sizes of the UCA.
+      *
+      * @param parser
+      *            rule parser, source of the options
+      */
+     BuildTable(CollationRuleParser parser) {
+         m_collator_ = new RuleBasedCollator();
+         m_collator_.setWithUCAData();
+         // copy UCA's maxexpansion and merge as we go along
+         m_maxExpansions_ = new MaxExpansionTable();
+         m_maxJamoExpansions_ = new MaxJamoExpansionTable();
+         m_options_ = parser.m_options_;
+         m_expansions_ = new ArrayList();
+         // Do your own mallocs for the structure, array and have linear
+         // Latin 1
+         final int notFound = RuleBasedCollator.CE_SPECIAL_FLAG_
+                 | (CE_NOT_FOUND_TAG_ << 24);
+         // temporary fix for jb3822, 0x100000 -> 30000
+         m_mapping_ = new IntTrieBuilder(null, 0x30000, notFound, notFound,
+                 true);
+         m_prefixLookup_ = new HashMap();
+         // uhash_open(prefixLookupHash, prefixLookupComp);
+         m_contractions_ = new ContractionTable(m_mapping_);
+         // the table already holds an extra initial value for easier
+         // manipulation, the UCA values follow it
+         for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) {
+             m_maxExpansions_.m_endExpansionCE_.add(new Integer(
+                     RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
+             m_maxExpansions_.m_expansionCESize_.add(new Byte(
+                     RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
+         }
+
+         // freshly allocated byte arrays are already filled with 0
+         m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_];
+         m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_];
+     }
+
+     /**
+      * Duplicating a BuildTable. Not all data will be duplicated into their
+      * own object: the collator and the options are shared, the prefix
+      * lookup and the combining class table are not carried over.
+      *
+      * @param table
+      *            to clone
+      */
+     BuildTable(BuildTable table) {
+         m_collator_ = table.m_collator_;
+         m_options_ = table.m_options_;
+         m_mapping_ = new IntTrieBuilder(table.m_mapping_);
+         m_expansions_ = new ArrayList(table.m_expansions_);
+         m_contractions_ = new ContractionTable(table.m_contractions_);
+         // the copied contractions have to refer to the copied trie
+         m_contractions_.m_mapping_ = m_mapping_;
+         m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_);
+         m_maxJamoExpansions_ = new MaxJamoExpansionTable(
+                 table.m_maxJamoExpansions_);
+         m_unsafeCP_ = table.m_unsafeCP_.clone();
+         m_contrEndCP_ = table.m_contrEndCP_.clone();
+     }
+
+     // package private data members -------------------------------------
+
+     RuleBasedCollator m_collator_;
+     IntTrieBuilder m_mapping_;
+     List m_expansions_;
+     ContractionTable m_contractions_;
+     // UCATableHeader image;
+     CollationRuleParser.OptionSet m_options_;
+     MaxExpansionTable m_maxExpansions_;
+     MaxJamoExpansionTable m_maxJamoExpansions_;
+     byte m_unsafeCP_[];
+     byte m_contrEndCP_[];
+     Map m_prefixLookup_;
+     CombinClassTable cmLookup = null;
+ }
+
+ /**
+  * One element of the build: the characters, the optional prefix and the
+  * collation elements they map to. Hash code and equality are defined on
+  * the working string from its offset on.
+  */
+ private static class Elements {
+     // package private data members -------------------------------------
+
+     String m_prefixChars_;
+     int m_prefix_;
+     String m_uchars_;
+     /**
+      * Working string
+      */
+     String m_cPoints_;
+     /**
+      * Offset to the working string
+      */
+     int m_cPointsOffset_;
+     /**
+      * These are collation elements - there could be more than one - in case
+      * of expansion
+      */
+     int m_CEs_[];
+     int m_CELength_;
+     /**
+      * This is the value element maps in original table
+      */
+     int m_mapCE_;
+     int m_sizePrim_[];
+     int m_sizeSec_[];
+     int m_sizeTer_[];
+     boolean m_variableTop_;
+     boolean m_caseBit_;
+
+     // package private constructors -------------------------------------
+
+     /**
+      * Package private constructor, allocates the CE and size buffers
+      */
+     Elements() {
+         m_CEs_ = new int[256];
+         m_CELength_ = 0;
+         m_sizePrim_ = new int[128];
+         m_sizeSec_ = new int[128];
+         m_sizeTer_ = new int[128];
+     }
+
+     /**
+      * Package private copy constructor. The arrays are not duplicated,
+      * the new object refers to the arrays of the argument.
+      */
+     Elements(Elements element) {
+         // strings and their offsets
+         m_uchars_ = element.m_uchars_;
+         m_cPoints_ = element.m_cPoints_;
+         m_cPointsOffset_ = element.m_cPointsOffset_;
+         m_prefixChars_ = element.m_prefixChars_;
+         m_prefix_ = element.m_prefix_;
+         // collation elements
+         m_CEs_ = element.m_CEs_;
+         m_CELength_ = element.m_CELength_;
+         m_mapCE_ = element.m_mapCE_;
+         // sizes and flags
+         m_sizePrim_ = element.m_sizePrim_;
+         m_sizeSec_ = element.m_sizeSec_;
+         m_sizeTer_ = element.m_sizeTer_;
+         m_variableTop_ = element.m_variableTop_;
+         m_caseBit_ = element.m_caseBit_;
+     }
+
+     // package private methods -------------------------------------------
+
+     /**
+      * Initializing the elements. The content of the CE buffer itself is
+      * left as it is, only its length is set back to 0.
+      */
+     public void clear() {
+         m_uchars_ = null;
+         m_cPoints_ = null;
+         m_cPointsOffset_ = 0;
+         m_prefixChars_ = null;
+         m_prefix_ = 0;
+         m_CELength_ = 0;
+         m_mapCE_ = 0;
+         m_variableTop_ = false;
+         m_caseBit_ = false;
+         Arrays.fill(m_sizePrim_, 0);
+         Arrays.fill(m_sizeSec_, 0);
+         Arrays.fill(m_sizeTer_, 0);
+     }
+
+     /**
+      * Hashcode calculation for token
+      *
+      * @return the hashcode of the working string from its offset on
+      */
+     public int hashCode() {
+         return m_cPoints_.substring(m_cPointsOffset_).hashCode();
+     }
+
+     /**
+      * Equals calculation
+      *
+      * @param target Object to compare
+      * @return true if target is an Elements whose working string, from
+      *         its offset on, has the same characters as this one
+      */
+     public boolean equals(Object target) {
+         if (this == target) {
+             return true;
+         }
+         if (!(target instanceof Elements)) {
+             return false;
+         }
+         Elements other = (Elements) target;
+         int length = m_cPoints_.length() - m_cPointsOffset_;
+         int otherLength = other.m_cPoints_.length()
+                 - other.m_cPointsOffset_;
+         return length == otherLength
+                 && other.m_cPoints_.regionMatches(other.m_cPointsOffset_,
+                         m_cPoints_, m_cPointsOffset_, length);
+     }
+ }
+
+ // private data member ---------------------------------------------------
+
+ /**
+ * Maximum strength used in CE building
+ */
+ private static final int CE_BASIC_STRENGTH_LIMIT_ = 3;
+ /**
+ * Maximum collation strength
+ */
+ private static final int CE_STRENGTH_LIMIT_ = 16;
+ /**
+ * Strength mask array, used in inverse UCA. Indexed by strength: the
+ * first mask keeps the upper 16 bits, the second the upper 24 bits and
+ * the third all 32 bits of a CE.
+ */
+ private static final int STRENGTH_MASK_[] = { 0xFFFF0000, 0xFFFFFF00,
+ 0xFFFFFFFF };
+ /**
+ * CE value for not found. This is the complete value that mapped CEs are
+ * compared with, not a tag.
+ */
+ private static final int CE_NOT_FOUND_ = 0xF0000000;
+ /**
+ * CE tag for not found
+ */
+ private static final int CE_NOT_FOUND_TAG_ = 0;
+ /**
+ * This code point results in an expansion
+ */
+ private static final int CE_EXPANSION_TAG_ = 1;
+ /**
+ * Start of a contraction
+ */
+ private static final int CE_CONTRACTION_TAG_ = 2;
+ /*
+ * Thai character - do the reordering
+ */
+ // private static final int CE_THAI_TAG_ = 3;
+ /*
+ * Charset processing, not yet implemented
+ */
+ // private static final int CE_CHARSET_TAG_ = 4;
+ /**
+ * Lead surrogate that is tailored and doesn't start a contraction
+ */
+ private static final int CE_SURROGATE_TAG_ = 5;
+ /*
+ * AC00-D7AF
+ */
+ // private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
+ /*
+ * D800-DBFF
+ */
+ // private static final int CE_LEAD_SURROGATE_TAG_ = 7;
+ /*
+ * DC00-DFFF
+ */
+ // private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
+ /*
+ * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
+ */
+ // private static final int CE_CJK_IMPLICIT_TAG_ = 9;
+ private static final int CE_IMPLICIT_TAG_ = 10; // tag of implicit values, treated as "nothing here" by BuildTable.getFoldedValue
+ private static final int CE_SPEC_PROC_TAG_ = 11;
+ /**
+ * This is a three byte primary with starting secondaries and tertiaries. It
+ * fits in a single 32 bit CE and is used instead of expansion to save space
+ * without affecting the performance (hopefully)
+ */
+ private static final int CE_LONG_PRIMARY_TAG_ = 12;
+ /**
+ * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each
+ * latin 1 char + some power of two for hashing the rest of the chars. Size
+ * in bytes (1056 = 32 + 1024)
+ */
+ private static final int UNSAFECP_TABLE_SIZE_ = 1056;
+ /**
+ * Mask value down to "some power of two" -1. Number of bits, not num of
+ * bytes (0x1fff = 8192 - 1, and 8192 is the number of bits in 1024
+ * bytes).
+ */
+ private static final int UNSAFECP_TABLE_MASK_ = 0x1fff;
+ /**
+ * Case values
+ */
+ private static final int UPPER_CASE_ = 0x80;
+ private static final int MIXED_CASE_ = 0x40;
+ private static final int LOWER_CASE_ = 0x00;
+ /*
+ * Initial table size
+ */
+ // private static final int INIT_TABLE_SIZE_ = 1028;
+ /*
+ * Header size, copied from ICU4C, to be changed when that value changes
+ */
+ // private static final int HEADER_SIZE_ = 0xC4;
+ /**
+ * Contraction table new element indicator
+ */
+ private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF;
+ /**
+ * Parser for the rules
+ */
+ private CollationRuleParser m_parser_;
+ /**
+ * Utility UCA collation element iterator
+ */
+ private CollationElementIterator m_utilColEIter_;
+ /**
+ * Utility data members, scratch objects and buffers that are kept in
+ * fields and reused by the private methods of this builder. There is one
+ * CE generator per basic strength (primary, secondary, tertiary).
+ */
+ private CEGenerator m_utilGens_[] = { new CEGenerator(), new CEGenerator(),
+ new CEGenerator() };
+ private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_]; // one CE per basic strength, filled by initBuffers
+ private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_]; // per strength counters used by initBuffers
+ private Elements m_utilElement_ = new Elements();
+ private Elements m_utilElement2_ = new Elements();
+ private CollationRuleParser.Token m_utilToken_ = new CollationRuleParser.Token();
+ private int m_utilCountBuffer_[] = new int[6];
+ private long m_utilLongBuffer_[] = new long[5];
+ private WeightRange m_utilLowerWeightRange_[] = { new WeightRange(),
+ new WeightRange(), new WeightRange(), new WeightRange(),
+ new WeightRange() };
+ private WeightRange m_utilUpperWeightRange_[] = { new WeightRange(),
+ new WeightRange(), new WeightRange(), new WeightRange(),
+ new WeightRange() };
+ private WeightRange m_utilWeightRange_ = new WeightRange();
+ private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
+ private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
+ private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
+ // Flag indicating a combining marks table is required or not.
+ // NOTE(review): static and mutable, so the flag is shared by all builder instances - confirm intended
+ private static boolean buildCMTabFlag = false;
+
+ // private methods -------------------------------------------------------
+
+ /**
+ * Assigns collation elements to every token of one parsed rule list.
+ * <p>
+ * The work is done in two passes. The first pass walks the list backwards
+ * from listheader.m_last_ and stores a per-strength running count
+ * (kept in m_utilIntBuffer_) in each token's m_toInsert_. Then
+ * INVERSE_UCA_.getInverseGapPositions() is called for the list. The
+ * second pass walks forward from listheader.m_first_: whenever a token
+ * has a lower strength value than any token seen so far in this list,
+ * the generators in m_utilGens_ are (re)started from the gap arrays of
+ * the list header; otherwise only the generator of the token's own
+ * strength is advanced and the generators of the higher strength values
+ * are restarted with getSimpleCEGenerator(). The resulting
+ * primary/secondary/tertiary parts in m_utilCEBuffer_ are handed to
+ * doCE() for each token.
+ *
+ * @param listheader
+ * parsed rule tokens
+ * @exception Exception
+ * thrown with "Internal program error" when no usable entry of
+ * listheader.m_pos_ is found, or when a called generator method
+ * fails
+ */
+ private void initBuffers(CollationRuleParser.TokenListHeader listheader)
+ throws Exception {
+ CollationRuleParser.Token token = listheader.m_last_;
+ Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0); // reset the per-strength counters
+
+ token.m_toInsert_ = 1;
+ m_utilIntBuffer_[token.m_strength_] = 1;
+ while (token.m_previous_ != null) {
+ if (token.m_previous_.m_strength_ < token.m_strength_) {
+ // going up
+ m_utilIntBuffer_[token.m_strength_] = 0;
+ m_utilIntBuffer_[token.m_previous_.m_strength_]++;
+ } else if (token.m_previous_.m_strength_ > token.m_strength_) {
+ // going down
+ m_utilIntBuffer_[token.m_previous_.m_strength_] = 1;
+ } else {
+ m_utilIntBuffer_[token.m_strength_]++;
+ }
+ token = token.m_previous_;
+ token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
+ }
+
+ token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
+ INVERSE_UCA_.getInverseGapPositions(listheader); // NOTE(review): presumably fills m_pos_, m_gapsLo_ and m_gapsHi_ that are read below - confirm
+
+ token = listheader.m_first_;
+ int fstrength = Collator.IDENTICAL;
+ int initstrength = Collator.IDENTICAL;
+
+ m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_,
+ listheader.m_baseContCE_, Collator.PRIMARY);
+ m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_,
+ listheader.m_baseContCE_, Collator.SECONDARY);
+ m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_,
+ listheader.m_baseContCE_, Collator.TERTIARY);
+ while (token != null) {
+ fstrength = token.m_strength_;
+ if (fstrength < initstrength) { // lowest strength value seen so far: (re)start the generators from the gaps
+ initstrength = fstrength;
+ if (listheader.m_pos_[fstrength] == -1) {
+ while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) { // fall back to the nearest lower index whose position is set
+ fstrength--;
+ }
+ if (listheader.m_pos_[fstrength] == -1) {
+ throw new Exception("Internal program error");
+ }
+ }
+ if (initstrength == Collator.TERTIARY) {
+ // starting with tertiary
+ m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
+ m_utilCEBuffer_[Collator.SECONDARY] = listheader.m_gapsLo_[fstrength * 3 + 1];
+ m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator(
+ m_utilGens_[Collator.TERTIARY],
+ listheader.m_gapsLo_, listheader.m_gapsHi_, token,
+ fstrength);
+ } else if (initstrength == Collator.SECONDARY) {
+ // secondaries
+ m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
+ m_utilCEBuffer_[Collator.SECONDARY] = getCEGenerator(
+ m_utilGens_[Collator.SECONDARY],
+ listheader.m_gapsLo_, listheader.m_gapsHi_, token,
+ fstrength);
+ m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.TERTIARY], token,
+ Collator.TERTIARY);
+ } else {
+ // primaries
+ m_utilCEBuffer_[Collator.PRIMARY] = getCEGenerator(
+ m_utilGens_[Collator.PRIMARY],
+ listheader.m_gapsLo_, listheader.m_gapsHi_, token,
+ fstrength);
+ m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.SECONDARY], token,
+ Collator.SECONDARY);
+ m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.TERTIARY], token,
+ Collator.TERTIARY);
+ }
+ } else { // generators already running: advance the one matching the token's own strength
+ if (token.m_strength_ == Collator.TERTIARY) {
+ m_utilCEBuffer_[Collator.TERTIARY] = getNextGenerated(m_utilGens_[Collator.TERTIARY]);
+ } else if (token.m_strength_ == Collator.SECONDARY) {
+ m_utilCEBuffer_[Collator.SECONDARY] = getNextGenerated(m_utilGens_[Collator.SECONDARY]);
+ m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.TERTIARY], token,
+ Collator.TERTIARY);
+ } else if (token.m_strength_ == Collator.PRIMARY) {
+ m_utilCEBuffer_[Collator.PRIMARY] = getNextGenerated(m_utilGens_[Collator.PRIMARY]);
+ m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.SECONDARY], token,
+ Collator.SECONDARY);
+ m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(
+ m_utilGens_[Collator.TERTIARY], token,
+ Collator.TERTIARY);
+ }
+ }
+ doCE(m_utilCEBuffer_, token); // pack the three parts into the token's CE array
+ token = token.m_next_;
+ }
+ }
+
+ /**
+ * Get the next generated ce
+ *
+ * @param g
+ * ce generator
+ * @return next generated ce
+ */
+ private int getNextGenerated(CEGenerator g) {
+ g.m_current_ = nextWeight(g);
+ return g.m_current_;
+ }
+
+ /**
+ * @param g
+ * CEGenerator
+ * @param token
+ * rule token
+ * @param strength
+ * @return ce generator
+ * @exception Exception
+ * thrown when internal error occurs
+ */
+ private int getSimpleCEGenerator(CEGenerator g,
+ CollationRuleParser.Token token, int strength) throws Exception {
+ int high, low, count = 1;
+ int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF;
+
+ if (strength == Collator.SECONDARY) {
+ low = RuleBasedCollator.COMMON_TOP_2_ << 24;
+ high = 0xFFFFFFFF;
+ count = 0xFF - RuleBasedCollator.COMMON_TOP_2_;
+ } else {
+ low = RuleBasedCollator.BYTE_COMMON_ << 24; // 0x05000000;
+ high = 0x40000000;
+ count = 0x40 - RuleBasedCollator.BYTE_COMMON_;
+ }
+
+ if (token.m_next_ != null && token.m_next_.m_strength_ == strength) {
+ count = token.m_next_.m_toInsert_;
+ }
+
+ g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
+ g.m_ranges_);
+ g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24;
+
+ if (g.m_rangesLength_ == 0) {
+ throw new Exception("Internal program error");
+ }
+ return g.m_current_;
+ }
+
+ /**
+ * Combines 2 ce into one with respect to the argument strength
+ *
+ * @param ce1
+ * first ce
+ * @param ce2
+ * second ce
+ * @param strength
+ * strength to use
+ * @return combined ce
+ */
+ private static int mergeCE(int ce1, int ce2, int strength) {
+ int mask = RuleBasedCollator.CE_TERTIARY_MASK_;
+ if (strength == Collator.SECONDARY) {
+ mask = RuleBasedCollator.CE_SECONDARY_MASK_;
+ } else if (strength == Collator.PRIMARY) {
+ mask = RuleBasedCollator.CE_PRIMARY_MASK_;
+ }
+ ce1 &= mask;
+ ce2 &= mask;
+ switch (strength) {
+ case Collator.PRIMARY:
+ return ce1 | ce2 >>> 16;
+ case Collator.SECONDARY:
+ return ce1 << 16 | ce2 << 8;
+ default:
+ return ce1 << 24 | ce2 << 16;
+ }
+ }
+
+ /**
+  * Restarts a generator over the gap between lows[] and highs[] for the
+  * token's strength and returns the first generated weight.
+  * <p>
+  * The gap boundaries are read at index fstrength * 3 + strength. If the
+  * gap is empty at a secondary or tertiary strength, the lower strength
+  * indexes are searched for a difference; when one is found, the default
+  * secondary/tertiary range is used instead.
+  *
+  * @param g
+  *            CEGenerator whose ranges and current weight are reset
+  * @param lows
+  *            low gap array
+  * @param highs
+  *            high gap array
+  * @param token
+  *            rule token; supplies the strength and the number of weights
+  *            to allocate (m_toInsert_)
+  * @param fstrength
+  *            selects the group of three entries in lows/highs
+  * @return the first weight produced by the generator
+  * @exception Exception
+  *                thrown when the gap is empty at every strength or when
+  *                allocateWeights() yields no range
+  */
+ private int getCEGenerator(CEGenerator g, int lows[], int highs[],
+         CollationRuleParser.Token token, int fstrength) throws Exception {
+     int strength = token.m_strength_;
+     int low = lows[fstrength * 3 + strength];
+     int high = highs[fstrength * 3 + strength];
+     int maxbyte = 0;
+     if (strength == Collator.TERTIARY) {
+         maxbyte = 0x3F;
+     } else if (strength == Collator.PRIMARY) {
+         maxbyte = 0xFE;
+     } else {
+         maxbyte = 0xFF;
+     }
+
+     int count = token.m_toInsert_;
+
+     if (Utility.compareUnsigned(low, high) >= 0
+             && strength > Collator.PRIMARY) {
+         int s = strength;
+         while (true) {
+             s--;
+             // Check the bound BEFORE indexing: with s == -1 the arrays
+             // would be read one slot before this group (or at index -1
+             // when fstrength == 0).
+             if (s < 0) {
+                 throw new Exception("Internal program error");
+             }
+             if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
+                 // NOTE(review): the two '<' comparisons below are signed
+                 // while the rest of this method compares unsigned -
+                 // confirm this is intended.
+                 if (strength == Collator.SECONDARY) {
+                     if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
+                         // Override if low range is less than
+                         // UCOL_COMMON_TOP2.
+                         low = RuleBasedCollator.COMMON_TOP_2_ << 24;
+                     }
+                     high = 0xFFFFFFFF;
+                 } else {
+                     if (low < RuleBasedCollator.COMMON_BOTTOM_3 << 24) {
+                         // Override if low range is less than
+                         // UCOL_COMMON_BOT3.
+                         low = RuleBasedCollator.COMMON_BOTTOM_3 << 24;
+                     }
+                     high = 0x40000000;
+                 }
+                 break;
+             }
+         }
+     }
+     if (0 <= low && low < 0x02000000) { // unsigned comparison < 0x02000000
+         // We must not use CE weight byte 02, so we set it as the minimum lower bound.
+         // See http://site.icu-project.org/design/collation/bytes
+         low = 0x02000000;
+     }
+
+     if (strength == Collator.SECONDARY) { // similar as simple
+         if (Utility.compareUnsigned(low,
+                 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
+                 && Utility.compareUnsigned(low,
+                         RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
+             low = RuleBasedCollator.COMMON_TOP_2_ << 24;
+         }
+         if (Utility.compareUnsigned(high,
+                 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0
+                 && Utility.compareUnsigned(high,
+                         RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
+             high = RuleBasedCollator.COMMON_TOP_2_ << 24;
+         }
+         if (Utility.compareUnsigned(low,
+                 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
+             // low lies below the common range: allocate from the minimum
+             // unshifted byte instead (no range-length check on this path)
+             g.m_rangesLength_ = allocateWeights(
+                     RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, high,
+                     count, maxbyte, g.m_ranges_);
+             g.m_current_ = nextWeight(g);
+             return g.m_current_;
+         }
+     }
+
+     g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte,
+             g.m_ranges_);
+     if (g.m_rangesLength_ == 0) {
+         throw new Exception("Internal program error");
+     }
+     g.m_current_ = nextWeight(g);
+     return g.m_current_;
+ }
+
+ /**
+ * Packs the primary, secondary and tertiary weight parts into the
+ * token's CE array (token.m_CE_ / token.m_CELength_) and sets the case
+ * bits of the first CE.
+ * <p>
+ * Each generated CE takes two bytes of ceparts[0], one byte of ceparts[1]
+ * and one byte (masked with 0x3F) of ceparts[2], read from the most
+ * significant end. Every CE after the first one is OR-ed with
+ * CE_CONTINUATION_MARKER_. If all three parts are zero, a single CE of 0
+ * is stored. m_utilIntBuffer_[0..2] is overwritten with the byte counts.
+ *
+ * @param ceparts
+ * list of collation elements parts
+ * @param token
+ * rule token
+ * @exception Exception
+ * thrown when forming case bits for expansions fails
+ */
+ private void doCE(int ceparts[], CollationRuleParser.Token token)
+ throws Exception {
+ // this one makes the table and stuff
+ // int noofbytes[] = new int[3];
+ for (int i = 0; i < 3; i++) {
+ // noofbytes[i] = countBytes(ceparts[i]);
+ m_utilIntBuffer_[i] = countBytes(ceparts[i]);
+ }
+
+ // Here we have to pack CEs from parts
+ int cei = 0;
+ int value = 0;
+
+ while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1]
+ || cei < m_utilIntBuffer_[2]) { // primaries are consumed two bytes per CE, the others one byte per CE
+ if (cei > 0) {
+ value = RuleBasedCollator.CE_CONTINUATION_MARKER_;
+ } else {
+ value = 0;
+ }
+
+ if ((cei << 1) < m_utilIntBuffer_[0]) {
+ value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) << 16; // cei-th 16-bit unit of the primary
+ }
+ if (cei < m_utilIntBuffer_[1]) {
+ value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8; // cei-th byte of the secondary
+ }
+
+ if (cei < m_utilIntBuffer_[2]) {
+ value |= ((ceparts[2] >> (32 - ((cei + 1) << 3))) & 0x3F); // cei-th byte of the tertiary, low 6 bits only
+ }
+ token.m_CE_[cei] = value;
+ cei++;
+ }
+ if (cei == 0) { // totally ignorable
+ token.m_CELength_ = 1;
+ token.m_CE_[0] = 0;
+ } else { // there is at least something
+ token.m_CELength_ = cei;
+ }
+
+ // Case bits handling for expansion
+ if (token.m_CE_[0] != 0) { // case bits should be set only for
+ // non-ignorables
+ token.m_CE_[0] &= 0xFFFFFF3F; // Clean the case bits field
+ int cSize = (token.m_source_ & 0xFF000000) >>> 24; // top byte of m_source_: length of the token's source text
+ int startoftokenrule = token.m_source_ & 0x00FFFFFF; // low 24 bits: offset into token.m_rules_
+
+ if (cSize > 1) {
+ // Do it manually
+ String tokenstr = token.m_rules_.substring(startoftokenrule,
+ startoftokenrule + cSize);
+ token.m_CE_[0] |= getCaseBits(tokenstr);
+ } else {
+ // Copy it from the UCA
+ int caseCE = getFirstCE(token.m_rules_.charAt(startoftokenrule));
+ token.m_CE_[0] |= (caseCE & 0xC0);
+ }
+ }
+ }
+
+ /**
+  * Returns the length in bytes of a left-aligned weight, i.e. the number
+  * of byte positions counted from the most significant byte down to the
+  * lowest non-zero byte. Zero bytes above the lowest non-zero byte are
+  * counted; trailing zero bytes are not.
+  *
+  * @param ce
+  *            weight to measure
+  * @return 0 for ce == 0, otherwise a value from 1 to 4
+  */
+ private static final int countBytes(int ce) {
+     int used = 0;
+     // masks: 0xFFFFFFFF, 0x00FFFFFF, 0x0000FFFF, 0x000000FF
+     for (int mask = 0xFFFFFFFF; mask != 0; mask >>>= 8) {
+         if ((ce & mask) != 0) {
+             used++;
+         }
+     }
+     return used;
+ }
+
+ /**
+  * Turns every token of a rule list into an Elements entry and adds it to
+  * the build table through addAnElement().
+  * <p>
+  * For each token this resolves the CEs of its expansion (taking tailored
+  * sub-sequences from the parser's hash table where present and falling
+  * back to m_utilColEIter_ one char at a time otherwise), copies the
+  * token's own CEs followed by the expansion CEs into the shared scratch
+  * element, slices prefix and source text out of the rule string, and
+  * records whether Jamo or combining marks were seen.
+  * <p>
+  * Token fields m_source_, m_prefix_ and m_expansion_ pack a length in the
+  * top 8 bits and an offset in the low 24 bits; the length is always
+  * extracted with an unsigned shift so that lengths of 128 and above do
+  * not turn negative.
+  *
+  * @param t
+  *            build table to insert
+  * @param lh
+  *            rule token list header
+  */
+ private void createElements(BuildTable t,
+         CollationRuleParser.TokenListHeader lh) {
+     CollationRuleParser.Token tok = lh.m_first_;
+     m_utilElement_.clear();
+     while (tok != null) {
+         // first, check if there are any expansions
+         // if there are expansions, we need to do a little bit more
+         // processing since parts of expansion can be tailored, while
+         // others are not
+         if (tok.m_expansion_ != 0) {
+             int len = tok.m_expansion_ >>> 24;
+             int currentSequenceLen = len;
+             int expOffset = tok.m_expansion_ & 0x00FFFFFF;
+             m_utilToken_.m_source_ = currentSequenceLen | expOffset;
+             m_utilToken_.m_rules_ = m_parser_.m_source_;
+
+             while (len > 0) {
+                 // try the longest remaining sub-sequence first
+                 currentSequenceLen = len;
+                 while (currentSequenceLen > 0) {
+                     m_utilToken_.m_source_ = (currentSequenceLen << 24)
+                             | expOffset;
+                     CollationRuleParser.Token expt = m_parser_.m_hashTable_.get(m_utilToken_);
+                     if (expt != null
+                             && expt.m_strength_ != CollationRuleParser.TOKEN_RESET_) {
+                         // expansion is tailored
+                         int noOfCEsToCopy = expt.m_CELength_;
+                         for (int j = 0; j < noOfCEsToCopy; j++) {
+                             tok.m_expCE_[tok.m_expCELength_ + j] = expt.m_CE_[j];
+                         }
+                         tok.m_expCELength_ += noOfCEsToCopy;
+                         // never try to add codepoints and CEs.
+                         // For some odd reason, it won't work.
+                         expOffset += currentSequenceLen; // noOfCEsToCopy;
+                         len -= currentSequenceLen; // noOfCEsToCopy;
+                         break;
+                     } else {
+                         currentSequenceLen--;
+                     }
+                 }
+                 if (currentSequenceLen == 0) {
+                     // couldn't find any tailored subsequence, will have to
+                     // get one from UCA. first, get the UChars from the
+                     // rules then pick CEs out until there is no more and
+                     // stuff them into expansion
+                     m_utilColEIter_.setText(m_parser_.m_source_.substring(
+                             expOffset, expOffset + 1));
+                     while (true) {
+                         int order = m_utilColEIter_.next();
+                         if (order == CollationElementIterator.NULLORDER) {
+                             break;
+                         }
+                         tok.m_expCE_[tok.m_expCELength_++] = order;
+                     }
+                     expOffset++;
+                     len--;
+                 }
+             }
+         } else {
+             tok.m_expCELength_ = 0;
+         }
+
+         // set the ucaelement with obtained values
+         m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_;
+
+         // copy CEs
+         System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0,
+                 tok.m_CELength_);
+         System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_,
+                 tok.m_CELength_, tok.m_expCELength_);
+
+         // copy UChars
+         // We kept prefix and source kind of together, as it is a kind of a
+         // contraction.
+         // However, now we have to slice the prefix off the main thing -
+         m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
+         m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
+         if (tok.m_prefix_ != 0) {
+             // we will just copy the prefix here, and adjust accordingly in
+             // the addPrefix function in ucol_elm. The reason is that we
+             // need to add both composed AND decomposed elements to the
+             // unsafe table.
+             // Unsigned shifts: the length occupies the top byte and must
+             // not be sign-extended.
+             int prefixLength = tok.m_prefix_ >>> 24;
+             int size = prefixLength;
+             int offset = tok.m_prefix_ & 0x00FFFFFF;
+             m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring(
+                     offset, offset + size);
+             size = (tok.m_source_ >>> 24) - prefixLength;
+             offset = (tok.m_source_ & 0x00FFFFFF) + prefixLength;
+             m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
+                     offset, offset + size);
+         } else {
+             m_utilElement_.m_prefixChars_ = null;
+             int offset = tok.m_source_ & 0x00FFFFFF;
+             int size = tok.m_source_ >>> 24;
+             m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
+                     offset, offset + size);
+         }
+         m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
+
+         boolean containCombinMarks = false;
+         for (int i = 0; i < m_utilElement_.m_cPoints_.length()
+                 - m_utilElement_.m_cPointsOffset_; i++) {
+             if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) {
+                 t.m_collator_.m_isJamoSpecial_ = true;
+                 break;
+             }
+             if (!buildCMTabFlag) {
+                 // check combining class
+                 int fcd = m_nfcImpl_.getFCD16FromSingleLead(m_utilElement_.m_cPoints_.charAt(i)); // TODO: review for handling supplementary characters
+                 if ((fcd & 0xff) == 0) {
+                     // reset flag when current char is not combining mark.
+                     containCombinMarks = false;
+                 } else {
+                     containCombinMarks = true;
+                 }
+             }
+         }
+
+         if (!buildCMTabFlag && containCombinMarks) {
+             buildCMTabFlag = true;
+         }
+
+         /***
+          * // Case bits handling m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; //
+          * Clean the case bits field if (m_utilElement_.m_cPoints_.length()
+          * - m_utilElement_.m_cPointsOffset_ > 1) { // Do it manually
+          * m_utilElement_.m_CEs_[0] |=
+          * getCaseBits(m_utilElement_.m_cPoints_); } else { // Copy it from
+          * the UCA int caseCE =
+          * getFirstCE(m_utilElement_.m_cPoints_.charAt(0));
+          * m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0); }
+          ***/
+         // and then, add it
+         addAnElement(t, m_utilElement_);
+         tok = tok.m_next_;
+     }
+ }
+
+ /**
+  * Determines the case bits for a multi-character string.
+  * <p>
+  * The string is decomposed first and then examined one char at a time: a
+  * char counts as upper case when the case bits of its first collation
+  * element equal UPPER_CASE_, and as lower case when
+  * UCharacter.isLowerCase() says so or when it is a small Kana.
+  *
+  * @param src
+  *            string
+  * @return MIXED_CASE_ when both upper and lower case chars were seen,
+  *         UPPER_CASE_ when only upper case chars were seen, otherwise
+  *         LOWER_CASE_
+  * @exception Exception
+  *                thrown when the first collation element of a char is a
+  *                continuation
+  */
+ private final int getCaseBits(String src) throws Exception {
+     boolean sawUpper = false;
+     boolean sawLower = false;
+     src = Normalizer.decompose(src, true);
+     m_utilColEIter_.setText(src);
+     final int length = src.length();
+     for (int index = 0; index < length; index++) {
+         m_utilColEIter_.setText(src.substring(index, index + 1));
+         final int order = m_utilColEIter_.next();
+         if (RuleBasedCollator.isContinuation(order)) {
+             throw new Exception("Internal program error");
+         }
+         if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) == UPPER_CASE_) {
+             sawUpper = true;
+             continue;
+         }
+         final char ch = src.charAt(index);
+         if (UCharacter.isLowerCase(ch)) {
+             sawLower = true;
+         } else if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) {
+             // a small Kana is treated like a lower case letter
+             sawLower = true;
+         }
+     }
+
+     if (!sawUpper) {
+         return LOWER_CASE_;
+     }
+     if (sawLower) {
+         return MIXED_CASE_;
+     }
+     return UPPER_CASE_;
+ }
+
+ /**
+  * Converts a small Kana to the corresponding large ("upper case") Kana.
+  * Every other char is returned unchanged.
+  * <p>
+  * The range guard covers every code point handled by the switch, from
+  * U+3041 (first case label) up to U+30F6 (last case label). A narrower
+  * guard would make the U+3041, U+30F5 and U+30F6 cases unreachable.
+  *
+  * @param ch
+  *            character to convert
+  * @return the converted Kana character
+  */
+ private static final char toLargeKana(char ch) {
+     if (0x3041 <= ch && ch <= 0x30F6) { // Kana range handled below
+         switch (ch - 0x3000) {
+         case 0x41:
+         case 0x43:
+         case 0x45:
+         case 0x47:
+         case 0x49:
+         case 0x63:
+         case 0x83:
+         case 0x85:
+         case 0x8E:
+         case 0xA1:
+         case 0xA3:
+         case 0xA5:
+         case 0xA7:
+         case 0xA9:
+         case 0xC3:
+         case 0xE3:
+         case 0xE5:
+         case 0xEE:
+             // the large form directly follows the small one
+             ch++;
+             break;
+         case 0xF5:
+             ch = 0x30AB;
+             break;
+         case 0xF6:
+             ch = 0x30B1;
+             break;
+         default:
+             break;
+         }
+     }
+     return ch;
+ }
+
+ /**
+  * Converts a large Kana to the corresponding small ("lower case") Kana.
+  * Every other char is returned unchanged.
+  * <p>
+  * The range guard covers every code point handled by the switch, from
+  * U+3042 (first case label) up to U+30EF (last case label). A narrower
+  * guard would make the U+3042 and U+30EF cases unreachable.
+  *
+  * @param ch
+  *            character to convert
+  * @return the converted Kana character
+  */
+ private static final char toSmallKana(char ch) {
+     if (0x3042 <= ch && ch <= 0x30EF) { // Kana range handled below
+         switch (ch - 0x3000) {
+         case 0x42:
+         case 0x44:
+         case 0x46:
+         case 0x48:
+         case 0x4A:
+         case 0x64:
+         case 0x84:
+         case 0x86:
+         case 0x8F:
+         case 0xA2:
+         case 0xA4:
+         case 0xA6:
+         case 0xA8:
+         case 0xAA:
+         case 0xC4:
+         case 0xE4:
+         case 0xE6:
+         case 0xEF:
+             // the small form directly precedes the large one
+             ch--;
+             break;
+         case 0xAB:
+             ch = 0x30F5;
+             break;
+         case 0xB1:
+             ch = 0x30F6;
+             break;
+         default:
+             break;
+         }
+     }
+     return ch;
+ }
+
+ /**
+ * This should be connected to special Jamo handling.
+ */
+ private int getFirstCE(char ch) {
+ m_utilColEIter_.setText(UCharacter.toString(ch));
+ return m_utilColEIter_.next();
+ }
+
+ /**
+ * Computes the mapping CE (element.m_mapCE_) for an element and adds the
+ * element to the build table, testing for existing entries on the way.
+ * <p>
+ * Steps, in order:
+ * 1. A single CE is used directly. Two CEs that form a "long primary"
+ * are folded into one special CE. Any other CE sequence is appended to
+ * t.m_expansions_ and replaced by an expansion special CE; the
+ * max-expansion tables are updated.
+ * 2. If element.m_uchars_ is a single code point (one char or a surrogate
+ * pair) that is a digit, the CE is wrapped into a digit special CE.
+ * 3. If the element has prefix chars, the prefix is registered through
+ * addPrefix(), and a second time with the composed prefix
+ * (m_utilElement2_) when that differs from the original prefix.
+ * 4. If element.m_cPoints_ is longer than one code point, every
+ * canonically equivalent string that is not rejected by the FCD quick
+ * check is added through finalizeAddition(); otherwise the element is
+ * added once.
+ * <p>
+ * Note that step 4 overwrites element.m_uchars_ and element.m_cPoints_
+ * for contractions.
+ *
+ * @param t
+ * build table
+ * @param element
+ * element to add; several of its fields are modified
+ * @return element.m_mapCE_ for contractions, otherwise the result of
+ * finalizeAddition()
+ */
+ private int addAnElement(BuildTable t, Elements element) {
+ List expansions = t.m_expansions_;
+ element.m_mapCE_ = 0;
+
+ if (element.m_CELength_ == 1) {
+ element.m_mapCE_ = element.m_CEs_[0];
+
+ } else {
+ // unfortunately, it looks like we have to look for a long primary
+ // here since in canonical closure we are going to hit some long
+ // primaries from the first phase, and they will come back as
+ // continuations/expansions destroying the effect of the previous
+ // optimization. A long primary is a three byte primary with
+ // starting secondaries and tertiaries. It can appear in long runs
+ // of only primary differences (like east Asian tailorings) also,
+ // it should not be an expansion, as expansions would break with
+ // this
+ if (element.m_CELength_ == 2 // a two CE expansion
+ && RuleBasedCollator.isContinuation(element.m_CEs_[1])
+ && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // that
+ // has
+ // only
+ // primaries
+ // in
+ // continuation
+ && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
+ // a common secondary
+ && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_) // and
+ // a
+ // common
+ // tertiary
+ ) {
+ element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
+ // a long primary special
+ | (CE_LONG_PRIMARY_TAG_ << 24)
+ // first and second byte of primary
+ | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
+ // third byte of primary
+ | ((element.m_CEs_[1] >> 24) & 0xFF);
+ } else {
+ // omitting expansion offset in builder
+ // (HEADER_SIZE_ >> 2)
+ int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
+ | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
+ | (addExpansion(expansions, element.m_CEs_[0]) << 4)
+ & 0xFFFFF0; // '&' binds tighter than '|': only the shifted index is masked
+
+ for (int i = 1; i < element.m_CELength_; i++) {
+ addExpansion(expansions, element.m_CEs_[i]);
+ }
+ if (element.m_CELength_ <= 0xF) {
+ expansion |= element.m_CELength_; // length fits into the low 4 bits
+ } else {
+ addExpansion(expansions, 0); // too long for the length field: append a 0 entry instead
+ }
+ element.m_mapCE_ = expansion;
+ setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
+ (byte) element.m_CELength_, t.m_maxExpansions_);
+ if (isJamo(element.m_cPoints_.charAt(0))) {
+ t.m_collator_.m_isJamoSpecial_ = true;
+ setMaxJamoExpansion(element.m_cPoints_.charAt(0),
+ element.m_CEs_[element.m_CELength_ - 1],
+ (byte) element.m_CELength_, t.m_maxJamoExpansions_);
+ }
+ }
+ }
+
+ // We treat digits differently - they are "uber special" and should be
+ // processed differently if numeric collation is on.
+ int uniChar = 0; // stays 0 unless m_uchars_ is exactly one code point
+ if ((element.m_uchars_.length() == 2)
+ && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) {
+ uniChar = UCharacterProperty.getRawSupplementary(element.m_uchars_
+ .charAt(0), element.m_uchars_.charAt(1));
+ } else if (element.m_uchars_.length() == 1) {
+ uniChar = element.m_uchars_.charAt(0);
+ }
+
+ // Here, we either have one normal CE OR mapCE is set. Therefore, we
+ // stuff only one element to the expansion buffer. When we encounter a
+ // digit and we don't do numeric collation, we will just pick the CE
+ // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE
+ // && ucol_prv_getSpecialPrevCE). If we picked a special, further
+ // processing will occur. If it's a simple CE, we'll return due
+ // to how the loop is constructed.
+ if (uniChar != 0 && UCharacter.isDigit(uniChar)) {
+ // prepare the element
+ int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
+ | (CollationElementIterator.CE_DIGIT_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
+ | 1;
+ if (element.m_mapCE_ != 0) {
+ // if there is an expansion, we'll pick it here
+ expansion |= (addExpansion(expansions, element.m_mapCE_) << 4);
+ } else {
+ expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4);
+ }
+ element.m_mapCE_ = expansion;
+ }
+
+ // here we want to add the prefix structure.
+ // I will try to process it as a reverse contraction, if possible.
+ // prefix buffer is already reversed.
+
+ if (element.m_prefixChars_ != null
+ && element.m_prefixChars_.length() - element.m_prefix_ > 0) {
+ // We keep the seen prefix starter elements in a hashtable we need
+ // it to be able to distinguish between the simple codepoints and
+ // prefix starters. Also, we need to use it for canonical closure.
+ m_utilElement2_.m_caseBit_ = element.m_caseBit_;
+ m_utilElement2_.m_CELength_ = element.m_CELength_;
+ m_utilElement2_.m_CEs_ = element.m_CEs_;
+ m_utilElement2_.m_mapCE_ = element.m_mapCE_;
+ // m_utilElement2_.m_prefixChars_ = element.m_prefixChars_;
+ m_utilElement2_.m_sizePrim_ = element.m_sizePrim_;
+ m_utilElement2_.m_sizeSec_ = element.m_sizeSec_;
+ m_utilElement2_.m_sizeTer_ = element.m_sizeTer_;
+ m_utilElement2_.m_variableTop_ = element.m_variableTop_;
+ m_utilElement2_.m_prefix_ = element.m_prefix_;
+ m_utilElement2_.m_prefixChars_ = Normalizer.compose(
+ element.m_prefixChars_, false);
+ m_utilElement2_.m_uchars_ = element.m_uchars_;
+ m_utilElement2_.m_cPoints_ = element.m_cPoints_;
+ m_utilElement2_.m_cPointsOffset_ = 0;
+
+ if (t.m_prefixLookup_ != null) {
+ Elements uCE = t.m_prefixLookup_.get(element);
+ if (uCE != null) {
+ // there is already a set of code points here
+ element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element);
+ } else { // no code points, so this spot is clean
+ element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element);
+ uCE = new Elements(element);
+ uCE.m_cPoints_ = uCE.m_uchars_;
+ t.m_prefixLookup_.put(uCE, uCE);
+ }
+ if (m_utilElement2_.m_prefixChars_.length() != element.m_prefixChars_
+ .length()
+ - element.m_prefix_
+ || !m_utilElement2_.m_prefixChars_.regionMatches(0,
+ element.m_prefixChars_, element.m_prefix_,
+ m_utilElement2_.m_prefixChars_.length())) {
+ // do it!
+ m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_,
+ m_utilElement2_);
+ }
+ }
+ }
+
+ // We need to use the canonical iterator here
+ // the way we do it is to generate the canonically equivalent strings
+ // for the contraction and then add the sequences that pass FCD check
+ if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1
+ && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2
+ && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) && UTF16
+ .isTrailSurrogate(element.m_cPoints_.charAt(1)))) {
+ // this is a contraction, we should check whether a composed form
+ // should also be included
+ m_utilCanIter_.setSource(element.m_cPoints_);
+ String source = m_utilCanIter_.next();
+ while (source != null && source.length() > 0) {
+ if (Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.NO) {
+ element.m_uchars_ = source;
+ element.m_cPoints_ = element.m_uchars_;
+ finalizeAddition(t, element);
+ }
+ source = m_utilCanIter_.next();
+ }
+
+ return element.m_mapCE_;
+ } else {
+ return finalizeAddition(t, element);
+ }
+ }
+
+ /**
+  * Appends an expansion ce to the expansion list.
+  *
+  * @param expansions
+  *            list to add to
+  * @param value
+  *            of the expansion
+  * @return the index at which the new element was stored
+  */
+ private static final int addExpansion(List expansions, int value) {
+     // Integer.valueOf() instead of the boxing constructor: it may reuse
+     // cached instances.
+     expansions.add(Integer.valueOf(value));
+     return expansions.size() - 1;
+ }
+
+ /**
+  * Records the maximum length of all expansion sequences ending with the
+  * same collation element.
+  * <p>
+  * maxexpansion.m_endExpansionCE_ is searched with a binary search that
+  * compares the CEs as unsigned values. If endexpansion is already
+  * present, its stored size is raised when expansionsize is larger;
+  * otherwise a new entry is inserted directly after the search position.
+  * <p>
+  * NOTE(review): inserting at start + 1 assumes the table already holds an
+  * entry that is not greater than any inserted CE - confirm that
+  * MaxExpansionTable is seeded accordingly.
+  *
+  * @param endexpansion
+  *            the last expansion collation element to be added
+  * @param expansionsize
+  *            size of the expansion
+  * @param maxexpansion
+  *            data structure to store the maximum expansion data.
+  * @return number of entries in maxexpansion.m_endExpansionCE_ afterwards
+  */
+ private static int setMaxExpansion(int endexpansion, byte expansionsize,
+         MaxExpansionTable maxexpansion) {
+     int start = 0;
+     int limit = maxexpansion.m_endExpansionCE_.size();
+     long unsigned = (long) endexpansion;
+     unsigned &= 0xFFFFFFFFL;
+
+     // using binary search to determine if last expansion element is
+     // already in the array
+     int result = -1;
+     if (limit > 0) {
+         while (start < limit - 1) {
+             int mid = (start + limit) >> 1;
+             long unsignedce = (maxexpansion.m_endExpansionCE_
+                     .get(mid)).intValue();
+             unsignedce &= 0xFFFFFFFFL;
+             if (unsigned < unsignedce) {
+                 limit = mid;
+             } else {
+                 start = mid;
+             }
+         }
+
+         if ((maxexpansion.m_endExpansionCE_.get(start)).intValue() == endexpansion) {
+             result = start;
+         }
+     }
+     if (result > -1) {
+         // found the ce in expansion, we'll just modify the size if it
+         // is smaller
+         Object currentsize = maxexpansion.m_expansionCESize_.get(result);
+         if (((Byte) currentsize).byteValue() < expansionsize) {
+             maxexpansion.m_expansionCESize_.set(result,
+                     Byte.valueOf(expansionsize));
+         }
+     } else {
+         // we'll need to squeeze the value into the array. initial
+         // implementation. shifting the subarray down by 1
+         maxexpansion.m_endExpansionCE_.add(start + 1,
+                 Integer.valueOf(endexpansion));
+         maxexpansion.m_expansionCESize_.add(start + 1,
+                 Byte.valueOf(expansionsize));
+     }
+     return maxexpansion.m_endExpansionCE_.size();
+ }
+
+ /**
+  * Records the maximum length of Jamo expansion sequences.
+  * <p>
+  * For a char in U+1100..U+1112 only m_maxLSize_ is updated and nothing is
+  * stored. For a char in U+1161..U+1175 m_maxVSize_ is updated, for a char
+  * in U+11A8..U+11C2 m_maxTSize_ is updated. Unless endexpansion is
+  * already listed, it is then appended to m_endExpansionCE_ together with
+  * a flag in m_isV_ that is false only for the U+11A8..U+11C2 range.
+  *
+  * @param ch
+  *            the jamo codepoint
+  * @param endexpansion
+  *            the last expansion collation element to be added
+  * @param expansionsize
+  *            size of the expansion
+  * @param maxexpansion
+  *            data structure to store the maximum expansion data.
+  * @return number of entries in maxexpansion.m_endExpansionCE_ afterwards
+  */
+ private static int setMaxJamoExpansion(char ch, int endexpansion,
+         byte expansionsize, MaxJamoExpansionTable maxexpansion) {
+     boolean isV = true;
+     if (ch >= 0x1100 && ch <= 0x1112) {
+         // determines L for Jamo, doesn't need to store this since it is
+         // never at the end of a expansion
+         if (maxexpansion.m_maxLSize_ < expansionsize) {
+             maxexpansion.m_maxLSize_ = expansionsize;
+         }
+         return maxexpansion.m_endExpansionCE_.size();
+     }
+
+     if (ch >= 0x1161 && ch <= 0x1175) {
+         // determines V for Jamo
+         if (maxexpansion.m_maxVSize_ < expansionsize) {
+             maxexpansion.m_maxVSize_ = expansionsize;
+         }
+     }
+
+     if (ch >= 0x11A8 && ch <= 0x11C2) {
+         isV = false;
+         // determines T for Jamo
+         if (maxexpansion.m_maxTSize_ < expansionsize) {
+             maxexpansion.m_maxTSize_ = expansionsize;
+         }
+     }
+
+     // nothing to add when this end CE is already listed
+     int pos = maxexpansion.m_endExpansionCE_.size();
+     while (pos > 0) {
+         pos--;
+         if ((maxexpansion.m_endExpansionCE_.get(pos)).intValue() == endexpansion) {
+             return maxexpansion.m_endExpansionCE_.size();
+         }
+     }
+     maxexpansion.m_endExpansionCE_.add(Integer.valueOf(endexpansion));
+     maxexpansion.m_isV_.add(Boolean.valueOf(isV));
+
+     return maxexpansion.m_endExpansionCE_.size();
+ }
+
+ /**
+ * Adds a prefix (pre-context) of an element to the contraction table,
+ * stored as a contraction over the reversed prefix.
+ * <p>
+ * The method marks the prefix chars after the first one and the first
+ * code unit of element.m_cPoints_ as unsafe, replaces
+ * element.m_prefixChars_ by its reversed form (and sets
+ * element.m_prefix_ to 0), temporarily points element.m_cPoints_ at the
+ * reversed prefix, and then either creates a new contraction table entry
+ * (when CE is not yet a prefix special) or extends the existing one.
+ * element.m_cPoints_ and element.m_cPointsOffset_ are restored before
+ * returning; the reversed m_prefixChars_ is kept.
+ *
+ * @param t
+ * build table to update
+ * @param CE
+ * collation element currently mapped for the code points, or
+ * CE_NOT_FOUND_
+ * @param element
+ * rule element to add
+ * @return a newly constructed prefix special CE when CE was not one yet,
+ * otherwise CE unchanged
+ */
+ private int addPrefix(BuildTable t, int CE, Elements element) {
+ // currently the longest prefix we're supporting in Japanese is two
+ // characters long. Although this table could quite easily mimic
+ // complete contraction stuff there is no good reason to make a general
+ // solution, as it would require some error prone messing.
+ ContractionTable contractions = t.m_contractions_;
+ String oldCP = element.m_cPoints_;
+ int oldCPOffset = element.m_cPointsOffset_;
+
+ contractions.m_currentTag_ = CE_SPEC_PROC_TAG_;
+ // here, we will normalize & add prefix to the table.
+ int size = element.m_prefixChars_.length() - element.m_prefix_;
+ for (int j = 1; j < size; j++) {
+ // First add NFD prefix chars to unsafe CP hash table
+ // Unless it is a trail surrogate, which is handled algorithmically
+ // and shouldn't take up space in the table.
+ char ch = element.m_prefixChars_.charAt(j + element.m_prefix_);
+ if (!UTF16.isTrailSurrogate(ch)) {
+ unsafeCPSet(t.m_unsafeCP_, ch);
+ }
+ }
+
+ // StringBuffer reversed = new StringBuffer();
+ m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
+ for (int j = 0; j < size; j++) {
+ // prefixes are going to be looked up backwards
+ // therefore, we will promptly reverse the prefix buffer...
+ int offset = element.m_prefixChars_.length() - j - 1;
+ m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset));
+ }
+ element.m_prefixChars_ = m_utilStringBuffer_.toString();
+ element.m_prefix_ = 0;
+
+ // the first codepoint is also unsafe, as it forms a 'contraction' with
+ // the prefix
+ if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) {
+ unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0));
+ }
+
+ element.m_cPoints_ = element.m_prefixChars_; // from here on the reversed prefix is processed as the contraction string
+ element.m_cPointsOffset_ = element.m_prefix_;
+
+ // Add the last char of the contraction to the contraction-end hash
+ // table. unless it is a trail surrogate, which is handled
+ // algorithmically and shouldn't be in the table
+ if (!UTF16.isTrailSurrogate(element.m_cPoints_
+ .charAt(element.m_cPoints_.length() - 1))) {
+ ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
+ .charAt(element.m_cPoints_.length() - 1));
+ }
+ // First we need to check if contractions starts with a surrogate
+ // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
+
+ // If there are any Jamos in the contraction, we should turn on special
+ // processing for Jamos
+ if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) {
+ t.m_collator_.m_isJamoSpecial_ = true;
+ }
+ // then we need to deal with it
+ // we could already have something in table - or we might not
+ if (!isPrefix(CE)) {
+ // if it wasn't contraction, we wouldn't end up here
+ int firstContractionOffset = addContraction(contractions,
+ CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
+ int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
+ addContraction(contractions, firstContractionOffset,
+ element.m_prefixChars_.charAt(element.m_prefix_), newCE);
+ addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
+ CE);
+ CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset);
+ } else {
+ // we are adding to existing contraction
+ // there were already some elements in the table, so we need to add
+ // a new contraction
+ // Two things can happen here: either the codepoint is already in
+ // the table, or it is not
+ char ch = element.m_prefixChars_.charAt(element.m_prefix_);
+ int position = findCP(contractions, CE, ch);
+ if (position > 0) {
+ // if it is we just continue down the chain
+ int eCE = getCE(contractions, CE, position);
+ int newCE = processContraction(contractions, element, eCE);
+ setContraction(contractions, CE, position, ch, newCE);
+ } else {
+ // if it isn't, we will have to create a new sequence
+ processContraction(contractions, element, CE_NOT_FOUND_);
+ insertContraction(contractions, CE, ch, element.m_mapCE_);
+ }
+ }
+
+ element.m_cPoints_ = oldCP;
+ element.m_cPointsOffset_ = oldCPOffset;
+
+ return CE;
+ }
+
+ /**
+  * Tells whether a collation element is a contraction special.
+  *
+  * @param CE
+  *            collation element to inspect
+  * @return true when CE is special and carries the contraction tag
+  */
+ private static final boolean isContraction(int CE) {
+     if (!isSpecial(CE)) {
+         return false;
+     }
+     return getCETag(CE) == CE_CONTRACTION_TAG_;
+ }
+
+ /**
+  * Tells whether a collation element is a prefix (special processing)
+  * special.
+  *
+  * @param CE
+  *            collation element to inspect
+  * @return true when CE is special and carries the special-processing tag
+  */
+ private static final boolean isPrefix(int CE) {
+     if (!isSpecial(CE)) {
+         return false;
+     }
+     return getCETag(CE) == CE_SPEC_PROC_TAG_;
+ }
+
+ /**
+  * Tells whether a collation element is a special ce.
+  *
+  * @param CE
+  *            collation element to inspect
+  * @return true when the bits selected by CE_SPECIAL_FLAG_ equal
+  *         0xF0000000
+  */
+ private static final boolean isSpecial(int CE) {
+     final int flagBits = CE & RuleBasedCollator.CE_SPECIAL_FLAG_;
+     return flagBits == 0xF0000000;
+ }
+
+ /**
+  * Extracts the tag field of a collation element.
+  *
+  * @param CE
+  *            collation element
+  * @return the bits of CE selected by CE_TAG_MASK_, shifted down by
+  *         CE_TAG_SHIFT_
+  */
+ private static final int getCETag(int CE) {
+     final int tagBits = CE & RuleBasedCollator.CE_TAG_MASK_;
+     return tagBits >>> RuleBasedCollator.CE_TAG_SHIFT_;
+ }
+
+ /**
+  * Gets the ce stored at a position of a contraction table element.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            ce or offset identifying the element in the contraction
+  *            table; only the lower 24 bits are used
+  * @param position
+  *            index of the ce within the element
+  * @return the ce at position, or CE_NOT_FOUND_ if the element does not
+  *         exist or position is outside the element's ce list
+  */
+ private static final int getCE(ContractionTable table, int element,
+         int position) {
+     element &= 0xFFFFFF;
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         return CE_NOT_FOUND_;
+     }
+     // valid indexes are 0..size()-1; anything else would make get() throw
+     if (position < 0 || position >= tbl.m_CEs_.size()) {
+         return CE_NOT_FOUND_;
+     }
+     return tbl.m_CEs_.get(position).intValue();
+ }
+
+ /**
+  * Marks a character in the unsafe-character bit table.
+  *
+  * @param table
+  *            unsafe table, one bit per slot
+  * @param c
+  *            character to be added
+  */
+ private static final void unsafeCPSet(byte table[], char c) {
+     int slot = c;
+     final int directLimit = UNSAFECP_TABLE_SIZE_ << 3;
+     if (slot >= directLimit) {
+         // Surrogates and private use characters never go in the table
+         final boolean surrogateOrPrivateUse = slot >= 0xd800
+                 && slot <= 0xf8ff;
+         if (surrogateOrPrivateUse) {
+             return;
+         }
+         // fold larger characters into the hashed part of the table
+         slot = (slot & UNSAFECP_TABLE_MASK_) + 256;
+     }
+     final int byteIndex = slot >> 3;
+     final int bitInByte = slot & 7;
+     table[byteIndex] |= (1 << bitInByte);
+ }
+
+ /**
+  * Marks a character in the contraction-end bit table.
+  *
+  * @param table
+  *            contraction end table, one bit per slot
+  * @param c
+  *            character to be added
+  */
+ private static final void ContrEndCPSet(byte table[], char c) {
+     int slot = c;
+     final int directLimit = UNSAFECP_TABLE_SIZE_ << 3;
+     if (slot >= directLimit) {
+         // fold larger characters into the hashed part of the table
+         slot = (slot & UNSAFECP_TABLE_MASK_) + 256;
+     }
+     final int byteIndex = slot >> 3;
+     final int bitInByte = slot & 7;
+     table[byteIndex] |= (1 << bitInByte);
+ }
+
+ /**
+  * Appends a code point / ce pair to a contraction table element. If the
+  * element does not exist, a new one is created at the end of the table.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            offset of the element in the contraction table; an offset
+  *            whose lower 24 bits are all set requests a new element
+  * @param codePoint
+  *            codepoint to add
+  * @param value
+  *            ce to store for codePoint
+  * @return special ce built from the table's current tag and the offset
+  *         of the element that was appended to
+  */
+ private static int addContraction(ContractionTable table, int element,
+         char codePoint, int value) {
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         tbl = addAContractionElement(table);
+         element = table.m_elements_.size() - 1;
+     }
+
+     // Integer.valueOf instead of the deprecated boxing constructor
+     tbl.m_CEs_.add(Integer.valueOf(value));
+     tbl.m_codePoints_.append(codePoint);
+     return constructSpecialCE(table.m_currentTag_, element);
+ }
+
+ /**
+ * Adds a contraction element to the table
+ *
+ * @param table
+ * contraction table to update
+ * @return contraction
+ */
+ private static BasicContractionTable addAContractionElement(
+ ContractionTable table) {
+ BasicContractionTable result = new BasicContractionTable();
+ table.m_elements_.add(result);
+ return result;
+ }
+
+ /**
+  * Builds a special ce out of a tag and a 24 bit payload.
+  *
+  * @param tag
+  *            special tag
+  * @param CE
+  *            value whose lower 24 bits become the payload
+  * @return CE_SPECIAL_FLAG_ combined with the shifted tag and the payload
+  */
+ private static final int constructSpecialCE(int tag, int CE) {
+     final int tagBits = tag << RuleBasedCollator.CE_TAG_SHIFT_;
+     final int payload = CE & 0xFFFFFF;
+     return RuleBasedCollator.CE_SPECIAL_FLAG_ | tagBits | payload;
+ }
+
+ /**
+ * Recursively adds the code units of element that follow
+ * element.m_cPointsOffset_ to the contraction table. While recursing,
+ * element.m_cPointsOffset_ is advanced by one per level and restored
+ * before returning.
+ *
+ * @param contractions
+ * contraction table
+ * @param element
+ * contracting element
+ * @param existingCE
+ * ce already present at this point of the chain, or
+ * CE_NOT_FOUND_ when a new sequence is being built
+ * @return at the last code unit: existingCE if it is a contraction table
+ * element with the table's current tag (its 0 and 0xFFFF entries
+ * are changed to element.m_mapCE_), otherwise element.m_mapCE_;
+ * at other levels: existingCE, replaced by a newly constructed
+ * special ce if it was not a contraction table element
+ */
+ private static int processContraction(ContractionTable contractions,
+ Elements element, int existingCE) {
+ int firstContractionOffset = 0;
+ // end of recursion
+ if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) {
+ if (isContractionTableElement(existingCE)
+ && getCETag(existingCE) == contractions.m_currentTag_) {
+ changeContraction(contractions, existingCE, (char) 0,
+ element.m_mapCE_);
+ changeContraction(contractions, existingCE, (char) 0xFFFF,
+ element.m_mapCE_);
+ return existingCE;
+ } else {
+ // can't do just that. existingCe might be a contraction,
+ // meaning that we need to do another step
+ return element.m_mapCE_;
+ }
+ }
+
+ // this recursion currently feeds on the only element we have...
+ // We will have to copy it in order to accommodate both backward
+ // and forward cycles
+ // we encountered either an empty space or a non-contraction element
+ // this means we are constructing a new contraction sequence
+ element.m_cPointsOffset_++;
+ if (!isContractionTableElement(existingCE)) {
+ // if it wasn't contraction, we wouldn't end up here
+ firstContractionOffset = addContraction(contractions,
+ CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, existingCE);
+ int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
+ addContraction(contractions, firstContractionOffset,
+ element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
+ addContraction(contractions, firstContractionOffset, (char) 0xFFFF,
+ existingCE);
+ existingCE = constructSpecialCE(contractions.m_currentTag_,
+ firstContractionOffset);
+ } else {
+ // we are adding to existing contraction
+ // there were already some elements in the table, so we need to add
+ // a new contraction
+ // Two things can happen here: either the codepoint is already in
+ // the table, or it is not
+ int position = findCP(contractions, existingCE, element.m_cPoints_
+ .charAt(element.m_cPointsOffset_));
+ if (position > 0) {
+ // if it is we just continue down the chain
+ int eCE = getCE(contractions, existingCE, position);
+ int newCE = processContraction(contractions, element, eCE);
+ setContraction(contractions, existingCE, position,
+ element.m_cPoints_.charAt(element.m_cPointsOffset_),
+ newCE);
+ } else {
+ // if it isn't, we will have to create a new sequence
+ int newCE = processContraction(contractions, element,
+ CE_NOT_FOUND_);
+ insertContraction(contractions, existingCE, element.m_cPoints_
+ .charAt(element.m_cPointsOffset_), newCE);
+ }
+ }
+ element.m_cPointsOffset_--;
+ return existingCE;
+ }
+
+ /**
+  * Tells whether a ce refers to the contraction table, i.e. is a special
+  * ce tagged as contraction or as special processing (prefix).
+  *
+  * @param CE
+  *            collation element to test
+  * @return true if CE belongs to the contraction table
+  */
+ private static final boolean isContractionTableElement(int CE) {
+     if (!isSpecial(CE)) {
+         return false;
+     }
+     final int tag = getCETag(CE);
+     return tag == CE_CONTRACTION_TAG_ || tag == CE_SPEC_PROC_TAG_;
+ }
+
+ /**
+ * Gets the codepoint
+ *
+ * @param table
+ * contraction table
+ * @param element
+ * offset to the contraction element in the table
+ * @param codePoint
+ * code point to look for
+ * @return the offset to the code point
+ */
+ private static int findCP(ContractionTable table, int element,
+ char codePoint) {
+ BasicContractionTable tbl = getBasicContractionTable(table, element);
+ if (tbl == null) {
+ return -1;
+ }
+
+ int position = 0;
+ while (codePoint > tbl.m_codePoints_.charAt(position)) {
+ position++;
+ if (position > tbl.m_codePoints_.length()) {
+ return -1;
+ }
+ }
+ if (codePoint == tbl.m_codePoints_.charAt(position)) {
+ return position;
+ } else {
+ return -1;
+ }
+ }
+
+ /**
+  * Looks up a contraction element in the contraction table.
+  *
+  * @param table
+  *            contraction table
+  * @param offset
+  *            offset (or ce) of the element; only the lower 24 bits are
+  *            used
+  * @return the element at that offset, or null if the lower 24 bits of
+  *         offset are all set
+  */
+ private static final BasicContractionTable getBasicContractionTable(
+         ContractionTable table, int offset) {
+     final int index = offset & 0xFFFFFF;
+     return index == 0xFFFFFF ? null : table.m_elements_.get(index);
+ }
+
+ /**
+  * Replaces the ce stored for an existing code point of a contraction
+  * element.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            offset to the element in the contraction table
+  * @param codePoint
+  *            codepoint whose ce is to be replaced
+  * @param newCE
+  *            new collation element
+  * @return the lower 24 bits of element if the ce was replaced, 0 if the
+  *         element does not exist, CE_NOT_FOUND_ if the element does not
+  *         contain codePoint
+  */
+ private static final int changeContraction(ContractionTable table,
+         int element, char codePoint, int newCE) {
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         return 0;
+     }
+     // check the bound before every charAt so that running off the end of
+     // the code points reports CE_NOT_FOUND_ instead of throwing
+     final int length = tbl.m_codePoints_.length();
+     int position = 0;
+     while (position < length
+             && codePoint > tbl.m_codePoints_.charAt(position)) {
+         position++;
+     }
+     if (position >= length
+             || codePoint != tbl.m_codePoints_.charAt(position)) {
+         return CE_NOT_FOUND_;
+     }
+     tbl.m_CEs_.set(position, Integer.valueOf(newCE));
+     return element & 0xFFFFFF;
+ }
+
+ /**
+  * Sets a part of a contraction sequence in the table. If the element
+  * does not exist, a new one is created. Returns the element handle.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            offset to the contraction table
+  * @param offset
+  *            index inside the element to overwrite; an index equal to
+  *            the current number of entries appends a new entry
+  * @param codePoint
+  *            contraction character
+  * @param value
+  *            ce value
+  * @return new contraction ce
+  */
+ private static final int setContraction(ContractionTable table,
+         int element, int offset, char codePoint, int value) {
+     element &= 0xFFFFFF;
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         tbl = addAContractionElement(table);
+         element = table.m_elements_.size() - 1;
+     }
+
+     if (offset == tbl.m_CEs_.size()
+             && offset == tbl.m_codePoints_.length()) {
+         // writing just past the last entry (always the case for a freshly
+         // created, empty element): grow by one instead of failing in set()
+         tbl.m_CEs_.add(Integer.valueOf(value));
+         tbl.m_codePoints_.append(codePoint);
+     } else {
+         tbl.m_CEs_.set(offset, Integer.valueOf(value));
+         tbl.m_codePoints_.setCharAt(offset, codePoint);
+     }
+     return constructSpecialCE(table.m_currentTag_, element);
+ }
+
+ /**
+  * Inserts a part of a contraction sequence in the table, in front of the
+  * first stored code point that is not smaller than codePoint. Entries
+  * behind the insertion point are moved back. If the element does not
+  * exist, a new one is created.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            offset to the element in the contraction table
+  * @param codePoint
+  *            code point
+  * @param value
+  *            collation element value
+  * @return contraction collation element
+  */
+ private static final int insertContraction(ContractionTable table,
+         int element, char codePoint, int value) {
+     element &= 0xFFFFFF;
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         tbl = addAContractionElement(table);
+         element = table.m_elements_.size() - 1;
+     }
+
+     // the bound must be tested before charAt, otherwise an empty element
+     // or a code point larger than all entries throws
+     final int length = tbl.m_codePoints_.length();
+     int offset = 0;
+     while (offset < length
+             && tbl.m_codePoints_.charAt(offset) < codePoint) {
+         offset++;
+     }
+
+     tbl.m_CEs_.add(offset, Integer.valueOf(value));
+     tbl.m_codePoints_.insert(offset, codePoint);
+
+     return constructSpecialCE(table.m_currentTag_, element);
+ }
+
+ /**
+ * Finalize addition: stores the ce of element (element.m_mapCE_) in the
+ * build table, either as part of a contraction or as a direct mapping of
+ * a single code unit. If element.m_mapCE_ is 0, all code units of the
+ * element except trail surrogates are also put in the unsafe table.
+ *
+ * @param t
+ * build table
+ * @param element
+ * to add
+ * @return for a contraction (more than one code unit left after
+ * m_cPointsOffset_) the ce returned by addContraction; otherwise
+ * the value that t.m_mapping_ held for the code unit before this
+ * call
+ */
+ private final static int finalizeAddition(BuildTable t, Elements element) {
+ int CE = CE_NOT_FOUND_;
+ // This should add a completely ignorable element to the
+ // unsafe table, so that backward iteration will skip
+ // over it when treating contractions.
+ if (element.m_mapCE_ == 0) {
+ for (int i = 0; i < element.m_cPoints_.length(); i++) {
+ char ch = element.m_cPoints_.charAt(i);
+ if (!UTF16.isTrailSurrogate(ch)) {
+ unsafeCPSet(t.m_unsafeCP_, ch);
+ }
+ }
+ }
+
+ if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) {
+ // we're adding a contraction
+ int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
+ CE = t.m_mapping_.getValue(cp);
+ CE = addContraction(t, CE, element);
+ } else {
+ // easy case
+ CE = t.m_mapping_.getValue(element.m_cPoints_
+ .charAt(element.m_cPointsOffset_));
+
+ if (CE != CE_NOT_FOUND_) {
+ if (isContractionTableElement(CE)) {
+ // adding a non contraction element (thai, expansion,
+ // single) to already existing contraction
+ if (!isPrefix(element.m_mapCE_)) {
+ // we cannot reenter prefix elements - as we are going
+ // to create a dead loop
+ // Only expansions and regular CEs can go here...
+ // Contractions will never happen in this place
+ setContraction(t.m_contractions_, CE, 0, (char) 0,
+ element.m_mapCE_);
+ // This loop has to change the CE at the end of
+ // contraction REDO!
+ changeLastCE(t.m_contractions_, CE, element.m_mapCE_);
+ }
+ } else {
+ t.m_mapping_
+ .setValue(element.m_cPoints_
+ .charAt(element.m_cPointsOffset_),
+ element.m_mapCE_);
+ if (element.m_prefixChars_ != null
+ && element.m_prefixChars_.length() > 0
+ && getCETag(CE) != CE_IMPLICIT_TAG_) {
+ // Add CE for standalone precontext char.
+ Elements origElem = new Elements();
+ origElem.m_prefixChars_ = null;
+ origElem.m_uchars_ = element.m_cPoints_;
+ origElem.m_cPoints_ = origElem.m_uchars_;
+ origElem.m_CEs_[0] = CE;
+ origElem.m_mapCE_ = CE;
+ origElem.m_CELength_ = 1;
+ finalizeAddition(t, origElem);
+ }
+ }
+ } else {
+ t.m_mapping_.setValue(element.m_cPoints_
+ .charAt(element.m_cPointsOffset_), element.m_mapCE_);
+ }
+ }
+ return CE;
+ }
+
+ /**
+ * Adds the contraction in element to the build table, using
+ * CE_CONTRACTION_TAG_ as the contraction table's current tag.
+ *
+ * Note regarding surrogate handling: We are interested only in the single
+ * or leading surrogates in a contraction. If a surrogate is somewhere else
+ * in the contraction, it is going to be handled as a pair of code units, as
+ * it doesn't affect the performance AND handling surrogates specially would
+ * complicate code way too much.
+ *
+ * @param t
+ * build table
+ * @param CE
+ * ce currently mapped for the first code point of the element
+ * @param element
+ * element to add; its m_cPointsOffset_ is advanced past the
+ * first code point while processing and restored afterwards
+ * @return the contraction ce stored in the mapping for a real
+ * contraction, otherwise CE unchanged
+ */
+ private static int addContraction(BuildTable t, int CE, Elements element) {
+ ContractionTable contractions = t.m_contractions_;
+ contractions.m_currentTag_ = CE_CONTRACTION_TAG_;
+
+ // First we need to check if contractions starts with a surrogate
+ int cp = UTF16.charAt(element.m_cPoints_, 0);
+ int cpsize = 1;
+ if (UCharacter.isSupplementary(cp)) {
+ cpsize = 2;
+ }
+ if (cpsize < element.m_cPoints_.length()) {
+ // This is a real contraction, if there are other characters after
+ // the first
+ int size = element.m_cPoints_.length() - element.m_cPointsOffset_;
+ for (int j = 1; j < size; j++) {
+ // First add contraction chars to unsafe CP hash table
+ // Unless it is a trail surrogate, which is handled
+ // algorithmically and shouldn't take up space in the table.
+ if (!UTF16.isTrailSurrogate(element.m_cPoints_
+ .charAt(element.m_cPointsOffset_ + j))) {
+ unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_
+ .charAt(element.m_cPointsOffset_ + j));
+ }
+ }
+ // Add the last char of the contraction to the contraction-end
+ // hash table. unless it is a trail surrogate, which is handled
+ // algorithmically and shouldn't be in the table
+ if (!UTF16.isTrailSurrogate(element.m_cPoints_
+ .charAt(element.m_cPoints_.length() - 1))) {
+ ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_
+ .charAt(element.m_cPoints_.length() - 1));
+ }
+
+ // If there are any Jamos in the contraction, we should turn on
+ // special processing for Jamos
+ if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) {
+ t.m_collator_.m_isJamoSpecial_ = true;
+ }
+ // then we need to deal with it
+ // we could already have something in table - or we might not
+ element.m_cPointsOffset_ += cpsize;
+ if (!isContraction(CE)) {
+ // if it wasn't contraction, we wouldn't end up here
+ int firstContractionOffset = addContraction(contractions,
+ CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
+ int newCE = processContraction(contractions, element,
+ CE_NOT_FOUND_);
+ addContraction(contractions, firstContractionOffset,
+ element.m_cPoints_.charAt(element.m_cPointsOffset_),
+ newCE);
+ addContraction(contractions, firstContractionOffset,
+ (char) 0xFFFF, CE);
+ CE = constructSpecialCE(CE_CONTRACTION_TAG_,
+ firstContractionOffset);
+ } else {
+ // we are adding to existing contraction
+ // there were already some elements in the table, so we need to
+ // add a new contraction
+ // Two things can happen here: either the codepoint is already
+ // in the table, or it is not
+ int position = findCP(contractions, CE, element.m_cPoints_
+ .charAt(element.m_cPointsOffset_));
+ if (position > 0) {
+ // if it is we just continue down the chain
+ int eCE = getCE(contractions, CE, position);
+ int newCE = processContraction(contractions, element, eCE);
+ setContraction(
+ contractions,
+ CE,
+ position,
+ element.m_cPoints_.charAt(element.m_cPointsOffset_),
+ newCE);
+ } else {
+ // if it isn't, we will have to create a new sequence
+ int newCE = processContraction(contractions, element,
+ CE_NOT_FOUND_);
+ insertContraction(contractions, CE, element.m_cPoints_
+ .charAt(element.m_cPointsOffset_), newCE);
+ }
+ }
+ element.m_cPointsOffset_ -= cpsize;
+ t.m_mapping_.setValue(cp, CE);
+ } else if (!isContraction(CE)) {
+ // this is just a surrogate, and there is no contraction
+ t.m_mapping_.setValue(cp, element.m_mapCE_);
+ } else {
+ // fill out the first stage of the contraction with the surrogate
+ // CE
+ changeContraction(contractions, CE, (char) 0, element.m_mapCE_);
+ changeContraction(contractions, CE, (char) 0xFFFF, element.m_mapCE_);
+ }
+ return CE;
+ }
+
+ /**
+  * Overwrites the last ce of a contraction element. This is for adding
+  * non contractions.
+  *
+  * @param table
+  *            contraction table
+  * @param element
+  *            offset to the contraction table
+  * @param value
+  *            collation element value
+  * @return special ce built from the table's current tag and the lower 24
+  *         bits of element, or 0 if the element does not exist
+  */
+ private static final int changeLastCE(ContractionTable table, int element,
+         int value) {
+     BasicContractionTable tbl = getBasicContractionTable(table, element);
+     if (tbl == null) {
+         return 0;
+     }
+
+     // Integer.valueOf instead of the deprecated boxing constructor
+     tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, Integer.valueOf(value));
+     return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
+ }
+
+ /**
+ * Given a set of ranges calculated by allocWeights(), iterate through the
+ * weights. Sets the next weight in cegenerator.m_current_.
+ *
+ * @param cegenerator
+ * object that contains ranges weight range array and its
+ * rangeCount
+ * @return the next weight
+ */
+ private static int nextWeight(CEGenerator cegenerator) {
+ if (cegenerator.m_rangesLength_ > 0) {
+ // get maxByte from the .count field
+ int maxByte = cegenerator.m_ranges_[0].m_count_;
+ // get the next weight
+ int weight = cegenerator.m_ranges_[0].m_start_;
+ if (weight == cegenerator.m_ranges_[0].m_end_) {
+ // this range is finished, remove it and move the following
+ // ones up
+ cegenerator.m_rangesLength_--;
+ if (cegenerator.m_rangesLength_ > 0) {
+ System.arraycopy(cegenerator.m_ranges_, 1,
+ cegenerator.m_ranges_, 0,
+ cegenerator.m_rangesLength_);
+ cegenerator.m_ranges_[0].m_count_ = maxByte;
+ // keep maxByte in ranges[0]
+ }
+ } else {
+ // increment the weight for the next value
+ cegenerator.m_ranges_[0].m_start_ = incWeight(weight,
+ cegenerator.m_ranges_[0].m_length2_, maxByte);
+ }
+ return weight;
+ }
+ return -1;
+ }
+
+ /**
+  * Increments a collation weight at the given byte position, carrying
+  * into the preceding bytes when a byte is already at maxByte.
+  *
+  * @param weight
+  *            to increment
+  * @param length
+  *            1-based index of the byte to increment
+  * @param maxByte
+  *            largest value a weight byte may take
+  * @return new incremented weight
+  */
+ private static final int incWeight(int weight, int length, int maxByte) {
+     int current = weight;
+     int position = length;
+     for (;;) {
+         final int value = getWeightByte(current, position);
+         if (value < maxByte) {
+             return setWeightByte(current, position, value + 1);
+         }
+         // roll over: reset this byte to BYTE_FIRST_TAILORED_ and carry
+         // into the previous byte
+         current = setWeightByte(current, position,
+                 RuleBasedCollator.BYTE_FIRST_TAILORED_);
+         position--;
+     }
+ }
+
+ /**
+  * Reads one byte of a weight.
+  *
+  * @param weight
+  *            weight to read from
+  * @param index
+  *            1-based byte index, 1 being the most significant byte
+  * @return the byte value, 0..0xff
+  */
+ private static final int getWeightByte(int weight, int index) {
+     final int shift = (4 - index) * 8;
+     return (weight >> shift) & 0xff;
+ }
+
+ /**
+  * Replaces one byte of a weight.
+  *
+  * @param weight
+  *            weight to modify
+  * @param index
+  *            1-based byte index, 1 being the most significant byte
+  * @param b
+  *            new byte value
+  * @return the weight with the index-th byte replaced by b
+  */
+ private static final int setWeightByte(int weight, int index, int b) {
+     final int bitOffset = index << 3;
+     // Bits below the replaced byte. An int shift by 32 does not shift at
+     // all (JLS 15.19 uses only the low five bits of the distance), so
+     // the "nothing below" case has to be spelled out as 0.
+     final int lowMask = bitOffset < 32 ? 0xffffffff >>> bitOffset : 0;
+     final int shift = 32 - bitOffset;
+     // 0xffffffff except a 00 "hole" for the index-th byte
+     final int keepMask = lowMask | (0xffffff00 << shift);
+     return (weight & keepMask) | (b << shift);
+ }
+
+ /**
+ * Call getWeightRanges and then determine heuristically which ranges to use
+ * for a given number of weights between (excluding) two limits. Ranges
+ * are lengthened (and the first one possibly split) until n weights fit.
+ * On success ranges[0].m_count_ is overwritten with maxByte.
+ *
+ * @param lowerLimit
+ * lower weight limit (excluded)
+ * @param upperLimit
+ * upper weight limit (excluded)
+ * @param n
+ * number of weights needed
+ * @param maxByte
+ * largest value a weight byte may take
+ * @param ranges
+ * array that receives the weight ranges
+ * @return the number of ranges in the array to use, or 0 if
+ * getWeightRanges finds no range or the ranges cannot hold n
+ * weights
+ */
+ private int allocateWeights(int lowerLimit, int upperLimit, int n,
+ int maxByte, WeightRange ranges[]) {
+ // number of usable byte values 3..maxByte
+ int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1;
+ // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_
+ // countBytes to the power of index, m_utilLongBuffer_ for unsignedness
+ // gcc requires explicit initialization
+ m_utilLongBuffer_[0] = 1;
+ m_utilLongBuffer_[1] = countBytes;
+ m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes;
+ m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes;
+ m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes;
+ int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte,
+ countBytes, ranges);
+ if (rangeCount <= 0) {
+ return 0;
+ }
+ // what is the maximum number of weights with these ranges?
+ long maxCount = 0;
+ for (int i = 0; i < rangeCount; ++i) {
+ maxCount += (long) ranges[i].m_count_
+ * m_utilLongBuffer_[4 - ranges[i].m_length_];
+ }
+ if (maxCount < n) {
+ return 0;
+ }
+ // set the length2 and count2 fields
+ for (int i = 0; i < rangeCount; ++i) {
+ ranges[i].m_length2_ = ranges[i].m_length_;
+ ranges[i].m_count2_ = ranges[i].m_count_;
+ }
+ // try until we find suitably large ranges
+ while (true) {
+ // get the smallest number of bytes in a range
+ int minLength = ranges[0].m_length2_;
+ // sum up the number of elements that fit into ranges of each byte
+ // length
+ Arrays.fill(m_utilCountBuffer_, 0);
+ for (int i = 0; i < rangeCount; ++i) {
+ m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_;
+ }
+ // now try to allocate n elements in the available short ranges
+ if (n <= m_utilCountBuffer_[minLength]
+ + m_utilCountBuffer_[minLength + 1]) {
+ // trivial cases, use the first few ranges
+ maxCount = 0;
+ rangeCount = 0;
+ do {
+ maxCount += ranges[rangeCount].m_count2_;
+ ++rangeCount;
+ } while (n > maxCount);
+ break;
+ } else if (n <= ranges[0].m_count2_ * countBytes) {
+ // easy case, just make this one range large enough by
+ // lengthening it once more, possibly split it
+ rangeCount = 1;
+ // calculate how to split the range between maxLength-1
+ // (count1) and maxLength (count2)
+ long power_1 = m_utilLongBuffer_[minLength
+ - ranges[0].m_length_];
+ long power = power_1 * countBytes;
+ int count2 = (int) ((n + power - 1) / power);
+ int count1 = ranges[0].m_count_ - count2;
+ // split the range
+ if (count1 < 1) {
+ // lengthen the entire range to maxLength
+ lengthenRange(ranges, 0, maxByte, countBytes);
+ } else {
+ // really split the range
+ // create a new range with the end and initial and current
+ // length of the old one
+ rangeCount = 2;
+ ranges[1].m_end_ = ranges[0].m_end_;
+ ranges[1].m_length_ = ranges[0].m_length_;
+ ranges[1].m_length2_ = minLength;
+ // set the end of the first range according to count1
+ int i = ranges[0].m_length_;
+ int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1;
+ // ranges[0].count and count1 may be >countBytes from
+ // merging adjacent ranges; b > maxByte is possible
+ if (b <= maxByte) {
+ ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i,
+ b);
+ } else {
+ ranges[0].m_end_ = setWeightByte(incWeight(
+ ranges[0].m_start_, i - 1, maxByte), i, b
+ - countBytes);
+ }
+ // set the bytes in the end weight at length + 1..length2
+ // to maxByte
+ b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8)
+ | maxByte; // this used to be 0xffffffff
+ ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i)
+ | (b >>> (i << 3)) & (b << ((4 - minLength) << 3));
+ // set the start of the second range to immediately follow
+ // the end of the first one
+ ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength,
+ maxByte);
+ // set the count values (informational)
+ ranges[0].m_count_ = count1;
+ ranges[1].m_count_ = count2;
+
+ ranges[0].m_count2_ = (int) (count1 * power_1);
+ // will be *countBytes when lengthened
+ ranges[1].m_count2_ = (int) (count2 * power_1);
+
+ // lengthen the second range to maxLength
+ lengthenRange(ranges, 1, maxByte, countBytes);
+ }
+ break;
+ }
+ // no good match, lengthen all minLength ranges and iterate
+ for (int i = 0; ranges[i].m_length2_ == minLength; ++i) {
+ lengthenRange(ranges, i, maxByte, countBytes);
+ }
+ }
+
+ if (rangeCount > 1) {
+ // sort the ranges by weight values
+ Arrays.sort(ranges, 0, rangeCount);
+ }
+
+ // set maxByte in ranges[0] for ucol_nextWeight()
+ ranges[0].m_count_ = maxByte;
+
+ return rangeCount;
+ }
+
+ /**
+  * Lengthens one weight range by one byte: start gets
+  * BYTE_FIRST_TAILORED_ and end gets maxByte as the new trail byte, and
+  * the working count is multiplied by countBytes.
+  *
+  * @param range
+  *            weight range array
+  * @param offset
+  *            index of the range to lengthen
+  * @param maxByte
+  *            largest value a weight byte may take
+  * @param countBytes
+  *            number of usable byte values
+  * @return new length
+  */
+ private static final int lengthenRange(WeightRange range[], int offset,
+         int maxByte, int countBytes) {
+     final WeightRange target = range[offset];
+     final int newLength = target.m_length2_ + 1;
+     target.m_start_ = setWeightTrail(target.m_start_, newLength,
+             RuleBasedCollator.BYTE_FIRST_TAILORED_);
+     target.m_end_ = setWeightTrail(target.m_end_, newLength, maxByte);
+     target.m_count2_ *= countBytes;
+     target.m_length2_ = newLength;
+     return newLength;
+ }
+
+ /**
+  * Sets the trail byte of a weight of the given length; bytes behind it
+  * are cleared.
+  *
+  * @param weight
+  *            weight to modify
+  * @param length
+  *            length of the weight in bytes, i.e. 1-based index of the
+  *            trail byte
+  * @param trail
+  *            new trail byte
+  * @return new weight
+  */
+ private static final int setWeightTrail(int weight, int length, int trail) {
+     final int shift = (4 - length) << 3;
+     final int leadBytes = weight & (0xffffff00 << shift);
+     return leadBytes | (trail << shift);
+ }
+
+ /**
+  * Takes two CE weights and calculates the possible ranges of weights
+  * between the two limits, excluding them. For weights with up to 4 bytes
+  * there are up to 2*4-1=7 ranges. Weights are treated as unsigned 32 bit
+  * values throughout.
+  *
+  * @param lowerLimit
+  *            lower weight limit (excluded), assumed not to be 0
+  * @param upperLimit
+  *            upper weight limit (excluded), assumed not to be 0
+  * @param maxByte
+  *            largest value a weight byte may take
+  * @param countBytes
+  *            number of usable byte values
+  * @param ranges
+  *            array that receives copies of the resulting ranges,
+  *            shortest first
+  * @return the number of ranges written, or 0 if lowerLimit is not below
+  *         upperLimit or is a prefix of it
+  */
+ private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte,
+         int countBytes, WeightRange ranges[]) {
+     // assume that both lowerLimit & upperLimit are not 0
+     // get the lengths of the limits
+     int lowerLength = lengthOfWeight(lowerLimit);
+     int upperLength = lengthOfWeight(upperLimit);
+     if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) {
+         return 0;
+     }
+     // check that neither is a prefix of the other
+     if (lowerLength < upperLength) {
+         if (lowerLimit == truncateWeight(upperLimit, lowerLength)) {
+             return 0;
+         }
+     }
+     // if the upper limit is a prefix of the lower limit then the earlier
+     // test lowerLimit >= upperLimit has caught it
+     // reset local variables
+     // With the limit lengths of 1..4, there are up to 7 ranges for
+     // allocation:
+     // range minimum length
+     // lower[4] 4
+     // lower[3] 3
+     // lower[2] 2
+     // middle 1
+     // upper[2] 2
+     // upper[3] 3
+     // upper[4] 4
+     // We are now going to calculate up to 7 ranges.
+     // Some of them will typically overlap, so we will then have to merge
+     // and eliminate ranges.
+
+     // We have to clean cruft from previous invocations
+     // before doing anything. C++ already does that
+     for (int length = 0; length < 5; length++) {
+         m_utilLowerWeightRange_[length].clear();
+         m_utilUpperWeightRange_[length].clear();
+     }
+     m_utilWeightRange_.clear();
+
+     int weight = lowerLimit;
+     for (int length = lowerLength; length >= 2; --length) {
+         m_utilLowerWeightRange_[length].clear();
+         int trail = getWeightByte(weight, length);
+         if (trail < maxByte) {
+             m_utilLowerWeightRange_[length].m_start_ = incWeightTrail(
+                     weight, length);
+             m_utilLowerWeightRange_[length].m_end_ = setWeightTrail(weight,
+                     length, maxByte);
+             m_utilLowerWeightRange_[length].m_length_ = length;
+             m_utilLowerWeightRange_[length].m_count_ = maxByte - trail;
+         }
+         weight = truncateWeight(weight, length - 1);
+     }
+     m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1);
+
+     weight = upperLimit;
+     // [0] and [1] are not used - this simplifies indexing,
+     // m_utilUpperWeightRange_
+
+     for (int length = upperLength; length >= 2; length--) {
+         int trail = getWeightByte(weight, length);
+         if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) {
+             m_utilUpperWeightRange_[length].m_start_ = setWeightTrail(
+                     weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
+             m_utilUpperWeightRange_[length].m_end_ = decWeightTrail(weight,
+                     length);
+             m_utilUpperWeightRange_[length].m_length_ = length;
+             m_utilUpperWeightRange_[length].m_count_ = trail
+                     - RuleBasedCollator.BYTE_FIRST_TAILORED_;
+         }
+         weight = truncateWeight(weight, length - 1);
+     }
+     m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1);
+
+     // set the middle range
+     m_utilWeightRange_.m_length_ = 1;
+     if (Utility.compareUnsigned(m_utilWeightRange_.m_end_,
+             m_utilWeightRange_.m_start_) >= 0) {
+         m_utilWeightRange_.m_count_ = ((m_utilWeightRange_.m_end_
+                 - m_utilWeightRange_.m_start_) >>> 24) + 1;
+     } else {
+         // eliminate overlaps
+         // remove the middle range
+         m_utilWeightRange_.m_count_ = 0;
+         // reduce or remove the lower ranges that go beyond upperLimit
+         for (int length = 4; length >= 2; --length) {
+             if (m_utilLowerWeightRange_[length].m_count_ > 0
+                     && m_utilUpperWeightRange_[length].m_count_ > 0) {
+                 int start = m_utilUpperWeightRange_[length].m_start_;
+                 int end = m_utilLowerWeightRange_[length].m_end_;
+                 // weights are unsigned: a plain int comparison gives the
+                 // wrong answer when end and start straddle 0x80000000
+                 if (Utility.compareUnsigned(end, start) >= 0
+                         || incWeight(end, length, maxByte) == start) {
+                     // lower and upper ranges collide or are directly
+                     // adjacent: merge these two and remove all shorter
+                     // ranges
+                     start = m_utilLowerWeightRange_[length].m_start_;
+                     end = m_utilLowerWeightRange_[length].m_end_
+                             = m_utilUpperWeightRange_[length].m_end_;
+                     // merging directly adjacent ranges needs to subtract
+                     // the 0/1 gaps in between;
+                     // it may result in a range with count>countBytes
+                     m_utilLowerWeightRange_[length].m_count_ = getWeightByte(
+                             end, length)
+                             - getWeightByte(start, length)
+                             + 1
+                             + countBytes
+                             * (getWeightByte(end, length - 1) - getWeightByte(
+                                     start, length - 1));
+                     m_utilUpperWeightRange_[length].m_count_ = 0;
+                     while (--length >= 2) {
+                         m_utilLowerWeightRange_[length].m_count_
+                                 = m_utilUpperWeightRange_[length].m_count_ = 0;
+                     }
+                     break;
+                 }
+             }
+         }
+     }
+
+     // copy the ranges, shortest first, into the result array
+     int rangeCount = 0;
+     if (m_utilWeightRange_.m_count_ > 0) {
+         ranges[0] = new WeightRange(m_utilWeightRange_);
+         rangeCount = 1;
+     }
+     for (int length = 2; length <= 4; ++length) {
+         // copy upper first so that later the middle range is more likely
+         // the first one to use
+         if (m_utilUpperWeightRange_[length].m_count_ > 0) {
+             ranges[rangeCount] = new WeightRange(
+                     m_utilUpperWeightRange_[length]);
+             ++rangeCount;
+         }
+         if (m_utilLowerWeightRange_[length].m_count_ > 0) {
+             ranges[rangeCount] = new WeightRange(
+                     m_utilLowerWeightRange_[length]);
+             ++rangeCount;
+         }
+     }
+     return rangeCount;
+ }
+
+ /**
+  * Keeps the leading bytes of a weight and clears the rest.
+  *
+  * @param weight
+  *            weight to truncate
+  * @param length
+  *            number of leading bytes to keep
+  * @return truncated weight
+  */
+ private static final int truncateWeight(int weight, int length) {
+     final int shift = (4 - length) << 3;
+     final int keepMask = 0xffffffff << shift;
+     return weight & keepMask;
+ }
+
+ /**
+ * Length of the weight
+ *
+ * @param weight
+ * @return length of the weight
+ */
+ private static final int lengthOfWeight(int weight) {
+ if ((weight & 0xffffff) == 0) {
+ return 1;
+ } else if ((weight & 0xffff) == 0) {
+ return 2;
+ } else if ((weight & 0xff) == 0) {
+ return 3;
+ }
+ return 4;
+ }
+
+ /**
+ * Adds one to the last byte of a weight of the given length.
+ *
+ * @param weight
+ * weight to increment
+ * @param length
+ * number of bytes in the weight
+ * @return weight plus one unit at byte position length
+ */
+ private static final int incWeightTrail(int weight, int length) {
+ final int trailUnit = 1 << ((4 - length) * 8);
+ return weight + trailUnit;
+ }
+
+ /**
+ * Decrement the weight trail
+ *
+ * @param weight
+ * @param length
+ * @return new weight
+ */
+ private static int decWeightTrail(int weight, int length) {
+ return weight - (1 << ((4 - length) << 3));
+ }
+
+ /**
+ * Gets the codepoint
+ *
+ * @param tbl
+ * contraction table
+ * @param codePoint
+ * code point to look for
+ * @return the offset to the code point
+ */
+ private static int findCP(BasicContractionTable tbl, char codePoint) {
+ int position = 0;
+ while (codePoint > tbl.m_codePoints_.charAt(position)) {
+ position++;
+ if (position > tbl.m_codePoints_.length()) {
+ return -1;
+ }
+ }
+ if (codePoint == tbl.m_codePoints_.charAt(position)) {
+ return position;
+ } else {
+ return -1;
+ }
+ }
+
+ /**
+ * Finds the CE stored for a character in one basic contraction table.
+ *
+ * @param table
+ * contraction table, may be null
+ * @param element
+ * CE identifying the basic contraction table to search
+ * @param ch
+ * character to look up
+ * @return the CE stored at the character's position, or CE_NOT_FOUND_
+ * if the table is null, the basic table does not exist or the
+ * character has no entry
+ */
+ private static int findCE(ContractionTable table, int element, char ch) {
+ if (table == null) {
+ return CE_NOT_FOUND_;
+ }
+ BasicContractionTable tbl = getBasicContractionTable(table, element);
+ if (tbl == null) {
+ return CE_NOT_FOUND_;
+ }
+ int position = findCP(tbl, ch);
+ // position == size() is already out of range for get()
+ if (position < 0 || position >= tbl.m_CEs_.size()) {
+ return CE_NOT_FOUND_;
+ }
+ return tbl.m_CEs_.get(position).intValue();
+ }
+
+ /**
+ * Checks if the zero-terminated string is tailored in the contraction
+ * table. Each character is looked up with findCE, starting from element
+ * and continuing with the CE found for the previous character.
+ *
+ * @param table
+ * contraction table
+ * @param element
+ * CE to start the lookup from
+ * @param array
+ * character array to check, terminated by a 0 character
+ * @param offset
+ * array offset
+ * @return true if it is tailored
+ */
+ private static boolean isTailored(ContractionTable table, int element,
+ char array[], int offset) {
+ for (int i = offset; array[i] != 0; i++) {
+ element = findCE(table, element, array[i]);
+ if (element == CE_NOT_FOUND_) {
+ return false;
+ }
+ if (!isContractionTableElement(element)) {
+ // reached a CE that is not a further contraction table
+ return true;
+ }
+ }
+ // the whole string was consumed: tailored if the table reached has a
+ // CE at position 0
+ return getCE(table, element, 0) != CE_NOT_FOUND_;
+ }
+
+ /**
+ * Assembles the data collected in the build table and stores it in the
+ * collator: the option attributes, the expansions, the contraction index
+ * and contraction CEs, the serialized trie, the max expansion tables and
+ * the unsafe and contraction-ending code point tables. The last two are
+ * OR-ed with the corresponding UCA tables before they are stored.
+ *
+ * @param t
+ * build table
+ * @param collator
+ * to update
+ */
+ private void assembleTable(BuildTable t, RuleBasedCollator collator) {
+ IntTrieBuilder mapping = t.m_mapping_;
+ List expansions = t.m_expansions_;
+ ContractionTable contractions = t.m_contractions_;
+ MaxExpansionTable maxexpansion = t.m_maxExpansions_;
+
+ // contraction offset has to be in since we are building on the
+ // UCA contractions
+ // int beforeContractions = (HEADER_SIZE_
+ // + paddedsize(expansions.size() << 2)) >>> 1;
+ collator.m_contractionOffset_ = 0;
+ // flattens the contraction tables; returns the number of entries
+ int contractionsSize = constructTable(contractions);
+
+ // the following operation depends on the trie data. Therefore, we have
+ // to do it before the trie is compacted
+ // sets jamo expansions
+ getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_,
+ collator.m_isJamoSpecial_);
+
+ // TODO: LATIN1 array is now in the utrie - it should be removed from
+ // the calculation
+ setAttributes(collator, t.m_options_);
+ // copy expansions
+ int size = expansions.size();
+ collator.m_expansion_ = new int[size];
+ for (int i = 0; i < size; i++) {
+ collator.m_expansion_[i] = expansions.get(i).intValue();
+ }
+ // contractions block
+ if (contractionsSize != 0) {
+ // copy contraction index
+ collator.m_contractionIndex_ = new char[contractionsSize];
+ contractions.m_codePoints_.getChars(0, contractionsSize,
+ collator.m_contractionIndex_, 0);
+ // copy contraction collation elements
+ collator.m_contractionCE_ = new int[contractionsSize];
+ for (int i = 0; i < contractionsSize; i++) {
+ collator.m_contractionCE_[i] = contractions.m_CEs_.get(i).intValue();
+ }
+ }
+ // copy mapping table
+ collator.m_trie_ = mapping.serialize(t,
+ RuleBasedCollator.DataManipulate.getInstance());
+ // copy max expansion table
+ // not copying the first element which is a dummy
+ // to be in synch with icu4c's builder, we continue to use the
+ // expansion offset
+ // omitting expansion offset in builder
+ collator.m_expansionOffset_ = 0;
+ size = maxexpansion.m_endExpansionCE_.size();
+ // size - 1 because index 0 is skipped; this relies on the table
+ // holding at least that dummy entry
+ collator.m_expansionEndCE_ = new int[size - 1];
+ for (int i = 1; i < size; i++) {
+ collator.m_expansionEndCE_[i - 1] = maxexpansion.m_endExpansionCE_
+ .get(i).intValue();
+ }
+ collator.m_expansionEndCEMaxSize_ = new byte[size - 1];
+ for (int i = 1; i < size; i++) {
+ collator.m_expansionEndCEMaxSize_[i - 1] = maxexpansion.m_expansionCESize_
+ .get(i).byteValue();
+ }
+ // Unsafe chars table. Finish it off, then copy it.
+ unsafeCPAddCCNZ(t);
+ // Or in unsafebits from UCA, making a combined table.
+ for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
+ t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i];
+ }
+ // the collator shares the build table's array, it is not copied
+ collator.m_unsafe_ = t.m_unsafeCP_;
+
+ // Finish building Contraction Ending chars hash table and then copy it
+ // out.
+ // Or in unsafebits from UCA, making a combined table
+ for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
+ t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i];
+ }
+ collator.m_contractionEnd_ = t.m_contrEndCP_;
+ }
+
+ /**
+ * Copies the options of an option set into the collator.
+ * latinOneFailed_ is set to true before the options are applied and
+ * reset to false afterwards.
+ * NOTE(review): presumably this keeps the setters from rebuilding the
+ * Latin-1 data for every single option - confirm against
+ * RuleBasedCollator.
+ *
+ * @param collator
+ * which attribute is to be set
+ * @param option
+ * to set with
+ */
+ private static final void setAttributes(RuleBasedCollator collator,
+ CollationRuleParser.OptionSet option) {
+ collator.latinOneFailed_ = true;
+ collator.m_caseFirst_ = option.m_caseFirst_;
+ collator.setDecomposition(option.m_decomposition_);
+ collator
+ .setAlternateHandlingShifted(option.m_isAlternateHandlingShifted_);
+ collator.setCaseLevel(option.m_isCaseLevel_);
+ collator.setFrenchCollation(option.m_isFrenchCollation_);
+ collator.m_isHiragana4_ = option.m_isHiragana4_;
+ collator.setStrength(option.m_strength_);
+ collator.m_variableTopValue_ = option.m_variableTopValue_;
+ // the array reference is shared with the option set, not copied
+ collator.m_reorderCodes_ = option.m_scriptOrder_;
+ collator.latinOneFailed_ = false;
+ }
+
+ /**
+ * Constructing the contraction table: flattens all basic contraction
+ * tables into table.m_codePoints_ and table.m_CEs_, records the start
+ * offset of each basic table in table.m_offsets_, and rewrites every
+ * contraction CE (in the flattened CEs and in table.m_mapping_) so that
+ * it carries the flattened offset instead of the basic table index.
+ *
+ * @param table
+ * contraction table
+ * @return total number of CEs over all basic contraction tables, 0 if
+ * the table has no elements
+ */
+ private int constructTable(ContractionTable table) {
+ // See how much memory we need
+ int tsize = table.m_elements_.size();
+ if (tsize == 0) {
+ return 0;
+ }
+ table.m_offsets_.clear();
+ int position = 0;
+ for (int i = 0; i < tsize; i++) {
+ table.m_offsets_.add(new Integer(position));
+ position += table.m_elements_.get(i).m_CEs_
+ .size();
+ }
+ table.m_CEs_.clear();
+ table.m_codePoints_.delete(0, table.m_codePoints_.length());
+ // Now stuff the things in
+ StringBuilder cpPointer = table.m_codePoints_;
+ List CEPointer = table.m_CEs_;
+ for (int i = 0; i < tsize; i++) {
+ BasicContractionTable bct = table.m_elements_.get(i);
+ int size = bct.m_CEs_.size();
+ char ccMax = 0;
+ char ccMin = 255;
+ int offset = CEPointer.size();
+ // entry 0 of each basic table: its CE is copied as is, its code
+ // point slot is replaced by the combining class summary below
+ CEPointer.add(bct.m_CEs_.get(0));
+ for (int j = 1; j < size; j++) {
+ char ch = bct.m_codePoints_.charAt(j);
+ char cc = (char) (UCharacter.getCombiningClass(ch) & 0xFF);
+ if (cc > ccMax) {
+ ccMax = cc;
+ }
+ if (cc < ccMin) {
+ ccMin = cc;
+ }
+ cpPointer.append(ch);
+ CEPointer.add(bct.m_CEs_.get(j));
+ }
+ // NOTE(review): << binds tighter than ?:, so this evaluates as
+ // ((ccMin == ccMax) ? 1 : 0) | ccMax - the flag is not shifted
+ // into the high byte. Confirm what the readers of this value
+ // expect before changing it.
+ cpPointer.insert(offset,
+ (char) (((ccMin == ccMax) ? 1 : 0 << 8) | ccMax));
+ // replace basic table indexes by flattened offsets
+ for (int j = 0; j < size; j++) {
+ if (isContractionTableElement(CEPointer.get(offset + j).intValue())) {
+ int ce = CEPointer.get(offset + j).intValue();
+ CEPointer.set(offset + j,
+ new Integer(constructSpecialCE(getCETag(ce),
+ table.m_offsets_.get(getContractionOffset(ce))
+ .intValue())));
+ }
+ }
+ }
+
+ // same replacement for every code point in the mapping
+ for (int i = 0; i <= 0x10FFFF; i++) {
+ int CE = table.m_mapping_.getValue(i);
+ if (isContractionTableElement(CE)) {
+ CE = constructSpecialCE(getCETag(CE),
+ table.m_offsets_.get(getContractionOffset(CE)).intValue());
+ table.m_mapping_.setValue(i, CE);
+ }
+ }
+ return position;
+ }
+
+ /**
+ * Extracts the contraction offset, i.e. the low 24 bits, from a
+ * collation element.
+ *
+ * @param ce
+ * collation element
+ * @return contraction offset
+ */
+ private static final int getContractionOffset(int ce) {
+ final int offsetMask = 0x00FFFFFF;
+ return ce & offsetMask;
+ }
+
+ /**
+ * Gets the maximum Jamo expansion
+ *
+ * @param mapping
+ * trie table
+ * @param maxexpansion
+ * maximum expansion table
+ * @param maxjamoexpansion
+ * maximum jamo expansion table
+ * @param jamospecial
+ * is jamo special?
+ */
+ private static void getMaxExpansionJamo(IntTrieBuilder mapping,
+ MaxExpansionTable maxexpansion,
+ MaxJamoExpansionTable maxjamoexpansion, boolean jamospecial) {
+ int VBASE = 0x1161;
+ int TBASE = 0x11A8;
+ int VCOUNT = 21;
+ int TCOUNT = 28;
+ int v = VBASE + VCOUNT - 1;
+ int t = TBASE + TCOUNT - 1;
+
+ while (v >= VBASE) {
+ int ce = mapping.getValue(v);
+ if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
+ setMaxExpansion(ce, (byte) 2, maxexpansion);
+ }
+ v--;
+ }
+
+ while (t >= TBASE) {
+ int ce = mapping.getValue(t);
+ if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
+ setMaxExpansion(ce, (byte) 3, maxexpansion);
+ }
+ t--;
+ }
+ // According to the docs, 99% of the time, the Jamo will not be special
+ if (jamospecial) {
+ // gets the max expansion in all unicode characters
+ int count = maxjamoexpansion.m_endExpansionCE_.size();
+ byte maxTSize = (byte) (maxjamoexpansion.m_maxLSize_
+ + maxjamoexpansion.m_maxVSize_ + maxjamoexpansion.m_maxTSize_);
+ byte maxVSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_);
+
+ while (count > 0) {
+ count--;
+ if ((maxjamoexpansion.m_isV_.get(count))
+ .booleanValue() == true) {
+ setMaxExpansion(
+ (maxjamoexpansion.m_endExpansionCE_
+ .get(count)).intValue(), maxVSize,
+ maxexpansion);
+ } else {
+ setMaxExpansion(
+ (maxjamoexpansion.m_endExpansionCE_
+ .get(count)).intValue(), maxTSize,
+ maxexpansion);
+ }
+ }
+ }
+ }
+
+ /**
+ * To the UnsafeCP hash table, add all chars with combining class != 0.
+ * Also adds the first char of the composed form of every prefix lookup
+ * entry and, if buildCMTabFlag is set and the build table has no
+ * combining mark lookup yet, builds t.cmLookup from the chars found.
+ *
+ * @param t
+ * build table
+ */
+ private final void unsafeCPAddCCNZ(BuildTable t) {
+ boolean buildCMTable = (buildCMTabFlag & (t.cmLookup == null));
+ char[] cm = null; // combining mark array
+ int[] index = new int[256]; // number of marks stored per class
+ int count = 0;
+
+ if (buildCMTable) {
+ cm = new char[0x10000];
+ }
+ // covers U+0000..U+FFFE; the loop condition excludes U+FFFF
+ for (char c = 0; c < 0xffff; c++) {
+ int fcd = m_nfcImpl_.getFCD16FromSingleLead(c); // TODO: review for handling supplementary characters
+ if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
+ (UTF16.isLeadSurrogate(c) && fcd != 0)) {
+ // c is a leading surrogate with some FCD data
+ unsafeCPSet(t.m_unsafeCP_, c);
+ if (buildCMTable && (fcd != 0)) {
+ // marks are bucketed by the low byte of fcd, 256 slots
+ // per bucket; index[cc] is not checked against 256 here
+ int cc = (fcd & 0xff);
+ int pos = (cc << 8) + index[cc];
+ cm[pos] = c;
+ index[cc]++;
+ count++;
+ }
+ }
+ }
+
+ if (t.m_prefixLookup_ != null) {
+ Enumeration els = Collections.enumeration(t.m_prefixLookup_.values());
+ while (els.hasMoreElements()) {
+ Elements e = els.nextElement();
+ // codepoints here are in the NFD form. We need to add the
+ // first code point of the NFC form to unsafe, because
+ // strcoll needs to backup over them.
+ // weiv: This is wrong! See the comment above.
+ // String decomp = Normalizer.decompose(e.m_cPoints_, true);
+ // unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0));
+ // it should be:
+ String comp = Normalizer.compose(e.m_cPoints_, false);
+ unsafeCPSet(t.m_unsafeCP_, comp.charAt(0));
+ }
+ }
+
+ if (buildCMTable) {
+ t.cmLookup = new CombinClassTable();
+ t.cmLookup.generate(cm, count, index);
+ }
+ }
+
+ /**
+ * Create closure for one range of code points of the same category.
+ * For every code point in the range that has a decomposition which the
+ * collator does not already treat as equal to the code point itself, an
+ * element for the composed form is added to the build table. Its CEs
+ * are either the CEs of the decomposition or, if the decomposition is in
+ * the prefix lookup, the mapped CE of that entry.
+ * Ranges of type UNASSIGNED and PRIVATE_USE are skipped.
+ *
+ * @param t
+ * build table
+ * @param collator
+ * RuleBasedCollator
+ * @param colEl
+ * collation element iterator
+ * @param start
+ * first code point of the range
+ * @param limit
+ * code point after the last one of the range
+ * @param type
+ * character type
+ * @return always true
+ */
+ private boolean enumCategoryRangeClosureCategory(BuildTable t,
+ RuleBasedCollator collator, CollationElementIterator colEl,
+ int start, int limit, int type) {
+ if (type != UCharacterCategory.UNASSIGNED
+ && type != UCharacterCategory.PRIVATE_USE) {
+ // if the range is assigned - we might omit more categories later
+
+ for (int u32 = start; u32 < limit; u32++) {
+ String decomp = m_nfcImpl_.getDecomposition(u32);
+ if (decomp != null) {
+ String comp = UCharacter.toString(u32);
+ if (!collator.equals(comp, decomp)) {
+ // m_utilElement_ first serves as the lookup key for the
+ // decomposed form, then is refilled for the composed form
+ m_utilElement_.m_cPoints_ = decomp;
+ m_utilElement_.m_prefix_ = 0;
+ Elements prefix = t.m_prefixLookup_.get(m_utilElement_);
+ if (prefix == null) {
+ m_utilElement_.m_cPoints_ = comp;
+ m_utilElement_.m_prefix_ = 0;
+ m_utilElement_.m_prefixChars_ = null;
+ colEl.setText(decomp);
+ int ce = colEl.next();
+ m_utilElement_.m_CELength_ = 0;
+ while (ce != CollationElementIterator.NULLORDER) {
+ m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce;
+ ce = colEl.next();
+ }
+ } else {
+ m_utilElement_.m_cPoints_ = comp;
+ m_utilElement_.m_prefix_ = 0;
+ m_utilElement_.m_prefixChars_ = null;
+ m_utilElement_.m_CELength_ = 1;
+ m_utilElement_.m_CEs_[0] = prefix.m_mapCE_;
+ // This character uses a prefix. We have to add it
+ // to the unsafe table, as its decomposed form is
+ // already in. In Japanese, this happens for \u309e
+ // & \u30fe
+ // Since unsafeCPSet is static in ucol_elm, we are
+ // going to wrap it up in the unsafeCPAddCCNZ
+ // function
+ }
+ addAnElement(t, m_utilElement_);
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Determine if a character is a Jamo, i.e. lies in one of the ranges
+ * U+1100..U+1112, U+1161..U+1175 or U+11A8..U+11C2.
+ *
+ * @param ch
+ * character to test
+ * @return true if ch is a Jamo, false otherwise
+ */
+ private static final boolean isJamo(char ch) {
+ // the middle range starts at 0x1161 and covers 21 code points, the
+ // same VBASE/VCOUNT that getMaxExpansionJamo uses; its lower bound
+ // must come first or the test can never succeed
+ return (ch >= 0x1100 && ch <= 0x1112) || (ch >= 0x1161 && ch <= 0x1175)
+ || (ch >= 0x11A8 && ch <= 0x11C2);
+ }
+
+ /**
+ * Produces canonical closure.
+ * A temporary copy of the build table is assembled into a collator,
+ * which is then used to compute the CEs for the closure of every
+ * character type range. Afterwards every token of the parsed rules that
+ * contains a base character followed by a combining mark is passed to
+ * addTailCanonicalClosures.
+ *
+ * @param t
+ * build table
+ */
+ private void canonicalClosure(BuildTable t) {
+ BuildTable temp = new BuildTable(t);
+ assembleTable(temp, temp.m_collator_);
+ // produce canonical closure
+ CollationElementIterator coleiter = temp.m_collator_
+ .getCollationElementIterator("");
+ RangeValueIterator typeiter = UCharacter.getTypeIterator();
+ RangeValueIterator.Element element = new RangeValueIterator.Element();
+ while (typeiter.next(element)) {
+ enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter,
+ element.start, element.limit, element.value);
+ }
+
+ // hand the combining mark lookup of the temporary table over to t
+ t.cmLookup = temp.cmLookup;
+ temp.cmLookup = null;
+
+ for (int i = 0; i < m_parser_.m_resultLength_; i++) {
+ char baseChar, firstCM;
+ // now we need to generate the CEs
+ // We stuff the initial value in the buffers, and increase the
+ // appropriate buffer according to strength
+ // createElements(t, m_parser_.m_listHeader_[i]);
+ CollationRuleParser.Token tok = m_parser_.m_listHeader_[i].m_first_;
+ m_utilElement_.clear();
+ while (tok != null) {
+ m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
+ m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
+ if (tok.m_prefix_ != 0) {
+ // we will just copy the prefix here, and adjust accordingly
+ // in
+ // the addPrefix function in ucol_elm. The reason is that we
+ // need to add both composed AND decomposed elements to the
+ // unsafe table.
+ // NOTE(review): the shifts in this branch are signed (>>)
+ // while the branch below uses >>>; a length of 128 or more
+ // would come out negative here - confirm whether such
+ // lengths can occur
+ int size = tok.m_prefix_ >> 24;
+ int offset = tok.m_prefix_ & 0x00FFFFFF;
+ m_utilElement_.m_prefixChars_ = m_parser_.m_source_
+ .substring(offset, offset + size);
+ size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
+ offset = (tok.m_source_ & 0x00FFFFFF)
+ + (tok.m_prefix_ >> 24);
+ m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
+ offset, offset + size);
+ } else {
+ m_utilElement_.m_prefixChars_ = null;
+ int offset = tok.m_source_ & 0x00FFFFFF;
+ int size = tok.m_source_ >>> 24;
+ m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(
+ offset, offset + size);
+ }
+ m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
+
+ // find the last char with a zero low FCD byte (the base) and the
+ // first char with a non-zero one that follows a base
+ baseChar = firstCM = 0; // reset
+ for (int j = 0; j < m_utilElement_.m_cPoints_.length()
+ - m_utilElement_.m_cPointsOffset_; j++) {
+
+ int fcd = m_nfcImpl_.getFCD16FromSingleLead(m_utilElement_.m_cPoints_.charAt(j)); // TODO: review for handling supplementary characters
+ if ((fcd & 0xff) == 0) {
+ baseChar = m_utilElement_.m_cPoints_.charAt(j);
+ } else {
+ if ((baseChar != 0) && (firstCM == 0)) {
+ firstCM = m_utilElement_.m_cPoints_.charAt(j); // first
+ // combining
+ // mark
+ }
+ }
+ }
+
+ if ((baseChar != 0) && (firstCM != 0)) {
+ addTailCanonicalClosures(t, temp.m_collator_, coleiter,
+ baseChar, firstCM);
+ }
+ tok = tok.m_next_;
+ }
+ }
+ }
+
+ /**
+ * Adds closure elements for the current m_utilElement_ string, in which
+ * baseChar is followed by the combining mark cMark. Combining marks from
+ * t.cmLookup are tried in turn; for each one that composes with baseChar
+ * into a single char, an element is added whose string is the composed
+ * form of (string with cMark replaced by that mark) followed by cMark.
+ * Does nothing if t.cmLookup is null.
+ *
+ * @param t
+ * build table
+ * @param m_collator
+ * collator (not used in this method)
+ * @param colEl
+ * collation element iterator used to compute the CEs
+ * @param baseChar
+ * base character of the string
+ * @param cMark
+ * first combining mark following the base character
+ */
+ private void addTailCanonicalClosures(BuildTable t,
+ RuleBasedCollator m_collator, CollationElementIterator colEl,
+ char baseChar, char cMark) {
+ if (t.cmLookup == null) {
+ return;
+ }
+ CombinClassTable cmLookup = t.cmLookup;
+ int[] index = cmLookup.index;
+ int cClass = m_nfcImpl_.getFCD16FromSingleLead(cMark) & 0xff; // TODO: review for handling supplementary characters
+ int maxIndex = 0;
+ // precomposed chars found so far and the class of the mark each was
+ // built with; nothing here checks precompLen against the size 256
+ char[] precompCh = new char[256];
+ int[] precompClass = new int[256];
+ int precompLen = 0;
+ Elements element = new Elements();
+
+ // presumably index[k] is the end offset in cPoints of the marks up to
+ // class k, so only marks of a lower class than cMark are tried -
+ // confirm against CombinClassTable.generate
+ if (cClass > 0) {
+ maxIndex = index[cClass - 1];
+ }
+ for (int i = 0; i < maxIndex; i++) {
+ StringBuilder decompBuf = new StringBuilder();
+ decompBuf.append(baseChar).append(cmLookup.cPoints[i]);
+ String comp = Normalizer.compose(decompBuf.toString(), false);
+ if (comp.length() == 1) {
+ precompCh[precompLen] = comp.charAt(0);
+ precompClass[precompLen] = (m_nfcImpl_.getFCD16FromSingleLead(cmLookup.cPoints[i]) & 0xff); // TODO: review for handling supplementary characters
+ precompLen++;
+ // copy the string, substituting the tried mark for cMark
+ StringBuilder decomp = new StringBuilder();
+ for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {
+ if (m_utilElement_.m_cPoints_.charAt(j) == cMark) {
+ decomp.append(cmLookup.cPoints[i]);
+ } else {
+ decomp.append(m_utilElement_.m_cPoints_.charAt(j));
+ }
+ }
+ comp = Normalizer.compose(decomp.toString(), false);
+ StringBuilder buf = new StringBuilder(comp);
+ buf.append(cMark);
+ decomp.append(cMark);
+ comp = buf.toString();
+
+ // look up the decomposed form, then switch to the composed one
+ element.m_cPoints_ = decomp.toString();
+ element.m_CELength_ = 0;
+ element.m_prefix_ = 0;
+ Elements prefix = t.m_prefixLookup_.get(element);
+ element.m_cPoints_ = comp;
+ element.m_uchars_ = comp;
+
+ if (prefix == null) {
+ element.m_prefix_ = 0;
+ element.m_prefixChars_ = null;
+ colEl.setText(decomp.toString());
+ int ce = colEl.next();
+ element.m_CELength_ = 0;
+ while (ce != CollationElementIterator.NULLORDER) {
+ element.m_CEs_[element.m_CELength_++] = ce;
+ ce = colEl.next();
+ }
+ } else {
+ element.m_cPoints_ = comp;
+ element.m_prefix_ = 0;
+ element.m_prefixChars_ = null;
+ element.m_CELength_ = 1;
+ element.m_CEs_[0] = prefix.m_mapCE_;
+ }
+ setMapCE(t, element);
+ finalizeAddition(t, element);
+
+ if (comp.length() > 2) {
+ // This is a fix for tailoring contractions with accented
+ // character at the end of contraction string.
+ addFCD4AccentedContractions(t, colEl, comp, element);
+ }
+ if (precompLen > 1) {
+ precompLen = addMultiCMontractions(t, colEl, element,
+ precompCh, precompClass, precompLen, cMark, i,
+ decomp.toString());
+ }
+ }
+ }
+
+ }
+
+ /**
+ * Computes element.m_mapCE_ from the CEs of the element. A two CE
+ * expansion that matches the long primary pattern tested below gets a
+ * long primary special CE. Every other element gets an expansion
+ * special CE: its CEs are appended to the expansion table and the max
+ * expansion for its last CE is recorded.
+ *
+ * @param t
+ * build table
+ * @param element
+ * element to set the map CE for; the expansion branch reads
+ * m_CEs_[0] and m_CEs_[m_CELength_ - 1]
+ */
+ private void setMapCE(BuildTable t, Elements element) {
+ List expansions = t.m_expansions_;
+ element.m_mapCE_ = 0;
+
+ // long primary: a two CE expansion whose continuation has only
+ // primaries, with a common secondary and a common tertiary in CE 0
+ if (element.m_CELength_ == 2 // a two CE expansion
+ && RuleBasedCollator.isContinuation(element.m_CEs_[1])
+ && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0
+ && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
+ // a common secondary
+ && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_)) {
+
+ element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
+ // a long primary special
+ | (CE_LONG_PRIMARY_TAG_ << 24)
+ // first and second byte of primary
+ | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
+ // third byte of primary
+ | ((element.m_CEs_[1] >> 24) & 0xFF);
+ } else {
+ // omitting expansion offset in builder
+ // (HEADER_SIZE_ >> 2)
+ // & binds tighter than |, so the mask applies only to the shifted
+ // value returned by addExpansion
+ int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
+ | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
+ | (addExpansion(expansions, element.m_CEs_[0]) << 4)
+ & 0xFFFFF0;
+
+ for (int i = 1; i < element.m_CELength_; i++) {
+ addExpansion(expansions, element.m_CEs_[i]);
+ }
+ // lengths up to 0xF are stored in the low bits of the CE; longer
+ // expansions get a 0 appended to the expansion table instead
+ if (element.m_CELength_ <= 0xF) {
+ expansion |= element.m_CELength_;
+ } else {
+ addExpansion(expansions, 0);
+ }
+ element.m_mapCE_ = expansion;
+ setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1],
+ (byte) element.m_CELength_, t.m_maxExpansions_);
+ }
+ }
+
+ /**
+ * Adds closure elements that combine the precomposed chars collected so
+ * far with further combining marks. Each of the first maxComp
+ * precomposed chars is tried once, or twice if its class equals the
+ * class of cMark. Whenever the candidate string composes into a single
+ * char, an element is added and that char is appended to precompCh.
+ *
+ * @param t
+ * build table
+ * @param colEl
+ * collation element iterator used to compute the CEs
+ * @param element
+ * scratch element, overwritten for every addition
+ * @param precompCh
+ * precomposed chars; new ones are appended in place without a
+ * bounds check
+ * @param precompClass
+ * class recorded for each entry of precompCh
+ * @param maxComp
+ * number of valid entries in precompCh on entry
+ * @param cMark
+ * combining mark of the tailored string
+ * @param cmPos
+ * position in cmLookup.cPoints of the mark currently tried
+ * @param decomp
+ * decomposed string used for the second attempt
+ * @return number of valid entries in precompCh after the additions
+ */
+ private int addMultiCMontractions(BuildTable t,
+ CollationElementIterator colEl, Elements element, char[] precompCh,
+ int[] precompClass, int maxComp, char cMark, int cmPos,
+ String decomp) {
+
+ CombinClassTable cmLookup = t.cmLookup;
+ char[] combiningMarks = { cMark };
+ int cMarkClass = UCharacter.getCombiningClass(cMark) & 0xFF;
+ String comMark = new String(combiningMarks);
+ int noOfPrecomposedChs = maxComp;
+
+ for (int j = 0; j < maxComp; j++) {
+ int count = 0;
+ StringBuilder temp;
+
+ do {
+ String newDecomp, comp;
+
+ if (count == 0) { // Decompose the saved precomposed char.
+ newDecomp = Normalizer.decompose(
+ new String(precompCh, j, 1), false);
+ temp = new StringBuilder(newDecomp);
+ temp.append(cmLookup.cPoints[cmPos]);
+ newDecomp = temp.toString();
+ } else {
+ // second attempt: decomp followed by the precomposed char
+ temp = new StringBuilder(decomp);
+ temp.append(precompCh[j]);
+ newDecomp = temp.toString();
+ }
+ comp = Normalizer.compose(newDecomp, false);
+ if (comp.length() == 1) {
+ temp.append(cMark);
+ // look up the decomposed form, then switch to composed + mark
+ element.m_cPoints_ = temp.toString();
+ element.m_CELength_ = 0;
+ element.m_prefix_ = 0;
+ Elements prefix = t.m_prefixLookup_.get(element);
+ element.m_cPoints_ = comp + comMark;
+ if (prefix == null) {
+ element.m_prefix_ = 0;
+ element.m_prefixChars_ = null;
+ colEl.setText(temp.toString());
+ int ce = colEl.next();
+ element.m_CELength_ = 0;
+ while (ce != CollationElementIterator.NULLORDER) {
+ element.m_CEs_[element.m_CELength_++] = ce;
+ ce = colEl.next();
+ }
+ } else {
+ element.m_cPoints_ = comp;
+ element.m_prefix_ = 0;
+ element.m_prefixChars_ = null;
+ element.m_CELength_ = 1;
+ element.m_CEs_[0] = prefix.m_mapCE_;
+ }
+ setMapCE(t, element);
+ finalizeAddition(t, element);
+ precompCh[noOfPrecomposedChs] = comp.charAt(0);
+ precompClass[noOfPrecomposedChs] = cMarkClass;
+ noOfPrecomposedChs++;
+ }
+ } while (++count < 2 && (precompClass[j] == cMarkClass));
+ }
+ return noOfPrecomposedChs;
+ }
+
+ private void addFCD4AccentedContractions(BuildTable t,
+ CollationElementIterator colEl, String data, Elements element) {
+ String decomp = Normalizer.decompose(data, false);
+ String comp = Normalizer.compose(data, false);
+
+ element.m_cPoints_ = decomp;
+ element.m_CELength_ = 0;
+ element.m_prefix_ = 0;
+ Elements prefix = t.m_prefixLookup_.get(element);
+ if (prefix == null) {
+ element.m_cPoints_ = comp;
+ element.m_prefix_ = 0;
+ element.m_prefixChars_ = null;
+ element.m_CELength_ = 0;
+ colEl.setText(decomp);
+ int ce = colEl.next();
+ element.m_CELength_ = 0;
+ while (ce != CollationElementIterator.NULLORDER) {
+ element.m_CEs_[element.m_CELength_++] = ce;
+ ce = colEl.next();
+ }
+ addAnElement(t, element);
+ }
+ }
+
+ private void processUCACompleteIgnorables(BuildTable t) {
+ TrieIterator trieiterator = new TrieIterator(
+ RuleBasedCollator.UCA_.m_trie_);
+ RangeValueIterator.Element element = new RangeValueIterator.Element();
+ while (trieiterator.next(element)) {
+ int start = element.start;
+ int limit = element.limit;
+ if (element.value == 0) {
+ while (start < limit) {
+ int CE = t.m_mapping_.getValue(start);
+ if (CE == CE_NOT_FOUND_) {
+ m_utilElement_.m_prefix_ = 0;
+ m_utilElement_.m_uchars_ = UCharacter.toString(start);
+ m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
+ m_utilElement_.m_cPointsOffset_ = 0;
+ m_utilElement_.m_CELength_ = 1;
+ m_utilElement_.m_CEs_[0] = 0;
+ addAnElement(t, m_utilElement_);
+ }
+ start++;
+ }
+ }
+ }
+ }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java b/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java
new file mode 100644
index 00000000000..8f9865037f4
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollationRuleParser.java
@@ -0,0 +1,2358 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.PatternProps;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Collator.ReorderCodes;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+/**
+* Class for parsing collation rules, produces a list of tokens that will be
+* turned into collation elements
+* @author Syn Wee Quek
+* @since release 2.2, June 7 2002
+*/
+final class CollationRuleParser
+{
+ // public data members ---------------------------------------------------
+
+ // package private constructors ------------------------------------------
+
+ /**
+ * Creates a parser for the given collation rules. The rules are
+ * preprocessed, canonically decomposed and trimmed; parsing itself is
+ * not started here, assembleTokenList() has to be called separately.
+ * Please see RuleBasedCollator class description for more details on the
+ * collation rule syntax.
+ * @see java.util.Locale
+ * @param rules the collation rules to build the collation table from.
+ * @exception ParseException thrown when argument rules have an invalid
+ * syntax.
+ */
+ CollationRuleParser(String rules) throws ParseException
+ {
+ // preprocessRules() also prepares m_copySet_ and m_removeSet_.
+ String normalized
+ = Normalizer.decompose(preprocessRules(rules), false).trim();
+
+ // The rules are kept as a long string. The StringBuilder object is
+ // used to store the result of token parsing as well.
+ m_rules_ = normalized;
+ m_source_ = new StringBuilder(normalized);
+
+ // Index of the next unparsed character.
+ m_current_ = 0;
+
+ // Index of the next unwritten character in the parsed result.
+ m_extraCurrent_ = normalized.length();
+
+ m_resultLength_ = 0;
+ m_listHeader_ = new TokenListHeader[512];
+ m_hashTable_ = new HashMap();
+ m_parsedToken_ = new ParsedToken();
+ m_variableTop_ = null;
+ m_options_ = new OptionSet(RuleBasedCollator.UCA_);
+ // assembleTokenList() is deliberately not called here, so that a
+ // parser can be initialized and tokens parsed manually
+ }
+
+ // package private inner classes -----------------------------------------
+
+ /**
+ * Collation options set
+ */
+ static class OptionSet
+ {
+ // package private data members --------------------------------------
+
+ int m_variableTopValue_;
+ boolean m_isFrenchCollation_;
+ /**
+ * Attribute for handling variable elements
+ */
+ boolean m_isAlternateHandlingShifted_;
+ /**
+ * who goes first, lower case or uppercase
+ */
+ int m_caseFirst_;
+ /**
+ * do we have an extra case level
+ */
+ boolean m_isCaseLevel_;
+ /**
+ * attribute for normalization
+ */
+ int m_decomposition_;
+ /**
+ * attribute for strength
+ */
+ int m_strength_;
+ /**
+ * attribute for special Hiragana
+ */
+ boolean m_isHiragana4_;
+
+ /**
+ * the ordering of the scripts; stays null if the collator has no
+ * reorder codes
+ */
+ int[] m_scriptOrder_;
+
+ // package private constructor ---------------------------------------
+
+ /**
+ * Initializes the option set with the options of the argument
+ * collator. The reorder codes are copied, not shared.
+ * @param collator option to use
+ */
+ OptionSet(RuleBasedCollator collator)
+ {
+ m_strength_ = collator.getStrength();
+ m_decomposition_ = collator.getDecomposition();
+ m_isFrenchCollation_ = collator.isFrenchCollation();
+ m_isAlternateHandlingShifted_
+ = collator.isAlternateHandlingShifted();
+ m_isCaseLevel_ = collator.isCaseLevel();
+ m_caseFirst_ = collator.m_caseFirst_;
+ m_isHiragana4_ = collator.m_isHiragana4_;
+ m_variableTopValue_ = collator.m_variableTopValue_;
+
+ if (collator.m_reorderCodes_ != null) {
+ m_scriptOrder_ = collator.m_reorderCodes_.clone();
+ }
+ }
+ }
+
+ /**
+ * List of tokens used by the collation rules: links to the first, last
+ * and reset token of the list plus per-list CE and gap bookkeeping.
+ */
+ static class TokenListHeader
+ {
+ Token m_first_;
+ Token m_last_;
+ Token m_reset_;
+ boolean m_indirect_;
+ int m_baseCE_;
+ int m_baseContCE_;
+ int m_nextCE_;
+ int m_nextContCE_;
+ int m_previousCE_;
+ int m_previousContCE_;
+ // one slot per strength up to and including IDENTICAL
+ int[] m_pos_ = new int[Collator.IDENTICAL + 1];
+ // three slots per strength up to and including TERTIARY
+ int[] m_gapsLo_ = new int[3 * (Collator.TERTIARY + 1)];
+ int[] m_gapsHi_ = new int[3 * (Collator.TERTIARY + 1)];
+ int[] m_numStr_ = new int[3 * (Collator.TERTIARY + 1)];
+ // one token per strength up to and including TERTIARY
+ Token[] m_fStrToken_ = new Token[Collator.TERTIARY + 1];
+ Token[] m_lStrToken_ = new Token[Collator.TERTIARY + 1];
+ }
+
+ /**
+ * Token wrapper for collation rules.
+ * m_source_ packs the location of the token text inside m_rules_: the
+ * top 8 bits hold the length and the low 24 bits the start offset. This
+ * is how hashCode() and equals() decode it.
+ */
+ static class Token
+ {
+ // package private data members ---------------------------------------
+
+ int m_CE_[];
+ int m_CELength_;
+ int m_expCE_[];
+ int m_expCELength_;
+ int m_source_;
+ int m_expansion_;
+ int m_prefix_;
+ int m_strength_;
+ int m_toInsert_;
+ int m_polarity_; // 1 for <, <<, <<<, , ; and 0 for >, >>, >>>
+ TokenListHeader m_listHeader_;
+ Token m_previous_;
+ Token m_next_;
+ StringBuilder m_rules_;
+ char m_flags_;
+
+ // package private constructors ---------------------------------------
+
+ Token()
+ {
+ m_CE_ = new int[128];
+ m_expCE_ = new int[128];
+ // TODO: this should also handle reverse
+ m_polarity_ = TOKEN_POLARITY_POSITIVE_;
+ m_next_ = null;
+ m_previous_ = null;
+ m_CELength_ = 0;
+ m_expCELength_ = 0;
+ }
+
+ // package private methods --------------------------------------------
+
+ /**
+ * Hashcode calculation for token, based on the token text. For
+ * texts of 32 chars or more only every inc-th char is sampled.
+ * @return the hashcode
+ */
+ @Override
+ public int hashCode()
+ {
+ int result = 0;
+ int len = (m_source_ & 0xFF000000) >>> 24;
+ int inc = ((len - 32) / 32) + 1;
+
+ int start = m_source_ & 0x00FFFFFF;
+ int limit = start + len;
+
+ while (start < limit) {
+ result = (result * 37) + m_rules_.charAt(start);
+ start += inc;
+ }
+ return result;
+ }
+
+ /**
+ * Equals calculation. Two tokens are equal if both have a non-zero
+ * m_source_ and their texts have the same length and the same chars.
+ * @param target object to compare
+ * @return true if target is the same as this object
+ */
+ @Override
+ public boolean equals(Object target)
+ {
+ if (target == this) {
+ return true;
+ }
+ if (!(target instanceof Token)) {
+ return false;
+ }
+ Token t = (Token)target;
+ if (m_source_ == 0 || t.m_source_ == 0) {
+ return false;
+ }
+ // the target length must come from the target's own m_source_;
+ // unsigned shift as in hashCode(), since a length of 128 or more
+ // sets the sign bit
+ int slimit = (m_source_ & 0xFF000000) >>> 24;
+ int tlimit = (t.m_source_ & 0xFF000000) >>> 24;
+ if (slimit != tlimit) {
+ return false;
+ }
+ if (m_source_ == t.m_source_) {
+ return true;
+ }
+ int sstart = m_source_ & 0x00FFFFFF;
+ int tstart = t.m_source_ & 0x00FFFFFF;
+ for (int i = 0; i < slimit; i++) {
+ if (m_rules_.charAt(sstart + i)
+ != t.m_rules_.charAt(tstart + i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ // package private data member -------------------------------------------
+
+ /**
+ * Indicator that the token is a reset, i.e. & in the rules
+ */
+ static final int TOKEN_RESET_ = 0xDEADBEEF;
+
+ /**
+ * Number of token lists in m_listHeader_ that are in use
+ */
+ int m_resultLength_;
+ /**
+ * List of parsed tokens
+ */
+ TokenListHeader m_listHeader_[];
+ /**
+ * Variable top token
+ */
+ Token m_variableTop_;
+ /**
+ * Collation options
+ */
+ OptionSet m_options_;
+ /**
+ * Normalized collation rules with some extra characters
+ */
+ StringBuilder m_source_;
+ /**
+ * Hash table to keep all tokens
+ */
+ Map m_hashTable_;
+
+ // package private method ------------------------------------------------
+
+ /**
+  * Copies the options collected in m_options_ into the default-attribute
+  * fields of the given collator. The reorder codes are copied as a clone
+  * of the parsed script order, or set to null when none was parsed.
+  * @param collator collator whose default fields are overwritten
+  */
+ void setDefaultOptionsInCollator(RuleBasedCollator collator)
+ {
+     // strength and decomposition
+     collator.m_defaultStrength_ = m_options_.m_strength_;
+     collator.m_defaultDecomposition_ = m_options_.m_decomposition_;
+
+     // boolean switches
+     collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_;
+     collator.m_defaultIsAlternateHandlingShifted_
+         = m_options_.m_isAlternateHandlingShifted_;
+     collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_;
+     collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_;
+
+     // remaining values
+     collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_;
+     collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_;
+
+     // never share the parser's array with the collator
+     collator.m_defaultReorderCodes_ = (m_options_.m_scriptOrder_ == null)
+         ? null
+         : m_options_.m_scriptOrder_.clone();
+ }
+
+ // private inner classes -------------------------------------------------
+
+ /**
+  * Holder for a token that the parser has read but not yet processed.
+  * Bundling the pieces here keeps the parser methods from needing long
+  * argument lists.
+  */
+ private static class ParsedToken
+ {
+     // private data members ---------------------------------------------
+
+     int m_strength_;
+     int m_charsOffset_;
+     int m_charsLen_;
+     int m_extensionOffset_;
+     int m_extensionLen_;
+     int m_prefixOffset_;
+     int m_prefixLen_;
+     char m_flags_;
+     char m_indirectIndex_;
+
+     // private constructor ----------------------------------------------
+
+     /**
+      * Creates a token with an unset strength and all offsets, lengths
+      * and flags cleared.
+      */
+     ParsedToken()
+     {
+         m_strength_ = TOKEN_UNSET_;
+         m_flags_ = 0;
+         m_charsOffset_ = m_charsLen_ = 0;
+         m_extensionOffset_ = m_extensionLen_ = 0;
+         m_prefixOffset_ = m_prefixLen_ = 0;
+     }
+ }
+
+ /**
+  * Pair of collation-element boundaries: a start CE with its continuation
+  * and an optional limit CE with its continuation.
+  */
+ private static class IndirectBoundaries
+ {
+     // package private data members --------------------------------------
+
+     int m_startCE_;
+     int m_startContCE_;
+     int m_limitCE_;
+     int m_limitContCE_;
+
+     // package private constructor ---------------------------------------
+
+     /**
+      * @param startce two-element array: start CE, start continuation CE
+      * @param limitce two-element array: limit CE, limit continuation CE;
+      *        may be null, in which case both limit values are 0
+      */
+     IndirectBoundaries(int startce[], int limitce[])
+     {
+         // Set values for the top - TODO: once we have values for all the
+         // indirects, we are going to initialize here.
+         m_startCE_ = startce[0];
+         m_startContCE_ = startce[1];
+
+         final boolean bounded = (limitce != null);
+         m_limitCE_ = bounded ? limitce[0] : 0;
+         m_limitContCE_ = bounded ? limitce[1] : 0;
+     }
+ }
+
+ /**
+  * Description of one collation option tag that may appear in the rules:
+  * its name, the attribute it maps to, and the accepted sub-option names
+  * with their attribute values.
+  */
+ private static class TokenOption
+ {
+     // private data members ----------------------------------------------
+
+     private String m_name_;
+     private int m_attribute_;
+     private String[] m_subOptions_;
+     private int[] m_subOptionAttributeValues_;
+
+     // package private constructor ---------------------------------------
+
+     /**
+      * @param name option tag name
+      * @param attribute attribute the tag maps to
+      * @param suboptions accepted sub-option names, may be null
+      * @param suboptionattributevalue values parallel to suboptions,
+      *        may be null
+      */
+     TokenOption(String name, int attribute, String suboptions[],
+                 int suboptionattributevalue[])
+     {
+         this.m_name_ = name;
+         this.m_attribute_ = attribute;
+         this.m_subOptions_ = suboptions;
+         this.m_subOptionAttributeValues_ = suboptionattributevalue;
+     }
+ }
+
+ // private variables -----------------------------------------------------
+
+ /**
+ * Current parsed token
+ */
+ private ParsedToken m_parsedToken_;
+ /**
+ * Collation rule
+ */
+ private String m_rules_;
+ private int m_current_;
+ /**
+ * End of the option while reading.
+ * Need it for UnicodeSet reading support.
+ */
+ private int m_optionEnd_;
+ /*
+ * Current offset in m_source
+ */
+ //private int m_sourceLimit_;
+ /**
+ * Offset to m_source_ for the extra expansion characters
+ */
+ private int m_extraCurrent_;
+
+ /**
+ * UnicodeSet that contains code points to be copied from the UCA
+ */
+ UnicodeSet m_copySet_;
+
+ /**
+ * UnicodeSet that contains code points for which we want to remove
+ * UCA contractions. It implies copying of these code points from
+ * the UCA.
+ */
+ UnicodeSet m_removeSet_;
+
+ /*
+ * This is space for the extra strings that need to be unquoted during the
+ * parsing of the rules
+ */
+ //private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048;
+ /**
+ * Indicator that the token is not set yet
+ */
+ private static final int TOKEN_UNSET_ = 0xFFFFFFFF;
+ /*
+ * Indicator that the rule is in the > polarity, ie everything on the
+ * right of the rule is less than
+ */
+ //private static final int TOKEN_POLARITY_NEGATIVE_ = 0;
+ /**
+ * Indicator that the rule is in the < polarity, ie everything on the
+ * right of the rule is greater than
+ */
+ private static final int TOKEN_POLARITY_POSITIVE_ = 1;
+ /**
+ * Flag mask to determine if top is set
+ */
+ private static final int TOKEN_TOP_MASK_ = 0x04;
+ /**
+ * Flag mask to determine if variable top is set
+ */
+ private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08;
+ /**
+ * Flag mask to determine if a before attribute is set
+ */
+ private static final int TOKEN_BEFORE_ = 0x03;
+ /**
+ * For use in parsing token options
+ */
+ private static final int TOKEN_SUCCESS_MASK_ = 0x10;
+
+ /**
+ * These values are used for finding CE values for indirect positioning.
+ * Indirect positioning is a mechanism for allowing resets on symbolic
+ * values. It only works for resets and you cannot tailor indirect names.
+ * An indirect name can define either an anchor point or a range. An anchor
+ * point behaves in exactly the same way as a code point in reset would,
+ * except that it cannot be tailored. A range (we currently only know of
+ * the [top] range) will explicitly set the upper bound for generated CEs,
+ * thus allowing for better control over how many CEs can be squeezed
+ * between in the range without performance penalty. In that respect, we use
+ * [top] for tailoring of locales that use CJK characters. Other indirect
+ * values are currently a pure convenience, they can be used to assure that
+ * the CEs will be always positioned in the same place relative to a point
+ * with known properties (e.g. first primary ignorable).
+ */
+ private static final IndirectBoundaries INDIRECT_BOUNDARIES_[];
+
+// /**
+// * Inverse UCA constants
+// */
+// private static final int INVERSE_SIZE_MASK_ = 0xFFF00000;
+// private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF;
+// private static final int INVERSE_SHIFT_VALUE_ = 20;
+
+ /**
+ * Collation option tags
+ * [last variable] last variable value
+ * [last primary ignorable] largest CE for primary ignorable
+ * [last secondary ignorable] largest CE for secondary ignorable
+ * [last tertiary ignorable] largest CE for tertiary ignorable
+ * [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
+ */
+ private static final TokenOption RULES_OPTIONS_[];
+
+ // Builds the two lookup tables: INDIRECT_BOUNDARIES_ (indexed by the
+ // indirect position parsed from the rules) and RULES_OPTIONS_ (the option
+ // tags with their sub-options).
+ static
+ {
+     INDIRECT_BOUNDARIES_ = new IndirectBoundaries[15];
+     // [0] UCOL_RESET_TOP_VALUE
+     INDIRECT_BOUNDARIES_[0] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
+     // [1] UCOL_FIRST_PRIMARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[1] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_PRIMARY_IGNORABLE_, null);
+     // [2] UCOL_LAST_PRIMARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[2] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_PRIMARY_IGNORABLE_, null);
+     // [3] UCOL_FIRST_SECONDARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[3] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_SECONDARY_IGNORABLE_, null);
+     // [4] UCOL_LAST_SECONDARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[4] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_SECONDARY_IGNORABLE_, null);
+     // [5] UCOL_FIRST_TERTIARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[5] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_TERTIARY_IGNORABLE_, null);
+     // [6] UCOL_LAST_TERTIARY_IGNORABLE
+     INDIRECT_BOUNDARIES_[6] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_TERTIARY_IGNORABLE_, null);
+     // [7] UCOL_FIRST_VARIABLE
+     INDIRECT_BOUNDARIES_[7] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_VARIABLE_, null);
+     // [8] UCOL_LAST_VARIABLE
+     INDIRECT_BOUNDARIES_[8] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_VARIABLE_, null);
+     // [9] UCOL_FIRST_NON_VARIABLE
+     INDIRECT_BOUNDARIES_[9] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_NON_VARIABLE_, null);
+     // [10] UCOL_LAST_NON_VARIABLE
+     INDIRECT_BOUNDARIES_[10] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_,
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_);
+     // [11] UCOL_FIRST_IMPLICIT
+     INDIRECT_BOUNDARIES_[11] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_, null);
+     // [12] UCOL_LAST_IMPLICIT
+     INDIRECT_BOUNDARIES_[12] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_IMPLICIT_,
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_);
+     // [13] UCOL_FIRST_TRAILING
+     INDIRECT_BOUNDARIES_[13] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_, null);
+     // [14] UCOL_LAST_TRAILING; its limit is set separately below
+     INDIRECT_BOUNDARIES_[14] = new IndirectBoundaries(
+             RuleBasedCollator.UCA_CONSTANTS_.LAST_TRAILING_, null);
+     INDIRECT_BOUNDARIES_[14].m_limitCE_
+         = RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24;
+
+     RULES_OPTIONS_ = new TokenOption[20];
+
+     RULES_OPTIONS_[0] = new TokenOption("alternate",
+             RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
+             new String[] {"non-ignorable", "shifted"},
+             new int[] {RuleBasedCollator.AttributeValue.NON_IGNORABLE_,
+                        RuleBasedCollator.AttributeValue.SHIFTED_});
+     RULES_OPTIONS_[1] = new TokenOption("backwards",
+             RuleBasedCollator.Attribute.FRENCH_COLLATION_,
+             new String[] {"2"},
+             new int[] {RuleBasedCollator.AttributeValue.ON_});
+
+     // one off/on pair of arrays is shared by every boolean-style option
+     String offonoption[] = {"off", "on"};
+     int offonvalue[] = {RuleBasedCollator.AttributeValue.OFF_,
+                         RuleBasedCollator.AttributeValue.ON_};
+     RULES_OPTIONS_[2] = new TokenOption("caseLevel",
+             RuleBasedCollator.Attribute.CASE_LEVEL_,
+             offonoption, offonvalue);
+     RULES_OPTIONS_[3] = new TokenOption("caseFirst",
+             RuleBasedCollator.Attribute.CASE_FIRST_,
+             new String[] {"lower", "upper", "off"},
+             new int[] {RuleBasedCollator.AttributeValue.LOWER_FIRST_,
+                        RuleBasedCollator.AttributeValue.UPPER_FIRST_,
+                        RuleBasedCollator.AttributeValue.OFF_});
+     RULES_OPTIONS_[4] = new TokenOption("normalization",
+             RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
+             offonoption, offonvalue);
+     RULES_OPTIONS_[5] = new TokenOption("hiraganaQ",
+             RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
+             offonoption, offonvalue);
+     RULES_OPTIONS_[6] = new TokenOption("strength",
+             RuleBasedCollator.Attribute.STRENGTH_,
+             new String[] {"1", "2", "3", "4", "I"},
+             new int[] {RuleBasedCollator.AttributeValue.PRIMARY_,
+                        RuleBasedCollator.AttributeValue.SECONDARY_,
+                        RuleBasedCollator.AttributeValue.TERTIARY_,
+                        RuleBasedCollator.AttributeValue.QUATERNARY_,
+                        RuleBasedCollator.AttributeValue.IDENTICAL_});
+     RULES_OPTIONS_[7] = new TokenOption("variable top",
+             RuleBasedCollator.Attribute.LIMIT_, null, null);
+     RULES_OPTIONS_[8] = new TokenOption("rearrange",
+             RuleBasedCollator.Attribute.LIMIT_, null, null);
+     RULES_OPTIONS_[9] = new TokenOption("before",
+             RuleBasedCollator.Attribute.LIMIT_,
+             new String[] {"1", "2", "3"},
+             new int[] {RuleBasedCollator.AttributeValue.PRIMARY_,
+                        RuleBasedCollator.AttributeValue.SECONDARY_,
+                        RuleBasedCollator.AttributeValue.TERTIARY_});
+     RULES_OPTIONS_[10] = new TokenOption("top",
+             RuleBasedCollator.Attribute.LIMIT_, null, null);
+
+     // "first" and "last" share the same sub-option arrays; every
+     // sub-option carries the PRIMARY_ value
+     String firstlastoption[] = {"primary", "secondary", "tertiary",
+                                 "variable", "regular", "implicit",
+                                 "trailing"};
+     int firstlastvalue[] = new int[firstlastoption.length];
+     Arrays.fill(firstlastvalue, RuleBasedCollator.AttributeValue.PRIMARY_);
+     RULES_OPTIONS_[11] = new TokenOption("first",
+             RuleBasedCollator.Attribute.LIMIT_,
+             firstlastoption, firstlastvalue);
+     RULES_OPTIONS_[12] = new TokenOption("last",
+             RuleBasedCollator.Attribute.LIMIT_,
+             firstlastoption, firstlastvalue);
+
+     // the remaining tags take no sub-options
+     String plaintag[] = {"optimize", "suppressContractions", "undefined",
+                          "reorder", "charsetname", "charset", "import"};
+     for (int i = 0; i < plaintag.length; i ++) {
+         RULES_OPTIONS_[13 + i] = new TokenOption(plaintag[i],
+                 RuleBasedCollator.Attribute.LIMIT_, null, null);
+     }
+ }
+
+ /**
+ * Utility data members
+ */
+ // Scratch token reused as the lookup key for m_hashTable_.
+ private Token m_utilToken_ = new Token();
+ // Collation element iterator obtained from the UCA collator, created
+ // over an empty string.
+ private CollationElementIterator m_UCAColEIter_
+ = RuleBasedCollator.UCA_.getCollationElementIterator("");
+ // Two-element scratch buffer; NOTE(review): presumably holds a CE and
+ // its continuation - its users are outside this section, confirm.
+ private int m_utilCEBuffer_[] = new int[2];
+
+ // True while a starred token still has characters to be returned one
+ // code point at a time by processNextTokenInTheStarredList().
+ private boolean m_isStarred_;
+
+ // Index in m_source_ of the next character of the starred token to return.
+ private int m_currentStarredCharIndex_;
+
+
+ // Index in m_source_ of the last character (inclusive) of the starred token.
+ private int m_lastStarredCharIndex_;
+
+ // Next code point of the current range to be returned by
+ // processNextCodePointInRange().
+ private int m_currentRangeCp_;
+
+ // Last code point (inclusive) of the current range.
+ private int m_lastRangeCp_;
+
+ // True while a range still has code points to be returned.
+ private boolean m_inRange_;
+
+ // Code point recorded by the starred/range processing; a new range
+ // starts at m_previousCp_ + 1.
+ private int m_previousCp_;
+
+ // NOTE(review): not used in the code visible here; presumably a saved
+ // copy of m_isStarred_ - confirm against parseNextTokenInternal().
+ private boolean m_savedIsStarred_;
+
+
+ // private methods -------------------------------------------------------
+
+ /**
+ * Assembles the token list.
+ * Repeatedly calls parseNextToken() and, for every parsed token, either
+ * starts/locates a reset (a TokenListHeader in m_listHeader_) or links the
+ * token into the doubly linked list that hangs off the header of the
+ * previous token. Tokens are looked up and registered in m_hashTable_.
+ * @return the value of m_resultLength_, after dropping a trailing list
+ * header that contains no tokens
+ * @exception ParseException thrown when rules syntax fails
+ */
+ int assembleTokenList() throws ParseException
+ {
+ Token lastToken = null;
+ m_parsedToken_.m_strength_ = TOKEN_UNSET_;
+ // NOTE: the limit is read once; text appended to m_source_ inside the
+ // loop (combined expansions) lies beyond it.
+ int sourcelimit = m_source_.length();
+ // pending implicit expansion, packed as length << 24 | offset;
+ // 0 means there is none
+ int expandNext = 0;
+
+ m_isStarred_ = false;
+
+ while (m_current_ < sourcelimit || m_isStarred_) {
+ m_parsedToken_.m_prefixOffset_ = 0;
+ if (parseNextToken(lastToken == null) < 0) {
+ // we have reached the end
+ continue;
+ }
+ char specs = m_parsedToken_.m_flags_;
+ boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0);
+ boolean top = ((specs & TOKEN_TOP_MASK_) != 0);
+ int lastStrength = TOKEN_UNSET_;
+ if (lastToken != null) {
+ lastStrength = lastToken.m_strength_;
+ }
+ // build the lookup key: token length in the top 8 bits, offset in
+ // the low 24 bits
+ m_utilToken_.m_source_ = m_parsedToken_.m_charsLen_ << 24
+ | m_parsedToken_.m_charsOffset_;
+ m_utilToken_.m_rules_ = m_source_;
+ // 4 Lookup each source in the CharsToToken map, and find a
+ // sourcetoken
+ Token sourceToken = m_hashTable_.get(m_utilToken_);
+ if (m_parsedToken_.m_strength_ != TOKEN_RESET_) {
+ if (lastToken == null) {
+ // this means that rules haven't started properly
+ throwParseException(m_source_.toString(), 0);
+ }
+ // 6 Otherwise (when relation != reset)
+ if (sourceToken == null) {
+ // If sourceToken is null, create new one
+ sourceToken = new Token();
+ sourceToken.m_rules_ = m_source_;
+ sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
+ | m_parsedToken_.m_charsOffset_;
+ sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24
+ | m_parsedToken_.m_prefixOffset_;
+ // TODO: this should also handle reverse
+ sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
+ sourceToken.m_next_ = null;
+ sourceToken.m_previous_ = null;
+ sourceToken.m_CELength_ = 0;
+ sourceToken.m_expCELength_ = 0;
+ m_hashTable_.put(sourceToken, sourceToken);
+ }
+ else {
+ // we could have fished out a reset here
+ if (sourceToken.m_strength_ != TOKEN_RESET_
+ && lastToken != sourceToken) {
+ // otherwise remove sourceToken from where it was.
+
+ // Take care of the next node
+ if (sourceToken.m_next_ != null) {
+ if (sourceToken.m_next_.m_strength_
+ > sourceToken.m_strength_) {
+ sourceToken.m_next_.m_strength_
+ = sourceToken.m_strength_;
+ }
+ sourceToken.m_next_.m_previous_
+ = sourceToken.m_previous_;
+ }
+ else {
+ // sourcetoken is the last token.
+ // Redefine the tail token.
+ sourceToken.m_listHeader_.m_last_
+ = sourceToken.m_previous_;
+ }
+
+ // Take care of the previous node.
+ if (sourceToken.m_previous_ != null) {
+ sourceToken.m_previous_.m_next_
+ = sourceToken.m_next_;
+ }
+ else {
+ // sourcetoken is the first token.
+ // Redefine the head node.
+ sourceToken.m_listHeader_.m_first_
+ = sourceToken.m_next_;
+ }
+ sourceToken.m_next_ = null;
+ sourceToken.m_previous_ = null;
+ }
+ }
+ sourceToken.m_strength_ = m_parsedToken_.m_strength_;
+ sourceToken.m_listHeader_ = lastToken.m_listHeader_;
+
+ // 1. Find the strongest strength in each list, and set
+ // strongestP and strongestN accordingly in the headers.
+ if (lastStrength == TOKEN_RESET_
+ || sourceToken.m_listHeader_.m_first_ == null) {
+ // If LAST is a reset insert sourceToken in the list.
+ if (sourceToken.m_listHeader_.m_first_ == null) {
+ sourceToken.m_listHeader_.m_first_ = sourceToken;
+ sourceToken.m_listHeader_.m_last_ = sourceToken;
+ }
+ else { // we need to find a place for us
+ // and we'll get in front of the same strength
+ if (sourceToken.m_listHeader_.m_first_.m_strength_
+ <= sourceToken.m_strength_) {
+ sourceToken.m_next_
+ = sourceToken.m_listHeader_.m_first_;
+ sourceToken.m_next_.m_previous_ = sourceToken;
+ sourceToken.m_listHeader_.m_first_ = sourceToken;
+ sourceToken.m_previous_ = null;
+ }
+ else {
+ // walk forward from the head while the following
+ // token has a numerically larger strength value
+ lastToken = sourceToken.m_listHeader_.m_first_;
+ while (lastToken.m_next_ != null
+ && lastToken.m_next_.m_strength_
+ > sourceToken.m_strength_) {
+ lastToken = lastToken.m_next_;
+ }
+ if (lastToken.m_next_ != null) {
+ lastToken.m_next_.m_previous_ = sourceToken;
+ }
+ else {
+ sourceToken.m_listHeader_.m_last_
+ = sourceToken;
+ }
+ sourceToken.m_previous_ = lastToken;
+ sourceToken.m_next_ = lastToken.m_next_;
+ lastToken.m_next_ = sourceToken;
+ }
+ }
+ }
+ else {
+ // Otherwise (when LAST is not a reset)
+ // if polarity (LAST) == polarity(relation), insert
+ // sourceToken after LAST, otherwise insert before.
+ // when inserting after or before, search to the next
+ // position with the same strength in that direction.
+ // (This is called postpone insertion).
+ if (sourceToken != lastToken) {
+ if (lastToken.m_polarity_ == sourceToken.m_polarity_) {
+ while (lastToken.m_next_ != null
+ && lastToken.m_next_.m_strength_
+ > sourceToken.m_strength_) {
+ lastToken = lastToken.m_next_;
+ }
+ sourceToken.m_previous_ = lastToken;
+ if (lastToken.m_next_ != null) {
+ lastToken.m_next_.m_previous_ = sourceToken;
+ }
+ else {
+ sourceToken.m_listHeader_.m_last_ = sourceToken;
+ }
+ sourceToken.m_next_ = lastToken.m_next_;
+ lastToken.m_next_ = sourceToken;
+ }
+ else {
+ while (lastToken.m_previous_ != null
+ && lastToken.m_previous_.m_strength_
+ > sourceToken.m_strength_) {
+ lastToken = lastToken.m_previous_;
+ }
+ sourceToken.m_next_ = lastToken;
+ if (lastToken.m_previous_ != null) {
+ lastToken.m_previous_.m_next_ = sourceToken;
+ }
+ else {
+ sourceToken.m_listHeader_.m_first_
+ = sourceToken;
+ }
+ sourceToken.m_previous_ = lastToken.m_previous_;
+ lastToken.m_previous_ = sourceToken;
+ }
+ }
+ else { // repeated one thing twice in rules, stay with the
+ // stronger strength
+ if (lastStrength < sourceToken.m_strength_) {
+ sourceToken.m_strength_ = lastStrength;
+ }
+ }
+ }
+ // if the token was a variable top, we're gonna put it in
+ // (only the first variable top token seen is recorded)
+ if (variableTop == true && m_variableTop_ == null) {
+ variableTop = false;
+ m_variableTop_ = sourceToken;
+ }
+ // Treat the expansions.
+ // There are two types of expansions: explicit (x / y) and
+ // reset based propagating expansions
+ // (&abc * d * e <=> &ab * d / c * e / c)
+ // if both of them are in effect for a token, they are combined.
+ sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
+ | m_parsedToken_.m_extensionOffset_;
+ if (expandNext != 0) {
+ if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) {
+ // primary strength kills off the implicit expansion
+ expandNext = 0;
+ }
+ else if (sourceToken.m_expansion_ == 0) {
+ // if there is no expansion, implicit is just added to
+ // the token
+ sourceToken.m_expansion_ = expandNext;
+ }
+ else {
+ // there is both explicit and implicit expansion.
+ // We need to make a combination: the implicit part
+ // followed by the explicit part is appended to
+ // m_source_ at m_extraCurrent_
+ int start = expandNext & 0xFFFFFF;
+ int size = expandNext >>> 24;
+ if (size > 0) {
+ m_source_.append(m_source_.substring(start,
+ start + size));
+ }
+ start = m_parsedToken_.m_extensionOffset_;
+ m_source_.append(m_source_.substring(start,
+ start + m_parsedToken_.m_extensionLen_));
+ sourceToken.m_expansion_ = (size
+ + m_parsedToken_.m_extensionLen_) << 24
+ | m_extraCurrent_;
+ m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_;
+ }
+ }
+ // if the previous token was a reset before, the strength of this
+ // token must match the strength of before. Otherwise we have an
+ // undefined situation.
+ // In other words, we currently have a kludge which we use to
+ // represent &a >> x. This is written as &[before 2]a << x.
+ // The low two flag bits hold (before strength + 1); 0 means that
+ // no [before] was given.
+ if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) {
+ int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1;
+ if(beforeStrength != sourceToken.m_strength_) {
+ throwParseException(m_source_.toString(), m_current_);
+ }
+ }
+
+ }
+ else {
+ if (lastToken != null && lastStrength == TOKEN_RESET_) {
+ // if the previous token was also a reset, this means that
+ // we have two consecutive resets and we want to remove the
+ // previous one if empty
+ if (m_resultLength_ > 0 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
+ m_resultLength_ --;
+ }
+ }
+ if (sourceToken == null) {
+ // this is a reset, but it might still be somewhere in the
+ // tailoring, in shorter form
+ int searchCharsLen = m_parsedToken_.m_charsLen_;
+ while (searchCharsLen > 1 && sourceToken == null) {
+ searchCharsLen --;
+ // key = searchCharsLen << 24 | charsOffset;
+ m_utilToken_.m_source_ = searchCharsLen << 24
+ | m_parsedToken_.m_charsOffset_;
+ m_utilToken_.m_rules_ = m_source_;
+ sourceToken = m_hashTable_.get(m_utilToken_);
+ }
+ if (sourceToken != null) {
+ // the characters beyond the matched prefix become the
+ // implicit expansion of the following tokens
+ expandNext = (m_parsedToken_.m_charsLen_
+ - searchCharsLen) << 24
+ | (m_parsedToken_.m_charsOffset_
+ + searchCharsLen);
+ }
+ }
+ if ((specs & TOKEN_BEFORE_) != 0) {
+ if (top == false) {
+ // we're doing before & there is no indirection
+ int strength = (specs & TOKEN_BEFORE_) - 1;
+ if (sourceToken != null
+ && sourceToken.m_strength_ != TOKEN_RESET_) {
+ // this is a before that is already ordered in the UCA
+ // - so we need to get the previous with good strength
+ while (sourceToken.m_strength_ > strength
+ && sourceToken.m_previous_ != null) {
+ sourceToken = sourceToken.m_previous_;
+ }
+ // here, either we hit the strength or NULL
+ if (sourceToken.m_strength_ == strength) {
+ if (sourceToken.m_previous_ != null) {
+ sourceToken = sourceToken.m_previous_;
+ }
+ else { // start of list
+ sourceToken
+ = sourceToken.m_listHeader_.m_reset_;
+ }
+ }
+ else { // we hit NULL, we should be doing the else part
+ sourceToken
+ = sourceToken.m_listHeader_.m_reset_;
+ sourceToken = getVirginBefore(sourceToken,
+ strength);
+ }
+ }
+ else {
+ sourceToken
+ = getVirginBefore(sourceToken, strength);
+ }
+ }
+ else {
+ // this is both before and indirection
+ top = false;
+ m_listHeader_[m_resultLength_] = new TokenListHeader();
+ m_listHeader_[m_resultLength_].m_previousCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_indirect_ = true;
+ // we need to do slightly more work. we need to get the
+ // baseCE using the inverse UCA & getPrevious. The next
+ // bound is not set, and will be decided in ucol_bld
+ int strength = (specs & TOKEN_BEFORE_) - 1;
+ int baseCE = INDIRECT_BOUNDARIES_[
+ m_parsedToken_.m_indirectIndex_].m_startCE_;
+ int baseContCE = INDIRECT_BOUNDARIES_[
+ m_parsedToken_.m_indirectIndex_].m_startContCE_;
+ int ce[] = new int[2];
+ if((baseCE >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
+ && (baseCE >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */
+ int primary = baseCE & RuleBasedCollator.CE_PRIMARY_MASK_ | (baseContCE & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
+ int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
+ int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
+ ce[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
+ ce[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+ } else {
+ CollationParsedRuleBuilder.InverseUCA invuca
+ = CollationParsedRuleBuilder.INVERSE_UCA_;
+ invuca.getInversePrevCE(baseCE, baseContCE, strength,
+ ce);
+ }
+ m_listHeader_[m_resultLength_].m_baseCE_ = ce[0];
+ m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1];
+ m_listHeader_[m_resultLength_].m_nextCE_ = 0;
+ m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
+
+ sourceToken = new Token();
+ expandNext = initAReset(0, sourceToken);
+ }
+ }
+ // 5 If the relation is a reset:
+ // If sourceToken is null
+ // Create new list, create new sourceToken, make the baseCE
+ // from source, put the sourceToken in ListHeader of the new
+ // list
+ if (sourceToken == null) {
+ if (m_listHeader_[m_resultLength_] == null) {
+ m_listHeader_[m_resultLength_] = new TokenListHeader();
+ }
+ // 3 Consider each item: relation, source, and expansion:
+ // e.g. ...< x / y ...
+ // First convert all expansions into normal form.
+ // Examples:
+ // If "xy" doesn't occur earlier in the list or in the UCA,
+ // convert &xy * c * d * ... into &x * c/y * d * ...
+ // Note: reset values can never have expansions, although
+ // they can cause the very next item to have one. They may
+ // be contractions, if they are found earlier in the list.
+ if (top == false) {
+ CollationElementIterator coleiter
+ = RuleBasedCollator.UCA_.getCollationElementIterator(
+ m_source_.substring(m_parsedToken_.m_charsOffset_,
+ m_parsedToken_.m_charsOffset_
+ + m_parsedToken_.m_charsLen_));
+
+ int CE = coleiter.next();
+ // offset to the character in the full rule string
+ int expand = coleiter.getOffset()
+ + m_parsedToken_.m_charsOffset_;
+ int SecondCE = coleiter.next();
+
+ m_listHeader_[m_resultLength_].m_baseCE_
+ = CE & 0xFFFFFF3F;
+ if (RuleBasedCollator.isContinuation(SecondCE)) {
+ m_listHeader_[m_resultLength_].m_baseContCE_
+ = SecondCE;
+ }
+ else {
+ m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
+ }
+ m_listHeader_[m_resultLength_].m_nextCE_ = 0;
+ m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_indirect_ = false;
+ sourceToken = new Token();
+ expandNext = initAReset(expand, sourceToken);
+ }
+ else { // top == TRUE
+ top = false;
+ m_listHeader_[m_resultLength_].m_previousCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_indirect_ = true;
+ IndirectBoundaries ib = INDIRECT_BOUNDARIES_[
+ m_parsedToken_.m_indirectIndex_];
+ m_listHeader_[m_resultLength_].m_baseCE_
+ = ib.m_startCE_;
+ m_listHeader_[m_resultLength_].m_baseContCE_
+ = ib.m_startContCE_;
+ m_listHeader_[m_resultLength_].m_nextCE_
+ = ib.m_limitCE_;
+ m_listHeader_[m_resultLength_].m_nextContCE_
+ = ib.m_limitContCE_;
+ sourceToken = new Token();
+ expandNext = initAReset(0, sourceToken);
+ }
+ }
+ else { // reset to something already in rules
+ top = false;
+ }
+ }
+ // 7 After all this, set LAST to point to sourceToken, and goto
+ // step 3.
+ lastToken = sourceToken;
+ }
+
+ // drop a trailing list header that never received a token
+ if (m_resultLength_ > 0
+ && m_listHeader_[m_resultLength_ - 1].m_first_ == null) {
+ m_resultLength_ --;
+ }
+ return m_resultLength_;
+ }
+
+ /**
+ * Formats and throws a ParseException
+ * @param rules collation rule that failed
+ * @param offset failed offset in rules
+ * @throws ParseException with failure information
+ */
+ private static final void throwParseException(String rules, int offset)
+ throws ParseException
+ {
+ // for pre-context
+ String precontext = rules.substring(0, offset);
+ String postcontext = rules.substring(offset, rules.length());
+ StringBuilder error = new StringBuilder(
+ "Parse error occurred in rule at offset ");
+ error.append(offset);
+ error.append("\n after the prefix \"");
+ error.append(precontext);
+ error.append("\" before the suffix \"");
+ error.append(postcontext);
+ throw new ParseException(error.toString(), offset);
+ }
+
+ /**
+  * Appends a synthetic character sequence for the current indirect
+  * position to m_source_ and points the parsed token at it. The sequence
+  * is 0xFFFE followed by the two 16-bit halves of the start CE and, when
+  * the start continuation CE is non-zero, its two halves as well.
+  * @return always true
+  */
+ private final boolean doSetTop() {
+     m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
+     m_source_.append((char)0xFFFE);
+
+     final IndirectBoundaries boundary
+         = INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_];
+
+     // marker + high half + low half of the start CE
+     final int startCE = boundary.m_startCE_;
+     m_source_.append((char)(startCE >> 16))
+              .append((char)(startCE & 0xFFFF));
+     m_extraCurrent_ += 3;
+
+     final int startContCE = boundary.m_startContCE_;
+     if (startContCE != 0) {
+         // the continuation adds two more characters
+         m_source_.append((char)(startContCE >> 16))
+                  .append((char)(startContCE & 0xFFFF));
+         m_extraCurrent_ += 2;
+         m_parsedToken_.m_charsLen_ = 5;
+     }
+     else {
+         m_parsedToken_.m_charsLen_ = 3;
+     }
+     return true;
+ }
+
+ /**
+  * Tells whether a character is one of the line terminators recognised
+  * by the parser: LF, FF, CR, NEL, LS or PS.
+  * @param c character to test
+  * @return true if c is one of those characters
+  */
+ private static boolean isCharNewLine(char c) {
+     return c == 0x000A      /* LF */
+         || c == 0x000C      /* FF */
+         || c == 0x000D      /* CR */
+         || c == 0x0085      /* NEL */
+         || c == 0x2028      /* LS */
+         || c == 0x2029;     /* PS */
+ }
+
+ /**
+  * Returns the next token, splitting starred tokens and ranges into one
+  * token per code point.
+  *
+  * While a range or a starred token from an earlier call is still being
+  * drained, the next piece of it is returned without reading further
+  * rule text. Otherwise parseNextTokenInternal() is called; if it left
+  * the parser in range or starred mode, the bookkeeping indices are set
+  * up and the first piece is returned.
+  *
+  * Members read and written:
+  *   m_current_: index of the next unparsed char in m_source_.
+  *   m_parsedToken_: the token handed back to the caller (strength,
+  *       chars offset/length, extension, prefix, flags, indirect index).
+  *   m_inRange_, m_isStarred_, m_previousCp_, m_lastRangeCp_,
+  *   m_currentRangeCp_, m_currentStarredCharIndex_,
+  *   m_lastStarredCharIndex_: range/starred state.
+  * @param startofrules
+  *            flag indicating if we are at the start of rules
+  * @return the offset of the next unparsed char
+  * @exception ParseException
+  *                thrown when rule parsing fails
+  */
+ private int parseNextToken(boolean startofrules) throws ParseException
+ {
+     // Unfinished work from an earlier call comes first; a range is
+     // drained before the starred list.
+     if (m_inRange_) {
+         return processNextCodePointInRange();
+     }
+     if (m_isStarred_) {
+         return processNextTokenInTheStarredList();
+     }
+
+     // Read a fresh token.
+     int offsetAfterToken = parseNextTokenInternal(startofrules);
+     if (!m_inRange_ && !m_isStarred_) {
+         return offsetAfterToken;
+     }
+
+     // [firstCharIndex .. lastCharIndex], both inclusive, is the span of
+     // the token that has to be handed out piece by piece.
+     int firstCharIndex = m_parsedToken_.m_charsOffset_;
+     int lastCharIndex = firstCharIndex + m_parsedToken_.m_charsLen_ - 1;
+
+     if (!m_inRange_) {
+         // Plain starred token: start at its first character.
+         m_currentStarredCharIndex_ = firstCharIndex;
+         m_lastStarredCharIndex_ = lastCharIndex;
+         return processNextTokenInTheStarredList();
+     }
+
+     // A new range has started. Reject a chain of ranges with more than
+     // one hyphen.
+     if (m_lastRangeCp_ > 0 && m_lastRangeCp_ == m_previousCp_) {
+         throw new ParseException("Chained range syntax", m_current_);
+     }
+
+     // The token begins with the end point of the range, which must lie
+     // above the code point seen just before it.
+     m_lastRangeCp_ = m_source_.codePointAt(firstCharIndex);
+     if (m_lastRangeCp_ <= m_previousCp_) {
+         throw new ParseException("Invalid range", m_current_);
+     }
+     m_currentRangeCp_ = m_previousCp_ + 1;
+
+     // Whatever follows the end point is resumed as a starred list once
+     // the range is exhausted.
+     m_currentStarredCharIndex_ = firstCharIndex
+         + Character.charCount(m_lastRangeCp_);
+     m_lastStarredCharIndex_ = lastCharIndex;
+
+     return processNextCodePointInRange();
+ }
+
+ private int processNextCodePointInRange() throws ParseException {
+ int nChars = Character.charCount(m_currentRangeCp_);
+ m_source_.appendCodePoint(m_currentRangeCp_);
+
+ m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
+ m_parsedToken_.m_charsLen_ = nChars;
+
+ m_extraCurrent_ += nChars;
+ ++m_currentRangeCp_;
+ if (m_currentRangeCp_ > m_lastRangeCp_) {
+ // All the code points in the range are processed.
+ // Turn the range flag off.
+ m_inRange_ = false;
+
+ // If there is a starred portion remaining in the current
+ // parsed token, resume the starred operation.
+ if (m_currentStarredCharIndex_ <= m_lastStarredCharIndex_) {
+ m_isStarred_ = true;
+ } else {
+ m_isStarred_ = false;
+ }
+ } else {
+ m_previousCp_ = m_currentRangeCp_;
+ }
+ return m_current_;
+ }
+
+
+ /**
+ * Extracts the next token from the starred token from
+ * m_currentStarredCharIndex_ and returns it.
+ * @return the offset of the next unparsed char
+ * @throws ParseException
+ */
+ private int processNextTokenInTheStarredList() throws ParseException {
+ // Extract the characters corresponding to the next code point.
+ int cp = m_source_.codePointAt(m_currentStarredCharIndex_);
+ int nChars = Character.charCount(cp);
+
+ m_parsedToken_.m_charsLen_ = nChars;
+ m_parsedToken_.m_charsOffset_ = m_currentStarredCharIndex_;
+ m_currentStarredCharIndex_ += nChars;
+
+ // When we are done parsing the starred string, turn the flag off so that
+ // the normal processing is restored.
+ if (m_currentStarredCharIndex_ > m_lastStarredCharIndex_) {
+ m_isStarred_ = false;
+ }
+ m_previousCp_ = cp;
+ return m_current_;
+ }
+
+ /**
+  * Ends the token as a reset when the rules start with a strength operator:
+  * selects indirect index 5, lets doSetTop() supply the top flag (the value
+  * passed in as top is not used) and finishes the token as TOKEN_RESET_.
+  */
+ private int resetToTop(boolean top, boolean variableTop, int extensionOffset,
+                        int newExtensionLen, byte byteBefore) throws ParseException {
+     m_parsedToken_.m_indirectIndex_ = 5;
+     return doEndParseNextToken(TOKEN_RESET_, doSetTop(), extensionOffset,
+                                newExtensionLen, variableTop, byteBefore);
+ }
+
+ /**
+ * Gets the next token and sets the necessary internal variables.
+ * This function parses a starred string as a single token, which will be separated
+ * in the calling function.
+ * @param startofrules Boolean value indicating whether this is the first rule
+ * @return the offset of the next unparsed char
+ * @throws ParseException
+ */
+ @SuppressWarnings("fallthrough")
+ private int parseNextTokenInternal(boolean startofrules) throws ParseException {
+ boolean variabletop = false;
+ boolean top = false;
+ boolean inchars = true;
+ boolean inquote = false;
+ boolean wasinquote = false;
+ byte before = 0;
+ boolean isescaped = false;
+ int /*newcharslen = 0,*/ newextensionlen = 0;
+ int /*charsoffset = 0,*/ extensionoffset = 0;
+ int newstrength = TOKEN_UNSET_;
+
+ initializeParsedToken();
+
+ int limit = m_rules_.length();
+ while (m_current_ < limit) {
+ char ch = m_source_.charAt(m_current_);
+ if (inquote) {
+ if (ch == 0x0027) { // '\''
+ inquote = false;
+ }
+ else {
+ if ((m_parsedToken_.m_charsLen_ == 0) || inchars) {
+ if (m_parsedToken_.m_charsLen_ == 0) {
+ m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
+ }
+ m_parsedToken_.m_charsLen_ ++;
+ }
+ else {
+ if (newextensionlen == 0) {
+ extensionoffset = m_extraCurrent_;
+ }
+ newextensionlen ++;
+ }
+ }
+ }
+ else if (isescaped) {
+ isescaped = false;
+ if (newstrength == TOKEN_UNSET_) {
+ throwParseException(m_rules_, m_current_);
+ }
+ if (ch != 0 && m_current_ != limit) {
+ if (inchars) {
+ if (m_parsedToken_.m_charsLen_ == 0) {
+ m_parsedToken_.m_charsOffset_ = m_current_;
+ }
+ m_parsedToken_.m_charsLen_ ++;
+ }
+ else {
+ if (newextensionlen == 0) {
+ extensionoffset = m_current_;
+ }
+ newextensionlen ++;
+ }
+ }
+ }
+ else {
+ if (!PatternProps.isWhiteSpace(ch)) {
+ // Sets the strength for this entry
+ switch (ch) {
+ case 0x003D : // '='
+ if (newstrength != TOKEN_UNSET_) {
+ return doEndParseNextToken(newstrength,
+ top,
+ extensionoffset,
+ newextensionlen,
+ variabletop, before);
+ }
+ // if we start with strength, we'll reset to top
+ if (startofrules == true) {
+ return resetToTop(top, variabletop, extensionoffset,
+ newextensionlen, before);
+ }
+ newstrength = Collator.IDENTICAL;
+ if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
+ m_current_++;
+ m_isStarred_ = true;
+ }
+ break;
+ case 0x002C : // ','
+ if (newstrength != TOKEN_UNSET_) {
+ return doEndParseNextToken(newstrength,
+ top,
+ extensionoffset,
+ newextensionlen,
+ variabletop, before);
+ }
+ // if we start with strength, we'll reset to top
+ if (startofrules == true) {
+ return resetToTop(top, variabletop, extensionoffset,
+ newextensionlen, before);
+ }
+ newstrength = Collator.TERTIARY;
+ break;
+ case 0x003B : // ';'
+ if (newstrength != TOKEN_UNSET_) {
+ return doEndParseNextToken(newstrength,
+ top,
+ extensionoffset,
+ newextensionlen,
+ variabletop, before);
+ }
+ //if we start with strength, we'll reset to top
+ if(startofrules == true) {
+ return resetToTop(top, variabletop, extensionoffset,
+ newextensionlen, before);
+ }
+ newstrength = Collator.SECONDARY;
+ break;
+ case 0x003C : // '<'
+ if (newstrength != TOKEN_UNSET_) {
+ return doEndParseNextToken(newstrength,
+ top,
+ extensionoffset,
+ newextensionlen,
+ variabletop, before);
+ }
+ // if we start with strength, we'll reset to top
+ if (startofrules == true) {
+ return resetToTop(top, variabletop, extensionoffset,
+ newextensionlen, before);
+ }
+ // before this, do a scan to verify whether this is
+ // another strength
+ if (m_source_.charAt(m_current_ + 1) == 0x003C) {
+ m_current_ ++;
+ if (m_source_.charAt(m_current_ + 1) == 0x003C) {
+ m_current_ ++; // three in a row!
+ newstrength = Collator.TERTIARY;
+ }
+ else { // two in a row
+ newstrength = Collator.SECONDARY;
+ }
+ }
+ else { // just one
+ newstrength = Collator.PRIMARY;
+ }
+ if (m_source_.charAt(m_current_ + 1) == 0x002A) { // '*'
+ m_current_++;
+ m_isStarred_ = true;
+ }
+ break;
+
+ case 0x0026 : // '&'
+ if (newstrength != TOKEN_UNSET_) {
+ return doEndParseNextToken(newstrength,
+ top,
+ extensionoffset,
+ newextensionlen,
+ variabletop, before);
+ }
+ newstrength = TOKEN_RESET_; // PatternEntry::RESET = 0
+ break;
+ case 0x005b : // '['
+ // options - read an option, analyze it
+ m_optionEnd_ = m_rules_.indexOf(0x005d, m_current_);
+ if (m_optionEnd_ != -1) { // ']'
+ byte result = readAndSetOption();
+ m_current_ = m_optionEnd_;
+ if ((result & TOKEN_TOP_MASK_) != 0) {
+ if (newstrength == TOKEN_RESET_) {
+ top = doSetTop();
+ if (before != 0) {
+ // This is a combination of before and
+ // indirection like
+ // '&[before 2][first regular]>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_)
+ && (basece >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { /* implicits - */
+
+ int primary = basece & RuleBasedCollator.CE_PRIMARY_MASK_ | (basecontce & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16;
+ int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary);
+ ch = RuleBasedCollator.impCEGen_.getCodePointFromRaw(raw-1);
+ int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1);
+ m_utilCEBuffer_[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
+ m_utilCEBuffer_[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;
+
+ m_parsedToken_.m_charsOffset_ = m_extraCurrent_;
+ m_source_.append('\uFFFE');
+ m_source_.append((char)ch);
+ m_extraCurrent_ += 2;
+ m_parsedToken_.m_charsLen_++;
+
+ m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
+ | m_parsedToken_.m_charsOffset_;
+ m_utilToken_.m_rules_ = m_source_;
+ sourcetoken = m_hashTable_.get(m_utilToken_);
+
+ if(sourcetoken == null) {
+ m_listHeader_[m_resultLength_] = new TokenListHeader();
+ m_listHeader_[m_resultLength_].m_baseCE_
+ = m_utilCEBuffer_[0] & 0xFFFFFF3F;
+ if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
+ m_listHeader_[m_resultLength_].m_baseContCE_
+ = m_utilCEBuffer_[1];
+ }
+ else {
+ m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
+ }
+ m_listHeader_[m_resultLength_].m_nextCE_ = 0;
+ m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_indirect_ = false;
+
+ sourcetoken = new Token();
+ initAReset(-1, sourcetoken);
+ }
+
+ } else {
+
+ // first ce and second ce m_utilCEBuffer_
+ /*int invpos = */CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE(
+ basece, basecontce,
+ strength, m_utilCEBuffer_);
+ // we got the previous CE. Now we need to see if the difference between
+ // the two CEs is really of the requested strength.
+ // if it's a bigger difference (we asked for secondary and got primary), we
+ // need to modify the CE.
+ if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) {
+ // adjust the strength
+ // now we are in the situation where our baseCE should actually be modified in
+ // order to get the CE in the right position.
+ if(strength == Collator.SECONDARY) {
+ m_utilCEBuffer_[0] = basece - 0x0200;
+ } else { // strength == UCOL_TERTIARY
+ m_utilCEBuffer_[0] = basece - 0x02;
+ }
+ if(RuleBasedCollator.isContinuation(basecontce)) {
+ if(strength == Collator.SECONDARY) {
+ m_utilCEBuffer_[1] = basecontce - 0x0200;
+ } else { // strength == UCOL_TERTIARY
+ m_utilCEBuffer_[1] = basecontce - 0x02;
+ }
+ }
+ }
+
+/*
+ // the code below relies on getting a code point from the inverse table, in order to be
+ // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
+ // 1. There are many code points that have the same CE
+ // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2] are broken.
+ // Also, in case when there is no equivalent strength before an element, we have to actually
+ // construct one. For example, &[before 2]a << x won't result in x << a, because the element
+ // before a is a primary difference.
+ ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos
+ + 2];
+ if ((ch & INVERSE_SIZE_MASK_) != 0) {
+ int offset = ch & INVERSE_OFFSET_MASK_;
+ ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_continuations_[
+ offset];
+ }
+ m_source_.append((char)ch);
+ m_extraCurrent_ ++;
+ m_parsedToken_.m_charsOffset_ = m_extraCurrent_ - 1;
+ m_parsedToken_.m_charsLen_ = 1;
+
+ // We got an UCA before. However, this might have been tailored.
+ // example:
+ // &\u30ca = \u306a
+ // &[before 3]\u306a<<<\u306a|\u309d
+
+ m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24)
+ | m_parsedToken_.m_charsOffset_;
+ m_utilToken_.m_rules_ = m_source_;
+ sourcetoken = (Token)m_hashTable_.get(m_utilToken_);
+*/
+
+ // here is how it should be. The situation such as &[before 1]a < x, should be
+ // resolved exactly as if we wrote &a > x.
+ // therefore, I don't really care if the UCA value before a has been changed.
+ // However, I do care if the strength between my element and the previous element
+ // is bigger then I wanted. So, if CE < baseCE and I wanted &[before 2], then i'll
+ // have to construct the base CE.
+
+ // if we found a tailored thing, we have to use the UCA value and
+ // construct a new reset token with constructed name
+ //if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) {
+ // character to which we want to anchor is already tailored.
+ // We need to construct a new token which will be the anchor point
+ //m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE');
+ //m_source_.append(ch);
+ //m_extraCurrent_ ++;
+ //m_parsedToken_.m_charsLen_ ++;
+ // grab before
+ m_parsedToken_.m_charsOffset_ -= 10;
+ m_parsedToken_.m_charsLen_ += 10;
+ m_listHeader_[m_resultLength_] = new TokenListHeader();
+ m_listHeader_[m_resultLength_].m_baseCE_
+ = m_utilCEBuffer_[0] & 0xFFFFFF3F;
+ if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) {
+ m_listHeader_[m_resultLength_].m_baseContCE_
+ = m_utilCEBuffer_[1];
+ }
+ else {
+ m_listHeader_[m_resultLength_].m_baseContCE_ = 0;
+ }
+ m_listHeader_[m_resultLength_].m_nextCE_ = 0;
+ m_listHeader_[m_resultLength_].m_nextContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousCE_ = 0;
+ m_listHeader_[m_resultLength_].m_previousContCE_ = 0;
+ m_listHeader_[m_resultLength_].m_indirect_ = false;
+ sourcetoken = new Token();
+ initAReset(-1, sourcetoken);
+ //}
+ }
+ return sourcetoken;
+ }
+
+ /**
+ * Processing Description.
+ * 1. Build a m_listHeader_. Each list has a header, which contains two lists
+ * (positive and negative), a reset token, a baseCE, nextCE, and
+ * previousCE. The lists and reset may be null.
+ * 2. As you process, you keep a LAST pointer that points to the last token
+ * you handled.
+ * @param expand string offset, -1 for null strings
+ * @param targetToken token to update
+ * @return expandnext offset
+ * @throws ParseException thrown when rules syntax failed
+ */
+ private int initAReset(int expand, Token targetToken) throws ParseException
+ {
+ if (m_resultLength_ == m_listHeader_.length - 1) {
+ // Unfortunately, this won't work, as we store addresses of lhs in
+ // token
+ TokenListHeader temp[] = new TokenListHeader[m_resultLength_ << 1];
+ System.arraycopy(m_listHeader_, 0, temp, 0, m_resultLength_ + 1);
+ m_listHeader_ = temp;
+ }
+ // do the reset thing
+ targetToken.m_rules_ = m_source_;
+ targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24
+ | m_parsedToken_.m_charsOffset_;
+ targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24
+ | m_parsedToken_.m_extensionOffset_;
+ // keep the flags around so that we know about before
+ targetToken.m_flags_ = m_parsedToken_.m_flags_;
+
+ if (m_parsedToken_.m_prefixOffset_ != 0) {
+ throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1);
+ }
+
+ targetToken.m_prefix_ = 0;
+ // TODO: this should also handle reverse
+ targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_;
+ targetToken.m_strength_ = TOKEN_RESET_;
+ targetToken.m_next_ = null;
+ targetToken.m_previous_ = null;
+ targetToken.m_CELength_ = 0;
+ targetToken.m_expCELength_ = 0;
+ targetToken.m_listHeader_ = m_listHeader_[m_resultLength_];
+ m_listHeader_[m_resultLength_].m_first_ = null;
+ m_listHeader_[m_resultLength_].m_last_ = null;
+ m_listHeader_[m_resultLength_].m_first_ = null;
+ m_listHeader_[m_resultLength_].m_last_ = null;
+ m_listHeader_[m_resultLength_].m_reset_ = targetToken;
+
+ /* 3 Consider each item: relation, source, and expansion:
+ * e.g. ...< x / y ...
+ * First convert all expansions into normal form. Examples:
+ * If "xy" doesn't occur earlier in the list or in the UCA, convert
+ * &xy * c * d * ... into &x * c/y * d * ...
+ * Note: reset values can never have expansions, although they can
+ * cause the very next item to have one. They may be contractions, if
+ * they are found earlier in the list.
+ */
+ int result = 0;
+ if (expand > 0) {
+ // check to see if there is an expansion
+ if (m_parsedToken_.m_charsLen_ > 1) {
+ targetToken.m_source_ = ((expand
+ - m_parsedToken_.m_charsOffset_ )
+ << 24)
+ | m_parsedToken_.m_charsOffset_;
+ result = ((m_parsedToken_.m_charsLen_
+ + m_parsedToken_.m_charsOffset_ - expand) << 24)
+ | expand;
+ }
+ }
+
+ m_resultLength_ ++;
+ m_hashTable_.put(targetToken, targetToken);
+ return result;
+ }
+
+ /**
+  * Tells whether a character is one of the ASCII symbols treated as special.
+  * @param ch character to test
+  * @return true for U+0020..U+002F, U+003A..U+003F, U+005B..U+0060, U+007B,
+  *         U+007D and U+007E; false for everything else, including U+007C
+  */
+ private static final boolean isSpecialChar(char ch)
+ {
+     return (0x0020 <= ch && ch <= 0x002F) || (0x003A <= ch && ch <= 0x003F)
+         || (0x005B <= ch && ch <= 0x0060) || ch == 0x007B || ch == 0x007D || ch == 0x007E;
+ }
+
+ /**
+  * Builds a UnicodeSet from the bracketed pattern starting at the first '[' at
+  * or after start. Nested brackets are balanced; one more ']' must follow.
+  * @exception ParseException thrown when no balanced pattern is found
+  */
+ private
+ UnicodeSet readAndSetUnicodeSet(String source, int start) throws ParseException
+ {
+     // Look for the opening '[' without running off the end of the string.
+     int open = source.indexOf('[', start);
+     if (open < 0) {
+         throwParseException(source, start);
+     }
+     // A set can contain many nested '[]'; walk to the matching closing ']'.
+     int depth = 1;
+     int end = open + 1; // skip the opening brace
+     while (end < source.length() && depth != 0) {
+         char c = source.charAt(end++);
+         depth += (c == '[') ? 1 : (c == ']') ? -1 : 0;
+     }
+     if (depth != 0 || source.indexOf(']', end) == -1) {
+         throwParseException(source, open); // offsets index source, not m_rules_
+     }
+     return new UnicodeSet(source.substring(open, end));
+ }
+
+ /** Second result of readOption(): offset of the option's argument in the
+ * rules, 0 when there is none. In C, optionarg is passed by reference to
+ * the function; a private int simulates that here. */
+ private int m_optionarg_ = 0;
+
+ /**
+  * Looks the option name at start up in RULES_OPTIONS_. m_optionarg_ is reset
+  * to 0 and, if text follows the matched name before optionend, set to the
+  * offset reached after skipping the white space behind the name.
+  * @return index of the matching option in RULES_OPTIONS_, -1 if none matches
+  */
+ private int readOption(String rules, int start, int optionend)
+ {
+     m_optionarg_ = 0;
+     for (int index = 0; index < RULES_OPTIONS_.length; index ++) {
+         String name = RULES_OPTIONS_[index].m_name_;
+         int nameEnd = start + name.length();
+         if (rules.length() <= nameEnd
+             || !name.equalsIgnoreCase(rules.substring(start, nameEnd))) {
+             continue; // not this option
+         }
+         if (optionend > nameEnd) {
+             // start of the options, eat whitespace
+             m_optionarg_ = nameEnd;
+             while (m_optionarg_ < optionend && PatternProps.isWhiteSpace(rules.charAt(m_optionarg_))) {
+                 m_optionarg_ ++;
+             }
+         }
+         return index;
+     }
+     return -1;
+ }
+
+ /**
+ * Reads the option whose '[' is at m_current_ and applies or records it.
+ * @return TOKEN_SUCCESS_MASK_, possibly combined with further flag bits
+ * @exception ParseException thrown when options in rules are wrong
+ */
+ private byte readAndSetOption() throws ParseException
+ {
+ int start = m_current_ + 1; // skip opening '['
+ int i = readOption(m_rules_, start, m_optionEnd_);
+
+ int optionarg = m_optionarg_; // offset of the option's argument, 0 if none
+
+ if (i < 0) { // no entry of RULES_OPTIONS_ matched
+ throwParseException(m_rules_, start);
+ }
+
+ if (i < 7) { // options whose sub-option is stored through setOptions()
+ if (optionarg != 0) {
+ for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
+ j ++) {
+ String subname = RULES_OPTIONS_[i].m_subOptions_[j];
+ int size = optionarg + subname.length();
+ if (m_rules_.length() > size
+ && subname.equalsIgnoreCase(m_rules_.substring(
+ optionarg, size))) {
+ setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_,
+ RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]);
+ return TOKEN_SUCCESS_MASK_;
+ }
+ }
+ }
+ throwParseException(m_rules_, optionarg); // no sub-option matched
+ }
+ else if (i == 7) { // variable top
+ return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_;
+ }
+ else if (i == 8) { // rearrange
+ return TOKEN_SUCCESS_MASK_;
+ }
+ else if (i == 9) { // before
+ if (optionarg != 0) {
+ for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length;
+ j ++) {
+ String subname = RULES_OPTIONS_[i].m_subOptions_[j];
+ int size = optionarg + subname.length();
+ if (m_rules_.length() > size
+ && subname.equalsIgnoreCase(
+ m_rules_.substring(optionarg,
+ optionarg + subname.length()))) {
+ return (byte)(TOKEN_SUCCESS_MASK_
+ | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]
+ + 1);
+ }
+ }
+ }
+ throwParseException(m_rules_, optionarg); // no sub-option matched
+ }
+ else if (i == 10) { // top, we are going to have an array with
+ // structures of limit CEs index to this array will be
+ // src->parsedToken.indirectIndex
+ m_parsedToken_.m_indirectIndex_ = 0;
+ return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
+ }
+ else if (i < 13) { // first, last
+ for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) {
+ String subname = RULES_OPTIONS_[i].m_subOptions_[j];
+ int size = optionarg + subname.length();
+ if (m_rules_.length() > size
+ && subname.equalsIgnoreCase(m_rules_.substring(optionarg,
+ size))) {
+ m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + (j << 1));
+ return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_;
+ }
+ }
+ throwParseException(m_rules_, optionarg); // no sub-option matched
+ }
+ else if(i == 13 || i == 14) { // copy and remove are handled before normalization
+ // we need to move end here
+ int noOpenBraces = 1;
+ m_current_++; // skip opening brace
+ while(m_current_ < m_source_.length() && noOpenBraces != 0) {
+ if(m_source_.charAt(m_current_) == '[') {
+ noOpenBraces++;
+ } else if(m_source_.charAt(m_current_) == ']') { // closing brace
+ noOpenBraces--;
+ }
+ m_current_++;
+ }
+ m_optionEnd_ = m_current_-1; // index of the last char consumed by the loop
+ return TOKEN_SUCCESS_MASK_;
+ }
+ else if(i == 16) { // the argument list is handled by parseScriptReorder()
+ m_current_ = m_optionarg_; // skip opening brace and name
+ parseScriptReorder();
+ return TOKEN_SUCCESS_MASK_;
+ }
+ else {
+ throwParseException(m_rules_, optionarg);
+ }
+ return TOKEN_SUCCESS_MASK_; // we will never reach here.
+ }
+
+ /**
+ * Set collation option
+ * @param optionset option set to set
+ * @param attribute type to set
+ * @param value attribute value
+ */
+ private void setOptions(OptionSet optionset, int attribute, int value)
+ {
+ switch (attribute) {
+ case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ :
+ optionset.m_isHiragana4_
+ = (value == RuleBasedCollator.AttributeValue.ON_);
+ break;
+ case RuleBasedCollator.Attribute.FRENCH_COLLATION_ :
+ optionset.m_isFrenchCollation_
+ = (value == RuleBasedCollator.AttributeValue.ON_);
+ break;
+ case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ :
+ optionset.m_isAlternateHandlingShifted_
+ = (value
+ == RuleBasedCollator.AttributeValue.SHIFTED_);
+ break;
+ case RuleBasedCollator.Attribute.CASE_FIRST_ :
+ optionset.m_caseFirst_ = value;
+ break;
+ case RuleBasedCollator.Attribute.CASE_LEVEL_ :
+ optionset.m_isCaseLevel_
+ = (value == RuleBasedCollator.AttributeValue.ON_);
+ break;
+ case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ :
+ if (value == RuleBasedCollator.AttributeValue.ON_) {
+ value = Collator.CANONICAL_DECOMPOSITION;
+ }
+ optionset.m_decomposition_ = value;
+ break;
+ case RuleBasedCollator.Attribute.STRENGTH_ :
+ optionset.m_strength_ = value;
+ break;
+ default :
+ break;
+ }
+ }
+
+ UnicodeSet getTailoredSet() throws ParseException
+ {
+ boolean startOfRules = true;
+ UnicodeSet tailored = new UnicodeSet();
+ String pattern;
+ CanonicalIterator it = new CanonicalIterator("");
+
+ m_parsedToken_.m_strength_ = TOKEN_UNSET_;
+ int sourcelimit = m_source_.length();
+ //int expandNext = 0;
+
+ while (m_current_ < sourcelimit) {
+ m_parsedToken_.m_prefixOffset_ = 0;
+ if (parseNextToken(startOfRules) < 0) {
+ // we have reached the end
+ continue;
+ }
+ startOfRules = false;
+ // The idea is to tokenize the rule set. For each non-reset token,
+ // we add all the canonicaly equivalent FCD sequences
+ if(m_parsedToken_.m_strength_ != TOKEN_RESET_) {
+ it.setSource(m_source_.substring(
+ m_parsedToken_.m_charsOffset_,
+ m_parsedToken_.m_charsOffset_+m_parsedToken_.m_charsLen_));
+ pattern = it.next();
+ while(pattern != null) {
+ if(Normalizer.quickCheck(pattern, Normalizer.FCD,0) != Normalizer.NO) {
+ tailored.add(pattern);
+ }
+ pattern = it.next();
+ }
+ }
+ }
+ return tailored;
+ }
+
+ /**
+  * Resolves the options that have to be handled before the rules are parsed:
+  * options 13 (copy) and 14 add their set to m_copySet_ / m_removeSet_, and
+  * option 19 is replaced in the text by the "Sequence" rules it refers to.
+  * @exception ParseException thrown when a set or an option 19 is malformed
+  */
+ final private String preprocessRules(String rules) throws ParseException {
+     for (int i = 0; i < rules.length(); i++) {
+         if(rules.charAt(i) == 0x005B) { // [
+             int optionNumber = readOption(rules, i+1, rules.length());
+             int setStart = m_optionarg_;
+             if(optionNumber == 13) { /* copy - parts of UCA to tailoring */
+                 UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
+                 if(m_copySet_ == null) {
+                     m_copySet_ = newSet;
+                 } else {
+                     m_copySet_.addAll(newSet);
+                 }
+             } else if(optionNumber == 14) {
+                 UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart);
+                 if(m_removeSet_ == null) {
+                     m_removeSet_ = newSet;
+                 } else {
+                     m_removeSet_.addAll(newSet);
+                 }
+             } else if(optionNumber == 19) {
+                 int closing = rules.indexOf(']', i);
+                 if (closing < 0) {
+                     // substring() below would fail with an index exception
+                     throw new ParseException("Unterminated option in rules", i);
+                 }
+                 ULocale locale = ULocale.forLanguageTag(rules.substring(setStart, closing));
+                 UResourceBundle bundle = UResourceBundle.getBundleInstance(
+                     ICUResourceBundle.ICU_BASE_NAME + "/coll", locale.getBaseName());
+                 String type = locale.getKeywordValue("collation");
+                 if(type == null){
+                     type = "standard";
+                 }
+                 String importRules = bundle.get("collations").get(type).get("Sequence").getString();
+                 rules = rules.substring(0, i) + importRules + rules.substring(closing + 1);
+             }
+         }
+     }
+     return rules;
+ }
+
+ /* This is the data that is used for non-script reordering codes. These _must_ be kept
+ * in order that they are to be applied as defaults and in synch with the Collator.ReorderCodes statics.
+ */
+ static final String ReorderingTokensArray[] = {
+ "SPACE",
+ "PUNCT",
+ "SYMBOL",
+ "CURRENCY",
+ "DIGIT",
+ };
+
+ int findReorderingEntry(String name) {
+ for (int tokenIndex = 0; tokenIndex < ReorderingTokensArray.length; tokenIndex++) {
+ if (name.equalsIgnoreCase(ReorderingTokensArray[tokenIndex])) {
+ return tokenIndex + ReorderCodes.FIRST;
+ }
+ }
+ return UScript.INVALID_CODE;
+ }
+
+ /**
+  * Parses the names of a script reordering option, starting at m_current_,
+  * into m_options_.m_scriptOrder_; each is a ReorderingTokensArray or script name.
+  * @exception ParseException thrown when a name cannot be resolved
+  */
+ private void parseScriptReorder() throws ParseException {
+     int end = m_rules_.indexOf(']', m_current_);
+     if (end == -1) {
+         return;
+     }
+     String[] tokens = m_rules_.substring(m_current_, end).split("\\s+", 0);
+     int[] order = new int[tokens.length];
+     for (int k = 0; k < tokens.length; k++) {
+         int code = findReorderingEntry(tokens[k]);
+         if (code == UScript.INVALID_CODE) {
+             code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, tokens[k]);
+             if (code < 0) {
+                 // error offset: position of the name in the rules, not its index
+                 throw new ParseException(m_rules_, m_rules_.indexOf(tokens[k], m_current_));
+             }
+         }
+         order[k] = code;
+     }
+     m_options_.m_scriptOrder_ = order;
+ }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/Collator.java b/main/classes/collate/src/com/ibm/icu/text/Collator.java
new file mode 100644
index 00000000000..e4ccbbaec00
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/Collator.java
@@ -0,0 +1,1258 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2011, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.util.Comparator;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.Set;
+
+import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.util.Freezable;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+* {@icuenhanced java.text.Collator}.{@icu _usage_}
+*
+* Collator performs locale-sensitive string comparison. A concrete
+* subclass, RuleBasedCollator, allows customization of the collation
+* ordering by the use of rule sets.
+*
+* Following the Unicode Consortium's specifications for the
+* Unicode Collation Algorithm (UCA), there are 5 different levels of
+* strength used in comparisons:
+*
+*
+* PRIMARY strength: Typically, this is used to denote differences between
+* base characters (for example, "a" < "b").
+* It is the strongest difference. For example, dictionaries are divided
+* into different sections by base character.
+* SECONDARY strength: Accents in the characters are considered secondary
+* differences (for example, "as" < "às" < "at"). Other
+* differences
+* between letters can also be considered secondary differences, depending
+* on the language. A secondary difference is ignored when there is a
+* primary difference anywhere in the strings.
+* TERTIARY strength: Upper and lower case differences in characters are
+* distinguished at tertiary strength (for example, "ao" < "Ao" <
+* "aò"). In addition, a variant of a letter differs from the base
+* form on the tertiary strength (such as "A" and "Ⓐ"). Another
+* example is the
+* difference between large and small Kana. A tertiary difference is ignored
+* when there is a primary or secondary difference anywhere in the strings.
+* QUATERNARY strength: When punctuation is ignored
+* (see "Ignoring Punctuations" in the user guide) at PRIMARY to TERTIARY
+* strength, an additional strength level can
+* be used to distinguish words with and without punctuation (for example,
+* "ab" < "a-b" < "aB").
+* This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
+* difference. The QUATERNARY strength should only be used if ignoring
+* punctuation is required.
+* IDENTICAL strength:
+* When all other strengths are equal, the IDENTICAL strength is used as a
+* tiebreaker. The Unicode code point values of the NFD form of each string
+* are compared, just in case there is no difference.
+* For example, Hebrew cantillation marks are only distinguished at this
+* strength. This strength should be used sparingly, as it is extremely rare
+* for two strings to differ only in their code point values.
+* Using this strength substantially decreases the performance for both
+* comparison and collation key generation APIs. This strength also
+* increases the size of the collation key.
+*
+*
+* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
+* the canonical decomposition mode and one that does not use any decomposition.
+* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
+* is not supported here. If the canonical
+* decomposition mode is set, the Collator handles un-normalized text properly,
+* producing the same results as if the text were normalized in NFD. If
+* canonical decomposition is turned off, it is the user's responsibility to
+* ensure that all text is already in the appropriate form before performing
+* a comparison or before getting a CollationKey.
+*
+* For more information about the collation service see the
+* user guide.
+*
+* Examples of use
+*
+* // Get the Collator for US English and set its strength to PRIMARY
+* Collator usCollator = Collator.getInstance(Locale.US);
+* usCollator.setStrength(Collator.PRIMARY);
+* if (usCollator.compare("abc", "ABC") == 0) {
+* System.out.println("Strings are equivalent");
+* }
+*
+* The following example shows how to compare two strings using the
+* Collator for the default locale.
+*
+* // Compare two strings in the default locale
+* Collator myCollator = Collator.getInstance();
+* myCollator.setDecomposition(NO_DECOMPOSITION);
+* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
+* System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition");
+* myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
+* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) {
+* System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition");
+* }
+* else {
+* System.out.println("à\u0325 is equals to a\u0325̀ with decomposition");
+* }
+* }
+* else {
+* System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition");
+* }
+*
+*
+* @see RuleBasedCollator
+* @see CollationKey
+* @author Syn Wee Quek
+* @stable ICU 2.8
+*/
+public abstract class Collator implements Comparator, Freezable
+{
+ // public data members ---------------------------------------------------
+
+    /**
+     * Strongest collator strength value. It typically denotes differences
+     * between base characters. See the class documentation for more explanation.
+     * @see #setStrength
+     * @see #getStrength
+     * @stable ICU 2.8
+     */
+    public static final int PRIMARY = 0;
+
+    /**
+     * Second level collator strength value.
+     * Accents in the characters are considered secondary differences.
+     * Depending on the language, other differences between letters can also
+     * be considered secondary differences.
+     * See the class documentation for more explanation.
+     * @see #setStrength
+     * @see #getStrength
+     * @stable ICU 2.8
+     */
+    public static final int SECONDARY = 1;
+
+    /**
+     * Third level collator strength value.
+     * Upper and lower case differences in characters are distinguished at this
+     * strength level. In addition, a variant of a letter differs from the base
+     * form on the tertiary level.
+     * See the class documentation for more explanation.
+     * @see #setStrength
+     * @see #getStrength
+     * @stable ICU 2.8
+     */
+    public static final int TERTIARY = 2;
+
+    /**
+     * {@icu} Fourth level collator strength value.
+     * When punctuation is ignored (see "Ignoring Punctuations" in the user
+     * guide) at PRIMARY to TERTIARY strength, an additional strength level can
+     * be used to distinguish words with and without punctuation.
+     * See the class documentation for more explanation.
+     * @see #setStrength
+     * @see #getStrength
+     * @stable ICU 2.8
+     */
+    public static final int QUATERNARY = 3;
+
+    /**
+     * Smallest Collator strength value. When all other strengths are equal,
+     * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
+     * values of the NFD form of each string are compared, just in case there
+     * is no difference.
+     * See the class documentation for more explanation.
+     *
+     * Note that this value is different from the JDK's.
+     *
+     * @stable ICU 2.8
+     */
+    public static final int IDENTICAL = 15;
+
+    /**
+     * {@icunote} This is for backwards compatibility with Java APIs only. It
+     * should not be used; IDENTICAL should be used instead. ICU's
+     * collation does not support Java's FULL_DECOMPOSITION mode.
+     * @stable ICU 3.4
+     */
+    public static final int FULL_DECOMPOSITION = IDENTICAL;
+
+    /**
+     * Decomposition mode value. With NO_DECOMPOSITION set, Strings
+     * will not be decomposed for collation. This is the default
+     * decomposition setting unless otherwise specified by the locale
+     * used to create the Collator.
+     *
+     * Note that this value is different from the JDK's.
+     * @see #CANONICAL_DECOMPOSITION
+     * @see #getDecomposition
+     * @see #setDecomposition
+     * @stable ICU 2.8
+     */
+    public static final int NO_DECOMPOSITION = 16;
+
+    /**
+     * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
+     * characters that are canonical variants according to the Unicode standard
+     * will be decomposed for collation.
+     *
+     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
+     * described in Unicode Technical Report #15.
+     *
+     * @see #NO_DECOMPOSITION
+     * @see #getDecomposition
+     * @see #setDecomposition
+     * @stable ICU 2.8
+     */
+    public static final int CANONICAL_DECOMPOSITION = 17;
+
+ /**
+ * Reordering codes for non-script groups that can be reordered under collation.
+ *
+ * @see #getReorderCodes
+ * @see #setReorderCodes
+ * @see #getEquivalentReorderCodes
+ * @draft ICU 4.8
+ */
+    public static interface ReorderCodes {
+        // Interface fields are implicitly public, static and final, so the modifiers
+        // are not repeated on each constant.
+
+        /**
+         * A special reordering code that is used to specify the default reordering codes for a locale.
+         * NOTE(review): confirm that this value cannot be confused with a UScript code
+         * passed in the same reordering array.
+         * @draft ICU 4.8
+         */
+        int DEFAULT = 1;
+        /**
+         * A special reordering code that is used to specify no reordering codes.
+         * @draft ICU 4.8
+         */
+        int NONE = UScript.UNKNOWN;
+        /**
+         * A special reordering code that is used to specify all other codes used for reordering except
+         * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
+         * It has the same value as {@link #NONE}.
+         * @draft ICU 4.8
+         */
+        int OTHERS = UScript.UNKNOWN;
+        /**
+         * Characters with the space property.
+         * @draft ICU 4.8
+         */
+        int SPACE = 0x1000;
+        /**
+         * The first entry in the enumeration; same value as {@link #SPACE}.
+         * @draft ICU 4.8
+         */
+        int FIRST = SPACE;
+        /**
+         * Characters with the punctuation property.
+         * @draft ICU 4.8
+         */
+        int PUNCTUATION = 0x1001;
+        /**
+         * Characters with the symbol property.
+         * @draft ICU 4.8
+         */
+        int SYMBOL = 0x1002;
+        /**
+         * Characters with the currency property.
+         * @draft ICU 4.8
+         */
+        int CURRENCY = 0x1003;
+        /**
+         * Characters with the digit property.
+         * @draft ICU 4.8
+         */
+        int DIGIT = 0x1004;
+        /**
+         * The limit of the reorder codes: one more than the last code, {@link #DIGIT}.
+         * @draft ICU 4.8
+         */
+        int LIMIT = 0x1005;
+    }
+
+ // public methods --------------------------------------------------------
+
+ // public setters --------------------------------------------------------
+
+ /**
+ * Sets this Collator's strength property. The strength property
+ * determines the minimum level of difference considered significant
+ * during comparison.
+ *
+ * The default strength for the Collator is TERTIARY, unless specified
+ * otherwise by the locale used to create the Collator.
+ *
+ * See the Collator class description for an example of use.
+ * @param newStrength the new strength value.
+ * @see #getStrength
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @throws IllegalArgumentException if the new strength value is not one
+ * of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+ * @stable ICU 2.8
+ */
+ public void setStrength(int newStrength)
+ {
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ if ((newStrength != PRIMARY) &&
+ (newStrength != SECONDARY) &&
+ (newStrength != TERTIARY) &&
+ (newStrength != QUATERNARY) &&
+ (newStrength != IDENTICAL)) {
+ throw new IllegalArgumentException("Incorrect comparison level.");
+ }
+ m_strength_ = newStrength;
+ }
+
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+    public Collator setStrength2(int newStrength) {
+        // Same checks and effect as setStrength; the only difference is that this
+        // variant hands back the collator itself so that calls can be chained.
+        this.setStrength(newStrength);
+        return this;
+    }
+
+ /**
+ * Sets the decomposition mode of this Collator. Setting this
+ * decomposition property with CANONICAL_DECOMPOSITION allows the
+ * Collator to handle un-normalized text properly, producing the
+ * same results as if the text were normalized. If
+ * NO_DECOMPOSITION is set, it is the user's responsibility to
+ * ensure that all text is already in the appropriate form before
+ * a comparison or before getting a CollationKey. Adjusting
+ * decomposition mode allows the user to select between faster and
+ * more complete collation behavior.
+ *
+ * Since a great many of the world's languages do not require
+ * text normalization, most locales set NO_DECOMPOSITION as the
+ * default decomposition mode.
+ *
+ * The default decomposition mode for the Collator is
+ * NO_DECOMPOSITION, unless specified otherwise by the locale used
+ * to create the Collator.
+ *
+ * See getDecomposition for a description of decomposition
+ * mode.
+ *
+ * @param decomposition the new decomposition mode
+ * @see #getDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @throws IllegalArgumentException If the given value is not a valid
+ * decomposition mode.
+ * @stable ICU 2.8
+ */
+    public void setDecomposition(int decomposition)
+    {
+        // Validation and the actual update live in internalSetDecomposition; this
+        // public entry point only adds the frozen check in front of it.
+        if (!isFrozen()) {
+            internalSetDecomposition(decomposition);
+            return;
+        }
+        throw new UnsupportedOperationException("Attempt to modify frozen object");
+    }
+
+ /**
+ * Internal set-decomposition call to work around the frozen state because of self-modification
+ * in the RuleBasedCollator. This method must only be called by code that has
+ * passed the frozen check already and has the lock if the Collator is frozen.
+ * Better still this method should go away and RuleBasedCollator.getSortKeyBytes()
+ * should be fixed to not self-modify.
+ * @param decomposition
+ * @internal
+ */
+ protected void internalSetDecomposition(int decomposition)
+ {
+ if ((decomposition != NO_DECOMPOSITION) &&
+ (decomposition != CANONICAL_DECOMPOSITION)) {
+ throw new IllegalArgumentException("Wrong decomposition mode.");
+ }
+ m_decomposition_ = decomposition;
+ if (decomposition != NO_DECOMPOSITION) {
+ // ensure the FCD data is initialized
+ Norm2AllModes.getFCDNormalizer2();
+ }
+ }
+
+ /**
+ * Sets the reordering codes for this collator.
+ * Reordering codes allow the collation ordering for groups of characters to be changed.
+ * The reordering codes are a combination of UScript codes and ReorderCodes.
+ * These allow the ordering of characters belonging to these groups to be changed as a group.
+ * @param order the reordering codes to apply to this collator; if this is null or an empty array
+ * then this clears any existing reordering
+ * @see #getReorderCodes
+ * @see #getEquivalentReorderCodes
+ * @draft ICU 4.8
+ */
+ public void setReorderCodes(int... order)
+ {
+ // This base implementation does not support reordering: every call fails,
+ // whatever codes are passed.
+ // NOTE(review): presumably overridden by subclasses that support reordering - confirm.
+ throw new UnsupportedOperationException();
+ }
+
+ // public getters --------------------------------------------------------
+
+ /**
+ * Returns the Collator for the current default locale.
+ * The default locale is determined by java.util.Locale.getDefault().
+ * @return the Collator for the default locale (for example, en_US) if it
+ * is created successfully. Otherwise if there is no Collator
+ * associated with the current locale, the default UCA collator
+ * will be returned.
+ * @see java.util.Locale#getDefault()
+ * @see #getInstance(Locale)
+ * @stable ICU 2.8
+ */
+    public static final Collator getInstance()
+    {
+        // Resolve the default locale first, then use the ULocale-based lookup.
+        final ULocale defaultLocale = ULocale.getDefault();
+        return getInstance(defaultLocale);
+    }
+
+ /**
+ * Clones the collator.
+ * @stable ICU 2.6
+ * @return a clone of this collator.
+ */
+ public Object clone() throws CloneNotSupportedException {
+ // Delegates entirely to the superclass clone(); no field is copied explicitly here.
+ return super.clone();
+ }
+
+ // begin registry stuff
+
+ /**
+ * A factory used with registerFactory to register multiple collators and provide
+ * display names for them. If standard locale display names are sufficient,
+ * Collator instances may be registered instead.
+ * Note: as of ICU4J 3.2, the default API for CollatorFactory uses
+ * ULocale instead of Locale. Instead of overriding createCollator(Locale),
+ * new implementations should override createCollator(ULocale). Note that
+ * one of these two methods MUST be overridden or else an infinite
+ * loop will occur.
+ * @stable ICU 2.6
+ */
+    public static abstract class CollatorFactory {
+        /**
+         * Tells whether this factory is visible. The default is true.
+         * If the factory is not visible, the locales it supports will not
+         * be listed by getAvailableLocales.
+         *
+         * @return true if this factory is visible
+         * @stable ICU 2.6
+         */
+        public boolean visible() {
+            return true;
+        }
+
+        /**
+         * Returns an instance of the appropriate collator, or null if the locale
+         * is not supported.
+         * Note: as of ICU4J 3.2, implementations should override
+         * this method instead of createCollator(Locale).
+         * This default implementation converts the locale and delegates to
+         * createCollator(Locale).
+         * @param loc the locale for which this collator is to be created.
+         * @return the newly created collator.
+         * @stable ICU 3.2
+         */
+        public Collator createCollator(ULocale loc) {
+            final Locale javaLocale = loc.toLocale();
+            return createCollator(javaLocale);
+        }
+
+        /**
+         * Returns an instance of the appropriate collator, or null if the locale
+         * is not supported.
+         * Note: as of ICU4J 3.2, implementations should override
+         * createCollator(ULocale) instead of this method, and inherit this
+         * method's implementation. This method is no longer abstract
+         * and instead delegates to createCollator(ULocale).
+         * @param loc the locale for which this collator is to be created.
+         * @return the newly created collator.
+         * @stable ICU 2.6
+         */
+        public Collator createCollator(Locale loc) {
+            final ULocale icuLocale = ULocale.forLocale(loc);
+            return createCollator(icuLocale);
+        }
+
+        /**
+         * Returns the name of the collator for the objectLocale, localized for the displayLocale.
+         * If objectLocale is not visible or not defined by the factory, returns null.
+         * @param objectLocale the locale identifying the collator
+         * @param displayLocale the locale for which the display name of the collator should be localized
+         * @return the display name
+         * @stable ICU 2.6
+         */
+        public String getDisplayName(Locale objectLocale, Locale displayLocale) {
+            final ULocale icuObjectLocale = ULocale.forLocale(objectLocale);
+            final ULocale icuDisplayLocale = ULocale.forLocale(displayLocale);
+            return getDisplayName(icuObjectLocale, icuDisplayLocale);
+        }
+
+        /**
+         * Returns the name of the collator for the objectLocale, localized for the displayLocale.
+         * If objectLocale is not visible or not defined by the factory, returns null.
+         * @param objectLocale the locale identifying the collator
+         * @param displayLocale the locale for which the display name of the collator should be localized
+         * @return the display name
+         * @stable ICU 3.2
+         */
+        public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
+            // An invisible factory never reports display names.
+            if (!visible()) {
+                return null;
+            }
+            // Supported IDs are matched against the base name of the locale.
+            Set supported = getSupportedLocaleIDs();
+            String name = objectLocale.getBaseName();
+            return supported.contains(name) ? objectLocale.getDisplayName(displayLocale) : null;
+        }
+
+        /**
+         * Returns an unmodifiable collection of the locale names directly
+         * supported by this factory.
+         *
+         * @return the set of supported locale IDs.
+         * @stable ICU 2.6
+         */
+        public abstract Set getSupportedLocaleIDs();
+
+        /**
+         * Empty default constructor.
+         * @stable ICU 2.6
+         */
+        protected CollatorFactory() {
+        }
+    }
+
+ // Hooks behind the static lookup and registration methods of Collator. The concrete
+ // implementation is loaded by class name in getShim(); each method below backs the
+ // public static Collator method of the same name.
+ static abstract class ServiceShim {
+ // Backs Collator.getInstance(ULocale).
+ abstract Collator getInstance(ULocale l);
+ // Backs Collator.registerInstance; the returned object is handed back to the caller as the registry key.
+ abstract Object registerInstance(Collator c, ULocale l);
+ // Backs Collator.registerFactory; the returned object is handed back to the caller as the registry key.
+ abstract Object registerFactory(CollatorFactory f);
+ // Backs Collator.unregister; k is the key the caller passes in.
+ abstract boolean unregister(Object k);
+ abstract Locale[] getAvailableLocales(); // TODO remove
+ // Backs Collator.getAvailableULocales.
+ abstract ULocale[] getAvailableULocales();
+ // Backs the Collator.getDisplayName overloads: ol is the object locale, dl the display locale.
+ abstract String getDisplayName(ULocale ol, ULocale dl);
+ }
+
+    // Lazily created by getShim(); stays null until a method that needs the service is called.
+    private static ServiceShim shim;
+
+    /*
+     * Returns the service shim, creating it on first use. The implementation class is
+     * looked up by name so that it is only loaded when the service is needed.
+     * A MissingResourceException raised during creation is rethrown unchanged; any other
+     * failure is rethrown as a RuntimeException with the same message and the original
+     * exception as its cause.
+     */
+    private static ServiceShim getShim() {
+        // Note: this instantiation is safe on loose-memory-model configurations
+        // despite lack of synchronization, since the shim instance has no state--
+        // it's all in the class init. The worst problem is we might instantiate
+        // two shim instances, but they'll share the same state so that's ok.
+        if (shim == null) {
+            try {
+                Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
+                shim = (ServiceShim)cls.newInstance();
+            }
+            catch (MissingResourceException e)
+            {
+                ///CLOVER:OFF
+                throw e;
+                ///CLOVER:ON
+            }
+            catch (Exception e) {
+                ///CLOVER:OFF
+                if(DEBUG){
+                    e.printStackTrace();
+                }
+                // Keep the original exception as the cause so that its type and stack
+                // trace are not lost to the caller.
+                throw new RuntimeException(e.getMessage(), e);
+                ///CLOVER:ON
+            }
+        }
+        return shim;
+    }
+
+ /**
+ * {@icu} Returns the Collator for the desired locale.
+ * @param locale the desired locale.
+ * @return Collator for the desired locale if it is created successfully.
+ * Otherwise if there is no Collator
+ * associated with the current locale, a default UCA collator will
+ * be returned.
+ * @see java.util.Locale
+ * @see java.util.ResourceBundle
+ * @see #getInstance(Locale)
+ * @see #getInstance()
+ * @stable ICU 3.0
+ */
+    public static final Collator getInstance(ULocale locale) {
+        // fetching from service cache is faster than instantiation
+        final ServiceShim service = getShim();
+        return service.getInstance(locale);
+    }
+
+ /**
+ * Returns the Collator for the desired locale.
+ * @param locale the desired locale.
+ * @return Collator for the desired locale if it is created successfully.
+ * Otherwise if there is no Collator
+ * associated with the current locale, a default UCA collator will
+ * be returned.
+ * @see java.util.Locale
+ * @see java.util.ResourceBundle
+ * @see #getInstance(ULocale)
+ * @see #getInstance()
+ * @stable ICU 2.8
+ */
+    public static final Collator getInstance(Locale locale) {
+        // Convert to the ICU locale type and reuse the ULocale-based lookup.
+        final ULocale icuLocale = ULocale.forLocale(locale);
+        return getInstance(icuLocale);
+    }
+
+ /**
+ * {@icu} Registers a collator as the default collator for the provided locale. The
+ * collator should not be modified after it is registered.
+ *
+ * @param collator the collator to register
+ * @param locale the locale for which this is the default collator
+ * @return an object that can be used to unregister the registered collator.
+ *
+ * @stable ICU 3.2
+ */
+    public static final Object registerInstance(Collator collator, ULocale locale) {
+        // Registration goes through the service shim, which is created on demand.
+        final ServiceShim service = getShim();
+        final Object registryKey = service.registerInstance(collator, locale);
+        return registryKey;
+    }
+
+ /**
+ * {@icu} Registers a collator factory.
+ *
+ * @param factory the factory to register
+ * @return an object that can be used to unregister the registered factory.
+ *
+ * @stable ICU 2.6
+ */
+    public static final Object registerFactory(CollatorFactory factory) {
+        // Registration goes through the service shim, which is created on demand.
+        final ServiceShim service = getShim();
+        final Object registryKey = service.registerFactory(factory);
+        return registryKey;
+    }
+
+ /**
+ * {@icu} Unregisters a collator previously registered using registerInstance.
+ * @param registryKey the object previously returned by registerInstance.
+ * @return true if the collator was successfully unregistered.
+ * @stable ICU 2.6
+ */
+    public static final boolean unregister(Object registryKey) {
+        // If the shim was never created, nothing can have been registered, so there
+        // is nothing to remove. The shim is deliberately not created here.
+        return shim != null && shim.unregister(registryKey);
+    }
+
+ /**
+ * Returns the set of locales, as Locale objects, for which collators
+ * are installed. Note that Locale objects do not support RFC 3066.
+ * @return the list of locales in which collators are installed.
+ * This list includes any that have been registered, in addition to
+ * those that are installed with ICU4J.
+ * @stable ICU 2.4
+ */
+    public static Locale[] getAvailableLocales() {
+        // TODO make this wrap getAvailableULocales later
+        if (shim != null) {
+            return shim.getAvailableLocales();
+        }
+        // The shim has not been created yet: read the list from the collation
+        // resource data instead of creating the shim just for this.
+        return ICUResourceBundle.getAvailableLocales(
+                ICUResourceBundle.ICU_COLLATION_BASE_NAME,
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+    }
+
+ /**
+ * {@icu} Returns the set of locales, as ULocale objects, for which collators
+ * are installed. ULocale objects support RFC 3066.
+ * @return the list of locales in which collators are installed.
+ * This list includes any that have been registered, in addition to
+ * those that are installed with ICU4J.
+ * @stable ICU 3.0
+ */
+    public static final ULocale[] getAvailableULocales() {
+        // Without a shim, read the list from the collation resource data rather
+        // than creating the shim just for this.
+        return (shim == null)
+                ? ICUResourceBundle.getAvailableULocales(
+                        ICUResourceBundle.ICU_COLLATION_BASE_NAME,
+                        ICUResourceBundle.ICU_DATA_CLASS_LOADER)
+                : shim.getAvailableULocales();
+    }
+
+ /**
+ * The list of keywords for this service. This must be kept in sync with
+ * the resource data. getKeywordValues checks its argument against the
+ * first (and only) entry.
+ * @since ICU 3.0
+ */
+ private static final String[] KEYWORDS = { "collation" };
+
+ /**
+ * The resource name for this service. Note that this is not the same as
+ * the keyword for this service. It is passed as the resource name to the
+ * ICUResourceBundle lookups in getKeywordValues and getFunctionalEquivalent.
+ * @since ICU 3.0
+ */
+ private static final String RESOURCE = "collations";
+
+ /**
+ * The resource bundle base name for this service.
+ * @since ICU 3.0
+ */
+
+ private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
+
+ /**
+ * {@icu} Returns an array of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @return an array of valid collation keywords.
+ * @see #getKeywordValues
+ * @stable ICU 3.0
+ */
+    public static final String[] getKeywords() {
+        // Hand out a copy: KEYWORDS is also what getKeywordValues validates against,
+        // so callers must not be able to modify the shared array.
+        return KEYWORDS.clone();
+    }
+
+ /**
+ * {@icu} Given a keyword, returns an array of all values for
+ * that keyword that are currently in use.
+ * @param keyword one of the keywords returned by getKeywords.
+ * @see #getKeywords
+ * @stable ICU 3.0
+ */
+    public static final String[] getKeywordValues(String keyword) {
+        // "collation" (KEYWORDS[0]) is the only keyword this service knows.
+        final boolean recognized = keyword.equals(KEYWORDS[0]);
+        if (recognized) {
+            return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
+        }
+        throw new IllegalArgumentException("Invalid keyword: " + keyword);
+    }
+
+ /**
+ * {@icu} Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "collation" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @return an array of string values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+    public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
+                                                           boolean commonlyUsed) {
+        // Note: The parameter commonlyUsed is actually not used.
+        // The switch is in the method signature for consistency
+        // with other locale services. The key parameter is not consulted either.
+
+        // Read available collation values from collation bundles
+        String baseLoc = locale.getBaseName();
+        LinkedList<String> values = new LinkedList<String>();
+
+        UResourceBundle bundle = UResourceBundle.getBundleInstance(
+                ICUResourceBundle.ICU_BASE_NAME + "/coll", baseLoc);
+
+        String defcoll = null;
+        // Walk from the requested locale's bundle up through its parents, collecting
+        // each collation type once, in the order it is first seen.
+        while (bundle != null) {
+            UResourceBundle collations = bundle.get("collations");
+            Enumeration<String> collEnum = collations.getKeys();
+            while (collEnum.hasMoreElements()) {
+                String collkey = collEnum.nextElement();
+                if (collkey.equals("default")) {
+                    if (defcoll == null) {
+                        // Keep the default; the one closest to the requested locale wins
+                        defcoll = collations.getString("default");
+                    }
+                } else if (!values.contains(collkey)) {
+                    values.add(collkey);
+                }
+            }
+            bundle = ((ICUResourceBundle)bundle).getParent();
+        }
+        // Reordering: the default value, if any, goes first, followed by the other
+        // values in the order they were found. Building the result from the list keeps
+        // the array size right even when no default was found, when the default is
+        // not among the collected values, or when no values were found at all.
+        if (defcoll != null) {
+            values.remove(defcoll);
+            values.addFirst(defcoll);
+        }
+        return values.toArray(new String[values.size()]);
+    }
+
+ /**
+ * {@icu} Returns the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service. If two locales return the same result, then
+ * collators instantiated for these locales will behave
+ * equivalently. The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details. The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales. This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the
+ * Locales and Services section of the ICU User Guide.
+ * @param keyword a particular keyword as enumerated by
+ * getKeywords.
+ * @param locID The requested locale
+ * @param isAvailable If non-null, isAvailable[0] will receive an
+ * output boolean that indicates whether the requested locale was
+ * 'available' to the collation service. If non-null, isAvailable
+ * must have length >= 1.
+ * @return the locale
+ * @stable ICU 3.0
+ */
+    public static final ULocale getFunctionalEquivalent(String keyword,
+                                                        ULocale locID,
+                                                        boolean isAvailable[]) {
+        // The lookup itself is generic resource-bundle logic; this method only
+        // supplies the collation-specific base name, class loader and resource name.
+        final ULocale equivalent = ICUResourceBundle.getFunctionalEquivalent(
+                BASE,
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER,
+                RESOURCE,
+                keyword,
+                locID,
+                isAvailable,
+                true);
+        return equivalent;
+    }
+
+ /**
+ * {@icu} Returns the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service.
+ * @param keyword a particular keyword as enumerated by
+ * getKeywords.
+ * @param locID The requested locale
+ * @return the locale
+ * @see #getFunctionalEquivalent(String,ULocale,boolean[])
+ * @stable ICU 3.0
+ */
+    public static final ULocale getFunctionalEquivalent(String keyword,
+                                                        ULocale locID) {
+        // Same lookup as the three-argument form, without an availability output.
+        final boolean[] noAvailabilityOutput = null;
+        return getFunctionalEquivalent(keyword, locID, noAvailabilityOutput);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * displayLocale.
+ * @param objectLocale the locale of the collator
+ * @param displayLocale the locale for the collator's display name
+ * @return the display name
+ * @stable ICU 2.6
+ */
+    public static String getDisplayName(Locale objectLocale, Locale displayLocale) {
+        // Obtain the service first, then convert both locales to ULocale for it.
+        final ServiceShim service = getShim();
+        final ULocale icuObjectLocale = ULocale.forLocale(objectLocale);
+        final ULocale icuDisplayLocale = ULocale.forLocale(displayLocale);
+        return service.getDisplayName(icuObjectLocale, icuDisplayLocale);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * displayLocale.
+ * @param objectLocale the locale of the collator
+ * @param displayLocale the locale for the collator's display name
+ * @return the display name
+ * @stable ICU 3.2
+ */
+    public static String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
+        // Display names are produced by the service shim, which is created on demand.
+        final ServiceShim service = getShim();
+        return service.getDisplayName(objectLocale, displayLocale);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * current locale.
+ * @param objectLocale the locale of the collator
+ * @return the display name
+ * @stable ICU 2.6
+ */
+    public static String getDisplayName(Locale objectLocale) {
+        // The display locale is the current default ULocale.
+        final ServiceShim service = getShim();
+        final ULocale icuObjectLocale = ULocale.forLocale(objectLocale);
+        final ULocale displayLocale = ULocale.getDefault();
+        return service.getDisplayName(icuObjectLocale, displayLocale);
+    }
+
+ /**
+ * {@icu} Returns the name of the collator for the objectLocale, localized for the
+ * current locale.
+ * @param objectLocale the locale of the collator
+ * @return the display name
+ * @stable ICU 3.2
+ */
+    public static String getDisplayName(ULocale objectLocale) {
+        // The display locale is the current default ULocale.
+        final ServiceShim service = getShim();
+        final ULocale displayLocale = ULocale.getDefault();
+        return service.getDisplayName(objectLocale, displayLocale);
+    }
+
+ /**
+ * Returns this Collator's strength property. The strength property
+ * determines the minimum level of difference considered significant.
+ *
+ * {@icunote} This can return QUATERNARY strength, which is not supported by the
+ * JDK version.
+ *
+ * See the Collator class description for more details.
+ *
+ * @return this Collator's current strength property.
+ * @see #setStrength
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @stable ICU 2.8
+ */
+ public int getStrength()
+ {
+ // Plain accessor for the field that setStrength assigns after validating its argument.
+ return m_strength_;
+ }
+
+ /**
+ * Returns the decomposition mode of this Collator. The decomposition mode
+ * determines how Unicode composed characters are handled.
+ *
+ *
+ * See the Collator class description for more details.
+ *
+ * @return the decomposition mode
+ * @see #setDecomposition
+ * @see #NO_DECOMPOSITION
+ * @see #CANONICAL_DECOMPOSITION
+ * @stable ICU 2.8
+ */
+ public int getDecomposition()
+ {
+ // Plain accessor for the field that internalSetDecomposition assigns after validating its argument.
+ return m_decomposition_;
+ }
+
+ // public other methods -------------------------------------------------
+
+ /**
+ * Compares the equality of two text Strings using
+ * this Collator's rules, strength and decomposition mode. Convenience method.
+ * @param source the source string to be compared.
+ * @param target the target string to be compared.
+ * @return true if the strings are equal according to the collation
+ * rules, otherwise false.
+ * @see #compare
+ * @throws NullPointerException thrown if either argument is null.
+ * @stable ICU 2.8
+ */
+ public boolean equals(String source, String target) {
+     // Two strings are collation-equal exactly when compare() reports no ordering difference.
+     final int order = compare(source, target);
+     return order == 0;
+ }
+
+ /**
+ * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
+ * in this collator.
+ * @return a pointer to a UnicodeSet object containing all the
+ * code points and sequences that may sort differently than
+ * in the UCA.
+ * @stable ICU 2.4
+ */
+ public UnicodeSet getTailoredSet() {
+     // This base implementation reports the entire code point range U+0000..U+10FFFF.
+     final int firstCodePoint = 0;
+     final int lastCodePoint = 0x10FFFF;
+     return new UnicodeSet(firstCodePoint, lastCodePoint);
+ }
+
+ /**
+ * Compares the source text String to the target text String according to
+ * this Collator's rules, strength and decomposition mode.
+ * Returns an integer less than,
+ * equal to or greater than zero depending on whether the source String is
+ * less than, equal to or greater than the target String. See the Collator
+ * class description for an example of use.
+ *
+ * @param source the source String.
+ * @param target the target String.
+ * @return Returns an integer value. Value is less than zero if source is
+ * less than target, value is zero if source and target are equal,
+ * value is greater than zero if source is greater than target.
+ * @see CollationKey
+ * @see #getCollationKey
+ * @throws NullPointerException thrown if either argument is null.
+ * @stable ICU 2.8
+ */
+ public abstract int compare(String source, String target);
+
+ /**
+ * Compares the source Object to the target Object.
+ *
+ * @param source the source Object.
+ * @param target the target Object.
+ * @return Returns an integer value. Value is less than zero if source is
+ * less than target, value is zero if source and target are equal,
+ * value is greater than zero if source is greater than target.
+ * @throws ClassCastException thrown if either argument cannot be cast to String.
+ * @stable ICU 4.2
+ */
+ public int compare(Object source, Object target) {
+     // Both casts happen up front (source first), so a non-String argument
+     // fails with ClassCastException before any comparison work starts.
+     final String sourceText = (String) source;
+     final String targetText = (String) target;
+     return compare(sourceText, targetText);
+ }
+
+ /**
+ *
+ * Transforms the String into a CollationKey suitable for efficient
+ * repeated comparison. The resulting key depends on the collator's
+ * rules, strength and decomposition mode.
+ *
+ * See the CollationKey class documentation for more information.
+ * @param source the string to be transformed into a CollationKey.
+ * @return the CollationKey for the given String based on this Collator's
+ * collation rules. If the source String is null, a null
+ * CollationKey is returned.
+ * @see CollationKey
+ * @see #compare(String, String)
+ * @see #getRawCollationKey
+ * @stable ICU 2.8
+ */
+ public abstract CollationKey getCollationKey(String source);
+
+ /**
+ * {@icu} Returns the simpler form of a CollationKey for the String source following
+ * the rules of this Collator and stores the result into the user provided argument
+ * key. If key has an internal byte array of length that's too small for the result,
+ * the internal byte array will be grown to the exact required size.
+ * @param source the text String to be transformed into a RawCollationKey
+ * @return If key is null, a new instance of RawCollationKey will be
+ * created and returned, otherwise the user provided key will be
+ * returned.
+ * @see #compare(String, String)
+ * @see #getCollationKey
+ * @see RawCollationKey
+ * @stable ICU 2.8
+ */
+ public abstract RawCollationKey getRawCollationKey(String source,
+ RawCollationKey key);
+
+ /**
+ * {@icu} Variable top is a two byte primary value which causes all the codepoints
+ * with primary values that are less or equal than the variable top to be
+ * shifted when alternate handling is set to SHIFTED.
+ *
+ *
+ * Sets the variable top to a collation element value of a string supplied.
+ *
+ * @param varTop one or more (if contraction) characters to which the
+ * variable top should be set
+ * @return a int value containing the value of the variable top in upper 16
+ * bits. Lower 16 bits are undefined.
+ * @throws IllegalArgumentException is thrown if varTop argument is not
+ * a valid variable top element. A variable top element is
+ * invalid when it is a contraction that does not exist in the
+ * Collation order or when the PRIMARY strength collation
+ * element for the variable top has more than two bytes
+ * @see #getVariableTop
+ * @see RuleBasedCollator#setAlternateHandlingShifted
+ * @stable ICU 2.6
+ */
+ public abstract int setVariableTop(String varTop);
+
+ /**
+ * {@icu} Returns the variable top value of a Collator.
+ * Lower 16 bits are undefined and should be ignored.
+ * @return the variable top value of a Collator.
+ * @see #setVariableTop
+ * @stable ICU 2.6
+ */
+ public abstract int getVariableTop();
+
+ /**
+ * {@icu} Sets the variable top to a collation element value supplied.
+ * Variable top is set to the upper 16 bits.
+ * Lower 16 bits are ignored.
+ * @param varTop Collation element value, as returned by setVariableTop or
+ * getVariableTop
+ * @see #getVariableTop
+ * @see #setVariableTop
+ * @stable ICU 2.6
+ */
+ public abstract void setVariableTop(int varTop);
+
+ /**
+ * {@icu} Returns the version of this collator object.
+ * @return the version object associated with this collator
+ * @stable ICU 2.8
+ */
+ public abstract VersionInfo getVersion();
+
+ /**
+ * {@icu} Returns the UCA version of this collator object.
+ * @return the version object associated with this collator
+ * @stable ICU 2.8
+ */
+ public abstract VersionInfo getUCAVersion();
+
+ /**
+ * Retrieves the reordering codes for this collator.
+ * These reordering codes are a combination of UScript codes and ReorderCodes.
+ * @return a copy of the reordering codes for this collator;
+ * if none are set then returns an empty array
+ * @see #setReorderCodes
+ * @see #getEquivalentReorderCodes
+ * @draft ICU 4.8
+ */
+ public int[] getReorderCodes() {
+     // The base class does not support reordering; this implementation always throws.
+     throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
+ * codes are grouped and must reorder together.
+ *
+ * @param reorderCode code for which equivalents to be retrieved
+ * @return the set of all reorder codes in the same group as the given reorder code.
+ * @see #setReorderCodes
+ * @see #getReorderCodes
+ * @draft ICU 4.8
+ */
+ public static int[] getEquivalentReorderCodes(int reorderCode) {
+     // Not implemented at this level: every call fails, regardless of the code passed in.
+     throw new UnsupportedOperationException();
+ }
+
+
+ // Freezable interface implementation -------------------------------------------------
+
+ /**
+ * Determines whether the object has been frozen or not.
+ * @draft ICU 4.8
+ */
+ public boolean isFrozen() {
+     // This base implementation always reports "not frozen".
+     final boolean frozen = false;
+     return frozen;
+ }
+
+ /**
+ * Freezes the collator.
+ * @return the collator itself.
+ * @draft ICU 4.8
+ */
+ public Collator freeze() {
+     // This base implementation cannot freeze anything; it always throws.
+     final String message = "Needs to be implemented by the subclass.";
+     throw new UnsupportedOperationException(message);
+ }
+
+ /**
+ * Provides for the clone operation. Any clone is initially unfrozen.
+ * @draft ICU 4.8
+ */
+ public Collator cloneAsThawed() {
+     // This base implementation cannot clone; it always throws.
+     final String message = "Needs to be implemented by the subclass.";
+     throw new UnsupportedOperationException(message);
+ }
+
+ // protected constructor -------------------------------------------------
+
+ /**
+ * Empty default constructor to make javadocs happy
+ * @stable ICU 2.4
+ */
+ protected Collator() {
+     // Intentionally empty: strength and decomposition keep their field initializers.
+ }
+
+ // package private methods -----------------------------------------------
+
+ // private data members --------------------------------------------------
+
+ /**
+ * Collation strength
+ */
+ private int m_strength_ = TERTIARY;
+
+ /**
+ * Decomposition mode
+ */
+ private int m_decomposition_ = CANONICAL_DECOMPOSITION;
+
+ private static final boolean DEBUG = ICUDebug.enabled("collator");
+
+ // private methods -------------------------------------------------------
+
+ // end registry stuff
+
+ // -------- BEGIN ULocale boilerplate --------
+
+ /**
+ * {@icu} Returns the locale that was used to create this object, or null.
+ * This may differ from the locale requested at the time of
+ * this object's creation. For example, if an object is created
+ * for locale en_US_CALIFORNIA , the actual data may be
+ * drawn from en (the actual locale), and
+ * en_US may be the most specific locale that exists (the
+ * valid locale).
+ *
+ * Note: This method will be implemented in ICU 3.0; ICU 2.8
+ * contains a partial preview implementation. The actual
+ * locale is returned correctly, but the valid locale is
+ * not, in most cases.
+ * @param type type of information requested, either {@link
+ * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
+ * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
+ * @return the information specified by type , or null if
+ * this object was not constructed from locale data.
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ * @draft ICU 2.8 (retain)
+ * @provisional This API might change or be removed in a future release.
+ */
+ public final ULocale getLocale(ULocale.Type type) {
+     // Only ACTUAL_LOCALE selects the actual locale; every other type
+     // value (including null) falls through to the valid locale.
+     if (type == ULocale.ACTUAL_LOCALE) {
+         return this.actualLocale;
+     }
+     return this.validLocale;
+ }
+
+ /*
+ * Set information about the locales that were used to create this
+ * object. If the object was not constructed from locale data,
+ * both arguments should be set to null. Otherwise, neither
+ * should be null. The actual locale must be at the same level or
+ * less specific than the valid locale. This method is intended
+ * for use by factories or other entities that create objects of
+ * this class.
+ * @param valid the most specific locale containing any resource
+ * data, or null
+ * @param actual the locale containing data used to construct this
+ * object, or null
+ * @see com.ibm.icu.util.ULocale
+ * @see com.ibm.icu.util.ULocale#VALID_LOCALE
+ * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
+ */
+ final void setLocale(ULocale valid, ULocale actual) {
+     // Change the following to an assertion later
+     ///CLOVER:OFF
+     // Callers are expected to pass either two locales or two nulls;
+     // a mixed pair is rejected.
+     final boolean validMissing = (valid == null);
+     final boolean actualMissing = (actual == null);
+     if (validMissing != actualMissing) {
+         throw new IllegalArgumentException();
+     }
+     ///CLOVER:ON
+     // A further possible check: the actual locale should be at the same
+     // level as, or less specific than, the valid locale.
+     this.validLocale = valid;
+     this.actualLocale = actual;
+ }
+
+ /*
+ * The most specific locale containing any resource data, or null.
+ * @see com.ibm.icu.util.ULocale
+ */
+ private ULocale validLocale;
+
+ /*
+ * The locale containing data used to construct this object, or
+ * null.
+ * @see com.ibm.icu.util.ULocale
+ */
+ private ULocale actualLocale;
+
+ // -------- END ULocale boilerplate --------
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java b/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java
new file mode 100644
index 00000000000..3a31ac5ff64
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollatorReader.java
@@ -0,0 +1,668 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.impl.ICUBinary;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.IntTrie;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.CollationParsedRuleBuilder.InverseUCA;
+import com.ibm.icu.text.RuleBasedCollator.LeadByteConstants;
+import com.ibm.icu.text.RuleBasedCollator.UCAConstants;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ *
+ * Internal reader class for ICU data file uca.icu containing Unicode Collation Algorithm data.
+ *
+ *
+ * This class simply reads uca.icu, authenticates that it is a valid ICU data file and split its contents up into blocks
+ * of data for use in com.ibm.icu.text.Collator .
+ *
+ *
+ * uca.icu, which is in big-endian format, is jarred together with this package.
+ *
+ *
+ * @author Syn Wee Quek
+ * @since release 2.2, April 18 2002
+ */
+
+final class CollatorReader {
+ // Loads the UCA collation data file (coll/ucadata.icu), populates rbc, ucac and
+ // leadByteConstants from it, and returns the UCA contractions read by readImp.
+ // Throws IOException if the data fails header validation or cannot be read.
+ static char[] read(RuleBasedCollator rbc, UCAConstants ucac, LeadByteConstants leadByteConstants)
+         throws IOException {
+     InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu");
+     BufferedInputStream b = new BufferedInputStream(i, 90000);
+     try {
+         CollatorReader reader = new CollatorReader(b);
+         return reader.readImp(rbc, ucac, leadByteConstants);
+     } finally {
+         // Always release the resource stream, including when header validation
+         // or parsing throws; closing the buffered wrapper closes the wrapped stream.
+         b.close();
+     }
+ }
+
+ // Wraps a ByteBuffer in an InputStream view. Reading from the stream advances
+ // the buffer's position; the buffer is not copied.
+ public static InputStream makeByteBufferInputStream(final ByteBuffer buf) {
+     return new InputStream() {
+         public int read() throws IOException {
+             if (!buf.hasRemaining()) {
+                 return -1;
+             }
+             // Mask so that bytes >= 0x80 come back as 128..255 rather than negative.
+             return buf.get() & 0xff;
+         }
+
+         public int read(byte[] bytes, int off, int len) throws IOException {
+             if (len == 0) {
+                 return 0;
+             }
+             if (!buf.hasRemaining()) {
+                 // End of data must be reported as -1, not 0: callers such as
+                 // InputStream.skip() and DataInputStream.readFully() loop until
+                 // they see a negative count and would otherwise never terminate.
+                 return -1;
+             }
+             len = Math.min(len, buf.remaining());
+             buf.get(bytes, off, len);
+             return len;
+         }
+     };
+ }
+
+ static void initRBC(RuleBasedCollator rbc, ByteBuffer data) throws IOException {
+     // Images no larger than this many bytes carry only a header and options.
+     final int minBinaryDataSize = (42 + 25) << 2;
+     final boolean hasFullImage = data.remaining() > minBinaryDataSize;
+     // TODO: Change the rest of this class to use the ByteBuffer directly, rather than
+     // a DataInputStream, except for passing an InputStream to ICUBinary.readHeader().
+     // Consider changing ICUBinary to also work with a ByteBuffer.
+     final CollatorReader reader = new CollatorReader(makeByteBufferInputStream(data), false);
+     if (!hasFullImage) {
+         reader.readHeader(rbc);
+         reader.readOptions(rbc);
+         // duplicating UCA_'s data
+         rbc.setWithUCATables();
+         return;
+     }
+     reader.readImp(rbc, null, null);
+ }
+
+ // Loads the inverse UCA table from coll/invuca.icu.
+ // Throws IOException if the data fails header validation or cannot be read.
+ static InverseUCA getInverseUCA() throws IOException {
+     InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/coll/invuca.icu");
+     BufferedInputStream b = new BufferedInputStream(i, 110000);
+     try {
+         return CollatorReader.readInverseUCA(b);
+     } finally {
+         // Release the resource stream on failure as well as on success; closing
+         // the buffered wrapper closes the wrapped stream, and a repeated close
+         // after readInverseUCA's own close is harmless.
+         b.close();
+     }
+ }
+
+ // protected constructor ---------------------------------------------
+
+ /**
+ *
+ * Protected constructor.
+ *
+ *
+ * @param inputStream
+ * ICU collator file input stream
+ * @exception IOException
+ * throw if data file fails authentication
+ */
+ private CollatorReader(InputStream inputStream) throws IOException {
+     // Delegate with readICUHeader = true: the two-argument constructor validates
+     // the ICU header and Unicode version before wrapping the stream.
+     this(inputStream, true);
+ }
+
+ /**
+ *
+ * Protected constructor.
+ *
+ *
+ * @param inputStream
+ * ICU collator file input stream
+ * @param readICUHeader
+ * flag to indicate if the ICU header has to be read
+ * @exception IOException
+ * throw if data file fails authentication
+ */
+ private CollatorReader(InputStream inputStream, boolean readICUHeader) throws IOException {
+ if (readICUHeader) {
+ byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, UCA_AUTHENTICATE_);
+ // weiv: check that we have the correct Unicode version in
+ // binary files
+ VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
+ if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
+ throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
+ }
+ }
+ m_dataInputStream_ = new DataInputStream(inputStream);
+ }
+
+ // protected methods -------------------------------------------------
+
+ /**
+ * Read and break up the header stream of data passed in as arguments into meaningful Collator data.
+ *
+ * @param rbc
+ * RuleBasedCollator to populate with header information
+ * @exception IOException
+ * thrown when there's a data error.
+ */
+ private void readHeader(RuleBasedCollator rbc) throws IOException {
+ // Reads the fixed-layout header field by field. readcount tracks how many
+ // header bytes have been consumed so the remainder of the declared header
+ // (m_headerSize_) can be skipped at the end. Fields that this reader does not
+ // need are read and discarded purely to stay in step with the layout.
+ m_size_ = m_dataInputStream_.readInt();
+ // all the offsets are in bytes
+ // to get the address add to the header address and cast properly
+ // Default options int options
+ m_headerSize_ = m_dataInputStream_.readInt(); // start of options
+ int readcount = 8; // for size and headersize
+ // structure which holds values for indirect positioning and implicit
+ // ranges
+ m_UCAConstOffset_ = m_dataInputStream_.readInt();
+ readcount += 4;
+ // this one is needed only for UCA, to copy the appropriate
+ // contractions
+ /*int contractionUCACombos =*/ m_dataInputStream_.readInt();
+ readcount += 4;
+ // reserved for future use
+ m_dataInputStream_.skipBytes(4);
+ readcount += 4;
+ // const uint8_t *mappingPosition;
+ int mapping = m_dataInputStream_.readInt();
+ readcount += 4;
+ // uint32_t *expansion;
+ rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
+ readcount += 4;
+ // UChar *contractionIndex;
+ rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
+ readcount += 4;
+ // uint32_t *contractionCEs;
+ int contractionCE = m_dataInputStream_.readInt();
+ readcount += 4;
+ // needed for various closures int contractionSize
+ int contractionSize = m_dataInputStream_.readInt();
+ readcount += 4;
+ // array of last collation element in expansion
+ int expansionEndCE = m_dataInputStream_.readInt();
+ readcount += 4;
+ // array of maximum expansion size corresponding to the expansion
+ // collation elements with last element in expansionEndCE
+ int expansionEndCEMaxSize = m_dataInputStream_.readInt();
+ readcount += 4;
+ // size of endExpansionCE int expansionEndCESize
+ /* int endExpansionCECount = */m_dataInputStream_.readInt();
+ readcount += 4;
+ // hash table of unsafe code points
+ int unsafe = m_dataInputStream_.readInt();
+ readcount += 4;
+ // hash table of final code points in contractions.
+ int contractionEnd = m_dataInputStream_.readInt();
+ readcount += 4;
+ // int CEcount = m_dataInputStream_.readInt();
+ int contractionUCACombosSize = m_dataInputStream_.readInt();
+ readcount += 4;
+ // is jamoSpecial
+ rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
+ readcount++;
+ // isBigEndian and charSetFamily
+ m_dataInputStream_.skipBytes(2);
+ readcount += 2;
+ int contractionUCACombosWidth = m_dataInputStream_.readByte();
+ readcount += 1;
+ // four version quadruples, 4 bytes each; the last (format version) is discarded
+ rbc.m_version_ = readVersion(m_dataInputStream_);
+ readcount += 4;
+ rbc.m_UCA_version_ = readVersion(m_dataInputStream_);
+ readcount += 4;
+ rbc.m_UCD_version_ = readVersion(m_dataInputStream_);
+ readcount += 4;
+ /*VersionInfo formatVersion =*/ readVersion(m_dataInputStream_);
+ readcount += 4;
+ rbc.m_scriptToLeadBytes = m_dataInputStream_.readInt();
+ readcount += 4;
+ rbc.m_leadByteToScripts = m_dataInputStream_.readInt();
+ readcount += 4;
+
+ // byte charsetName[] = new byte[32]; // for charset CEs
+ m_dataInputStream_.skipBytes(32);
+ readcount += 32;
+
+ m_dataInputStream_.skipBytes(44); // for future use
+ readcount += 44;
+ // The declared header size must cover at least the fields read above.
+ if (m_headerSize_ < readcount) {
+ // /CLOVER:OFF
+ throw new IOException("Internal Error: Header size error");
+ // /CLOVER:ON
+ }
+ // Skip any header bytes beyond the fields this reader knows about.
+ m_dataInputStream_.skipBytes(m_headerSize_ - readcount);
+
+ if (rbc.m_contractionOffset_ == 0) { // contraction can be null
+ // No contraction tables: point both offsets at the mapping so the
+ // contraction index and contraction CE sizes computed below are zero.
+ rbc.m_contractionOffset_ = mapping;
+ contractionCE = mapping;
+ }
+ // Section sizes, in bytes, are the gaps between the section offsets read above.
+ m_optionSize_ = rbc.m_expansionOffset_ - m_headerSize_;
+ m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
+ m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
+ m_contractionCESize_ = mapping - contractionCE;
+ // m_trieSize_ = expansionEndCE - mapping;
+ m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
+ m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
+ m_unsafeSize_ = contractionEnd - unsafe;
+ // m_UCAValuesSize_ = m_size_ - UCAConst; // UCA value, will be handled later
+ m_UCAcontractionSize_ = contractionUCACombosSize * contractionUCACombosWidth * 2;
+
+ // treat it as normal collator first
+ // for normal collator there is no UCA contraction
+ // contractions (UChar[contractionSize] + CE[contractionSize])
+ m_contractionSize_ = contractionSize * 2 + contractionSize * 4;
+
+ rbc.m_contractionOffset_ >>= 1; // byte offset -> index in 2-byte (char) units
+ rbc.m_expansionOffset_ >>= 2; // byte offset -> index in 4-byte (int) units
+ }
+
+ /**
+ * Read and break up the collation options passed in the stream of data and update the argument Collator with the
+ * results
+ *
+ * @param rbc
+ * RuleBasedCollator to populate
+ * @exception IOException
+ * thrown when there's a data error.
+ */
+ private void readOptions(RuleBasedCollator rbc) throws IOException {
+     // Reads the default attribute block that follows the header. readcount tracks
+     // the bytes consumed so the remainder of the block (m_optionSize_, computed
+     // by readHeader) can be skipped afterwards.
+     int readcount = 0;
+     rbc.m_defaultVariableTopValue_ = m_dataInputStream_.readInt();
+     readcount += 4;
+     rbc.m_defaultIsFrenchCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
+     readcount += 4;
+     rbc.m_defaultIsAlternateHandlingShifted_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.SHIFTED_);
+     readcount += 4;
+     rbc.m_defaultCaseFirst_ = m_dataInputStream_.readInt();
+     readcount += 4;
+     int defaultIsCaseLevel = m_dataInputStream_.readInt();
+     rbc.m_defaultIsCaseLevel_ = (defaultIsCaseLevel == RuleBasedCollator.AttributeValue.ON_);
+     readcount += 4;
+     // The stored normalization attribute is mapped onto the Collator decomposition constants.
+     int value = m_dataInputStream_.readInt();
+     readcount += 4;
+     if (value == RuleBasedCollator.AttributeValue.ON_) {
+         value = Collator.CANONICAL_DECOMPOSITION;
+     } else {
+         value = Collator.NO_DECOMPOSITION;
+     }
+     rbc.m_defaultDecomposition_ = value;
+     rbc.m_defaultStrength_ = m_dataInputStream_.readInt();
+     readcount += 4;
+     rbc.m_defaultIsHiragana4_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
+     readcount += 4;
+     rbc.m_defaultIsNumericCollation_ = (m_dataInputStream_.readInt() == RuleBasedCollator.AttributeValue.ON_);
+     readcount += 4;
+     // skipBytes() keeps calling skip() until the requested count is reached or no
+     // progress is made; a single skip() call may stop short (for example at a
+     // buffer boundary) and would leave the stream misaligned for later reads.
+     m_dataInputStream_.skipBytes(60); // reserved for future use
+     readcount += 60;
+     // Validate the declared option size before skipping the remainder, as readHeader does.
+     if (m_optionSize_ < readcount) {
+         // /CLOVER:OFF
+         throw new IOException("Internal Error: Option size error");
+         // /CLOVER:ON
+     }
+     m_dataInputStream_.skipBytes(m_optionSize_ - readcount);
+ }
+
+ /**
+ * Read and break up the stream of data passed in as arguments into meaningful Collator data.
+ *
+ * @param rbc
+ * RuleBasedCollator to populate
+ * @param UCAConst
+ * object to fill up with UCA constants if we are reading the UCA collator, if not use a null
+ * @param leadByteConstants
+ * @return UCAContractions array filled up with the UCA contractions if we are reading the UCA collator
+ * @exception IOException
+ * thrown when there's a data error.
+ */
+ private char[] readImp(RuleBasedCollator rbc, RuleBasedCollator.UCAConstants UCAConst,
+ RuleBasedCollator.LeadByteConstants leadByteConstants) throws IOException {
+ // Reads the sections of the collation image strictly in stream order. readcount
+ // accumulates the bytes consumed so far and is compared against the total size
+ // from the header (m_size_) at the end as a consistency check.
+ char ucaContractions[] = null; // return result
+
+ readHeader(rbc);
+ // header size has been checked by readHeader
+ int readcount = m_headerSize_;
+ // option size has been checked by readOptions
+ readOptions(rbc);
+ readcount += m_optionSize_;
+ // Section sizes from readHeader are in bytes; convert to element counts
+ // (>> 2 for int tables, >> 1 for char tables) before allocating.
+ m_expansionSize_ >>= 2;
+ rbc.m_expansion_ = new int[m_expansionSize_];
+ for (int i = 0; i < m_expansionSize_; i++) {
+ rbc.m_expansion_[i] = m_dataInputStream_.readInt();
+ }
+ readcount += (m_expansionSize_ << 2);
+ // The contraction index and contraction CE tables are present only when
+ // the index section has a non-zero size.
+ if (m_contractionIndexSize_ > 0) {
+ m_contractionIndexSize_ >>= 1;
+ rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
+ for (int i = 0; i < m_contractionIndexSize_; i++) {
+ rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
+ }
+ readcount += (m_contractionIndexSize_ << 1);
+ m_contractionCESize_ >>= 2;
+ rbc.m_contractionCE_ = new int[m_contractionCESize_];
+ for (int i = 0; i < m_contractionCESize_; i++) {
+ rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
+ }
+ readcount += (m_contractionCESize_ << 2);
+ }
+ // The trie deserializes itself from the stream and reports how many bytes it used.
+ rbc.m_trie_ = new IntTrie(m_dataInputStream_, RuleBasedCollator.DataManipulate.getInstance());
+ if (!rbc.m_trie_.isLatin1Linear()) {
+ throw new IOException("Data corrupted, " + "Collator Tries expected to have linear "
+ + "latin one data arrays");
+ }
+ readcount += rbc.m_trie_.getSerializedDataSize();
+ m_expansionEndCESize_ >>= 2;
+ rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
+ for (int i = 0; i < m_expansionEndCESize_; i++) {
+ rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
+ }
+ readcount += (m_expansionEndCESize_ << 2);
+ rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
+ for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i++) {
+ rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
+ }
+ readcount += m_expansionEndCEMaxSizeSize_;
+ rbc.m_unsafe_ = new byte[m_unsafeSize_];
+ for (int i = 0; i < m_unsafeSize_; i++) {
+ rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
+ }
+ readcount += m_unsafeSize_;
+ // m_contractionSize_ is overwritten here with the byte length of the
+ // contraction-end table: everything between the bytes consumed so far and
+ // either the UCA constants (UCA data) or the end of the image (other collators).
+ if (UCAConst != null) {
+ // we are reading the UCA
+ // unfortunately the UCA offset in any collator data is not 0 and
+ // only refers to the UCA data
+ // m_contractionSize_ -= m_UCAValuesSize_;
+ m_contractionSize_ = m_UCAConstOffset_ - readcount;
+ } else {
+ m_contractionSize_ = m_size_ - readcount;
+ }
+ rbc.m_contractionEnd_ = new byte[m_contractionSize_];
+ for (int i = 0; i < m_contractionSize_; i++) {
+ rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
+ }
+ readcount += m_contractionSize_;
+ if (UCAConst != null) {
+ // UCA constants: a run of two-int pairs followed by single-int primary
+ // boundaries, read in the order they are stored. readUCAConstcount
+ // counts the bytes consumed by this section.
+ UCAConst.FIRST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ int readUCAConstcount = 4;
+ UCAConst.FIRST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_TERTIARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_TERTIARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_SECONDARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_SECONDARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_PRIMARY_IGNORABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_PRIMARY_IGNORABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_VARIABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_VARIABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_VARIABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_VARIABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_NON_VARIABLE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_NON_VARIABLE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.RESET_TOP_VALUE_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.RESET_TOP_VALUE_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_IMPLICIT_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_IMPLICIT_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_IMPLICIT_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_IMPLICIT_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_TRAILING_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.FIRST_TRAILING_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_TRAILING_[0] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.LAST_TRAILING_[1] = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_TOP_MIN_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_IMPLICIT_MIN_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_IMPLICIT_MAX_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_TRAILING_MIN_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_TRAILING_MAX_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_SPECIAL_MIN_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+ UCAConst.PRIMARY_SPECIAL_MAX_ = m_dataInputStream_.readInt();
+ readUCAConstcount += 4;
+
+ readcount += readUCAConstcount;
+
+ // The UCA contractions occupy everything up to the script-to-lead-byte
+ // table; the count is in chars (2 bytes each).
+ int resultsize = (rbc.m_scriptToLeadBytes - readcount) / 2;
+ ucaContractions = new char[resultsize];
+ for (int i = 0; i < resultsize; i++) {
+ ucaContractions[i] = m_dataInputStream_.readChar();
+ }
+ // NOTE(review): readcount advances by the header-derived m_UCAcontractionSize_,
+ // not by resultsize * 2; presumably the two agree for well-formed data - confirm.
+ readcount += m_UCAcontractionSize_;
+ }
+
+ if (leadByteConstants != null) {
+ // NOTE(review): skip() may skip fewer bytes than requested; readcount only
+ // advances by what was actually skipped, so a short skip surfaces as the
+ // size mismatch below (or as misread lead-byte data) - confirm this is intended.
+ readcount += m_dataInputStream_.skip(rbc.m_scriptToLeadBytes - readcount);
+ leadByteConstants.read(m_dataInputStream_);
+ readcount += leadByteConstants.getSerializedDataSize();
+ }
+
+ // Every byte of the image must have been accounted for.
+ if (readcount != m_size_) {
+ // /CLOVER:OFF
+ throw new IOException("Internal Error: Data file size error");
+ // /CLOVER:ON
+ }
+ return ucaContractions;
+ }
+
+ /**
+ * Reads in the inverse uca data
+ *
+ * @param input
+ * input stream with the inverse uca data
+ * @return an object containing the inverse uca data
+ * @exception IOException
+ * thrown when error occurs while reading the inverse uca
+ */
+ private static CollationParsedRuleBuilder.InverseUCA readInverseUCA(InputStream inputStream) throws IOException {
+ byte[] UnicodeVersion = ICUBinary.readHeader(inputStream, INVERSE_UCA_DATA_FORMAT_ID_,
+ INVERSE_UCA_AUTHENTICATE_);
+
+ // weiv: check that we have the correct Unicode version in
+ // binary files
+ VersionInfo UCDVersion = UCharacter.getUnicodeVersion();
+ if (UnicodeVersion[0] != UCDVersion.getMajor() || UnicodeVersion[1] != UCDVersion.getMinor()) {
+ throw new IOException(WRONG_UNICODE_VERSION_ERROR_);
+ }
+
+ CollationParsedRuleBuilder.InverseUCA result = new CollationParsedRuleBuilder.InverseUCA();
+ DataInputStream input = new DataInputStream(inputStream);
+ input.readInt(); // bytesize
+ int tablesize = input.readInt(); // in int size
+ int contsize = input.readInt(); // in char size
+ input.readInt(); // table in bytes
+ input.readInt(); // conts in bytes
+ result.m_UCA_version_ = readVersion(input);
+ input.skipBytes(8); // skip padding
+
+ int size = tablesize * 3; // one column for each strength
+ result.m_table_ = new int[size];
+ result.m_continuations_ = new char[contsize];
+
+ for (int i = 0; i < size; i++) {
+ result.m_table_[i] = input.readInt();
+ }
+ for (int i = 0; i < contsize; i++) {
+ result.m_continuations_[i] = input.readChar();
+ }
+ input.close();
+ return result;
+ }
+
+ /**
+ * Reads four bytes from the input and returns a VersionInfo object. Use it to read different collator versions.
+ *
+ * @param input
+ * already instantiated DataInputStream, positioned at the start of four version bytes
+ * @return a ready VersionInfo object
+ * @throws IOException
+ * thrown when error occurs while reading version bytes
+ */
+
+ protected static VersionInfo readVersion(DataInputStream input) throws IOException {
+ byte[] version = new byte[4];
+ version[0] = input.readByte();
+ version[1] = input.readByte();
+ version[2] = input.readByte();
+ version[3] = input.readByte();
+
+ VersionInfo result = VersionInfo.getInstance((int) version[0], (int) version[1], (int) version[2],
+ (int) version[3]);
+
+ return result;
+ }
+
+ // private inner class -----------------------------------------------
+
+ // private variables -------------------------------------------------
+
+    /**
+     * Accepts uca data whose first format version byte equals ours and whose second is at least ours.
+     */
+    private static final ICUBinary.Authenticate UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
+        public boolean isDataVersionAcceptable(byte[] version) {
+            // Demanding equality on all four version bytes was judged too harsh: bytes 2 and 3 are ignored.
+            if (version[0] != DATA_FORMAT_VERSION_[0]) {
+                return false;
+            }
+            return version[1] >= DATA_FORMAT_VERSION_[1];
+        }
+    };
+
+    /**
+     * Accepts inverse uca data whose first version byte equals ours and whose second is at least ours.
+     */
+    private static final ICUBinary.Authenticate INVERSE_UCA_AUTHENTICATE_ = new ICUBinary.Authenticate() {
+        public boolean isDataVersionAcceptable(byte[] version) {
+            final boolean sameFirstByte = version[0] == INVERSE_UCA_DATA_FORMAT_VERSION_[0];
+            return sameFirstByte && version[1] >= INVERSE_UCA_DATA_FORMAT_VERSION_[1];
+        }
+    };
+
+ /**
+ * Data input stream for uca.icu
+ */
+ private DataInputStream m_dataInputStream_;
+
+ /**
+ * File format version and id that this class understands. No guarantees are made if an older version is used
+ */
+ private static final byte DATA_FORMAT_VERSION_[] = { (byte) 0x3, (byte) 0x0, (byte) 0x0, (byte) 0x0 };
+ private static final byte DATA_FORMAT_ID_[] = { (byte) 0x55, (byte) 0x43, (byte) 0x6f, (byte) 0x6c };
+ /**
+ * Inverse UCA file format version and id that this class understands. No guarantees are made if an older version is
+ * used
+ */
+ private static final byte INVERSE_UCA_DATA_FORMAT_VERSION_[] = { (byte) 0x2, (byte) 0x1, (byte) 0x0, (byte) 0x0 };
+ private static final byte INVERSE_UCA_DATA_FORMAT_ID_[] = { (byte) 0x49, (byte) 0x6e, (byte) 0x76, (byte) 0x43 };
+
+ /**
+ * Wrong unicode version error string
+ */
+ private static final String WRONG_UNICODE_VERSION_ERROR_ = "Unicode version in binary image is not compatible with the current Unicode version";
+
+ /**
+ * Size of expansion table in bytes
+ */
+ private int m_expansionSize_;
+ /**
+ * Size of contraction index table in bytes
+ */
+ private int m_contractionIndexSize_;
+ /**
+ * Size of contraction table in bytes
+ */
+ private int m_contractionCESize_;
+ /*
+ * Size of the Trie in bytes
+ */
+ // private int m_trieSize_;
+ /**
+ * Size of the table that contains information about collation elements that end with an expansion
+ */
+ private int m_expansionEndCESize_;
+ /**
+ * Size of the table that contains information about the maximum size of collation elements that end with a
+ * particular expansion CE corresponding to the ones in expansionEndCE
+ */
+ private int m_expansionEndCEMaxSizeSize_;
+ /**
+ * Size of the option table that contains information about the collation options
+ */
+ private int m_optionSize_;
+ /**
+ * Size of the whole data file, not counting the ICU header
+ */
+ private int m_size_;
+ /**
+ * Size of the collation data header
+ */
+ private int m_headerSize_;
+ /**
+ * Size of the table that contains information about the "Unsafe" codepoints
+ */
+ private int m_unsafeSize_;
+ /**
+ * Size in bytes of the table that contains information about codepoints that ends with a contraction
+ */
+ private int m_contractionSize_;
+ /**
+ * Size of the table that contains UCA contraction information in bytes
+ */
+ private int m_UCAcontractionSize_;
+ /**
+ * Offset of the UCA Const
+ */
+ private int m_UCAConstOffset_;
+
+ // private methods ---------------------------------------------------
+
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java b/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java
new file mode 100644
index 00000000000..ab1a7f4a590
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/CollatorServiceShim.java
@@ -0,0 +1,145 @@
+/**
+*******************************************************************************
+* Copyright (C) 2003-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.text;
+
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.Set;
+
+import com.ibm.icu.impl.ICULocaleService;
+import com.ibm.icu.impl.ICULocaleService.LocaleKeyFactory;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.ICUService;
+import com.ibm.icu.impl.ICUService.Factory;
+import com.ibm.icu.text.Collator.CollatorFactory;
+import com.ibm.icu.util.ULocale;
+
+final class CollatorServiceShim extends Collator.ServiceShim {
+
+    /**
+     * Returns a clone of the collator that the service supplies for the locale, tagged with the locale
+     * reported back by the service. A clone failure becomes an IllegalStateException with its cause chained.
+     */
+    Collator getInstance(ULocale locale) {
+        // use service cache, it's faster than instantiation
+        try {
+            ULocale[] actualLoc = new ULocale[1];
+            Collator coll = (Collator)service.get(locale, actualLoc);
+            if (coll == null) {
+                ///CLOVER:OFF
+                //Can't really change coll after it's been initialized
+                throw new MissingResourceException("Could not locate Collator data", "", "");
+                ///CLOVER:ON
+            }
+            coll = (Collator) coll.clone();
+            coll.setLocale(actualLoc[0], actualLoc[0]); // services make no distinction between actual & valid
+            return coll;
+        } catch (CloneNotSupportedException e) {
+            ///CLOVER:OFF
+            throw new IllegalStateException(e.getMessage(), e);
+            ///CLOVER:ON
+        }
+    }
+
+    /** Registers the collator object with the service for the locale and returns the service's result. */
+    Object registerInstance(Collator collator, ULocale locale) {
+        return service.registerObject(collator, locale);
+    }
+
+    /** Wraps the CollatorFactory in a LocaleKeyFactory, registers the wrapper and returns the service's result. */
+    Object registerFactory(CollatorFactory f) {
+        class CFactory extends LocaleKeyFactory {
+            CollatorFactory delegate;
+
+            CFactory(CollatorFactory fctry) {
+                super(fctry.visible());
+                this.delegate = fctry;
+            }
+
+            public Object handleCreate(ULocale loc, int kind, ICUService srvc) {
+                return delegate.createCollator(loc);
+            }
+
+            public String getDisplayName(String id, ULocale displayLocale) {
+                ULocale objectLocale = new ULocale(id);
+                return delegate.getDisplayName(objectLocale, displayLocale);
+            }
+
+            public Set<String> getSupportedIDs() {
+                return delegate.getSupportedLocaleIDs();
+            }
+        }
+
+        return service.registerFactory(new CFactory(f));
+    }
+
+    /** Unregisters a factory from the service; the key is cast to Factory, so any other object fails the cast. */
+    boolean unregister(Object registryKey) {
+        return service.unregisterFactory((Factory)registryKey);
+    }
+
+    /** Lists collation locales: from the resource bundles while the service is default, else from the service. */
+    Locale[] getAvailableLocales() {
+        // TODO rewrite this to just wrap getAvailableULocales later
+        if (!service.isDefault()) {
+            return service.getAvailableLocales();
+        }
+        return ICUResourceBundle.getAvailableLocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME,
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+    }
+
+    /** ULocale counterpart of getAvailableLocales(), with the same choice between bundles and service. */
+    ULocale[] getAvailableULocales() {
+        if (!service.isDefault()) {
+            return service.getAvailableULocales();
+        }
+        return ICUResourceBundle.getAvailableULocales(ICUResourceBundle.ICU_COLLATION_BASE_NAME,
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+    }
+
+    /** Asks the service for the display name of objectLocale (looked up by its name) in displayLocale. */
+    String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
+        String id = objectLocale.getName();
+        return service.getDisplayName(id, displayLocale);
+    }
+
+    /** The collator service: starts with one resource-bundle based factory and is then marked default. */
+    private static class CService extends ICULocaleService {
+        CService() {
+            super("Collator");
+
+            class CollatorFactory extends ICUResourceBundleFactory {
+                CollatorFactory() {
+                    super(ICUResourceBundle.ICU_COLLATION_BASE_NAME);
+                }
+
+                protected Object handleCreate(ULocale uloc, int kind, ICUService srvc) {
+                    return new RuleBasedCollator(uloc);
+                }
+            }
+
+            this.registerFactory(new CollatorFactory());
+            markDefault();
+        }
+        ///CLOVER:OFF
+        // The following method can not be reached by testing
+        protected Object handleDefault(Key key, String[] actualIDReturn) {
+            if (actualIDReturn != null) {
+                actualIDReturn[0] = "root";
+            }
+            try {
+                return new RuleBasedCollator(ULocale.ROOT);
+            } catch (MissingResourceException e) {
+                return null;
+            }
+        }
+        ///CLOVER:ON
+    }
+    /** The single service instance shared by every method of this shim. */
+    private static final ICULocaleService service = new CService();
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java b/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java
new file mode 100644
index 00000000000..68e7863e387
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/RawCollationKey.java
@@ -0,0 +1,102 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import com.ibm.icu.util.ByteArrayWrapper;
+
+/**
+ *
+ * Simple class wrapper to store the internal byte representation of a
+ * CollationKey. Unlike the CollationKey, this class does not contain information
+ * on the source string the sort order represents. RawCollationKey is mutable
+ * and users can reuse its objects with the method in
+ * RuleBasedCollator.getRawCollationKey(..).
+ *
+ *
+ * Please refer to the documentation on CollationKey for a detail description
+ * on the internal byte representation. Note the internal byte representation
+ * is always null-terminated.
+ *
+ *
+ * Example of use:
+ * String str[] = {.....};
+ * RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance();
+ * RawCollationKey key = new RawCollationKey(128);
+ * for (int i = 0; i < str.length; i ++) {
+ * collator.getRawCollationKey(str[i], key);
+ * // do something with key.bytes
+ * }
+ *
+ * Note: Comparison between RawCollationKeys created by
+ * different Collators might return incorrect results.
+ * See class documentation for Collator.
+ * @stable ICU 2.8
+ * @see RuleBasedCollator
+ * @see CollationKey
+ */
+public final class RawCollationKey extends ByteArrayWrapper
+{
+    // public constructors --------------------------------------------------
+
+    /**
+     * Creates an empty key: the internal byte array is null and the size is 0.
+     * @stable ICU 2.8
+     */
+    public RawCollationKey()
+    {
+    }
+
+    /**
+     * Creates a key backed by a newly allocated, empty byte array of the
+     * requested length. The size of the key itself starts out as 0.
+     * @param capacity length of internal byte array
+     * @stable ICU 2.8
+     */
+    public RawCollationKey(int capacity)
+    {
+        this.bytes = new byte[capacity];
+    }
+
+    /**
+     * Creates a key that adopts the given array as its internal byte array;
+     * the array is not copied. The size of the key starts out as 0.
+     * @param bytes byte array to be adopted by RawCollationKey
+     * @stable ICU 2.8
+     */
+    public RawCollationKey(byte[] bytes)
+    {
+        this.bytes = bytes;
+    }
+
+    /**
+     * Creates a key from a byte array and the number of valid bytes in it.
+     * @param bytesToAdopt the byte array to adopt
+     * @param size the length of valid data in the byte array
+     * @throws IndexOutOfBoundsException if bytesToAdopt == null and size != 0, or
+     *     size < 0, or size > bytesToAdopt.length.
+     * @stable ICU 2.8
+     */
+    public RawCollationKey(byte[] bytesToAdopt, int size)
+    {
+        super(bytesToAdopt, size);
+    }
+
+    /**
+     * Orders this key against another key, which must not be null. The inherited
+     * comparison is overridden only to normalize its result to exactly -1, 0, or 1.
+     * @param rhs the RawCollationKey to compare to.
+     * @return -1, 0, or 1 as this compares less than, equal to, or
+     *     greater than rhs.
+     * @throws ClassCastException if the other object is not a RawCollationKey.
+     * @stable ICU 4.4
+     */
+    public int compareTo(RawCollationKey rhs) {
+        final int order = super.compareTo(rhs);
+        return order > 0 ? 1 : (order < 0 ? -1 : 0);
+    }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java b/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java
new file mode 100644
index 00000000000..55e19a1c5c7
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java
@@ -0,0 +1,273 @@
+/*
+*******************************************************************************
+* Copyright (C) 2009-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.text;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
+ * implementation behind setLenientParseMode, which is based on Collator.
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
+    private final Map<String, RbnfLenientScanner> cache;
+
+    /**
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    public RbnfScannerProviderImpl() {
+        cache = new HashMap<String, RbnfLenientScanner>();
+    }
+
+    /**
+     * Returns a collation-based scanner, creating it on first use and caching it per locale/extras pair.
+     *
+     * Only primary differences are treated as significant. This means that case
+     * differences, accent differences, alternate spellings of the same letter
+     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
+     * matching the text. In many cases, numerals will be accepted in place of words
+     * or phrases as well.
+     *
+     * For example, all of the following will correctly parse as 255 in English in
+     * lenient-parse mode:
+     * "two hundred fifty-five"
+     * "two hundred fifty five"
+     * "TWO HUNDRED FIFTY-FIVE"
+     * "twohundredfiftyfive"
+     * "2 hundred fifty-5"
+     *
+     * It's important to emphasize that even strict parsing is relatively lenient: it
+     * will accept some text that it won't produce as output. In English, for example,
+     * it will correctly parse "two hundred zero" and "fifteen hundred".
+     *
+     * @param locale the locale whose default collator the scanner is based on
+     * @param extras additional collation rules appended to the rules of that collator,
+     *            enabling additional equivalences (such as adding more ignorable characters
+     *            or permitting spelled-out versions of symbols); may be null for none
+     * @return the scanner cached for this locale and extras, or a newly created one. Two
+     *         threads asking for the same uncached key may each create a scanner; the one
+     *         stored last stays in the cache.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    public RbnfLenientScanner get(ULocale locale, String extras) {
+        String key = locale.toString() + "/" + extras;
+        synchronized(cache) {
+            RbnfLenientScanner cached = cache.get(key);
+            if (cached != null) {
+                return cached;
+            }
+        }
+        // the scanner is created outside the lock; only the two cache accesses are synchronized
+        RbnfLenientScanner result = createScanner(locale, extras);
+        synchronized(cache) {
+            cache.put(key, result);
+        }
+        return result;
+    }
+
+    /**
+     * Builds a scanner on the locale's collator, extended by extras if given; on failure the scanner wraps null.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
+        RuleBasedCollator collator = null;
+        try {
+            // create a default collator based on the locale,
+            // then pull out that collator's rules, append any additional
+            // rules specified in the description, and create a _new_
+            // collator based on the combination of those rules
+            collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
+            if (extras != null) {
+                String rules = collator.getRules() + extras;
+                collator = new RuleBasedCollator(rules);
+            }
+            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
+        } catch (Exception e) {
+            // If we get here, it means we have a malformed set of
+            // collation rules, which hopefully won't happen
+            ///CLOVER:OFF
+            if (true){ // debug hook
+                e.printStackTrace(); System.out.println("++++");
+            }
+            collator = null;
+            ///CLOVER:ON
+        }
+
+        return new RbnfLenientScannerImpl(collator);
+    }
+
+    private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
+        private final RuleBasedCollator collator;
+
+        private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
+            this.collator = rbc;
+        }
+
+        public boolean allIgnorable(String s) {
+            CollationElementIterator iter = collator.getCollationElementIterator(s);
+            // true only if every collation element of s has a primary order of 0
+            int o = iter.next();
+            while (o != CollationElementIterator.NULLORDER
+                   && CollationElementIterator.primaryOrder(o) == 0) {
+                o = iter.next();
+            }
+            return o == CollationElementIterator.NULLORDER;
+        }
+
+        /**
+         * Searches str from startingAt for text matching key. Returns { position, matched length },
+         * or { -1, 0 } when nothing matches.
+         */
+        public int[] findText(String str, String key, int startingAt) {
+            // basically just isolate smaller and smaller substrings of
+            // the target string (each running to the end of the string,
+            // and with the first one running from startingAt to the end)
+            // and then use prefixLength() to see if the search key is at
+            // the beginning of each substring. This is excruciatingly
+            // slow, but it will locate the key and tell use how long the
+            // matching text was.
+            for (int p = startingAt; p < str.length(); ++p) {
+                int keyLen = prefixLength(str.substring(p), key);
+                if (keyLen != 0) {
+                    return new int[] { p, keyLen };
+                }
+            }
+            // if we make it to here, we didn't find it. Return -1 for the
+            // location. The length should be ignored, but set it to 0,
+            // which should be "safe"
+            return new int[] { -1, 0 };
+        }
+
+        ///CLOVER:OFF
+        // The following method contains the same signature as findText
+        // and has never been used by anything once.
+        @SuppressWarnings("unused")
+        public int[] findText2(String str, String key, int startingAt) {
+
+            CollationElementIterator strIter = collator.getCollationElementIterator(str);
+            CollationElementIterator keyIter = collator.getCollationElementIterator(key);
+
+            int keyStart = -1;
+
+            strIter.setOffset(startingAt);
+
+            int oStr = strIter.next();
+            int oKey = keyIter.next();
+            while (oKey != CollationElementIterator.NULLORDER) {
+                while (oStr != CollationElementIterator.NULLORDER &&
+                       CollationElementIterator.primaryOrder(oStr) == 0)
+                    oStr = strIter.next();
+
+                while (oKey != CollationElementIterator.NULLORDER &&
+                       CollationElementIterator.primaryOrder(oKey) == 0)
+                    oKey = keyIter.next();
+
+                if (oStr == CollationElementIterator.NULLORDER) {
+                    return new int[] { -1, 0 };
+                }
+
+                if (oKey == CollationElementIterator.NULLORDER) {
+                    break;
+                }
+
+                if (CollationElementIterator.primaryOrder(oStr) ==
+                    CollationElementIterator.primaryOrder(oKey)) {
+                    keyStart = strIter.getOffset();
+                    oStr = strIter.next();
+                    oKey = keyIter.next();
+                } else {
+                    if (keyStart != -1) {
+                        keyStart = -1;
+                        keyIter.reset();
+                    } else {
+                        oStr = strIter.next();
+                    }
+                }
+            }
+
+            if (oKey == CollationElementIterator.NULLORDER) {
+                return new int[] { keyStart, strIter.getOffset() - keyStart };
+            }
+
+            return new int[] { -1, 0 };
+        }
+        ///CLOVER:ON
+
+        public int prefixLength(String str, String prefix) {
+            // Create two collation element iterators, one over the target string
+            // and another over the prefix.
+            //
+            // Previous code was matching "fifty-" against " fifty" and leaving
+            // the number " fifty-7" to parse as 43 (50 - 7).
+            // Also it seems that if we consume the entire prefix, that's ok even
+            // if we've consumed the entire string, so I switched the logic to
+            // reflect this.
+
+            CollationElementIterator strIter = collator.getCollationElementIterator(str);
+            CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
+
+            // match collation elements between the strings
+            int oStr = strIter.next();
+            int oPrefix = prefixIter.next();
+
+            while (oPrefix != CollationElementIterator.NULLORDER) {
+                // skip over ignorable characters in the target string
+                while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
+                       CollationElementIterator.NULLORDER) {
+                    oStr = strIter.next();
+                }
+
+                // skip over ignorable characters in the prefix
+                while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
+                       CollationElementIterator.NULLORDER) {
+                    oPrefix = prefixIter.next();
+                }
+
+                // if skipping over ignorables brought to the end of
+                // the prefix, we DID match: drop out of the loop
+                if (oPrefix == CollationElementIterator.NULLORDER) {
+                    break;
+                }
+
+                // if skipping over ignorables brought us to the end
+                // of the target string, we didn't match and return 0
+                if (oStr == CollationElementIterator.NULLORDER) {
+                    return 0;
+                }
+
+                // match collation elements from the two strings
+                // (considering only primary differences). If we
+                // get a mismatch, dump out and return 0
+                if (CollationElementIterator.primaryOrder(oStr) !=
+                    CollationElementIterator.primaryOrder(oPrefix)) {
+                    return 0;
+                }
+
+                // otherwise, advance to the next character in each string
+                // and loop (we drop out of the loop when we exhaust
+                // collation elements in the prefix)
+
+                oStr = strIter.next();
+                oPrefix = prefixIter.next();
+            }
+            // NOTE(review): the offset is reduced by one when str still had an element; presumably undoes the look-ahead - confirm
+            int result = strIter.getOffset();
+            if (oStr != CollationElementIterator.NULLORDER) {
+                --result;
+            }
+            return result;
+        }
+    }
+}
\ No newline at end of file
diff --git a/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java b/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
new file mode 100644
index 00000000000..727ee346245
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
@@ -0,0 +1,4854 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.text;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.text.CharacterIterator;
+import java.text.ParseException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.Set;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.ibm.icu.impl.BOCU;
+import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.ImplicitCEGenerator;
+import com.ibm.icu.impl.IntTrie;
+import com.ibm.icu.impl.StringUCharacterIterator;
+import com.ibm.icu.impl.Trie;
+import com.ibm.icu.impl.TrieIterator;
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.util.RangeValueIterator;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ *
+ * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
+ * sets. RuleBasedCollator is designed to be fully compliant to the Unicode Collation Algorithm (UCA) and conforms to ISO 14651.
+ *
+ *
+ *
+ * Users are strongly encouraged to read the users
+ * guide for more information about the collation service before using this class.
+ *
+ *
+ *
+ * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
+ * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
+ * argument locale. If a customized collation ordering or attributes is required, use the RuleBasedCollator(String)
+ * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on UCA, while
+ * re-adjusting the attributes and orders of the characters in the specified rule accordingly.
+ *
+ *
+ *
+ * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
+ * is not available, the order eventually falls back to the UCA
+ * collation order.
+ *
+ *
+ *
+ * For information about the collation rule syntax and details about customization, please refer to the Collation customization section of the
+ * user's guide.
+ *
+ *
+ *
+ * Note that there are some differences between the Collation rule syntax used in Java and ICU4J:
+ *
+ *
+ * According to the JDK documentation:
+ *
+ * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range
+ * \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range
+ * \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the
+ * consonant for collation purposes.
+ *
+ *
+ * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
+ *
+ *
+ *
+ * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly
+ * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.
+ *
+ * As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.
+ *
+ *
+ * Examples
+ *
+ *
+ * Creating Customized RuleBasedCollators:
+ *
+ *
+ * String simple = "& a < b < c < d";
+ * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
+ *
+ * String norwegian = "& a , A < b , B < c , C < d , D < e , E "
+ * + "< f , F < g , G < h , H < i , I < j , "
+ * + "J < k , K < l , L < m , M < n , N < "
+ * + "o , O < p , P < q , Q < r , R < s , S < "
+ * + "t , T < u , U < v , V < w , W < x , X "
+ * + "< y , Y < z , Z < \u00E5 = a\u030A "
+ * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 "
+ * + ", \u00C6 < \u00F8 , \u00D8";
+ * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
+ *
+ *
+ *
+ *
+ * Concatenating rules to combine Collators:
+ *
+ *
+ * // Create an en_US Collator object
+ * RuleBasedCollator en_USCollator = (RuleBasedCollator)
+ * Collator.getInstance(new Locale("en", "US", ""));
+ * // Create a da_DK Collator object
+ * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
+ * Collator.getInstance(new Locale("da", "DK", ""));
+ * // Combine the two
+ * // First, get the collation rules from en_USCollator
+ * String en_USRules = en_USCollator.getRules();
+ * // Second, get the collation rules from da_DKCollator
+ * String da_DKRules = da_DKCollator.getRules();
+ * RuleBasedCollator newCollator =
+ * new RuleBasedCollator(en_USRules + da_DKRules);
+ * // newCollator has the combined rules
+ *
+ *
+ *
+ *
+ * Making changes to an existing RuleBasedCollator to create a new Collator object, by appending changes to
+ * the existing rule:
+ *
+ *
+ * // Create a new Collator object with additional rules
+ * String addRules = "& C < ch, cH, Ch, CH";
+ * RuleBasedCollator myCollator =
+ * new RuleBasedCollator(en_USCollator.getRules() + addRules);
+ * // myCollator contains the new rules
+ *
+ *
+ *
+ *
+ * How to change the order of non-spacing accents:
+ *
+ *
+ * // old rule with main accents
+ * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 "
+ * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 "
+ * + "; \u0306 ; \u0307 ; \u0309 ; \u030A "
+ * + "; \u030B ; \u030C ; \u030D ; \u030E "
+ * + "; \u030F ; \u0310 ; \u0311 ; \u0312 "
+ * + "< a , A ; ae, AE ; \u00e6 , \u00c6 "
+ * + "< b , B < c, C < e, E & C < d , D";
+ * // change the order of accent characters
+ * String addOn = "& \u0300 ; \u0308 ; \u0302";
+ * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
+ *
+ *
+ *
+ *
+ * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese
+ * characters in the Japanese Collator:
+ *
+ *
+ * // get en_US Collator rules
+ * RuleBasedCollator en_USCollator
+ * = (RuleBasedCollator)Collator.getInstance(Locale.US);
+ * // add a few Japanese characters to sort before English characters
+ * // suppose the last character before the first base letter 'a' in
+ * // the English collation rule is \u2212
+ * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, "
+ * + "\u3044";
+ * RuleBasedCollator myJapaneseCollator
+ * = new RuleBasedCollator(en_USCollator.getRules() + jaString);
+ *
+ *
+ *
+ *
+ *
+ * This class is not subclassable
+ *
+ *
+ * @author Syn Wee Quek
+ * @stable ICU 2.8
+ */
+public final class RuleBasedCollator extends Collator {
+ // public constructors ---------------------------------------------------
+
+    /**
+     * Builds a collator from customization rules. The collator is based on UCA and applies the
+     * attributes and the re-ordering of characters that the argument rules specify.
+     *
+     * The rule syntax is described in the Collation Customization section of the
+     * user guide.
+     *
+     * @param rules
+     *            the collation rules to build the collation table from; must not be null.
+     * @exception ParseException
+     *                thrown when the argument rules have an invalid syntax.
+     * @exception IOException
+     *                thrown when an error occurred while reading internal data.
+     * @exception IllegalArgumentException
+     *                thrown when rules is null.
+     * @stable ICU 2.8
+     */
+    public RuleBasedCollator(String rules) throws Exception {
+        checkUCA(); // UCA check first, then the argument check
+        if (rules != null) {
+            init(rules);
+            return;
+        }
+        throw new IllegalArgumentException("Collation rules can not be null");
+    }
+
+ // public methods --------------------------------------------------------
+
+    /**
+     * Clones the RuleBasedCollator. The copy is frozen exactly when this collator is frozen.
+     * @return a new instance of this RuleBasedCollator object
+     * @stable ICU 2.8
+     */
+    public Object clone() throws CloneNotSupportedException {
+        final boolean keepFrozen = isFrozen();
+        return clone(keepFrozen);
+    }
+
+ /**
+ * Clones the RuleBasedCollator
+ *
+ * @param frozen should the clone be frozen or not
+ * @return a new instance of this RuleBasedCollator object
+ */
+ private Object clone(boolean frozen) throws CloneNotSupportedException {
+ //TODO: once buffer and threading issue is resolved have frozen clone just return itself
+ RuleBasedCollator result = (RuleBasedCollator) super.clone();
+ if (latinOneCEs_ != null) {
+ result.m_reallocLatinOneCEs_ = true;
+ result.m_ContInfo_ = new ContractionInfo();
+ }
+
+ // since all collation data in the RuleBasedCollator do not change
+ // we can safely assign the result.fields to this collator
+ // except in cases where we can't
+ result.collationBuffer = null;
+ result.frozenLock = frozen ? new ReentrantLock() : null;
+ return result;
+ }
+
+    /**
+     * Creates a CollationElementIterator that is constructed from the given String and this collator.
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(String source) {
+        final CollationElementIterator iterator = new CollationElementIterator(source, this);
+        return iterator;
+    }
+
+    /**
+     * Creates a CollationElementIterator over a clone of the given CharacterIterator, so the caller's
+     * iterator itself is not the one handed to the new object.
+     *
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
+        // the clone, not the caller's iterator, is what the new object gets
+        return new CollationElementIterator((CharacterIterator) source.clone(), this);
+    }
+
+    /**
+     * Creates a CollationElementIterator for the given UCharacterIterator. The iterator is passed on as is;
+     * the copy that preserves the source iterator is presumably made by the constructor (not visible here).
+     * @see CollationElementIterator
+     * @stable ICU 2.8
+     */
+    public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
+        final CollationElementIterator iterator = new CollationElementIterator(source, this);
+        return iterator;
+    }
+
+ // Freezable interface implementation -------------------------------------------------
+
+    /**
+     * Tells whether this collator is frozen, that is, whether its frozenLock field is set.
+     * @draft ICU 4.8
+     */
+    public boolean isFrozen() {
+        return !(frozenLock == null);
+    }
+
+    /**
+     * Freezes the collator. Freezing an already frozen collator changes nothing.
+     * @return the collator itself.
+     * @draft ICU 4.8
+     */
+    public Collator freeze() {
+        if (frozenLock == null) { // same test as !isFrozen()
+            frozenLock = new ReentrantLock();
+        }
+        return this;
+    }
+
+    /**
+     * Provides for the clone operation. Any clone is initially unfrozen.
+     * @throws IllegalStateException if cloning fails, which is not expected since clone is implemented
+     * @draft ICU 4.8
+     */
+    public RuleBasedCollator cloneAsThawed() {
+        try {
+            return (RuleBasedCollator) clone(false);
+        } catch (CloneNotSupportedException e) {
+            // Clone is implemented; report a failure loudly instead of handing out null
+            throw new IllegalStateException(e.getMessage(), e);
+        }
+    }
+
+ // public setters --------------------------------------------------------
+
+ /**
+ * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
+ * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
+ * correct JIS collation order, distinguishing between Katakana and Hiragana characters.
+ *
+ * @param flag
+ * true if Hiragana Quaternary mode is to be on, false otherwise
+ * @see #setHiraganaQuaternaryDefault
+ * @see #isHiraganaQuaternary
+ * @stable ICU 2.8
+ */
+ public void setHiraganaQuaternary(boolean flag) {
+     // A frozen collator must not change; reject before touching any state.
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+     m_isHiragana4_ = flag;
+     // Let the collator recompute whatever it derives from its attributes.
+     updateInternalState();
+ }
+
+ /**
+ * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setHiraganaQuaternary(boolean) for more details.
+ *
+ * @see #setHiraganaQuaternary(boolean)
+ * @see #isHiraganaQuaternary
+ * @stable ICU 2.8
+ */
+ public void setHiraganaQuaternaryDefault() {
+     if (!isFrozen()) {
+         // Restore the stored default value, then refresh the derived state.
+         m_isHiragana4_ = m_defaultIsHiragana4_;
+         updateInternalState();
+         return;
+     }
+     throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ /**
+ * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The
+ * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case
+ * characters first.
+ *
+ * @param upperfirst
+ * true to sort uppercase characters before lowercase characters, false to sort lowercase characters
+ * before uppercase characters
+ * @see #isLowerCaseFirst
+ * @see #isUpperCaseFirst
+ * @see #setLowerCaseFirst
+ * @see #setCaseFirstDefault
+ * @stable ICU 2.8
+ */
+ public void setUpperCaseFirst(boolean upperfirst) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // true selects upper-first, false switches the case-first attribute off.
+     // The Latin-1 table is flagged for regeneration only when the attribute really changes.
+     if (m_caseFirst_ != (upperfirst ? AttributeValue.UPPER_FIRST_ : AttributeValue.OFF_)) {
+         latinOneRegenTable_ = true;
+     }
+     m_caseFirst_ = upperfirst ? AttributeValue.UPPER_FIRST_ : AttributeValue.OFF_;
+     updateInternalState();
+ }
+
+ /**
+ * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The
+ * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper
+ * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences.
+ *
+ * @param lowerfirst
+ * true for sorting lower cased characters before upper cased characters, false to ignore case
+ * preferences.
+ * @see #isLowerCaseFirst
+ * @see #isUpperCaseFirst
+ * @see #setUpperCaseFirst
+ * @see #setCaseFirstDefault
+ * @stable ICU 2.8
+ */
+ public void setLowerCaseFirst(boolean lowerfirst) {
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // true selects lower-first, false switches the case-first attribute off.
+     final boolean changes = m_caseFirst_ != (lowerfirst ? AttributeValue.LOWER_FIRST_ : AttributeValue.OFF_);
+     if (changes) {
+         // A different case-first value invalidates the Latin-1 table.
+         latinOneRegenTable_ = true;
+     }
+     m_caseFirst_ = lowerfirst ? AttributeValue.LOWER_FIRST_ : AttributeValue.OFF_;
+     updateInternalState();
+ }
+
+ /**
+ * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details.
+ *
+ * @see #isLowerCaseFirst
+ * @see #isUpperCaseFirst
+ * @see #setLowerCaseFirst(boolean)
+ * @see #setUpperCaseFirst(boolean)
+ * @stable ICU 2.8
+ */
+ public final void setCaseFirstDefault() {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // Remember whether the reset actually alters the attribute before overwriting it.
+     final boolean differsFromDefault = (m_caseFirst_ != m_defaultCaseFirst_);
+     m_caseFirst_ = m_defaultCaseFirst_;
+     if (differsFromDefault) {
+         latinOneRegenTable_ = true;
+     }
+     updateInternalState();
+ }
+
+ /**
+ * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setAlternateHandlingShifted(boolean) for more details.
+ *
+ * @see #setAlternateHandlingShifted(boolean)
+ * @see #isAlternateHandlingShifted()
+ * @stable ICU 2.8
+ */
+ public void setAlternateHandlingDefault() {
+     if (!isFrozen()) {
+         // Fall back to the stored default for the shifted flag and refresh derived state.
+         m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
+         updateInternalState();
+         return;
+     }
+     throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ /**
+ * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setCaseLevel(boolean) for more details.
+ *
+ * @see #setCaseLevel(boolean)
+ * @see #isCaseLevel
+ * @stable ICU 2.8
+ */
+ public void setCaseLevelDefault() {
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+     // Restore the stored default case level setting.
+     m_isCaseLevel_ = m_defaultIsCaseLevel_;
+     updateInternalState();
+ }
+
+ /**
+ * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setDecomposition(int) for more details.
+ *
+ * @see #getDecomposition
+ * @see #setDecomposition(int)
+ * @stable ICU 2.8
+ */
+ public void setDecompositionDefault() {
+     if (!isFrozen()) {
+         // Delegate to the regular setter with the stored default, then refresh derived state.
+         setDecomposition(m_defaultDecomposition_);
+         updateInternalState();
+         return;
+     }
+     throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ /**
+ * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See
+ * setFrenchCollation(boolean) for more details.
+ *
+ * @see #isFrenchCollation
+ * @see #setFrenchCollation(boolean)
+ * @stable ICU 2.8
+ */
+ public void setFrenchCollationDefault() {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // Only an actual change of the French flag requires the Latin-1 table to be rebuilt.
+     final boolean differsFromDefault = (m_isFrenchCollation_ != m_defaultIsFrenchCollation_);
+     m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
+     if (differsFromDefault) {
+         latinOneRegenTable_ = true;
+     }
+     updateInternalState();
+ }
+
+ /**
+ * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See
+ * setStrength(int) for more details.
+ *
+ * @see #setStrength(int)
+ * @see #getStrength
+ * @stable ICU 2.8
+ */
+ public void setStrengthDefault() {
+     // Same explicit guard as the other default-resetting setters of this class, so a frozen
+     // collator is rejected here rather than depending on what the delegated setter checks.
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // Delegate to the regular setter with the stored default strength.
+     setStrength(m_defaultStrength_);
+     updateInternalState();
+ }
+
+ /**
+ * Method to set numeric collation to its default value. When numeric collation is turned on, this Collator
+ * generates a collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER
+ * '2'
+ *
+ * @see #getNumericCollation
+ * @see #setNumericCollation
+ * @stable ICU 2.8
+ */
+ public void setNumericCollationDefault() {
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+     // Delegate to the regular setter with the stored default, then refresh derived state.
+     setNumericCollation(m_defaultIsNumericCollation_);
+     updateInternalState();
+ }
+
+ /**
+ * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
+ * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
+ * backwards. See the section on
+ * French collation for more information.
+ *
+ * @param flag
+ * true to set the French collation on, false to set it off
+ * @stable ICU 2.8
+ * @see #isFrenchCollation
+ * @see #setFrenchCollationDefault
+ */
+ public void setFrenchCollation(boolean flag) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // Record whether the flag flips before storing it; a flip invalidates the Latin-1 table.
+     final boolean flips = (m_isFrenchCollation_ != flag);
+     m_isFrenchCollation_ = flag;
+     if (flips) {
+         latinOneRegenTable_ = true;
+     }
+     updateInternalState();
+ }
+
+ /**
+ * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
+ * on Alternate Weighting . This
+ * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
+ * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
+ * the codepoints with non-ignorable primary weights in the same way. If the mode is set to true, the behaviour
+ * corresponds to SHIFTED defined in UCA, this causes codepoints with PRIMARY orders that are equal or below the
+ * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
+ *
+ * @param shifted
+ * true if SHIFTED behaviour for alternate handling is desired, false for the NON_IGNORABLE behaviour.
+ * @see #isAlternateHandlingShifted
+ * @see #setAlternateHandlingDefault
+ * @stable ICU 2.8
+ */
+ public void setAlternateHandlingShifted(boolean shifted) {
+     if (!isFrozen()) {
+         // Store the requested alternate handling mode and refresh derived state.
+         m_isAlternateHandlingShifted_ = shifted;
+         updateInternalState();
+         return;
+     }
+     throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ /**
+ *
+ * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known
+ * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level
+ * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value
+ * is false, which means the case level is not generated. The contents of the case level are affected by the case
+ * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable
+ * case level.
+ *
+ *
+ * See the section on case
+ * level for more information.
+ *
+ *
+ * @param flag
+ * true if case level sorting is required, false otherwise
+ * @stable ICU 2.8
+ * @see #setCaseLevelDefault
+ * @see #isCaseLevel
+ */
+ public void setCaseLevel(boolean flag) {
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+     // Store the requested case level setting and refresh derived state.
+     m_isCaseLevel_ = flag;
+     updateInternalState();
+ }
+
+ /**
+ *
+ * Sets this Collator's strength property. The strength property determines the minimum level of difference
+ * considered significant during comparison.
+ *
+ *
+ * See the Collator class description for an example of use.
+ *
+ *
+ * @param newStrength
+ * the new strength value.
+ * @see #getStrength
+ * @see #setStrengthDefault
+ * @see #PRIMARY
+ * @see #SECONDARY
+ * @see #TERTIARY
+ * @see #QUATERNARY
+ * @see #IDENTICAL
+ * @exception IllegalArgumentException
+ * If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
+ * @stable ICU 2.8
+ */
+ public void setStrength(int newStrength) {
+     // Reject changes on a frozen collator here, exactly as the other setters of this class do,
+     // instead of depending on whatever the superclass implementation happens to check.
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // The superclass stores the strength; this class then refreshes its derived state.
+     super.setStrength(newStrength);
+     updateInternalState();
+ }
+
+ /**
+ *
+ * Variable top is a two byte primary value which causes all the codepoints with primary values that are less than
+ * or equal to the variable top to be shifted when alternate handling is set to SHIFTED.
+ *
+ *
+ * Sets the variable top to a collation element value of a string supplied.
+ *
+ *
+ * @param varTop
+ * one or more (if contraction) characters to which the variable top should be set
+ * @return an int value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined.
+ * @exception IllegalArgumentException
+ * is thrown if varTop argument is not a valid variable top element. A variable top element is
+ * invalid when
+ *
+ * it is a contraction that does not exist in the Collation order
+ * when the PRIMARY strength collation element for the variable top has more than two bytes
+ * when the varTop argument is null or zero in length.
+ *
+ * @see #getVariableTop
+ * @see RuleBasedCollator#setAlternateHandlingShifted
+ * @stable ICU 2.6
+ */
+ public int setVariableTop(String varTop) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // A usable argument needs at least one character.
+     final boolean missing = (varTop == null) || (varTop.length() == 0);
+     if (missing) {
+         throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
+     }
+
+     // Obtain the collation buffer for the duration of the call and always hand it back.
+     CollationBuffer scratch = null;
+     try {
+         scratch = getCollationBuffer();
+         final int primary = setVariableTop(varTop, scratch);
+         return primary;
+     } finally {
+         releaseCollationBuffer(scratch);
+     }
+ }
+
+ private int setVariableTop(String varTop, CollationBuffer buffer) {
+ buffer.m_srcUtilColEIter_.setText(varTop);
+ int ce = buffer.m_srcUtilColEIter_.next();
+
+ // here we check if we have consumed all characters
+ // you can put in either one character or a contraction
+ // you shouldn't put more...
+ if (buffer.m_srcUtilColEIter_.getOffset() != varTop.length() || ce == CollationElementIterator.NULLORDER) {
+ throw new IllegalArgumentException("Variable top argument string is a contraction that does not exist "
+ + "in the Collation order");
+ }
+
+ int nextCE = buffer.m_srcUtilColEIter_.next();
+
+ if ((nextCE != CollationElementIterator.NULLORDER)
+ && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) {
+ throw new IllegalArgumentException("Variable top argument string can only have a single collation "
+ + "element that has less than or equal to two PRIMARY strength " + "bytes");
+ }
+
+ m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16;
+
+ return ce & CE_PRIMARY_MASK_;
+ }
+
+ /**
+ * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. Lower 16
+ * bits are ignored.
+ *
+ * @param varTop
+ * Collation element value, as returned by setVariableTop or getVariableTop
+ * @see #getVariableTop
+ * @see #setVariableTop(String)
+ * @stable ICU 2.6
+ */
+ public void setVariableTop(int varTop) {
+     final boolean frozen = isFrozen();
+     if (frozen) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+     // Keep only the bits selected by the primary mask and store them shifted down by 16.
+     final int primary = varTop & CE_PRIMARY_MASK_;
+     m_variableTopValue_ = primary >> 16;
+ }
+
+ /**
+ * When numeric collation is turned on, this Collator generates a collation key for the numeric value of substrings
+ * of digits. This is a way to get '100' to sort AFTER '2'
+ *
+ * @param flag
+ * true to turn numeric collation on and false to turn it off
+ * @see #getNumericCollation
+ * @see #setNumericCollationDefault
+ * @stable ICU 2.8
+ */
+ public void setNumericCollation(boolean flag) {
+     if (!isFrozen()) {
+         // When on, substrings of digits are sorted as numbers.
+         m_isNumericCollation_ = flag;
+         updateInternalState();
+         return;
+     }
+     throw new UnsupportedOperationException("Attempt to modify frozen object");
+ }
+
+ /**
+ * Sets the reordering codes for this collator.
+ * Collation reordering allows scripts and some other defined blocks of characters
+ * to be moved relative to each other as a block. This reordering is done on top of
+ * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
+ * at the start and/or the end of the collation order.
+ * By default, reordering codes specified for the start of the order are placed in the
+ * order given after a group of “special” non-script blocks. These special groups of characters
+ * are space, punctuation, symbol, currency, and digit. These special groups are represented with
+ * {@link Collator.ReorderCodes}. Script groups can be intermingled with
+ * these special non-script blocks if those special blocks are explicitly specified in the reordering.
+ *
+ * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS} stands for any script that is not explicitly
+ * mentioned in the list of reordering codes given. Anything that is after {@link Collator.ReorderCodes#OTHERS OTHERS}
+ * will go at the very end of the reordering in the order given.
+ *
+ * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT} will reset the reordering for this collator
+ * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
+ * was specified when this collator was created from resource data or from rules. The
+ * {@link Collator.ReorderCodes#DEFAULT DEFAULT} code must be the sole code supplied when it is used. If not,
+ * an {@link IllegalArgumentException} will be thrown.
+ *
+ * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE} will remove any reordering for this collator.
+ * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
+ * {@link Collator.ReorderCodes#NONE NONE} code must be the sole code supplied when it is used.
+ * @param order the reordering codes to apply to this collator; if this is null or an empty array
+ * then this clears any existing reordering
+ * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
+ * @see #getReorderCodes
+ * @see #getEquivalentReorderCodes
+ * @draft ICU 4.8
+ */
+ public void setReorderCodes(int... order) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify frozen object");
+     }
+
+     // null and an empty array both clear the reordering; otherwise keep a private copy so
+     // later changes to the caller's array do not leak into this collator.
+     final boolean nothingRequested = (order == null) || (order.length == 0);
+     m_reorderCodes_ = nothingRequested ? null : order.clone();
+     buildPermutationTable();
+ }
+
+ // public getters --------------------------------------------------------
+
+ /**
+ * Gets the collation rules for this RuleBasedCollator. Equivalent to getRules(false), i.e. only the tailoring rules.
+ *
+ * @return returns the collation rules
+ * @see #getRules(boolean)
+ * @stable ICU 2.8
+ */
+ public String getRules() {
+     // Returns the stored rule string of this collator as-is.
+     final String rules = m_rules_;
+     return rules;
+ }
+
+ /**
+ * Returns current rules. The argument defines whether full rules (UCA + tailored) rules are returned or just the
+ * tailoring.
+ *
+ * @param fullrules
+ * true if the rules that defines the full set of collation order is required, otherwise false for
+ * returning only the tailored rules
+ * @return the current rules that defines this Collator.
+ * @see #getRules()
+ * @stable ICU 2.6
+ */
+ public String getRules(boolean fullrules) {
+     if (fullrules) {
+         // Full rules: the UCA rules with this collator's own rules appended.
+         return UCA_.m_rules_.concat(m_rules_);
+     }
+     // Tailoring only.
+     return m_rules_;
+ }
+
+ /**
+ * Gets a UnicodeSet that contains all the characters and sequences tailored in this collator.
+ *
+ * @return a UnicodeSet object containing all the code points and sequences that may sort differently
+ * than in the UCA.
+ * @stable ICU 2.4
+ */
+ public UnicodeSet getTailoredSet() {
+     try {
+         // Re-parse this collator's rules and ask the parser for the tailored set.
+         CollationRuleParser src = new CollationRuleParser(getRules());
+         return src.getTailoredSet();
+     } catch (Exception e) {
+         // These rules already built this collator, so a failure here is unexpected.
+         // Keep the original exception attached as the cause so it can be diagnosed.
+         throw new IllegalStateException("A tailoring rule should not " + "have errors. Something is quite wrong!", e);
+     }
+ }
+
+ // Mutable parameter bundle passed through processSpecials/addSpecial. It is declared static
+ // because it never uses the enclosing collator instance; the collator to inspect is carried
+ // explicitly in the coll field.
+ private static class contContext {
+     // collator whose trie and contraction tables are read
+     RuleBasedCollator coll;
+     // output set for contractions, or null when not requested
+     UnicodeSet contractions;
+     // output set for expansions, or null when not requested
+     UnicodeSet expansions;
+     // code points to skip while processing, or null for none
+     UnicodeSet removedContractions;
+     // whether prefix (SPEC_PROC) elements are followed as well
+     boolean addPrefixes;
+
+     contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions,
+             UnicodeSet removedContractions, boolean addPrefixes) {
+         this.coll = coll;
+         this.contractions = contractions;
+         this.expansions = expansions;
+         this.removedContractions = removedContractions;
+         this.addPrefixes = addPrefixes;
+     }
+ }
+
+ /**
+ * Collects the contraction (and, when requested, prefix) strings reachable from a special collation element and
+ * adds them to the result sets carried by the context. Recurses while the table entries are themselves
+ * contraction or prefix elements.
+ *
+ * @param c
+ *            working context: the collator whose contraction tables are read, the optional output sets and the
+ *            addPrefixes flag
+ * @param buffer
+ *            characters accumulated so far; it is only read here, every extension is built in a local copy
+ * @param CE
+ *            special collation element whose low 24 bits, minus the collator's contraction offset, index the
+ *            contraction tables
+ */
+ private void addSpecial(contContext c, StringBuilder buffer, int CE) {
+ StringBuilder b = new StringBuilder();
+ // table position of the first entry belonging to this element
+ int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_;
+ int newCE = c.coll.m_contractionCE_[offset];
+ // we might have a contraction that ends from previous level
+ if (newCE != CollationElementIterator.CE_NOT_FOUND_) {
+ if (isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ && isSpecial(newCE)
+ && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {
+ addSpecial(c, buffer, newCE);
+ }
+ // only strings of more than one char are recorded at this point
+ if (buffer.length() > 1) {
+ if (c.contractions != null) {
+ c.contractions.add(buffer.toString());
+ }
+ if (c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+ c.expansions.add(buffer.toString());
+ }
+ }
+ }
+
+ // the first entry was handled above; continue with the ones that follow
+ offset++;
+ // check whether we're doing contraction or prefix
+ if (getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) {
+ // walk the entries until the 0xFFFF index value is reached
+ while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
+ // start each candidate from a fresh copy of the accumulated characters
+ b.delete(0, b.length());
+ b.append(buffer);
+ newCE = c.coll.m_contractionCE_[offset];
+ // prefix processing grows the string to the left
+ b.insert(0, c.coll.m_contractionIndex_[offset]);
+ if (isSpecial(newCE)
+ && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
+ addSpecial(c, b, newCE);
+ } else {
+ if (c.contractions != null) {
+ c.contractions.add(b.toString());
+ }
+ if (c.expansions != null && isSpecial(newCE)
+ && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+ c.expansions.add(b.toString());
+ }
+ }
+ offset++;
+ }
+ } else if (getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+ // walk the entries until the 0xFFFF index value is reached
+ while (c.coll.m_contractionIndex_[offset] != 0xFFFF) {
+ // start each candidate from a fresh copy of the accumulated characters
+ b.delete(0, b.length());
+ b.append(buffer);
+ newCE = c.coll.m_contractionCE_[offset];
+ // contraction processing grows the string to the right
+ b.append(c.coll.m_contractionIndex_[offset]);
+ if (isSpecial(newCE)
+ && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) {
+ addSpecial(c, b, newCE);
+ } else {
+ if (c.contractions != null) {
+ c.contractions.add(b.toString());
+ }
+ if (c.expansions != null && isSpecial(newCE)
+ && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+ c.expansions.add(b.toString());
+ }
+ }
+ offset++;
+ }
+ }
+ }
+
+ /**
+ * Walks every range of the context collator's trie and feeds the special collation elements found there into
+ * the result sets: contraction (and, if requested, prefix) elements go through addSpecial, expansion elements
+ * are added to the expansions set directly.
+ *
+ * @param c
+ *            working context holding the collator to scan, the output sets and the processing options
+ */
+ private void processSpecials(contContext c) {
+     int internalBufferSize = 512;
+     TrieIterator trieiterator = new TrieIterator(c.coll.m_trie_);
+     RangeValueIterator.Element element = new RangeValueIterator.Element();
+     while (trieiterator.next(element)) {
+         int start = element.start;
+         int limit = element.limit;
+         int CE = element.value;
+         StringBuilder contraction = new StringBuilder(internalBufferSize);
+
+         if (isSpecial(CE)) {
+             if (((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) {
+                 while (start < limit) {
+                     // if there are suppressed contractions, we don't
+                     // want to add them.
+                     if (c.removedContractions != null && c.removedContractions.contains(start)) {
+                         start++;
+                         continue;
+                     }
+                     // Each code point of the range starts its own contraction string. Without this
+                     // reset, later code points of a multi-element range would inherit the characters
+                     // appended for the earlier ones.
+                     contraction.setLength(0);
+                     contraction.append((char) start);
+                     addSpecial(c, contraction, CE);
+                     start++;
+                 }
+             } else if (c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                 // every code point in an expansion range is an expansion by itself
+                 while (start < limit) {
+                     c.expansions.add(start++);
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+ * Gets unicode sets containing contractions and/or expansions of a collator
+ *
+ * @param contractions
+ * if not null, set to contain contractions
+ * @param expansions
+ * if not null, set to contain expansions
+ * @param addPrefixes
+ * add the prefix contextual elements to contractions
+ * @throws Exception
+ * Throws an exception if any errors occurs.
+ * @stable ICU 3.4
+ */
+ public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
+         throws Exception {
+     // Start from empty result sets; a null set means that result is not wanted.
+     if (contractions != null) {
+         contractions.clear();
+     }
+     if (expansions != null) {
+         expansions.clear();
+     }
+
+     // Parse this collator's rules; the parser's remove set is used as the suppression list below.
+     final String ownRules = getRules();
+     final CollationRuleParser parser = new CollationRuleParser(ownRules);
+     final contContext context = new contContext(RuleBasedCollator.UCA_, contractions, expansions,
+             parser.m_removeSet_, addPrefixes);
+
+     // First pass: the UCA collator, skipping the code points in the remove set.
+     processSpecials(context);
+
+     // Second pass: this collator itself, with nothing suppressed.
+     context.coll = this;
+     context.removedContractions = null;
+     processSpecials(context);
+ }
+
+ /**
+ *
+ * Get a Collation key for the argument String source from this RuleBasedCollator.
+ *
+ *
+ * General recommendation:
+ * If comparisons are to be done on the same String multiple times, it is more efficient to generate
+ * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If each
+ * String is compared only once, using the method RuleBasedCollator.compare(String, String) will give better
+ * performance.
+ *
+ *
+ * See the class documentation for an explanation about CollationKeys.
+ *
+ *
+ * @param source
+ * the text String to be transformed into a collation key.
+ * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
+ * String is null, a null CollationKey is returned.
+ * @see CollationKey
+ * @see #compare(String, String)
+ * @see #getRawCollationKey
+ * @stable ICU 2.8
+ */
+ public CollationKey getCollationKey(String source) {
+     if (source != null) {
+         // Obtain the collation buffer for the duration of the call and always hand it back.
+         CollationBuffer scratch = null;
+         try {
+             scratch = getCollationBuffer();
+             return getCollationKey(source, scratch);
+         } finally {
+             releaseCollationBuffer(scratch);
+         }
+     }
+     // A null source has no key.
+     return null;
+ }
+
+ private CollationKey getCollationKey(String source, CollationBuffer buffer) {
+ buffer.m_utilRawCollationKey_ = getRawCollationKey(source, buffer.m_utilRawCollationKey_, buffer);
+ return new CollationKey(source, buffer.m_utilRawCollationKey_);
+ }
+
+ /**
+ * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
+ * result into the user provided argument key. If key has an internal byte array of length that's too small for the
+ * result, the internal byte array will be grown to the exact required size.
+ *
+ * @param source the text String to be transformed into a RawCollationKey
+ * @param key output RawCollationKey to store results
+ * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user
+ * provided key will be returned.
+ * @see #getCollationKey
+ * @see #compare(String, String)
+ * @see RawCollationKey
+ * @stable ICU 2.8
+ */
+ public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
+     if (source != null) {
+         // Obtain the collation buffer for the duration of the call and always hand it back.
+         CollationBuffer scratch = null;
+         try {
+             scratch = getCollationBuffer();
+             return getRawCollationKey(source, key, scratch);
+         } finally {
+             releaseCollationBuffer(scratch);
+         }
+     }
+     // A null source yields null, even when the caller supplied a key object.
+     return null;
+ }
+
+ /**
+ * Builds the raw sort key for source using the given working buffer.
+ *
+ * @param source
+ *            text to generate the key for; it may be replaced locally by a normalized form
+ * @param key
+ *            key object to fill, or null to have a new RawCollationKey created
+ * @param buffer
+ *            working buffer whose comparison-level flags are set here before the key bytes are generated
+ * @return the filled key (the supplied one, or the newly created one when key was null)
+ */
+ private RawCollationKey getRawCollationKey(String source, RawCollationKey key, CollationBuffer buffer) {
+ int strength = getStrength();
+ // record on the buffer which levels take part, based on case level and strength
+ buffer.m_utilCompare0_ = m_isCaseLevel_;
+ // m_utilCompare1_ = true;
+ buffer.m_utilCompare2_ = strength >= SECONDARY;
+ buffer.m_utilCompare3_ = strength >= TERTIARY;
+ buffer.m_utilCompare4_ = strength >= QUATERNARY;
+ buffer.m_utilCompare5_ = strength == IDENTICAL;
+
+ // French handling only applies when the secondary level is generated at all
+ boolean doFrench = m_isFrenchCollation_ && buffer.m_utilCompare2_;
+ // TODO: UCOL_COMMON_BOT4 should be a function of qShifted.
+ // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so
+ // high.
+ // derived from the variable top value: (value >>> 8) + 1, masked with LAST_BYTE_MASK_
+ int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_;
+ byte hiragana4 = 0;
+ if (m_isHiragana4_ && buffer.m_utilCompare4_) {
+ // allocate one more space for hiragana, value for hiragana
+ hiragana4 = (byte) commonBottom4;
+ commonBottom4++;
+ }
+
+ int bottomCount4 = 0xFF - commonBottom4;
+ // If we need to normalize, we'll do it all at once at the beginning!
+ if (buffer.m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
+ // if it is identical strength, we have to normalize the string to
+ // NFD so that it will be appended correctly to the end of the sort
+ // key
+ source = Normalizer.decompose(source, false);
+ } else if (getDecomposition() != NO_DECOMPOSITION
+ && Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.YES) {
+ // for the rest of the strength, if decomposition is on, FCD is
+ // enough for us to work on.
+ source = Normalizer.normalize(source, Normalizer.FCD);
+ }
+ // generate the key bytes with the working buffer, then write the key into the result object
+ getSortKeyBytes(source, doFrench, hiragana4, commonBottom4, bottomCount4, buffer);
+ if (key == null) {
+ key = new RawCollationKey();
+ }
+ getSortKey(source, doFrench, commonBottom4, bottomCount4, key, buffer);
+ return key;
+ }
+
+ /**
+ * Return true if an uppercase character is sorted before the corresponding lowercase character. See
+ * setUpperCaseFirst(boolean) for details.
+ *
+ * @see #setUpperCaseFirst
+ * @see #setLowerCaseFirst
+ * @see #isLowerCaseFirst
+ * @see #setCaseFirstDefault
+ * @return true if upper cased characters are sorted before lower cased characters, false otherwise
+ * @stable ICU 2.8
+ */
+ public boolean isUpperCaseFirst() {
+     // True only when the case-first attribute is set to upper-first.
+     final boolean upperFirst = m_caseFirst_ == AttributeValue.UPPER_FIRST_;
+     return upperFirst;
+ }
+
+ /**
+ * Return true if a lowercase character is sorted before the corresponding uppercase character. See
+ * setLowerCaseFirst(boolean) for details.
+ *
+ * @see #setUpperCaseFirst
+ * @see #setLowerCaseFirst
+ * @see #isUpperCaseFirst
+ * @see #setCaseFirstDefault
+ * @return true lower cased characters are sorted before upper cased characters, false otherwise
+ * @stable ICU 2.8
+ */
+ public boolean isLowerCaseFirst() {
+     // True only when the case-first attribute is set to lower-first.
+     final boolean lowerFirst = m_caseFirst_ == AttributeValue.LOWER_FIRST_;
+     return lowerFirst;
+ }
+
+ /**
+ * Checks if the alternate handling behaviour is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
+ * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
+ * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
+ * details.
+ *
+ * @return true or false
+ * @see #setAlternateHandlingShifted(boolean)
+ * @see #setAlternateHandlingDefault
+ * @stable ICU 2.8
+ */
+ public boolean isAlternateHandlingShifted() {
+     // Reports the stored alternate handling flag.
+     final boolean shifted = m_isAlternateHandlingShifted_;
+     return shifted;
+ }
+
+ /**
+ * Checks if case level is set to true. See setCaseLevel(boolean) for details.
+ *
+ * @return the case level mode
+ * @see #setCaseLevelDefault
+ * @see #isCaseLevel
+ * @see #setCaseLevel(boolean)
+ * @stable ICU 2.8
+ */
+ public boolean isCaseLevel() {
+     // Reports the stored case level flag.
+     final boolean caseLevel = m_isCaseLevel_;
+     return caseLevel;
+ }
+
+ /**
+ * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details.
+ *
+ * @return true if French Collation is set to true, false otherwise
+ * @see #setFrenchCollation(boolean)
+ * @see #setFrenchCollationDefault
+ * @stable ICU 2.8
+ */
+ public boolean isFrenchCollation() {
+     // Reports the stored French collation flag.
+     final boolean french = m_isFrenchCollation_;
+     return french;
+ }
+
+ /**
+ * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
+ *
+ * @return flag true if Hiragana Quaternary mode is on, false otherwise
+ * @see #setHiraganaQuaternaryDefault
+ * @see #setHiraganaQuaternary(boolean)
+ * @stable ICU 2.8
+ */
+ public boolean isHiraganaQuaternary() {
+     // Reports the stored Hiragana quaternary flag.
+     final boolean hiraganaQuaternary = m_isHiragana4_;
+     return hiraganaQuaternary;
+ }
+
+ /**
+ * Gets the variable top value of a Collator. Lower 16 bits are undefined and should be ignored.
+ *
+ * @return the variable top value of a Collator.
+ * @see #setVariableTop
+ * @stable ICU 2.6
+ */
+ public int getVariableTop() {
+     // The value is stored in the low 16 bits; shift it back into the upper half for callers.
+     final int shifted = m_variableTopValue_ << 16;
+     return shifted;
+ }
+
+ /**
+ * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a
+ * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2'
+ *
+ * @see #setNumericCollation
+ * @see #setNumericCollationDefault
+ * @return true if numeric collation is turned on, false otherwise
+ * @stable ICU 2.8
+ */
+ public boolean getNumericCollation() {
+     // Reports the stored numeric collation flag.
+     final boolean numeric = m_isNumericCollation_;
+     return numeric;
+ }
+
+ /**
+ * Retrieves the reordering codes for this collator.
+ * These reordering codes are a combination of UScript codes and ReorderCodes.
+ * @return a copy of the reordering codes for this collator;
+ * if none are set then returns an empty array
+ * @see #setReorderCodes
+ * @see #getEquivalentReorderCodes
+ * @draft ICU 4.8
+ */
+ public int[] getReorderCodes() {
+     // No reordering set: answer with the shared empty array.
+     if (m_reorderCodes_ == null) {
+         return LeadByteConstants.EMPTY_INT_ARRAY;
+     }
+     // Hand out a copy so callers cannot modify this collator's own array.
+     return m_reorderCodes_.clone();
+ }
+
+ /**
+  * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
+  * codes are grouped and must reorder together.
+  *
+  * The lookup maps the reorder code to its lead bytes and then collects every reorder code that is
+  * registered for any of those lead bytes. Duplicates are removed; the order of the returned codes
+  * follows the iteration order of a hash set and is therefore unspecified.
+  *
+  * @param reorderCode code for which equivalents to be retrieved
+  * @return the set of all reorder codes in the same group as the given reorder code; an empty array
+  * if no lead bytes are registered for the code.
+  * @see #setReorderCodes
+  * @see #getReorderCodes
+  * @draft ICU 4.8
+  */
+ public static int[] getEquivalentReorderCodes(int reorderCode) {
+     // The element type must be declared: iterating a raw Set with an int loop variable does not compile.
+     Set<Integer> equivalentCodesSet = new HashSet<Integer>();
+     int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(reorderCode);
+     for (int leadByte : leadBytes) {
+         int[] codes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getReorderCodesForLeadByte(leadByte);
+         for (int code : codes) {
+             equivalentCodesSet.add(code);
+         }
+     }
+     // Unbox the de-duplicated codes into the primitive array the API returns.
+     int[] equivalentCodes = new int[equivalentCodesSet.size()];
+     int i = 0;
+     for (int code : equivalentCodesSet) {
+         equivalentCodes[i++] = code;
+     }
+     return equivalentCodes;
+ }
+
+ // public other methods -------------------------------------------------
+
+ /**
+  * Tests this RuleBasedCollator for equality with another object. Two RuleBasedCollators are considered equal when
+  * they are of the same class and agree on the compared attributes, the reorder codes and the collation rules.
+  * When the "collation" debug flag is enabled, the internal tables are compared as well.
+  *
+  * @param obj
+  *            the RuleBasedCollator to be compared to.
+  * @return true if this RuleBasedCollator has exactly the same collation behaviour as obj, false otherwise.
+  * @stable ICU 2.8
+  */
+ public boolean equals(Object obj) {
+     if (obj == null) {
+         return false;
+     }
+     if (obj == this) {
+         return true;
+     }
+     if (obj.getClass() != getClass()) {
+         return false;
+     }
+     final RuleBasedCollator that = (RuleBasedCollator) obj;
+
+     // All other non-transient information is also contained in the rules.
+     final boolean sameAttributes = getStrength() == that.getStrength()
+             && getDecomposition() == that.getDecomposition()
+             && that.m_caseFirst_ == m_caseFirst_
+             && that.m_caseSwitch_ == m_caseSwitch_
+             && that.m_isAlternateHandlingShifted_ == m_isAlternateHandlingShifted_
+             && that.m_isCaseLevel_ == m_isCaseLevel_
+             && that.m_isFrenchCollation_ == m_isFrenchCollation_
+             && that.m_isHiragana4_ == m_isHiragana4_;
+     if (!sameAttributes) {
+         return false;
+     }
+
+     // Both null, or same length with identical elements.
+     if (!Arrays.equals(m_reorderCodes_, that.m_reorderCodes_)) {
+         return false;
+     }
+
+     final boolean sameRules = (m_rules_ == null)
+             ? that.m_rules_ == null
+             : m_rules_.equals(that.m_rules_);
+     if (!sameRules) {
+         return false;
+     }
+     if (!ICUDebug.enabled("collation")) {
+         return true;
+     }
+
+     // Debug-only deep comparison of the internal state.
+     final boolean sameScalars = m_addition3_ == that.m_addition3_
+             && m_bottom3_ == that.m_bottom3_
+             && m_bottomCount3_ == that.m_bottomCount3_
+             && m_common3_ == that.m_common3_
+             && m_isSimple3_ == that.m_isSimple3_
+             && m_mask3_ == that.m_mask3_
+             && m_minContractionEnd_ == that.m_minContractionEnd_
+             && m_minUnsafe_ == that.m_minUnsafe_
+             && m_top3_ == that.m_top3_
+             && m_topCount3_ == that.m_topCount3_
+             && Arrays.equals(m_unsafe_, that.m_unsafe_);
+     if (!sameScalars) {
+         return false;
+     }
+
+     if (!m_trie_.equals(that.m_trie_)) {
+         // we should use the trie iterator here, but then this part is
+         // only used in the test.
+         for (int cp = UCharacter.MAX_VALUE; cp >= UCharacter.MIN_VALUE; cp--) {
+             int mine = m_trie_.getCodePointValue(cp);
+             int theirs = that.m_trie_.getCodePointValue(cp);
+             if (mine == theirs) {
+                 continue;
+             }
+             // Values differ: they may still match once the per-collator table offsets are removed.
+             final int mask = mine & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_);
+             if (mask != (theirs & 0xff000000)) {
+                 return false;
+             }
+             mine &= 0xffffff;
+             theirs &= 0xffffff;
+             if (mask == 0xf1000000) {
+                 mine -= (m_expansionOffset_ << 4);
+                 theirs -= (that.m_expansionOffset_ << 4);
+             } else if (mask == 0xf2000000) {
+                 mine -= m_contractionOffset_;
+                 theirs -= that.m_contractionOffset_;
+             }
+             if (mine != theirs) {
+                 return false;
+             }
+         }
+     }
+
+     final boolean sameTables = Arrays.equals(m_contractionCE_, that.m_contractionCE_)
+             && Arrays.equals(m_contractionEnd_, that.m_contractionEnd_)
+             && Arrays.equals(m_contractionIndex_, that.m_contractionIndex_)
+             && Arrays.equals(m_expansion_, that.m_expansion_)
+             && Arrays.equals(m_expansionEndCE_, that.m_expansionEndCE_);
+     if (!sameTables) {
+         return false;
+     }
+
+     // not comparing paddings
+     final int endCount = m_expansionEndCE_.length;
+     for (int idx = 0; idx < endCount; idx++) {
+         if (m_expansionEndCEMaxSize_[idx] != that.m_expansionEndCEMaxSize_[idx]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Computes a hash code for this RuleBasedCollator from its rule string. A collator whose rules are null hashes
+  * like one with empty rules.
+  *
+  * @return the hash code of the rule string of this Collator
+  * @stable ICU 2.8
+  */
+ public int hashCode() {
+     final String ruleText = getRules();
+     return (ruleText == null ? "" : ruleText).hashCode();
+ }
+
+ /**
+ * Compares the source text String to the target text String according to the collation rules, strength and
+ * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
+ * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
+ * class description for an example of use.
+ *
+ * General recommendation:
+ * If the same String is to be compared multiple times, it is more efficient to generate CollationKeys for the
+ * Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed is critical and object
+ * instantiation is to be reduced, further optimization may be achieved by generating a simpler key of the form
+ * RawCollationKey and reusing this RawCollationKey object with the method RuleBasedCollator.getRawCollationKey.
+ * The internal byte representation can be accessed directly via RawCollationKey and stored for future use. Like
+ * CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key comparisons. If each String
+ * is compared only once, using the method RuleBasedCollator.compare(String, String) will perform better.
+ *
+ *
+ * @param source
+ * the source text String.
+ * @param target
+ * the target text String.
+ * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
+ * and target are equal, value is greater than zero if source is greater than target.
+ * @see CollationKey
+ * @see #getCollationKey
+ * @stable ICU 2.8
+ */
+ public int compare(String source, String target) {
+ // Identity shortcut: the very same String object trivially compares equal.
+ if (source == target) {
+ return 0;
+ }
+ CollationBuffer buffer = null;
+ try {
+ // Acquire inside the try so that the finally clause always runs; if acquisition itself fails,
+ // releaseCollationBuffer receives null.
+ buffer = getCollationBuffer();
+ return compare(source, target, buffer);
+ } finally {
+ releaseCollationBuffer(buffer);
+ }
+ }
+
+ private int compare(String source, String target, CollationBuffer buffer) {
+ // Find the length of any leading portion that is equal
+ int offset = getFirstUnmatchedOffset(source, target);
+ // return compareRegular(source, target, offset);
+ if (latinOneUse_) {
+ if ((offset < source.length() && source.charAt(offset) > ENDOFLATINONERANGE_)
+ || (offset < target.length() && target.charAt(offset) > ENDOFLATINONERANGE_)) {
+ // source or target start with non-latin-1
+ return compareRegular(source, target, offset, buffer);
+ } else {
+ return compareUseLatin1(source, target, offset, buffer);
+ }
+ } else {
+ return compareRegular(source, target, offset, buffer);
+ }
+ }
+
+ // package private inner interfaces --------------------------------------
+
+ /**
+  * Values that can be assigned to the Collator attributes. Fields of an interface are implicitly
+  * public, static and final, so the modifiers are not repeated on each constant.
+  */
+ static interface AttributeValue {
+     /**
+      * Requests the default value of an attribute. See the individual attribute for details on its default value.
+      */
+     int DEFAULT_ = -1;
+
+     // strength values ---------------------------------------------------
+
+     /** Primary collation strength */
+     int PRIMARY_ = Collator.PRIMARY;
+     /** Secondary collation strength */
+     int SECONDARY_ = Collator.SECONDARY;
+     /** Tertiary collation strength */
+     int TERTIARY_ = Collator.TERTIARY;
+     /** Default collation strength */
+     int DEFAULT_STRENGTH_ = Collator.TERTIARY;
+     /** Internal use for strength checks in Collation elements */
+     int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1;
+     /** Quaternary collation strength */
+     int QUATERNARY_ = 3;
+     /** Identical collation strength */
+     int IDENTICAL_ = Collator.IDENTICAL;
+     /** Internal use for strength checks */
+     int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1;
+
+     // on/off switches ---------------------------------------------------
+
+     /**
+      * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, HIRAGANA_QUATERNARY_MODE and
+      * DECOMPOSITION_MODE
+      */
+     int OFF_ = 16;
+     /**
+      * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, HIRAGANA_QUATERNARY_MODE and
+      * DECOMPOSITION_MODE
+      */
+     int ON_ = 17;
+
+     // alternate handling ------------------------------------------------
+
+     /** Valid for ALTERNATE_HANDLING. Alternate handling will be shifted */
+     int SHIFTED_ = 20;
+     /** Valid for ALTERNATE_HANDLING. Alternate handling will be non ignorable */
+     int NON_IGNORABLE_ = 21;
+
+     // case first --------------------------------------------------------
+
+     /** Valid for CASE_FIRST - lower case sorts before upper case */
+     int LOWER_FIRST_ = 24;
+     /** Upper case sorts before lower case */
+     int UPPER_FIRST_ = 25;
+
+     /** Number of attribute values */
+     int LIMIT_ = 29;
+ }
+
+ /**
+  * Identifiers of the attributes that the collation service understands. Every attribute accepts the DEFAULT
+  * value in addition to its own specific values. Fields of an interface are implicitly public, static and
+  * final, so the modifiers are not repeated on each constant.
+  */
+ static interface Attribute {
+     /**
+      * Direction of secondary weights - used in French. ON makes secondary weights be considered backwards;
+      * OFF treats secondary weights in the order they appear.
+      */
+     int FRENCH_COLLATION_ = 0;
+     /**
+      * Handling of variable elements. NON_IGNORABLE (default) treats all the codepoints with non-ignorable
+      * primary weights in the same way; SHIFTED causes codepoints with primary weights that are equal or below
+      * the variable top value to be ignored on primary level and moved to the quaternary level.
+      */
+     int ALTERNATE_HANDLING_ = 1;
+     /**
+      * Ordering of upper and lower case letters. OFF (default) orders upper and lower case letters in
+      * accordance to their tertiary weights; UPPER_FIRST forces upper case letters to sort before lower case
+      * letters; LOWER_FIRST does the opposite.
+      */
+     int CASE_FIRST_ = 2;
+     /**
+      * Whether an extra case level (positioned before the third level) is generated. OFF (default) generates
+      * no case level; ON generates it. Contents of the case level are affected by the value of the CASE_FIRST
+      * attribute. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and
+      * enable case level.
+      */
+     int CASE_LEVEL_ = 3;
+     /**
+      * Whether the normalization check and necessary normalizations are performed. With OFF (default) no
+      * normalization check is performed, and the correctness of the result is guaranteed only if the input
+      * data is in so-called FCD form (see users manual for more info). With ON, an incremental check is
+      * performed to see whether the input data is in the FCD form; if it is not, incremental NFD normalization
+      * is performed.
+      */
+     int NORMALIZATION_MODE_ = 4;
+     /**
+      * The strength attribute: PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. The usual strength for
+      * most locales (except Japanese) is tertiary. Quaternary strength is useful when combined with shifted
+      * setting for alternate handling attribute and for JIS x 4061 collation, when it is used to distinguish
+      * between Katakana and Hiragana (this is achieved by setting the HIRAGANA_QUATERNARY mode to on).
+      * Otherwise, quaternary level is affected only by the number of non ignorable code points in the string.
+      * Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string.
+      */
+     int STRENGTH_ = 5;
+     /**
+      * When turned on, positions Hiragana before all non-ignorables on quaternary level. This is a sneaky way
+      * to produce JIS sort order.
+      */
+     int HIRAGANA_QUATERNARY_MODE_ = 6;
+     /** Attribute count */
+     int LIMIT_ = 7;
+ }
+
+ /**
+  * DataManipulate singleton. The class holds no instance state; a single shared instance is created when the
+  * class is initialized and handed out by getInstance().
+  */
+ static class DataManipulate implements Trie.DataManipulate {
+     // public methods ----------------------------------------------------
+
+     /**
+      * Internal method called to parse a lead surrogate's ce for the offset to the next trail surrogate data.
+      *
+      * @param ce
+      *            collation element of the lead surrogate
+      * @return the low 24 bits of ce when ce is a special ce carrying the surrogate tag, otherwise 0
+      * @stable ICU 2.8
+      */
+     public final int getFoldingOffset(int ce) {
+         if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) {
+             return (ce & 0xFFFFFF);
+         }
+         return 0;
+     }
+
+     /**
+      * Get singleton object
+      *
+      * @return the shared DataManipulate instance; the same object on every call
+      */
+     public static final DataManipulate getInstance() {
+         return m_instance_;
+     }
+
+     // private data member ----------------------------------------------
+
+     /**
+      * Singleton instance. Created eagerly during class initialization instead of lazily inside getInstance():
+      * the former unsynchronized "check for null, then create" sequence could let concurrent callers create
+      * more than one instance.
+      */
+     private static final DataManipulate m_instance_ = new DataManipulate();
+
+     // private constructor ----------------------------------------------
+
+     /**
+      * private to prevent initialization
+      */
+     private DataManipulate() {
+     }
+ }
+
+ /**
+  * UCAConstants: holder for boundary values of the UCA table. Each boundary is kept in a two-element int
+  * array; the PRIMARY_* members are single ints. All members start out zeroed and are filled in by code
+  * outside this class. The hex values in the trailing comments are carried over unchanged from the original
+  * declarations.
+  */
+ static final class UCAConstants {
+     // ignorable boundaries
+     int[] FIRST_TERTIARY_IGNORABLE_ = new int[2]; // 0x00000000
+     int[] LAST_TERTIARY_IGNORABLE_ = new int[2]; // 0x00000000
+     int[] FIRST_PRIMARY_IGNORABLE_ = new int[2]; // 0x00008705
+     int[] FIRST_SECONDARY_IGNORABLE_ = new int[2]; // 0x00000000
+     int[] LAST_SECONDARY_IGNORABLE_ = new int[2]; // 0x00000500
+     int[] LAST_PRIMARY_IGNORABLE_ = new int[2]; // 0x0000DD05
+
+     // variable / non-variable boundaries
+     int[] FIRST_VARIABLE_ = new int[2]; // 0x05070505
+     int[] LAST_VARIABLE_ = new int[2]; // 0x13CF0505
+     int[] FIRST_NON_VARIABLE_ = new int[2]; // 0x16200505
+     int[] LAST_NON_VARIABLE_ = new int[2]; // 0x767C0505
+     int[] RESET_TOP_VALUE_ = new int[2]; // 0x9F000303
+
+     // implicit and trailing boundaries
+     int[] FIRST_IMPLICIT_ = new int[2];
+     int[] LAST_IMPLICIT_ = new int[2];
+     int[] FIRST_TRAILING_ = new int[2];
+     int[] LAST_TRAILING_ = new int[2];
+
+     // primary weight ranges
+     int PRIMARY_TOP_MIN_;
+     int PRIMARY_IMPLICIT_MIN_; // 0xE8000000
+     int PRIMARY_IMPLICIT_MAX_; // 0xF0000000
+     int PRIMARY_TRAILING_MIN_; // 0xE8000000
+     int PRIMARY_TRAILING_MAX_; // 0xF0000000
+     int PRIMARY_SPECIAL_MIN_; // 0xE8000000
+     int PRIMARY_SPECIAL_MAX_; // 0xF0000000
+ }
+
+ /**
+ * Script to Lead Byte and Lead Byte to Script Data
+ *
+ */
+ static final class LeadByteConstants {
+ private static final int DATA_MASK_FOR_INDEX = 0x8000;
+ private static final int[] EMPTY_INT_ARRAY = new int[0];
+
+ private int serializedSize = 0;
+
+ private Map SCRIPT_TO_LEAD_BYTES_INDEX;
+ private byte[] SCRIPT_TO_LEAD_BYTES_DATA;
+
+ private int[] LEAD_BYTE_TO_SCRIPTS_INDEX;
+ private byte[] LEAD_BYTE_TO_SCRIPTS_DATA;
+
+ LeadByteConstants() {
+ }
+
+ void read(DataInputStream dis) throws IOException {
+ int readcount = 0;
+ int indexCount;
+ int dataSize;
+
+ // script to lead bytes
+ indexCount = dis.readShort();
+ readcount += 2;
+ dataSize = dis.readShort();
+ readcount += 2;
+ this.SCRIPT_TO_LEAD_BYTES_INDEX = new HashMap();
+ //System.out.println("Script to Lead Bytes Index - Count = " + indexCount);
+ for (int index = 0; index < indexCount; index++) {
+ int reorderCode = dis.readShort(); // reorder code
+ readcount += 2;
+ int dataOffset = 0xffff & dis.readShort(); // data offset
+ readcount += 2;
+ // System.out.println("\t-------------");
+ // System.out.println("\toffset = " + Integer.toHexString(readcount - 4));
+ // System.out.println("\treorderCode = " + Integer.toHexString(reorderCode));
+ // System.out.println("\tdataOffset = " + Integer.toHexString(dataOffset));
+ this.SCRIPT_TO_LEAD_BYTES_INDEX.put(reorderCode, dataOffset);
+ }
+
+ this.SCRIPT_TO_LEAD_BYTES_DATA = new byte[dataSize * 2];
+ dis.readFully(this.SCRIPT_TO_LEAD_BYTES_DATA, 0, this.SCRIPT_TO_LEAD_BYTES_DATA.length);
+ readcount += this.SCRIPT_TO_LEAD_BYTES_DATA.length;
+
+ // lead byte to scripts
+ indexCount = dis.readShort();
+ readcount += 2;
+ dataSize = dis.readShort();
+ readcount += 2;
+ this.LEAD_BYTE_TO_SCRIPTS_INDEX = new int[indexCount];
+ //System.out.println("Lead Byte to Scripts Index - Count = " + indexCount);
+ for (int index = 0; index < indexCount; index++) {
+ this.LEAD_BYTE_TO_SCRIPTS_INDEX[index] = 0xffff & dis.readShort();
+ readcount += 2;
+ // System.out.println("\t-------------");
+ // System.out.println("\toffset = " + Integer.toHexString(readcount - 2));
+ // System.out.println("\tindex = " + Integer.toHexString(index));
+ // System.out.println("\tdataOffset = " + Integer.toHexString(this.LEAD_BYTE_TO_SCRIPTS_INDEX[index]));
+ }
+
+ this.LEAD_BYTE_TO_SCRIPTS_DATA = new byte[dataSize * 2];
+ dis.readFully(this.LEAD_BYTE_TO_SCRIPTS_DATA, 0, this.LEAD_BYTE_TO_SCRIPTS_DATA.length);
+ readcount += this.LEAD_BYTE_TO_SCRIPTS_DATA.length;
+
+ this.serializedSize = readcount;
+ }
+
+ int getSerializedDataSize() {
+ return this.serializedSize;
+ }
+
+ int[] getReorderCodesForLeadByte(int leadByte) {
+ if (leadByte >= this.LEAD_BYTE_TO_SCRIPTS_INDEX.length) {
+ return EMPTY_INT_ARRAY;
+ }
+ int offset = this.LEAD_BYTE_TO_SCRIPTS_INDEX[leadByte];
+ if (offset == 0) {
+ return EMPTY_INT_ARRAY;
+ }
+ int[] reorderCodes;
+ if ((offset & DATA_MASK_FOR_INDEX) == DATA_MASK_FOR_INDEX) {
+ reorderCodes = new int[1];
+ reorderCodes[0] = offset & ~DATA_MASK_FOR_INDEX;
+ } else {
+ int length = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
+ offset++;
+
+ reorderCodes = new int[length];
+ for (int code = 0; code < length; code++, offset++) {
+ reorderCodes[code] = readShort(this.LEAD_BYTE_TO_SCRIPTS_DATA, offset);
+ }
+ }
+ return reorderCodes;
+ }
+
+ int[] getLeadBytesForReorderCode(int reorderCode) {
+ if (!this.SCRIPT_TO_LEAD_BYTES_INDEX.containsKey(reorderCode)) {
+ return EMPTY_INT_ARRAY;
+ }
+ int offset = this.SCRIPT_TO_LEAD_BYTES_INDEX.get(reorderCode);
+
+ if (offset == 0) {
+ return EMPTY_INT_ARRAY;
+ }
+
+ int[] leadBytes;
+ if ((offset & DATA_MASK_FOR_INDEX) == DATA_MASK_FOR_INDEX) {
+ leadBytes = new int[1];
+ leadBytes[0] = offset & ~DATA_MASK_FOR_INDEX;
+ } else {
+ int length = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
+ offset++;
+
+ leadBytes = new int[length];
+ for (int leadByte = 0; leadByte < length; leadByte++, offset++) {
+ leadBytes[leadByte] = readShort(this.SCRIPT_TO_LEAD_BYTES_DATA, offset);
+ }
+ }
+ return leadBytes;
+ }
+
+ private static int readShort(byte[] data, int offset) {
+ return (0xff & data[offset * 2]) << 8 | (data[offset * 2 + 1] & 0xff);
+ }
+ }
+
+ // package private data member -------------------------------------------
+
+ // Byte values and bit masks used when interpreting collation elements (CEs).
+ static final byte BYTE_FIRST_TAILORED_ = (byte) 0x04;
+ static final byte BYTE_COMMON_ = (byte) 0x05;
+ static final int COMMON_TOP_2_ = 0x86; // declared as int because 0x86 does not fit in a signed byte
+ static final int COMMON_BOTTOM_2_ = BYTE_COMMON_;
+ static final int COMMON_BOTTOM_3 = 0x05;
+ /**
+ * Case strength mask
+ */
+ static final int CE_CASE_BIT_MASK_ = 0xC0;
+ /**
+ * Shift that moves the tag bits selected by CE_TAG_MASK_ down to the low end of an int (see getTag)
+ */
+ static final int CE_TAG_SHIFT_ = 24;
+ /**
+ * Mask selecting the tag bits of a special collation element (see getTag)
+ */
+ static final int CE_TAG_MASK_ = 0x0F000000;
+
+ /**
+ * Bits that are all set in a special collation element (see isSpecial)
+ */
+ static final int CE_SPECIAL_FLAG_ = 0xF0000000;
+ /**
+ * Lead surrogate that is tailored and doesn't start a contraction
+ */
+ static final int CE_SURROGATE_TAG_ = 5;
+ /**
+ * Mask to get the primary strength of the collation element
+ */
+ static final int CE_PRIMARY_MASK_ = 0xFFFF0000;
+ /**
+ * Mask to get the secondary strength of the collation element
+ */
+ static final int CE_SECONDARY_MASK_ = 0xFF00;
+ /**
+ * Mask to get the tertiary strength of the collation element
+ */
+ static final int CE_TERTIARY_MASK_ = 0xFF;
+ /**
+ * Primary strength shift
+ */
+ static final int CE_PRIMARY_SHIFT_ = 16;
+ /**
+ * Secondary strength shift
+ */
+ static final int CE_SECONDARY_SHIFT_ = 8;
+ /**
+ * Continuation marker
+ */
+ static final int CE_CONTINUATION_MARKER_ = 0xC0;
+
+ /**
+ * Size of collator raw data headers and options before the expansion data. This is used when expansion ces are to
+ * be retrieved. ICU4C uses the expansion offset starting from UCollator.UColHeader, hence ICU4J has to subtract
+ * that to get the right expansion ce offset. In number of ints.
+ */
+ int m_expansionOffset_;
+ /**
+ * Size of collator raw data headers, options and expansions before contraction data. This is used when contraction
+ * ces are to be retrieved. ICU4C uses contraction offset starting from UCollator.UColHeader, hence ICU4J has
+ * to subtract that to get the right contraction ce offset. In number of chars.
+ */
+ int m_contractionOffset_;
+ /**
+ * Flag indicator if Jamo is special
+ */
+ boolean m_isJamoSpecial_;
+
+ // Collator options ------------------------------------------------------
+
+ // Default values of the options; each m_defaultXxx_ field is the default counterpart of an option field.
+ int m_defaultVariableTopValue_;
+ boolean m_defaultIsFrenchCollation_;
+ boolean m_defaultIsAlternateHandlingShifted_;
+ int m_defaultCaseFirst_;
+ boolean m_defaultIsCaseLevel_;
+ int m_defaultDecomposition_;
+ int m_defaultStrength_;
+ boolean m_defaultIsHiragana4_;
+ boolean m_defaultIsNumericCollation_;
+ /**
+ * Default script order - the one created at initial rule parse time
+ */
+ int[] m_defaultReorderCodes_;
+
+ /**
+ * Value of the variable top
+ */
+ int m_variableTopValue_;
+ /**
+ * Attribute for special Hiragana
+ */
+ boolean m_isHiragana4_;
+ /**
+ * Case sorting customization
+ */
+ int m_caseFirst_;
+ /**
+ * Numeric collation option
+ */
+ boolean m_isNumericCollation_;
+ /**
+ * Script order; null when no reorder codes are set
+ */
+ int[] m_reorderCodes_;
+
+ // end Collator options --------------------------------------------------
+
+ /**
+ * Expansion table
+ */
+ int m_expansion_[];
+ /**
+ * Contraction index table
+ */
+ char m_contractionIndex_[];
+ /**
+ * Contraction CE table
+ */
+ int m_contractionCE_[];
+ /**
+ * Data trie
+ */
+ IntTrie m_trie_;
+ /**
+ * Table to store all collation elements that are the last element of an expansion. This is for use in StringSearch.
+ */
+ int m_expansionEndCE_[];
+ /**
+ * Table to store the maximum size of any expansions that end with the corresponding collation element in
+ * m_expansionEndCE_. For use in StringSearch too
+ */
+ byte m_expansionEndCEMaxSize_[];
+ /**
+ * Heuristic table to store information on whether a char character is considered "unsafe". "Unsafe" characters are
+ * combining marks or those belonging to some contraction sequence from the offset 1 onwards. E.g. if "ABC" is the
+ * only contraction, then 'B' and 'C' are considered unsafe. If we have another contraction "ZA" with the one above,
+ * then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
+ */
+ byte m_unsafe_[];
+ /**
+ * Table to store information on whether a codepoint can occur as the last character in a contraction
+ */
+ byte m_contractionEnd_[];
+ /**
+ * Original collation rules
+ */
+ String m_rules_;
+ /**
+ * The smallest "unsafe" codepoint
+ */
+ char m_minUnsafe_;
+ /**
+ * The smallest codepoint that could be the end of a contraction
+ */
+ char m_minContractionEnd_;
+ /**
+ * General version of the collator
+ */
+ VersionInfo m_version_;
+ /**
+ * UCA version
+ */
+ VersionInfo m_UCA_version_;
+ /**
+ * UCD version
+ */
+ VersionInfo m_UCD_version_;
+ /**
+ * Lead byte and script data
+ */
+ int m_leadByteToScripts;
+ int m_scriptToLeadBytes;
+ /**
+ * The UCA collator instance created and loaded by the static initializer of this class. Other collators copy
+ * their tables and options from it (see setWithUCATables and setWithUCAData).
+ */
+ static final RuleBasedCollator UCA_;
+ /**
+ * UCA Constants
+ */
+ static final UCAConstants UCA_CONSTANTS_;
+ /**
+ * Lead Byte Constants
+ */
+ static LeadByteConstants LEADBYTE_CONSTANTS_;
+ /**
+ * Table for UCA and builder use
+ */
+ static final char UCA_CONTRACTIONS_[];
+
+ /**
+ * Set to true at the very end of the static initializer. checkUCA() only reports missing UCA data once this
+ * flag is set, so the UCA collator itself can be constructed while initialization is still running.
+ */
+ private static boolean UCA_INIT_COMPLETE;
+
+ /**
+ * Implicit generator
+ */
+ static final ImplicitCEGenerator impCEGen_;
+
+ static final byte SORT_LEVEL_TERMINATOR_ = 1;
+
+ // These are values from UCA required for
+ // implicit generation and suppressing sort key compression
+ // they should regularly be in the UCA, but if one
+ // is running without UCA, it could be a problem
+ static final int maxRegularPrimary = 0x7A;
+ static final int minImplicitPrimary = 0xE0;
+ static final int maxImplicitPrimary = 0xE4;
+
+ // block to initialise character property database
+ static {
+     // take pains to let static class init succeed, otherwise the class itself won't exist and
+     // clients will get a NoClassDefFoundException. Instead, make the constructors fail if
+     // we can't load the UCA data.
+
+     RuleBasedCollator iUCA_ = null;
+     UCAConstants iUCA_CONSTANTS_ = null;
+     LeadByteConstants iLEADBYTE_CONSTANTS = null;
+     char iUCA_CONTRACTIONS_[] = null;
+     ImplicitCEGenerator iimpCEGen_ = null;
+     // Becomes true only once the UCA tables have been read and initialized. iUCA_ is assigned before
+     // the data is read, so without this flag a failed load would leave a non-null, half-built UCA_
+     // behind and checkUCA() could never report the problem.
+     boolean ucaTablesLoaded = false;
+     try {
+         // !!! note what's going on here...
+         // even though the static init of the class is not yet complete, we
+         // instantiate an instance of the class. So we'd better be sure that
+         // instantiation doesn't rely on the static initialization that's
+         // not complete yet!
+         iUCA_ = new RuleBasedCollator();
+         iUCA_CONSTANTS_ = new UCAConstants();
+         iLEADBYTE_CONSTANTS = new LeadByteConstants();
+         iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_, iLEADBYTE_CONSTANTS);
+
+         // called before doing canonical closure for the UCA.
+         iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary);
+         // iimpCEGen_ = new ImplicitCEGenerator(iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_,
+         // iUCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_);
+         iUCA_.init();
+         ucaTablesLoaded = true;
+
+         // The rule string is optional: if it is missing, the collator keeps working with null rules.
+         ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+                 ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH);
+         iUCA_.m_rules_ = (String) rb.getObject("UCARules");
+     } catch (MissingResourceException ex) {
+         // Deliberately not rethrown: class initialization must succeed (see above).
+     } catch (IOException e) {
+         // Deliberately not rethrown: class initialization must succeed (see above).
+     }
+
+     if (!ucaTablesLoaded) {
+         // Make the constructors fail through checkUCA() instead of exposing an unusable collator.
+         iUCA_ = null;
+     }
+
+     UCA_ = iUCA_;
+     UCA_CONSTANTS_ = iUCA_CONSTANTS_;
+     LEADBYTE_CONSTANTS_ = iLEADBYTE_CONSTANTS;
+     UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_;
+     impCEGen_ = iimpCEGen_;
+
+     UCA_INIT_COMPLETE = true;
+ }
+
+ /**
+  * Verifies that the UCA collator is available. While static initialization is still in progress the check is
+  * skipped, so that the UCA collator itself can be constructed.
+  *
+  * @throws MissingResourceException if static initialization has completed and UCA_ is null
+  */
+ private static void checkUCA() throws MissingResourceException {
+     if (!UCA_INIT_COMPLETE || UCA_ != null) {
+         return;
+     }
+     throw new MissingResourceException("Collator UCA data unavailable", "", "");
+ }
+
+ // package private constructors ------------------------------------------
+
+ /**
+ *
+ * Package-private constructor for use by subclasses. Public access to creating Collators is handled by the API
+ * Collator.getInstance() or RuleBasedCollator(String rules).
+ *
+ *
+ * This constructor is also the one used by the static initializer to construct the UCA collator internally.
+ * It sets no fields itself; it only runs the UCA availability check.
+ *
+ */
+ RuleBasedCollator() {
+ checkUCA();
+ }
+
+ /**
+ * Constructs a RuleBasedCollator from the argument locale. If no resource bundle is associated with the locale,
+ * or if loading the locale's collation data fails with an exception, the UCA data is used instead.
+ *
+ * The collation type is taken from the locale's "collation" keyword, falling back to the bundle's
+ * "collations/default" entry. When the bundle carries a binary image ("%%CollationBin") whose UCA and UCD
+ * versions match those of the UCA collator, the image is used and any "%%ReorderCodes" are applied; otherwise
+ * the collator is built from the rule string ("Sequence").
+ *
+ * @param locale
+ * locale whose collation data is to be loaded
+ */
+ RuleBasedCollator(ULocale locale) {
+ checkUCA();
+ ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+ ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale);
+ if (rb != null) {
+ try {
+ // Use keywords, if supplied for lookup
+ String collkey = locale.getKeywordValue("collation");
+ if (collkey == null) {
+ collkey = rb.getStringWithFallback("collations/default");
+ }
+
+ // collations/default will always give a string back
+ // keyword for the real collation data
+ // if "collations/collkey" will return null if collkey == null
+ ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey);
+ if (elements != null) {
+ // TODO: Determine actual & valid locale correctly
+ ULocale uloc = rb.getULocale();
+ setLocale(uloc, uloc);
+
+ m_rules_ = elements.getString("Sequence");
+ ByteBuffer buf = elements.get("%%CollationBin").getBinary();
+ // %%CollationBin
+ if (buf != null) {
+ // m_rules_ = (String)rules[1][1];
+ CollatorReader.initRBC(this, buf);
+ /*
+ * BufferedInputStream input = new BufferedInputStream( new ByteArrayInputStream(map)); /*
+ * CollatorReader reader = new CollatorReader(input, false); if (map.length >
+ * MIN_BINARY_DATA_SIZE_) { reader.read(this, null); } else { reader.readHeader(this);
+ * reader.readOptions(this); // duplicating UCA_'s data setWithUCATables(); }
+ */
+ // at this point, we have read in the collator
+ // now we need to check whether the binary image has
+ // the right UCA and other versions
+ if (!m_UCA_version_.equals(UCA_.m_UCA_version_) || !m_UCD_version_.equals(UCA_.m_UCD_version_)) {
+ // Version mismatch: rebuild from the rule string. Note that this path returns before
+ // the %%ReorderCodes resource is consulted.
+ init(m_rules_);
+ return;
+ }
+ try {
+ UResourceBundle reorderRes = elements.get("%%ReorderCodes");
+ if (reorderRes != null) {
+ int[] reorderCodes = reorderRes.getIntVector();
+ setReorderCodes(reorderCodes);
+ // Keep a private copy as the default so that later changes to the array do not affect it.
+ m_defaultReorderCodes_ = reorderCodes.clone();
+ }
+ } catch (MissingResourceException e) {
+ // ignore: reorder codes are optional
+ }
+ init();
+ return;
+ } else {
+ // No binary image: build from the rule string.
+ init(m_rules_);
+ return;
+ }
+ }
+ } catch (Exception e) {
+ // NOTE(review): any exception, including runtime exceptions, is printed to stderr and then
+ // treated as "no data for this locale".
+ e.printStackTrace();
+ // if failed use UCA.
+ }
+ }
+ // Reached when there is no bundle, no collation element, or loading failed.
+ setWithUCAData();
+ }
+
+ // package private methods -----------------------------------------------
+
+ /**
+  * Points this collator at the tables of the UCA collator. The table references are shared, not copied.
+  * Options are not handled here.
+  */
+ final void setWithUCATables() {
+     final RuleBasedCollator uca = UCA_;
+
+     // offsets into the raw data
+     m_contractionOffset_ = uca.m_contractionOffset_;
+     m_expansionOffset_ = uca.m_expansionOffset_;
+
+     // expansion, contraction and trie tables
+     m_expansion_ = uca.m_expansion_;
+     m_contractionIndex_ = uca.m_contractionIndex_;
+     m_contractionCE_ = uca.m_contractionCE_;
+     m_trie_ = uca.m_trie_;
+     m_expansionEndCE_ = uca.m_expansionEndCE_;
+     m_expansionEndCEMaxSize_ = uca.m_expansionEndCEMaxSize_;
+
+     // heuristic tables and their lower bounds
+     m_unsafe_ = uca.m_unsafe_;
+     m_contractionEnd_ = uca.m_contractionEnd_;
+     m_minUnsafe_ = uca.m_minUnsafe_;
+     m_minContractionEnd_ = uca.m_minContractionEnd_;
+ }
+
+ /**
+ * Sets this collator to use all the options and tables of the UCA collator. Most values are copied field by
+ * field; decomposition and strength are applied through their setters, and the tables are taken over via
+ * setWithUCATables().
+ */
+ final void setWithUCAData() {
+ // latinOneFailed_ is held at true while the state is being copied and reset to false at the end.
+ // NOTE(review): presumably this keeps the setters below from using the Latin-1 fast path on
+ // half-copied state - confirm against the setter implementations.
+ latinOneFailed_ = true;
+
+ m_addition3_ = UCA_.m_addition3_;
+ m_bottom3_ = UCA_.m_bottom3_;
+ m_bottomCount3_ = UCA_.m_bottomCount3_;
+ m_caseFirst_ = UCA_.m_caseFirst_;
+ m_caseSwitch_ = UCA_.m_caseSwitch_;
+ m_common3_ = UCA_.m_common3_;
+ m_contractionOffset_ = UCA_.m_contractionOffset_;
+ // applied through the setter rather than by copying a field
+ setDecomposition(UCA_.getDecomposition());
+ m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_;
+ m_defaultDecomposition_ = UCA_.m_defaultDecomposition_;
+ m_defaultIsAlternateHandlingShifted_ = UCA_.m_defaultIsAlternateHandlingShifted_;
+ m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_;
+ m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_;
+ m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_;
+ m_defaultStrength_ = UCA_.m_defaultStrength_;
+ m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_;
+ m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_;
+ m_expansionOffset_ = UCA_.m_expansionOffset_;
+ m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_;
+ m_isCaseLevel_ = UCA_.m_isCaseLevel_;
+ m_isFrenchCollation_ = UCA_.m_isFrenchCollation_;
+ m_isHiragana4_ = UCA_.m_isHiragana4_;
+ m_isJamoSpecial_ = UCA_.m_isJamoSpecial_;
+ m_isSimple3_ = UCA_.m_isSimple3_;
+ m_mask3_ = UCA_.m_mask3_;
+ m_minContractionEnd_ = UCA_.m_minContractionEnd_;
+ m_minUnsafe_ = UCA_.m_minUnsafe_;
+ m_rules_ = UCA_.m_rules_;
+ // applied through the setter rather than by copying a field
+ setStrength(UCA_.getStrength());
+ m_top3_ = UCA_.m_top3_;
+ m_topCount3_ = UCA_.m_topCount3_;
+ m_variableTopValue_ = UCA_.m_variableTopValue_;
+ m_isNumericCollation_ = UCA_.m_isNumericCollation_;
+ // share the UCA tables (this repeats a few of the assignments made above)
+ setWithUCATables();
+ latinOneFailed_ = false;
+ }
+
+ /**
+  * Tests whether a char is potentially "unsafe" as a collation starting point. "Unsafe" characters are combining
+  * marks or those belonging to some contraction sequence from the offset 1 onwards. E.g. if "ABC" is the only
+  * contraction, then 'B' and 'C' are considered unsafe. If we have another contraction "ZA" with the one above,
+  * then 'A', 'B', 'C' are "unsafe" but 'Z' is not.
+  *
+  * Chars beyond the directly indexed range of the heuristic table are folded back into the table; surrogates in
+  * that range are always reported as unsafe.
+  *
+  * @param ch
+  *            character to test
+  * @return true if ch is unsafe, false otherwise
+  */
+ final boolean isUnsafe(char ch) {
+     if (ch < m_minUnsafe_) {
+         return false;
+     }
+
+     char slot = ch;
+     if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
+         // Lead and trail surrogates are always considered unsafe.
+         if (UTF16.isLeadSurrogate(ch) || UTF16.isTrailSurrogate(ch)) {
+             return true;
+         }
+         slot = (char) ((ch & HEURISTIC_OVERFLOW_MASK_) + HEURISTIC_OVERFLOW_OFFSET_);
+     }
+     // One bit per char: pick the table byte, then the bit inside it.
+     final int bits = m_unsafe_[slot >> HEURISTIC_SHIFT_];
+     return ((bits >> (slot & HEURISTIC_MASK_)) & 1) != 0;
+ }
+
+    /**
+     * Approximate test of whether a char is at a contraction end. The result is guaranteed to be true if the
+     * character is at the end of a contraction; otherwise it is not deterministic.
+     *
+     * @param ch
+     *            character to be tested
+     * @return true if ch may be the end of a contraction
+     */
+    final boolean isContractionEnd(char ch) {
+        // Trail surrogates are always reported as a possible contraction end.
+        if (UTF16.isTrailSurrogate(ch)) {
+            return true;
+        }
+        if (ch < m_minContractionEnd_) {
+            return false;
+        }
+
+        int slot = ch;
+        if (slot >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) {
+            // No bit of its own in the table: fold the value, then skip the first 256 bits.
+            slot = (slot & HEURISTIC_OVERFLOW_MASK_) + HEURISTIC_OVERFLOW_OFFSET_;
+        }
+        final int tableByte = m_contractionEnd_[slot >> HEURISTIC_SHIFT_];
+        final int bitInByte = slot & HEURISTIC_MASK_;
+        return ((tableByte >> bitInByte) & 1) != 0;
+    }
+
+ /**
+ * Retrieve the tag of a special ce
+ *
+ * @param ce
+ * ce to test
+ * @return tag of ce
+ */
+ static int getTag(int ce) {
+ return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_;
+ }
+
+ /**
+ * Checking if ce is special
+ *
+ * @param ce
+ * to check
+ * @return true if ce is special
+ */
+ static boolean isSpecial(int ce) {
+ return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_;
+ }
+
+    /**
+     * Checks whether the argument ce is a continuation. NULLORDER is never a continuation.
+     *
+     * @param ce
+     *            collation element to test
+     * @return true if ce is a continuation
+     */
+    static final boolean isContinuation(int ce) {
+        if (ce == CollationElementIterator.NULLORDER) {
+            return false;
+        }
+        return (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_;
+    }
+
+ // private inner classes ------------------------------------------------
+
+ // private variables -----------------------------------------------------
+
+ /**
+ * The smallest natural unsafe or contraction end char character before tailoring. This is a combining mark.
+ */
+ private static final int DEFAULT_MIN_HEURISTIC_ = 0x300;
+ /**
+ * Heuristic table size in bytes: 32 bytes holding 1 bit for each latin 1 char, plus some power of two (1024 bytes)
+ * for hashing the rest of the chars. Chars at or above (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_) have no bit of their
+ * own; isUnsafe and isContractionEnd fold them with HEURISTIC_OVERFLOW_MASK_ and HEURISTIC_OVERFLOW_OFFSET_ before
+ * the table lookup.
+ */
+ private static final char HEURISTIC_SIZE_ = 1056;
+ /**
+ * Mask value down to "some power of two" - 1, number of bits, not num of bytes.
+ */
+ private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff;
+ /**
+ * Shift that turns a bit index into the byte index of a heuristic table (8 bits per byte).
+ */
+ private static final int HEURISTIC_SHIFT_ = 3;
+ /**
+ * Unsafe character addition for character too large, it has to be folded then incremented. The value is a number
+ * of bits, so folded characters land after the first 256 bits of the table.
+ */
+ private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256;
+ /**
+ * Mask value to get the bit offset within a byte of the heuristic table.
+ */
+ private static final char HEURISTIC_MASK_ = 7;
+
+ private int m_caseSwitch_; // XORed into the tertiary of a non-continuation CE when no case bytes are written for it
+ private int m_common3_; // tertiary value that doTertiaryBytes counts instead of writing (run compression)
+ private int m_mask3_; // ANDed with the tertiary in getSortKeyBytes before the tertiary bytes are written
+ /**
+ * When switching case, we need to add or subtract different values.
+ */
+ private int m_addition3_;
+ /**
+ * Upper range when compressing
+ */
+ private int m_top3_;
+ /**
+ * Lower range when compressing
+ */
+ private int m_bottom3_;
+ private int m_topCount3_; // longest run of common tertiaries that one byte of the upper range stands for
+ private int m_bottomCount3_; // longest run of common tertiaries that one byte of the lower range stands for
+ /**
+ * Script reordering table. When it is not null, doPrimaryBytes maps the lead primary byte of every non-continuation
+ * CE through it.
+ */
+ private byte[] m_leadBytePermutationTable_;
+ /**
+ * Case first constants
+ */
+ private static final int CASE_SWITCH_ = 0xC0;
+ private static final int NO_CASE_SWITCH_ = 0;
+ /**
+ * Case level constants
+ */
+ private static final int CE_REMOVE_CASE_ = 0x3F;
+ private static final int CE_KEEP_CASE_ = 0xFF;
+ /**
+ * Case strength mask
+ */
+ private static final int CE_CASE_MASK_3_ = 0xFF;
+ /**
+ * Sortkey size factor. Values can be changed.
+ */
+ private static final double PROPORTION_2_ = 0.5;
+ private static final double PROPORTION_3_ = 0.667;
+
+ // These values come from the UCA ----------------------------------------
+
+ /**
+ * Magic special byte values from the fractional UCA, kept as individual constants.
+ */
+ // private static final byte BYTE_ZERO_ = 0x0;
+ // private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01;
+ // private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02;
+ private static final byte BYTE_SHIFT_PREFIX_ = (byte) 0x03;
+ /* private */static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_; // closes a lead byte run in doPrimaryBytes when the next lead byte is not greater
+ // private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
+ // TODO: Make the following values dynamic since they change with almost every UCA version.
+ static final byte CODAN_PLACEHOLDER = 0x12;
+ private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte) 0x5B; // lower bound of the range tested by isCompressible
+
+ private static final byte BYTE_UNSHIFTED_MAX_ = (byte) 0xFF; // closes a lead byte run in doPrimaryBytes when the next lead byte is greater
+ private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1;
+ private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80;
+ private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40;
+ private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85;
+ private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45;
+ private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5;
+ private static final int COMMON_BOTTOM_3_ = 0x05;
+ private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86;
+ private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = COMMON_BOTTOM_3_;
+ private static final int TOP_COUNT_2_ = (int) (PROPORTION_2_ * TOTAL_2_); // longest run of common secondaries per byte of the upper range
+ private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_; // longest run of common secondaries per byte of the lower range
+ private static final int COMMON_2_ = COMMON_BOTTOM_2_; // secondary value that is counted instead of written (run compression)
+ private static final int COMMON_UPPER_FIRST_3_ = 0xC5;
+ private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_;
+ // private static final int COMMON_4_ = (byte)0xFF;
+
+ /*
+ * Minimum size required for the binary collation data in bytes. Size of UCA header + size of options to 4 bytes
+ */
+ // private static final int MIN_BINARY_DATA_SIZE_ = (42 + 25) << 2;
+
+ /**
+ * If this collator is to generate only simple tertiaries for fast path
+ */
+ private boolean m_isSimple3_;
+
+ /**
+ * French collation sorting flag
+ */
+ private boolean m_isFrenchCollation_;
+ /**
+ * Flag indicating if shifted is requested for Quaternary alternate handling. If this is not true, the default for
+ * alternate handling will be non-ignorable.
+ */
+ private boolean m_isAlternateHandlingShifted_;
+ /**
+ * Extra case level for sorting
+ */
+ private boolean m_isCaseLevel_;
+ /**
+ * Frozen state of the collator.
+ */
+ private Lock frozenLock;
+
+
+ private static final int SORT_BUFFER_INIT_SIZE_ = 128;
+ private static final int SORT_BUFFER_INIT_SIZE_1_ = SORT_BUFFER_INIT_SIZE_ << 3; // initial size of the primary byte buffer
+ private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_; // initial size of the secondary byte buffer
+ private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_; // initial size of the tertiary byte buffer
+ private static final int SORT_BUFFER_INIT_SIZE_CASE_ = SORT_BUFFER_INIT_SIZE_ >> 2; // initial size of the case byte buffer
+ private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_; // initial size of the quaternary byte buffer
+
+ private static final int CE_CONTINUATION_TAG_ = 0xC0; // isContinuation requires both of these bits to be set
+ private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F; // clears the 0xC0 bits of a ce
+
+ private static final int LAST_BYTE_MASK_ = 0xFF;
+
+ // private static final int CE_RESET_TOP_VALUE_ = 0x9F000303;
+ // private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303;
+
+ private static final byte SORT_CASE_BYTE_START_ = (byte) 0x80; // initial value of every case level byte, see doCaseShift
+ private static final byte SORT_CASE_SHIFT_START_ = (byte) 7; // number of case bit positions available in a fresh case level byte
+
+ /**
+ * CE buffer size
+ */
+ private static final int CE_BUFFER_SIZE_ = 512;
+
+ // variables for Latin-1 processing
+ boolean latinOneUse_ = false;
+ boolean latinOneRegenTable_ = false;
+ boolean latinOneFailed_ = false;
+
+ int latinOneTableLen_ = 0;
+ int latinOneCEs_[] = null;
+
+    /**
+     * Scratch state for a single comparison or sort key computation: utility iterators, level flags, per-level byte
+     * buffers with their fill counts, pending compression run lengths and the CE buffers.
+     */
+    private final class CollationBuffer {
+        /**
+         * Utility text iterators and the collation element iterators reading from them
+         */
+        protected StringUCharacterIterator m_srcUtilIter_;
+        protected CollationElementIterator m_srcUtilColEIter_;
+        protected StringUCharacterIterator m_tgtUtilIter_;
+        protected CollationElementIterator m_tgtUtilColEIter_;
+
+        /**
+         * Utility comparison flags: 0 = case level, 2 = secondary, 3 = tertiary, 4 = quaternary, 5 = identical
+         */
+        protected boolean m_utilCompare0_;
+        protected boolean m_utilCompare2_;
+        protected boolean m_utilCompare3_;
+        protected boolean m_utilCompare4_;
+        protected boolean m_utilCompare5_;
+
+        /**
+         * Utility byte buffers: 0 = case, 1 = primary, 2 = secondary, 3 = tertiary, 4 = quaternary
+         */
+        protected byte[] m_utilBytes0_;
+        protected byte[] m_utilBytes1_;
+        protected byte[] m_utilBytes2_;
+        protected byte[] m_utilBytes3_;
+        protected byte[] m_utilBytes4_;
+
+        protected RawCollationKey m_utilRawCollationKey_;
+
+        /**
+         * Number of bytes in use in the byte buffer with the same index
+         */
+        protected int m_utilBytesCount0_;
+        protected int m_utilBytesCount1_;
+        protected int m_utilBytesCount2_;
+        protected int m_utilBytesCount3_;
+        protected int m_utilBytesCount4_;
+
+        /**
+         * Pending runs of common weights that have been counted but not yet written, per level
+         */
+        protected int m_utilCount2_;
+        protected int m_utilCount3_;
+        protected int m_utilCount4_;
+
+        /**
+         * Range of French secondary bytes that still has to be reversed, -1 when no range is open
+         */
+        protected int m_utilFrenchStart_;
+        protected int m_utilFrenchEnd_;
+
+        /**
+         * The CE buffers. They will be filled during the primary phase
+         */
+        protected int[] m_srcUtilCEBuffer_;
+        protected int[] m_tgtUtilCEBuffer_;
+        protected int m_srcUtilCEBufferSize_;
+        protected int m_tgtUtilCEBufferSize_;
+
+        protected int m_srcUtilContOffset_;
+        protected int m_tgtUtilContOffset_;
+
+        protected int m_srcUtilOffset_;
+        protected int m_tgtUtilOffset_;
+
+        private CollationBuffer() {
+            initBuffers();
+        }
+
+        /**
+         * Resets the state, then allocates the utility iterators, the byte buffers and the CE buffers used by
+         * compare.
+         */
+        protected final void initBuffers() {
+            resetBuffers();
+
+            m_srcUtilIter_ = new StringUCharacterIterator();
+            m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, RuleBasedCollator.this);
+            m_tgtUtilIter_ = new StringUCharacterIterator();
+            m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, RuleBasedCollator.this);
+
+            m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; // case
+            m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; // primary
+            m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; // secondary
+            m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; // tertiary
+            m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_]; // Quaternary
+
+            m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
+            m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_];
+        }
+
+        /**
+         * Clears the level flags, byte counts, pending run lengths, French range and offsets. The allocated
+         * buffers, the iterators and the CE buffer sizes are left untouched.
+         */
+        protected final void resetBuffers() {
+            m_utilCompare0_ = m_utilCompare2_ = m_utilCompare3_ = m_utilCompare4_ = m_utilCompare5_ = false;
+
+            m_utilBytesCount0_ = m_utilBytesCount1_ = m_utilBytesCount2_ = 0;
+            m_utilBytesCount3_ = m_utilBytesCount4_ = 0;
+
+            m_utilCount2_ = m_utilCount3_ = m_utilCount4_ = 0;
+
+            m_utilFrenchStart_ = m_utilFrenchEnd_ = 0;
+
+            m_srcUtilContOffset_ = m_tgtUtilContOffset_ = 0;
+            m_srcUtilOffset_ = m_tgtUtilOffset_ = 0;
+        }
+    }
+
+ // private methods -------------------------------------------------------
+
+    /**
+     * Initializes this collator from a rule string: loads the UCA data, lets a CollationParsedRuleBuilder built
+     * from the rules configure this collator, stores the rules, and then runs the common initialization and the
+     * permutation table build.
+     *
+     * @param rules
+     *            collation rules
+     * @throws Exception
+     *             if any of the initialization steps fails
+     */
+    private void init(String rules) throws Exception {
+        setWithUCAData();
+        new CollationParsedRuleBuilder(rules).setRules(this);
+        m_rules_ = rules;
+        init();
+        buildPermutationTable();
+    }
+
+    /**
+     * Compares source and target level by level, starting at offset, and returns as soon as one enabled level
+     * produces a non-zero result. When both Hiragana quaternary and shifted quaternary handling are active, the
+     * comparison is delegated to compareBySortKeys on the substrings starting at offset.
+     *
+     * @param source
+     *            source string
+     * @param target
+     *            target string
+     * @param offset
+     *            index at which the comparison starts in both strings
+     * @param buffer
+     *            collation buffer temporary state, reset on entry
+     * @return the result of the first enabled level that is not 0, or 0 if no level distinguishes the strings
+     */
+    private final int compareRegular(String source, String target, int offset, CollationBuffer buffer) {
+        buffer.resetBuffers();
+
+        // setting up the collator parameters
+        final int strength = getStrength();
+        buffer.m_utilCompare0_ = m_isCaseLevel_;
+        buffer.m_utilCompare2_ = strength >= SECONDARY;
+        buffer.m_utilCompare3_ = strength >= TERTIARY;
+        buffer.m_utilCompare4_ = strength >= QUATERNARY;
+        buffer.m_utilCompare5_ = strength == IDENTICAL;
+        final boolean frenchSecondary = m_isFrenchCollation_ && buffer.m_utilCompare2_;
+        final boolean shiftedQuaternary = m_isAlternateHandlingShifted_ && buffer.m_utilCompare4_;
+        final boolean hiraganaQuaternary = m_isHiragana4_ && buffer.m_utilCompare4_;
+
+        if (hiraganaQuaternary && shiftedQuaternary) {
+            return compareBySortKeys(source.substring(offset), target.substring(offset), buffer);
+        }
+
+        // This is the lowest primary value that will not be ignored if shifted
+        int lowestPrimary = 0;
+        if (m_isAlternateHandlingShifted_) {
+            lowestPrimary = m_variableTopValue_ << 16;
+        }
+        buffer.m_srcUtilCEBufferSize_ = 0;
+        buffer.m_tgtUtilCEBufferSize_ = 0;
+        final int primaryResult = doPrimaryCompare(hiraganaQuaternary, lowestPrimary, source, target, offset, buffer);
+        if (buffer.m_srcUtilCEBufferSize_ == -1 && buffer.m_tgtUtilCEBufferSize_ == -1) {
+            // The CE buffers are cleared once source is known to be greater than target or vice versa, so
+            // the value is the comparison result and not the hiragana result.
+            return primaryResult;
+        }
+
+        // Otherwise the primary phase reported the hiragana result.
+        final int hiraganaResult = primaryResult;
+
+        if (buffer.m_utilCompare2_) {
+            final int secondaryResult = doSecondaryCompare(frenchSecondary, buffer);
+            if (secondaryResult != 0) {
+                return secondaryResult;
+            }
+        }
+        // doing the case bit
+        if (buffer.m_utilCompare0_) {
+            final int caseResult = doCaseCompare(buffer);
+            if (caseResult != 0) {
+                return caseResult;
+            }
+        }
+        // Tertiary level
+        if (buffer.m_utilCompare3_) {
+            final int tertiaryResult = doTertiaryCompare(buffer);
+            if (tertiaryResult != 0) {
+                return tertiaryResult;
+            }
+        }
+
+        if (shiftedQuaternary) { // checkQuad
+            final int quaternaryResult = doQuaternaryCompare(lowestPrimary, buffer);
+            if (quaternaryResult != 0) {
+                return quaternaryResult;
+            }
+        } else if (hiraganaQuaternary && hiraganaResult != 0) {
+            // If we're fine on quaternaries, we might be different
+            // on Hiragana. This, however, might fail us in shifted.
+            return hiraganaResult;
+        }
+
+        // For IDENTICAL comparisons, a bitwise character comparison is the tiebreaker if all else is equal.
+        // Getting here should be quite rare - strings are not identical -
+        // that is checked first, but compared == through all other checks.
+        return buffer.m_utilCompare5_ ? doIdenticalCompare(source, target, offset, true) : 0;
+    }
+
+    /**
+     * Is this primary weight compressible? Returns false for multi-lead-byte scripts (digits, Latin, Han,
+     * implicit).
+     * TODO: This should use per-lead-byte flags from FractionalUCA.txt.
+     *
+     * @param primary1
+     *            lead byte of the primary weight
+     * @return true if primary1 lies between BYTE_FIRST_NON_LATIN_PRIMARY_ and maxRegularPrimary, both inclusive
+     */
+    static boolean isCompressible(int primary1) {
+        if (primary1 < BYTE_FIRST_NON_LATIN_PRIMARY_) {
+            return false;
+        }
+        return primary1 <= maxRegularPrimary;
+    }
+
+ /**
+ * Gets the 2 bytes of primary order and adds them to the sort key buffers. When doShift is set, any pending run of
+ * common Quaternary weights is written out first and the non-zero primary bytes go to the Quaternary byte array;
+ * otherwise the bytes go to the primary byte array, where consecutive non-continuation ces that share a
+ * compressible lead byte store that lead byte only once.
+ *
+ * @param ce
+ * current ce
+ * @param notIsContinuation
+ * true if ce is not a continuation ce; only then is the lead byte mapped through the script reordering
+ * table and considered for lead byte compression
+ * @param doShift
+ * flag indicating if ce is to be shifted
+ * @param leadPrimary
+ * lead primary used for compression, 0 if no compression run is open
+ * @param commonBottom4
+ * common byte value for Quaternary
+ * @param bottomCount4
+ * longest run of pending common Quaternary weights that a single written byte stands for
+ * @param buffer
+ * collation buffer temporary state
+ * @return the new lead primary for compression
+ */
+ private final int doPrimaryBytes(int ce, boolean notIsContinuation, boolean doShift, int leadPrimary,
+ int commonBottom4, int bottomCount4, CollationBuffer buffer) {
+
+ int p2 = (ce >>>= 16) & LAST_BYTE_MASK_; // in ints for unsigned
+ int p1 = ce >>> 8; // comparison
+ int originalP1 = p1; // compressibility is decided on the lead byte before script reordering
+ if (notIsContinuation) {
+ if (m_leadBytePermutationTable_ != null) {
+ p1 = 0xff & m_leadBytePermutationTable_[p1];
+ }
+ }
+
+ if (doShift) {
+ if (buffer.m_utilCount4_ > 0) { // write out the pending run of common quaternaries first
+ while (buffer.m_utilCount4_ > bottomCount4) {
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + bottomCount4));
+ buffer.m_utilBytesCount4_++;
+ buffer.m_utilCount4_ -= bottomCount4;
+ }
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonBottom4 + (buffer.m_utilCount4_ - 1)));
+ buffer.m_utilBytesCount4_++;
+ buffer.m_utilCount4_ = 0;
+ }
+ // dealing with a variable and we're treating them as shifted
+ // This is a shifted ignorable
+ if (p1 != 0) {
+ // we need to check this since we could be in continuation
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) p1);
+ buffer.m_utilBytesCount4_++;
+ }
+ if (p2 != 0) {
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) p2);
+ buffer.m_utilBytesCount4_++;
+ }
+ } else {
+ // Note: This code assumes that the table is well built
+ // i.e. not having 0 bytes where they are not supposed to be.
+ // Usually, we'll have non-zero primary1 & primary2, except
+ // in cases of LatinOne and friends, when primary2 will be
+ // regular and simple sortkey calc
+ if (p1 != CollationElementIterator.IGNORABLE) {
+ if (notIsContinuation) {
+ if (leadPrimary == p1) { // same lead byte as the open run: only the second byte is written
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
+ buffer.m_utilBytesCount1_++;
+ } else {
+ if (leadPrimary != 0) { // close the open run with a MAX or MIN byte depending on the next lead byte
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
+ ((p1 > leadPrimary) ? BYTE_UNSHIFTED_MAX_ : BYTE_UNSHIFTED_MIN_));
+ buffer.m_utilBytesCount1_++;
+ }
+ if (p2 == CollationElementIterator.IGNORABLE) {
+ // one byter, not compressed
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
+ buffer.m_utilBytesCount1_++;
+ leadPrimary = 0;
+ } else if (isCompressible(originalP1)) {
+ // compress
+ leadPrimary = p1;
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
+ buffer.m_utilBytesCount1_++;
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
+ buffer.m_utilBytesCount1_++;
+ } else {
+ leadPrimary = 0; // not compressible: both bytes are written and no run is opened
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
+ buffer.m_utilBytesCount1_++;
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
+ buffer.m_utilBytesCount1_++;
+ }
+ }
+ } else {
+ // continuation, add primary to the key, no compression
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p1);
+ buffer.m_utilBytesCount1_++;
+ if (p2 != CollationElementIterator.IGNORABLE) {
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) p2);
+ // second part
+ buffer.m_utilBytesCount1_++;
+ }
+ }
+ }
+ }
+ return leadPrimary;
+ }
+
+ /**
+ * Gets the secondary byte and adds it to the secondary byte array. A zero secondary is skipped. Without French
+ * sorting, common secondaries of non-continuation ces are only counted; the count is written out in compressed
+ * form just before the next secondary that is not counted. With French sorting every secondary is written as is,
+ * and the range covered by a run of continuation ces is reversed once the next non-continuation ce arrives.
+ *
+ * @param ce current ce
+ * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
+ * @param doFrench flag indicator if french sort is to be performed
+ * @param buffer collation buffer temporary state
+ */
+ private final void doSecondaryBytes(int ce, boolean notIsContinuation, boolean doFrench, CollationBuffer buffer) {
+ int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison
+ if (s != 0) {
+ if (!doFrench) {
+ // This is compression code.
+ if (s == COMMON_2_ && notIsContinuation) {
+ buffer.m_utilCount2_++;
+ } else {
+ if (buffer.m_utilCount2_ > 0) {
+ if (s > COMMON_2_) { // not necessary for 4th level.
+ while (buffer.m_utilCount2_ > TOP_COUNT_2_) {
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
+ (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
+ buffer.m_utilBytesCount2_++;
+ buffer.m_utilCount2_ -= TOP_COUNT_2_;
+ }
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
+ (byte) (COMMON_TOP_2_ - (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount2_++;
+ } else {
+ while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
+ (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+ buffer.m_utilBytesCount2_++;
+ buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
+ }
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_,
+ (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount2_++;
+ }
+ buffer.m_utilCount2_ = 0;
+ }
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) s);
+ buffer.m_utilBytesCount2_++;
+ }
+ } else {
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) s);
+ buffer.m_utilBytesCount2_++;
+ // Do the special handling for French secondaries
+ // We need to get continuation elements and do intermediate
+ // restore
+ // abc1c2c3de with french secondaries need to be edc1c2c3ba
+ // NOT edc3c2c1ba
+ if (notIsContinuation) {
+ if (buffer.m_utilFrenchStart_ != -1) {
+ // reverse secondaries from frenchStartPtr up to
+ // frenchEndPtr
+ reverseBuffer(buffer.m_utilBytes2_, buffer.m_utilFrenchStart_, buffer.m_utilFrenchEnd_);
+ buffer.m_utilFrenchStart_ = -1;
+ }
+ } else {
+ if (buffer.m_utilFrenchStart_ == -1) {
+ buffer.m_utilFrenchStart_ = buffer.m_utilBytesCount2_ - 2; // index of the byte written just before the current one
+ }
+ buffer.m_utilFrenchEnd_ = buffer.m_utilBytesCount2_ - 1; // index of the byte just written
+ }
+ }
+ }
+ }
+
+ /**
+ * Reverse the argument buffer
+ *
+ * @param buffer to reverse
+ * @param start index in buffer to start from
+ * @param end index in buffer to end at
+ */
+ private static void reverseBuffer(byte buffer[], int start, int end) {
+ while (start < end) {
+ byte b = buffer[start];
+ buffer[start++] = buffer[end];
+ buffer[end--] = b;
+ }
+ }
+
+    /**
+     * Starts a new case level byte if the current one has no bit positions left.
+     *
+     * @param caseshift number of bit positions left in the current case level byte
+     * @param buffer collation buffer temporary state
+     * @return caseshift unchanged if it is not 0, otherwise the shift start value of the freshly appended byte
+     */
+    private final int doCaseShift(int caseshift, CollationBuffer buffer) {
+        if (caseshift != 0) {
+            return caseshift;
+        }
+        buffer.m_utilBytes0_ = append(buffer.m_utilBytes0_, buffer.m_utilBytesCount0_, SORT_CASE_BYTE_START_);
+        buffer.m_utilBytesCount0_++;
+        return SORT_CASE_SHIFT_START_;
+    }
+
+ /**
+ * Performs the casing sort: packs the case bits of one ce into the case byte array. Bits are written into the
+ * last case byte from the high positions downwards, and doCaseShift starts a new byte whenever no position is
+ * left. Only non-continuation ces with a non-zero tertiary contribute bits. With upper first, a ce without case
+ * bits writes a single 1 bit and any other ce writes a 0 bit followed by bit 6 of the tertiary; otherwise a ce
+ * with case bits writes a 1 bit followed by bit 7 of the tertiary and a ce without case bits writes a single 0 bit.
+ *
+ * @param tertiary byte in ints for easy comparison
+ * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
+ * @param caseshift number of bit positions left in the current case byte, 0 if a new byte has to be started
+ * @param buffer collation buffer temporary state
+ * @return the new value of case shift
+ */
+ private final int doCaseBytes(int tertiary, boolean notIsContinuation, int caseshift, CollationBuffer buffer) {
+ caseshift = doCaseShift(caseshift, buffer);
+
+ if (notIsContinuation && tertiary != 0) {
+ byte casebits = (byte) (tertiary & 0xC0); // signed byte: the shifts below sign-extend, "& 1" keeps the wanted bit
+ if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+ if (casebits == 0) {
+ buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= (1 << (--caseshift));
+ } else {
+ // second bit
+ caseshift = doCaseShift(caseshift - 1, buffer); // skipping one position leaves a 0 bit there
+ buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= ((casebits >> 6) & 1) << (--caseshift);
+ }
+ } else {
+ if (casebits != 0) {
+ buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= 1 << (--caseshift);
+ // second bit
+ caseshift = doCaseShift(caseshift, buffer);
+ buffer.m_utilBytes0_[buffer.m_utilBytesCount0_ - 1] |= ((casebits >> 7) & 1) << (--caseshift);
+ } else {
+ caseshift--; // skipping one position leaves a 0 bit there
+ }
+ }
+ }
+
+ return caseshift;
+ }
+
+ /**
+ * Gets the tertiary byte and adds it to the tertiary byte array. A zero tertiary is skipped. Common tertiaries of
+ * non-continuation ces are only counted; the count is written out in compressed form, from the upper or the lower
+ * range depending on the next tertiary, just before that next tertiary is written. Before it is written, a
+ * tertiary is adjusted by m_addition3_ depending on which side of the common value it lies on and on the current
+ * common value.
+ *
+ * @param tertiary byte in int for easy comparison
+ * @param notIsContinuation flag indicating if the current bytes belong to a continuation ce
+ * @param buffer collation buffer temporary state
+ */
+ private final void doTertiaryBytes(int tertiary, boolean notIsContinuation, CollationBuffer buffer) {
+ if (tertiary != 0) {
+ // This is compression code.
+ // sequence size check is included in the if clause
+ if (tertiary == m_common3_ && notIsContinuation) {
+ buffer.m_utilCount3_++;
+ } else {
+ int common3 = m_common3_ & LAST_BYTE_MASK_;
+ if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) {
+ tertiary += m_addition3_;
+ } else if (tertiary <= common3 && m_common3_ == COMMON_UPPER_FIRST_3_) {
+ tertiary -= m_addition3_;
+ }
+ if (buffer.m_utilCount3_ > 0) { // write out the pending run of common tertiaries first
+ if (tertiary > common3) {
+ while (buffer.m_utilCount3_ > m_topCount3_) {
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - m_topCount3_));
+ buffer.m_utilBytesCount3_++;
+ buffer.m_utilCount3_ -= m_topCount3_;
+ }
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
+ (byte) (m_top3_ - (buffer.m_utilCount3_ - 1)));
+ buffer.m_utilBytesCount3_++;
+ } else {
+ while (buffer.m_utilCount3_ > m_bottomCount3_) {
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
+ (byte) (m_bottom3_ + m_bottomCount3_));
+ buffer.m_utilBytesCount3_++;
+ buffer.m_utilCount3_ -= m_bottomCount3_;
+ }
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_,
+ (byte) (m_bottom3_ + (buffer.m_utilCount3_ - 1)));
+ buffer.m_utilBytesCount3_++;
+ }
+ buffer.m_utilCount3_ = 0;
+ }
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) tertiary);
+ buffer.m_utilBytesCount3_++;
+ }
+ }
+ }
+
+    /**
+     * Gets the Quaternary byte and adds it to the Quaternary byte array. A code point that was not Hiragana only
+     * increases the pending count of common Quaternary weights. A Hiragana code point first writes out that
+     * pending count in compressed form and then appends the Hiragana Quaternary byte.
+     *
+     * @param isCodePointHiragana flag indicator if the previous codepoint we dealt with was Hiragana
+     * @param commonBottom4 smallest common Quaternary byte
+     * @param bottomCount4 longest pending run that a single written byte stands for
+     * @param hiragana4 hiragana Quaternary byte
+     * @param buffer collation buffer temporary state
+     */
+    private final void doQuaternaryBytes(boolean isCodePointHiragana, int commonBottom4, int bottomCount4,
+            byte hiragana4, CollationBuffer buffer) {
+        if (!isCodePointHiragana) {
+            // This wasn't Hiragana, so we can continue adding stuff
+            buffer.m_utilCount4_++;
+            return;
+        }
+
+        // This was Hiragana, need to note it
+        if (buffer.m_utilCount4_ > 0) {
+            // Close this part
+            final byte fullRun = (byte) (commonBottom4 + bottomCount4);
+            while (buffer.m_utilCount4_ > bottomCount4) {
+                buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, fullRun);
+                buffer.m_utilBytesCount4_++;
+                buffer.m_utilCount4_ -= bottomCount4;
+            }
+            final byte lastRun = (byte) (commonBottom4 + (buffer.m_utilCount4_ - 1));
+            buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, lastRun);
+            buffer.m_utilBytesCount4_++;
+            buffer.m_utilCount4_ = 0;
+        }
+        // Add the Hiragana
+        buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, hiragana4);
+        buffer.m_utilBytesCount4_++;
+    }
+
+    /**
+     * Iterates through the argument string for all ces. Splits the ces into their relevant primaries, secondaries
+     * etc. and stores them in the byte arrays of the collation buffer.
+     * <p>
+     * The decomposition mode of this collator is switched to NO_DECOMPOSITION while the ces are read. The previous
+     * mode is restored in a finally block, so the collator is not left modified if reading the ces fails.
+     *
+     * @param source normalized string
+     * @param doFrench flag indicator if special handling of French has to be done
+     * @param hiragana4 offset for Hiragana quaternary
+     * @param commonBottom4 smallest common quaternary byte
+     * @param bottomCount4 longest pending run of common quaternaries that a single written byte stands for
+     * @param buffer collation buffer temporary state
+     */
+    private final void getSortKeyBytes(String source, boolean doFrench, byte hiragana4, int commonBottom4,
+            int bottomCount4, CollationBuffer buffer) {
+        int backupDecomposition = getDecomposition();
+        // TODO- hack fix around frozen state - stop self-modification
+        internalSetDecomposition(NO_DECOMPOSITION); // reverted in the finally block below
+        try {
+            buffer.m_srcUtilIter_.setText(source);
+            buffer.m_srcUtilColEIter_.setText(buffer.m_srcUtilIter_);
+            buffer.m_utilFrenchStart_ = -1;
+            buffer.m_utilFrenchEnd_ = -1;
+
+            boolean doShift = false;
+            boolean notIsContinuation = false;
+
+            int leadPrimary = 0; // int for easier comparison
+            int caseShift = 0;
+
+            while (true) {
+                int ce = buffer.m_srcUtilColEIter_.next();
+                if (ce == CollationElementIterator.NULLORDER) {
+                    break;
+                }
+
+                if (ce == CollationElementIterator.IGNORABLE) {
+                    continue;
+                }
+
+                notIsContinuation = !isContinuation(ce);
+
+                boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0;
+                // actually we can just check that the first byte is 0
+                // generation stuffs the order left first
+                boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_) <= m_variableTopValue_;
+                doShift = (m_isAlternateHandlingShifted_
+                        && ((notIsContinuation && isSmallerThanVariableTop && !isPrimaryByteIgnorable) // primary byte not 0
+                        || (!notIsContinuation && doShift)) || (doShift && isPrimaryByteIgnorable));
+                if (doShift && isPrimaryByteIgnorable) {
+                    // amendment to the UCA says that primary ignorables and other
+                    // ignorables should be removed if following a shifted code
+                    // point
+                    // if we were shifted and we got an ignorable code point
+                    // we should just completely ignore it
+                    continue;
+                }
+                leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift, leadPrimary, commonBottom4, bottomCount4, buffer);
+
+                if (doShift) {
+                    continue;
+                }
+                if (buffer.m_utilCompare2_) {
+                    doSecondaryBytes(ce, notIsContinuation, doFrench, buffer);
+                }
+
+                int t = ce & LAST_BYTE_MASK_;
+                if (!notIsContinuation) {
+                    t = ce & CE_REMOVE_CONTINUATION_MASK_;
+                }
+
+                if (buffer.m_utilCompare0_ && (!isPrimaryByteIgnorable || buffer.m_utilCompare2_)) {
+                    // do the case level if we need to do it. We don't want to calculate
+                    // case level for primary ignorables if we have only primary strength and case level
+                    // otherwise we would break well formedness of CEs
+                    caseShift = doCaseBytes(t, notIsContinuation, caseShift, buffer);
+                } else if (notIsContinuation) {
+                    t ^= m_caseSwitch_;
+                }
+
+                t &= m_mask3_;
+
+                if (buffer.m_utilCompare3_) {
+                    doTertiaryBytes(t, notIsContinuation, buffer);
+                }
+
+                if (buffer.m_utilCompare4_ && notIsContinuation) { // compare quad
+                    doQuaternaryBytes(buffer.m_srcUtilColEIter_.m_isCodePointHiragana_, commonBottom4, bottomCount4, hiragana4, buffer);
+                }
+            }
+        } finally {
+            // TODO - hack fix around frozen state - stop self-modification
+            // Restoring here keeps the collator's decomposition mode intact even if an exception escapes above.
+            internalSetDecomposition(backupDecomposition); // reverts to original
+        }
+        if (buffer.m_utilFrenchStart_ != -1) {
+            // one last round of checks
+            reverseBuffer(buffer.m_utilBytes2_, buffer.m_utilFrenchStart_, buffer.m_utilFrenchEnd_);
+        }
+    }
+
+ /**
+ * From the individual strength byte results the final compact sortkey will be calculated. The enabled levels are
+ * appended in the order secondary, case, tertiary, quaternary, identical; the quaternary level is only added when
+ * the tertiary level is enabled, and the identical level only when the quaternary level is enabled. A 0 byte is
+ * appended to the primary byte array last, and the key is set from that array.
+ *
+ * @param source text string
+ * @param doFrench flag indicating that special handling of French has to be done
+ * @param commonBottom4 smallest common quaternary byte
+ * @param bottomCount4 smallest quaternary byte
+ * @param key output RawCollationKey to store results, key cannot be null
+ * @param buffer collation buffer temporary state
+ */
+ private final void getSortKey(String source, boolean doFrench, int commonBottom4, int bottomCount4,
+ RawCollationKey key, CollationBuffer buffer) {
+ // we have done all the CE's, now let's put them together to form
+ // a key
+ if (buffer.m_utilCompare2_) {
+ doSecondary(doFrench, buffer);
+ }
+ // adding case level should be independent of secondary level
+ if (buffer.m_utilCompare0_) {
+ doCase(buffer);
+ }
+ if (buffer.m_utilCompare3_) {
+ doTertiary(buffer);
+ if (buffer.m_utilCompare4_) {
+ doQuaternary(commonBottom4, bottomCount4, buffer);
+ if (buffer.m_utilCompare5_) {
+ doIdentical(source, buffer);
+ }
+
+ }
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) 0); // terminating 0 byte of the key
+ buffer.m_utilBytesCount1_++;
+
+ key.set(buffer.m_utilBytes1_, 0, buffer.m_utilBytesCount1_);
+ }
+
+ /**
+ * Packs the French bytes: walks the secondary byte array from its last byte to its first and appends the bytes
+ * to the primary byte array. Common secondaries are only counted on the way; the count is written out in
+ * compressed form, from the upper or the lower range depending on the next secondary, before that secondary is
+ * appended. A count that is still pending at the end is written from the lower range.
+ *
+ * @param buffer collation buffer temporary state
+ */
+ private static final void doFrench(CollationBuffer buffer) {
+ for (int i = 0; i < buffer.m_utilBytesCount2_; i++) {
+ byte s = buffer.m_utilBytes2_[buffer.m_utilBytesCount2_ - i - 1]; // read backwards
+ // This is compression code.
+ if (s == COMMON_2_) {
+ ++buffer.m_utilCount2_;
+ } else {
+ if (buffer.m_utilCount2_ > 0) {
+ // getting the unsigned value
+ if ((s & LAST_BYTE_MASK_) > COMMON_2_) {
+ // not necessary for 4th level.
+ while (buffer.m_utilCount2_ > TOP_COUNT_2_) {
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
+ (byte) (COMMON_TOP_2_ - TOP_COUNT_2_));
+ buffer.m_utilBytesCount1_++;
+ buffer.m_utilCount2_ -= TOP_COUNT_2_;
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
+ (byte) (COMMON_TOP_2_ - (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount1_++;
+ } else {
+ while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
+ (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+ buffer.m_utilBytesCount1_++;
+ buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_,
+ (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount1_++;
+ }
+ buffer.m_utilCount2_ = 0;
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, s);
+ buffer.m_utilBytesCount1_++;
+ }
+ }
+ if (buffer.m_utilCount2_ > 0) { // a run of common secondaries is still pending at the end
+ while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) {
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+ buffer.m_utilBytesCount1_++;
+ buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount1_++;
+ }
+ }
+
+ /**
+ * Compacts the secondary bytes and stores them into the primary array.
+ * <p>
+ * First writes any pending run of common secondary weights (buffer.m_utilCount2_) to buffer.m_utilBytes2_ using the
+ * bottom encoding, then appends SORT_LEVEL_TERMINATOR_ to buffer.m_utilBytes1_, and finally transfers the
+ * secondary bytes behind it: through doFrench(buffer) when doFrench is true, otherwise with a plain array copy.
+ *
+ * @param doFrench flag indicator that French has to be handled specially
+ * @param buffer collation buffer temporary state
+ */
+ private static final void doSecondary(boolean doFrench, CollationBuffer buffer) {
+ if (buffer.m_utilCount2_ > 0) { // a run of common secondary weights is still pending
+ while (buffer.m_utilCount2_ > BOTTOM_COUNT_2_) { // split runs too long for a single count byte
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) (COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_));
+ buffer.m_utilBytesCount2_++;
+ buffer.m_utilCount2_ -= BOTTOM_COUNT_2_;
+ }
+ buffer.m_utilBytes2_ = append(buffer.m_utilBytes2_, buffer.m_utilBytesCount2_, (byte) (COMMON_BOTTOM_2_ + (buffer.m_utilCount2_ - 1)));
+ buffer.m_utilBytesCount2_++;
+ }
+
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_); // separates this level from the bytes already in the main array
+ buffer.m_utilBytesCount1_++;
+
+ if (doFrench) { // do the reverse copy
+ doFrench(buffer);
+ } else {
+ if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount2_) { // grow the main array so the copy below fits
+ buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount2_);
+ }
+ System.arraycopy(buffer.m_utilBytes2_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount2_);
+ buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount2_;
+ }
+ }
+
+ /**
+ * Allocates a larger byte array and carries the used prefix of the old array over into it.
+ *
+ * @param buffer array of bytes to grow
+ * @param size number of leading bytes of buffer that are copied into the new array
+ * @param incrementsize number of slots the new array has in addition to buffer.length
+ * @return a new array of length buffer.length + incrementsize; slots from index size onwards are zero
+ */
+ private static final byte[] increase(byte[] buffer, int size, int incrementsize) {
+     final int grownLength = buffer.length + incrementsize;
+     final byte[] grown = new byte[grownLength];
+     // only the first size bytes are in use, so only those are copied
+     System.arraycopy(buffer, 0, grown, 0, size);
+     return grown;
+ }
+
+ /**
+ * Allocates a larger int array and carries the used prefix of the old array over into it.
+ *
+ * @param buffer array of ints to grow
+ * @param size number of leading ints of buffer that are copied into the new array
+ * @param incrementsize number of slots the new array has in addition to buffer.length
+ * @return a new array of length buffer.length + incrementsize; slots from index size onwards are zero
+ */
+ private static final int[] increase(int[] buffer, int size, int incrementsize) {
+     final int grownLength = buffer.length + incrementsize;
+     final int[] grown = new int[grownLength];
+     // only the first size ints are in use, so only those are copied
+     System.arraycopy(buffer, 0, grown, 0, size);
+     return grown;
+ }
+
+ /**
+ * Stores the case level into the primary array: appends SORT_LEVEL_TERMINATOR_ to buffer.m_utilBytes1_ and then
+ * copies the buffer.m_utilBytesCount0_ case bytes held in buffer.m_utilBytes0_ behind it.
+ *
+ * @param buffer collation buffer temporary state; m_utilBytes1_ and m_utilBytesCount1_ are modified
+ */
+ private static final void doCase(CollationBuffer buffer) {
+     // close the bytes already in the main array before the case bytes start
+     buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
+     buffer.m_utilBytesCount1_++;
+
+     final int caseByteCount = buffer.m_utilBytesCount0_;
+     final int endOfCaseLevel = buffer.m_utilBytesCount1_ + caseByteCount;
+     if (endOfCaseLevel >= buffer.m_utilBytes1_.length) {
+         // make room for the whole case level in one step
+         buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, caseByteCount);
+     }
+     System.arraycopy(buffer.m_utilBytes0_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, caseByteCount);
+     buffer.m_utilBytesCount1_ = endOfCaseLevel;
+ }
+
+ /**
+ * Compacts the tertiary bytes and stores them into the primary array.
+ * <p>
+ * A pending run of common tertiary weights (buffer.m_utilCount3_) is first written to buffer.m_utilBytes3_: counting
+ * down from m_top3_ when m_common3_ differs from COMMON_BOTTOM_3_, otherwise counting up from m_bottom3_. Then
+ * SORT_LEVEL_TERMINATOR_ and the tertiary bytes are appended to buffer.m_utilBytes1_.
+ *
+ * @param buffer collation buffer temporary state
+ */
+ private final void doTertiary(CollationBuffer buffer) {
+ if (buffer.m_utilCount3_ > 0) { // a run of common tertiary weights is still pending
+ if (m_common3_ != COMMON_BOTTOM_3_) {
+ while (buffer.m_utilCount3_ >= m_topCount3_) { // split runs too long for a single count byte
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - m_topCount3_));
+ buffer.m_utilBytesCount3_++;
+ buffer.m_utilCount3_ -= m_topCount3_;
+ }
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_top3_ - buffer.m_utilCount3_));
+ buffer.m_utilBytesCount3_++;
+ } else {
+ while (buffer.m_utilCount3_ > m_bottomCount3_) { // split runs too long for a single count byte
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_bottom3_ + m_bottomCount3_));
+ buffer.m_utilBytesCount3_++;
+ buffer.m_utilCount3_ -= m_bottomCount3_;
+ }
+ buffer.m_utilBytes3_ = append(buffer.m_utilBytes3_, buffer.m_utilBytesCount3_, (byte) (m_bottom3_ + (buffer.m_utilCount3_ - 1)));
+ buffer.m_utilBytesCount3_++;
+ }
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_); // separates this level from the bytes already in the main array
+ buffer.m_utilBytesCount1_++;
+ if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount3_) { // grow the main array so the copy below fits
+ buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount3_);
+ }
+ System.arraycopy(buffer.m_utilBytes3_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount3_);
+ buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount3_;
+ }
+
+ /**
+ * Compacts the quaternary bytes and stores them into the primary array.
+ * <p>
+ * A pending run of common quaternary weights (buffer.m_utilCount4_) is first written to buffer.m_utilBytes4_,
+ * counting up from commonbottom4. Then SORT_LEVEL_TERMINATOR_ and the quaternary bytes are appended to
+ * buffer.m_utilBytes1_.
+ *
+ * @param commonbottom4 base value that the run-length count bytes are added to
+ * @param bottomcount4 largest run length that is written as a single count byte
+ * @param buffer collation buffer temporary state
+ */
+ private final void doQuaternary(int commonbottom4, int bottomcount4, CollationBuffer buffer) {
+ if (buffer.m_utilCount4_ > 0) { // a run of common quaternary weights is still pending
+ while (buffer.m_utilCount4_ > bottomcount4) { // split runs too long for a single count byte
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonbottom4 + bottomcount4));
+ buffer.m_utilBytesCount4_++;
+ buffer.m_utilCount4_ -= bottomcount4;
+ }
+ buffer.m_utilBytes4_ = append(buffer.m_utilBytes4_, buffer.m_utilBytesCount4_, (byte) (commonbottom4 + (buffer.m_utilCount4_ - 1)));
+ buffer.m_utilBytesCount4_++;
+ }
+ buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_); // separates this level from the bytes already in the main array
+ buffer.m_utilBytesCount1_++;
+ if (buffer.m_utilBytes1_.length <= buffer.m_utilBytesCount1_ + buffer.m_utilBytesCount4_) { // grow the main array so the copy below fits
+ buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount4_);
+ }
+ System.arraycopy(buffer.m_utilBytes4_, 0, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, buffer.m_utilBytesCount4_);
+ buffer.m_utilBytesCount1_ += buffer.m_utilBytesCount4_;
+ }
+
+ /**
+ * Handles the identical level: appends SORT_LEVEL_TERMINATOR_ to buffer.m_utilBytes1_ and then lets BOCU compress
+ * the source string directly into that array, behind the terminator.
+ *
+ * @param source text string
+ * @param buffer collation buffer temporary state; m_utilBytes1_ and m_utilBytesCount1_ are modified
+ */
+ private static final void doIdentical(String source, CollationBuffer buffer) {
+     final int compressedLength = BOCU.getCompressionLength(source);
+
+     buffer.m_utilBytes1_ = append(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, SORT_LEVEL_TERMINATOR_);
+     buffer.m_utilBytesCount1_++;
+
+     final int requiredLength = buffer.m_utilBytesCount1_ + compressedLength;
+     if (requiredLength >= buffer.m_utilBytes1_.length) {
+         // one slot more than the compressed text is reserved
+         buffer.m_utilBytes1_ = increase(buffer.m_utilBytes1_, buffer.m_utilBytesCount1_, 1 + compressedLength);
+     }
+     // compress() hands back the value stored as the new byte count
+     buffer.m_utilBytesCount1_ = BOCU.compress(source, buffer.m_utilBytes1_, buffer.m_utilBytesCount1_);
+ }
+
+ /**
+ * Gets the offset of the first unmatched characters in source and target. This method returns the offset of the
+ * start of a contraction or a combining sequence, if the first difference is in the middle of such a sequence.
+ * <p>
+ * The common prefix is found by comparing chars. If the char at the end of that prefix, or the char right
+ * after it in either string, is reported as unsafe by isUnsafe, the offset is moved backwards until a char that is
+ * not unsafe, or offset 0, is reached.
+ *
+ * @param source
+ * string
+ * @param target
+ * string
+ * @return offset of the first unmatched characters in source and target.
+ */
+ private final int getFirstUnmatchedOffset(String source, String target) {
+ int result = 0;
+ int slength = source.length();
+ int tlength = target.length();
+ int minlength = slength;
+ if (minlength > tlength) {
+ minlength = tlength;
+ }
+ while (result < minlength && source.charAt(result) == target.charAt(result)) { // length of the identical prefix
+ result++;
+ }
+ if (result > 0) {
+ // There is an identical portion at the beginning of the two
+ // strings. If the identical portion ends within a contraction or a
+ // combining character sequence, back up to the start of that
+ // sequence.
+ char schar = 0;
+ char tchar = 0;
+ if (result < minlength) {
+ schar = source.charAt(result); // first differing chars
+ tchar = target.charAt(result);
+ } else { // one string is a prefix of the other, or both are equal
+ schar = source.charAt(minlength - 1); // last char of the shared prefix
+ if (isUnsafe(schar)) {
+ tchar = schar;
+ } else if (slength == tlength) {
+ return result; // identical strings ending in a char that is not unsafe
+ } else if (slength < tlength) {
+ tchar = target.charAt(result); // first char of the longer target
+ } else {
+ schar = source.charAt(result); // first char of the longer source
+ }
+ }
+ if (isUnsafe(schar) || isUnsafe(tchar)) {
+ // We are stopped in the middle of a contraction or combining
+ // sequence.
+ // Look backwards for the part of the string for the start of
+ // the sequence
+ // It doesn't matter which string we scan, since they are the
+ // same in this region.
+ do {
+ result--;
+ } while (result > 0 && isUnsafe(source.charAt(result)));
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Appends a byte to an array of bytes, growing the array when it has run out of space.
+ * <p>
+ * The capacity is tested explicitly instead of catching ArrayIndexOutOfBoundsException, so that no exception is
+ * used for ordinary control flow; this mirrors the int[] overload of append.
+ *
+ * @param array
+ * byte array to append to
+ * @param appendindex
+ * index in the byte array at which the value is stored
+ * @param value
+ * to append
+ * @return array if it can accommodate the new value, otherwise a new array that is SORT_BUFFER_INIT_SIZE_ slots
+ * larger and holds the first appendindex bytes of array followed by value
+ */
+ private static final byte[] append(byte[] array, int appendindex, byte value) {
+     if (appendindex >= array.length) {
+         // out of room: grow by a fixed chunk and keep the bytes written so far
+         array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
+     }
+     array[appendindex] = value;
+     return array;
+ }
+
+ /**
+ * This is a trick string compare function that goes in and uses sortkeys to compare. It is used when compare gets
+ * in trouble and needs to bail out.
+ * <p>
+ * The source key is stored in buffer.m_utilRawCollationKey_, passing the previous value of that field to
+ * getRawCollationKey; the target key is requested with a null key argument.
+ *
+ * @param source text string
+ * @param target text string
+ * @param buffer collation buffer temporary state
+ * @return the result of RawCollationKey.compareTo on the source key and the target key
+ */
+ private final int compareBySortKeys(String source, String target, CollationBuffer buffer)
+ {
+ buffer.m_utilRawCollationKey_ = getRawCollationKey(source, buffer.m_utilRawCollationKey_);
+ // this method is very seldom called
+ RawCollationKey targetkey = getRawCollationKey(target, null);
+ return buffer.m_utilRawCollationKey_.compareTo(targetkey);
+ }
+
+ /**
+ * Performs the primary comparisons, and fills up the CE buffer at the same time. The return value toggles between
+ * the comparison result and the hiragana result. If either the source is greater than target or vice versa, the
+ * return result is the comparison result, ie 1 or -1, furthermore the cebuffers will be cleared when that happens.
+ * If the primary comparisons are equal, we'll have to continue with secondary comparison. In this case the cebuffer
+ * will not be cleared and the return result will be the hiragana result.
+ * <p>
+ * When m_isAlternateHandlingShifted_ is set, the CEs are fetched through getPrimaryShiftedCompareCE and 0 is
+ * returned if no primary difference is found; the hiragana flags are not examined on that path.
+ *
+ * @param doHiragana4 flag indicator that Hiragana Quaternary has to be observed
+ * @param lowestpvalue the lowest primary value that will not be ignored if alternate handling is shifted
+ * @param source text string
+ * @param target text string
+ * @param textoffset offset in text to start the comparison
+ * @param buffer collation buffer temporary state
+ * @return comparison result if a primary difference is found, otherwise hiragana result
+ */
+ private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue, String source, String target,
+ int textoffset, CollationBuffer buffer)
+
+ {
+ // Preparing the context objects for iterating over strings
+ buffer.m_srcUtilIter_.setText(source);
+ buffer.m_srcUtilColEIter_.setText(buffer.m_srcUtilIter_, textoffset);
+ buffer.m_tgtUtilIter_.setText(target);
+ buffer.m_tgtUtilColEIter_.setText(buffer.m_tgtUtilIter_, textoffset);
+
+ // Non shifted primary processing is quite simple
+ if (!m_isAlternateHandlingShifted_) {
+ int hiraganaresult = 0; // set at most once, at the first position where only one side is hiragana
+ while (true) {
+ int sorder = 0;
+ // We fetch CEs until we hit a non ignorable primary or end.
+ do {
+ sorder = buffer.m_srcUtilColEIter_.next();
+ buffer.m_srcUtilCEBuffer_ = append(buffer.m_srcUtilCEBuffer_, buffer.m_srcUtilCEBufferSize_, sorder); // the full CE is kept for the later levels
+ buffer.m_srcUtilCEBufferSize_++;
+ sorder &= CE_PRIMARY_MASK_;
+ } while (sorder == CollationElementIterator.IGNORABLE);
+
+ int torder = 0;
+ do {
+ torder = buffer.m_tgtUtilColEIter_.next();
+ buffer.m_tgtUtilCEBuffer_ = append(buffer.m_tgtUtilCEBuffer_, buffer.m_tgtUtilCEBufferSize_, torder); // the full CE is kept for the later levels
+ buffer.m_tgtUtilCEBufferSize_++;
+ torder &= CE_PRIMARY_MASK_;
+ } while (torder == CollationElementIterator.IGNORABLE);
+
+ if (!isContinuation(sorder) && m_leadBytePermutationTable_ != null) { // NOTE(review): sorder is already masked with CE_PRIMARY_MASK_ here and torder is not tested - confirm this is the intended continuation check
+ sorder = (m_leadBytePermutationTable_[((sorder >> 24) + 256) % 256] << 24) | (sorder & 0x00FFFFFF); // replace the lead byte through the permutation table
+ torder = (m_leadBytePermutationTable_[((torder >> 24) + 256) % 256] << 24) | (torder & 0x00FFFFFF);
+ }
+
+ // if both primaries are the same
+ if (sorder == torder) {
+ // and there are no more CEs, we advance to the next level
+ // see if we are at the end of either string
+ if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
+ if (buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1] != CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ break;
+ } else if (buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) {
+ return 1;
+ }
+ if (doHiragana4 && hiraganaresult == 0
+ && buffer.m_srcUtilColEIter_.m_isCodePointHiragana_ != buffer.m_tgtUtilColEIter_.m_isCodePointHiragana_) {
+ if (buffer.m_srcUtilColEIter_.m_isCodePointHiragana_) {
+ hiraganaresult = -1;
+ } else {
+ hiraganaresult = 1;
+ }
+ }
+ } else {
+ // if two primaries are different, we are done
+ return endPrimaryCompare(sorder, torder, buffer);
+ }
+ }
+ // no primary difference... do the rest from the buffers
+ return hiraganaresult;
+ } else { // shifted - do a slightly more complicated processing :)
+ while (true) {
+ int sorder = getPrimaryShiftedCompareCE(buffer.m_srcUtilColEIter_, lowestpvalue, true, buffer);
+ int torder = getPrimaryShiftedCompareCE(buffer.m_tgtUtilColEIter_, lowestpvalue, false, buffer);
+ if (sorder == torder) {
+ if (buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1] == CollationElementIterator.NULLORDER) { // end of source reached with equal primaries
+ break;
+ } else {
+ continue;
+ }
+ } else {
+ return endPrimaryCompare(sorder, torder, buffer);
+ }
+ } // no primary difference... do the rest from the buffers
+ }
+ return 0;
+ }
+
+ /**
+ * Finishes a primary comparison once sorder is known to differ from torder. The sizes of both CE buffers are reset
+ * to -1 as a side effect.
+ *
+ * @param sorder source strength order
+ * @param torder target strength order
+ * @param buffer collation buffer temporary state
+ * @return -1 if the last source CE is NULLORDER, otherwise 1 if the last target CE is NULLORDER, otherwise -1 or 1
+ *         from comparing the two orders after an unsigned shift by CE_PRIMARY_SHIFT_
+ */
+ private static final int endPrimaryCompare(int sorder, int torder, CollationBuffer buffer) {
+     // the CEs that stopped the primary loop are the ones appended last
+     final int lastSourceCE = buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilCEBufferSize_ - 1];
+     final int lastTargetCE = buffer.m_tgtUtilCEBuffer_[buffer.m_tgtUtilCEBufferSize_ - 1];
+
+     // both buffers are discarded, no further level will be compared
+     buffer.m_srcUtilCEBufferSize_ = -1;
+     buffer.m_tgtUtilCEBufferSize_ = -1;
+
+     if (lastSourceCE == CollationElementIterator.NULLORDER) {
+         return -1;
+     }
+     if (lastTargetCE == CollationElementIterator.NULLORDER) {
+         return 1;
+     }
+     // the unsigned shift drops the sign so that the primaries compare as unsigned values
+     final int sourcePrimary = sorder >>> CE_PRIMARY_SHIFT_;
+     final int targetPrimary = torder >>> CE_PRIMARY_SHIFT_;
+     return (sourcePrimary < targetPrimary) ? -1 : 1;
+ }
+
+ /**
+ * Calculates the next primary shifted value and fills up cebuffer with the next non-ignorable ce.
+ * <p>
+ * CEs are read from coleiter until NULLORDER, a non-continuation CE whose primary is above lowestpvalue, or a
+ * continuation CE with a primary that follows a non-shifted CE is found. CEs passed on the way are appended to the
+ * source or target CE buffer as coded below; the local copies of the buffer and its size are written back to
+ * buffer before returning.
+ *
+ * @param coleiter collation element iterator
+ * @param lowestpvalue lowest primary shifted value that will not be ignored
+ * @param isSrc true to use the source CE buffer of buffer, false to use the target CE buffer
+ * @param buffer collation buffer temporary state
+ * @return the CE that ended the scan, masked with CE_PRIMARY_MASK_
+ */
+ private static final int getPrimaryShiftedCompareCE(CollationElementIterator coleiter, int lowestpvalue, boolean isSrc, CollationBuffer buffer)
+ {
+ boolean shifted = false; // true while the CEs being read follow a shifted (variable) CE
+ int result = CollationElementIterator.IGNORABLE;
+ int cebuffer[] = buffer.m_srcUtilCEBuffer_;
+ int cebuffersize = buffer.m_srcUtilCEBufferSize_;
+ if (!isSrc) {
+ cebuffer = buffer.m_tgtUtilCEBuffer_;
+ cebuffersize = buffer.m_tgtUtilCEBufferSize_;
+ }
+ while (true) {
+ result = coleiter.next();
+ if (result == CollationElementIterator.NULLORDER) { // end of text: record the terminator and stop
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ break;
+ } else if (result == CollationElementIterator.IGNORABLE
+ || (shifted && (result & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE)) {
+ // UCA amendment - ignore ignorables that follow shifted code
+ // points
+ continue;
+ } else if (isContinuation(result)) {
+ if ((result & CE_PRIMARY_MASK_) != CollationElementIterator.IGNORABLE) {
+ // There is primary value
+ if (shifted) {
+ result = (result & CE_PRIMARY_MASK_) | CE_CONTINUATION_MARKER_;
+ // preserve interesting continuation
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ continue;
+ } else {
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ break;
+ }
+ } else { // Just lower level values
+ if (!shifted) {
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ }
+ }
+ } else { // regular
+ if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_, lowestpvalue) > 0) { // primary above the variable range: this CE ends the scan
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ break;
+ } else {
+ if ((result & CE_PRIMARY_MASK_) != 0) { // shifted CE: only its primary bits are stored
+ shifted = true;
+ result &= CE_PRIMARY_MASK_;
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ continue;
+ } else { // zero primary: stored unchanged, ends the shifted state
+ cebuffer = append(cebuffer, cebuffersize, result);
+ cebuffersize++;
+ shifted = false;
+ continue;
+ }
+ }
+ }
+ }
+ if (isSrc) { // write the possibly reallocated array and the new size back
+ buffer.m_srcUtilCEBuffer_ = cebuffer;
+ buffer.m_srcUtilCEBufferSize_ = cebuffersize;
+ } else {
+ buffer.m_tgtUtilCEBuffer_ = cebuffer;
+ buffer.m_tgtUtilCEBufferSize_ = cebuffersize;
+ }
+ result &= CE_PRIMARY_MASK_;
+ return result;
+ }
+
+ /**
+ * Appends an int to an array of ints, growing the array by CE_BUFFER_SIZE_ slots when the write position has
+ * reached the last slot or lies beyond it.
+ *
+ * @param array
+ * int array to append to
+ * @param appendindex
+ * index at which value will be stored
+ * @param value
+ * to append
+ * @return array if size is not increased, otherwise a new array will be returned
+ */
+ private static final int[] append(int[] array, int appendindex, int value) {
+     // growth is triggered one slot early, i.e. as soon as appendindex reaches the last slot
+     final boolean outOfRoom = appendindex + 1 >= array.length;
+     final int[] target = outOfRoom ? increase(array, appendindex, CE_BUFFER_SIZE_) : array;
+     target[appendindex] = value;
+     return target;
+ }
+
+ /**
+ * Does secondary strength comparison based on the collected ces.
+ * <p>
+ * Without French ordering both CE buffers are walked from the front, skipping CEs whose secondary bits (masked
+ * with CE_SECONDARY_MASK_) are ignorable. With French ordering both buffers are walked from the back through
+ * getSecondaryFrenchCE, starting at index size - 2.
+ *
+ * @param doFrench flag indicates if French ordering is to be done
+ * @param buffer collation buffer temporary state
+ * @return the secondary strength comparison result: -1, 1, or 0 if no secondary difference is found
+ */
+ private static final int doSecondaryCompare(boolean doFrench, CollationBuffer buffer) {
+ // now, we're gonna reexamine collected CEs
+ if (!doFrench) { // normal
+ int soffset = 0;
+ int toffset = 0;
+ while (true) {
+ int sorder = CollationElementIterator.IGNORABLE;
+ while (sorder == CollationElementIterator.IGNORABLE) { // next source CE with a secondary weight
+ sorder = buffer.m_srcUtilCEBuffer_[soffset++] & CE_SECONDARY_MASK_;
+ }
+ int torder = CollationElementIterator.IGNORABLE;
+ while (torder == CollationElementIterator.IGNORABLE) { // next target CE with a secondary weight
+ torder = buffer.m_tgtUtilCEBuffer_[toffset++] & CE_SECONDARY_MASK_;
+ }
+
+ if (sorder == torder) {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) { // source exhausted
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ break;
+ } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) { // only target exhausted
+ return 1;
+ }
+ } else {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
+ return 1;
+ }
+ return (sorder < torder) ? -1 : 1;
+ }
+ }
+ } else { // do the French
+ buffer.m_srcUtilContOffset_ = 0;
+ buffer.m_tgtUtilContOffset_ = 0;
+ buffer.m_srcUtilOffset_ = buffer.m_srcUtilCEBufferSize_ - 2; // presumably skips the trailing NULLORDER entry - confirm against the primary compare
+ buffer.m_tgtUtilOffset_ = buffer.m_tgtUtilCEBufferSize_ - 2;
+ while (true) {
+ int sorder = getSecondaryFrenchCE(true, buffer);
+ int torder = getSecondaryFrenchCE(false, buffer);
+ if (sorder == torder) {
+ if ((buffer.m_srcUtilOffset_ < 0 && buffer.m_tgtUtilOffset_ < 0)
+ || (buffer.m_srcUtilOffset_ >= 0 && buffer.m_srcUtilCEBuffer_[buffer.m_srcUtilOffset_] == CollationElementIterator.NULLORDER)) {
+ break;
+ }
+ } else {
+ return (sorder < torder) ? -1 : 1;
+ }
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Calculates the next secondary french CE.
+ * <p>
+ * Reads the source or target CE buffer backwards from the stored offset until a CE with non-ignorable secondary
+ * bits is found or the offset drops below 0. While a continuation offset is set, the CEs of that continuation
+ * sequence are read forwards instead. The updated offset and continuation offset are stored back into buffer.
+ *
+ * @param isSrc flag indicator if we are calculating the src ces
+ * @param buffer collation buffer temporary state
+ * @return result next modified ce, masked with CE_SECONDARY_MASK_; IGNORABLE if the start of the buffer is passed
+ *         without finding one
+ */
+ private static final int getSecondaryFrenchCE(boolean isSrc, CollationBuffer buffer) {
+ int result = CollationElementIterator.IGNORABLE;
+ int offset = buffer.m_srcUtilOffset_;
+ int continuationoffset = buffer.m_srcUtilContOffset_;
+ int cebuffer[] = buffer.m_srcUtilCEBuffer_;
+ if (!isSrc) {
+ offset = buffer.m_tgtUtilOffset_;
+ continuationoffset = buffer.m_tgtUtilContOffset_;
+ cebuffer = buffer.m_tgtUtilCEBuffer_;
+ }
+
+ while (result == CollationElementIterator.IGNORABLE && offset >= 0) {
+ if (continuationoffset == 0) { // not inside a continuation sequence: step backwards
+ result = cebuffer[offset];
+ while (isContinuation(cebuffer[offset--])) { // empty body: the condition itself moves offset backwards
+ }
+ // after this, sorder is at the start of continuation,
+ // and offset points before that
+ if (isContinuation(cebuffer[offset + 1])) {
+ // save offset for later
+ continuationoffset = offset;
+ offset += 2;
+ }
+ } else { // inside a continuation sequence: step forwards
+ result = cebuffer[offset++];
+ if (!isContinuation(result)) {
+ // we have finished with this continuation
+ offset = continuationoffset;
+ // reset the pointer to before continuation
+ continuationoffset = 0;
+ continue;
+ }
+ }
+ result &= CE_SECONDARY_MASK_; // remove continuation bit
+ }
+ if (isSrc) { // remember the position for the next call
+ buffer.m_srcUtilOffset_ = offset;
+ buffer.m_srcUtilContOffset_ = continuationoffset;
+ } else {
+ buffer.m_tgtUtilOffset_ = offset;
+ buffer.m_tgtUtilContOffset_ = continuationoffset;
+ }
+ return result;
+ }
+
+ /**
+ * Does case strength comparison based on the collected ces.
+ * <p>
+ * Both CE buffers are walked from the front. Continuation CEs are skipped, and so are CEs without a primary weight
+ * unless buffer.m_utilCompare2_ is set. The remaining CEs are masked with CE_CASE_MASK_3_, XORed with
+ * m_caseSwitch_ and compared on their CE_CASE_BIT_MASK_ bits.
+ * <p>
+ * The end-of-target test in the "different case bits" branch indexes the target buffer with the target offset; it
+ * used to index it with the source offset, which examined an unrelated target CE.
+ *
+ * @param buffer collation buffer temporary state
+ * @return the case strength comparison result: -1, 1, or 0 if no case difference is found
+ */
+ private final int doCaseCompare(CollationBuffer buffer) {
+     int soffset = 0;
+     int toffset = 0;
+     while (true) {
+         int sorder = CollationElementIterator.IGNORABLE;
+         int torder = CollationElementIterator.IGNORABLE;
+         while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
+             sorder = buffer.m_srcUtilCEBuffer_[soffset++];
+             if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || buffer.m_utilCompare2_)) {
+                 // primary ignorables should not be considered on the case level when the strength is primary
+                 // otherwise, the CEs stop being well-formed
+                 sorder &= CE_CASE_MASK_3_;
+                 sorder ^= m_caseSwitch_;
+             } else {
+                 sorder = CollationElementIterator.IGNORABLE;
+             }
+         }
+
+         while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) {
+             torder = buffer.m_tgtUtilCEBuffer_[toffset++];
+             if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || buffer.m_utilCompare2_)) {
+                 // primary ignorables should not be considered on the case level when the strength is primary
+                 // otherwise, the CEs stop being well-formed
+                 torder &= CE_CASE_MASK_3_;
+                 torder ^= m_caseSwitch_;
+             } else {
+                 torder = CollationElementIterator.IGNORABLE;
+             }
+         }
+
+         sorder &= CE_CASE_BIT_MASK_;
+         torder &= CE_CASE_BIT_MASK_;
+         if (sorder == torder) {
+             // checking end of strings
+             if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
+                 if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
+                     return -1;
+                 }
+                 break;
+             } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
+                 return 1;
+             }
+         } else {
+             if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
+                 return -1;
+             }
+             // the target buffer must be indexed with the target offset, as in the other level comparisons
+             if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
+                 return 1;
+             }
+             return (sorder < torder) ? -1 : 1;
+         }
+     }
+     return 0;
+ }
+
+ /**
+ * Does tertiary strength comparison based on the collected ces.
+ * <p>
+ * Both CE buffers are walked from the front. Each CE is masked with m_mask3_; non-continuation CEs are then XORed
+ * with m_caseSwitch_, continuation CEs are masked with CE_REMOVE_CASE_. CEs whose CE_REMOVE_CASE_ bits are
+ * ignorable after that are skipped.
+ *
+ * @param buffer collation buffer temporary state
+ * @return the tertiary strength comparison result: -1, 1, or 0 if no tertiary difference is found
+ */
+ private final int doTertiaryCompare(CollationBuffer buffer) {
+ int soffset = 0;
+ int toffset = 0;
+ while (true) {
+ int sorder = CollationElementIterator.IGNORABLE;
+ int torder = CollationElementIterator.IGNORABLE;
+ while ((sorder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) { // next source CE with a tertiary weight
+ sorder = buffer.m_srcUtilCEBuffer_[soffset++] & m_mask3_;
+ if (!isContinuation(sorder)) {
+ sorder ^= m_caseSwitch_;
+ } else {
+ sorder &= CE_REMOVE_CASE_;
+ }
+ }
+
+ while ((torder & CE_REMOVE_CASE_) == CollationElementIterator.IGNORABLE) { // next target CE with a tertiary weight
+ torder = buffer.m_tgtUtilCEBuffer_[toffset++] & m_mask3_;
+ if (!isContinuation(torder)) {
+ torder ^= m_caseSwitch_;
+ } else {
+ torder &= CE_REMOVE_CASE_;
+ }
+ }
+
+ if (sorder == torder) {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) { // source exhausted
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ break;
+ } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) { // only target exhausted
+ return 1;
+ }
+ } else {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
+ return 1;
+ }
+ return (sorder < torder) ? -1 : 1;
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Does quaternary strength comparison based on the collected ces.
+ * <p>
+ * Both CE buffers are walked from the front. A non-continuation CE that is above lowestpvalue (unsigned compare)
+ * or has no primary weight is replaced by CE_PRIMARY_MASK_ and ends the shifted state; any other non-continuation
+ * CE keeps its value and starts the shifted state. Continuation CEs are skipped unless the shifted state is
+ * active. The values are compared after an unsigned shift by CE_PRIMARY_SHIFT_.
+ *
+ * @param lowestpvalue the lowest primary value that will not be ignored if alternate handling is shifted
+ * @param buffer collation buffer temporary state
+ * @return the quaternary strength comparison result: -1, 1, or 0 if no quaternary difference is found
+ */
+ private final int doQuaternaryCompare(int lowestpvalue, CollationBuffer buffer) {
+ boolean sShifted = true;
+ boolean tShifted = true;
+ int soffset = 0;
+ int toffset = 0;
+ while (true) {
+ int sorder = CollationElementIterator.IGNORABLE;
+ int torder = CollationElementIterator.IGNORABLE;
+ while (sorder == CollationElementIterator.IGNORABLE || (isContinuation(sorder) && !sShifted)) {
+ sorder = buffer.m_srcUtilCEBuffer_[soffset++];
+ if (isContinuation(sorder)) {
+ if (!sShifted) {
+ continue; // continuation of a non-shifted CE: skipped
+ }
+ } else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0
+ || (sorder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
+ // non continuation
+ sorder = CE_PRIMARY_MASK_;
+ sShifted = false;
+ } else {
+ sShifted = true;
+ }
+ }
+ sorder >>>= CE_PRIMARY_SHIFT_;
+ while (torder == CollationElementIterator.IGNORABLE || (isContinuation(torder) && !tShifted)) {
+ torder = buffer.m_tgtUtilCEBuffer_[toffset++];
+ if (isContinuation(torder)) {
+ if (!tShifted) {
+ continue; // continuation of a non-shifted CE: skipped
+ }
+ } else if (Utility.compareUnsigned(torder, lowestpvalue) > 0
+ || (torder & CE_PRIMARY_MASK_) == CollationElementIterator.IGNORABLE) {
+ // non continuation
+ torder = CE_PRIMARY_MASK_;
+ tShifted = false;
+ } else {
+ tShifted = true;
+ }
+ }
+ torder >>>= CE_PRIMARY_SHIFT_;
+
+ if (sorder == torder) {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) { // source exhausted
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] != CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ break;
+ } else if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) { // only target exhausted
+ return 1;
+ }
+ } else {
+ if (buffer.m_srcUtilCEBuffer_[soffset - 1] == CollationElementIterator.NULLORDER) {
+ return -1;
+ }
+ if (buffer.m_tgtUtilCEBuffer_[toffset - 1] == CollationElementIterator.NULLORDER) {
+ return 1;
+ }
+ return (sorder < torder) ? -1 : 1;
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Internal function. Does byte level string compare. Used by strcoll if strength == identical and strings are
+ * otherwise equal. This is a rare case. Comparison must be done on NFD normalized strings. FCD is not good enough.
+ * <p>
+ * When normalize is set, each string that does not pass the NFD quick check is decomposed and the comparison
+ * starts at offset 0, because the given offset no longer applies to the decomposed text.
+ *
+ * @param source
+ * text
+ * @param target
+ * text
+ * @param offset
+ * of the first difference in the text strings
+ * @param normalize
+ * flag indicating if we are to normalize the text before comparison
+ * @return 1 if source is greater than target, -1 less than and 0 if equals
+ */
+ private static final int doIdenticalCompare(String source, String target, int offset, boolean normalize)
+ {
+     if (!normalize) {
+         return doStringCompare(source, target, offset);
+     }
+
+     String nfdSource = source;
+     if (Normalizer.quickCheck(source, Normalizer.NFD, 0) != Normalizer.YES) {
+         nfdSource = Normalizer.decompose(source, false);
+     }
+
+     String nfdTarget = target;
+     if (Normalizer.quickCheck(target, Normalizer.NFD, 0) != Normalizer.YES) {
+         nfdTarget = Normalizer.decompose(target, false);
+     }
+
+     return doStringCompare(nfdSource, nfdTarget, 0);
+ }
+
+ /**
+ * Compares strings in code point order, starting at the given offset. Chars at or above the lead surrogate range
+ * are remapped with fixupUTF16 before the deciding comparison, so that surrogates sort after all other chars.
+ *
+ * @param source
+ * text
+ * @param target
+ * text
+ * @param offset
+ * start offset for comparison
+ * @return 1 if source is greater than target, -1 less than and 0 if equals
+ */
+ private static final int doStringCompare(String source, String target, int offset) {
+     final int slength = source.length();
+     final int tlength = target.length();
+     final int limit = Math.min(slength, tlength);
+
+     // scan the shared range for the first pair of chars that differ
+     char sourceUnit = 0;
+     char targetUnit = 0;
+     int index = offset;
+     while (index < limit) {
+         sourceUnit = source.charAt(index);
+         targetUnit = target.charAt(index);
+         index++;
+         if (sourceUnit != targetUnit) {
+             break;
+         }
+     }
+
+     // no difference within the shared range: the longer string is the greater one
+     if (index == limit && sourceUnit == targetUnit) {
+         if (slength > limit) {
+             return 1;
+         }
+         return (tlength > limit) ? -1 : 0;
+     }
+
+     // if both values are in or above the surrogate range, fix them up
+     final boolean bothInOrAboveSurrogates = sourceUnit >= UTF16.LEAD_SURROGATE_MIN_VALUE
+             && targetUnit >= UTF16.LEAD_SURROGATE_MIN_VALUE;
+     if (bothInOrAboveSurrogates) {
+         sourceUnit = fixupUTF16(sourceUnit);
+         targetUnit = fixupUTF16(targetUnit);
+     }
+
+     // the two chars are now in UTF-32-compatible order and are expected to differ
+     return (sourceUnit < targetUnit) ? -1 : 1;
+ }
+
+ /**
+ * Rotate surrogates to the top to get code point order: chars from 0xe000 upwards are moved down by 0x800, all
+ * other chars are moved up by 0x2000 (with char wrap-around).
+ *
+ * @param ch UTF-16 code unit to remap
+ * @return the remapped code unit
+ */
+ private static final char fixupUTF16(char ch) {
+     final int shifted = (ch >= 0xe000) ? (ch - 0x800) : (ch + 0x2000);
+     // narrowing back to char gives the same wrap-around as a compound assignment on a char
+     return (char) shifted;
+ }
+
+ private static final int UCOL_REORDER_CODE_IGNORE = ReorderCodes.LIMIT + 1;
+ /**
+ * Builds the lead byte permutation table
+ */
+ private void buildPermutationTable() {
+ if (m_reorderCodes_ == null || m_reorderCodes_.length == 0 || (m_reorderCodes_.length == 1 && m_reorderCodes_[0] == ReorderCodes.NONE)) {
+ m_leadBytePermutationTable_ = null;
+ return;
+ }
+
+ if (m_reorderCodes_[0] == ReorderCodes.DEFAULT) {
+ if (m_reorderCodes_.length != 1) {
+ throw new IllegalArgumentException("Illegal collation reorder codes - default reorder code must be the only code in the list.");
+ }
+ // swap the reorder codes for those at build of the rules
+ if (m_defaultReorderCodes_ == null || m_defaultReorderCodes_.length == 0) {
+ m_leadBytePermutationTable_ = null;
+ }
+ m_reorderCodes_ = m_defaultReorderCodes_.clone();
+ }
+
+ // TODO - these need to be read in from the UCA data file
+ // The lowest byte that hasn't been assigned a mapping
+ int toBottom = 0x03;
+ // The highest byte that hasn't been assigned a mapping
+ int toTop = 0xe4;
+
+ // filled slots in the output m_scriptOrder_
+ boolean[] permutationSlotFilled = new boolean[256];
+
+ // used lead bytes
+ boolean[] newLeadByteUsed = new boolean[256];
+
+ if (m_leadBytePermutationTable_ == null) {
+ m_leadBytePermutationTable_ = new byte[256];
+ }
+
+ // prefill the reordering codes with the leading entries
+ int[] internalReorderCodes = new int[m_reorderCodes_.length + (ReorderCodes.LIMIT - ReorderCodes.FIRST)];
+ for (int codeIndex = 0; codeIndex < ReorderCodes.LIMIT - ReorderCodes.FIRST; codeIndex++) {
+ internalReorderCodes[codeIndex] = ReorderCodes.FIRST + codeIndex;
+ }
+ for (int codeIndex = 0; codeIndex < m_reorderCodes_.length; codeIndex++) {
+ internalReorderCodes[codeIndex + (ReorderCodes.LIMIT - ReorderCodes.FIRST)] = m_reorderCodes_[codeIndex];
+ if (m_reorderCodes_[codeIndex] >= ReorderCodes.FIRST && m_reorderCodes_[codeIndex] < ReorderCodes.LIMIT) {
+ internalReorderCodes[m_reorderCodes_[codeIndex] - ReorderCodes.FIRST] = UCOL_REORDER_CODE_IGNORE;
+ }
+ }
+
+ /*
+ * Start from the front of the list and place each script we encounter at the earliest possible locatation
+ * in the permutation table. If we encounter UNKNOWN, start processing from the back, and place each script
+ * in the last possible location. At each step, we also need to make sure that any scripts that need to not
+ * be moved are copied to their same location in the final table.
+ */
+ boolean fromTheBottom = true;
+ int reorderCodesIndex = -1;
+ for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodes.length; reorderCodesCount++) {
+ reorderCodesIndex += fromTheBottom ? 1 : -1;
+ int next = internalReorderCodes[reorderCodesIndex];
+ if (next == UCOL_REORDER_CODE_IGNORE) {
+ continue;
+ }
+ if (next == UScript.UNKNOWN) {
+ if (fromTheBottom == false) {
+ // double turnaround
+ m_leadBytePermutationTable_ = null;
+ throw new IllegalArgumentException("Illegal collation reorder codes - two \"from the end\" markers.");
+ }
+ fromTheBottom = false;
+ reorderCodesIndex = internalReorderCodes.length;
+ continue;
+ }
+
+ int[] leadBytes = RuleBasedCollator.LEADBYTE_CONSTANTS_.getLeadBytesForReorderCode(next);
+ if (fromTheBottom) {
+ for (int leadByte : leadBytes) {
+ // don't place a lead byte twice in the permutation table
+ if (permutationSlotFilled[leadByte]) {
+ // lead byte already used
+ m_leadBytePermutationTable_ = null;
+ throw new IllegalArgumentException("Illegal reorder codes specified - multiple codes with the same lead byte.");
+ }
+ m_leadBytePermutationTable_[leadByte] = (byte) toBottom;
+ newLeadByteUsed[toBottom] = true;
+ permutationSlotFilled[leadByte] = true;
+ toBottom++;
+ }
+ } else {
+ for (int leadByteIndex = leadBytes.length - 1; leadByteIndex >= 0; leadByteIndex--) {
+ int leadByte = leadBytes[leadByteIndex];
+ // don't place a lead byte twice in the permutation table
+ if (permutationSlotFilled[leadByte]) {
+ // lead byte already used
+ m_leadBytePermutationTable_ = null;
+ throw new IllegalArgumentException("Illegal reorder codes specified - multiple codes with the same lead byte.");
+ }
+
+ m_leadBytePermutationTable_[leadByte] = (byte) toTop;
+ newLeadByteUsed[toTop] = true;
+ permutationSlotFilled[leadByte] = true;
+ toTop--;
+ }
+ }
+ }
+
+ /* Copy everything that's left over */
+ int reorderCode = 0;
+ for (int i = 0; i < 256; i++) {
+ if (!permutationSlotFilled[i]) {
+ while (newLeadByteUsed[reorderCode]) {
+ if (reorderCode > 255) {
+ throw new IllegalArgumentException("Unable to fill collation reordering table slots - no available reordering code.");
+ }
+ reorderCode++;
+ }
+ m_leadBytePermutationTable_[i] = (byte) reorderCode;
+ permutationSlotFilled[i] = true;
+ newLeadByteUsed[reorderCode] = true;
+ }
+ }
+
+ // for (int i = 0; i < 256; i++){
+ // System.out.println(Integer.toString(i, 16) + " -> " + Integer.toString(m_scriptReorderTable_[i], 16));
+ // }
+ latinOneRegenTable_ = true;
+ updateInternalState();
+ }
+
+ /**
+ * Resets the internal case data members and compression values.
+ */
+ private void updateInternalState() {
+ if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+ m_caseSwitch_ = CASE_SWITCH_;
+ } else {
+ m_caseSwitch_ = NO_CASE_SWITCH_;
+ }
+
+ if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) {
+ m_mask3_ = CE_REMOVE_CASE_;
+ m_common3_ = COMMON_NORMAL_3_;
+ m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_;
+ m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_;
+ m_bottom3_ = COMMON_BOTTOM_3_;
+ } else {
+ m_mask3_ = CE_KEEP_CASE_;
+ m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_;
+ if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) {
+ m_common3_ = COMMON_UPPER_FIRST_3_;
+ m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_;
+ m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_;
+ } else {
+ m_common3_ = COMMON_NORMAL_3_;
+ m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_;
+ m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_;
+ }
+ }
+
+ // Set the compression values
+ int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1;
+ // we multilply double with int, but need only int
+ m_topCount3_ = (int) (PROPORTION_3_ * total3);
+ m_bottomCount3_ = total3 - m_topCount3_;
+
+ if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_ && !m_isFrenchCollation_
+ && !m_isAlternateHandlingShifted_) {
+ m_isSimple3_ = true;
+ } else {
+ m_isSimple3_ = false;
+ }
+ if (!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_
+ && !m_isAlternateHandlingShifted_ && !latinOneFailed_) {
+ if (latinOneCEs_ == null || latinOneRegenTable_) {
+ if (setUpLatinOne()) { // if we succeed in building latin1 table, we'll use it
+ latinOneUse_ = true;
+ } else {
+ latinOneUse_ = false;
+ latinOneFailed_ = true;
+ }
+ latinOneRegenTable_ = false;
+ } else { // latin1Table exists and it doesn't need to be regenerated, just use it
+ latinOneUse_ = true;
+ }
+ } else {
+ latinOneUse_ = false;
+ }
+
+ }
+
+    /**
+     * Initializes the RuleBasedCollator: finds the smallest unsafe and contraction-ending
+     * characters below the heuristic limit, copies every default attribute into the current
+     * attribute and refreshes the derived state.
+     */
+    private final void init() {
+        // Smallest unsafe char, or DEFAULT_MIN_HEURISTIC_ if none is found below it.
+        m_minUnsafe_ = 0;
+        while (m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_ && !isUnsafe(m_minUnsafe_)) {
+            m_minUnsafe_++;
+        }
+
+        // Smallest contraction-ending char, or DEFAULT_MIN_HEURISTIC_ if none is found below it.
+        m_minContractionEnd_ = 0;
+        while (m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_ && !isContractionEnd(m_minContractionEnd_)) {
+            m_minContractionEnd_++;
+        }
+
+        // While this flag is set updateInternalState() will not build the Latin-1 table;
+        // presumably this keeps the two setters below from building it before all attributes
+        // are in place - confirm against setStrength/setDecomposition.
+        latinOneFailed_ = true;
+        setStrength(m_defaultStrength_);
+        setDecomposition(m_defaultDecomposition_);
+        m_variableTopValue_ = m_defaultVariableTopValue_;
+        m_isFrenchCollation_ = m_defaultIsFrenchCollation_;
+        m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_;
+        m_isCaseLevel_ = m_defaultIsCaseLevel_;
+        m_caseFirst_ = m_defaultCaseFirst_;
+        m_isHiragana4_ = m_defaultIsHiragana4_;
+        m_isNumericCollation_ = m_defaultIsNumericCollation_;
+        latinOneFailed_ = false;
+
+        m_reorderCodes_ = (m_defaultReorderCodes_ == null) ? null : m_defaultReorderCodes_.clone();
+        updateInternalState();
+    }
+
+ // Consts for Latin-1 special processing
+ private static final int ENDOFLATINONERANGE_ = 0xFF; // highest code unit handled by the Latin-1 fast path
+ private static final int LATINONETABLELEN_ = (ENDOFLATINONERANGE_ + 50); // initial length of each of the three table levels; slots above 0xFF hold contraction results
+ private static final int BAIL_OUT_CE_ = 0xFF000000; // table marker: give up on the fast path and use the regular comparison
+
+    /**
+     * Bit shifts at which the next primary, secondary and tertiary weight byte of the Latin-1
+     * table entry under construction will be stored. Each shift starts at 24 (the top byte) and
+     * is lowered by 8 in addLatinOneEntry every time a weight byte is written.
+     * <p>
+     * Declared static because it never touches the enclosing collator.
+     */
+    private static class shiftValues {
+        int primShift = 24;
+        int secShift = 24;
+        int terShift = 24;
+    }
+
+    /**
+     * Splits one collation element into its weight bytes and ORs them into the Latin-1 table
+     * entry with index {@code ch}, at the positions given by {@code sh}.
+     * <p>
+     * If a second primary byte has to be stored but the primary shift is already negative, all
+     * three levels of the entry are set to {@code BAIL_OUT_CE_} and the method returns.
+     *
+     * @param ch index of the entry inside each level of {@code latinOneCEs_} (a Latin-1 code unit
+     *            or a contraction slot)
+     * @param CE collation element to add
+     * @param sh shifts for the entry; each one is lowered by 8 for every weight byte stored
+     */
+    private final void addLatinOneEntry(char ch, int CE, shiftValues sh) {
+        int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;
+        boolean continuation = isContinuation(CE);
+        boolean reverseSecondary = false;
+        if (!continuation) {
+            tertiary = ((CE & m_mask3_));
+            tertiary ^= m_caseSwitch_;
+            reverseSecondary = true;
+        } else {
+            tertiary = (byte) ((CE & CE_REMOVE_CONTINUATION_MASK_));
+            tertiary &= CE_REMOVE_CASE_;
+            reverseSecondary = false;
+        }
+
+        secondary = ((CE >>>= 8) & LAST_BYTE_MASK_);
+        primary2 = ((CE >>>= 8) & LAST_BYTE_MASK_);
+        primary1 = (CE >>> 8);
+
+        if (primary1 != 0) {
+            if (m_leadBytePermutationTable_ != null && !continuation) {
+                // The table holds bytes; mask to undo sign extension, otherwise a permuted lead
+                // byte >= 0x80 stored with a shift below 24 would set all higher bits of the entry.
+                primary1 = m_leadBytePermutationTable_[primary1] & 0xFF;
+            }
+            latinOneCEs_[ch] |= (primary1 << sh.primShift);
+            sh.primShift -= 8;
+        }
+        if (primary2 != 0) {
+            if (sh.primShift < 0) {
+                // no room left for another primary byte: mark the entry as unusable
+                latinOneCEs_[ch] = BAIL_OUT_CE_;
+                latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
+                latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
+                return;
+            }
+            latinOneCEs_[ch] |= (primary2 << sh.primShift);
+            sh.primShift -= 8;
+        }
+        if (secondary != 0) {
+            if (reverseSecondary && m_isFrenchCollation_) { // reverse secondary
+                latinOneCEs_[latinOneTableLen_ + ch] >>>= 8; // make space for secondary
+                latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << 24);
+            } else { // normal case
+                latinOneCEs_[latinOneTableLen_ + ch] |= (secondary << sh.secShift);
+            }
+            sh.secShift -= 8;
+        }
+        if (tertiary != 0) {
+            latinOneCEs_[2 * latinOneTableLen_ + ch] |= (tertiary << sh.terShift);
+            sh.terShift -= 8;
+        }
+    }
+
+    /**
+     * Replaces {@code latinOneCEs_} with a table of three levels of {@code newSize} entries each,
+     * carrying over the leading entries of every level (as many as fit in both the old and the
+     * new level length), and updates {@code latinOneTableLen_}.
+     *
+     * @param newSize new number of entries per level
+     */
+    private final void resizeLatinOneTable(int newSize) {
+        final int[] resized = new int[3 * newSize]; // zero-filled by the JVM
+        final int oldSize = latinOneTableLen_;
+        final int kept = Math.min(newSize, oldSize);
+        for (int level = 0; level < 3; level++) {
+            System.arraycopy(latinOneCEs_, level * oldSize, resized, level * newSize, kept);
+        }
+        latinOneCEs_ = resized;
+        latinOneTableLen_ = newSize;
+    }
+
+    /**
+     * Builds the Latin-1 fast-path table {@code latinOneCEs_}. The table has three consecutive
+     * levels (primary, secondary, tertiary) of {@code latinOneTableLen_} entries each. Indexes
+     * 0x00..0xFF of a level describe the Latin-1 code units; the slots above 0xFF hold the
+     * results of contractions that start with a Latin-1 code unit.
+     * <p>
+     * Entries that cannot be represented are set to {@code BAIL_OUT_CE_}. On success the table is
+     * compacted to the number of slots actually used.
+     *
+     * @return {@code true} if the table was built; {@code false} if a Latin-1 code unit has a
+     *         collation element the fast path cannot handle, in which case
+     *         {@code latinOneFailed_} is set as well
+     */
+    private final boolean setUpLatinOne() {
+        if (latinOneCEs_ == null || m_reallocLatinOneCEs_) {
+            latinOneCEs_ = new int[3 * LATINONETABLELEN_];
+            latinOneTableLen_ = LATINONETABLELEN_;
+            m_reallocLatinOneCEs_ = false;
+        } else {
+            Arrays.fill(latinOneCEs_, 0);
+        }
+        if (m_ContInfo_ == null) {
+            m_ContInfo_ = new ContractionInfo();
+        }
+        char ch = 0;
+        CollationElementIterator it = getCollationElementIterator("");
+
+        shiftValues s = new shiftValues();
+        int CE = 0;
+        // next free contraction slot, right after the Latin-1 range
+        char contractionOffset = ENDOFLATINONERANGE_ + 1;
+
+        for (ch = 0; ch <= ENDOFLATINONERANGE_; ch++) {
+            s.primShift = 24;
+            s.secShift = 24;
+            s.terShift = 24;
+            // ch never exceeds 0xFF inside this loop, so the linear Latin-1 lookup always applies
+            CE = m_trie_.getLatin1LinearValue(ch);
+            if (!isSpecial(CE)) {
+                addLatinOneEntry(ch, CE, s);
+            } else {
+                switch (RuleBasedCollator.getTag(CE)) {
+                case CollationElementIterator.CE_EXPANSION_TAG_:
+                case CollationElementIterator.CE_DIGIT_TAG_:
+                    // let the regular iterator produce the collation elements for this character
+                    it.setText(UCharacter.toString(ch));
+                    while ((CE = it.next()) != CollationElementIterator.NULLORDER) {
+                        if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                            latinOneCEs_[ch] = BAIL_OUT_CE_;
+                            latinOneCEs_[latinOneTableLen_ + ch] = BAIL_OUT_CE_;
+                            latinOneCEs_[2 * latinOneTableLen_ + ch] = BAIL_OUT_CE_;
+                            break;
+                        }
+                        addLatinOneEntry(ch, CE, s);
+                    }
+                    break;
+                case CollationElementIterator.CE_CONTRACTION_TAG_:
+                    // here is the trick
+                    // F2 is contraction. We do something very similar to contractions
+                    // but have two indices, one in the real contraction table and the
+                    // other to where we stuffed things. This hopes that we don't have
+                    // many contractions (this should work for latin-1 tables).
+                    {
+                        if ((CE & 0x00FFF000) != 0) {
+                            // these bits are needed for the Latin-1 slot index below
+                            latinOneFailed_ = true;
+                            return false;
+                        }
+
+                        int UCharOffset = (CE & 0xFFFFFF) - m_contractionOffset_; // getContractionOffset(CE)
+
+                        CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
+
+                        latinOneCEs_[ch] = CE;
+                        latinOneCEs_[latinOneTableLen_ + ch] = CE;
+                        latinOneCEs_[2 * latinOneTableLen_ + ch] = CE;
+
+                        // We're going to jump into contraction table, pick the elements
+                        // and use them
+                        do {
+                            CE = m_contractionCE_[UCharOffset];
+                            if (isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) {
+                                int offset = ((CE & 0xFFFFF0) >> 4) - m_expansionOffset_; // getExpansionOffset(CE)
+                                int size = CE & 0xF; // getExpansionCount(CE)
+                                if (size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
+                                    for (int i = 0; i < size; i++) {
+                                        if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                                            latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                                            latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                            latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                            break;
+                                        }
+                                        addLatinOneEntry(contractionOffset, m_expansion_[offset + i], s);
+                                    }
+                                } else { /* else, we do */
+                                    while (m_expansion_[offset] != 0) {
+                                        if (s.primShift < 0 || s.secShift < 0 || s.terShift < 0) {
+                                            latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                                            latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                            latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                            break;
+                                        }
+                                        addLatinOneEntry(contractionOffset, m_expansion_[offset++], s);
+                                    }
+                                }
+                                contractionOffset++;
+                            } else if (!isSpecial(CE)) {
+                                addLatinOneEntry(contractionOffset++, CE, s);
+                            } else {
+                                latinOneCEs_[contractionOffset] = BAIL_OUT_CE_;
+                                latinOneCEs_[latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                latinOneCEs_[2 * latinOneTableLen_ + contractionOffset] = BAIL_OUT_CE_;
+                                contractionOffset++;
+                            }
+                            UCharOffset++;
+                            s.primShift = 24;
+                            s.secShift = 24;
+                            s.terShift = 24;
+                            if (contractionOffset == latinOneTableLen_) { // we need to reallocate
+                                resizeLatinOneTable(2 * latinOneTableLen_);
+                            }
+                        } while (m_contractionIndex_[UCharOffset] != 0xFFFF);
+                    }
+                    break;
+                case CollationElementIterator.CE_SPEC_PROC_TAG_: {
+                    // 0xB7 is a precontext character defined in UCA5.1, a special
+                    // handle is implemented in order to save LatinOne table for
+                    // most locales.
+                    if (ch == 0xb7) {
+                        addLatinOneEntry(ch, CE, s);
+                    } else {
+                        latinOneFailed_ = true;
+                        return false;
+                    }
+                }
+                    break;
+                default:
+                    latinOneFailed_ = true;
+                    return false;
+                }
+            }
+        }
+        // compact table
+        if (contractionOffset < latinOneTableLen_) {
+            resizeLatinOneTable(contractionOffset);
+        }
+        return true;
+    }
+
+    /**
+     * Mutable string position passed into and out of getLatinOneContraction.
+     * Declared static so that an instance does not keep a hidden reference to the collator that
+     * created it.
+     */
+    private static class ContractionInfo {
+        int index;
+    }
+
+    /**
+     * Scratch position used by the Latin-1 comparison: the caller stores the current string index
+     * before calling getLatinOneContraction and reads the advanced index back afterwards.
+     * Created on demand by setUpLatinOne.
+     */
+    ContractionInfo m_ContInfo_;
+
+ private int getLatinOneContraction(int strength, int CE, String s) {
+ // int strength, int CE, String s, Integer ind) {
+ int len = s.length();
+ // const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF);
+ int UCharOffset = (CE & 0xFFF) - m_contractionOffset_;
+ int offset = 1;
+ int latinOneOffset = (CE & 0x00FFF000) >>> 12;
+ char schar = 0, tchar = 0;
+
+ for (;;) {
+ /*
+ * if(len == -1) { if(s[*index] == 0) { // end of string
+ * return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); } else { schar = s[*index]; }
+ * } else {
+ */
+ if (m_ContInfo_.index == len) {
+ return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset]);
+ } else {
+ schar = s.charAt(m_ContInfo_.index);
+ }
+ // }
+
+ while (schar > (tchar = m_contractionIndex_[UCharOffset + offset]/** (UCharOffset+offset) */
+ )) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
+ offset++;
+ }
+
+ if (schar == tchar) {
+ m_ContInfo_.index++;
+ return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset + offset]);
+ } else {
+ if (schar > ENDOFLATINONERANGE_ /* & 0xFF00 */) {
+ return BAIL_OUT_CE_;
+ }
+ // skip completely ignorables
+ int isZeroCE = m_trie_.getLeadValue(schar); // UTRIE_GET32_FROM_LEAD(coll->mapping, schar);
+ if (isZeroCE == 0) { // we have to ignore completely ignorables
+ m_ContInfo_.index++;
+ continue;
+ }
+
+ return (latinOneCEs_[strength * latinOneTableLen_ + latinOneOffset]);
+ }
+ }
+ }
+
+ /**
+ * This is a fast strcoll, geared towards text in Latin-1. It supports contractions of size two, French secondaries
+ * and case switching. You can use it with strengths primary to tertiary. It does not support shifted and case
+ * level. It relies on the table built by setUpLatinOne. If it doesn't understand something, it will go to the
+ * regular strcoll (compareRegular).
+ * <p>
+ * The strings are compared level by level: first all primary weights, then (for strength SECONDARY and above)
+ * the secondary weights, then (for strength TERTIARY and above) the tertiary weights. Each level reads its own
+ * section of latinOneCEs_. A table value holds up to four weight bytes; they are compared from the top byte
+ * down, shifting left by 8 after every byte that matched.
+ * @param source text to compare
+ * @param target text to compare against
+ * @param startOffset index in both strings at which the comparison starts
+ * @param buffer collation buffer temporary state
+ * @return -1 if source sorts before target, 1 if it sorts after, 0 if they are equal at the collator's
+ *         strength, or the result of compareRegular when the fast path has to give up
+ */
+ private final int compareUseLatin1(String source, String target, int startOffset, CollationBuffer buffer) {
+ int sLen = source.length();
+ int tLen = target.length();
+
+ int strength = getStrength();
+
+ int sIndex = startOffset, tIndex = startOffset;
+ char sChar = 0, tChar = 0;
+ int sOrder = 0, tOrder = 0; // remaining weight bytes of the current table value; 0 means "fetch the next one"
+
+ boolean endOfSource = false;
+
+ // uint32_t *elements = coll->latinOneCEs;
+
+ boolean haveContractions = false; // if we have contractions in our string
+ // we cannot do French secondary
+
+ int offset = latinOneTableLen_; // start of the secondary section of latinOneCEs_
+
+ // Do the primary level
+ primLoop:
+ for (;;) {
+ while (sOrder == 0) { // this loop skips primary ignorables
+ // sOrder=getNextlatinOneCE(source);
+ if (sIndex == sLen) {
+ endOfSource = true;
+ break;
+ }
+ sChar = source.charAt(sIndex++); // [sIndex++];
+ // }
+ if (sChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
+ // fprintf(stderr, "R");
+ return compareRegular(source, target, startOffset, buffer);
+ }
+ sOrder = latinOneCEs_[sChar];
+ if (isSpecial(sOrder)) { // if we got a special
+ // specials can basically be either contractions or bail-out signs. If we get anything
+ // else, we'll bail out anyway
+ if (getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+ m_ContInfo_.index = sIndex;
+ sOrder = getLatinOneContraction(0, sOrder, source);
+ sIndex = m_ContInfo_.index;
+ haveContractions = true; // if there are contractions, we cannot do French secondary
+ // However, if there are contractions in the table, but we always use just one char,
+ // we might be able to do French. This should be checked out.
+ }
+ if (isSpecial(sOrder) /* == UCOL_BAIL_OUT_CE */) {
+ // fprintf(stderr, "S");
+ return compareRegular(source, target, startOffset, buffer);
+ }
+ }
+ }
+
+ while (tOrder == 0) { // this loop skips primary ignorables
+ // tOrder=getNextlatinOneCE(target);
+ if (tIndex == tLen) {
+ if (endOfSource) {
+ break primLoop; // both strings exhausted: primaries are equal
+ } else {
+ return 1; // target ended first
+ }
+ }
+ tChar = target.charAt(tIndex++); // [tIndex++];
+ if (tChar > ENDOFLATINONERANGE_) { // if we encounter non-latin-1, we bail out
+ // fprintf(stderr, "R");
+ return compareRegular(source, target, startOffset, buffer);
+ }
+ tOrder = latinOneCEs_[tChar];
+ if (isSpecial(tOrder)) {
+ // Handling specials, see the comments for source
+ if (getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) {
+ m_ContInfo_.index = tIndex;
+ tOrder = getLatinOneContraction(0, tOrder, target);
+ tIndex = m_ContInfo_.index;
+ haveContractions = true;
+ }
+ if (isSpecial(tOrder)/* == UCOL_BAIL_OUT_CE */) {
+ // fprintf(stderr, "S");
+ return compareRegular(source, target, startOffset, buffer);
+ }
+ }
+ }
+ if (endOfSource) { // source is finished, but target is not, say the result.
+ return -1;
+ }
+
+ if (sOrder == tOrder) { // if we have same CEs, we continue the loop
+ sOrder = 0;
+ tOrder = 0;
+ continue;
+ } else {
+ // compare current top bytes
+ if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
+ // top bytes differ, return difference
+ if (sOrder >>> 8 < tOrder >>> 8) { // unsigned shift keeps the comparison free of sign effects
+ return -1;
+ } else {
+ return 1;
+ }
+ // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
+ // since we must return enum value
+ }
+
+ // top bytes match, continue with following bytes
+ sOrder <<= 8;
+ tOrder <<= 8;
+ }
+ }
+
+ // after primary loop, we definitely know the sizes of strings,
+ // so we set it and use simpler loop for secondaries and tertiaries
+ // sLen = sIndex; tLen = tIndex;
+ if (strength >= SECONDARY) {
+ // adjust the table beginning
+ // latinOneCEs_ += coll->latinOneTableLen;
+ endOfSource = false;
+
+ if (!m_isFrenchCollation_) { // non French
+ // This loop is a simplified copy of primary loop
+ // at this point we know that whole strings are latin-1, so we don't
+ // check for that. We also know that we only have contractions as
+ // specials.
+ // sIndex = 0; tIndex = 0;
+ sIndex = startOffset;
+ tIndex = startOffset;
+ secLoop: for (;;) {
+ while (sOrder == 0) {
+ if (sIndex == sLen) {
+ endOfSource = true;
+ break;
+ }
+ sChar = source.charAt(sIndex++); // [sIndex++];
+ sOrder = latinOneCEs_[offset + sChar];
+ if (isSpecial(sOrder)) {
+ m_ContInfo_.index = sIndex;
+ sOrder = getLatinOneContraction(1, sOrder, source);
+ sIndex = m_ContInfo_.index;
+ }
+ }
+
+ while (tOrder == 0) {
+ if (tIndex == tLen) {
+ if (endOfSource) {
+ break secLoop;
+ } else {
+ return 1;
+ }
+ }
+ tChar = target.charAt(tIndex++); // [tIndex++];
+ tOrder = latinOneCEs_[offset + tChar];
+ if (isSpecial(tOrder)) {
+ m_ContInfo_.index = tIndex;
+ tOrder = getLatinOneContraction(1, tOrder, target);
+ tIndex = m_ContInfo_.index;
+ }
+ }
+ if (endOfSource) {
+ return -1;
+ }
+
+ if (sOrder == tOrder) {
+ sOrder = 0;
+ tOrder = 0;
+ continue;
+ } else {
+ // see primary loop for comments on this
+ if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
+ if (sOrder >>> 8 < tOrder >>> 8) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ sOrder <<= 8;
+ tOrder <<= 8;
+ }
+ }
+ } else { // French
+ if (haveContractions) { // if we have contractions, we have to bail out
+ // since we don't really know how to handle them here
+ return compareRegular(source, target, startOffset, buffer);
+ }
+ // For French, we go backwards
+ sIndex = sLen;
+ tIndex = tLen;
+ secFLoop: for (;;) {
+ while (sOrder == 0) {
+ if (sIndex == startOffset) {
+ endOfSource = true;
+ break;
+ }
+ sChar = source.charAt(--sIndex); // [--sIndex];
+ sOrder = latinOneCEs_[offset + sChar];
+ // don't even look for contractions
+ }
+
+ while (tOrder == 0) {
+ if (tIndex == startOffset) {
+ if (endOfSource) {
+ break secFLoop;
+ } else {
+ return 1;
+ }
+ }
+ tChar = target.charAt(--tIndex); // [--tIndex];
+ tOrder = latinOneCEs_[offset + tChar];
+ // don't even look for contractions
+ }
+ if (endOfSource) {
+ return -1;
+ }
+
+ if (sOrder == tOrder) {
+ sOrder = 0;
+ tOrder = 0;
+ continue;
+ } else {
+ // see the primary loop for comments
+ if (((sOrder ^ tOrder) & 0xFF000000) != 0) {
+ if (sOrder >>> 8 < tOrder >>> 8) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ sOrder <<= 8;
+ tOrder <<= 8;
+ }
+ }
+ }
+ }
+
+ if (strength >= TERTIARY) {
+ // tertiary loop is the same as secondary (except no French)
+ offset += latinOneTableLen_; // move on to the tertiary section of latinOneCEs_
+ // sIndex = 0; tIndex = 0;
+ sIndex = startOffset;
+ tIndex = startOffset;
+ endOfSource = false;
+ for (;;) {
+ while (sOrder == 0) {
+ if (sIndex == sLen) {
+ endOfSource = true;
+ break;
+ }
+ sChar = source.charAt(sIndex++); // [sIndex++];
+ sOrder = latinOneCEs_[offset + sChar];
+ if (isSpecial(sOrder)) {
+ m_ContInfo_.index = sIndex;
+ sOrder = getLatinOneContraction(2, sOrder, source);
+ sIndex = m_ContInfo_.index;
+ }
+ }
+ while (tOrder == 0) {
+ if (tIndex == tLen) {
+ if (endOfSource) {
+ return 0; // if both strings are at the end, they are equal
+ } else {
+ return 1;
+ }
+ }
+ tChar = target.charAt(tIndex++); // [tIndex++];
+ tOrder = latinOneCEs_[offset + tChar];
+ if (isSpecial(tOrder)) {
+ m_ContInfo_.index = tIndex;
+ tOrder = getLatinOneContraction(2, tOrder, target);
+ tIndex = m_ContInfo_.index;
+ }
+ }
+ if (endOfSource) {
+ return -1;
+ }
+ if (sOrder == tOrder) {
+ sOrder = 0;
+ tOrder = 0;
+ continue;
+ } else {
+ if (((sOrder ^ tOrder) & 0xff000000) != 0) {
+ if (sOrder >>> 8 < tOrder >>> 8) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ sOrder <<= 8;
+ tOrder <<= 8;
+ }
+ }
+ }
+ return 0; // no difference found on any level up to the collator's strength
+ }
+
+    /**
+     * Get the version of this collator object.
+     * <p>
+     * The first two fields are the high and low byte of a 16-bit value combining the runtime major
+     * version (shifted left by 11) and the builder major version (shifted left by 6); the third
+     * field is the builder minor version and the fourth the UCA major version.
+     *
+     * @return the version object associated with this collator
+     * @stable ICU 2.8
+     */
+    public VersionInfo getVersion() {
+        final int runtimeMajor = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
+        final int builderMajor = m_version_.getMajor();
+        // Charset version: not available here, so it contributes nothing to the combined value.
+        final int charsetVersion = 0;
+
+        final int combined = ((runtimeMajor << 11) | (builderMajor << 6) | (charsetVersion)) & 0xFFFF;
+        final int highByte = combined >> 8;
+        final int lowByte = combined & 0xFF;
+
+        return VersionInfo.getInstance(highByte, lowByte, m_version_.getMinor(),
+                UCA_.m_UCA_version_.getMajor());
+    }
+
+ /**
+ * Get the UCA version of this collator object.
+ *
+ * @return the UCA version, i.e. the m_UCA_version_ value of the UCA_ collator
+ * @stable ICU 2.8
+ */
+ public VersionInfo getUCAVersion() {
+ return UCA_.m_UCA_version_;
+ }
+
+ private transient boolean m_reallocLatinOneCEs_; // when true, setUpLatinOne() allocates a new latinOneCEs_ array instead of clearing the existing one
+
+ private CollationBuffer collationBuffer; // reusable scratch buffer; created on first use by getCollationBuffer()
+
+    /**
+     * Hands out the collator's reusable scratch buffer, creating it on first use and resetting it
+     * on every later use. On a frozen collator the frozen lock is taken first; it is released again
+     * by releaseCollationBuffer.
+     *
+     * @return the scratch buffer, ready for use
+     */
+    private final CollationBuffer getCollationBuffer() {
+        if (isFrozen()) {
+            frozenLock.lock();
+        }
+        if (collationBuffer != null) {
+            collationBuffer.resetBuffers();
+            return collationBuffer;
+        }
+        collationBuffer = new CollationBuffer();
+        return collationBuffer;
+    }
+
+ private final void releaseCollationBuffer(CollationBuffer buffer) { // counterpart of getCollationBuffer(); the buffer argument itself is not used
+ if (isFrozen()) {
+ frozenLock.unlock(); // releases the lock that getCollationBuffer() takes on a frozen collator
+ }
+ }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/text/StringSearch.java b/main/classes/collate/src/com/ibm/icu/text/StringSearch.java
new file mode 100644
index 00000000000..11265cd21ea
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/text/StringSearch.java
@@ -0,0 +1,3178 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package com.ibm.icu.text;
+
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Locale;
+
+import com.ibm.icu.impl.CharacterIteratorWrapper;
+import com.ibm.icu.impl.Norm2AllModes;
+import com.ibm.icu.impl.Normalizer2Impl;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * <p>
+ * {@code StringSearch} is the concrete subclass of
+ * {@code SearchIterator} that provides language-sensitive text searching
+ * based on the comparison rules defined in a {@link RuleBasedCollator} object.
+ *
+ *
+ * {@code StringSearch} uses a version of the fast Boyer-Moore search
+ * algorithm that has been adapted to work with the large character set of
+ * Unicode. Refer to
+ * "Efficient Text Searching in Java", published in the
+ * <i>Java Report</i> in February 1999, for further information on the
+ * algorithm.
+ *
+ *
+ * Users are also strongly encouraged to read the section on
+ *
+ * String Search and
+ *
+ * Collation in the user guide before attempting to use this class.
+ *
+ *
+ * String searching becomes a little complicated when accents are encountered at
+ * match boundaries. If a match is found and it has preceding or trailing
+ * accents not part of the match, the result returned will include the
+ * preceding accents up to the first base character, if the pattern searched
+ * for starts with an accent. Likewise,
+ * if the pattern ends with an accent, all trailing accents up to the first
+ * base character will be included in the result.
+ *
+ *
+ * For example, if a match is found in target text "a\u0325\u0300" for
+ * the pattern
+ * "a\u0325", the result returned by StringSearch will be the index 0 and
+ * length 3 <0, 3>. If a match is found in the target
+ * "a\u0325\u0300"
+ * for the pattern "\u0300", then the result will be index 1 and length 2
+ * <1, 2>.
+ *
+ *
+ * In the case where the decomposition mode is on for the RuleBasedCollator,
+ * every match that starts or ends with an accent will have its result include
+ * preceding or following accents respectively. For example, if pattern "a" is
+ * looked for in the target text "á\u0325", the result will be
+ * index 0 and length 2 <0, 2>.
+ *
+ *
+ * The StringSearch class provides two options to handle accent matching
+ * described below:
+ *
+ *
+ * Let S' be the sub-string of a text string S between the offsets start and
+ * end <start, end>.
+ *
+ * A pattern string P matches a text string S at the offsets <start,
+ * length>
+ *
+ * if
+ *
+ * option 1. P matches some canonical equivalent string of S'. Suppose the
+ * RuleBasedCollator used for searching has a collation strength of
+ * TERTIARY, all accents are non-ignorable. If the pattern
+ * "a\u0300" is searched in the target text
+ * "a\u0325\u0300",
+ * a match will be found, since the target text is canonically
+ * equivalent to "a\u0300\u0325"
+ * option 2. P matches S' and if P starts or ends with a combining mark,
+ * there exists no non-ignorable combining mark before or after S'
+ * in S respectively. Following the example above, the pattern
+ * "a\u0300" will not find a match in "a\u0325\u0300",
+ * since
+ * there exists a non-ignorable accent '\u0325' in the middle of
+ * 'a' and '\u0300'. Even with a target text of
+ * "a\u0300\u0325" a match will not be found because of the
+ * non-ignorable trailing accent \u0325.
+ *
+ * Option 2. will be the default mode for dealing with boundary accents unless
+ * specified via the API setCanonical(boolean).
+ * One restriction is to be noted for option 1. Currently there are no
+ * composite characters that consists of a character with combining class > 0
+ * before a character with combining class == 0. However, if such a character
+ * exists in the future, the StringSearch may not work correctly with option 1
+ * when such characters are encountered.
+ *
+ *
+ * SearchIterator provides APIs to specify the starting position
+ * within the text string to be searched, e.g. setIndex ,
+ * preceding and following . Since the starting position will
+ * be set as it is specified, please take note that there are some dangerous
+ * positions which the search may render incorrect results:
+ *
+ * The midst of a substring that requires decomposition.
+ * If the following match is to be found, the position should not be the
+ * second character which requires to be swapped with the preceding
+ * character. Vice versa, if the preceding match is to be found,
+ * position to search from should not be the first character which
+ * requires to be swapped with the next character. E.g certain Thai and
+ * Lao characters require swapping.
+ * If a following pattern match is to be found, any position within a
+ * contracting sequence except the first will fail. Vice versa if a
+ * preceding pattern match is to be found, an invalid starting point
+ * would be any character within a contracting sequence except the last.
+ *
+ *
+ *
+ * Though collator attributes will be taken into consideration while
+ * performing matches, there are no APIs provided in StringSearch for setting
+ * and getting the attributes. These attributes can be set by getting the
+ * collator from getCollator and using the APIs in
+ * com.ibm.icu.text.Collator . To update StringSearch to the new
+ * collator attributes, reset() or
+ * setCollator(RuleBasedCollator) has to be called.
+ *
+ *
+ * Consult the
+ *
+ * String Search user guide and the SearchIterator
+ * documentation for more information and examples of use.
+ *
+ *
+ * This class is not subclassable
+ *
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ * @author Laura Werner, synwee
+ * @stable ICU 2.0
+ */
+// internal notes: no method guarantees that the index of the character
+// iterator is preserved. the caller has to save and restore the original index
+// position if necessary. methods may change the index position as they see fit
+public final class StringSearch extends SearchIterator
+{
+
+ // public constructors --------------------------------------------------
+
+ /**
+ * Initializes the iterator to use the language-specific rules defined in
+ * the argument collator to search for argument pattern in the argument
+ * target text. The argument breakiter is used to define logical matches.
+ * See super class documentation for more details on the use of the target
+ * text and BreakIterator.
+ * <p>
+ * Canonical matching is switched off initially. The collator is
+ * dereferenced here without a null check.
+ * @param pattern text to look for.
+ * @param target target text to search for pattern.
+ * @param collator RuleBasedCollator that defines the language rules
+ * @param breakiter A {@link BreakIterator} that is used to determine the
+ * boundaries of a logical match. This argument can be null.
+ * @exception IllegalArgumentException thrown when argument target is null,
+ * or of length 0
+ * @see BreakIterator
+ * @see RuleBasedCollator
+ * @see SearchIterator
+ * @stable ICU 2.0
+ */
+ public StringSearch(String pattern, CharacterIterator target,
+ RuleBasedCollator collator, BreakIterator breakiter)
+ {
+ super(target, breakiter);
+ // remember the valid range of the target text
+ m_textBeginOffset_ = targetText.getBeginIndex();
+ m_textLimitOffset_ = targetText.getEndIndex();
+ m_collator_ = collator;
+ // iterator over the target text
+ m_colEIter_ = m_collator_.getCollationElementIterator(target);
+ // scratch iterator, created on an empty string; it is pointed at the
+ // pattern text when the pattern CE table is built
+ m_utilColEIter_ = collator.getCollationElementIterator("");
+ m_ceMask_ = getMask(m_collator_.getStrength());
+ m_isCanonicalMatch_ = false;
+ m_pattern_ = new Pattern(pattern);
+ m_matchedIndex_ = DONE;
+ // the character break iterator is requested without a locale argument;
+ // the collator-locale argument is commented out
+ m_charBreakIter_ = BreakIterator.getCharacterInstance(/*m_collator_.getLocale(ULocale.ACTUAL_LOCALE)*/);
+ m_charBreakIter_.setText(target);
+ // must run last: it relies on the collator, the mask and the pattern set above
+ initialize();
+ }
+
+ /**
+ * Initializes the iterator to use the language-specific rules defined in
+ * the argument collator to search for argument pattern in the argument
+ * target text. No BreakIterators are set to test for logical matches:
+ * this constructor delegates to the four-argument constructor with a
+ * null break iterator.
+ * @param pattern text to look for.
+ * @param target target text to search for pattern.
+ * @param collator RuleBasedCollator that defines the language rules
+ * @exception IllegalArgumentException thrown when argument target is null,
+ * or of length 0
+ * @see RuleBasedCollator
+ * @see SearchIterator
+ * @stable ICU 2.0
+ */
+ public StringSearch(String pattern, CharacterIterator target,
+ RuleBasedCollator collator)
+ {
+ this(pattern, target, collator, null/*BreakIterator.getCharacterInstance()*/);
+ }
+
+ /**
+ * Initializes the iterator to use the language-specific rules and
+ * break iterator rules defined in the argument locale to search for
+ * argument pattern in the argument target text.
+ * See super class documentation for more details on the use of the target
+ * text and BreakIterator.
+ * <p>
+ * The locale is converted with {@code ULocale.forLocale} and the work is
+ * delegated to the {@code ULocale} constructor.
+ * @param pattern text to look for.
+ * @param target target text to search for pattern.
+ * @param locale locale to use for language and break iterator rules
+ * @exception IllegalArgumentException thrown when argument target is null,
+ * or of length 0. ClassCastException thrown if the collator for
+ * the specified locale is not a RuleBasedCollator.
+ * @see BreakIterator
+ * @see RuleBasedCollator
+ * @see SearchIterator
+ * @stable ICU 2.0
+ */
+ public StringSearch(String pattern, CharacterIterator target, Locale locale)
+ {
+ this(pattern, target, ULocale.forLocale(locale));
+ }
+
+ /**
+ * Initializes the iterator to use the language-specific rules
+ * defined in the argument locale to search for
+ * argument pattern in the argument target text.
+ * See super class documentation for more details on the use of the target
+ * text and BreakIterator.
+ * <p>
+ * The collator is obtained from {@code Collator.getInstance(locale)} and
+ * cast to RuleBasedCollator. Note that a null break iterator is passed
+ * on; the locale-specific character break iterator is commented out.
+ * @param pattern text to look for.
+ * @param target target text to search for pattern.
+ * @param locale ulocale to use for the language rules
+ * @exception IllegalArgumentException thrown when argument target is null,
+ * or of length 0. ClassCastException thrown if the collator for
+ * the specified locale is not a RuleBasedCollator.
+ * @see BreakIterator
+ * @see RuleBasedCollator
+ * @see SearchIterator
+ * @stable ICU 3.2
+ */
+ public StringSearch(String pattern, CharacterIterator target, ULocale locale)
+ {
+ this(pattern, target, (RuleBasedCollator)Collator.getInstance(locale),
+ null/*BreakIterator.getCharacterInstance(locale)*/);
+ }
+
+ /**
+ * Initializes the iterator to use the language-specific rules
+ * defined in the default locale to search for
+ * argument pattern in the argument target text.
+ * See super class documentation for more details on the use of the target
+ * text and BreakIterator.
+ * <p>
+ * The target string is wrapped in a StringCharacterIterator, the collator
+ * is obtained from {@code Collator.getInstance()} and cast to
+ * RuleBasedCollator, and a null break iterator is passed on.
+ * @param pattern text to look for.
+ * @param target target text to search for pattern.
+ * @exception IllegalArgumentException thrown when argument target is null,
+ * or of length 0. ClassCastException thrown if the collator for
+ * the default locale is not a RuleBasedCollator.
+ * @see BreakIterator
+ * @see RuleBasedCollator
+ * @see SearchIterator
+ * @stable ICU 2.0
+ */
+ public StringSearch(String pattern, String target)
+ {
+ this(pattern, new StringCharacterIterator(target),
+ (RuleBasedCollator)Collator.getInstance(),
+ null/*BreakIterator.getCharacterInstance()*/);
+ }
+
+ // public getters -----------------------------------------------------
+
+ /**
+ * <p>
+ * Gets the RuleBasedCollator used for the language rules.
+ * </p>
+ * <p>
+ * The collator instance held by this StringSearch is returned itself, not
+ * a copy. Since StringSearch depends on the returned RuleBasedCollator, any
+ * changes to the RuleBasedCollator result should follow with a call to
+ * either StringSearch.reset() or
+ * StringSearch.setCollator(RuleBasedCollator) to ensure the correct
+ * search behaviour.
+ * </p>
+ * @return RuleBasedCollator used by this StringSearch
+ * @see RuleBasedCollator
+ * @see #setCollator
+ * @stable ICU 2.0
+ */
+ public RuleBasedCollator getCollator()
+ {
+ return m_collator_;
+ }
+
+ /**
+ * Returns the pattern that this StringSearch is searching for, i.e. the
+ * string stored in the internal pattern object.
+ * @return the pattern searched for
+ * @stable ICU 2.0
+ */
+ public String getPattern()
+ {
+ return m_pattern_.targetText;
+ }
+
+ /**
+  * Return the index in the target text where the iterator is currently
+  * positioned at. The position is the offset of the collation element
+  * iterator running over the target text.
+  * If the iteration has gone past the end of the target text or past
+  * the beginning for a backwards search, {@link #DONE} is returned.
+  * @return index in the target text where the iterator is currently
+  *         positioned at
+  * @stable ICU 2.8
+  */
+ public int getIndex()
+ {
+     final int offset = m_colEIter_.getOffset();
+     return isOutOfBounds(m_textBeginOffset_, m_textLimitOffset_, offset)
+             ? DONE
+             : offset;
+ }
+
+ /**
+ * Determines whether canonical matches (option 1, as described in the
+ * class documentation) is set.
+ * See setCanonical(boolean) for more information.
+ * @see #setCanonical
+ * @return true if canonical matches is set, false otherwise
+ * @stable ICU 2.8
+ */
+ public boolean isCanonical()
+ {
+ return m_isCanonicalMatch_;
+ }
+
+ // public setters -----------------------------------------------------
+
+ /**
+  * <p>
+  * Sets the RuleBasedCollator to be used for language-specific searching.
+  * </p>
+  * <p>
+  * This method causes internal data such as Boyer-Moore shift tables
+  * to be recalculated, but the iterator's position is unchanged.
+  * </p>
+  * @param collator to use for this StringSearch
+  * @exception IllegalArgumentException thrown when collator is null
+  * @see #getCollator
+  * @stable ICU 2.0
+  */
+ public void setCollator(RuleBasedCollator collator)
+ {
+     if (collator == null) {
+         throw new IllegalArgumentException("Collator can not be null");
+     }
+     m_collator_ = collator;
+     m_ceMask_ = getMask(m_collator_.getStrength());
+     // Re-bind both collation element iterators to the new collator BEFORE
+     // the pattern data is rebuilt. initializePatternCETable() reads the
+     // pattern's collation elements through m_utilColEIter_, so running
+     // initialize() first would build the pattern table with the rules of
+     // the previous collator while the text is iterated with the new one.
+     // NOTE(review): initialize() is expected to reach
+     // initializePatternCETable() via initializePattern() - confirm.
+     m_colEIter_.setCollator(m_collator_);
+     m_utilColEIter_.setCollator(m_collator_);
+     initialize();
+     // the character break iterator is requested without a locale argument;
+     // the collator-locale argument is commented out
+     m_charBreakIter_ = BreakIterator.getCharacterInstance(/*collator.getLocale(ULocale.VALID_LOCALE)*/);
+     m_charBreakIter_.setText(targetText);
+ }
+
+ /**
+  * <p>
+  * Set the pattern to search for.
+  * </p>
+  * <p>
+  * This method causes internal data such as Boyer-Moore shift tables
+  * to be recalculated, but the iterator's position is unchanged.
+  * </p>
+  * @param pattern for searching
+  * @see #getPattern
+  * @exception IllegalArgumentException thrown if pattern is null or of
+  *            length 0
+  * @stable ICU 2.0
+  */
+ public void setPattern(String pattern)
+ {
+     final boolean unusable = pattern == null || pattern.length() == 0;
+     if (unusable) {
+         throw new IllegalArgumentException(
+                 "Pattern to search for can not be null or of length 0");
+     }
+     // store the new text, then rebuild the data derived from it
+     m_pattern_.targetText = pattern;
+     initialize();
+ }
+
+ /**
+ * Set the target text to be searched. Text iteration will hence begin at
+ * the start of the text string. This method is useful if you want to
+ * re-use an iterator to search within a different body of text.
+ * <p>
+ * Besides the super class state, the cached begin and limit offsets, the
+ * collation element iterator over the text and the character break
+ * iterator are pointed at the new text.
+ * @param text new text iterator to look for match,
+ * @exception IllegalArgumentException thrown when text is null or has
+ * 0 length
+ * @see #getTarget
+ * @stable ICU 2.8
+ */
+ public void setTarget(CharacterIterator text)
+ {
+ super.setTarget(text);
+ // refresh the cached valid range of the target text
+ m_textBeginOffset_ = targetText.getBeginIndex();
+ m_textLimitOffset_ = targetText.getEndIndex();
+ m_colEIter_.setText(targetText);
+ m_charBreakIter_.setText(targetText);
+ }
+
+ /**
+ * <p>
+ * Sets the position in the target text which the next search will start
+ * from to the argument. This method clears all previous states.
+ * </p>
+ * <p>
+ * This method takes the argument position and sets the position in the
+ * target text accordingly, without checking if position is pointing to a
+ * valid starting point to begin searching.
+ * </p>
+ * <p>
+ * Search positions that may render incorrect results are highlighted in
+ * the class documentation.
+ * </p>
+ * @param position index to start next search from.
+ * @exception IndexOutOfBoundsException thrown if argument position is out
+ * of the target text range.
+ * @see #getIndex
+ * @stable ICU 2.8
+ */
+ public void setIndex(int position)
+ {
+ super.setIndex(position);
+ // forget the previous match
+ m_matchedIndex_ = DONE;
+ // move the text collation element iterator to the same position
+ m_colEIter_.setExactOffset(position);
+ }
+
+ /**
+  * <p>
+  * Set the canonical match mode. See class documentation for details.
+  * The default setting for this property is false.
+  * </p>
+  * <p>
+  * Switching the mode on also makes sure that the prefix and suffix accent
+  * buffers exist and are empty; switching it off leaves them untouched.
+  * </p>
+  * @param allowCanonical flag indicator if canonical matches are allowed
+  * @see #isCanonical
+  * @stable ICU 2.8
+  */
+ public void setCanonical(boolean allowCanonical)
+ {
+     m_isCanonicalMatch_ = allowCanonical;
+     if (!allowCanonical) {
+         return;
+     }
+     // canonical mode starts with empty accent buffers
+     if (m_canonicalPrefixAccents_ != null) {
+         m_canonicalPrefixAccents_.setLength(0);
+     }
+     else {
+         m_canonicalPrefixAccents_ = new StringBuilder();
+     }
+     if (m_canonicalSuffixAccents_ != null) {
+         m_canonicalSuffixAccents_.setLength(0);
+     }
+     else {
+         m_canonicalSuffixAccents_ = new StringBuilder();
+     }
+ }
+
+ // public miscellaneous methods -----------------------------------------
+
+ /**
+ * <p>
+ * Resets the search iteration. All properties will be reset to the
+ * default value.
+ * </p>
+ * <p>
+ * Search will begin at the start of the target text if a forward iteration
+ * is initiated before a backwards iteration. Otherwise if a
+ * backwards iteration is initiated before a forwards iteration, the search
+ * will begin at the end of the target text.
+ * </p>
+ * <p>
+ * Canonical match option will be reset to false, ie an exact match.
+ * The strength mask and the pattern data are recomputed from the current
+ * collator, so attribute changes made on the collator take effect.
+ * </p>
+ * @stable ICU 2.8
+ */
+ public void reset()
+ {
+ // reset is setting the attributes that are already in string search,
+ // hence all attributes in the collator should be retrieved without any
+ // problems
+ super.reset();
+ m_isCanonicalMatch_ = false;
+ // pick up a possibly changed collation strength
+ m_ceMask_ = getMask(m_collator_.getStrength());
+ // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
+ initialize();
+ // re-bind both iterators to the (same) collator and rewind the text iterator
+ m_colEIter_.setCollator(m_collator_);
+ m_colEIter_.reset();
+ m_utilColEIter_.setCollator(m_collator_);
+ }
+
+ // protected methods -----------------------------------------------------
+
+ /**
+ * <p>
+ * Concrete method to provide the mechanism
+ * for finding the next forwards match in the target text.
+ * See super class documentation for its use.
+ * </p>
+ * <p>
+ * When the pattern has no collation elements to compare, a match of
+ * length 0 is reported: first at the beginning of the text, then after
+ * every following code point. Otherwise the canonical or the exact
+ * search routine is run, depending on the canonical match mode.
+ * On return the target text iterator is positioned at the match, or at
+ * the text limit when no match was found.
+ * </p>
+ * @param start index in the target text at which the forwards search
+ * should begin.
+ * @return the starting index of the next forwards match if found, DONE
+ * otherwise
+ * @see #handlePrevious(int)
+ * @see #DONE
+ * @stable ICU 2.8
+ */
+ protected int handleNext(int start)
+ {
+ if (m_pattern_.m_CELength_ == 0) {
+ // nothing to compare: every position is a zero-length match
+ matchLength = 0;
+ if (m_matchedIndex_ == DONE && start == m_textBeginOffset_) {
+ // no match reported yet: the first one is the start of the text
+ m_matchedIndex_ = start;
+ return m_matchedIndex_;
+ }
+
+ targetText.setIndex(start);
+ char ch = targetText.current();
+ // ch can never be done, it is handled by next()
+ char ch2 = targetText.next();
+ if (ch2 == CharacterIterator.DONE) {
+ m_matchedIndex_ = DONE;
+ }
+ else {
+ m_matchedIndex_ = targetText.getIndex();
+ }
+ // do not stop between the two halves of a surrogate pair
+ if (UTF16.isLeadSurrogate(ch) && UTF16.isTrailSurrogate(ch2)) {
+ targetText.next();
+ m_matchedIndex_ = targetText.getIndex();
+ }
+ }
+ else {
+ if (matchLength <= 0) {
+ // we must have reversed direction after we reached the start
+ // of the target text
+ // see SearchIterator next(), it checks the bounds and returns
+ // if it exceeds the range. It does not allow setting of
+ // m_matchedIndex
+ if (start == m_textBeginOffset_) {
+ m_matchedIndex_ = DONE;
+ }
+ else {
+ // for boundary check purposes. this will ensure that the
+ // next match will not precede the current offset
+ // note search->matchedIndex will always be set to something
+ // in the code
+ m_matchedIndex_ = start - 1;
+ }
+ }
+
+ // status checked below
+ if (m_isCanonicalMatch_) {
+ // can't use exact here since extra accents are allowed.
+ handleNextCanonical(start);
+ }
+ else {
+ handleNextExact(start);
+ }
+ }
+ // leave the text iterator at the match, or at the limit if there is none
+ if (m_matchedIndex_ == DONE) {
+ targetText.setIndex(m_textLimitOffset_);
+ }
+ else {
+ targetText.setIndex(m_matchedIndex_);
+ }
+ return m_matchedIndex_;
+ }
+
+ /**
+ * <p>
+ * Concrete method to provide the mechanism
+ * for finding the next backwards match in the target text.
+ * See super class documentation for its use.
+ * </p>
+ * <p>
+ * When the pattern has no collation elements to compare, a match of
+ * length 0 is reported one code point before start. Otherwise the
+ * canonical or the exact backwards search routine is run, depending on
+ * the canonical match mode. On return the target text iterator is
+ * positioned at the match, or at the beginning of the text when no match
+ * was found.
+ * </p>
+ * @param start index in the target text at which the backwards search
+ * should begin.
+ * @return the starting index of the next backwards match if found, DONE
+ * otherwise
+ * @see #handleNext(int)
+ * @see #DONE
+ * @stable ICU 2.8
+ */
+ protected int handlePrevious(int start)
+ {
+ if (m_pattern_.m_CELength_ == 0) {
+ // nothing to compare: every position is a zero-length match
+ matchLength = 0;
+ // start can never be DONE or 0, it is handled in previous
+ targetText.setIndex(start);
+ char ch = targetText.previous();
+ if (ch == CharacterIterator.DONE) {
+ m_matchedIndex_ = DONE;
+ }
+ else {
+ m_matchedIndex_ = targetText.getIndex();
+ // do not stop between the two halves of a surrogate pair
+ if (UTF16.isTrailSurrogate(ch)) {
+ if (UTF16.isLeadSurrogate(targetText.previous())) {
+ m_matchedIndex_ = targetText.getIndex();
+ }
+ }
+ }
+ }
+ else {
+ if (matchLength == 0) {
+ // we must have reversed direction after we reached the end
+ // of the target text
+ // see SearchIterator next(), it checks the bounds and returns
+ // if it exceeds the range. It does not allow setting of
+ // m_matchedIndex
+ m_matchedIndex_ = DONE;
+ }
+ if (m_isCanonicalMatch_) {
+ // can't use exact here since extra accents are allowed.
+ handlePreviousCanonical(start);
+ }
+ else {
+ handlePreviousExact(start);
+ }
+ }
+
+ // leave the text iterator at the match, or at the beginning if there is none
+ if (m_matchedIndex_ == DONE) {
+ targetText.setIndex(m_textBeginOffset_);
+ }
+ else {
+ targetText.setIndex(m_matchedIndex_);
+ }
+ return m_matchedIndex_;
+ }
+
+ // private static inner classes ----------------------------------------
+
+ /**
+  * Holder for the pattern text and the data derived from it: its collation
+  * elements, accent flags and the Boyer-Moore shift tables.
+  */
+ private static class Pattern
+ {
+     // protected data members ------------------------------------------
+
+     /** Pattern string. */
+     protected String targetText;
+     /** Array containing the collation elements of targetText. */
+     protected int[] m_CE_ = new int[INITIAL_ARRAY_SIZE_];
+     /** Number of collation elements in m_CE_. */
+     protected int m_CELength_ = 0;
+     /** Flag indicator if targetText starts with an accent. */
+     protected boolean m_hasPrefixAccents_ = false;
+     /** Flag indicator if targetText ends with an accent. */
+     protected boolean m_hasSuffixAccents_ = false;
+     /** Default number of characters to shift for Boyer Moore. */
+     protected int m_defaultShiftSize_ = 1;
+     /**
+      * Number of characters to shift for Boyer Moore, depending on the
+      * source text to search.
+      */
+     protected char[] m_shift_ = new char[MAX_TABLE_SIZE_];
+     /**
+      * Number of characters to shift backwards for Boyer Moore, depending
+      * on the source text to search.
+      */
+     protected char[] m_backShift_ = new char[MAX_TABLE_SIZE_];
+
+     // protected constructors ------------------------------------------
+
+     /**
+      * Creates the holder for the argument pattern text. The collation
+      * element array and both shift tables are allocated with their
+      * initial sizes; no collation elements are computed here.
+      * @param pattern pattern text to store
+      */
+     protected Pattern(String pattern)
+     {
+         targetText = pattern;
+     }
+ }
+
+
+ // private data members ------------------------------------------------
+
+ /**
+ * target text begin offset. Each targetText has a valid contiguous region
+ * to iterate and this data member is the offset to the first such
+ * character in the region.
+ */
+ private int m_textBeginOffset_;
+ /**
+ * target text limit offset. Each targetText has a valid contiguous region
+ * to iterate and this data member is the offset to 1 after the last such
+ * character in the region.
+ */
+ private int m_textLimitOffset_;
+ /**
+ * Upon completion of a search, m_matchedIndex_ will store the starting offset
+ * in the target text for the match. The value DONE is the default value.
+ * If we are not at the start of the text or the end of the text and
+ * m_matchedIndex_ is DONE it means that we cannot find any more matches in
+ * that particular direction
+ */
+ private int m_matchedIndex_;
+ /**
+ * Current pattern to search for
+ */
+ private Pattern m_pattern_;
+ /**
+ * Collator whose rules are used to perform the search
+ */
+ private RuleBasedCollator m_collator_;
+ /**
+ * The collation element iterator for the text source.
+ */
+ private CollationElementIterator m_colEIter_;
+ /**
+ * Utility collation element, used throughout program for temporary
+ * iteration.
+ */
+ private CollationElementIterator m_utilColEIter_;
+ /**
+ * The mask used on the collation elements to retrieve the valid strength
+ * weight
+ */
+ private int m_ceMask_;
+ /**
+ * Buffer storing accents during a canonical search
+ */
+ private StringBuilder m_canonicalPrefixAccents_;
+ /**
+ * Buffer storing accents during a canonical search
+ */
+ private StringBuilder m_canonicalSuffixAccents_;
+ /**
+ * Flag to indicate if canonical search is to be done.
+ * E.g looking for "a\u0300" in "a\u0318\u0300" will yield the match at 0.
+ */
+ private boolean m_isCanonicalMatch_;
+ /**
+ * Character break iterator for boundary checking.
+ */
+ private BreakIterator m_charBreakIter_;
+ private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
+ /**
+ * Size of the shift tables
+ */
+ private static final int MAX_TABLE_SIZE_ = 257;
+ /**
+ * Initial array size
+ */
+ private static final int INITIAL_ARRAY_SIZE_ = 256;
+ /**
+ * Utility shift: number of bits to shift right to reach the second-last byte
+ */
+ private static final int SECOND_LAST_BYTE_SHIFT_ = 8;
+ /**
+ * Utility mask
+ */
+ private static final int LAST_BYTE_MASK_ = 0xff;
+ /**
+ * Utility buffer for return values and temporary storage
+ */
+ private int m_utilBuffer_[] = new int[2];
+ /**
+ * Unsigned 32-Bit Integer Mask
+ */
+ private static final long UNSIGNED_32BIT_MASK = 0xffffffffL;
+
+ // private methods -------------------------------------------------------
+
+ /**
+  * Hash a collation element from its full size (32 bits) down into a
+  * value that can be used as an index into the shift tables. Right
+  * now the primary order of the element is taken modulo the size of the
+  * shift tables.
+  * @param ce collation element
+  * @return collapsed version of the collation element
+  */
+ private static final int hash(int ce)
+ {
+     // known weakness, to be discussed: with the new collation most of the
+     // latin 1 characters are of the value xx000xxx, so a modulus on the
+     // primary order does not spread them well and many hashes will be 0
+     final int primary = CollationElementIterator.primaryOrder(ce);
+     return primary % MAX_TABLE_SIZE_;
+ }
+
+ /**
+ * Gets the fcd value for a code point from the NFC normalizer data,
+ * narrowed to a char.
+ * @param c code point whose fcd is to be retrieved
+ * @return fcd value
+ */
+ private final char getFCD(int c) {
+ return (char)m_nfcImpl_.getFCD16(c);
+ }
+ /**
+  * Gets the fcd value for a character at the argument index.
+  * This method takes supplementary characters into account: for a lead
+  * surrogate followed by a trail surrogate the value of the combined code
+  * point is returned, for a lead surrogate without a trail surrogate 0.
+  * Note this method changes the offset in the character iterator.
+  * @param str text iterator where character for fcd retrieval resides
+  * @param offset position of the character whose fcd is to be retrieved
+  * @return fcd value
+  */
+ private final char getFCD(CharacterIterator str, int offset)
+ {
+     final char lead = str.setIndex(offset);
+     final int leadFcd = m_nfcImpl_.getFCD16FromSingleLead(lead);
+     if (leadFcd == 0 || !Character.isHighSurrogate(lead)) {
+         // the single code unit already gives the answer
+         return (char)leadFcd;
+     }
+     final char trail = str.next();
+     if (!Character.isLowSurrogate(trail)) {
+         // unpaired lead surrogate
+         return (char)0;
+     }
+     return (char)m_nfcImpl_.getFCD16(Character.toCodePoint(lead, trail));
+ }
+ /**
+  * Gets the FCD value for the code point before the input offset.
+  * Unpaired surrogates yield 0.
+  * Modifies the iterator's index.
+  * @param iter text iterator
+  * @param offset index after the character to test
+  * @return FCD value for the character before offset
+  */
+ private final int getFCDBefore(CharacterIterator iter, int offset) {
+     iter.setIndex(offset);
+     final char unit = iter.previous();
+     if (!UTF16.isSurrogate(unit)) {
+         return m_nfcImpl_.getFCD16FromSingleLead(unit);
+     }
+     if (Normalizer2Impl.UTF16Plus.isSurrogateLead(unit)) {
+         // a lead surrogate directly before offset has no trail
+         return 0;
+     }
+     // unit is a trail surrogate: it only counts together with a lead before it
+     final char lead = iter.previous();
+     return Character.isHighSurrogate(lead)
+             ? m_nfcImpl_.getFCD16(Character.toCodePoint(lead, unit))
+             : 0;
+ }
+ /**
+  * Gets the fcd value for a character at the argument index.
+  * This method takes supplementary characters into account: for a lead
+  * surrogate followed by a trail surrogate the value of the combined code
+  * point is returned, for a lead surrogate without a trail surrogate 0.
+  * @param str UTF16 string where character for fcd retrieval resides
+  * @param offset position of the character whose fcd is to be retrieved
+  * @return fcd value
+  */
+ private final char getFCD(String str, int offset)
+ {
+     final char lead = str.charAt(offset);
+     final int leadFcd = m_nfcImpl_.getFCD16FromSingleLead(lead);
+     if (leadFcd == 0 || !Character.isHighSurrogate(lead)) {
+         // the single code unit already gives the answer
+         return (char)leadFcd;
+     }
+     final int trailIndex = offset + 1;
+     if (trailIndex >= str.length()) {
+         // lead surrogate at the very end of the string
+         return (char)0;
+     }
+     final char trail = str.charAt(trailIndex);
+     if (!Character.isLowSurrogate(trail)) {
+         return (char)0;
+     }
+     return (char)m_nfcImpl_.getFCD16(Character.toCodePoint(lead, trail));
+ }
+
+ /**
+  * Gets the modified collation element, taking into account the collation
+  * attributes: the element is reduced with the strength mask and, when
+  * alternate handling is shifted, elements below the variable top are
+  * either cut down to their primary order (quaternary strength) or turned
+  * into an ignorable.
+  * @param ce collation element to modify
+  * @return the modified collation element
+  */
+ private final int getCE(int ce)
+ {
+     // note for tertiary we can't use the collator->tertiaryMask, that
+     // is a preprocessed mask that takes into account case options. since
+     // we are only concerned with exact matches, we don't need that.
+     final int masked = ce & m_ceMask_;
+
+     if (!m_collator_.isAlternateHandlingShifted()) {
+         return masked;
+     }
+
+     // alternate handling: both sides are compared as unsigned 32 bit
+     // values. no shifting to quaternary is required since all primary
+     // values less than variable top need to be masked off anyway.
+     final long variableTop = (m_collator_.m_variableTopValue_ << 16) & UNSIGNED_32BIT_MASK;
+     final long unsignedCE = masked & UNSIGNED_32BIT_MASK;
+     if (variableTop <= unsignedCE) {
+         // not a variable element
+         return masked;
+     }
+     return m_collator_.getStrength() == Collator.QUATERNARY
+             ? CollationElementIterator.primaryOrder(masked)
+             : CollationElementIterator.IGNORABLE;
+ }
+
+ /**
+  * Stores an int at the argument offset of an int array, moving to a bigger
+  * array when the offset lies beyond the end of the current one. The new
+  * array has room for INITIAL_ARRAY_SIZE_ more elements than the offset and
+  * holds a copy of the old contents.
+  * @param offset in array to append to
+  * @param value to append
+  * @param array to append to
+  * @return the array appended to, this could be a new and bigger array
+  */
+ private static final int[] append(int offset, int value, int array[])
+ {
+     if (offset < array.length) {
+         array[offset] = value;
+         return array;
+     }
+     final int[] grown = new int[offset + INITIAL_ARRAY_SIZE_];
+     System.arraycopy(array, 0, grown, 0, array.length);
+     grown[offset] = value;
+     return grown;
+ }
+
+ /**
+  * Initializes the ce table for the pattern. The pattern text is run
+  * through the utility collation element iterator and every element that
+  * is not ignorable after applying the collation attributes is stored.
+  * The table grows as needed and is closed with a trailing 0 that is not
+  * counted in the stored length.
+  * Internal method, status assumed to be a success.
+  * @return total number of expansions
+  */
+ private final int initializePatternCETable()
+ {
+     m_utilColEIter_.setText(m_pattern_.targetText);
+
+     int stored = 0;
+     int expansions = 0;
+     for (int ce = m_utilColEIter_.next();
+          ce != CollationElementIterator.NULLORDER;
+          ce = m_utilColEIter_.next()) {
+         final int effective = getCE(ce);
+         if (effective != CollationElementIterator.IGNORABLE) {
+             m_pattern_.m_CE_ = append(stored, effective, m_pattern_.m_CE_);
+             stored ++;
+         }
+         // expansions are counted for every element, ignorable or not
+         expansions += m_utilColEIter_.getMaxExpansion(ce) - 1;
+     }
+
+     // terminating 0 after the last stored element
+     m_pattern_.m_CE_ = append(stored, 0, m_pattern_.m_CE_);
+     m_pattern_.m_CELength_ = stored;
+
+     return expansions;
+ }
+
+ /**
+  * Initializes the pattern struct: the prefix and suffix accent flags and
+  * the pattern ce table. With primary strength both accent flags are
+  * cleared; otherwise they are taken from the fcd values of the first and
+  * the last code point of the pattern text.
+  * Internal method, status assumed to be success.
+  * @return expansionsize the total expansion size of the pattern
+  */
+ private final int initializePattern()
+ {
+     final boolean accentsMatter = m_collator_.getStrength() != Collator.PRIMARY;
+     if (accentsMatter) {
+         final String text = m_pattern_.targetText;
+         // the upper byte of the fcd value of the first character
+         final int leadingPart = getFCD(text, 0) >> SECOND_LAST_BYTE_SHIFT_;
+         m_pattern_.m_hasPrefixAccents_ = leadingPart != 0;
+         // the lower byte of the fcd value of the last code point
+         final int trailingPart = getFCD(text.codePointBefore(text.length()))
+                                  & LAST_BYTE_MASK_;
+         m_pattern_.m_hasSuffixAccents_ = trailingPart != 0;
+     } else {
+         m_pattern_.m_hasPrefixAccents_ = false;
+         m_pattern_.m_hasSuffixAccents_ = false;
+     }
+     // since initializePattern is an internal method status is a success.
+     return initializePatternCETable();
+ }
+
+ /**
+  * Fills the forward and backward shift tables.
+  * Both tables are first filled with their default value and then
+  * overwritten, slot by slot, for the hash of every pattern ce. Later
+  * writes win when two ces hash to the same slot, so the order of the
+  * assignments below is significant.
+  * @param shift table for forwards shift
+  * @param backshift table for backwards shift
+  * @param cetable table containing pattern ce
+  * @param cesize size of the pattern ces
+  * @param expansionsize total size of the expansions
+  * @param defaultforward the default forward value
+  * @param defaultbackward the default backward value
+  */
+ private final void setShiftTable(char shift[],
+                                  char backshift[],
+                                  int cetable[], int cesize,
+                                  int expansionsize,
+                                  char defaultforward,
+                                  char defaultbackward)
+ {
+     // The shift value estimates the smallest number of characters that
+     // can produce the relevant ces, i.e. roughly the number of ces minus
+     // their expansions, since expansions can come from one character.
+     for (int slot = 0; slot < MAX_TABLE_SIZE_; slot ++) {
+         shift[slot] = defaultforward;
+     }
+     final int last = cesize - 1; // index of the last pattern ce
+     for (int i = 0; i < last; i ++) {
+         // number of ces from the right of the array to i, never below 1
+         int distance = defaultforward - i - 1;
+         shift[hash(cetable[i])] = (char)(distance > 1 ? distance : 1);
+     }
+     shift[hash(cetable[last])] = 1;
+     // for ignorables we just shift by one. see test examples.
+     shift[hash(0)] = 1;
+
+     for (int slot = 0; slot < MAX_TABLE_SIZE_; slot ++) {
+         backshift[slot] = defaultbackward;
+     }
+     for (int i = last; i > 0; i --) {
+         // the plain index does not seem to work, hence the expansion
+         // size is taken off, again never going below 1
+         int distance = i > expansionsize ? i - expansionsize : 1;
+         backshift[hash(cetable[i])] = (char)distance;
+     }
+     backshift[hash(cetable[0])] = 1;
+     backshift[hash(0)] = 1;
+ }
+
+ /**
+  * Builds the pattern collation element list and the Boyer Moore
+  * StringSearch shift tables.
+  * The canonical match will only be performed after the default match
+  * fails.
+  * Since the Boyer-Moore shift moves by a number of characters in the text
+  * and tries to match the pattern from that offset, the shift must not be
+  * too large or characters could be missed. The default shift is therefore
+  * estimated conservatively as the number of pattern ces minus the total
+  * expansion size, and never less than 1.
+  * Canonical matching splits the pattern into prefix accents (PA), the
+  * middle string bounded by the first and last base character (MS) and
+  * the ending accents (EA); MS is matched first and PA/EA are processed
+  * only afterwards.
+  * If the pattern has no non-ignorable ce, the default shift size is set
+  * to 0 and the shift tables are left untouched.
+  */
+ private final void initialize()
+ {
+     m_nfcImpl_.getFCDTrie(); // ensure the FCD data is initialized
+     final int expandlength = initializePattern();
+     final int celength = m_pattern_.m_CELength_;
+     if (celength <= 0) {
+         m_pattern_.m_defaultShiftSize_ = 0;
+         return;
+     }
+
+     int estimate;
+     if (celength > expandlength) {
+         estimate = celength - expandlength;
+     }
+     else {
+         estimate = 1;
+     }
+     char minlength = (char)estimate;
+     m_pattern_.m_defaultShiftSize_ = minlength;
+     setShiftTable(m_pattern_.m_shift_, m_pattern_.m_backShift_,
+                   m_pattern_.m_CE_, m_pattern_.m_CELength_,
+                   expandlength, minlength, minlength);
+ }
+
+ /**
+ * Determines whether the search text bounded by the offsets start and end
+ * is one or more whole units of text as determined by the break iterator
+ * in StringSearch.
+ * When both offsets are boundaries, the text is additionally re-iterated
+ * from start with the utility collation element iterator: its
+ * non-ignorable ces must equal the pattern ces in order, and after
+ * skipping ignorable ces no further collation element may be reported
+ * while the iterator offset is still equal to end.
+ * @param start target text start offset
+ * @param end target text end offset
+ * @return true if no break iterator is set; otherwise true only if start
+ * and end are both boundaries and the collation element re-check
+ * passes
+ */
+ private final boolean isBreakUnit(int start, int end)
+ {
+ if (breakIterator != null) {
+ int startindex = breakIterator.first();
+ int endindex = breakIterator.last();
+
+ // out-of-range indexes are never boundary positions
+ if (start < startindex || start > endindex || end < startindex
+ || end > endindex) {
+ return false;
+ }
+ // otherwise, we can use following() on the position before the
+ // specified one and return true if the position we get back is the
+ // one the user specified
+ boolean result = (start == startindex
+ || breakIterator.following(start - 1) == start)
+ && (end == endindex
+ || breakIterator.following(end - 1) == end);
+ if (result) {
+ // iterates the individual ces
+ m_utilColEIter_.setText(
+ new CharacterIteratorWrapper(targetText), start);
+ for (int count = 0; count < m_pattern_.m_CELength_;
+ count ++) {
+ int ce = getCE(m_utilColEIter_.next());
+ if (ce == CollationElementIterator.IGNORABLE) {
+ // ignorable text ces do not consume a pattern ce: undo the
+ // loop increment so the same pattern index is retried
+ count --;
+ continue;
+ }
+ if (ce != m_pattern_.m_CE_[count]) {
+ return false;
+ }
+ }
+ // all pattern ces matched; skip ignorable ces still reported at end
+ int nextce = m_utilColEIter_.next();
+ while (m_utilColEIter_.getOffset() == end
+ && getCE(nextce) == CollationElementIterator.IGNORABLE) {
+ nextce = m_utilColEIter_.next();
+ }
+ if (nextce != CollationElementIterator.NULLORDER
+ && m_utilColEIter_.getOffset() == end) {
+ // extra collation elements at the end of the match
+ return false;
+ }
+ }
+ return result;
+ }
+ // without a break iterator every range counts as a break unit
+ return true;
+ }
+
+ /**
+ * Gets the next base character offset if the character at the current
+ * offset is an accent, or the current offset if the current character
+ * contains a base character.
+ * A character is treated as an accent here when the upper byte of its FCD
+ * value is non-zero; the scan moves forward one code point at a time
+ * (two code units for a surrogate pair) until that byte is zero.
+ * An offset at or beyond text.getEndIndex() is returned unchanged.
+ * Note that the index of the iterator argument is moved by this method.
+ * @param text character iterator over the text
+ * @param textoffset current offset
+ * @return the next base character offset, or the current offset
+ * if the current character contains a base character.
+ */
+ private final int getNextBaseOffset(CharacterIterator text, int textoffset)
+ {
+ if (textoffset >= text.getEndIndex()) {
+ return textoffset;
+ }
+ // iteration ends with reading CharacterIterator.DONE which has fcd==0
+ char c = text.setIndex(textoffset);
+ for (;;) {
+ // quick test on the single code unit first
+ if ((m_nfcImpl_.getFCD16FromSingleLead(c) >> SECOND_LAST_BYTE_SHIFT_) == 0) {
+ return textoffset;
+ }
+ char next = text.next();
+ if (Character.isSurrogatePair(c, next)) {
+ // c and next form a supplementary code point: test its FCD value
+ int fcd = m_nfcImpl_.getFCD16(Character.toCodePoint(c, next));
+ if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
+ return textoffset;
+ }
+ // step over both code units of the pair
+ next = text.next();
+ textoffset += 2;
+ } else {
+ ++textoffset;
+ }
+ c = next;
+ }
+ }
+
+ /**
+ * Gets the next base character offset depending on the string search
+ * pattern data
+ * @param textoffset one offset away from the last character
+ * to search for.
+ * @return start index of the next base character or the current offset
+ * if the current character is contains a base character.
+ */
+ private final int getNextBaseOffset(int textoffset)
+ {
+ if (m_pattern_.m_hasSuffixAccents_ && textoffset < m_textLimitOffset_) {
+ if ((getFCDBefore(targetText, textoffset) & LAST_BYTE_MASK_) != 0) {
+ return getNextBaseOffset(targetText, textoffset);
+ }
+ }
+ return textoffset;
+ }
+
+ /**
+  * Shifts the text offset forward to prepare for the following match
+  * attempt.
+  * For a real ce the shift comes from the forward shift table and is
+  * reduced when the failure happened in the middle of the pattern; for
+  * NULLORDER the default shift size of the pattern is used. The result is
+  * then moved on to the next base character offset.
+  * Internal method, status assumed to be success.
+  * @param textoffset start text position to do search
+  * @param ce the text ce which failed the match.
+  * @param patternceindex index of the ce within the pattern ce buffer which
+  *        failed the match
+  * @return final offset
+  */
+ private int shiftForward(int textoffset, int ce, int patternceindex)
+ {
+     int advance;
+     if (ce == CollationElementIterator.NULLORDER) {
+         advance = m_pattern_.m_defaultShiftSize_;
+     }
+     else {
+         advance = m_pattern_.m_shift_[hash(ce)];
+         // adjust for the pattern ces to the right of the one that failed
+         int remaining = m_pattern_.m_CELength_ - patternceindex;
+         if (remaining > 1 && advance >= remaining) {
+             advance -= remaining - 1;
+         }
+     }
+
+     // Unsafe characters still to be handled after an initial match:
+     // * start or middle of a contraction
+     // * thai or lao base consonant character: similar to contraction
+     // * high surrogate character: similar to contraction
+     // A following accent is handled here by moving to the next base
+     // character.
+     return getNextBaseOffset(textoffset + advance);
+ }
+
+ /**
+  * Gets the offset of the next safe point in the text, i.e. the first
+  * offset from textoffset on whose character the collator does not report
+  * as unsafe (not the middle of a contraction, swappable characters or
+  * supplementary characters). The scan never goes beyond end.
+  * The index of targetText is left at the returned offset.
+  * @param textoffset offset in string
+  * @param end offset in string
+  * @return offset to the next safe character
+  */
+ private final int getNextSafeOffset(int textoffset, int end)
+ {
+     int candidate = textoffset; // first contraction character
+     for (targetText.setIndex(candidate);
+          candidate != end && m_collator_.isUnsafe(targetText.current());
+          targetText.setIndex(candidate)) {
+         candidate ++;
+     }
+     return candidate;
+ }
+
+ /**
+  * Checks for accents in a potential match that starts with a composite
+  * character.
+  * This is the slow path taken after the fcd of the first and the last
+  * character has been checked and the potential match was found to have
+  * extra non-ignorable preceding ces: the potential match is normalized
+  * and the decomposed character immediately before the first pattern ce
+  * is examined.
+  * E.g. looking for \u0301 acute in \u01FA A ring above and acute,
+  * checkExtraMatchAccents should fail since there is a middle ring in
+  * \u01FA. Note that accent checking is slow and cautioned in the API
+  * docs.
+  * Internal method, status assumed to be a success, caller should check
+  * status before calling this method.
+  * @param start offset of the potential match, i.e. of the potentially
+  *        unfriendly composite character
+  * @param end limit offset of the potential match
+  * @return true if there are non-ignorable accents at the beginning
+  *         of the match, false otherwise.
+  */
+ private final boolean checkExtraMatchAccents(int start, int end)
+ {
+     boolean result = false;
+     if (m_pattern_.m_hasPrefixAccents_) {
+         targetText.setIndex(start);
+
+         if (UTF16.isLeadSurrogate(targetText.next())) {
+             if (!UTF16.isTrailSurrogate(targetText.next())) {
+                 targetText.previous();
+             }
+         }
+         // we are only concerned with the first composite character.
+         // The third argument of getString is a length, as at the other
+         // call sites (e.g. checkIdentical passes end - start); passing
+         // the end offset itself made the extracted string run past the
+         // match whenever start > 0.
+         String str = getString(targetText, start, end - start);
+         if (Normalizer.quickCheck(str, Normalizer.NFD,0)
+                                                     == Normalizer.NO) {
+             int safeoffset = getNextSafeOffset(start, end);
+             if (safeoffset != end) {
+                 safeoffset ++;
+             }
+             // safeoffset lies within [start, end], so the substring below
+             // stays inside str
+             String decomp = Normalizer.decompose(
+                             str.substring(0, safeoffset - start), false);
+             m_utilColEIter_.setText(decomp);
+             int firstce = m_pattern_.m_CE_[0];
+             boolean ignorable = true;
+             int ce = CollationElementIterator.IGNORABLE;
+             int offset = 0;
+             // NOTE(review): this loop relies on the first pattern ce being
+             // present in the decomposed text - confirm with the callers
+             while (ce != firstce) {
+                 offset = m_utilColEIter_.getOffset();
+                 if (ce != firstce
+                     && ce != CollationElementIterator.IGNORABLE) {
+                     ignorable = false;
+                 }
+                 ce = m_utilColEIter_.next();
+             }
+             m_utilColEIter_.setExactOffset(offset); // back up 1 to the
+             m_utilColEIter_.previous();             // right offset
+             offset = m_utilColEIter_.getOffset();
+             // an extra accent is reported only if a non-ignorable ce was
+             // skipped and the character backed up to is a combining mark
+             result = !ignorable && (UCharacter.getCombiningClass(
+                                     UTF16.charAt(decomp, offset)) != 0);
+         }
+     }
+
+     return result;
+ }
+
+ /**
+ * Used by exact matches, checks if there are accents before the match.
+ * This is really painful... we have to check that composite characters at
+ * the start of the match do not have any extra accents.
+ * We check the FCD of the character first, if it starts with an accent and
+ * the first pattern ce does not match the first ce of the character, we
+ * bail.
+ * Otherwise we try normalizing the first composite
+ * character and find the immediate decomposed character before the match to
+ * see if it is a non-ignorable accent.
+ * Now normalizing the first composite character is enough because we ensure
+ * that when the match is passed in here with extra beginning ces, the
+ * first or last ce that match has to occur within the first character.
+ * E.g. looking for \u0301 acute in \u01FA A ring above and acute,
+ * checkExtraMatchAccent should fail since there is a middle ring in \u01FA
+ * Note here that accents checking are slow and cautioned in the API docs.
+ * The check is skipped (false is returned) when the pattern has no prefix
+ * accents. This method repositions m_colEIter_ and moves the index of
+ * targetText.
+ * @param start offset
+ * @param end offset
+ * @return true if there are accents before the match,
+ * false otherwise
+ */
+ private final boolean hasAccentsBeforeMatch(int start, int end)
+ {
+ if (m_pattern_.m_hasPrefixAccents_) {
+ // we have been iterating forwards previously
+ boolean ignorable = true;
+ int firstce = m_pattern_.m_CE_[0];
+ m_colEIter_.setExactOffset(start);
+ int ce = getCE(m_colEIter_.next());
+ // advance to the first pattern ce, remembering whether any
+ // non-ignorable ce had to be skipped on the way
+ while (ce != firstce) {
+ if (ce != CollationElementIterator.IGNORABLE) {
+ ignorable = false;
+ }
+ ce = getCE(m_colEIter_.next());
+ }
+ if (!ignorable && m_colEIter_.isInBuffer()) {
+ // within normalization buffer, discontiguous handled here
+ return true;
+ }
+
+ // within text
+ boolean accent = (getFCD(targetText, start) >> SECOND_LAST_BYTE_SHIFT_)
+ != 0;
+ if (!accent) {
+ // the match does not start with an accent: take the slow path that
+ // decomposes the first composite character
+ return checkExtraMatchAccents(start, end);
+ }
+ if (!ignorable) {
+ return true;
+ }
+ if (start > m_textBeginOffset_) {
+ // look at the character before the match: if the lower byte of its
+ // FCD value is non-zero, a non-ignorable ce in front of start
+ // counts as an accent before the match
+ targetText.setIndex(start);
+ targetText.previous();
+ if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_)
+ != 0) {
+ m_colEIter_.setExactOffset(start);
+ ce = m_colEIter_.previous();
+ if (ce != CollationElementIterator.NULLORDER
+ && ce != CollationElementIterator.IGNORABLE) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Used by exact matches, checks if there are accents after the match.
+ * Note this is the initial boundary check. If the potential match
+ * starts or ends with composite characters, the accents in those
+ * characters will be determined later.
+ * Not doing backwards iteration here, since discontiguous contraction for
+ * backwards collation element iterator uses up too many characters.
+ * E.g. looking for \u030A ring in \u01FA A ring above and acute,
+ * should fail since there is an acute at the end of \u01FA
+ * Note here that accents checking are slow and cautioned in the API docs.
+ * The check is skipped (false is returned) when the pattern has no suffix
+ * accents. This method repositions m_colEIter_ and moves the index of
+ * targetText.
+ * @param start offset of match
+ * @param end end offset of the match
+ * @return true if there are accents after the match,
+ * false otherwise
+ */
+ private final boolean hasAccentsAfterMatch(int start, int end)
+ {
+ if (m_pattern_.m_hasSuffixAccents_) {
+ // position targetText on the start of the last character of the
+ // match, stepping over a surrogate pair if there is one
+ targetText.setIndex(end);
+ if (end > m_textBeginOffset_
+ && UTF16.isTrailSurrogate(targetText.previous())) {
+ if (targetText.getIndex() > m_textBeginOffset_ &&
+ !UTF16.isLeadSurrogate(targetText.previous())) {
+ targetText.next();
+ }
+ }
+ if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) != 0) {
+ int firstce = m_pattern_.m_CE_[0];
+ m_colEIter_.setExactOffset(start);
+ while (getCE(m_colEIter_.next()) != firstce) {
+ // intentionally empty: skip forward to the first pattern ce
+ }
+ // consume the remaining pattern ces; ignorable text ces are not
+ // counted
+ int count = 1;
+ while (count < m_pattern_.m_CELength_) {
+ if (getCE(m_colEIter_.next())
+ == CollationElementIterator.IGNORABLE) {
+ count --;
+ }
+ count ++;
+ }
+ //int ce = getCE(m_colEIter_.next());
+ // the ce following the match: only mapped through getCE when it is
+ // neither NULLORDER nor IGNORABLE already
+ int ce = m_colEIter_.next();
+ if (ce != CollationElementIterator.NULLORDER
+ && ce != CollationElementIterator.IGNORABLE) {
+ ce = getCE(ce);
+ }
+ if (ce != CollationElementIterator.NULLORDER
+ && ce != CollationElementIterator.IGNORABLE) {
+ // a non-ignorable ce follows: it is an extra accent if it was
+ // produced without the iterator moving past end ...
+ if (m_colEIter_.getOffset() <= end) {
+ return true;
+ }
+ // ... or if the upper byte of the FCD value at end is non-zero
+ if ((getFCD(targetText, end) >> SECOND_LAST_BYTE_SHIFT_)
+ != 0) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+  * Tells whether an offset lies outside the closed range
+  * [textstart, textlimit]; both end points count as inside.
+  * @param textstart offset of the first character in the range
+  * @param textlimit limit offset of the text string range
+  * @param offset to test
+  * @return true if offset is out of bounds, false otherwise
+  */
+ private static final boolean isOutOfBounds(int textstart, int textlimit,
+                                            int offset)
+ {
+     boolean inside = textstart <= offset && offset <= textlimit;
+     return !inside;
+ }
+
+ /**
+ * Checks for identical match
+ * @param strsrch string search data
+ * @param start offset of possible match
+ * @param end offset of possible match
+ * @return true if identical match is found
+ */
+ private final boolean checkIdentical(int start, int end)
+ {
+ if (m_collator_.getStrength() != Collator.IDENTICAL) {
+ return true;
+ }
+
+ String textstr = getString(targetText, start, end - start);
+ if (Normalizer.quickCheck(textstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ textstr = Normalizer.decompose(textstr, false);
+ }
+ String patternstr = m_pattern_.targetText;
+ if (Normalizer.quickCheck(patternstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ patternstr = Normalizer.decompose(patternstr, false);
+ }
+ return textstr.equals(patternstr);
+ }
+
+ /**
+  * Checks to see if the match repeats the previous match.
+  * With overlapping matches allowed, only a new match that completely
+  * covers the previous one counts as repeated; otherwise any intersection
+  * with the previous match does. There is no repetition when no previous
+  * match exists.
+  * @param start new match start index
+  * @param limit new match limit index
+  * @return true if the match is repeated, false otherwise
+  */
+ private final boolean checkRepeatedMatch(int start, int limit)
+ {
+     if (m_matchedIndex_ == DONE) {
+         return false;
+     }
+     final int last = limit - 1; // last character in the match
+     final int previouslast = m_matchedIndex_ + matchLength - 1;
+     final boolean covers = start <= m_matchedIndex_
+                            && last >= previouslast;
+     if (isOverlapping()) {
+         return covers;
+     }
+     final boolean startinside = start >= m_matchedIndex_
+                                 && start <= previouslast;
+     final boolean lastinside = last >= m_matchedIndex_
+                                && last <= previouslast;
+     return startinside || lastinside || covers;
+ }
+
+ /**
+ * Checks match for contraction.
+ * If the match ends with a partial contraction we fail.
+ * If the match starts too far off (because of backwards iteration) we try
+ * to chip off the extra characters depending on whether a breakiterator
+ * has been used.
+ * The ces are only re-checked when the character at end or the character
+ * at start + 1 is reported as unsafe by the collator.
+ * The possibly modified start and end are returned through the temporary
+ * utility buffer: m_utilBuffer_[0] holds start, m_utilBuffer_[1] holds
+ * end, on both the success and the failure path.
+ * @param start offset of potential match, to be modified if necessary
+ * @param end offset of potential match, to be modified if necessary
+ * @return true if match passes the contraction test, false otherwise.
+ */
+ private final boolean checkNextExactContractionMatch(int start, int end)
+ {
+ // This part checks if either ends of the match contains potential
+ // contraction. If so we'll have to iterate through them
+ // (the characters stay 0 when their offset is at or past the text limit)
+ char endchar = 0;
+ if (end < m_textLimitOffset_) {
+ targetText.setIndex(end);
+ endchar = targetText.current();
+ }
+ char poststartchar = 0;
+ if (start + 1 < m_textLimitOffset_) {
+ targetText.setIndex(start + 1);
+ poststartchar = targetText.current();
+ }
+ if (m_collator_.isUnsafe(endchar)
+ || m_collator_.isUnsafe(poststartchar)) {
+ // expansion prefix, what's left to iterate
+ // (read before setExactOffset repositions the iterator)
+ int bufferedCEOffset = m_colEIter_.m_CEBufferOffset_;
+ boolean hasBufferedCE = bufferedCEOffset > 0;
+ m_colEIter_.setExactOffset(start);
+ int temp = start;
+ while (bufferedCEOffset > 0) {
+ // getting rid of the redundant ce, caused by setOffset.
+ // since backward contraction/expansion may have extra ces if
+ // we are in the normalization buffer, hasAccentsBeforeMatch
+ // would have taken care of it.
+ // E.g. the character \u01FA will have an expansion of 3, but
+ // if we are only looking for acute and ring \u030A and \u0301,
+ // we'll have to skip the first ce in the expansion buffer.
+ m_colEIter_.next();
+ if (m_colEIter_.getOffset() != temp) {
+ start = temp;
+ temp = m_colEIter_.getOffset();
+ }
+ bufferedCEOffset --;
+ }
+
+ // compare the non-ignorable text ces with the pattern ces in order
+ int count = 0;
+ while (count < m_pattern_.m_CELength_) {
+ int ce = getCE(m_colEIter_.next());
+ if (ce == CollationElementIterator.IGNORABLE) {
+ continue;
+ }
+ if (hasBufferedCE && count == 0
+ && m_colEIter_.getOffset() != temp) {
+ start = temp;
+ temp = m_colEIter_.getOffset();
+ }
+ if (ce != m_pattern_.m_CE_[count]) {
+ // mismatch: report the next base offset after end as the place
+ // to continue from
+ end ++;
+ end = getNextBaseOffset(end);
+ m_utilBuffer_[0] = start;
+ m_utilBuffer_[1] = end;
+ return false;
+ }
+ count ++;
+ }
+ }
+ m_utilBuffer_[0] = start;
+ m_utilBuffer_[1] = end;
+ return true;
+ }
+
+
+ /**
+ * Checks and sets the match information if found.
+ * Checks
+ * <ul>
+ * <li> the potential match does not repeat the previous match
+ * <li> boundaries are correct
+ * <li> exact matches have no extra accents
+ * <li> identical matches
+ * <li> potential match does not end in the middle of a contraction
+ * </ul>
+ * Otherwise the offset will be shifted to the next character.
+ * On success m_matchedIndex_ and matchLength will be set to the truncated
+ * more fitting result value.
+ * Uses the temporary utility buffer for storing the modified textoffset:
+ * on failure m_utilBuffer_[0] holds the offset to continue from.
+ * @param textoffset offset in the collation element text.
+ * @return true if the match is valid, false otherwise
+ */
+ private final boolean checkNextExactMatch(int textoffset)
+ {
+ int start = m_colEIter_.getOffset();
+ if (!checkNextExactContractionMatch(start, textoffset)) {
+ // returns the modified textoffset
+ m_utilBuffer_[0] = m_utilBuffer_[1];
+ return false;
+ }
+
+ // pick up start and end as adjusted by the contraction check
+ start = m_utilBuffer_[0];
+ textoffset = m_utilBuffer_[1];
+ // this totally matches, however we need to check if it is repeating
+ // (the checks below reposition the iterators, so their order matters)
+ if (!isBreakUnit(start, textoffset)
+ || checkRepeatedMatch(start, textoffset)
+ || hasAccentsBeforeMatch(start, textoffset)
+ || !checkIdentical(start, textoffset)
+ || hasAccentsAfterMatch(start, textoffset)) {
+ textoffset ++;
+ textoffset = getNextBaseOffset(textoffset);
+ m_utilBuffer_[0] = textoffset;
+ return false;
+ }
+
+ if (m_collator_.getStrength() == Collator.PRIMARY) {
+ textoffset = checkBreakBoundary(textoffset);
+ }
+
+ // totally match, we will get rid of the ending ignorables.
+ m_matchedIndex_ = start;
+ matchLength = textoffset - start;
+ return true;
+ }
+
+ /**
+ * Getting the previous base character offset, or the current offset if the
+ * current character is a base character.
+ * The text is scanned backwards one code point at a time, starting with
+ * the code point that ends at textoffset, until a code point is found
+ * whose FCD value has a zero upper byte or the text begin offset is
+ * reached. An offset at or before m_textBeginOffset_ is returned
+ * unchanged. The index of the iterator argument is moved by this method.
+ * @param text the source text to work on
+ * @param textoffset one offset after the current character
+ * @return the offset of the next character after the base character or the
+ * first composed character with accents
+ */
+ private final int getPreviousBaseOffset(CharacterIterator text,
+ int textoffset)
+ {
+ if (textoffset > m_textBeginOffset_) {
+ while (true) {
+ // result is the offset just after the code point examined in this
+ // round
+ int result = textoffset;
+ text.setIndex(result);
+ // step back one code point, i.e. two code units for a surrogate pair
+ if (UTF16.isTrailSurrogate(text.previous())) {
+ if (text.getIndex() != text.getBeginIndex() &&
+ !UTF16.isLeadSurrogate(text.previous())) {
+ text.next();
+ }
+ }
+ textoffset = text.getIndex();
+ char fcd = getFCD(text, textoffset);
+ if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
+ // upper byte zero: the scan stops at this code point
+ if ((fcd & LAST_BYTE_MASK_) != 0) {
+ // non-zero lower byte: return the start of this code point
+ return textoffset;
+ }
+ // otherwise return the offset just after it
+ return result;
+ }
+ if (textoffset == m_textBeginOffset_) {
+ return m_textBeginOffset_;
+ }
+ }
+ }
+ return textoffset;
+ }
+
+ /**
+ * Getting the indexes of the accents that are not blocked in the argument
+ * accent array
+ * @param accents accents in nfd.
+ * @param accentsindex array to store the indexes of accents in accents that
+ * are not blocked
+ * @return the length of populated accentsindex
+ */
+ private int getUnblockedAccentIndex(StringBuilder accents,
+ int accentsindex[])
+ {
+ int index = 0;
+ int length = accents.length();
+ int cclass = 0;
+ int result = 0;
+ while (index < length) {
+ int codepoint = UTF16.charAt(accents, index);
+ int tempclass = UCharacter.getCombiningClass(codepoint);
+ if (tempclass != cclass) {
+ cclass = tempclass;
+ accentsindex[result] = index;
+ result ++;
+ }
+ if (UCharacter.isSupplementary(codepoint)) {
+ index += 2;
+ }
+ else {
+ index ++;
+ }
+ }
+ accentsindex[result] = length;
+ return result;
+ }
+
+ /**
+  * Concatenates a string buffer, a range of a character iterator and a
+  * second string buffer into a newly created buffer.
+  * A null or empty buffer contributes nothing. The index of source2 is
+  * moved by this method.
+  * @param source1 string buffer, may be null
+  * @param source2 character iterator
+  * @param start2 start of the character iterator to merge
+  * @param end2 end of the character iterator to merge
+  * @param source3 string buffer, may be null
+  * @return appended string buffer
+  */
+ private static final StringBuilder merge(StringBuilder source1,
+                                          CharacterIterator source2,
+                                          int start2, int end2,
+                                          StringBuilder source3)
+ {
+     StringBuilder merged = new StringBuilder();
+     if (source1 != null && source1.length() > 0) {
+         merged.append(source1);
+     }
+     source2.setIndex(start2);
+     for (; source2.getIndex() < end2; source2.next()) {
+         merged.append(source2.current());
+     }
+     if (source3 != null && source3.length() > 0) {
+         merged.append(source3);
+     }
+     return merged;
+ }
+
+ /**
+  * Runs through a collation element iterator to see if its contents match
+  * the pattern in the string search data. Ignorable ces of the iterator
+  * are skipped; every other ce has to equal the next pattern ce.
+  * @param coleiter collation element iterator to test
+  * @return true if a match is found, false otherwise
+  */
+ private final boolean checkCollationMatch(CollationElementIterator coleiter)
+ {
+     final int total = m_pattern_.m_CELength_;
+     int matched = 0;
+     while (matched < total) {
+         int ce = getCE(coleiter.next());
+         if (ce != CollationElementIterator.IGNORABLE) {
+             if (ce != m_pattern_.m_CE_[matched]) {
+                 return false;
+             }
+             matched ++;
+         }
+     }
+     return true;
+ }
+
+ /**
+ * Rearranges the front accents to try matching.
+ * Prefix accents in the text will be grouped according to their combining
+ * class and the groups will be mixed and matched to try find the perfect
+ * match with the pattern.
+ * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
+ * step 1: split "\u030A\u0301\u0325" into 6 other type of potential accent
+ * substrings "\u030A", "\u0301", "\u0325", "\u030A\u0301",
+ * "\u030A\u0325", "\u0301\u0325".
+ * step 2: check if any of the generated substrings matches the pattern.
+ * Each candidate is built as: selected prefix accents, followed by the
+ * text from the next base character up to end, followed by
+ * m_canonicalSuffixAccents_.
+ * Internal method, status is assumed to be success, caller has to check
+ * status before calling this method.
+ * @param start first offset of the accents to start searching
+ * @param end start of the last accent set
+ * @return DONE if a match is not found, otherwise return the starting
+ * offset of the match. Note this start includes all preceding
+ * accents.
+ */
+ private int doNextCanonicalPrefixMatch(int start, int end)
+ {
+ if ((getFCD(targetText, start) & LAST_BYTE_MASK_) == 0) {
+ // die... failed at a base character
+ return DONE;
+ }
+
+ start = targetText.getIndex(); // index changed by fcd
+ int offset = getNextBaseOffset(targetText, start);
+ start = getPreviousBaseOffset(start);
+
+ StringBuilder accents = new StringBuilder();
+ String accentstr = getString(targetText, start, offset - start);
+ // normalizing the offensive string
+ if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ accentstr = Normalizer.decompose(accentstr, false);
+ }
+ accents.append(accentstr);
+
+ int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
+ int accentsize = getUnblockedAccentIndex(accents, accentsindex);
+ // count is used as a bit set over the accent groups, starting with all
+ // accentsize bits set and counting down to 1, so every non-empty
+ // selection of groups is tried once
+ int count = (2 << (accentsize - 1)) - 1;
+ while (count > 0) {
+ // copy the base characters
+ m_canonicalPrefixAccents_.delete(0,
+ m_canonicalPrefixAccents_.length());
+ int k = 0;
+ for (; k < accentsindex[0]; k ++) {
+ m_canonicalPrefixAccents_.append(accents.charAt(k));
+ }
+ // forming all possible canonical rearrangement by dropping
+ // sets of accents
+ for (int i = 0; i <= accentsize - 1; i ++) {
+ // group i is kept when its bit (most significant bit first) is set
+ int mask = 1 << (accentsize - i - 1);
+ if ((count & mask) != 0) {
+ for (int j = accentsindex[i]; j < accentsindex[i + 1];
+ j ++) {
+ m_canonicalPrefixAccents_.append(accents.charAt(j));
+ }
+ }
+ }
+ StringBuilder match = merge(m_canonicalPrefixAccents_,
+ targetText, offset, end,
+ m_canonicalSuffixAccents_);
+
+ // if status is a failure, ucol_setText does nothing.
+ // run the collator iterator through this match
+ m_utilColEIter_.setText(match.toString());
+ if (checkCollationMatch(m_utilColEIter_)) {
+ return start;
+ }
+ count --;
+ }
+ return DONE;
+ }
+
+ /**
+  * Gets the offset of the safe point in the text before textoffset,
+  * i.e. not the middle of a contraction, swappable characters or
+  * supplementary characters.
+  * The text is walked backwards from textoffset for as long as the
+  * collator reports the characters as unsafe. The index of targetText is
+  * moved by this method.
+  * @param start offset in string
+  * @param textoffset offset in string
+  * @return offset to the previous safe character
+  */
+ private final int getPreviousSafeOffset(int start, int textoffset)
+ {
+     int safe = textoffset; // first contraction character
+     targetText.setIndex(textoffset);
+     for (;;) {
+         if (safe < start
+             || !m_collator_.isUnsafe(targetText.previous())) {
+             break;
+         }
+         safe = targetText.getIndex();
+     }
+     // the first contraction character is considered unsafe here, hence
+     // the iterator position is used unless the scan ended exactly at start
+     return safe == start ? safe : targetText.getIndex();
+ }
+
+ /**
+ * Takes the rearranged end accents and tries matching. If the match failed
+ * at a separate preceding set of accents (separated from the rearranged
+ * ones by at least a base character) then we rearrange the preceding
+ * accents and try matching again.
+ * We allow skipping of the ends of the accent set if the ces do not match.
+ * However if the failure is found before the accent set, it fails.
+ * The pattern ces are compared backwards, first against a "safe" buffer
+ * iterated with the utility iterator and, once that buffer is exhausted,
+ * against the text itself through m_colEIter_.
+ * Internal method, status assumed to be success, caller has to check
+ * status before calling this method.
+ * @param textoffset of the start of the rearranged accent
+ * @return DONE if a match is not found, otherwise return the starting
+ * offset of the match. Note this start includes all preceding
+ * accents.
+ */
+ private int doNextCanonicalSuffixMatch(int textoffset)
+ {
+ int safelength = 0;
+ StringBuilder safetext;
+ int safeoffset = m_textBeginOffset_;
+
+ // when the rearranged accents start with an unsafe character, the text
+ // from the previous safe offset is put in front of them so that the
+ // safe buffer starts at a safe point
+ if (textoffset != m_textBeginOffset_
+ && m_canonicalSuffixAccents_.length() > 0
+ && m_collator_.isUnsafe(m_canonicalSuffixAccents_.charAt(0))) {
+ safeoffset = getPreviousSafeOffset(m_textBeginOffset_,
+ textoffset);
+ safelength = textoffset - safeoffset;
+ safetext = merge(null, targetText, safeoffset, textoffset,
+ m_canonicalSuffixAccents_);
+ }
+ else {
+ safetext = m_canonicalSuffixAccents_;
+ }
+
+ // if status is a failure, ucol_setText does nothing
+ CollationElementIterator coleiter = m_utilColEIter_;
+ coleiter.setText(safetext.toString());
+ // status checked in loop below
+
+ int ceindex = m_pattern_.m_CELength_ - 1;
+ boolean isSafe = true; // indication flag for position in safe zone
+
+ while (ceindex >= 0) {
+ int textce = coleiter.previous();
+ if (textce == CollationElementIterator.NULLORDER) {
+ // check if we have passed the safe buffer
+ if (coleiter == m_colEIter_) {
+ return DONE;
+ }
+ // safe buffer exhausted: continue backwards in the text itself
+ coleiter = m_colEIter_;
+ if (safetext != m_canonicalSuffixAccents_) {
+ safetext.delete(0, safetext.length());
+ }
+ coleiter.setExactOffset(safeoffset);
+ // status checked at the start of the loop
+ isSafe = false;
+ continue;
+ }
+ textce = getCE(textce);
+ if (textce != CollationElementIterator.IGNORABLE
+ && textce != m_pattern_.m_CE_[ceindex]) {
+ // do the beginning stuff
+ int failedoffset = coleiter.getOffset();
+ if (isSafe && failedoffset >= safelength) {
+ // alas... no hope. failed at rearranged accent set
+ return DONE;
+ }
+ else {
+ if (isSafe) {
+ // translate the safe buffer offset into a text offset
+ failedoffset += safeoffset;
+ }
+
+ // try rearranging the front accents
+ int result = doNextCanonicalPrefixMatch(failedoffset,
+ textoffset);
+ if (result != DONE) {
+ // if status is a failure, ucol_setOffset does nothing
+ m_colEIter_.setExactOffset(result);
+ }
+ return result;
+ }
+ }
+ if (textce == m_pattern_.m_CE_[ceindex]) {
+ ceindex --;
+ }
+ }
+ // set offset here
+ if (isSafe) {
+ // the whole match lies in the safe buffer: translate the buffer
+ // offset into a text offset and position m_colEIter_ there
+ int result = coleiter.getOffset();
+ // sets the text iterator with the correct expansion and offset
+ int leftoverces = coleiter.m_CEBufferOffset_;
+ if (result >= safelength) {
+ result = textoffset;
+ }
+ else {
+ result += safeoffset;
+ }
+ m_colEIter_.setExactOffset(result);
+ m_colEIter_.m_CEBufferOffset_ = leftoverces;
+ return result;
+ }
+
+ return coleiter.getOffset();
+ }
+
+ /**
+ * Tries to turn a failed forward match that ends at textoffset into a
+ * canonical match by rearranging the accents at the end of the candidate.
+ * The run of text from the previous base character up to textoffset is
+ * decomposed when it is not already in NFD, and its accents are split into
+ * groups by getUnblockedAccentIndex. Every non-empty combination of these
+ * groups is appended to the base characters in m_canonicalSuffixAccents_
+ * and checked against the pattern ces with doNextCanonicalSuffixMatch.
+ * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
+ * step 1: split "\u030A\u0301\u0325" into 6 other types of potential
+ * accent substrings
+ * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
+ * "\u0301\u0325".
+ * step 2: check if any of the generated substrings matches the pattern.
+ * When the low byte of the FCD value of the code point before textoffset
+ * is zero, no suffix rearrangement is attempted; only the prefix accents
+ * are tried (doNextCanonicalPrefixMatch) and m_colEIter_ is moved to the
+ * resulting offset on success.
+ * When no combination matches, m_canonicalSuffixAccents_ is left holding
+ * the last combination that was tried.
+ * @param textoffset end offset in the collation element text that ends with
+ * the accents to be rearranged
+ * @return true if a canonical match was found, false otherwise
+ */
+ private boolean doNextCanonicalMatch(int textoffset)
+ {
+ int offset = m_colEIter_.getOffset();
+ // step back to the start of the code point that ends at textoffset,
+ // without stopping between the two halves of a surrogate pair
+ targetText.setIndex(textoffset);
+ if (UTF16.isTrailSurrogate(targetText.previous())
+ && targetText.getIndex() > m_textBeginOffset_) {
+ if (!UTF16.isLeadSurrogate(targetText.previous())) {
+ targetText.next();
+ }
+ }
+ // zero low FCD byte: nothing to rearrange at the end of the candidate,
+ // so the only remaining hope is rearranging the prefix accents
+ if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) {
+ if (m_pattern_.m_hasPrefixAccents_) {
+ offset = doNextCanonicalPrefixMatch(offset, textoffset);
+ if (offset != DONE) {
+ m_colEIter_.setExactOffset(offset);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // the text ends in accents but the pattern does not: cannot match
+ if (!m_pattern_.m_hasSuffixAccents_) {
+ return false;
+ }
+
+ StringBuilder accents = new StringBuilder();
+ // offset to the last base character in substring to search
+ int baseoffset = getPreviousBaseOffset(targetText, textoffset);
+ // normalizing the offensive string
+ String accentstr = getString(targetText, baseoffset,
+ textoffset - baseoffset);
+ if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ accentstr = Normalizer.decompose(accentstr, false);
+ }
+ accents.append(accentstr);
+
+ // accentsindex[i] .. accentsindex[i + 1] delimit accent group i;
+ // the characters before accentsindex[0] are the base characters
+ int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
+ int size = getUnblockedAccentIndex(accents, accentsindex);
+
+ // 2 power n - 1 plus the full set of accents
+ // each bit of count selects one accent group; counting down from the
+ // all-ones mask tries the full set first and never the empty set
+ int count = (2 << (size - 1)) - 1;
+ while (count > 0) {
+ m_canonicalSuffixAccents_.delete(0,
+ m_canonicalSuffixAccents_.length());
+ // copy the base characters
+ for (int k = 0; k < accentsindex[0]; k ++) {
+ m_canonicalSuffixAccents_.append(accents.charAt(k));
+ }
+ // forming all possible canonical rearrangement by dropping
+ // sets of accents
+ for (int i = 0; i <= size - 1; i ++) {
+ // the most significant bit corresponds to the first group
+ int mask = 1 << (size - i - 1);
+ if ((count & mask) != 0) {
+ for (int j = accentsindex[i]; j < accentsindex[i + 1];
+ j ++) {
+ m_canonicalSuffixAccents_.append(accents.charAt(j));
+ }
+ }
+ }
+ offset = doNextCanonicalSuffixMatch(baseoffset);
+ if (offset != DONE) {
+ return true; // match found
+ }
+ count --;
+ }
+ return false;
+ }
+
+     /**
+      * Backs up over leading accents at textoffset, but only when the search
+      * pattern itself starts with accents.
+      * The offset is adjusted only if the pattern has prefix accents,
+      * textoffset lies after the start of the text, and the upper part of
+      * the FCD value at textoffset (the bits above SECOND_LAST_BYTE_SHIFT_)
+      * is non-zero. In that case the result of the two-argument
+      * getPreviousBaseOffset on the target text is returned.
+      * @param textoffset current offset, current character
+      * @return the adjusted offset, or textoffset itself when no adjustment
+      *         applies
+      */
+     private final int getPreviousBaseOffset(int textoffset)
+     {
+         // nothing to skip when the pattern has no leading accents or when
+         // we are already at the beginning of the text
+         if (!m_pattern_.m_hasPrefixAccents_
+             || textoffset <= m_textBeginOffset_) {
+             return textoffset;
+         }
+         boolean hasLeadingAccent =
+             (getFCD(targetText, textoffset) >> SECOND_LAST_BYTE_SHIFT_) != 0;
+         return hasLeadingAccent
+                ? getPreviousBaseOffset(targetText, textoffset)
+                : textoffset;
+     }
+
+     /**
+      * Checks a forward canonical match candidate for contractions.
+      * If the match ends with a partial contraction we fail.
+      * If the match starts too far off (because of backwards iteration) we
+      * try to chip off the extra characters.
+      * The possibly modified boundaries are handed back through the
+      * temporary utility buffer: m_utilBuffer_[0] receives the start and
+      * m_utilBuffer_[1] the end. On failure the end stored there has been
+      * advanced to the next base offset after the failed candidate.
+      * The collation element iterator m_colEIter_ is repositioned by this
+      * method whenever one of the boundary characters is unsafe.
+      * @param start offset of potential match, to be modified if necessary
+      * @param end offset of potential match, to be modified if necessary
+      * @return true if match passes the contraction test, false otherwise.
+      */
+     private boolean checkNextCanonicalContractionMatch(int start, int end)
+     {
+         // This part checks if either ends of the match contains potential
+         // contraction. If so we'll have to iterate through them
+         char schar = 0;
+         char echar = 0;
+         if (end < m_textLimitOffset_) {
+             targetText.setIndex(end);
+             echar = targetText.current();
+         }
+         // the character inspected is the one at start + 1, so that is the
+         // index that has to lie inside the text; this is the same guard the
+         // backward contraction checks use
+         if (start + 1 < m_textLimitOffset_) {
+             targetText.setIndex(start + 1);
+             schar = targetText.current();
+         }
+         if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
+             // number of ces that were pending in the iterator buffer
+             int expansion = m_colEIter_.m_CEBufferOffset_;
+             boolean hasExpansion = expansion > 0;
+             m_colEIter_.setExactOffset(start);
+             int temp = start;
+             while (expansion > 0) {
+                 // getting rid of the redundant ce, caused by setOffset.
+                 // since backward contraction/expansion may have extra ces if
+                 // we are in the normalization buffer, hasAccentsBeforeMatch
+                 // would have taken care of it.
+                 // E.g. the character \u01FA will have an expansion of 3, but
+                 // if we are only looking for acute and ring \u030A and
+                 // \u0301, we'll have to skip the first ce in the expansion
+                 // buffer.
+                 m_colEIter_.next();
+                 if (m_colEIter_.getOffset() != temp) {
+                     start = temp;
+                     temp = m_colEIter_.getOffset();
+                 }
+                 expansion --;
+             }
+
+             // walk the text ces forward and compare them with the pattern
+             int count = 0;
+             while (count < m_pattern_.m_CELength_) {
+                 int ce = getCE(m_colEIter_.next());
+                 if (ce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+                 if (hasExpansion && count == 0
+                     && m_colEIter_.getOffset() != temp) {
+                     start = temp;
+                     temp = m_colEIter_.getOffset();
+                 }
+
+                 if (count == 0 && ce != m_pattern_.m_CE_[0]) {
+                     // accents may have extra starting ces, this occurs when
+                     // a pure accent pattern is matched without
+                     // rearrangement
+                     // text \u0325\u0300 and looking for \u0300
+                     int expected = m_pattern_.m_CE_[0];
+                     if ((getFCD(targetText, start) & LAST_BYTE_MASK_) != 0) {
+                         ce = getCE(m_colEIter_.next());
+                         while (ce != expected
+                                && ce != CollationElementIterator.NULLORDER
+                                && m_colEIter_.getOffset() <= end) {
+                             ce = getCE(m_colEIter_.next());
+                         }
+                     }
+                 }
+                 if (ce != m_pattern_.m_CE_[count]) {
+                     // mismatch: report where the search should resume
+                     end ++;
+                     end = getNextBaseOffset(end);
+                     m_utilBuffer_[0] = start;
+                     m_utilBuffer_[1] = end;
+                     return false;
+                 }
+                 count ++;
+             }
+         }
+         m_utilBuffer_[0] = start;
+         m_utilBuffer_[1] = end;
+         return true;
+     }
+
+     /**
+      * Validates a forward canonical match candidate and records it.
+      * A candidate found through accent rearrangement (the pattern has
+      * suffix or prefix accents and the matching rearrangement buffer is
+      * not empty) is accepted immediately. Any other candidate must
+      * <ul>
+      * <li> pass the contraction check
+      * <li> not repeat the previous match
+      * <li> lie on break boundaries
+      * <li> pass the identical check
+      * </ul>
+      * On success m_matchedIndex_ and matchLength are set. On failure
+      * m_utilBuffer_[0] holds the offset at which the search should resume.
+      * @param textoffset offset in the collation element text.
+      * @return true if the match is valid, false otherwise
+      */
+     private boolean checkNextCanonicalMatch(int textoffset)
+     {
+         // a match produced by rearranging accents is taken as is
+         boolean rearranged = m_pattern_.m_hasSuffixAccents_
+                              && m_canonicalSuffixAccents_.length() != 0;
+         if (!rearranged) {
+             rearranged = m_pattern_.m_hasPrefixAccents_
+                          && m_canonicalPrefixAccents_.length() != 0;
+         }
+         if (rearranged) {
+             int accentStart = getPreviousBaseOffset(m_colEIter_.getOffset());
+             m_matchedIndex_ = accentStart;
+             matchLength = textoffset - accentStart;
+             return true;
+         }
+
+         if (!checkNextCanonicalContractionMatch(m_colEIter_.getOffset(),
+                                                 textoffset)) {
+             // hand back the modified end offset as the resume position
+             m_utilBuffer_[0] = m_utilBuffer_[1];
+             return false;
+         }
+
+         // pick up the boundaries adjusted by the contraction check
+         int matchEnd = m_utilBuffer_[1];
+         int matchStart = getPreviousBaseOffset(m_utilBuffer_[0]);
+
+         // this totally matches, however we need to check if it is repeating
+         boolean rejected = checkRepeatedMatch(matchStart, matchEnd)
+                            || !isBreakUnit(matchStart, matchEnd)
+                            || !checkIdentical(matchStart, matchEnd);
+         if (rejected) {
+             m_utilBuffer_[0] = getNextBaseOffset(targetText, matchEnd + 1);
+             return false;
+         }
+
+         m_matchedIndex_ = matchStart;
+         matchLength = matchEnd - matchStart;
+         return true;
+     }
+
+     /**
+      * Moves the text offset backwards to the next position worth trying
+      * for a preceding match.
+      * For overlapping searches the offset moves back by one, or by the
+      * default shift size when it sits at the text limit. Otherwise the
+      * back-shift table entry for the failed ce is used, reduced by
+      * (patternceindex - 1) when patternceindex is greater than 1 and the
+      * table shift exceeds it; a NULLORDER ce falls back to the default
+      * shift size. The result is finally passed through
+      * getPreviousBaseOffset.
+      * @param textoffset start text position to do search
+      * @param ce the text ce which failed the match.
+      * @param patternceindex index of the ce within the pattern ce buffer
+      *        which failed the match
+      * @return final offset
+      */
+     private int reverseShift(int textoffset, int ce, int patternceindex)
+     {
+         int shift;
+         if (isOverlapping()) {
+             shift = (textoffset != m_textLimitOffset_)
+                     ? 1
+                     : m_pattern_.m_defaultShiftSize_;
+         }
+         else if (ce == CollationElementIterator.NULLORDER) {
+             shift = m_pattern_.m_defaultShiftSize_;
+         }
+         else {
+             shift = m_pattern_.m_backShift_[hash(ce)];
+             // this is to adjust for characters in the middle of the
+             // substring for matching that failed.
+             if (patternceindex > 1 && shift > patternceindex) {
+                 shift -= patternceindex - 1;
+             }
+         }
+         return getPreviousBaseOffset(textoffset - shift);
+     }
+
+     /**
+      * Checks a backward exact match candidate for contractions.
+      * If the match starts with a partial contraction we fail.
+      * The possibly modified boundaries are handed back through the
+      * temporary utility buffer: m_utilBuffer_[0] receives the start and
+      * m_utilBuffer_[1] the end. On failure the start stored there has been
+      * moved back to the previous base offset before the failed candidate.
+      * The collation element iterator m_colEIter_ is repositioned by this
+      * method whenever one of the boundary characters is unsafe.
+      * @param start offset of potential match, to be modified if necessary
+      * @param end offset of potential match, to be modified if necessary
+      * @return true if match passes the contraction test, false otherwise.
+      */
+     private boolean checkPreviousExactContractionMatch(int start, int end)
+     {
+         // This part checks if either ends of the match contains potential
+         // contraction. If so we'll have to iterate through them
+         char echar = 0;
+         if (end < m_textLimitOffset_) {
+             targetText.setIndex(end);
+             echar = targetText.current();
+         }
+         char schar = 0;
+         if (start + 1 < m_textLimitOffset_) {
+             targetText.setIndex(start + 1);
+             schar = targetText.current();
+         }
+         if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
+             // expansion suffix, what's left to iterate
+             int expansion = m_colEIter_.m_CEBufferSize_
+                             - m_colEIter_.m_CEBufferOffset_;
+             m_colEIter_.setExactOffset(end);
+             int temp = end;
+             while (expansion > 0) {
+                 // getting rid of the redundant ce
+                 // since forward contraction/expansion may have extra ces
+                 // if we are in the normalization buffer,
+                 // hasAccentsBeforeMatch would have taken care of it.
+                 // E.g. the character \u01FA will have an expansion of 3, but
+                 // if we are only looking for A ring A\u030A, we'll have to
+                 // skip the last ce in the expansion buffer
+                 m_colEIter_.previous();
+                 if (m_colEIter_.getOffset() != temp) {
+                     end = temp;
+                     temp = m_colEIter_.getOffset();
+                 }
+                 expansion --;
+             }
+
+             // walk the text ces backwards and compare them with the pattern,
+             // last pattern ce first.
+             // NOTE: the forward variant re-adjusts the boundary on its
+             // first compared ce; the equivalent branch here was guarded by
+             // "count == 0", which can never hold inside this loop, so it
+             // never executed and has been dropped.
+             int count = m_pattern_.m_CELength_;
+             while (count > 0) {
+                 int ce = getCE(m_colEIter_.previous());
+                 if (ce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+                 if (ce != m_pattern_.m_CE_[count - 1]) {
+                     // mismatch: report where the search should resume
+                     start --;
+                     start = getPreviousBaseOffset(targetText, start);
+                     m_utilBuffer_[0] = start;
+                     m_utilBuffer_[1] = end;
+                     return false;
+                 }
+                 count --;
+             }
+         }
+         m_utilBuffer_[0] = start;
+         m_utilBuffer_[1] = end;
+         return true;
+     }
+
+     /**
+      * Validates a backward exact match candidate and records it.
+      * The candidate is accepted only if
+      * <ul>
+      * <li> it passes the contraction check
+      * <li> it does not repeat the last match
+      * <li> its boundaries are break units
+      * <li> it has no extra accents before or after it
+      * <li> it passes the identical check
+      * </ul>
+      * On success m_matchedIndex_ and matchLength are set; for PRIMARY
+      * strength the end is first adjusted with checkBreakBoundary.
+      * On failure m_utilBuffer_[0] holds the offset at which the search
+      * should resume.
+      * @param textoffset offset in the collation element text, the start of
+      *        the candidate
+      * @return true if the match is valid, false otherwise
+      */
+     private final boolean checkPreviousExactMatch(int textoffset)
+     {
+         // the current iterator offset is taken as the end of the candidate
+         if (!checkPreviousExactContractionMatch(textoffset,
+                                                 m_colEIter_.getOffset())) {
+             return false;
+         }
+         // pick up the boundaries adjusted by the contraction check
+         int matchStart = m_utilBuffer_[0];
+         int matchEnd = m_utilBuffer_[1];
+
+         // this totally matches, however we need to check if it is repeating
+         // the old match
+         boolean rejected = checkRepeatedMatch(matchStart, matchEnd)
+                            || !isBreakUnit(matchStart, matchEnd)
+                            || hasAccentsBeforeMatch(matchStart, matchEnd)
+                            || !checkIdentical(matchStart, matchEnd)
+                            || hasAccentsAfterMatch(matchStart, matchEnd);
+         if (rejected) {
+             m_utilBuffer_[0] = getPreviousBaseOffset(targetText,
+                                                      matchStart - 1);
+             return false;
+         }
+
+         if (m_collator_.getStrength() == Collator.PRIMARY) {
+             matchEnd = checkBreakBoundary(matchEnd);
+         }
+
+         m_matchedIndex_ = matchStart;
+         matchLength = matchEnd - matchStart;
+         return true;
+     }
+
+ /**
+ * Rearranges the end accents to try matching.
+ * Suffix accents in the text will be grouped according to their combining
+ * class and the groups will be mixed and matched to try find the perfect
+ * match with the pattern.
+ * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
+ * step 1: split "\u030A\u0301\u0325" into 6 other types of potential
+ * accent substrings
+ * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
+ * "\u0301\u0325".
+ * step 2: check if any of the generated substrings matches the pattern.
+ * Each candidate is assembled with merge() from the current prefix accent
+ * buffer, the text between start and the last base character, and the
+ * rearranged suffix accents, and is then run through m_utilColEIter_.
+ * m_canonicalSuffixAccents_ is overwritten by this method.
+ * @param start offset of the first base character
+ * @param end offset inside the last accent set; it is advanced to the next
+ * base offset before the accents are collected
+ * @return DONE if a match is not found, otherwise return the ending
+ * offset of the match. Note this end includes all following
+ * accents.
+ */
+ private int doPreviousCanonicalSuffixMatch(int start, int end)
+ {
+ // step back to the start of the code point that ends at end, without
+ // stopping between the two halves of a surrogate pair
+ targetText.setIndex(end);
+ if (UTF16.isTrailSurrogate(targetText.previous())
+ && targetText.getIndex() > m_textBeginOffset_) {
+ if (!UTF16.isLeadSurrogate(targetText.previous())) {
+ targetText.next();
+ }
+ }
+ if ((getFCD(targetText, targetText.getIndex()) & LAST_BYTE_MASK_) == 0) {
+ // die... failed at a base character
+ return DONE;
+ }
+ // include every accent that follows the failure position
+ end = getNextBaseOffset(targetText, end);
+
+ StringBuilder accents = new StringBuilder();
+ // offset of the base character that carries the trailing accents
+ int offset = getPreviousBaseOffset(targetText, end);
+ // normalizing the offensive string
+ String accentstr = getString(targetText, offset, end - offset);
+ if (Normalizer.quickCheck(accentstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ accentstr = Normalizer.decompose(accentstr, false);
+ }
+ accents.append(accentstr);
+
+ // accentsindex[i] .. accentsindex[i + 1] delimit accent group i;
+ // the characters before accentsindex[0] are the base characters
+ int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
+ int accentsize = getUnblockedAccentIndex(accents, accentsindex);
+ // each bit of count selects one accent group; counting down from the
+ // all-ones mask tries the full set first and never the empty set
+ int count = (2 << (accentsize - 1)) - 1;
+ while (count > 0) {
+ m_canonicalSuffixAccents_.delete(0,
+ m_canonicalSuffixAccents_.length());
+ // copy the base characters
+ for (int k = 0; k < accentsindex[0]; k ++) {
+ m_canonicalSuffixAccents_.append(accents.charAt(k));
+ }
+ // forming all possible canonical rearrangement by dropping
+ // sets of accents
+ for (int i = 0; i <= accentsize - 1; i ++) {
+ // the most significant bit corresponds to the first group
+ int mask = 1 << (accentsize - i - 1);
+ if ((count & mask) != 0) {
+ for (int j = accentsindex[i]; j < accentsindex[i + 1];
+ j ++) {
+ m_canonicalSuffixAccents_.append(accents.charAt(j));
+ }
+ }
+ }
+ StringBuilder match = merge(m_canonicalPrefixAccents_, targetText,
+ start, offset,
+ m_canonicalSuffixAccents_);
+ // run the collator iterator through this match
+ m_utilColEIter_.setText(match.toString());
+ if (checkCollationMatch(m_utilColEIter_)) {
+ return end;
+ }
+ count --;
+ }
+ return DONE;
+ }
+
+ /**
+ * Takes the rearranged start accents and tries matching. If the match
+ * fails at a separate following set of accents (separated from the
+ * rearranged ones by at least a base character) then the end accents are
+ * rearranged with doPreviousCanonicalSuffixMatch and matching is tried
+ * again.
+ * We allow skipping of the ends of the accent set if the ces do not match.
+ * However if the failure is found inside the rearranged accent set, it
+ * fails.
+ * The rearranged accents (m_canonicalPrefixAccents_), optionally extended
+ * with the text up to the next safe offset, are iterated with
+ * m_utilColEIter_; when that buffer is exhausted iteration continues on
+ * the real text with m_colEIter_ from the safe offset.
+ * @param textoffset of the ends of the rearranged accent
+ * @return DONE if a match is not found, otherwise return the ending offset
+ * of the match. Note this end includes all following accents.
+ */
+ private int doPreviousCanonicalPrefixMatch(int textoffset)
+ {
+ // int safelength = 0;
+ StringBuilder safetext;
+ int safeoffset = textoffset;
+
+ // when the last rearranged accent is unsafe, the text up to the next
+ // safe offset is appended so the ces are computed in context
+ if (textoffset > m_textBeginOffset_
+ && m_collator_.isUnsafe(m_canonicalPrefixAccents_.charAt(
+ m_canonicalPrefixAccents_.length() - 1))) {
+ safeoffset = getNextSafeOffset(textoffset, m_textLimitOffset_);
+ //safelength = safeoffset - textoffset;
+ safetext = merge(m_canonicalPrefixAccents_, targetText, textoffset,
+ safeoffset, null);
+ }
+ else {
+ safetext = m_canonicalPrefixAccents_;
+ }
+
+ // start on the utility iterator over the safe buffer
+ CollationElementIterator coleiter = m_utilColEIter_;
+ coleiter.setText(safetext.toString());
+
+ int ceindex = 0;
+ boolean isSafe = true; // safe zone indication flag for position
+ int prefixlength = m_canonicalPrefixAccents_.length();
+
+ while (ceindex < m_pattern_.m_CELength_) {
+ int textce = coleiter.next();
+ if (textce == CollationElementIterator.NULLORDER) {
+ // check if we have passed the safe buffer
+ if (coleiter == m_colEIter_) {
+ // the real text is exhausted as well
+ return DONE;
+ }
+ if (safetext != m_canonicalPrefixAccents_) {
+ safetext.delete(0, safetext.length());
+ }
+ // continue on the real text right after the safe buffer
+ coleiter = m_colEIter_;
+ coleiter.setExactOffset(safeoffset);
+ isSafe = false;
+ continue;
+ }
+ textce = getCE(textce);
+ if (textce != CollationElementIterator.IGNORABLE
+ && textce != m_pattern_.m_CE_[ceindex]) {
+ // do the beginning stuff
+ int failedoffset = coleiter.getOffset();
+ if (isSafe && failedoffset <= prefixlength) {
+ // alas... no hope. failed at rearranged accent set
+ return DONE;
+ }
+ else {
+ if (isSafe) {
+ // convert the offset within the safe buffer into an
+ // offset for the suffix match on the real text
+ failedoffset = safeoffset - failedoffset;
+ if (safetext != m_canonicalPrefixAccents_) {
+ safetext.delete(0, safetext.length());
+ }
+ }
+
+ // try rearranging the end accents
+ int result = doPreviousCanonicalSuffixMatch(textoffset,
+ failedoffset);
+ if (result != DONE) {
+ m_colEIter_.setExactOffset(result);
+ }
+ return result;
+ }
+ }
+ if (textce == m_pattern_.m_CE_[ceindex]) {
+ ceindex ++;
+ }
+ }
+ // set offset here
+ if (isSafe) {
+ // the whole pattern matched inside the safe buffer
+ int result = coleiter.getOffset();
+ // sets the text iterator here with the correct expansion and offset
+ int leftoverces = coleiter.m_CEBufferSize_
+ - coleiter.m_CEBufferOffset_;
+ if (result <= prefixlength) {
+ result = textoffset;
+ }
+ else {
+ result = textoffset + (safeoffset - result);
+ }
+ m_colEIter_.setExactOffset(result);
+ // carry over the number of ces still pending in the buffer
+ m_colEIter_.m_CEBufferOffset_ = m_colEIter_.m_CEBufferSize_
+ - leftoverces;
+ return result;
+ }
+
+ return coleiter.getOffset();
+ }
+
+ /**
+ * Tries to turn a failed backward match that starts at textoffset into a
+ * canonical match by rearranging the accents at the start of the
+ * candidate.
+ * The run of text from textoffset up to the next base offset is
+ * decomposed when it is not already in NFD, and its accents are split
+ * into groups by getUnblockedAccentIndex. Every non-empty combination of
+ * these groups is written to m_canonicalPrefixAccents_ and checked against
+ * the pattern ces with doPreviousCanonicalPrefixMatch.
+ * So for instance looking for "\u0301" in "\u030A\u0301\u0325"
+ * step 1: split "\u030A\u0301\u0325" into 6 other types of potential
+ * accent substrings
+ * "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325",
+ * "\u0301\u0325".
+ * step 2: check if any of the generated substrings matches the pattern.
+ * When the upper part of the FCD value at textoffset is zero, no prefix
+ * rearrangement is attempted; only the suffix accents are tried
+ * (doPreviousCanonicalSuffixMatch) and m_colEIter_ is moved to the
+ * resulting offset on success.
+ * When no combination matches, m_canonicalPrefixAccents_ is left holding
+ * the last combination that was tried.
+ * @param textoffset start offset in the collation element text that starts
+ * with the accents to be rearranged
+ * @return true if a canonical match was found, false otherwise
+ */
+ private boolean doPreviousCanonicalMatch(int textoffset)
+ {
+ int offset = m_colEIter_.getOffset();
+ // nothing to rearrange at the start of the candidate, so the only
+ // remaining hope is rearranging the suffix accents
+ if ((getFCD(targetText, textoffset) >> SECOND_LAST_BYTE_SHIFT_) == 0) {
+ if (m_pattern_.m_hasSuffixAccents_) {
+ offset = doPreviousCanonicalSuffixMatch(textoffset, offset);
+ if (offset != DONE) {
+ m_colEIter_.setExactOffset(offset);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // the text starts with accents but the pattern does not: cannot match
+ if (!m_pattern_.m_hasPrefixAccents_) {
+ return false;
+ }
+
+ StringBuilder accents = new StringBuilder();
+ // offset of the next base character after the leading accents
+ int baseoffset = getNextBaseOffset(targetText, textoffset);
+ // normalizing the offensive string
+ String textstr = getString(targetText, textoffset,
+ baseoffset - textoffset);
+ if (Normalizer.quickCheck(textstr, Normalizer.NFD,0)
+ == Normalizer.NO) {
+ textstr = Normalizer.decompose(textstr, false);
+ }
+ accents.append(textstr);
+
+ // accentsindex[i] .. accentsindex[i + 1] delimit accent group i;
+ // the characters before accentsindex[0] are the base characters
+ int accentsindex[] = new int[INITIAL_ARRAY_SIZE_];
+ int size = getUnblockedAccentIndex(accents, accentsindex);
+
+ // 2 power n - 1 plus the full set of accents
+ // each bit of count selects one accent group; counting down from the
+ // all-ones mask tries the full set first and never the empty set
+ int count = (2 << (size - 1)) - 1;
+ while (count > 0) {
+ m_canonicalPrefixAccents_.delete(0,
+ m_canonicalPrefixAccents_.length());
+ // copy the base characters
+ for (int k = 0; k < accentsindex[0]; k ++) {
+ m_canonicalPrefixAccents_.append(accents.charAt(k));
+ }
+ // forming all possible canonical rearrangement by dropping
+ // sets of accents
+ for (int i = 0; i <= size - 1; i ++) {
+ // the most significant bit corresponds to the first group
+ int mask = 1 << (size - i - 1);
+ if ((count & mask) != 0) {
+ for (int j = accentsindex[i]; j < accentsindex[i + 1];
+ j ++) {
+ m_canonicalPrefixAccents_.append(accents.charAt(j));
+ }
+ }
+ }
+ offset = doPreviousCanonicalPrefixMatch(baseoffset);
+ if (offset != DONE) {
+ return true; // match found
+ }
+ count --;
+ }
+ return false;
+ }
+
+     /**
+      * Checks a backward canonical match candidate for contractions.
+      * If the match starts with a partial contraction we fail.
+      * The possibly modified boundaries are handed back through the
+      * temporary utility buffer: m_utilBuffer_[0] receives the start and
+      * m_utilBuffer_[1] the end. On failure the start stored there has been
+      * moved back to the previous base offset before the failed candidate.
+      * The collation element iterator m_colEIter_ is repositioned by this
+      * method whenever one of the boundary characters is unsafe.
+      * @param start offset of potential match, to be modified if necessary
+      * @param end offset of potential match, to be modified if necessary
+      * @return true if match passes the contraction test, false otherwise.
+      */
+     private boolean checkPreviousCanonicalContractionMatch(int start, int end)
+     {
+         int temp = end;
+         // This part checks if either ends of the match contains potential
+         // contraction. If so we'll have to iterate through them
+         char echar = 0;
+         char schar = 0;
+         if (end < m_textLimitOffset_) {
+             targetText.setIndex(end);
+             echar = targetText.current();
+         }
+         if (start + 1 < m_textLimitOffset_) {
+             targetText.setIndex(start + 1);
+             schar = targetText.current();
+         }
+         if (m_collator_.isUnsafe(echar) || m_collator_.isUnsafe(schar)) {
+             // expansion suffix, what's left to iterate
+             int expansion = m_colEIter_.m_CEBufferSize_
+                             - m_colEIter_.m_CEBufferOffset_;
+             m_colEIter_.setExactOffset(end);
+             while (expansion > 0) {
+                 // getting rid of the redundant ce
+                 // since forward contraction/expansion may have extra ces
+                 // if we are in the normalization buffer,
+                 // hasAccentsBeforeMatch would have taken care of it.
+                 // E.g. the character \u01FA will have an expansion of 3, but
+                 // if we are only looking for A ring A\u030A, we'll have to
+                 // skip the last ce in the expansion buffer
+                 m_colEIter_.previous();
+                 if (m_colEIter_.getOffset() != temp) {
+                     end = temp;
+                     temp = m_colEIter_.getOffset();
+                 }
+                 expansion --;
+             }
+
+             // walk the text ces backwards and compare them with the pattern,
+             // last pattern ce first.
+             // NOTE: the forward variant re-adjusts the boundary on its
+             // first compared ce; the equivalent branch here was guarded by
+             // "count == 0", which can never hold inside this loop, so it
+             // never executed and has been dropped.
+             int count = m_pattern_.m_CELength_;
+             while (count > 0) {
+                 int ce = getCE(m_colEIter_.previous());
+                 if (ce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+                 if (count == m_pattern_.m_CELength_
+                     && ce != m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1]) {
+                     // accents may have extra starting ces, this occurs when
+                     // a pure accent pattern is matched without
+                     // rearrangement
+                     int expected
+                         = m_pattern_.m_CE_[m_pattern_.m_CELength_ - 1];
+                     // move end to the start of the code point before it,
+                     // without splitting a surrogate pair
+                     targetText.setIndex(end);
+                     if (UTF16.isTrailSurrogate(targetText.previous())) {
+                         if (targetText.getIndex() > m_textBeginOffset_ &&
+                             !UTF16.isLeadSurrogate(targetText.previous())) {
+                             targetText.next();
+                         }
+                     }
+                     end = targetText.getIndex();
+                     if ((getFCD(targetText, end) & LAST_BYTE_MASK_) != 0) {
+                         ce = getCE(m_colEIter_.previous());
+                         // NOTE(review): when iterating backwards the bound
+                         // "getOffset() <= start" looks inverted compared
+                         // with the forward variant's "<= end"; left
+                         // unchanged, confirm the intended bound.
+                         while (ce != expected
+                                && ce != CollationElementIterator.NULLORDER
+                                && m_colEIter_.getOffset() <= start) {
+                             ce = getCE(m_colEIter_.previous());
+                         }
+                     }
+                 }
+                 if (ce != m_pattern_.m_CE_[count - 1]) {
+                     // mismatch: report where the search should resume
+                     start --;
+                     start = getPreviousBaseOffset(start);
+                     m_utilBuffer_[0] = start;
+                     m_utilBuffer_[1] = end;
+                     return false;
+                 }
+                 count --;
+             }
+         }
+         m_utilBuffer_[0] = start;
+         m_utilBuffer_[1] = end;
+         return true;
+     }
+
+     /**
+      * Validates a backward canonical match candidate and records it.
+      * A candidate found through accent rearrangement (the pattern has
+      * suffix or prefix accents and the matching rearrangement buffer is
+      * not empty) is accepted immediately. Any other candidate must
+      * <ul>
+      * <li> pass the contraction check
+      * <li> not repeat the previous match
+      * <li> lie on break boundaries
+      * <li> pass the identical check
+      * </ul>
+      * On success m_matchedIndex_ and matchLength are set. On failure
+      * m_utilBuffer_[0] holds the offset at which the search should resume.
+      * @param textoffset offset in the collation element text, the start of
+      *        the candidate
+      * @return true if the match is valid, false otherwise
+      */
+     private boolean checkPreviousCanonicalMatch(int textoffset)
+     {
+         // a match produced by rearranging accents is taken as is
+         boolean rearranged = m_pattern_.m_hasSuffixAccents_
+                              && m_canonicalSuffixAccents_.length() != 0;
+         if (!rearranged) {
+             rearranged = m_pattern_.m_hasPrefixAccents_
+                          && m_canonicalPrefixAccents_.length() != 0;
+         }
+         if (rearranged) {
+             m_matchedIndex_ = textoffset;
+             int accentEnd = getNextBaseOffset(m_colEIter_.getOffset());
+             matchLength = accentEnd - textoffset;
+             return true;
+         }
+
+         // the current iterator offset is taken as the end of the candidate;
+         // on failure the contraction check has already stored the resume
+         // offset in the utility buffer
+         if (!checkPreviousCanonicalContractionMatch(textoffset,
+                                                     m_colEIter_.getOffset())) {
+             return false;
+         }
+
+         // pick up the boundaries adjusted by the contraction check
+         int matchStart = m_utilBuffer_[0];
+         int matchEnd = getNextBaseOffset(m_utilBuffer_[1]);
+
+         // this totally matches, however we need to check if it is repeating
+         boolean rejected = checkRepeatedMatch(matchStart, matchEnd)
+                            || !isBreakUnit(matchStart, matchEnd)
+                            || !checkIdentical(matchStart, matchEnd);
+         if (rejected) {
+             m_utilBuffer_[0] = getPreviousBaseOffset(matchStart - 1);
+             return false;
+         }
+
+         m_matchedIndex_ = matchStart;
+         matchLength = matchEnd - matchStart;
+         return true;
+     }
+
+     /**
+      * Method that does the next exact match.
+      * Starting from the shifted offset, the text ces are read backwards
+      * from the candidate end and compared with the pattern ces, last
+      * pattern ce first. A mismatch shifts the candidate end forward with
+      * shiftForward; a full ce match is validated with checkNextExactMatch,
+      * which records the match on success and otherwise supplies the next
+      * offset to try through m_utilBuffer_[0].
+      * When the text limit is passed without a match, setMatchNotFound is
+      * called.
+      * @param start the offset to start shifting from and performing the
+      *        next exact match
+      */
+     private void handleNextExact(int start)
+     {
+         int textoffset = shiftForward(start,
+                                       CollationElementIterator.NULLORDER,
+                                       m_pattern_.m_CELength_);
+         while (textoffset <= m_textLimitOffset_) {
+             m_colEIter_.setExactOffset(textoffset);
+             int patternceindex = m_pattern_.m_CELength_ - 1;
+             boolean found = false;
+             // ce handed to shiftForward when the candidate is rejected
+             int lastce = CollationElementIterator.NULLORDER;
+             int targetce = CollationElementIterator.IGNORABLE;
+
+             while (true) {
+                 // finding the last pattern ce match, imagine composite
+                 // characters. for example: search for pattern A in text
+                 // \u00C0 we'll have to skip \u0300 the grave first before
+                 // we get to A
+                 targetce = m_colEIter_.previous();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (targetce == CollationElementIterator.IGNORABLE &&
+                     m_colEIter_.isInBuffer()) {
+                     // this is for the text \u0315\u0300 that requires
+                     // normalization and pattern \u0300, where \u0315 is
+                     // ignorable
+                     continue;
+                 }
+                 if (lastce == CollationElementIterator.NULLORDER
+                     || lastce == CollationElementIterator.IGNORABLE) {
+                     lastce = targetce;
+                 }
+                 if (targetce == m_pattern_.m_CE_[patternceindex]) {
+                     // the first ce can be a contraction
+                     found = true;
+                     break;
+                 }
+                 if (m_colEIter_.m_CEBufferOffset_ <= 0) {
+                     found = false;
+                     break;
+                 }
+             }
+
+             // compare the remaining pattern ces, skipping ignorable text ces
+             while (found && patternceindex > 0) {
+                 lastce = targetce;
+                 targetce = m_colEIter_.previous();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (targetce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+
+                 patternceindex --;
+                 found = targetce == m_pattern_.m_CE_[patternceindex];
+             }
+
+             if (!found) {
+                 textoffset = shiftForward(textoffset, lastce, patternceindex);
+                 continue;
+             }
+
+             if (checkNextExactMatch(textoffset)) {
+                 return;
+             }
+             // resume from the offset suggested by the failed check
+             textoffset = m_utilBuffer_[0];
+         }
+         setMatchNotFound();
+     }
+
+     /**
+      * Method that does the next canonical match.
+      * Works like the exact forward search, with two differences: when the
+      * plain ce comparison fails and the pattern has prefix or suffix
+      * accents, doNextCanonicalMatch is given a chance to match by
+      * rearranging accents, and candidates are validated with
+      * checkNextCanonicalMatch.
+      * Both accent rearrangement buffers are emptied once before the search
+      * starts. When the text limit is passed without a match,
+      * setMatchNotFound is called.
+      * @param start the offset to start shifting from and performing the
+      *        next canonical match
+      */
+     private void handleNextCanonical(int start)
+     {
+         boolean hasPatternAccents =
+            m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_;
+
+         // shifting it check for setting offset
+         // if setOffset is called previously or there was no previous match,
+         // we leave the offset as it is.
+         int textoffset = shiftForward(start,
+                                       CollationElementIterator.NULLORDER,
+                                       m_pattern_.m_CELength_);
+         m_canonicalPrefixAccents_.delete(0,
+                                          m_canonicalPrefixAccents_.length());
+         m_canonicalSuffixAccents_.delete(0,
+                                          m_canonicalSuffixAccents_.length());
+
+         while (textoffset <= m_textLimitOffset_)
+         {
+             m_colEIter_.setExactOffset(textoffset);
+             int patternceindex = m_pattern_.m_CELength_ - 1;
+             boolean found = false;
+             // ce handed to shiftForward when the candidate is rejected
+             int lastce = CollationElementIterator.NULLORDER;
+             int targetce = CollationElementIterator.IGNORABLE;
+
+             while (true) {
+                 // finding the last pattern ce match, imagine composite
+                 // characters for example: search for pattern A in text
+                 // \u00C0 we'll have to skip \u0300 the grave first before
+                 // we get to A
+                 targetce = m_colEIter_.previous();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (lastce == CollationElementIterator.NULLORDER
+                     || lastce == CollationElementIterator.IGNORABLE) {
+                     lastce = targetce;
+                 }
+                 if (targetce == m_pattern_.m_CE_[patternceindex]) {
+                     // the first ce can be a contraction
+                     found = true;
+                     break;
+                 }
+                 if (m_colEIter_.m_CEBufferOffset_ <= 0) {
+                     found = false;
+                     break;
+                 }
+             }
+
+             // compare the remaining pattern ces, skipping ignorable text ces
+             while (found && patternceindex > 0) {
+                 targetce = m_colEIter_.previous();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (targetce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+
+                 patternceindex --;
+                 found = targetce == m_pattern_.m_CE_[patternceindex];
+             }
+
+             // the plain comparison failed: try rearranging the accents
+             if (hasPatternAccents && !found) {
+                 found = doNextCanonicalMatch(textoffset);
+             }
+
+             if (!found) {
+                 textoffset = shiftForward(textoffset, lastce, patternceindex);
+                 continue;
+             }
+
+             if (checkNextCanonicalMatch(textoffset)) {
+                 return;
+             }
+             // resume from the offset suggested by the failed check
+             textoffset = m_utilBuffer_[0];
+         }
+         setMatchNotFound();
+     }
+
+     /**
+      * Method that does the previous exact match.
+      * Starting from the reverse-shifted offset, the text ces are read
+      * forwards from the candidate start and compared with the pattern ces,
+      * first pattern ce first. A mismatch moves the candidate start
+      * backwards with reverseShift; a full ce match is validated with
+      * checkPreviousExactMatch, which records the match on success and
+      * otherwise supplies the next offset to try through m_utilBuffer_[0].
+      * When the start of the text is passed without a match,
+      * setMatchNotFound is called.
+      * @param start the offset to start shifting from and performing the
+      *        previous exact match
+      */
+     private void handlePreviousExact(int start)
+     {
+         int textoffset = reverseShift(start,
+                                       CollationElementIterator.NULLORDER,
+                                       m_pattern_.m_CELength_);
+         while (textoffset >= m_textBeginOffset_)
+         {
+             m_colEIter_.setExactOffset(textoffset);
+             int patternceindex = 1;
+             int targetce = CollationElementIterator.IGNORABLE;
+             boolean found = false;
+             // ce handed to reverseShift when the candidate is rejected
+             int firstce = CollationElementIterator.NULLORDER;
+
+             while (true) {
+                 // finding the first pattern ce match, imagine composite
+                 // characters. for example: search for pattern \u0300 in
+                 // text \u00C0, we'll have to skip A first before we get to
+                 // \u0300 the grave accent
+                 targetce = m_colEIter_.next();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (firstce == CollationElementIterator.NULLORDER
+                     || firstce == CollationElementIterator.IGNORABLE) {
+                     firstce = targetce;
+                 }
+                 if (targetce == CollationElementIterator.IGNORABLE
+                     && m_collator_.getStrength() != Collator.PRIMARY) {
+                     continue;
+                 }
+                 if (targetce == m_pattern_.m_CE_[0]) {
+                     found = true;
+                     break;
+                 }
+                 if (m_colEIter_.m_CEBufferOffset_ == -1
+                     || m_colEIter_.m_CEBufferOffset_
+                                             == m_colEIter_.m_CEBufferSize_) {
+                     // checking for accents in composite character
+                     found = false;
+                     break;
+                 }
+             }
+
+             // compare the remaining pattern ces, skipping ignorable text ces
+             while (found && patternceindex < m_pattern_.m_CELength_) {
+                 firstce = targetce;
+                 targetce = m_colEIter_.next();
+                 if (targetce == CollationElementIterator.NULLORDER) {
+                     found = false;
+                     break;
+                 }
+                 targetce = getCE(targetce);
+                 if (targetce == CollationElementIterator.IGNORABLE) {
+                     continue;
+                 }
+
+                 found = targetce == m_pattern_.m_CE_[patternceindex];
+                 patternceindex ++;
+             }
+
+             if (!found) {
+                 textoffset = reverseShift(textoffset, firstce,
+                                           patternceindex);
+                 continue;
+             }
+
+             if (checkPreviousExactMatch(textoffset)) {
+                 return;
+             }
+             // resume from the offset suggested by the failed check
+             textoffset = m_utilBuffer_[0];
+         }
+         setMatchNotFound();
+     }
+
+ /**
+ * Method that does the previous canonical match
+ * @param start the offset to start shifting from and performing the
+ * previous canonical match
+ */
+ private void handlePreviousCanonical(int start)
+ {
+ // The canonical fallback (doPreviousCanonicalMatch) is only attempted when
+ // the pattern is flagged as having prefix or suffix accents.
+ boolean hasPatternAccents =
+ m_pattern_.m_hasSuffixAccents_ || m_pattern_.m_hasPrefixAccents_;
+
+ // shifting it check for setting offset
+ // if setOffset is called previously or there was no previous match, we
+ // leave the offset as it is.
+ int textoffset = reverseShift(start, CollationElementIterator.NULLORDER,
+ m_pattern_.m_CELength_);
+ // empty both canonical accent buffers before searching
+ m_canonicalPrefixAccents_.delete(0, m_canonicalPrefixAccents_.length());
+ m_canonicalSuffixAccents_.delete(0, m_canonicalSuffixAccents_.length());
+
+ // Keep trying candidate offsets until one falls before m_textBeginOffset_.
+ while (textoffset >= m_textBeginOffset_)
+ {
+ m_colEIter_.setExactOffset(textoffset);
+ // m_CE_[0] is matched by the first inner loop, so the second inner loop
+ // starts comparing at pattern CE index 1.
+ int patternceindex = 1;
+ int targetce = CollationElementIterator.IGNORABLE;
+ boolean found = false;
+ int firstce = CollationElementIterator.NULLORDER;
+
+ while (true) {
+ // finding the first pattern ce match, imagine composite
+ // characters. for example: search for pattern \u0300 in text
+ // \u00C0, we'll have to skip A first before we get to
+ // \u0300 the grave accent
+ targetce = m_colEIter_.next();
+ if (targetce == CollationElementIterator.NULLORDER) {
+ // iterator exhausted before the first pattern CE was seen
+ found = false;
+ break;
+ }
+ targetce = getCE(targetce);
+ // firstce keeps being overwritten until it holds a CE that is
+ // neither NULLORDER nor IGNORABLE
+ if (firstce == CollationElementIterator.NULLORDER
+ || firstce == CollationElementIterator.IGNORABLE) {
+ firstce = targetce;
+ }
+
+ if (targetce == m_pattern_.m_CE_[0]) {
+ // the first ce can be a contraction
+ found = true;
+ break;
+ }
+ if (m_colEIter_.m_CEBufferOffset_ == -1
+ || m_colEIter_.m_CEBufferOffset_
+ == m_colEIter_.m_CEBufferSize_) {
+ // checking for accents in composite character
+ found = false;
+ break;
+ }
+ }
+
+ // Default CE for the shift below; it is overwritten if the next loop
+ // reads further CEs.
+ targetce = firstce;
+
+ // Compare the remaining pattern CEs against the following text CEs,
+ // skipping ignorable text CEs.
+ while (found && patternceindex < m_pattern_.m_CELength_) {
+ targetce = m_colEIter_.next();
+ if (targetce == CollationElementIterator.NULLORDER) {
+ found = false;
+ break;
+ }
+ targetce = getCE(targetce);
+ if (targetce == CollationElementIterator.IGNORABLE) {
+ continue;
+ }
+
+ found = found && targetce == m_pattern_.m_CE_[patternceindex];
+ patternceindex ++;
+ }
+
+ // initializing the rearranged accent array
+ if (hasPatternAccents && !found) {
+ found = doPreviousCanonicalMatch(textoffset);
+ }
+
+ if (!found) {
+ textoffset = reverseShift(textoffset, targetce, patternceindex);
+ // NOTE(review): this assignment has no visible effect; patternceindex
+ // is re-declared and set to 1 at the top of the next iteration.
+ patternceindex = 0;
+ continue;
+ }
+
+ if (checkPreviousCanonicalMatch(textoffset)) {
+ return;
+ }
+ // Candidate rejected: continue from the offset stored in m_utilBuffer_[0]
+ // (presumably written by checkPreviousCanonicalMatch -- confirm there).
+ textoffset = m_utilBuffer_[0];
+ }
+ // no candidate offset produced a match
+ setMatchNotFound();
+ }
+
+ /**
+ * Gets a substring out of a CharacterIterator
+ * @param text CharacterIterator
+ * @param start start offset
+ * @param length of substring
+ * @return substring from text starting at start and length length
+ */
+ private static final String getString(CharacterIterator text, int start,
+                                       int length)
+ {
+     // Copies 'length' characters starting at 'start' and then puts the
+     // iterator back where the caller left it.
+     StringBuilder collected = new StringBuilder(length);
+     final int savedIndex = text.getIndex();
+     text.setIndex(start);
+     int remaining = length;
+     while (remaining > 0) {
+         collected.append(text.current());
+         text.next();
+         remaining--;
+     }
+     text.setIndex(savedIndex);
+     return collected.toString();
+ }
+
+ /**
+ * Getting the mask for collation strength
+ * @param strength collation strength
+ * @return collation element mask
+ */
+ private static final int getMask(int strength)
+ {
+ switch (strength)
+ {
+ case Collator.PRIMARY:
+ return RuleBasedCollator.CE_PRIMARY_MASK_;
+ case Collator.SECONDARY:
+ return RuleBasedCollator.CE_SECONDARY_MASK_
+ | RuleBasedCollator.CE_PRIMARY_MASK_;
+ default:
+ return RuleBasedCollator.CE_TERTIARY_MASK_
+ | RuleBasedCollator.CE_SECONDARY_MASK_
+ | RuleBasedCollator.CE_PRIMARY_MASK_;
+ }
+ }
+
+ /**
+ * Sets match not found
+ */
+ private void setMatchNotFound()
+ {
+ // this method resets the match result regardless of the error status.
+ // DONE in m_matchedIndex_ records that there is no current match
+ m_matchedIndex_ = DONE;
+ // and the recorded match length is cleared to zero
+ setMatchLength(0);
+ }
+
+ /**
+ * Check the boundaries of the match.
+ */
+ private int checkBreakBoundary(int end) {
+     // An offset already on a boundary of m_charBreakIter_ is returned as is;
+     // otherwise the result of following(end) is returned.
+     if (m_charBreakIter_.isBoundary(end)) {
+         return end;
+     }
+     return m_charBreakIter_.following(end);
+ }
+}
diff --git a/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java b/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java
new file mode 100644
index 00000000000..9c7e7eec88d
--- /dev/null
+++ b/main/classes/collate/src/com/ibm/icu/util/GlobalizationPreferences.java
@@ -0,0 +1,1513 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+*/
+package com.ibm.icu.util;
+
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.DateFormat;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.text.SimpleDateFormat;
+
+/**
+ * This convenience class provides a mechanism for bundling together different
+ * globalization preferences. It includes:
+ *
+ * A list of locales/languages in preference order
+ * A territory
+ * A currency
+ * A timezone
+ * A calendar
+ * A collator (for language-sensitive sorting, searching, and matching).
+ * Explicit overrides for date/time formats, etc.
+ *
+ * The class will heuristically compute implicit values for the above
+ * based on available data if explicit values are not supplied. These implicit
+ * values can be presented to users for confirmation, or replacement if the
+ * values are incorrect.
+ *
+ * To reset any explicit field so that it will get heuristic values, pass in
+ * null. For example, myPreferences.setLocale(null);
+ *
+ * All of the heuristics can be customized by subclasses, by overriding
+ * getTerritory(), guessCollator(), etc.
+ *
+ * The class also supplies display names for languages, scripts, territories,
+ * currencies, timezones, etc. These are computed according to the
+ * locale/language preference list. Thus, if the preference is Breton; French;
+ * English, then the display name for a language will be returned in Breton if
+ * available, otherwise in French if available, otherwise in English.
+ *
+ * The codes used to reference territory, currency, etc. are as defined elsewhere
+ * in ICU, and are taken from CLDR (which reflects RFC 3066bis usage, ISO 4217,
+ * and the TZ Timezone database identifiers).
+ *
+ * This is at a prototype stage, and has not incorporated all the design
+ * changes that we would like yet; further feedback is welcome.
+ * Note:
+ *
+ * to get the display name for the first day of the week, use the calendar +
+ * display names.
+ * to get the work days, ask the calendar (when that is available).
+ * to get papersize / measurement system/bidi-orientation, ask the locale
+ * (when that is available there)
+ * to get the field order in a date, and whether a time is 24hour or not,
+ * ask the DateFormat (when that is available there)
+ * it will support HOST locale when it becomes available (it is a special
+ * locale that will ask the services to use the host platform's values).
+ *
+ *
+ * @draft ICU 3.6 (retainAll)
+ * @provisional This API might change or be removed in a future release.
+ */
+
+//TODO:
+// - Add Holidays
+// - Add convenience to get/take Locale as well as ULocale.
+// - Add Lenient datetime formatting when that is available.
+// - Should this be serializable?
+// - Other utilities?
+
+public class GlobalizationPreferences implements Freezable {
+
+ /**
+ * Default constructor
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ // Intentionally empty: the constructor sets no explicit preference.
+ public GlobalizationPreferences(){}
+ /**
+ * Number Format type
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int
+ NF_NUMBER = 0, // NumberFormat.NUMBERSTYLE
+ NF_CURRENCY = 1, // NumberFormat.CURRENCYSTYLE
+ NF_PERCENT = 2, // NumberFormat.PERCENTSTYLE
+ NF_SCIENTIFIC = 3, // NumberFormat.SCIENTIFICSTYLE
+ NF_INTEGER = 4; // NumberFormat.INTEGERSTYLE
+
+ // One past the largest NF_* value: sizes the numberFormats array and bounds
+ // the style check in getNumberFormat.
+ private static final int NF_LIMIT = NF_INTEGER + 1;
+
+ /**
+ * Date Format type
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int
+ DF_FULL = DateFormat.FULL, // 0
+ DF_LONG = DateFormat.LONG, // 1
+ DF_MEDIUM = DateFormat.MEDIUM, // 2
+ DF_SHORT = DateFormat.SHORT, // 3
+ DF_NONE = 4; // no DateFormat counterpart; marks "this part is absent"
+
+ // One past the largest DF_* value: sizes both dimensions of the dateFormats
+ // array and bounds the style checks in getDateFormat.
+ private static final int DF_LIMIT = DF_NONE + 1;
+
+ /**
+ * For selecting a choice of display names
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ // Values accepted as the 'type' argument of getDisplayName(String, int).
+ public static final int
+ ID_LOCALE = 0,
+ ID_LANGUAGE = 1,
+ ID_SCRIPT = 2,
+ ID_TERRITORY = 3,
+ ID_VARIANT = 4,
+ ID_KEYWORD = 5,
+ ID_KEYWORD_VALUE = 6,
+ ID_CURRENCY = 7,
+ ID_CURRENCY_SYMBOL = 8,
+ ID_TIMEZONE = 9;
+
+ // Currently unused limit constant, kept commented out.
+ //private static final int ID_LIMIT = ID_TIMEZONE + 1;
+
+ /**
+ * Break iterator type
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int
+ BI_CHARACTER = BreakIterator.KIND_CHARACTER, // 0
+ BI_WORD = BreakIterator.KIND_WORD, // 1
+ BI_LINE = BreakIterator.KIND_LINE, // 2
+ BI_SENTENCE = BreakIterator.KIND_SENTENCE, // 3
+ BI_TITLE = BreakIterator.KIND_TITLE; // 4
+
+ // One past the largest BI_* value: sizes the breakIterators array and bounds
+ // the type checks in getBreakIterator / setBreakIterator.
+ private static final int BI_LIMIT = BI_TITLE + 1;
+
+ /**
+ * Sets the language/locale priority list. If other information is
+ * not (yet) available, this is used to produce a default value
+ * for the appropriate territory, currency, timezone, etc. The
+ * user should be given the opportunity to correct those defaults
+ * in case they are incorrect.
+ *
+ * @param inputLocales list of locales in priority order, e.g. {"br", "fr"}
+ * for Breton first, then French if that fails.
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setLocales(List inputLocales) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // A null list clears the explicit setting (getLocales() then falls back
+     // to guessLocales()) instead of failing inside processLocales.
+     if (inputLocales == null) {
+         locales = null;
+     } else {
+         // the stored list is the processed (expanded / re-ordered) form
+         locales = processLocales(inputLocales);
+     }
+     return this;
+ }
+
+ /**
+ * Get a copy of the language/locale priority list
+ *
+ * @return a copy of the language/locale priority list.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public List getLocales() {
+     // No explicit list: return whatever guessLocales() produces.
+     if (locales == null) {
+         return guessLocales();
+     }
+     // Explicit list: return a fresh copy rather than the stored instance.
+     List copy = new ArrayList();
+     copy.addAll(locales);
+     return copy;
+ }
+
+ /**
+ * Convenience function for getting the locales in priority order
+ * @param index The index (0..n) of the desired item.
+ * @return desired item. null if index is out of range
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ULocale getLocale(int index) {
+     // use the explicit list when present, otherwise the guessed one
+     List lcls = locales;
+     if (lcls == null) {
+         lcls = guessLocales();
+     }
+     if (index >= 0 && index < lcls.size()) {
+         // The list is declared without a type argument here, so get()
+         // yields Object and needs an explicit cast to compile.
+         return (ULocale) lcls.get(index);
+     }
+     // out-of-range index
+     return null;
+ }
+
+ /**
+ * Convenience routine for setting the language/locale priority
+ * list from an array.
+ *
+ * @see #setLocales(List locales)
+ * @param uLocales list of locales in an array
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setLocales(ULocale[] uLocales) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // A null array clears the explicit locale list; Arrays.asList would
+     // otherwise throw a NullPointerException.
+     if (uLocales == null) {
+         locales = null;
+         return this;
+     }
+     return setLocales(Arrays.asList(uLocales));
+ }
+
+ /**
+ * Convenience routine for setting the language/locale priority
+ * list from a single locale/language.
+ *
+ * @see #setLocales(List locales)
+ * @param uLocale single locale
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setLocale(ULocale uLocale) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // setLocale(null) is the documented way to drop the explicit locale list;
+     // wrapping null in an array would only fail later in processLocales.
+     if (uLocale == null) {
+         locales = null;
+         return this;
+     }
+     return setLocales(new ULocale[]{uLocale});
+ }
+
+ /**
+ * Convenience routine for setting the locale priority list from
+ * an Accept-Language string.
+ * @see #setLocales(List locales)
+ * @param acceptLanguageString Accept-Language list, as defined by
+ * Section 14.4 of the RFC 2616 (HTTP 1.1)
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setLocales(String acceptLanguageString) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // A null string clears the explicit locale list, like the other setters.
+     if (acceptLanguageString == null) {
+         locales = null;
+         return this;
+     }
+     ULocale[] acceptLocales = null;
+     try {
+         acceptLocales = ULocale.parseAcceptLanguage(acceptLanguageString, true);
+     } catch (ParseException pe) {
+         //TODO: revisit after 3.8
+         // keep the parse failure as the cause so the details are not lost
+         throw new IllegalArgumentException("Invalid Accept-Language string", pe);
+     }
+     return setLocales(acceptLocales);
+ }
+
+ /**
+ * Convenience function to get a ResourceBundle instance using
+ * the specified base name based on the language/locale priority list
+ * stored in this object.
+ *
+ * @param baseName the base name of the resource bundle, a fully qualified
+ * class name
+ * @return a resource bundle for the given base name and locale based on the
+ * language/locale priority list stored in this object
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ResourceBundle getResourceBundle(String baseName) {
+ // delegate to the two-argument overload with no class loader
+ return getResourceBundle(baseName, null);
+ }
+
+ /**
+ * Convenience function to get a ResourceBundle instance using
+ * the specified base name and class loader based on the language/locale
+ * priority list stored in this object.
+ *
+ * @param baseName the base name of the resource bundle, a fully qualified
+ * class name
+ * @param loader the class loader from which to load the resource bundle
+ * @return a resource bundle for the given base name and locale based on the
+ * language/locale priority list stored in this object
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public ResourceBundle getResourceBundle(String baseName, ClassLoader loader) {
+ UResourceBundle urb = null;
+ UResourceBundle candidate = null;
+ String actualLocaleName = null;
+ List fallbacks = getLocales();
+ for (int i = 0; i < fallbacks.size(); i++) {
+ String localeName = (fallbacks.get(i)).toString();
+ if (actualLocaleName != null && localeName.equals(actualLocaleName)) {
+ // Actual locale name in the previous round may exactly matches
+ // with the next fallback locale
+ urb = candidate;
+ break;
+ }
+ try {
+ if (loader == null) {
+ candidate = UResourceBundle.getBundleInstance(baseName, localeName);
+ }
+ else {
+ candidate = UResourceBundle.getBundleInstance(baseName, localeName, loader);
+ }
+ if (candidate != null) {
+ actualLocaleName = candidate.getULocale().getName();
+ if (actualLocaleName.equals(localeName)) {
+ urb = candidate;
+ break;
+ }
+ if (urb == null) {
+ // Preserve the available bundle as the last resort
+ urb = candidate;
+ }
+ }
+ } catch (MissingResourceException mre) {
+ actualLocaleName = null;
+ continue;
+ }
+ }
+ if (urb == null) {
+ throw new MissingResourceException("Can't find bundle for base name "
+ + baseName, baseName, "");
+ }
+ return urb;
+ }
+
+ /**
+ * Sets the territory, which is a valid territory according to
+ * RFC 3066 (or successor). If not otherwise set, default
+ * currency and timezone values will be set from this. The user
+ * should be given the opportunity to correct those defaults in
+ * case they are incorrect.
+ *
+ * @param territory code
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setTerritory(String territory) {
+ // frozen objects reject every modification
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify immutable object");
+ }
+ // null is stored as is; getTerritory() then falls back to guessTerritory()
+ this.territory = territory; // immutable, so don't need to clone
+ return this;
+ }
+
+ /**
+ * Gets the territory setting. If it wasn't explicitly set, it is
+ * computed from the general locale setting.
+ *
+ * @return territory code, explicit or implicit.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getTerritory() {
+     // Strings are immutable, so the stored value can be handed out directly;
+     // without an explicit value the guess is returned.
+     return (territory != null) ? territory : guessTerritory();
+ }
+
+ /**
+ * Sets the currency code. If this has not been set, uses default for territory.
+ *
+ * @param currency Valid ISO 4217 currency code.
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setCurrency(Currency currency) {
+ // frozen objects reject every modification
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify immutable object");
+ }
+ // null is stored as is; getCurrency() then falls back to guessCurrency()
+ this.currency = currency; // immutable, so don't need to clone
+ return this;
+ }
+
+ /**
+ * Get a copy of the currency computed according to the settings.
+ *
+ * @return currency code, explicit or implicit.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Currency getCurrency() {
+     // The stored currency is returned without cloning (treated as immutable);
+     // without an explicit value the guess is returned.
+     return (currency != null) ? currency : guessCurrency();
+ }
+
+ /**
+ * Sets the calendar. If this has not been set, uses default for territory.
+ *
+ * @param calendar arbitrary calendar
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setCalendar(Calendar calendar) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // null clears the explicit calendar (getCalendar() then uses
+     // guessCalendar()); otherwise keep a private copy so later changes to
+     // the caller's instance do not leak in.
+     this.calendar = (calendar == null) ? null : (Calendar) calendar.clone();
+     return this;
+ }
+
+ /**
+ * Get a copy of the calendar according to the settings.
+ *
+ * @return calendar explicit or implicit.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Calendar getCalendar() {
+     if (calendar != null) {
+         // Work on a copy so the stored calendar is never exposed, then apply
+         // this object's time zone and the current time to the copy.
+         Calendar copy = (Calendar) calendar.clone();
+         copy.setTimeZone(getTimeZone());
+         copy.setTimeInMillis(System.currentTimeMillis());
+         return copy;
+     }
+     return guessCalendar();
+ }
+
+ /**
+ * Sets the timezone ID. If this has not been set, uses default for territory.
+ *
+ * @param timezone a valid TZID (see UTS#35).
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setTimeZone(TimeZone timezone) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // null clears the explicit time zone (getTimeZone() then uses
+     // guessTimeZone()); otherwise keep a private copy.
+     this.timezone = (timezone == null) ? null : (TimeZone) timezone.clone();
+     return this;
+ }
+
+ /**
+ * Get the timezone. It was either explicitly set, or is
+ * heuristically computed from other settings.
+ *
+ * @return timezone, either implicitly or explicitly set
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public TimeZone getTimeZone() {
+     // An explicit zone is handed out as a copy; otherwise the guess is returned.
+     return (timezone != null) ? (TimeZone) timezone.clone() : guessTimeZone();
+ }
+
+ /**
+ * Get a copy of the collator according to the settings.
+ *
+ * @return collator explicit or implicit.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Collator getCollator() {
+ if (collator == null) {
+ return guessCollator();
+ }
+ try {
+ return (Collator) collator.clone(); // clone for safety
+ } catch (CloneNotSupportedException e) {
+ throw new IllegalStateException("Error in cloning collator");
+ }
+ }
+
+ /**
+ * Explicitly set the collator for this object.
+ * @param collator The collator object to be passed.
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setCollator(Collator collator) {
+ if (isFrozen()) {
+ throw new UnsupportedOperationException("Attempt to modify immutable object");
+ }
+ try {
+ this.collator = (Collator) collator.clone(); // clone for safety
+ } catch (CloneNotSupportedException e) {
+ throw new IllegalStateException("Error in cloning collator");
+ }
+ return this;
+ }
+
+ /**
+ * Get a copy of the break iterator for the specified type according to the
+ * settings.
+ *
+ * @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE
+ * @return break iterator explicit or implicit
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public BreakIterator getBreakIterator(int type) {
+ if (type < BI_CHARACTER || type >= BI_LIMIT) {
+ throw new IllegalArgumentException("Illegal break iterator type");
+ }
+ if (breakIterators == null || breakIterators[type] == null) {
+ return guessBreakIterator(type);
+ }
+ return (BreakIterator) breakIterators[type].clone(); // clone for safety
+ }
+
+ /**
+ * Explicitly set the break iterator for this object.
+ *
+ * @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE
+ * @param iterator a break iterator
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setBreakIterator(int type, BreakIterator iterator) {
+     if (type < BI_CHARACTER || type >= BI_LIMIT) {
+         throw new IllegalArgumentException("Illegal break iterator type");
+     }
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     if (iterator == null) {
+         // null clears the override for this type; getBreakIterator(type)
+         // then uses guessBreakIterator(type)
+         if (breakIterators != null) {
+             breakIterators[type] = null;
+         }
+         return this;
+     }
+     if (breakIterators == null) {
+         // the override table is allocated on first use
+         breakIterators = new BreakIterator[BI_LIMIT];
+     }
+     breakIterators[type] = (BreakIterator) iterator.clone(); // clone for safety
+     return this;
+ }
+
+ /**
+ * Get the display name for an ID: language, script, territory, currency, timezone...
+ * Uses the language priority list to do so.
+ *
+ * @param id language code, script code, ...
+ * @param type specifies the type of the ID: ID_LANGUAGE, etc.
+ * @return the display name
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public String getDisplayName(String id, int type) {
+ String result = id;
+ for (ULocale locale : getLocales()) {
+ if (!isAvailableLocale(locale, TYPE_GENERIC)) {
+ continue;
+ }
+ switch (type) {
+ case ID_LOCALE:
+ result = ULocale.getDisplayName(id, locale);
+ break;
+ case ID_LANGUAGE:
+ result = ULocale.getDisplayLanguage(id, locale);
+ break;
+ case ID_SCRIPT:
+ result = ULocale.getDisplayScript("und-" + id, locale);
+ break;
+ case ID_TERRITORY:
+ result = ULocale.getDisplayCountry("und-" + id, locale);
+ break;
+ case ID_VARIANT:
+ // TODO fix variant parsing
+ result = ULocale.getDisplayVariant("und-QQ-" + id, locale);
+ break;
+ case ID_KEYWORD:
+ result = ULocale.getDisplayKeyword(id, locale);
+ break;
+ case ID_KEYWORD_VALUE:
+ String[] parts = new String[2];
+ Utility.split(id,'=',parts);
+ result = ULocale.getDisplayKeywordValue("und@"+id, parts[0], locale);
+ // TODO fix to tell when successful
+ if (result.equals(parts[1])) {
+ continue;
+ }
+ break;
+ case ID_CURRENCY_SYMBOL:
+ case ID_CURRENCY:
+ Currency temp = new Currency(id);
+ result =temp.getName(locale, type==ID_CURRENCY
+ ? Currency.LONG_NAME
+ : Currency.SYMBOL_NAME, new boolean[1]);
+ // TODO: have method that doesn't take parameter. Add
+ // function to determine whether string is choice
+ // format.
+ // TODO: have method that doesn't require us
+ // to create a currency
+ break;
+ case ID_TIMEZONE:
+ SimpleDateFormat dtf = new SimpleDateFormat("vvvv",locale);
+ dtf.setTimeZone(TimeZone.getTimeZone(id));
+ result = dtf.format(new Date());
+ // TODO, have method that doesn't require us to create a timezone
+ // fix other hacks
+ // hack for couldn't match
+
+ boolean isBadStr = false;
+ // Matcher badTimeZone = Pattern.compile("[A-Z]{2}|.*\\s\\([A-Z]{2}\\)").matcher("");
+ // badtzstr = badTimeZone.reset(result).matches();
+ String teststr = result;
+ int sidx = result.indexOf('(');
+ int eidx = result.indexOf(')');
+ if (sidx != -1 && eidx != -1 && (eidx - sidx) == 3) {
+ teststr = result.substring(sidx+1, eidx);
+ }
+ if (teststr.length() == 2) {
+ isBadStr = true;
+ for (int i = 0; i < 2; i++) {
+ char c = teststr.charAt(i);
+ if (c < 'A' || 'Z' < c) {
+ isBadStr = false;
+ break;
+ }
+ }
+ }
+ if (isBadStr) {
+ continue;
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown type: " + type);
+ }
+
+ // TODO need better way of seeing if we fell back to root!!
+ // This will not work at all for lots of stuff
+ if (!id.equals(result)) {
+ return result;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Set an explicit date format. Overrides the locale priority list for
+ * a particular combination of dateStyle and timeStyle. DF_NONE should
+ * be used for the other style when only the date or only the time format
+ * is being set.
+ *
+ * @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+ * @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+ * @param format The date format
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setDateFormat(int dateStyle, int timeStyle, DateFormat format) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // Reject out-of-range styles with the same exception type that
+     // getDateFormat uses, instead of an ArrayIndexOutOfBoundsException.
+     if (dateStyle < 0 || dateStyle >= DF_LIMIT
+             || timeStyle < 0 || timeStyle >= DF_LIMIT) {
+         throw new IllegalArgumentException("Illegal date format style arguments");
+     }
+     if (format == null) {
+         // null clears the override for this style pair; getDateFormat then
+         // uses guessDateFormat
+         if (dateFormats != null) {
+             dateFormats[dateStyle][timeStyle] = null;
+         }
+         return this;
+     }
+     if (dateFormats == null) {
+         // the override table is allocated on first use
+         dateFormats = new DateFormat[DF_LIMIT][DF_LIMIT];
+     }
+     dateFormats[dateStyle][timeStyle] = (DateFormat) format.clone(); // for safety
+     return this;
+ }
+
+ /**
+ * Gets a date format according to the current settings. If there
+ * is an explicit (non-null) date/time format set, a copy of that
+ * is returned. Otherwise, the language priority list is used.
+ * DF_NONE should be used for the style, where only the date or
+ * time format individually is being gotten.
+ *
+ * @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+ * @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
+ * @return a DateFormat, according to the above description
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public DateFormat getDateFormat(int dateStyle, int timeStyle) {
+ if (dateStyle == DF_NONE && timeStyle == DF_NONE
+ || dateStyle < 0 || dateStyle >= DF_LIMIT
+ || timeStyle < 0 || timeStyle >= DF_LIMIT) {
+ throw new IllegalArgumentException("Illegal date format style arguments");
+ }
+ DateFormat result = null;
+ if (dateFormats != null) {
+ result = dateFormats[dateStyle][timeStyle];
+ }
+ if (result != null) {
+ result = (DateFormat) result.clone(); // clone for safety
+ // Not sure overriding configuration is what we really want...
+ result.setTimeZone(getTimeZone());
+ } else {
+ result = guessDateFormat(dateStyle, timeStyle);
+ }
+ return result;
+ }
+
+ /**
+ * Gets a number format according to the current settings. If
+ * there is an explicit (non-null) number format set, a copy of
+ * that is returned. Otherwise, the language priority list is
+ * used.
+ *
+ * @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public NumberFormat getNumberFormat(int style) {
+ if (style < 0 || style >= NF_LIMIT) {
+ throw new IllegalArgumentException("Illegal number format type");
+ }
+ NumberFormat result = null;
+ if (numberFormats != null) {
+ result = numberFormats[style];
+ }
+ if (result != null) {
+ result = (NumberFormat) result.clone(); // clone for safety (later optimize)
+ } else {
+ result = guessNumberFormat(style);
+ }
+ return result;
+ }
+
+ /**
+ * Sets a number format explicitly. Overrides the general locale settings.
+ *
+ * @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER
+ * @param format The number format
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences setNumberFormat(int style, NumberFormat format) {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // Reject out-of-range styles with the same exception type that
+     // getNumberFormat uses, instead of an ArrayIndexOutOfBoundsException.
+     if (style < 0 || style >= NF_LIMIT) {
+         throw new IllegalArgumentException("Illegal number format type");
+     }
+     if (format == null) {
+         // null clears the override for this style; getNumberFormat then
+         // uses guessNumberFormat
+         if (numberFormats != null) {
+             numberFormats[style] = null;
+         }
+         return this;
+     }
+     if (numberFormats == null) {
+         // the override table is allocated on first use
+         numberFormats = new NumberFormat[NF_LIMIT];
+     }
+     numberFormats[style] = (NumberFormat) format.clone(); // for safety
+     return this;
+ }
+
+ /**
+ * Restore the object to the initial state.
+ *
+ * @return this, for chaining
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences reset() {
+     if (isFrozen()) {
+         throw new UnsupportedOperationException("Attempt to modify immutable object");
+     }
+     // locale lists, explicit and implicit
+     locales = null;
+     implicitLocales = null;
+
+     // single-valued settings
+     territory = null;
+     currency = null;
+     timezone = null;
+     calendar = null;
+     collator = null;
+
+     // per-type override tables
+     breakIterators = null;
+     dateFormats = null;
+     numberFormats = null;
+     return this;
+ }
+
+ /**
+ * Process a language/locale priority list specified via <code>setLocales</code>.
+ * The input locale list may be expanded or re-ordered to represent the prioritized
+ * language/locale order actually used by this object by the algorithm explained
+ * below.
+ *
+ *
+ * Step 1 : Move later occurrence of more specific locale before earlier
+ * occurrence of less specific locale.
+ *
+ * Before: en, fr_FR, en_US, en_GB
+ *
+ * After: en_US, en_GB, en, fr_FR
+ *
+ *
+ * Step 2 : Append a fallback locale to each locale.
+ *
+ * Before: en_US, en_GB, en, fr_FR
+ *
+ * After: en_US, en, en_GB, en, en, fr_FR, fr
+ *
+ *
+ * Step 3 : Remove earlier occurrence of duplicated locale entries.
+ *
+ * Before: en_US, en, en_GB, en, en, fr_FR, fr
+ *
+ * After: en_US, en_GB, en, fr_FR, fr
+ *
+ *
+ * The final locale list is used to produce a default value for the appropriate territory,
+ * currency, timezone, etc. The list also represents the lookup order used in
+ * getResourceBundle
for this object. A subclass may override this method
+ * to customize the algorithm used for populating the locale list.
+ *
+ * @param inputLocales The list of input locales
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ protected List processLocales(List inputLocales) {
+ List result = new ArrayList();
+ /*
+ * Step 1: Relocate later occurrence of more specific locale
+ * before earlier occurrence of less specific locale.
+ *
+ * Example:
+ * Before - en_US, fr_FR, zh, en_US_Boston, zh_TW, zh_Hant, fr_CA
+ * After - en_US_Boston, en_US, fr_FR, zh_TW, zh_Hant, zh, fr_CA
+ */
+ for (int i = 0; i < inputLocales.size(); i++) {
+ ULocale uloc = inputLocales.get(i);
+
+ String language = uloc.getLanguage();
+ String script = uloc.getScript();
+ String country = uloc.getCountry();
+ String variant = uloc.getVariant();
+
+ boolean bInserted = false;
+ for (int j = 0; j < result.size(); j++) {
+ // Check if this locale is more specific
+ // than existing locale entries already inserted
+ // in the destination list
+ ULocale u = result.get(j);
+ if (!u.getLanguage().equals(language)) {
+ continue;
+ }
+ String s = u.getScript();
+ String c = u.getCountry();
+ String v = u.getVariant();
+ if (!s.equals(script)) {
+ if (s.length() == 0 && c.length() == 0 && v.length() == 0) {
+ result.add(j, uloc);
+ bInserted = true;
+ break;
+ } else if (s.length() == 0 && c.equals(country)) {
+ // We want to see zh_Hant_HK before zh_HK
+ result.add(j, uloc);
+ bInserted = true;
+ break;
+ } else if (script.length() == 0 && country.length() > 0 && c.length() == 0) {
+ // We want to see zh_HK before zh_Hant
+ result.add(j, uloc);
+ bInserted = true;
+ break;
+ }
+ continue;
+ }
+ if (!c.equals(country)) {
+ if (c.length() == 0 && v.length() == 0) {
+ result.add(j, uloc);
+ bInserted = true;
+ break;
+ }
+ }
+ if (!v.equals(variant) && v.length() == 0) {
+ result.add(j, uloc);
+ bInserted = true;
+ break;
+ }
+ }
+ if (!bInserted) {
+ // Add this locale at the end of the list
+ result.add(uloc);
+ }
+ }
+
+ // TODO: Locale aliases might be resolved here
+ // For example, zh_Hant_TW = zh_TW
+
+ /*
+ * Step 2: Append fallback locales for each entry
+ *
+ * Example:
+ * Before - en_US_Boston, en_US, fr_FR, zh_TW, zh_Hant, zh, fr_CA
+ * After - en_US_Boston, en_US, en, en_US, en, fr_FR, fr,
+ * zh_TW, zn, zh_Hant, zh, zh, fr_CA, fr
+ */
+ int index = 0;
+ while (index < result.size()) {
+ ULocale uloc = result.get(index);
+ while (true) {
+ uloc = uloc.getFallback();
+ if (uloc.getLanguage().length() == 0) {
+ break;
+ }
+ index++;
+ result.add(index, uloc);
+ }
+ index++;
+ }
+
+ /*
+ * Step 3: Remove earlier occurrence of duplicated locales
+ *
+ * Example:
+ * Before - en_US_Boston, en_US, en, en_US, en, fr_FR, fr,
+ * zh_TW, zn, zh_Hant, zh, zh, fr_CA, fr
+ * After - en_US_Boston, en_US, en, fr_FR, zh_TW, zh_Hant,
+ * zh, fr_CA, fr
+ */
+ index = 0;
+ while (index < result.size() - 1) {
+ ULocale uloc = result.get(index);
+ boolean bRemoved = false;
+ for (int i = index + 1; i < result.size(); i++) {
+ if (uloc.equals(result.get(i))) {
+ // Remove earlier one
+ result.remove(index);
+ bRemoved = true;
+ break;
+ }
+ }
+ if (!bRemoved) {
+ index++;
+ }
+ }
+ return result;
+ }
+
+
+ /**
+  * Builds the default date/time formatter for the requested styles.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @param dateStyle the date style; DF_NONE requests a time-only formatter
+  * @param timeStyle the time style; DF_NONE requests a date-only formatter
+  * @return a formatter built on this object's calendar for the first locale in
+  *         the locale list that has date format data, or for the root locale
+  *         when no such locale exists
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected DateFormat guessDateFormat(int dateStyle, int timeStyle) {
+     ULocale supported = getAvailableLocale(TYPE_DATEFORMAT);
+     ULocale dfLocale = (supported != null) ? supported : ULocale.ROOT;
+
+     if (timeStyle == DF_NONE) {
+         // No time part requested: date-only formatter.
+         return DateFormat.getDateInstance(getCalendar(), dateStyle, dfLocale);
+     }
+     if (dateStyle == DF_NONE) {
+         // No date part requested: time-only formatter.
+         return DateFormat.getTimeInstance(getCalendar(), timeStyle, dfLocale);
+     }
+     return DateFormat.getDateTimeInstance(getCalendar(), dateStyle, timeStyle, dfLocale);
+ }
+
+ /**
+  * Builds the default number formatter for the requested style.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @param style one of NF_NUMBER, NF_SCIENTIFIC, NF_INTEGER, NF_PERCENT, NF_CURRENCY
+  * @return a formatter for the first locale in the locale list that has number
+  *         format data, or for the root locale when no such locale exists; the
+  *         currency formatter is additionally set to this object's currency
+  * @throws IllegalArgumentException if the style is not one of the values above
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected NumberFormat guessNumberFormat(int style) {
+     ULocale supported = getAvailableLocale(TYPE_NUMBERFORMAT);
+     ULocale nfLocale = (supported != null) ? supported : ULocale.ROOT;
+
+     switch (style) {
+     case NF_NUMBER:
+         return NumberFormat.getInstance(nfLocale);
+     case NF_SCIENTIFIC:
+         return NumberFormat.getScientificInstance(nfLocale);
+     case NF_INTEGER:
+         return NumberFormat.getIntegerInstance(nfLocale);
+     case NF_PERCENT:
+         return NumberFormat.getPercentInstance(nfLocale);
+     case NF_CURRENCY: {
+         // The currency comes from this object's preferences, not from the locale.
+         NumberFormat currencyFormat = NumberFormat.getCurrencyInstance(nfLocale);
+         currencyFormat.setCurrency(getCurrency());
+         return currencyFormat;
+     }
+     default:
+         throw new IllegalArgumentException("Unknown number format style");
+     }
+ }
+
+ /**
+  * Guesses a territory code from the locale list.
+  * This function can be overridden by subclasses to use different heuristics.
+  *
+  * @return the country of the first locale in the list that has one; otherwise
+  *         the hard-coded default territory for the first locale's
+  *         language (and script, if present); otherwise "US"
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected String guessTerritory() {
+     // An explicit country on any locale in the list wins; the first one found is used.
+     for (ULocale candidate : getLocales()) {
+         String country = candidate.getCountry();
+         if (country.length() != 0) {
+             return country;
+         }
+     }
+
+     // Otherwise guess from the first language tag, or maybe from
+     // intersection of languages, eg nl + fr => BE
+     // TODO: fix using real data
+     // for now, just use fixed values
+     ULocale primary = getLocale(0);
+     String lang = primary.getLanguage();
+     String scriptCode = primary.getScript();
+
+     String mapped = null;
+     if (scriptCode.length() != 0) {
+         // Script-qualified keys (e.g. "zh_Hant") take precedence over the bare language.
+         mapped = language_territory_hack_map.get(lang + "_" + scriptCode);
+     }
+     if (mapped == null) {
+         mapped = language_territory_hack_map.get(lang);
+     }
+     // need *some* default
+     return (mapped != null) ? mapped : "US";
+ }
+
+ /**
+  * Guesses the currency from this object's territory.
+  * This function can be overridden by subclasses to use different heuristics
+  *
+  * @return the currency looked up for the locale "und-" followed by the territory code
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected Currency guessCurrency() {
+     // "und" = undetermined language; only the region part drives the lookup.
+     String localeId = "und-" + getTerritory();
+     ULocale territoryLocale = new ULocale(localeId);
+     return Currency.getInstance(territoryLocale);
+ }
+
+ /**
+  * Supplies the locale list used when none has been set explicitly.
+  * This function can be overridden by subclasses to use different heuristics
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @return the processed list derived from the default locale; it is computed
+  *         on the first call and the same cached list is returned afterwards
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected List guessLocales() {
+     if (implicitLocales != null) {
+         // Already computed by an earlier call.
+         return implicitLocales;
+     }
+     List defaults = new ArrayList(1);
+     defaults.add(ULocale.getDefault());
+     implicitLocales = processLocales(defaults);
+     return implicitLocales;
+ }
+
+ /**
+  * Builds the default collator.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @return a collator for the first locale in the locale list that has collation
+  *         data, or for the root locale when no such locale exists
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected Collator guessCollator() {
+     ULocale supported = getAvailableLocale(TYPE_COLLATOR);
+     return Collator.getInstance((supported != null) ? supported : ULocale.ROOT);
+ }
+
+ /**
+  * Builds the default break iterator of the requested kind.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @param type one of BI_CHARACTER, BI_TITLE, BI_WORD, BI_LINE, BI_SENTENCE
+  * @return a break iterator for the first locale in the locale list that has
+  *         break iterator data, or for the root locale when no such locale exists
+  * @throws IllegalArgumentException if the type is not one of the values above
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected BreakIterator guessBreakIterator(int type) {
+     ULocale supported = getAvailableLocale(TYPE_BREAKITERATOR);
+     ULocale brkLocale = (supported != null) ? supported : ULocale.ROOT;
+
+     switch (type) {
+     case BI_CHARACTER:
+         return BreakIterator.getCharacterInstance(brkLocale);
+     case BI_TITLE:
+         return BreakIterator.getTitleInstance(brkLocale);
+     case BI_WORD:
+         return BreakIterator.getWordInstance(brkLocale);
+     case BI_LINE:
+         return BreakIterator.getLineInstance(brkLocale);
+     case BI_SENTENCE:
+         return BreakIterator.getSentenceInstance(brkLocale);
+     default:
+         throw new IllegalArgumentException("Unknown break iterator type");
+     }
+ }
+
+ /**
+  * Guesses a time zone from this object's territory.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @return the zone hard-coded for the territory if there is one; otherwise the
+  *         first zone ID available for the territory that contains a '/',
+  *         or simply the first available ID if none does; "Etc/GMT" when the
+  *         territory has no zone IDs at all
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected TimeZone guessTimeZone() {
+     // TODO fix using real data
+     // for single-zone countries, pick that zone
+     // for others, pick the most populous zone
+     // for now, just use fixed value
+     // NOTE: in a few cases can do better by looking at language.
+     // Eg haw+US should go to Pacific/Honolulu
+     // fr+CA should go to America/Montreal
+     String region = getTerritory();
+     String timezoneString = territory_tzid_hack_map.get(region);
+     if (timezoneString == null) {
+         String[] attempt = TimeZone.getAvailableIDs(region);
+         if (attempt.length == 0) {
+             timezoneString = "Etc/GMT"; // gotta do something
+         } else {
+             // this all needs to be fixed to use real data. But for now, do slightly
+             // better by skipping cruft: prefer the first "Area/Location" style ID.
+             // Start from the first ID so that a list without any '/' still yields
+             // a valid choice instead of indexing one past the end of the array.
+             timezoneString = attempt[0];
+             for (int i = 0; i < attempt.length; ++i) {
+                 if (attempt[i].indexOf("/") >= 0) {
+                     timezoneString = attempt[i];
+                     break;
+                 }
+             }
+         }
+     }
+     return TimeZone.getTimeZone(timezoneString);
+ }
+
+ /**
+  * Builds the default calendar.
+  * This function can be overridden by subclasses to use different heuristics.
+  * It MUST return a 'safe' value,
+  * one whose modification will not affect this object.
+  *
+  * @return a calendar in this object's time zone for the first locale in the
+  *         locale list that has calendar data, or for ULocale.US when no such
+  *         locale exists
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected Calendar guessCalendar() {
+     ULocale supported = getAvailableLocale(TYPE_CALENDAR);
+     ULocale calLocale = (supported != null) ? supported : ULocale.US;
+     return Calendar.getInstance(getTimeZone(), calLocale);
+ }
+
+ // PRIVATES: per-instance state; reset() sets every field below to null.
+
+ private List locales;
+ private String territory;
+ private Currency currency;
+ private TimeZone timezone;
+ private Calendar calendar;
+ private Collator collator;
+ private BreakIterator[] breakIterators;
+ private DateFormat[][] dateFormats;
+ private NumberFormat[] numberFormats; // indexed by NF_* style; allocated on demand by setNumberFormat()
+ private List implicitLocales; // cache of the list computed by guessLocales()
+
+ { // instance initializer: every new instance starts from the reset() state
+ reset();
+ }
+
+
+ /*
+  * Returns the first locale of getLocales(), in list order, that is flagged as
+  * available for the given TYPE_* service, or null when none qualifies.
+  */
+ private ULocale getAvailableLocale(int type) {
+     List candidates = getLocales();
+     for (int idx = 0; idx < candidates.size(); idx++) {
+         ULocale candidate = candidates.get(idx);
+         if (isAvailableLocale(candidate, type)) {
+             return candidate;
+         }
+     }
+     return null;
+ }
+
+ /*
+  * Tells whether the locale has an entry in available_locales whose bit for
+  * the given TYPE_* service is set.
+  */
+ private boolean isAvailableLocale(ULocale loc, int type) {
+     BitSet serviceBits = available_locales.get(loc);
+     return serviceBits != null && serviceBits.get(type);
+ }
+
+ /*
+  * Available locales for service types: maps a locale to a bit set that has
+  * one bit per service, indexed by the TYPE_* constants below.
+  */
+ private static final HashMap available_locales = new HashMap();
+ private static final int
+     TYPE_GENERIC = 0,
+     TYPE_CALENDAR = 1,
+     TYPE_DATEFORMAT= 2,
+     TYPE_NUMBERFORMAT = 3,
+     TYPE_COLLATOR = 4,
+     TYPE_BREAKITERATOR = 5,
+     TYPE_LIMIT = TYPE_BREAKITERATOR + 1;
+
+ static {
+     // Every generally available locale gets an entry with the generic bit set.
+     ULocale[] allLocales = ULocale.getAvailableLocales();
+     for (int i = 0; i < allLocales.length; i++) {
+         BitSet bits = new BitSet(TYPE_LIMIT);
+         available_locales.put(allLocales[i], bits);
+         bits.set(TYPE_GENERIC);
+     }
+
+     // Then flag, service by service, the locales each service reports.
+     registerServiceLocales(Calendar.getAvailableULocales(), TYPE_CALENDAR);
+     registerServiceLocales(DateFormat.getAvailableULocales(), TYPE_DATEFORMAT);
+     registerServiceLocales(NumberFormat.getAvailableULocales(), TYPE_NUMBERFORMAT);
+     registerServiceLocales(Collator.getAvailableULocales(), TYPE_COLLATOR);
+     registerServiceLocales(BreakIterator.getAvailableULocales(), TYPE_BREAKITERATOR);
+ }
+
+ /*
+  * Sets the bit for the given TYPE_* service on every listed locale. A locale
+  * that has no entry yet gets a new one, stored under that locale itself.
+  */
+ private static void registerServiceLocales(ULocale[] serviceLocales, int type) {
+     for (int i = 0; i < serviceLocales.length; i++) {
+         BitSet bits = available_locales.get(serviceLocales[i]);
+         if (bits == null) {
+             bits = new BitSet(TYPE_LIMIT);
+             available_locales.put(serviceLocales[i], bits);
+         }
+         bits.set(type);
+     }
+ }
+
+ /** WARNING: All of this data is temporary, until we start importing from CLDR!!!
+ * Hard-coded guesses that map a language code, or a language_Script key such as
+ * "zh_Hant", to a default territory code. guessTerritory() consults the map.
+ */
+ private static final Map language_territory_hack_map = new HashMap();
+ private static final String[][] language_territory_hack = { // {key, territory} pairs
+ {"af", "ZA"},
+ {"am", "ET"},
+ {"ar", "SA"},
+ {"as", "IN"},
+ {"ay", "PE"},
+ {"az", "AZ"},
+ {"bal", "PK"},
+ {"be", "BY"},
+ {"bg", "BG"},
+ {"bn", "IN"},
+ {"bs", "BA"},
+ {"ca", "ES"},
+ {"ch", "MP"},
+ {"cpe", "SL"},
+ {"cs", "CZ"},
+ {"cy", "GB"},
+ {"da", "DK"},
+ {"de", "DE"},
+ {"dv", "MV"},
+ {"dz", "BT"},
+ {"el", "GR"},
+ {"en", "US"},
+ {"es", "ES"},
+ {"et", "EE"},
+ {"eu", "ES"},
+ {"fa", "IR"},
+ {"fi", "FI"},
+ {"fil", "PH"},
+ {"fj", "FJ"},
+ {"fo", "FO"},
+ {"fr", "FR"},
+ {"ga", "IE"},
+ {"gd", "GB"},
+ {"gl", "ES"},
+ {"gn", "PY"},
+ {"gu", "IN"},
+ {"gv", "GB"},
+ {"ha", "NG"},
+ {"he", "IL"},
+ {"hi", "IN"},
+ {"ho", "PG"},
+ {"hr", "HR"},
+ {"ht", "HT"},
+ {"hu", "HU"},
+ {"hy", "AM"},
+ {"id", "ID"},
+ {"is", "IS"},
+ {"it", "IT"},
+ {"ja", "JP"},
+ {"ka", "GE"},
+ {"kk", "KZ"},
+ {"kl", "GL"},
+ {"km", "KH"},
+ {"kn", "IN"},
+ {"ko", "KR"},
+ {"kok", "IN"},
+ {"ks", "IN"},
+ {"ku", "TR"},
+ {"ky", "KG"},
+ {"la", "VA"},
+ {"lb", "LU"},
+ {"ln", "CG"},
+ {"lo", "LA"},
+ {"lt", "LT"},
+ {"lv", "LV"},
+ {"mai", "IN"},
+ {"men", "GN"},
+ {"mg", "MG"},
+ {"mh", "MH"},
+ {"mk", "MK"},
+ {"ml", "IN"},
+ {"mn", "MN"},
+ {"mni", "IN"},
+ {"mo", "MD"},
+ {"mr", "IN"},
+ {"ms", "MY"},
+ {"mt", "MT"},
+ {"my", "MM"},
+ {"na", "NR"},
+ {"nb", "NO"},
+ {"nd", "ZA"},
+ {"ne", "NP"},
+ {"niu", "NU"},
+ {"nl", "NL"},
+ {"nn", "NO"},
+ {"no", "NO"},
+ {"nr", "ZA"},
+ {"nso", "ZA"},
+ {"ny", "MW"},
+ {"om", "KE"},
+ {"or", "IN"},
+ {"pa", "IN"},
+ {"pau", "PW"},
+ {"pl", "PL"},
+ {"ps", "PK"},
+ {"pt", "BR"},
+ {"qu", "PE"},
+ {"rn", "BI"},
+ {"ro", "RO"},
+ {"ru", "RU"},
+ {"rw", "RW"},
+ {"sd", "IN"},
+ {"sg", "CF"},
+ {"si", "LK"},
+ {"sk", "SK"},
+ {"sl", "SI"},
+ {"sm", "WS"},
+ {"so", "DJ"},
+ {"sq", "CS"},
+ {"sr", "CS"},
+ {"ss", "ZA"},
+ {"st", "ZA"},
+ {"sv", "SE"},
+ {"sw", "KE"},
+ {"ta", "IN"},
+ {"te", "IN"},
+ {"tem", "SL"},
+ {"tet", "TL"},
+ {"th", "TH"},
+ {"ti", "ET"},
+ {"tg", "TJ"},
+ {"tk", "TM"},
+ {"tkl", "TK"},
+ {"tvl", "TV"},
+ {"tl", "PH"},
+ {"tn", "ZA"},
+ {"to", "TO"},
+ {"tpi", "PG"},
+ {"tr", "TR"},
+ {"ts", "ZA"},
+ {"uk", "UA"},
+ {"ur", "IN"},
+ {"uz", "UZ"},
+ {"ve", "ZA"},
+ {"vi", "VN"},
+ {"wo", "SN"},
+ {"xh", "ZA"},
+ {"zh", "CN"},
+ {"zh_Hant", "TW"},
+ {"zu", "ZA"},
+ {"aa", "ET"},
+ {"byn", "ER"},
+ {"eo", "DE"},
+ {"gez", "ET"},
+ {"haw", "US"},
+ {"iu", "CA"},
+ {"kw", "GB"},
+ {"sa", "IN"},
+ {"sh", "HR"},
+ {"sid", "ET"},
+ {"syr", "SY"},
+ {"tig", "ER"},
+ {"tt", "RU"},
+ {"wal", "ET"}, };
+ static {
+     // Index every {key, territory} pair of the table by its key.
+     for (String[] pair : language_territory_hack) {
+         language_territory_hack_map.put(pair[0], pair[1]);
+     }
+ }
+
+ static final Map territory_tzid_hack_map = new HashMap(); // territory code -> time zone ID; looked up first by guessTimeZone()
+ static final String[][] territory_tzid_hack = { // {territory, time zone ID} pairs
+ {"AQ", "Antarctica/McMurdo"},
+ {"AR", "America/Buenos_Aires"},
+ {"AU", "Australia/Sydney"},
+ {"BR", "America/Sao_Paulo"},
+ {"CA", "America/Toronto"},
+ {"CD", "Africa/Kinshasa"},
+ {"CL", "America/Santiago"},
+ {"CN", "Asia/Shanghai"},
+ {"EC", "America/Guayaquil"},
+ {"ES", "Europe/Madrid"},
+ {"GB", "Europe/London"},
+ {"GL", "America/Godthab"},
+ {"ID", "Asia/Jakarta"},
+ {"ML", "Africa/Bamako"},
+ {"MX", "America/Mexico_City"},
+ {"MY", "Asia/Kuala_Lumpur"},
+ {"NZ", "Pacific/Auckland"},
+ {"PT", "Europe/Lisbon"},
+ {"RU", "Europe/Moscow"},
+ {"UA", "Europe/Kiev"},
+ {"US", "America/New_York"},
+ {"UZ", "Asia/Tashkent"},
+ {"PF", "Pacific/Tahiti"},
+ {"FM", "Pacific/Kosrae"},
+ {"KI", "Pacific/Tarawa"},
+ {"KZ", "Asia/Almaty"},
+ {"MH", "Pacific/Majuro"},
+ {"MN", "Asia/Ulaanbaatar"},
+ {"SJ", "Arctic/Longyearbyen"},
+ {"UM", "Pacific/Midway"},
+ };
+ static {
+     // Index every {territory, time zone ID} pair of the table by its territory.
+     for (String[] pair : territory_tzid_hack) {
+         territory_tzid_hack_map.put(pair[0], pair[1]);
+     }
+ }
+
+ // Freezable implementation
+
+ // Set by freeze(); the mutators check it through isFrozen() and refuse to run.
+ private boolean frozen;
+
+ /**
+  * Reports whether this object has been frozen.
+  *
+  * @return true once freeze() has been called on this object
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ public boolean isFrozen() {
+     return this.frozen;
+ }
+
+ /**
+  * Marks this object as frozen; isFrozen() returns true from then on.
+  *
+  * @return this, for chaining
+  * @draft ICU 4.4
+  * @provisional This API might change or be removed in a future release.
+  */
+ public GlobalizationPreferences freeze() {
+     this.frozen = true;
+     return this;
+ }
+
+ /**
+ * @draft ICU 4.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ public GlobalizationPreferences cloneAsThawed() {
+ try {
+ GlobalizationPreferences result = (GlobalizationPreferences) clone();
+ result.frozen = false;
+ return result;
+ } catch (CloneNotSupportedException e) {
+ // will always work
+ return null;
+ }
+ }
+}
+
diff --git a/main/classes/core/.classpath b/main/classes/core/.classpath
new file mode 100644
index 00000000000..11e1777405a
--- /dev/null
+++ b/main/classes/core/.classpath
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/main/classes/core/.externalToolBuilders/copy-data-core.launch b/main/classes/core/.externalToolBuilders/copy-data-core.launch
new file mode 100644
index 00000000000..0bf20451c65
--- /dev/null
+++ b/main/classes/core/.externalToolBuilders/copy-data-core.launch
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/core/.project b/main/classes/core/.project
new file mode 100644
index 00000000000..cbd1fb16590
--- /dev/null
+++ b/main/classes/core/.project
@@ -0,0 +1,28 @@
+
+
+ icu4j-core
+
+
+ icu4j-shared
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.ui.externaltools.ExternalToolBuilder
+ full,incremental,
+
+
+ LaunchConfigHandle
+ <project>/.externalToolBuilders/copy-data-core.launch
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+
+
diff --git a/main/classes/core/.settings/org.eclipse.core.resources.prefs b/main/classes/core/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 00000000000..49e0113abc0
--- /dev/null
+++ b/main/classes/core/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+#Fri Nov 05 14:17:53 EDT 2010
+eclipse.preferences.version=1
+encoding/=UTF-8
diff --git a/main/classes/core/.settings/org.eclipse.jdt.core.prefs b/main/classes/core/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 00000000000..e46367f90ca
--- /dev/null
+++ b/main/classes/core/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,345 @@
+#Thu Aug 27 17:47:12 EDT 2009
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.doc.comment.support=enabled
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
+org.eclipse.jdt.core.compiler.problem.deadCode=warning
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=warning
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=enabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.invalidJavadoc=warning
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=enabled
+org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=all_standard_tags
+org.eclipse.jdt.core.compiler.problem.missingJavadocTags=ignore
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=public
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_assignment=0
+org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
+org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
+org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
+org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_after_package=1
+org.eclipse.jdt.core.formatter.blank_lines_before_field=0
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
+org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
+org.eclipse.jdt.core.formatter.blank_lines_before_method=1
+org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0
+org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
+org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
+org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_header=false
+org.eclipse.jdt.core.formatter.comment.format_html=true
+org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
+org.eclipse.jdt.core.formatter.comment.format_line_comments=true
+org.eclipse.jdt.core.formatter.comment.format_source_code=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
+org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.line_length=120
+org.eclipse.jdt.core.formatter.compact_else_if=true
+org.eclipse.jdt.core.formatter.continuation_indentation=2
+org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
+org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_empty_lines=false
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false
+org.eclipse.jdt.core.formatter.indentation.size=4
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
+org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.join_lines_in_comments=true
+org.eclipse.jdt.core.formatter.join_wrapped_lines=true
+org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.lineSplit=120
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
+org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
+org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
+org.eclipse.jdt.core.formatter.tabulation.char=space
+org.eclipse.jdt.core.formatter.tabulation.size=4
+org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
+org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
diff --git a/main/classes/core/.settings/org.eclipse.jdt.ui.prefs b/main/classes/core/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 00000000000..646a3929f0a
--- /dev/null
+++ b/main/classes/core/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,10 @@
+#Wed Jun 17 11:09:27 EDT 2009
+eclipse.preferences.version=1
+formatter_profile=_ICU4J Standard
+formatter_settings_version=11
+org.eclipse.jdt.ui.ignorelowercasenames=true
+org.eclipse.jdt.ui.importorder=java;javax;org;com;
+org.eclipse.jdt.ui.javadoc=true
+org.eclipse.jdt.ui.ondemandthreshold=99
+org.eclipse.jdt.ui.staticondemandthreshold=99
+org.eclipse.jdt.ui.text.custom_code_templates=/**\r\n * @return the ${bare_field_name}\r\n */ /**\r\n * @param ${param} the ${bare_field_name} to set\r\n */ /**\r\n * ${tags}\r\n */ /*\r\n *******************************************************************************\r\n * Copyright (C) ${year}, International Business Machines Corporation and *\r\n * others. All Rights Reserved. *\r\n *******************************************************************************\r\n */ /**\r\n * @author ${user}\r\n *\r\n * ${tags}\r\n */ /**\r\n * \r\n */ /**\r\n * ${tags}\r\n */ /* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */ /**\r\n * ${tags}\r\n * ${see_to_target}\r\n */ ${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration} \r\n \r\n \r\n \r\n // ${todo} Auto-generated catch block\r\n${exception_var}.printStackTrace(); // ${todo} Auto-generated method stub\r\n${body_statement} ${body_statement}\r\n// ${todo} Auto-generated constructor stub return ${field}; ${field} \= ${param};
diff --git a/main/classes/core/build.properties b/main/classes/core/build.properties
new file mode 100644
index 00000000000..a21fb196196
--- /dev/null
+++ b/main/classes/core/build.properties
@@ -0,0 +1,6 @@
+#*******************************************************************************
+#* Copyright (C) 2009, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+shared.dir = ../../shared
+javac.compilerarg = -Xlint:all,-deprecation,-dep-ann
diff --git a/main/classes/core/build.xml b/main/classes/core/build.xml
new file mode 100644
index 00000000000..0971e073b41
--- /dev/null
+++ b/main/classes/core/build.xml
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/core/core-build.launch b/main/classes/core/core-build.launch
new file mode 100644
index 00000000000..2b3b3d7d1ca
--- /dev/null
+++ b/main/classes/core/core-build.launch
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/classes/core/manifest.stub b/main/classes/core/manifest.stub
new file mode 100644
index 00000000000..d60cc7672d6
--- /dev/null
+++ b/main/classes/core/manifest.stub
@@ -0,0 +1,17 @@
+Manifest-Version: 1.0
+Specification-Title: International Components for Unicode for Java (core)
+Specification-Version: @SPECVERSION@
+Specification-Vendor: icu-project.org
+Implementation-Title: International Components for Unicode for Java (core)
+Implementation-Version: @IMPLVERSION@
+Implementation-Vendor: IBM Corporation
+Implementation-Vendor-Id: com.ibm
+Bundle-ManifestVersion: 2
+Bundle-Name: ICU4J core
+Bundle-Description: International Components for Unicode for Java (core)
+Bundle-SymbolicName: com.ibm.icu.core
+Bundle-Version: @IMPLVERSION@
+Bundle-Vendor: IBM Corporation
+Bundle-Copyright: @COPYRIGHT@
+Bundle-RequiredExecutionEnvironment: @EXECENV@
+Main-Class: com.ibm.icu.util.VersionInfo
diff --git a/main/classes/core/src/com/ibm/icu/ICUConfig.properties b/main/classes/core/src/com/ibm/icu/ICUConfig.properties
new file mode 100644
index 00000000000..01a5ef68d06
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/ICUConfig.properties
@@ -0,0 +1,37 @@
+#*
+#*******************************************************************************
+#* Copyright (C) 2008-2011, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+#* This properties file contains the ICU runtime configuration.
+#*
+
+#
+# The default TimeZone implementation type used by the ICU TimeZone
+# factory method. [ ICU | JDK ]
+#
+com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
+
+#
+# By default, DecimalFormat uses some internal equivalent character
+# data in addition to ones in DecimalFormatSymbols for parsing
+# decimal/grouping separators. When this property is true,
+# DecimalFormat uses separators configured by DecimalFormatSymbols only
+# and does not try to find a match in the internal equivalent character
+# data.
+#
+com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
+
+
+#
+# [Internal Use Only]
+# Disable resource path scan for building full locale name list
+# at run time.
+#
+com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan = false
+
+#
+# [Internal Use Only]
+# Time zone names service factory
+#
+# com.ibm.icu.text.TimeZoneNames.Factory.impl = com.ibm.icu.impl.TimeZoneNamesFactoryImpl
diff --git a/main/classes/core/src/com/ibm/icu/impl/Assert.java b/main/classes/core/src/com/ibm/icu/impl/Assert.java
new file mode 100644
index 00000000000..7d8b113e6dd
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/Assert.java
@@ -0,0 +1,23 @@
+/*
+*******************************************************************************
+* Copyright (C) 2005-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+/**
+ * Minimal assertion helpers that report failures by throwing
+ * {@link IllegalStateException}. Originally written as a JDK 1.3
+ * compatibility layer.
+ */
+public class Assert {
+    /**
+     * Fails unconditionally because of the given exception.
+     *
+     * @param e the exception that caused the failure; it is attached as the
+     *          cause of the thrown exception so its stack trace is not lost
+     * @throws IllegalStateException always, with the message
+     *         "failure '" + e.toString() + "'"
+     */
+    public static void fail(Exception e) {
+        // Same message as fail(e.toString()), but keep the original exception
+        // as the cause instead of flattening it to a string.
+        throw new IllegalStateException("failure '" + e.toString() + "'", e);
+    }
+
+    /**
+     * Fails unconditionally with the given message.
+     *
+     * @param msg description of the failure
+     * @throws IllegalStateException always
+     */
+    public static void fail(String msg) {
+        throw new IllegalStateException("failure '" + msg + "'");
+    }
+
+    /**
+     * Asserts that a condition holds.
+     *
+     * @param val the condition to check
+     * @throws IllegalStateException if val is false
+     */
+    public static void assrt(boolean val) {
+        if (!val) {
+            throw new IllegalStateException("assert failed");
+        }
+    }
+
+    /**
+     * Asserts that a condition holds, naming the condition in the failure message.
+     *
+     * @param msg description of the condition being checked
+     * @param val the condition to check
+     * @throws IllegalStateException if val is false
+     */
+    public static void assrt(String msg, boolean val) {
+        if (!val) {
+            throw new IllegalStateException("assert '" + msg + "' failed");
+        }
+    }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/BMPSet.java b/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
new file mode 100644
index 00000000000..7cb471c9a2a
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
@@ -0,0 +1,500 @@
+/*
+ ******************************************************************************
+ *
+ * Copyright (C) 2009-2010, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ ******************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.UnicodeSet.SpanCondition;
+
+/*
+ * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
+ *
+ * Latin-1: Look up bytes. 2-byte characters: Bits organized vertically. 3-byte characters: Use zero/one/mixed data
+ * per 64-block in U+0000..U+FFFF, with mixed for illegal ranges. Supplementary characters: Call contains() on the
+ * parent set.
+ */
+public final class BMPSet {
+ /*
+ * Equals (0xd800 << 10) + 0xdc00 - 0x10000. Subtracting it from (lead << 10) + trail yields
+ * 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00).
+ * NOTE(review): not referenced by the methods visible in this class; presumably kept for callers - confirm.
+ */
+ public static int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);
+
+ /*
+ * One boolean ('true' or 'false') per Latin-1 character.
+ * Indexed directly by code point; the first constructor allocates 0x100 entries.
+ */
+ private boolean[] latin1Contains;
+
+ /*
+ * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
+ * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6}
+ * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead)
+ *
+ * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) for faster validity checking at
+ * runtime.
+ */
+ private int[] table7FF;
+
+ /*
+ * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
+ * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
+ * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
+ * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed
+ * and set.contains(c) must be called.
+ *
+ * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to the result of contains(FFFD) for faster
+ * validity checking at runtime.
+ */
+ private int[] bmpBlockBits;
+
+ /*
+ * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000,
+ * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
+ * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points.
+ * The first constructor allocates 18 entries (indexes 0..0x11).
+ */
+ private int[] list4kStarts;
+
+ /*
+ * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for
+ * supplementary code points. The list is terminated with list[listLength-1]=0x110000.
+ * The constructors store the caller's array reference; the array is not copied.
+ */
+ private final int[] list;
+ private final int listLength; // length used; list may be longer to minimize reallocs
+
+ public BMPSet(final int[] parentList, int parentListLength) {
+ list = parentList;
+ listLength = parentListLength;
+ latin1Contains = new boolean[0x100];
+ table7FF = new int[64];
+ bmpBlockBits = new int[64];
+ list4kStarts = new int[18];
+
+ /*
+ * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the
+ * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of
+ * indexes is for finding supplementary code points.
+ */
+ list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1);
+ int i;
+ for (i = 1; i <= 0x10; ++i) {
+ list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1);
+ }
+ list4kStarts[0x11] = listLength - 1;
+
+ initBits();
+ }
+
+ /*
+  * Creates a BMPSet that takes clones of the lookup tables of another BMPSet and refers to a
+  * new parent inversion list. The tables are not recomputed from newParentList.
+  *
+  * @param otherBMPSet The BMPSet whose lookup tables are cloned
+  * @param newParentList The inversion list to use for the slow lookups (stored, not copied)
+  * @param newParentListLength The number of entries of newParentList that are in use
+  */
+ public BMPSet(final BMPSet otherBMPSet, final int[] newParentList, int newParentListLength) {
+     this.list = newParentList;
+     this.listLength = newParentListLength;
+
+     // Each table gets its own clone so the two objects do not share the arrays.
+     this.latin1Contains = otherBMPSet.latin1Contains.clone();
+     this.table7FF = otherBMPSet.table7FF.clone();
+     this.bmpBlockBits = otherBMPSet.bmpBlockBits.clone();
+     this.list4kStarts = otherBMPSet.list4kStarts.clone();
+ }
+
+ /*
+  * Tests whether the set contains the code point c, using the fastest table that covers c.
+  * c is expected to be non-negative: it is used directly as an index into latin1Contains
+  * when c <= 0xff.
+  *
+  * @param c The code point to test
+  * @return true if c is in the set; false for c > 0x10ffff
+  */
+ public boolean contains(int c) {
+     if (c <= 0xff) {
+         // Latin-1: one boolean per code point.
+         return latin1Contains[c];
+     }
+     if (c <= 0x7ff) {
+         // One bit per code point, organized vertically.
+         return (table7FF[c & 0x3f] & (1 << (c >> 6))) != 0;
+     }
+     if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
+         // BMP outside the surrogate range: consult the per-64-block summary bits.
+         int lead = c >> 12;
+         int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+         if (twoBits > 1) {
+             // Mixed block: search the inversion list within this 4k block of code points.
+             return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]);
+         }
+         // All 64 code points sharing bits 15..6 are uniformly in or out of the set.
+         return twoBits != 0;
+     }
+     if (c <= 0x10ffff) {
+         // Surrogate or supplementary code point.
+         return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
+     }
+     // Out-of-range code points get false, consistent with long-standing
+     // behavior of UnicodeSet.contains(c).
+     return false;
+ }
+
+ /*
+ * Span the initial substring for which each character c has spanCondition==contains(c).
+ * Any spanCondition other than SpanCondition.NOT_CONTAINED is treated as "span contained characters";
+ * NOT_CONTAINED spans characters that are not in the set.
+ *
+ * @param s The text to scan
+ * @param start The start index
+ * @param end The end index; clamped to s.length()
+ * @param spanCondition Selects whether contained or not-contained characters are spanned
+ * @return The length of the span, counted in chars from start.
+ *
+ * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for
+ * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points
+ * as usual in ICU.
+ */
+ public final int span(CharSequence s, int start, int end, SpanCondition spanCondition) {
+ char c, c2;
+ int i = start;
+ // Never read past the end of the text, even if the caller passes a larger end.
+ int limit = Math.min(s.length(), end);
+ if (SpanCondition.NOT_CONTAINED != spanCondition) {
+ // span: stop at the first character that is NOT in the set
+ while (i < limit) {
+ c = s.charAt(i);
+ if (c <= 0xff) {
+ if (!latin1Contains[c]) {
+ break;
+ }
+ } else if (c <= 0x7ff) {
+ if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
+ break;
+ }
+ } else if (c < 0xd800 ||
+ c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
+ // Non-surrogate BMP char, a trail surrogate, or a lead surrogate that is not
+ // followed (before limit) by a trail surrogate: treat c as a single code point.
+ int lead = c >> 12;
+ int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+ if (twoBits <= 1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if (twoBits == 0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+ break;
+ }
+ }
+ } else {
+ // surrogate pair: c is the lead and c2 (assigned in the condition above) is the trail
+ int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+ if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Skip the trail unit; the common increment below skips the lead unit.
+ ++i;
+ }
+ ++i;
+ }
+ } else {
+ // span not: stop at the first character that IS in the set
+ while (i < limit) {
+ c = s.charAt(i);
+ if (c <= 0xff) {
+ if (latin1Contains[c]) {
+ break;
+ }
+ } else if (c <= 0x7ff) {
+ if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
+ break;
+ }
+ } else if (c < 0xd800 ||
+ c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
+ // Same single-code-point classification as in the "span" loop above.
+ int lead = c >> 12;
+ int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+ if (twoBits <= 1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if (twoBits != 0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+ break;
+ }
+ }
+ } else {
+ // surrogate pair: c is the lead and c2 is the trail
+ int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+ if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Skip the trail unit; the common increment below skips the lead unit.
+ ++i;
+ }
+ ++i;
+ }
+ }
+ return i - start;
+ }
+
+ /*
+ * Symmetrical with span().
+ * Span the trailing substring for which each character c has spanCondition==contains(c).
+ * Any spanCondition other than SpanCondition.NOT_CONTAINED is treated as "span contained characters".
+ *
+ * limit is clamped to s.length(). The clamped limit must be greater than 0: the loops read
+ * s.charAt(--limit) before any bounds test.
+ *
+ * @param s The text to scan backwards
+ * @param limit The index just after the last char to examine
+ * @param spanCondition Selects whether contained or not-contained characters are spanned
+ * @return The string index which starts the span (i.e. inclusive).
+ */
+ public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) {
+ char c, c2;
+
+ limit = Math.min(s.length(), limit);
+ if (SpanCondition.NOT_CONTAINED != spanCondition) {
+ // span: walk backwards until a character that is NOT in the set
+ for (;;) {
+ c = s.charAt(--limit);
+ if (c <= 0xff) {
+ if (!latin1Contains[c]) {
+ break;
+ }
+ } else if (c <= 0x7ff) {
+ if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
+ break;
+ }
+ } else if (c < 0xd800 ||
+ c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
+ // Anything that is not a trail surrogate preceded by a lead surrogate is treated
+ // as a single code point. (The first test, c < 0xd800, is implied by c < 0xdc00.)
+ int lead = c >> 12;
+ int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+ if (twoBits <= 1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if (twoBits == 0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+ break;
+ }
+ }
+ } else {
+ // surrogate pair: c2 (assigned in the condition above) is the lead and c is the trail
+ int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+ if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Also step over the lead unit.
+ --limit;
+ }
+ // Reached the start of the text: everything examined belongs to the span.
+ if (0 == limit) {
+ return 0;
+ }
+ }
+ } else {
+ // span not: walk backwards until a character that IS in the set
+ for (;;) {
+ c = s.charAt(--limit);
+ if (c <= 0xff) {
+ if (latin1Contains[c]) {
+ break;
+ }
+ } else if (c <= 0x7ff) {
+ if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
+ break;
+ }
+ } else if (c < 0xd800 ||
+ c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
+ // Same single-code-point classification as in the "span" loop above.
+ int lead = c >> 12;
+ int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+ if (twoBits <= 1) {
+ // All 64 code points with the same bits 15..6
+ // are either in the set or not.
+ if (twoBits != 0) {
+ break;
+ }
+ } else {
+ // Look up the code point in its 4k block of code points.
+ if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+ break;
+ }
+ }
+ } else {
+ // surrogate pair: c2 is the lead and c is the trail
+ int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+ if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+ break;
+ }
+ // Also step over the lead unit.
+ --limit;
+ }
+ // Reached the start of the text: everything examined belongs to the span.
+ if (0 == limit) {
+ return 0;
+ }
+ }
+ }
+ // A break left limit on the last unit of the character that ended the span;
+ // the span starts just after it.
+ return limit + 1;
+ }
+
+ /*
+ * Set bits in a bit rectangle in "vertical" bit organization. start> 6;
+ int trail = start & 0x3f;
+
+ // Set one bit indicating an all-one block.
+ int bits = 1 << lead;
+ if ((start + 1) == limit) { // Single-character shortcut.
+ table[trail] |= bits;
+ return;
+ }
+
+ int limitLead = limit >> 6;
+ int limitTrail = limit & 0x3f;
+
+ if (lead == limitLead) {
+ // Partial vertical bit column.
+ while (trail < limitTrail) {
+ table[trail++] |= bits;
+ }
+ } else {
+ // Partial vertical bit column,
+ // followed by a bit rectangle,
+ // followed by another partial vertical bit column.
+ if (trail > 0) {
+ do {
+ table[trail++] |= bits;
+ } while (trail < 64);
+ ++lead;
+ }
+ if (lead < limitLead) {
+ bits = ~((1 << lead) - 1);
+ if (limitLead < 0x20) {
+ bits &= (1 << limitLead) - 1;
+ }
+ for (trail = 0; trail < 64; ++trail) {
+ table[trail] |= bits;
+ }
+ }
+ bits = 1 << limitLead;
+ for (trail = 0; trail < limitTrail; ++trail) {
+ table[trail] |= bits;
+ }
+ }
+ }
+
+ /*
+ * Fills latin1Contains[], table7FF[] and bmpBlockBits[] from the inversion list.
+ * The list is read as consecutive [start, limit) pairs; when the list is exhausted after
+ * reading a start value, 0x110000 is used as the limit. Called from the constructor that
+ * takes only the parent list.
+ */
+ private void initBits() {
+ int start, limit;
+ int listIndex = 0;
+
+ // Set latin1Contains[].
+ do {
+ start = list[listIndex++];
+ if (listIndex < listLength) {
+ limit = list[listIndex++];
+ } else {
+ limit = 0x110000;
+ }
+ // Range begins above Latin-1: keep start/limit for the next phase.
+ if (start >= 0x100) {
+ break;
+ }
+ // Mark every code point of the range that lies below U+0100.
+ do {
+ latin1Contains[start++] = true;
+ } while (start < limit && start < 0x100);
+ } while (limit <= 0x100);
+
+ // Set table7FF[].
+ // start/limit still describe the range that ended the previous loop (possibly partially consumed).
+ while (start < 0x800) {
+ set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800);
+ if (limit > 0x800) {
+ // The range continues beyond U+07FF; carry its remainder into the next phase.
+ start = 0x800;
+ break;
+ }
+
+ start = list[listIndex++];
+ if (listIndex < listLength) {
+ limit = list[listIndex++];
+ } else {
+ limit = 0x110000;
+ }
+ }
+
+ // Set bmpBlockBits[].
+ // minStart is the lowest code point not yet covered by an already-marked mixed block.
+ int minStart = 0x800;
+ while (start < 0x10000) {
+ // Only the BMP part of a range is recorded here.
+ if (limit > 0x10000) {
+ limit = 0x10000;
+ }
+
+ if (start < minStart) {
+ start = minStart;
+ }
+ if (start < limit) { // Else: Another range entirely in a known mixed-value block.
+ if (0 != (start & 0x3f)) {
+ // Mixed-value block of 64 code points.
+ // After the shift, start is a block number; 0x10001 sets both the lower bit
+ // and the upper (+16) bit for this block.
+ start >>= 6;
+ bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6);
+ start = (start + 1) << 6; // Round up to the next block boundary.
+ minStart = start; // Ignore further ranges in this block.
+ }
+ if (start < limit) {
+ if (start < (limit & ~0x3f)) {
+ // Multiple all-ones blocks of 64 code points each.
+ set32x64Bits(bmpBlockBits, start >> 6, limit >> 6);
+ }
+
+ if (0 != (limit & 0x3f)) {
+ // Mixed-value block of 64 code points.
+ // The range ends inside a block, so that last block is marked mixed as well.
+ limit >>= 6;
+ bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6);
+ limit = (limit + 1) << 6; // Round up to the next block boundary.
+ minStart = limit; // Ignore further ranges in this block.
+ }
+ }
+ }
+
+ // The end of the BMP has been reached; no further ranges matter here.
+ if (limit == 0x10000) {
+ break;
+ }
+
+ start = list[listIndex++];
+ if (listIndex < listLength) {
+ limit = list[listIndex++];
+ } else {
+ limit = 0x110000;
+ }
+ }
+ }
+
+
+ /**
+ * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code
+ * points in a certain range.
+ *
+ * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and
+ * hi=findCodePoint(end) with 0<=lo<=hi= hi || c >= list[hi - 1])
+ return hi;
+ // invariant: c >= list[lo]
+ // invariant: c < list[hi]
+ for (;;) {
+ int i = (lo + hi) >> 1;
+ if (i == lo) {
+ break; // Found!
+ } else if (c < list[i]) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ return hi;
+ }
+
+ /*
+  * Inversion-list lookup restricted to the index range lo..hi: c is contained
+  * exactly when findCodePoint() returns an odd index.
+  */
+ private final boolean containsSlow(int c, int lo, int hi) {
+     final int index = findCodePoint(c, lo, hi);
+     return (index & 1) != 0;
+ }
+}
+
diff --git a/main/classes/core/src/com/ibm/icu/impl/BOCU.java b/main/classes/core/src/com/ibm/icu/impl/BOCU.java
new file mode 100644
index 00000000000..898eb47b846
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/BOCU.java
@@ -0,0 +1,378 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2009, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * Binary Ordered Compression for Unicode
+ *
+ * Users are strongly encouraged to read the ICU paper on
+ *
+ * BOCU before attempting to use this class.
+ *
+ * BOCU is used to compress unicode text into a stream of unsigned
+ * bytes. For many kinds of text the compression compares favorably
+ * to UTF-8, and for some kinds of text (such as CJK) it does better.
+ * The resulting bytes will compare in the same order as the original
+ * code points. The byte stream does not contain the values 0, 1, or
+ * 2.
+ *
+ * One example of a use of BOCU is in
+ * com.ibm.icu.text.Collator#getCollationKey(String) for a RuleBasedCollator object with
+ * collation strength IDENTICAL. The result CollationKey will consist of the
+ * collation order of the source string followed by the BOCU result of the
+ * source string.
+ *
+ *
+ * Unlike a UTF encoding, BOCU-compressed text is not suitable for
+ * random access.
+ *
+ * Method: Slope Detection Remember the previous code point
+ * (initial 0). For each code point in the string, encode the
+ * difference with the previous one. Similar to a UTF, the length of
+ * the byte sequence is encoded in the lead bytes. Unlike a UTF, the
+ * trail byte values may overlap with lead/single byte values. The
+ * signedness of the difference must be encoded as the most
+ * significant part.
+ *
+ * We encode differences with few bytes if their absolute values
+ * are small. For correct ordering, we must treat the entire value
+ * range -10ffff..+10ffff in ascending order, which forbids encoding
+ * the sign and the absolute value separately. Instead, we split the
+ * lead byte range in the middle and encode non-negative values going
+ * up and negative values going down.
+ *
+ * For very small absolute values, the difference is added to a
+ * middle byte value for single-byte encoded differences. For
+ * somewhat larger absolute values, the difference is divided by the
+ * number of byte values available, the modulo is used for one trail
+ * byte, and the remainder is added to a lead byte avoiding the
+ * single-byte range. For large absolute values, the difference is
+ * similarly encoded in three bytes. (Syn Wee, I need examples
+ * here.)
+ *
+ * BOCU does not use byte values 0, 1, or 2, but uses all other
+ * byte values for lead and single bytes, so that the middle range of
+ * single bytes is as large as possible.
+ *
+ * Note that the lead byte ranges overlap some, but that the
+ * sequences as a whole are well ordered. I.e., even if the lead byte
+ * is the same for sequences of different lengths, the trail bytes
+ * establish correct order. It would be possible to encode slightly
+ * larger ranges for each length (>1) by subtracting the lower bound
+ * of the range. However, that would also slow down the calculation.
+ * (Syn Wee, need an example).
+ *
+ * For the actual string encoding, an optimization moves the
+ * previous code point value to the middle of its Unicode script block
+ * to minimize the differences in same-script text runs. (Syn Wee,
+ * need an example.)
+ *
+ * @author Syn Wee Quek
+ * @since release 2.2, May 3rd 2002
+ */
+public class BOCU
+{
+ // public constructors --------------------------------------------------
+
+ // public methods -------------------------------------------------------
+
+ /**
+ * Encode the code points of a string as a sequence of bytes,
+ * preserving lexical order.
+ * The minimum size of buffer required for the compression can be
+ * preflighted by getCompressionLength(String).
+ * @param source text source
+ * @param buffer output buffer
+ * @param offset to start writing to
+ * @return end offset where the writing stopped
+ * @see #getCompressionLength(String)
+ * @exception ArrayIndexOutOfBoundsException thrown if size of buffer is
+ * too small for the output.
+ */
+ public static int compress(String source, byte buffer[], int offset)
+ {
+ int prev = 0;
+ UCharacterIterator iterator = UCharacterIterator.getInstance(source);
+ int codepoint = iterator.nextCodePoint();
+ while (codepoint != UCharacterIterator.DONE) {
+ if (prev < 0x4e00 || prev >= 0xa000) {
+ prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
+ }
+ else {
+ // Unihan U+4e00..U+9fa5:
+ // double-bytes down from the upper end
+ prev = 0x9fff - SLOPE_REACH_POS_2_;
+ }
+
+ offset = writeDiff(codepoint - prev, buffer, offset);
+ prev = codepoint;
+ codepoint = iterator.nextCodePoint();
+ }
+ return offset;
+ }
+
+ /**
+  * Return the number of bytes that compress() would write.
+  * Walks the code points of the source exactly like compress() does and
+  * sums the encoded length of each difference instead of writing it.
+  * @param source text source string
+  * @return the length of the BOCU result
+  * @see #compress(String, byte[], int)
+  */
+ public static int getCompressionLength(String source)
+ {
+     int prev = 0;
+     int result = 0;
+     UCharacterIterator iterator = UCharacterIterator.getInstance(source);
+     int codepoint = iterator.nextCodePoint();
+     while (codepoint != UCharacterIterator.DONE) {
+         if (prev < 0x4e00 || prev >= 0xa000) {
+             prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
+         }
+         else {
+             // Unihan U+4e00..U+9fa5:
+             // double-bytes down from the upper end
+             prev = 0x9fff - SLOPE_REACH_POS_2_;
+         }
+
+         // Measure the current code point BEFORE advancing the iterator,
+         // in the same order as compress(). Advancing first would skip the
+         // first code point and measure the DONE sentinel at the end.
+         result += lengthOfDiff(codepoint - prev);
+         prev = codepoint;
+         codepoint = iterator.nextCodePoint();
+     }
+     return result;
+ }
+
+ // public setter methods -------------------------------------------------
+
+ // public getter methods ------------------------------------------------
+
+ // public other methods -------------------------------------------------
+
+ // protected constructor ------------------------------------------------
+
+ // protected data members ------------------------------------------------
+
+ // protected methods -----------------------------------------------------
+
+ // private data members --------------------------------------------------
+
+ /**
+ * Do not use byte values 0, 1, 2 because they are separators in sort keys.
+ */
+ private static final int SLOPE_MIN_ = 3;
+ private static final int SLOPE_MAX_ = 0xff;
+ // Byte value that encodes a difference of 0 (see writeDiff: SLOPE_MIDDLE_ + diff).
+ private static final int SLOPE_MIDDLE_ = 0x81;
+ // Number of distinct trail byte values: 0xff - 3 + 1 = 253.
+ private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1;
+ //private static final int SLOPE_MAX_BYTES_ = 4;
+
+ /**
+ * Number of lead bytes:
+ * 1 middle byte for 0
+ * 2*80=160 single bytes for !=0
+ * 2*42=84 for double-byte values
+ * 2*3=6 for 3-byte values
+ * 2*1=2 for 4-byte values
+ *
+ * The sum must be <=SLOPE_TAIL_COUNT.
+ *
+ * Why these numbers?
+ * - There should be >=128 single-byte values to cover 128-blocks
+ * with small scripts.
+ * - There should be >=20902 single/double-byte values to cover Unihan.
+ * - It helps CJK Extension B some if there are 3-byte values that cover
+ * the distance between them and Unihan.
+ * This also helps to jump among distant places in the BMP.
+ * - Four-byte values are necessary to cover the rest of Unicode.
+ *
+ * Symmetrical lead byte counts are for convenience.
+ * With an equal distribution of even and odd differences there is also
+ * no advantage to asymmetrical lead byte counts.
+ */
+ private static final int SLOPE_SINGLE_ = 80;
+ private static final int SLOPE_LEAD_2_ = 42;
+ private static final int SLOPE_LEAD_3_ = 3;
+ //private static final int SLOPE_LEAD_4_ = 1;
+
+ /**
+ * The difference value range for single-byters.
+ * Evaluates to +80 and -80.
+ */
+ private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_;
+ private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_);
+
+ /**
+ * The difference value range for double-byters.
+ * Evaluates to 42*253 + 41 = 10667 and -10668.
+ */
+ private static final int SLOPE_REACH_POS_2_ =
+ SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1;
+ private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1);
+
+ /**
+ * The difference value range for 3-byters.
+ * Evaluates to 3*253*253 + 2*253 + 252 = 192785 and -192786.
+ */
+ private static final int SLOPE_REACH_POS_3_ = SLOPE_LEAD_3_
+ * SLOPE_TAIL_COUNT_
+ * SLOPE_TAIL_COUNT_
+ + (SLOPE_LEAD_3_ - 1)
+ * SLOPE_TAIL_COUNT_ +
+ (SLOPE_TAIL_COUNT_ - 1);
+ private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1);
+
+ /**
+ * The lead byte start values.
+ * Evaluate to POS_2 = 210 (0xd2), POS_3 = 252 (0xfc), NEG_2 = 49 (0x31), NEG_3 = 7.
+ */
+ private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_
+ + SLOPE_SINGLE_ + 1;
+ private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_
+ + SLOPE_LEAD_2_;
+ private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_ +
+ SLOPE_REACH_NEG_1_;
+ private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_
+ - SLOPE_LEAD_2_;
+
+ // private constructor ---------------------------------------------------
+
+ /**
+ * Private constructor to prevent instantiation; the methods of this class
+ * visible here are all static.
+ */
+ ///CLOVER:OFF
+ private BOCU()
+ {
+ }
+ ///CLOVER:ON
+
+ // private methods -------------------------------------------------------
+
+ /**
+  * Division and modulo for negative numerators, rounding the quotient
+  * towards negative infinity. Java's / and % yield a negative modulo and a
+  * quotient that is one more than what is needed here, so both are
+  * adjusted when the modulo comes out negative.
+  * @param number which operations are to be performed on
+  * @param factor the factor to use for division
+  * @return (result of division) << 32 | modulo
+  */
+ private static final long getNegDivMod(int number, int factor)
+ {
+     long quotient = number / factor;
+     int remainder = number % factor;
+     if (remainder < 0) {
+         remainder += factor;
+         quotient -= 1;
+     }
+     // Pack both results into one long: quotient in the high 32 bits,
+     // remainder in the low 32 bits.
+     return (quotient << 32) | remainder;
+ }
+
+ /**
+  * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
+  * preserving lexical order.
+  * The lead byte selects the length; trail bytes are base-SLOPE_TAIL_COUNT_
+  * digits offset by SLOPE_MIN_, written most significant first.
+  * @param diff difference between the current code point and the adjusted
+  *        previous code point
+  * @param buffer byte buffer to append to
+  * @param offset to the byte buffer to start appending
+  * @return end offset where the appending stops
+  * @exception ArrayIndexOutOfBoundsException thrown if buffer is too small
+  */
+ private static final int writeDiff(int diff, byte buffer[], int offset)
+ {
+     if (diff >= SLOPE_REACH_NEG_1_) {
+         if (diff <= SLOPE_REACH_POS_1_) {
+             // single byte around the middle value
+             buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff);
+         }
+         else if (diff <= SLOPE_REACH_POS_2_) {
+             buffer[offset ++] = (byte)(SLOPE_START_POS_2_
+                                        + (diff / SLOPE_TAIL_COUNT_));
+             buffer[offset ++] = (byte)(SLOPE_MIN_ +
+                                        (diff % SLOPE_TAIL_COUNT_));
+         }
+         else if (diff <= SLOPE_REACH_POS_3_) {
+             // trail bytes are produced least significant first
+             buffer[offset + 2] = (byte)(SLOPE_MIN_
+                                         + (diff % SLOPE_TAIL_COUNT_));
+             diff /= SLOPE_TAIL_COUNT_;
+             buffer[offset + 1] = (byte)(SLOPE_MIN_
+                                         + (diff % SLOPE_TAIL_COUNT_));
+             buffer[offset] = (byte)(SLOPE_START_POS_3_
+                                     + (diff / SLOPE_TAIL_COUNT_));
+             offset += 3;
+         }
+         else {
+             buffer[offset + 3] = (byte)(SLOPE_MIN_
+                                         + diff % SLOPE_TAIL_COUNT_);
+             diff /= SLOPE_TAIL_COUNT_;
+             // Second trail byte belongs at offset + 2. Writing it to
+             // buffer[offset] left that slot unset and the value was then
+             // overwritten by the lead byte below.
+             buffer[offset + 2] = (byte)(SLOPE_MIN_
+                                         + diff % SLOPE_TAIL_COUNT_);
+             diff /= SLOPE_TAIL_COUNT_;
+             buffer[offset + 1] = (byte)(SLOPE_MIN_
+                                         + diff % SLOPE_TAIL_COUNT_);
+             buffer[offset] = (byte)SLOPE_MAX_;
+             offset += 4;
+         }
+     }
+     else {
+         // Negative differences need floor division so that the modulo
+         // is non-negative; see getNegDivMod().
+         long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+         int modulo = (int)division;
+         if (diff >= SLOPE_REACH_NEG_2_) {
+             diff = (int)(division >> 32);
+             buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff);
+             buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo);
+         }
+         else if (diff >= SLOPE_REACH_NEG_3_) {
+             buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
+             diff = (int)(division >> 32);
+             division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+             modulo = (int)division;
+             diff = (int)(division >> 32);
+             buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
+             buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff);
+             offset += 3;
+         }
+         else {
+             buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo);
+             diff = (int)(division >> 32);
+             division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+             modulo = (int)division;
+             diff = (int)(division >> 32);
+             buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
+             division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+             modulo = (int)division;
+             buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
+             buffer[offset] = SLOPE_MIN_;
+             offset += 4;
+         }
+     }
+     return offset;
+ }
+
+ /**
+ * How many bytes would writeDiff() write?
+ * @param diff
+ */
+ private static final int lengthOfDiff(int diff)
+ {
+ if (diff >= SLOPE_REACH_NEG_1_) {
+ if (diff <= SLOPE_REACH_POS_1_) {
+ return 1;
+ }
+ else if (diff <= SLOPE_REACH_POS_2_) {
+ return 2;
+ }
+ else if(diff <= SLOPE_REACH_POS_3_) {
+ return 3;
+ }
+ else {
+ return 4;
+ }
+ }
+ else {
+ if (diff >= SLOPE_REACH_NEG_2_) {
+ return 2;
+ }
+ else if (diff >= SLOPE_REACH_NEG_3_) {
+ return 3;
+ }
+ else {
+ return 4;
+ }
+ }
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CacheBase.java b/main/classes/core/src/com/ibm/icu/impl/CacheBase.java
new file mode 100644
index 00000000000..11738a12a73
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CacheBase.java
@@ -0,0 +1,39 @@
+/*
+*******************************************************************************
+* Copyright (C) 2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+/**
+ * Base class for cache implementations.
+ * To use, instantiate a subclass of a concrete implementation class, where the subclass
+ * implements the createInstance() method, and call getInstance() with the key and the data.
+ * The getInstance() call will use the data only if it needs to call createInstance(),
+ * otherwise the data is ignored.
+ *
+ * @param <K> Cache lookup key type
+ * @param <V> Cache instance value type
+ * @param <D> Data type for creating a new instance value
+ *
+ * @author Markus Scherer, Mark Davis
+ */
+public abstract class CacheBase<K, V, D> {
+    /**
+     * Retrieves an instance from the cache. Calls createInstance(key, data) if the cache
+     * does not already contain an instance with this key.
+     * Ignores data if the cache already contains an instance with this key.
+     * @param key Cache lookup key for the requested instance
+     * @param data Data for createInstance() if the instance is not already cached
+     * @return The requested instance
+     */
+    public abstract V getInstance(K key, D data);
+
+    /**
+     * Creates an instance for the key and data. Must be overridden.
+     * @param key Cache lookup key for the requested instance
+     * @param data Data for the instance creation
+     * @return The requested instance
+     */
+    protected abstract V createInstance(K key, D data);
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java b/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java
new file mode 100644
index 00000000000..a6f3528db7b
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CalendarAstronomer.java
@@ -0,0 +1,1666 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.util.Date;
+import java.util.TimeZone;
+
+/**
+ * <code>CalendarAstronomer</code> is a class that can perform the calculations to
+ * determine the positions of the sun and moon, the time of sunrise and
+ * sunset, and other astronomy-related data. The calculations it performs
+ * are in some cases quite complicated, and this utility class saves you
+ * the trouble of worrying about them.
+ *
+ * The measurement of time is a very important part of astronomy. Because
+ * astronomical bodies are constantly in motion, observations are only valid
+ * at a given moment in time. Accordingly, each <code>CalendarAstronomer</code>
+ * object has a <code>time</code> property that determines the date
+ * and time for which its calculations are performed. You can set and
+ * retrieve this property with {@link #setDate setDate}, {@link #getDate getDate}
+ * and related methods.
+ *
+ * Almost all of the calculations performed by this class, or by any
+ * astronomer, are approximations to various degrees of accuracy. The
+ * calculations in this class are mostly modelled after those described
+ * in the book
+ *
+ * <i>Practical Astronomy With Your Calculator</i>, by Peter J.
+ * Duffett-Smith, Cambridge University Press, 1990. This is an excellent
+ * book, and if you want a greater understanding of how these calculations
+ * are performed it is a very good, readable starting point.
+ *
+ * WARNING: This class is very early in its development, and
+ * it is highly likely that its API will change to some degree in the future.
+ * At the moment, it basically does just enough to support {@link com.ibm.icu.util.IslamicCalendar}
+ * and {@link com.ibm.icu.util.ChineseCalendar}.
+ *
+ * @author Laura Werner
+ * @author Alan Liu
+ * @internal
+ */
+public class CalendarAstronomer {
+
+ //-------------------------------------------------------------------------
+ // Astronomical constants
+ //-------------------------------------------------------------------------
+
+ /**
+ * The number of standard hours in one sidereal day.
+ * Approximately 23.93.
+ * @internal
+ */
+ public static final double SIDEREAL_DAY = 23.93446960027;
+
+ /**
+ * The number of sidereal hours in one mean solar day.
+ * Approximately 24.07.
+ * @internal
+ */
+ public static final double SOLAR_DAY = 24.065709816;
+
+ /**
+ * The average number of solar days from one new moon to the next. This is the time
+ * it takes for the moon to return the same ecliptic longitude as the sun.
+ * It is longer than the sidereal month because the sun's longitude increases
+ * during the year due to the revolution of the earth around the sun.
+ * Approximately 29.53.
+ *
+ * @see #SIDEREAL_MONTH
+ * @internal
+ */
+ public static final double SYNODIC_MONTH = 29.530588853;
+
+ /**
+ * The average number of days it takes
+ * for the moon to return to the same ecliptic longitude relative to the
+ * stellar background. This is referred to as the sidereal month.
+ * It is shorter than the synodic month due to
+ * the revolution of the earth around the sun.
+ * Approximately 27.32.
+ *
+ * @see #SYNODIC_MONTH
+ * @internal
+ */
+ public static final double SIDEREAL_MONTH = 27.32166;
+
+ /**
+ * The average number of days between successive vernal equinoxes.
+ * Due to the precession of the earth's
+ * axis, this is not precisely the same as the sidereal year.
+ * Approximately 365.24
+ *
+ * @see #SIDEREAL_YEAR
+ * @internal
+ */
+ public static final double TROPICAL_YEAR = 365.242191;
+
+ /**
+ * The average number of days it takes
+ * for the sun to return to the same position against the fixed stellar
+ * background. This is the duration of one orbit of the earth about the sun
+ * as it would appear to an outside observer.
+ * Due to the precession of the earth's
+ * axis, this is not precisely the same as the tropical year.
+ * Approximately 365.25.
+ *
+ * @see #TROPICAL_YEAR
+ * @internal
+ */
+ public static final double SIDEREAL_YEAR = 365.25636;
+
+ //-------------------------------------------------------------------------
+ // Time-related constants
+ //-------------------------------------------------------------------------
+
+ /**
+ * The number of milliseconds in one second.
+ * @internal
+ */
+ public static final int SECOND_MS = 1000;
+
+ /**
+ * The number of milliseconds in one minute.
+ * @internal
+ */
+ public static final int MINUTE_MS = 60*SECOND_MS;
+
+ /**
+ * The number of milliseconds in one hour.
+ * @internal
+ */
+ public static final int HOUR_MS = 60*MINUTE_MS;
+
+ /**
+ * The number of milliseconds in one day.
+ * @internal
+ */
+ public static final long DAY_MS = 24*HOUR_MS;
+
+ /**
+ * The start of the julian day numbering scheme used by astronomers, which
+ * is 1/1/4713 BC (Julian), 12:00 GMT. This is given as the number of milliseconds
+ * since 1/1/1970 AD (Gregorian), a negative number.
+ * Note that julian day numbers and
+ * the Julian calendar are not the same thing. Also note that
+ * julian days start at <em>noon</em>, not midnight.
+ * @internal
+ */
+ public static final long JULIAN_EPOCH_MS = -210866760000000L;
+
+// static {
+// Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
+// cal.clear();
+// cal.set(cal.ERA, 0);
+// cal.set(cal.YEAR, 4713);
+// cal.set(cal.MONTH, cal.JANUARY);
+// cal.set(cal.DATE, 1);
+// cal.set(cal.HOUR_OF_DAY, 12);
+// System.out.println("1.5 Jan 4713 BC = " + cal.getTime().getTime());
+
+// cal.clear();
+// cal.set(cal.YEAR, 2000);
+// cal.set(cal.MONTH, cal.JANUARY);
+// cal.set(cal.DATE, 1);
+// cal.add(cal.DATE, -1);
+// System.out.println("0.0 Jan 2000 = " + cal.getTime().getTime());
+// }
+
+ /**
+ * Milliseconds value for 0.0 January 2000 AD.
+ */
+ static final long EPOCH_2000_MS = 946598400000L;
+
+ //-------------------------------------------------------------------------
+ // Assorted private data used for conversions
+ //-------------------------------------------------------------------------
+
+ // My own copies of these so compilers are more likely to optimize them away
+ static private final double PI = 3.14159265358979323846;
+ static private final double PI2 = PI * 2.0; // one full circle, in radians
+
+ static private final double RAD_HOUR = 12 / PI; // multiply radians by this to get hours (12 hours per PI radians)
+ static private final double DEG_RAD = PI / 180; // multiply degrees by this to get radians
+ static private final double RAD_DEG = 180 / PI; // multiply radians by this to get degrees
+
+ //-------------------------------------------------------------------------
+ // Constructors
+ //-------------------------------------------------------------------------
+
+ /**
+ * Construct a new <code>CalendarAstronomer</code> object that is initialized to
+ * the current date and time.
+ * @internal
+ */
+ public CalendarAstronomer() {
+ this(System.currentTimeMillis()); // delegate to the millisecond constructor with the current system time
+ }
+
+ /**
+ * Construct a new <code>CalendarAstronomer</code> object that is initialized to
+ * the specified date and time.
+ * @internal
+ */
+ public CalendarAstronomer(Date d) {
+ this(d.getTime()); // delegate to the millisecond constructor; a null date fails here with NullPointerException
+ }
+
+ /**
+ * Construct a new <code>CalendarAstronomer</code> object that is initialized to
+ * the specified time. The time is expressed as a number of milliseconds since
+ * January 1, 1970 AD (Gregorian).
+ *
+ * @see java.util.Date#getTime()
+ * @internal
+ */
+ public CalendarAstronomer(long aTime) {
+     // Only the time is stored here; nothing else is computed in this constructor.
+     this.time = aTime;
+ }
+
+ /**
+ * Construct a new <code>CalendarAstronomer</code> object with the given
+ * latitude and longitude. The object's time is set to the current
+ * date and time.
+ *
+ * @param longitude The desired longitude, in degrees east of
+ * the Greenwich meridian.
+ *
+ * @param latitude The desired latitude, in <em>degrees</em>. Positive
+ * values signify North, negative South.
+ *
+ * @see java.util.Date#getTime()
+ * @internal
+ */
+ public CalendarAstronomer(double longitude, double latitude) {
+     this();
+
+     // Both angles arrive in degrees; keep them internally as normalized radians.
+     this.fLongitude = normPI(longitude * DEG_RAD);
+     this.fLatitude = normPI(latitude * DEG_RAD);
+
+     // Offset from GMT implied by the longitude alone: a full circle (PI2)
+     // corresponds to 24 hours, expressed here in milliseconds.
+     this.fGmtOffset = (long)(this.fLongitude * 24 * HOUR_MS / PI2);
+ }
+
+
+ //-------------------------------------------------------------------------
+ // Time and date getters and setters
+ //-------------------------------------------------------------------------
+
+ /**
+ * Set the current date and time of this <code>CalendarAstronomer</code> object. All
+ * astronomical calculations are performed based on this time setting.
+ *
+ * @param aTime the date and time, expressed as the number of milliseconds since
+ * 1/1/1970 0:00 GMT (Gregorian).
+ *
+ * @see #setDate
+ * @see #getTime
+ * @internal
+ */
+ public void setTime(long aTime) {
+     this.time = aTime;
+     // Values cached for the previous time no longer apply.
+     clearCache();
+ }
+
+ /**
+ * Set the current date and time of this <code>CalendarAstronomer</code> object. All
+ * astronomical calculations are performed based on this time setting.
+ *
+ * @param date the time and date, expressed as a <code>Date</code> object.
+ *
+ * @see #setTime
+ * @see #getDate
+ * @internal
+ */
+ public void setDate(Date date) {
+     // Unwrap to epoch milliseconds and reuse setTime(), which also clears the cache.
+     final long millis = date.getTime();
+     setTime(millis);
+ }
+
+ /**
+ * Set the current date and time of this <code>CalendarAstronomer</code> object. All
+ * astronomical calculations are performed based on this time setting.
+ *
+ * @param jdn the desired time, expressed as a "julian day number",
+ * which is the number of elapsed days since
+ * 1/1/4713 BC (Julian), 12:00 GMT. Note that julian day
+ * numbers start at <em>noon</em>. To get the jdn for
+ * the corresponding midnight, subtract 0.5.
+ *
+ * @see #getJulianDay
+ * @see #JULIAN_EPOCH_MS
+ * @internal
+ */
+ public void setJulianDay(double jdn) {
+     // Days -> milliseconds since the julian epoch, then shift to the 1970 epoch.
+     final long msSinceJulianEpoch = (long)(jdn * DAY_MS);
+     this.time = msSinceJulianEpoch + JULIAN_EPOCH_MS;
+     clearCache();
+     // Re-seed the julian day after clearing so getJulianDay() returns the exact
+     // caller-supplied value instead of recomputing it from the truncated time.
+     this.julianDay = jdn;
+ }
+
+ /**
+ * Get the current time of this <code>CalendarAstronomer</code> object,
+ * represented as the number of milliseconds since
+ * 1/1/1970 AD 0:00 GMT (Gregorian).
+ *
+ * @see #setTime
+ * @see #getDate
+ * @internal
+ */
+ public long getTime() {
+     // Plain accessor for the stored time.
+     return this.time;
+ }
+
+ /**
+ * Get the current time of this <code>CalendarAstronomer</code> object,
+ * represented as a <code>Date</code> object.
+ *
+ * @see #setDate
+ * @see #getTime
+ * @internal
+ */
+ public Date getDate() {
+     // A fresh Date is created on every call.
+     final Date snapshot = new Date(this.time);
+     return snapshot;
+ }
+
+ /**
+ * Get the current time of this <code>CalendarAstronomer</code> object,
+ * expressed as a "julian day number", which is the number of elapsed
+ * days since 1/1/4713 BC (Julian), 12:00 GMT.
+ *
+ * @see #setJulianDay
+ * @see #JULIAN_EPOCH_MS
+ * @internal
+ */
+ public double getJulianDay() {
+     // Reuse the cached value when one is present.
+     if (julianDay != INVALID) {
+         return julianDay;
+     }
+     // Milliseconds elapsed since the julian epoch, converted to fractional days.
+     final double msSinceJulianEpoch = (double)(time - JULIAN_EPOCH_MS);
+     julianDay = msSinceJulianEpoch / (double)DAY_MS;
+     return julianDay;
+ }
+
+ /**
+ * Return this object's time expressed in julian centuries:
+ * the number of centuries after 1/1/1900 AD, 12:00 GMT
+ *
+ * @see #getJulianDay
+ * @internal
+ */
+ public double getJulianCentury() {
+     // Reuse the cached value when one is present.
+     if (julianCentury != INVALID) {
+         return julianCentury;
+     }
+     // 2415020.0 is the julian day subtracted as the century origin;
+     // one julian century is 36525 days.
+     final double daysSinceOrigin = getJulianDay() - 2415020.0;
+     julianCentury = daysSinceOrigin / 36525;
+     return julianCentury;
+ }
+
+ /**
+ * Returns the current Greenwich sidereal time, measured in hours
+ * @internal
+ */
+ public double getGreenwichSidereal() {
+     // Reuse the cached value when one is present.
+     if (siderealTime != INVALID) {
+         return siderealTime;
+     }
+
+     // See page 86 of "Practical Astronomy with your Calculator",
+     // by Peter Duffett-Smith, for details on the algorithm.
+
+     // Time of day in hours, wrapped into [0, 24) by normalize().
+     final double utHours = normalize((double)time/HOUR_MS, 24);
+
+     siderealTime = normalize(getSiderealOffset() + utHours*1.002737909, 24);
+     return siderealTime;
+ }
+
+ private double getSiderealOffset() {
+ if (siderealT0 == INVALID) {
+ double JD = Math.floor(getJulianDay() - 0.5) + 0.5;
+ double S = JD - 2451545.0;
+ double T = S / 36525.0;
+ siderealT0 = normalize(6.697374558 + 2400.051336*T + 0.000025862*T*T, 24);
+ }
+ return siderealT0;
+ }
+
+ /**
+ * Returns the current local sidereal time, measured in hours
+ * @internal
+ */
+ public double getLocalSidereal() {
+     final double greenwichHours = getGreenwichSidereal();
+     // The stored GMT offset is in milliseconds; convert it to hours.
+     final double offsetHours = (double)fGmtOffset/HOUR_MS;
+     return normalize(greenwichHours + offsetHours, 24);
+ }
+
+ /**
+ * Converts local sidereal time to Universal Time.
+ *
+ * @param lst The Local Sidereal Time, in hours since sidereal midnight
+ * on this object's current date.
+ *
+ * @return The corresponding Universal Time, in milliseconds since
+ * 1 Jan 1970, GMT.
+ */
+ private long lstToUT(double lst) {
+     // Convert to local mean time
+     double lt = normalize((lst - getSiderealOffset()) * 0.9972695663, 24);
+
+     // Then find local midnight on this day.  The day index must be the
+     // floor of localMillis/DAY_MS: Java's '/' truncates toward zero, which
+     // for instants before the 1970 epoch would pick the midnight *after*
+     // the current time instead of the one starting the current day.
+     long localMillis = time + fGmtOffset;
+     long localDay = localMillis / DAY_MS;
+     if (localMillis % DAY_MS < 0) {
+         --localDay;
+     }
+     long base = DAY_MS * localDay - fGmtOffset;
+
+     return base + (long)(lt * HOUR_MS);
+ }
+
+
+ //-------------------------------------------------------------------------
+ // Coordinate transformations, all based on the current time of this object
+ //-------------------------------------------------------------------------
+
+ /**
+ * Convert from ecliptic to equatorial coordinates.
+ *
+ * @param ecliptic A point in the sky in ecliptic coordinates.
+ * @return The corresponding point in equatorial coordinates.
+ * @internal
+ */
+ public final Equatorial eclipticToEquatorial(Ecliptic ecliptic) {
+     // Unpack the coordinate pair and defer to the (longitude, latitude) overload.
+     final double lon = ecliptic.longitude;
+     final double lat = ecliptic.latitude;
+     return eclipticToEquatorial(lon, lat);
+ }
+
+ /**
+ * Convert from ecliptic to equatorial coordinates.
+ *
+ * @param eclipLong The ecliptic longitude
+ * @param eclipLat The ecliptic latitude
+ *
+ * @return The corresponding point in equatorial coordinates.
+ * @internal
+ */
+ public final Equatorial eclipticToEquatorial(double eclipLong, double eclipLat) {
+     // See page 42 of "Practical Astronomy with your Calculator",
+     // by Peter Duffett-Smith, for details on the algorithm.
+
+     final double obliquity = eclipticObliquity();
+     final double sinObliq = Math.sin(obliquity);
+     final double cosObliq = Math.cos(obliquity);
+
+     final double sinLong = Math.sin(eclipLong);
+     final double cosLong = Math.cos(eclipLong);
+
+     final double sinLat = Math.sin(eclipLat);
+     final double cosLat = Math.cos(eclipLat);
+     final double tanLat = Math.tan(eclipLat);
+
+     // The two values are passed to Equatorial in this order.
+     final double first = Math.atan2(sinLong*cosObliq - tanLat*sinObliq, cosLong);
+     final double second = Math.asin(sinLat*cosObliq + cosLat*sinObliq*sinLong);
+     return new Equatorial(first, second);
+ }
+
+ /**
+ * Convert from ecliptic longitude to equatorial coordinates.
+ *
+ * @param eclipLong The ecliptic longitude
+ *
+ * @return The corresponding point in equatorial coordinates.
+ * @internal
+ */
+ public final Equatorial eclipticToEquatorial(double eclipLong) {
+     // Latitude is taken as zero.
+     final double zeroLatitude = 0;
+     return eclipticToEquatorial(eclipLong, zeroLatitude); // TODO: optimize
+ }
+
+ /**
+ * @internal
+ */
+ public Horizon eclipticToHorizon(double eclipLong) {
+     final Equatorial eq = eclipticToEquatorial(eclipLong);
+
+     // Hour-angle: local sidereal time (hours scaled by PI/12) minus the ascension
+     final double hourAngle = getLocalSidereal()*PI/12 - eq.ascension;
+
+     final double sinHour = Math.sin(hourAngle);
+     final double cosHour = Math.cos(hourAngle);
+     final double sinDec = Math.sin(eq.declination);
+     final double cosDec = Math.cos(eq.declination);
+     final double sinLat = Math.sin(fLatitude);
+     final double cosLat = Math.cos(fLatitude);
+
+     final double alt = Math.asin(sinDec*sinLat + cosDec*cosLat*cosHour);
+     final double az = Math.atan2(-cosDec*cosLat*sinHour, sinDec - sinLat * Math.sin(alt));
+
+     return new Horizon(az, alt);
+ }
+
+
+ //-------------------------------------------------------------------------
+ // The Sun
+ //-------------------------------------------------------------------------
+
+ //
+ // Parameters of the Sun's orbit as of the epoch Jan 0.0 1990
+ // Angles are in radians (after multiplying by PI/180)
+ //
+ static final double JD_EPOCH = 2447891.5; // Julian day of the epoch used for the orbital parameters below
+
+ static final double SUN_ETA_G = 279.403303 * PI/180; // Ecliptic longitude at epoch (degrees converted to radians)
+ static final double SUN_OMEGA_G = 282.768422 * PI/180; // Ecliptic longitude of perigee (degrees converted to radians)
+ static final double SUN_E = 0.016713; // Eccentricity of orbit (dimensionless)
+ //double sunR0 = 1.495585e8; // Semi-major axis in KM
+ //double sunTheta0 = 0.533128 * PI/180; // Angular diameter at R0
+
+ // The following three methods, which compute the sun parameters
+ // given above for an arbitrary epoch (whatever time the object is
+ // set to), make only a small difference as compared to using the
+ // above constants. E.g., Sunset times might differ by ~12
+ // seconds. Furthermore, the eta-g computation is befuddled by
+ // Duffet-Smith's incorrect coefficients (p.86). I've corrected
+ // the first-order coefficient but the others may be off too - no
+ // way of knowing without consulting another source.
+
+// /**
+// * Return the sun's ecliptic longitude at perigee for the current time.
+// * See Duffett-Smith, p. 86.
+// * @return radians
+// */
+// private double getSunOmegaG() {
+// double T = getJulianCentury();
+// return (281.2208444 + (1.719175 + 0.000452778*T)*T) * DEG_RAD;
+// }
+
+// /**
+// * Return the sun's ecliptic longitude for the current time.
+// * See Duffett-Smith, p. 86.
+// * @return radians
+// */
+// private double getSunEtaG() {
+// double T = getJulianCentury();
+// //return (279.6966778 + (36000.76892 + 0.0003025*T)*T) * DEG_RAD;
+// //
+// // The above line is from Duffett-Smith, and yields manifestly wrong
+// // results. The below constant is derived empirically to match the
+// // constant he gives for the 1990 EPOCH.
+// //
+// return (279.6966778 + (-0.3262541582718024 + 0.0003025*T)*T) * DEG_RAD;
+// }
+
+// /**
+// * Return the sun's eccentricity of orbit for the current time.
+// * See Duffett-Smith, p. 86.
+// * @return double
+// */
+// private double getSunE() {
+// double T = getJulianCentury();
+// return 0.01675104 - (0.0000418 + 0.000000126*T)*T;
+// }
+
+ /**
+ * The longitude of the sun at the time specified by this object.
+ * The longitude is measured in radians along the ecliptic
+ * from the "first point of Aries," the point at which the ecliptic
+ * crosses the earth's equatorial plane at the vernal equinox.
+ *
+ * Currently, this method uses an approximation of the two-body Kepler's
+ * equation for the earth and the sun. It does not take into account the
+ * perturbations caused by the other planets, the moon, etc.
+ * @internal
+ */
+ public double getSunLongitude() {
+     // See page 86 of "Practical Astronomy with your Calculator",
+     // by Peter Duffett-Smith, for details on the algorithm.
+
+     // Reuse the cached value when one is present.
+     if (sunLongitude != INVALID) {
+         return sunLongitude;
+     }
+     // Element 0 is the longitude, element 1 the mean anomaly; both are stored.
+     final double[] longitudeAndAnomaly = getSunLongitude(getJulianDay());
+     sunLongitude = longitudeAndAnomaly[0];
+     meanAnomalySun = longitudeAndAnomaly[1];
+     return sunLongitude;
+ }
+
+ /**
+ * TODO Make this public when the entire class is package-private.
+ */
+ /*public*/ double[] getSunLongitude(double julian)
+ {
+ // See page 86 of "Practial Astronomy with your Calculator",
+ // by Peter Duffet-Smith, for details on the algorithm.
+
+ double day = julian - JD_EPOCH; // Days since epoch
+
+ // Find the angular distance the sun in a fictitious
+ // circular orbit has travelled since the epoch.
+ double epochAngle = norm2PI(PI2/TROPICAL_YEAR*day);
+
+ // The epoch wasn't at the sun's perigee; find the angular distance
+ // since perigee, which is called the "mean anomaly"
+ double meanAnomaly = norm2PI(epochAngle + SUN_ETA_G - SUN_OMEGA_G);
+
+ // Now find the "true anomaly", e.g. the real solar longitude
+ // by solving Kepler's equation for an elliptical orbit
+ // NOTE: The 3rd ed. of the book lists omega_g and eta_g in different
+ // equations; omega_g is to be correct.
+ return new double[] {
+ norm2PI(trueAnomaly(meanAnomaly, SUN_E) + SUN_OMEGA_G),
+ meanAnomaly
+ };
+ }
+
+ /**
+ * The position of the sun at this object's current date and time,
+ * in equatorial coordinates.
+ * @internal
+ */
+ public Equatorial getSunPosition() {
+     // Latitude is passed as zero.
+     final double sunLong = getSunLongitude();
+     return eclipticToEquatorial(sunLong, 0);
+ }
+
+ private static class SolarLongitude {
+ double value;
+ SolarLongitude(double val) { value = val; }
+ }
+
+ /**
+ * Constant representing the vernal equinox.
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
+ * Note: In this case, "vernal" refers to the northern hemisphere's seasons.
+ * @internal
+ */
+ public static final SolarLongitude VERNAL_EQUINOX = new SolarLongitude(0);
+
+ /**
+ * Constant representing the summer solstice.
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
+ * Note: In this case, "summer" refers to the northern hemisphere's seasons.
+ * @internal
+ */
+ public static final SolarLongitude SUMMER_SOLSTICE = new SolarLongitude(PI/2);
+
+ /**
+ * Constant representing the autumnal equinox.
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
+ * Note: In this case, "autumn" refers to the northern hemisphere's seasons.
+ * @internal
+ */
+ public static final SolarLongitude AUTUMN_EQUINOX = new SolarLongitude(PI);
+
+ /**
+ * Constant representing the winter solstice.
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
+ * Note: In this case, "winter" refers to the northern hemisphere's seasons.
+ * @internal
+ */
+ public static final SolarLongitude WINTER_SOLSTICE = new SolarLongitude((PI*3)/2);
+
+ /**
+ * Find the next time at which the sun's ecliptic longitude will have
+ * the desired value.
+ * @internal
+ */
+ public long getSunTime(double desired, boolean next)
+ {
+ return timeOfAngle( new AngleFunc() { public double eval() { return getSunLongitude(); } },
+ desired,
+ TROPICAL_YEAR,
+ MINUTE_MS,
+ next);
+ }
+
+ /**
+ * Find the next time at which the sun's ecliptic longitude will have
+ * the desired value.
+ * @internal
+ */
+ public long getSunTime(SolarLongitude desired, boolean next) {
+     // Unwrap the constant and defer to the double overload.
+     final double targetLongitude = desired.value;
+     return getSunTime(targetLongitude, next);
+ }
+
+ /**
+ * Returns the time (GMT) of sunrise or sunset on the local date to which
+ * this calendar is currently set.
+ *
+ * NOTE: This method only works well if this object is set to a
+ * time near local noon. Because of variations between the local
+ * official time zone and the geographic longitude, the
+ * computation can flop over into an adjacent day if this object
+ * is set to a time near local midnight.
+ *
+ * @internal
+ */
+ public long getSunRiseSet(boolean rise)
+ {
+     // Remember the caller's time; the search below moves this object's clock.
+     final long savedTime = time;
+
+     // Index of the local day containing the current time.  This must be a
+     // floor division: Java's '/' truncates toward zero, which for instants
+     // before the 1970 epoch would select the following day.
+     final long localMillis = time + fGmtOffset;
+     long localDay = localMillis / DAY_MS;
+     if (localMillis % DAY_MS < 0) {
+         --localDay;
+     }
+     final long noon = localDay * DAY_MS - fGmtOffset + 12*HOUR_MS;
+
+     try {
+         // Make a rough guess: 6am or 6pm local time on the current day
+         setTime(noon + (long)((rise ? -6 : 6) * HOUR_MS));
+
+         return riseOrSet(new CoordFunc() {
+                 public Equatorial eval() { return getSunPosition(); }
+             },
+             rise,
+             .533 * DEG_RAD,        // Angular Diameter
+             34 /60.0 * DEG_RAD,    // Refraction correction
+             MINUTE_MS / 12);       // Desired accuracy
+     } finally {
+         // Always restore the original time (and clear the cache),
+         // even if the search fails with an exception.
+         setTime(savedTime);
+     }
+ }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// //-------------------------------------------------------------------------
+// // Alternate Sun Rise/Set
+// // See Duffett-Smith p.93
+// //-------------------------------------------------------------------------
+//
+// // This yields worse results (as compared to USNO data) than getSunRiseSet().
+// /**
+// * TODO Make this public when the entire class is package-private.
+// */
+// /*public*/ long getSunRiseSet2(boolean rise) {
+// // 1. Calculate coordinates of the sun's center for midnight
+// double jd = Math.floor(getJulianDay() - 0.5) + 0.5;
+// double[] sl = getSunLongitude(jd);
+// double lambda1 = sl[0];
+// Equatorial pos1 = eclipticToEquatorial(lambda1, 0);
+//
+// // 2. Add ... to lambda to get position 24 hours later
+// double lambda2 = lambda1 + 0.985647*DEG_RAD;
+// Equatorial pos2 = eclipticToEquatorial(lambda2, 0);
+//
+// // 3. Calculate LSTs of rising and setting for these two positions
+// double tanL = Math.tan(fLatitude);
+// double H = Math.acos(-tanL * Math.tan(pos1.declination));
+// double lst1r = (PI2 + pos1.ascension - H) * 24 / PI2;
+// double lst1s = (pos1.ascension + H) * 24 / PI2;
+// H = Math.acos(-tanL * Math.tan(pos2.declination));
+// double lst2r = (PI2-H + pos2.ascension ) * 24 / PI2;
+// double lst2s = (H + pos2.ascension ) * 24 / PI2;
+// if (lst1r > 24) lst1r -= 24;
+// if (lst1s > 24) lst1s -= 24;
+// if (lst2r > 24) lst2r -= 24;
+// if (lst2s > 24) lst2s -= 24;
+//
+// // 4. Convert LSTs to GSTs. If GST1 > GST2, add 24 to GST2.
+// double gst1r = lstToGst(lst1r);
+// double gst1s = lstToGst(lst1s);
+// double gst2r = lstToGst(lst2r);
+// double gst2s = lstToGst(lst2s);
+// if (gst1r > gst2r) gst2r += 24;
+// if (gst1s > gst2s) gst2s += 24;
+//
+// // 5. Calculate GST at 0h UT of this date
+// double t00 = utToGst(0);
+//
+// // 6. Calculate GST at 0h on the observer's longitude
+// double offset = Math.round(fLongitude*12/PI); // p.95 step 6; he _rounds_ to nearest 15 deg.
+// double t00p = t00 - offset*1.002737909;
+// if (t00p < 0) t00p += 24; // do NOT normalize
+//
+// // 7. Adjust
+// if (gst1r < t00p) {
+// gst1r += 24;
+// gst2r += 24;
+// }
+// if (gst1s < t00p) {
+// gst1s += 24;
+// gst2s += 24;
+// }
+//
+// // 8.
+// double gstr = (24.07*gst1r-t00*(gst2r-gst1r))/(24.07+gst1r-gst2r);
+// double gsts = (24.07*gst1s-t00*(gst2s-gst1s))/(24.07+gst1s-gst2s);
+//
+// // 9. Correct for parallax, refraction, and sun's diameter
+// double dec = (pos1.declination + pos2.declination) / 2;
+// double psi = Math.acos(Math.sin(fLatitude) / Math.cos(dec));
+// double x = 0.830725 * DEG_RAD; // parallax+refraction+diameter
+// double y = Math.asin(Math.sin(x) / Math.sin(psi)) * RAD_DEG;
+// double delta_t = 240 * y / Math.cos(dec) / 3600; // hours
+//
+// // 10. Add correction to GSTs, subtract from GSTr
+// gstr -= delta_t;
+// gsts += delta_t;
+//
+// // 11. Convert GST to UT and then to local civil time
+// double ut = gstToUt(rise ? gstr : gsts);
+// //System.out.println((rise?"rise=":"set=") + ut + ", delta_t=" + delta_t);
+// long midnight = DAY_MS * (time / DAY_MS); // Find UT midnight on this day
+// return midnight + (long) (ut * 3600000);
+// }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// /**
+// * Convert local sidereal time to Greenwich sidereal time.
+// * Section 15. Duffett-Smith p.21
+// * @param lst in hours (0..24)
+// * @return GST in hours (0..24)
+// */
+// double lstToGst(double lst) {
+// double delta = fLongitude * 24 / PI2;
+// return normalize(lst - delta, 24);
+// }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// /**
+// * Convert UT to GST on this date.
+// * Section 12. Duffett-Smith p.17
+// * @param ut in hours
+// * @return GST in hours
+// */
+// double utToGst(double ut) {
+// return normalize(getT0() + ut*1.002737909, 24);
+// }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// /**
+// * Convert GST to UT on this date.
+// * Section 13. Duffett-Smith p.18
+// * @param gst in hours
+// * @return UT in hours
+// */
+// double gstToUt(double gst) {
+// return normalize(gst - getT0(), 24) * 0.9972695663;
+// }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// double getT0() {
+// // Common computation for UT <=> GST
+//
+// // Find JD for 0h UT
+// double jd = Math.floor(getJulianDay() - 0.5) + 0.5;
+//
+// double s = jd - 2451545.0;
+// double t = s / 36525.0;
+// double t0 = 6.697374558 + (2400.051336 + 0.000025862*t)*t;
+// return t0;
+// }
+
+// Commented out - currently unused. ICU 2.6, Alan
+// //-------------------------------------------------------------------------
+// // Alternate Sun Rise/Set
+// // See sci.astro FAQ
+// // http://www.faqs.org/faqs/astronomy/faq/part3/section-5.html
+// //-------------------------------------------------------------------------
+//
+// // Note: This method appears to produce inferior accuracy as
+// // compared to getSunRiseSet().
+//
+// /**
+// * TODO Make this public when the entire class is package-private.
+// */
+// /*public*/ long getSunRiseSet3(boolean rise) {
+//
+// // Compute day number for 0.0 Jan 2000 epoch
+// double d = (double)(time - EPOCH_2000_MS) / DAY_MS;
+//
+// // Now compute the Local Sidereal Time, LST:
+// //
+// double LST = 98.9818 + 0.985647352 * d + /*UT*15 + long*/
+// fLongitude*RAD_DEG;
+// //
+// // (east long. positive). Note that LST is here expressed in degrees,
+// // where 15 degrees corresponds to one hour. Since LST really is an angle,
+// // it's convenient to use one unit---degrees---throughout.
+//
+// // COMPUTING THE SUN'S POSITION
+// // ----------------------------
+// //
+// // To be able to compute the Sun's rise/set times, you need to be able to
+// // compute the Sun's position at any time. First compute the "day
+// // number" d as outlined above, for the desired moment. Next compute:
+// //
+// double oblecl = 23.4393 - 3.563E-7 * d;
+// //
+// double w = 282.9404 + 4.70935E-5 * d;
+// double M = 356.0470 + 0.9856002585 * d;
+// double e = 0.016709 - 1.151E-9 * d;
+// //
+// // This is the obliquity of the ecliptic, plus some of the elements of
+// // the Sun's apparent orbit (i.e., really the Earth's orbit): w =
+// // argument of perihelion, M = mean anomaly, e = eccentricity.
+// // Semi-major axis is here assumed to be exactly 1.0 (while not strictly
+// // true, this is still an accurate approximation). Next compute E, the
+// // eccentric anomaly:
+// //
+// double E = M + e*(180/PI) * Math.sin(M*DEG_RAD) * ( 1.0 + e*Math.cos(M*DEG_RAD) );
+// //
+// // where E and M are in degrees. This is it---no further iterations are
+// // needed because we know e has a sufficiently small value. Next compute
+// // the true anomaly, v, and the distance, r:
+// //
+// /* r * cos(v) = */ double A = Math.cos(E*DEG_RAD) - e;
+// /* r * sin(v) = */ double B = Math.sqrt(1 - e*e) * Math.sin(E*DEG_RAD);
+// //
+// // and
+// //
+// // r = sqrt( A*A + B*B )
+// double v = Math.atan2( B, A )*RAD_DEG;
+// //
+// // The Sun's true longitude, slon, can now be computed:
+// //
+// double slon = v + w;
+// //
+// // Since the Sun is always at the ecliptic (or at least very very close to
+// // it), we can use simplified formulae to convert slon (the Sun's ecliptic
+// // longitude) to sRA and sDec (the Sun's RA and Dec):
+// //
+// // sin(slon) * cos(oblecl)
+// // tan(sRA) = -------------------------
+// // cos(slon)
+// //
+// // sin(sDec) = sin(oblecl) * sin(slon)
+// //
+// // As was the case when computing az, the Azimuth, if possible use an
+// // atan2() function to compute sRA.
+//
+// double sRA = Math.atan2(Math.sin(slon*DEG_RAD) * Math.cos(oblecl*DEG_RAD), Math.cos(slon*DEG_RAD))*RAD_DEG;
+//
+// double sin_sDec = Math.sin(oblecl*DEG_RAD) * Math.sin(slon*DEG_RAD);
+// double sDec = Math.asin(sin_sDec)*RAD_DEG;
+//
+// // COMPUTING RISE AND SET TIMES
+// // ----------------------------
+// //
+// // To compute when an object rises or sets, you must compute when it
+// // passes the meridian and the HA of rise/set. Then the rise time is
+// // the meridian time minus HA for rise/set, and the set time is the
+// // meridian time plus the HA for rise/set.
+// //
+// // To find the meridian time, compute the Local Sidereal Time at 0h local
+// // time (or 0h UT if you prefer to work in UT) as outlined above---name
+// // that quantity LST0. The Meridian Time, MT, will now be:
+// //
+// // MT = RA - LST0
+// double MT = normalize(sRA - LST, 360);
+// //
+// // where "RA" is the object's Right Ascension (in degrees!). If negative,
+// // add 360 deg to MT. If the object is the Sun, leave the time as it is,
+// // but if it's stellar, multiply MT by 365.2422/366.2422, to convert from
+// // sidereal to solar time. Now, compute HA for rise/set, name that
+// // quantity HA0:
+// //
+// // sin(h0) - sin(lat) * sin(Dec)
+// // cos(HA0) = ---------------------------------
+// // cos(lat) * cos(Dec)
+// //
+// // where h0 is the altitude selected to represent rise/set. For a purely
+// // mathematical horizon, set h0 = 0 and simplify to:
+// //
+// // cos(HA0) = - tan(lat) * tan(Dec)
+// //
+// // If you want to account for refraction on the atmosphere, set h0 = -35/60
+// // degrees (-35 arc minutes), and if you want to compute the rise/set times
+// // for the Sun's upper limb, set h0 = -50/60 (-50 arc minutes).
+// //
+// double h0 = -50/60 * DEG_RAD;
+//
+// double HA0 = Math.acos(
+// (Math.sin(h0) - Math.sin(fLatitude) * sin_sDec) /
+// (Math.cos(fLatitude) * Math.cos(sDec*DEG_RAD)))*RAD_DEG;
+//
+// // When HA0 has been computed, leave it as it is for the Sun but multiply
+// // by 365.2422/366.2422 for stellar objects, to convert from sidereal to
+// // solar time. Finally compute:
+// //
+// // Rise time = MT - HA0
+// // Set time = MT + HA0
+// //
+// // convert the times from degrees to hours by dividing by 15.
+// //
+// // If you'd like to check that your calculations are accurate or just
+// // need a quick result, check the USNO's Sun or Moon Rise/Set Table,
+// // .
+//
+// double result = MT + (rise ? -HA0 : HA0); // in degrees
+//
+// // Find UT midnight on this day
+// long midnight = DAY_MS * (time / DAY_MS);
+//
+// return midnight + (long) (result * 3600000 / 15);
+// }
+
+ //-------------------------------------------------------------------------
+ // The Moon
+ //-------------------------------------------------------------------------
+
+ // Orbital elements of the Moon used by getMoonPosition(). The angular
+ // values are written in degrees and converted to radians by the PI/180
+ // factor; "epoch" is the instant that getMoonPosition() measures days from
+ // (JD_EPOCH).
+ static final double moonL0 = 318.351648 * PI/180; // Mean long. at epoch
+ static final double moonP0 = 36.340410 * PI/180; // Mean long. of perigee
+ static final double moonN0 = 318.510107 * PI/180; // Mean long. of node
+ static final double moonI = 5.145366 * PI/180; // Inclination of orbit
+ static final double moonE = 0.054900; // Eccentricity of orbit
+
+ // These aren't used right now
+ static final double moonA = 3.84401e5; // semi-major axis (km)
+ static final double moonT0 = 0.5181 * PI/180; // Angular size at distance A
+ static final double moonPi = 0.9507 * PI/180; // Parallax at distance A
+
+ /**
+ * The position of the moon at the time set on this
+ * object, in equatorial coordinates.
+ *
+ * The result is cached in moonPosition; the intermediate values
+ * moonLongitude and moonEclipLong are stored in fields as a side effect
+ * of the calculation.
+ *
+ * @return the cached or newly computed equatorial position of the moon
+ * @internal
+ */
+ public Equatorial getMoonPosition()
+ {
+ //
+ // See page 142 of "Practical Astronomy with your Calculator",
+ // by Peter Duffett-Smith, for details on the algorithm.
+ //
+ if (moonPosition == null) {
+ // Calculate the solar longitude. Has the side effect of
+ // filling in "meanAnomalySun" as well.
+ double sunLong = getSunLongitude();
+
+ //
+ // Find the # of days since the epoch of our orbital parameters.
+ // TODO: Convert the time of day portion into ephemeris time
+ //
+ double day = getJulianDay() - JD_EPOCH; // Days since epoch
+
+ // Calculate the mean longitude and anomaly of the moon, based on
+ // a circular orbit. Similar to the corresponding solar calculation.
+ // The numeric rates are in degrees per day, converted to radians.
+ double meanLongitude = norm2PI(13.1763966*PI/180*day + moonL0);
+ double meanAnomalyMoon = norm2PI(meanLongitude - 0.1114041*PI/180 * day - moonP0);
+
+ //
+ // Calculate the following corrections:
+ // Evection: the sun's gravity affects the moon's eccentricity
+ // Annual Eqn: variation in the effect due to earth-sun distance
+ // A3: a further correction that, like the annual equation,
+ // depends only on the sun's mean anomaly (its physical
+ // meaning is not documented here)
+ //
+ double evection = 1.2739*PI/180 * Math.sin(2 * (meanLongitude - sunLong)
+ - meanAnomalyMoon);
+ double annual = 0.1858*PI/180 * Math.sin(meanAnomalySun);
+ double a3 = 0.3700*PI/180 * Math.sin(meanAnomalySun);
+
+ meanAnomalyMoon += evection - annual - a3;
+
+ //
+ // More correction factors:
+ // center equation of the center correction
+ // a4 a second-harmonic correction in the corrected anomaly
+ // (its physical meaning is not documented here)
+ //
+ // TODO: Skip the equation of the center correction and solve Kepler's eqn?
+ //
+ double center = 6.2886*PI/180 * Math.sin(meanAnomalyMoon);
+ double a4 = 0.2140*PI/180 * Math.sin(2 * meanAnomalyMoon);
+
+ // Now find the moon's corrected longitude
+ moonLongitude = meanLongitude + evection + center - annual + a4;
+
+ //
+ // And finally, find the variation, caused by the fact that the sun's
+ // gravitational pull on the moon varies depending on which side of
+ // the earth the moon is on
+ //
+ double variation = 0.6583*PI/180 * Math.sin(2*(moonLongitude - sunLong));
+
+ moonLongitude += variation;
+
+ //
+ // What we've calculated so far is the moon's longitude in the plane
+ // of its own orbit. Now map to the ecliptic to get the latitude
+ // and longitude. First we need to find the longitude of the ascending
+ // node, the position on the ecliptic where it is crossed by the moon's
+ // orbit as it crosses from the southern to the northern hemisphere.
+ //
+ double nodeLongitude = norm2PI(moonN0 - 0.0529539*PI/180 * day);
+
+ nodeLongitude -= 0.16*PI/180 * Math.sin(meanAnomalySun);
+
+ // Components of the moon's angular distance from the ascending node
+ double y = Math.sin(moonLongitude - nodeLongitude);
+ double x = Math.cos(moonLongitude - nodeLongitude);
+
+ // Project through the orbital inclination (moonI) onto the ecliptic.
+ // Note that moonEclipLong is not normalized into 0..2pi here.
+ moonEclipLong = Math.atan2(y*Math.cos(moonI), x) + nodeLongitude;
+ double moonEclipLat = Math.asin(y * Math.sin(moonI));
+
+ moonPosition = eclipticToEquatorial(moonEclipLong, moonEclipLat);
+ }
+ return moonPosition;
+ }
+
+ /**
+  * Returns the moon's "age" at the time set on this object: the ecliptic
+  * longitude of the moon minus that of the sun, normalized by norm2PI
+  * and measured in radians.
+  *
+  * @return the sun-moon angle in radians, normalized by norm2PI
+  * @see #getMoonPhase
+  * @internal
+  */
+ public double getMoonAge() {
+     // Algorithm: "Practical Astronomy with your Calculator",
+     // Peter Duffett-Smith, page 147.
+     //
+     // The return value is deliberately ignored: the call is made only to
+     // make sure the cached intermediate results read below are current.
+     getMoonPosition();
+
+     final double separation = moonEclipLong - sunLongitude;
+     return norm2PI(separation);
+ }
+
+ /**
+ * Calculate the phase of the moon at the time set in this object.
+ * The returned value is 0.5 * (1 - cos(age)), where age is the
+ * angle returned by getMoonAge(). It therefore lies in the range
+ * 0 <= phase <= 1 and is interpreted as follows:
+ *
+ * 0.00: New moon (age 0)
+ * 0.50: First quarter (age PI/2) and last quarter (age 3*PI/2)
+ * 1.00: Full moon (age PI)
+ *
+ * Because the value is symmetric about full moon it does not distinguish
+ * a waxing moon from a waning one; use getMoonAge() when that matters.
+ *
+ * @return the phase, between 0 (new moon) and 1 (full moon)
+ * @see #getMoonAge
+ * @internal
+ */
+ public double getMoonPhase() {
+ // See page 147 of "Practical Astronomy with your Calculator",
+ // by Peter Duffett-Smith, for details on the algorithm.
+ return 0.5 * (1 - Math.cos(getMoonAge()));
+ }
+
+ /**
+ * Wrapper for a moon-age angle in radians, used as the type of the
+ * named phase constants below. getMoonTime(MoonAge, boolean) reads
+ * the wrapped angle from the value field.
+ */
+ private static class MoonAge {
+ double value;
+ MoonAge(double val) { value = val; }
+ }
+
+ /**
+ * Constant representing a new moon (moon age 0).
+ * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
+ * @internal
+ */
+ public static final MoonAge NEW_MOON = new MoonAge(0);
+
+ /**
+ * Constant representing the moon's first quarter (moon age PI/2).
+ * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
+ * @internal
+ */
+ public static final MoonAge FIRST_QUARTER = new MoonAge(PI/2);
+
+ /**
+ * Constant representing a full moon (moon age PI).
+ * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
+ * @internal
+ */
+ public static final MoonAge FULL_MOON = new MoonAge(PI);
+
+ /**
+ * Constant representing the moon's last quarter (moon age 3*PI/2).
+ * For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
+ * @internal
+ */
+ public static final MoonAge LAST_QUARTER = new MoonAge((PI*3)/2);
+
+ /**
+  * Find the next or previous time at which the moon's age (the angle
+  * returned by getMoonAge()) reaches the desired value.
+  *
+  * The search is delegated to timeOfAngle, which moves this object's
+  * time setting; on return the object is set to the time that is returned.
+  *
+  * @param desired   The desired moon age, in radians.
+  * @param next      true if the next occurrence of the angle
+  *                  is desired, false for the previous occurrence.
+  * @return the time found by timeOfAngle, in milliseconds
+  * @internal
+  */
+ public long getMoonTime(double desired, boolean next)
+ {
+     // The function whose value is driven towards "desired"
+     final AngleFunc moonAge = new AngleFunc() {
+         public double eval() {
+             return getMoonAge();
+         }
+     };
+
+     // One synodic month is the average period; iterate to one-minute accuracy
+     return timeOfAngle(moonAge, desired, SYNODIC_MONTH, MINUTE_MS, next);
+ }
+
+ /**
+  * Find the next or previous time at which the moon will be in the
+  * desired phase, given as one of the MoonAge constants
+  * (for example NEW_MOON or FULL_MOON).
+  *
+  * @param desired   The desired phase of the moon.
+  * @param next      true if the next occurrence of the phase
+  *                  is desired, false for the previous occurrence.
+  * @return the result of getMoonTime(double, boolean) for the wrapped angle
+  * @internal
+  */
+ public long getMoonTime(MoonAge desired, boolean next) {
+     final double targetAngle = desired.value;
+     return getMoonTime(targetAngle, next);
+ }
+
+ /**
+  * Returns the time of moonrise or moonset, as computed by riseOrSet
+  * from the moon's position, starting from the time to which this object
+  * is currently set.
+  *
+  * riseOrSet moves this object's time setting while it iterates.
+  *
+  * @param rise true to compute moonrise, false to compute moonset
+  * @return the rise or set time, in milliseconds
+  * @internal
+  */
+ public long getMoonRiseSet(boolean rise)
+ {
+     final double angularDiameter = .533 * DEG_RAD;   // Angular Diameter
+     final double refraction = 34 /60.0 * DEG_RAD;    // Refraction correction
+
+     final CoordFunc moon = new CoordFunc() {
+         public Equatorial eval() {
+             return getMoonPosition();
+         }
+     };
+
+     // MINUTE_MS is the desired accuracy
+     return riseOrSet(moon, rise, angularDiameter, refraction, MINUTE_MS);
+ }
+
+ //-------------------------------------------------------------------------
+ // Interpolation methods for finding the time at which a given event occurs
+ //-------------------------------------------------------------------------
+
+ /**
+ * Callback used by timeOfAngle: eval() returns the value of some angle
+ * at the time this object is currently set to. timeOfAngle compares the
+ * result with a target expressed in radians.
+ */
+ private interface AngleFunc {
+ public double eval();
+ }
+
+ /**
+ * Search for the time at which func reaches the angle desired, moving
+ * this object's time setting (via setTime) as it goes.
+ *
+ * A first estimate is made from the average period, then refined by
+ * repeatedly re-evaluating func and correcting the estimate in proportion
+ * to the remaining angular error, until the correction is no larger than
+ * epsilon.
+ *
+ * @param func the angle to evaluate at the current time setting
+ * @param desired the target angle, in radians
+ * @param periodDays the average period of the angle, in days
+ * @param epsilon the largest acceptable final correction, in milliseconds
+ * @param next true to search forward in time, false to search backward
+ * @return the time at which the search stopped, in milliseconds; this
+ * object is left set to that time
+ */
+ private long timeOfAngle(AngleFunc func, double desired,
+ double periodDays, long epsilon, boolean next)
+ {
+ // Find the value of the function at the current time
+ double lastAngle = func.eval();
+
+ // Find out how far we are from the desired angle
+ double deltaAngle = norm2PI(desired - lastAngle) ;
+
+ // Using the average period, estimate the next (or previous) time at
+ // which the desired angle occurs.
+ double deltaT = (deltaAngle + (next ? 0 : -PI2)) * (periodDays*DAY_MS) / PI2;
+
+ double lastDeltaT = deltaT; // Liu
+ long startTime = time; // Liu
+
+ setTime(time + (long)deltaT);
+
+ // Now iterate until we get the error below epsilon. Throughout
+ // this loop we use normPI to get values in the range -Pi to Pi,
+ // since we're using them as correction factors rather than absolute angles.
+ do {
+ // Evaluate the function at the time we've estimated
+ double angle = func.eval();
+
+ // Find the # of milliseconds per radian at this point on the curve
+ // NOTE(review): if angle equals lastAngle the divisor is zero and
+ // factor becomes infinite or NaN -- confirm this cannot happen.
+ double factor = Math.abs(deltaT / normPI(angle-lastAngle));
+
+ // Correct the time estimate based on how far off the angle is
+ deltaT = normPI(desired - angle) * factor;
+
+ // HACK:
+ //
+ // If abs(deltaT) begins to diverge we need to quit this loop.
+ // This only appears to happen when attempting to locate, for
+ // example, a new moon on the day of the new moon. E.g.:
+ //
+ // This result is correct:
+ // newMoon(7508(Mon Jul 23 00:00:00 CST 1990,false))=
+ // Sun Jul 22 10:57:41 CST 1990
+ //
+ // But attempting to make the same call a day earlier causes deltaT
+ // to diverge:
+ // CalendarAstronomer.timeOfAngle() diverging: 1.348508727575625E9 ->
+ // 1.3649828540224032E9
+ // newMoon(7507(Sun Jul 22 00:00:00 CST 1990,false))=
+ // Sun Jul 08 13:56:15 CST 1990
+ //
+ // As a temporary solution, we catch this specific condition and
+ // adjust our start time by one eighth period days (either forward
+ // or backward) and try again.
+ // Liu 11/9/00
+ //
+ // NOTE(review): the retry below is a recursive call with no limit on
+ // the number of attempts -- confirm that it always terminates.
+ if (Math.abs(deltaT) > Math.abs(lastDeltaT)) {
+ long delta = (long) (periodDays * DAY_MS / 8);
+ setTime(startTime + (next ? delta : -delta));
+ return timeOfAngle(func, desired, periodDays, epsilon, next);
+ }
+
+ lastDeltaT = deltaT;
+ lastAngle = angle;
+
+ setTime(time + (long)deltaT);
+ }
+ while (Math.abs(deltaT) > epsilon);
+
+ return time;
+ }
+
+ /**
+ * Callback used by riseOrSet: eval() returns the equatorial position of
+ * some object at the time this object is currently set to.
+ */
+ private interface CoordFunc {
+ public Equatorial eval();
+ }
+
+ /**
+ * Compute the time at which the object whose position is given by func
+ * rises or sets, moving this object's time setting (via setTime) while
+ * iterating.
+ *
+ * @param func supplies the object's equatorial position at the current time
+ * @param rise true to compute the rising time, false for the setting time
+ * @param diameter the object's angular diameter, in radians
+ * @param refraction the refraction correction, in radians
+ * @param epsilon the iteration stops once successive estimates differ by
+ * no more than this many milliseconds
+ * @return the estimated rise or set time in milliseconds, including the
+ * correction for refraction and angular diameter. This object is
+ * left set to the last uncorrected estimate.
+ */
+ private long riseOrSet(CoordFunc func, boolean rise,
+ double diameter, double refraction,
+ long epsilon)
+ {
+ Equatorial pos = null;
+ double tanL = Math.tan(fLatitude);
+ long deltaT = Long.MAX_VALUE;
+ int count = 0;
+
+ //
+ // Calculate the object's position at the current time, then use that
+ // position to calculate the time of rising or setting. The position
+ // will be different at that time, so iterate until the error is allowable.
+ // At most five passes are made, whether or not the error has dropped
+ // below epsilon.
+ //
+ do {
+ // See "Practical Astronomy With Your Calculator", section 33.
+ pos = func.eval();
+ // NOTE(review): Math.acos returns NaN when its argument is outside
+ // [-1, 1]; presumably that is the case of an object that never rises
+ // or never sets at this latitude -- confirm how callers cope.
+ double angle = Math.acos(-tanL * Math.tan(pos.declination));
+ double lst = ((rise ? PI2-angle : angle) + pos.ascension ) * 24 / PI2;
+
+ // Convert from LST to Universal Time.
+ long newTime = lstToUT( lst );
+
+ deltaT = newTime - time;
+ setTime(newTime);
+ }
+ while (++ count < 5 && Math.abs(deltaT) > epsilon);
+
+ // Calculate the correction due to refraction and the object's angular diameter
+ double cosD = Math.cos(pos.declination);
+ double psi = Math.acos(Math.sin(fLatitude) / cosD);
+ double x = diameter / 2 + refraction;
+ double y = Math.asin(Math.sin(x) / Math.sin(psi));
+ long delta = (long)((240 * y * RAD_DEG / cosD)*SECOND_MS);
+
+ // Rising happens earlier, and setting later, than the uncorrected time
+ return time + (rise ? -delta : delta);
+ }
+
+ //-------------------------------------------------------------------------
+ // Other utility methods
+ //-------------------------------------------------------------------------
+
+ /**
+  * Reduce 'value' into the half-open interval 0 <= result < 'range' by
+  * removing a whole number of multiples of 'range'. This is a modulus
+  * that, unlike the % operator, stays non-negative for negative values
+  * when 'range' is positive.
+  *
+  * @param value the number to reduce
+  * @param range the size of the interval
+  * @return value - range * floor(value / range)
+  */
+ private static final double normalize(double value, double range) {
+     final double wholePeriods = Math.floor(value / range);
+     return value - range * wholePeriods;
+ }
+
+ /**
+  * Normalize an angle into the range 0 <= result < 2pi.
+  * A floor-based modulus is used because the % operator returns
+  * negative results for negative angles.
+  *
+  * @param angle an angle in radians
+  * @return the equivalent angle between 0 (inclusive) and 2pi (exclusive)
+  */
+ private static final double norm2PI(double angle) {
+     // Same arithmetic as normalize(angle, PI2), written out in place
+     return angle - PI2 * Math.floor(angle / PI2);
+ }
+
+ /**
+  * Normalize an angle into the range -PI <= result < PI.
+  *
+  * @param angle an angle in radians
+  * @return the equivalent angle between -PI (inclusive) and PI (exclusive)
+  */
+ private static final double normPI(double angle) {
+     // Shift by PI, reduce into 0..2pi, then shift back
+     final double shifted = normalize(angle + PI, PI2);
+     return shifted - PI;
+ }
+
+ /**
+  * Find the "true anomaly" (longitude) of an object from
+  * its mean anomaly and the eccentricity of its orbit, by solving
+  * Kepler's equation E - e*sin(E) = M iteratively and then converting
+  * the eccentric anomaly E to the true anomaly.
+  *
+  * @param meanAnomaly  The object's longitude calculated as if it were in
+  *                     a regular, circular orbit, measured in radians
+  *                     from the point of perigee.
+  *
+  * @param eccentricity The eccentricity of the orbit
+  *
+  * @return The true anomaly (longitude) measured in radians
+  */
+ private double trueAnomaly(double meanAnomaly, double eccentricity)
+ {
+     // Duffett-Smith, p.90: start from E = M and apply Newton steps until
+     // the residual of Kepler's equation is at most 1e-5 rad.
+     double eccentricAnomaly = meanAnomaly;
+     double residual;
+     do {
+         residual = eccentricAnomaly - eccentricity * Math.sin(eccentricAnomaly) - meanAnomaly;
+         final double slope = 1 - eccentricity * Math.cos(eccentricAnomaly);
+         eccentricAnomaly = eccentricAnomaly - residual / slope;
+     }
+     while (Math.abs(residual) > 1e-5);
+
+     final double stretch = Math.sqrt((1+eccentricity) / (1-eccentricity));
+     return 2.0 * Math.atan(Math.tan(eccentricAnomaly/2) * stretch);
+ }
+
+ /**
+  * Return the obliquity of the ecliptic (the angle between the ecliptic
+  * and the earth's equator) at the current time. The value is computed
+  * once, as a cubic polynomial in the number of Julian centuries since
+  * the epoch 2000 January 1.5, and cached in eclipObliquity.
+  *
+  * @return the obliquity of the ecliptic relative to the equator,
+  *         measured in radians.
+  */
+ private double eclipticObliquity() {
+     if (eclipObliquity != INVALID) {
+         return eclipObliquity;      // already computed for this time setting
+     }
+
+     final double j2000 = 2451545.0;      // 2000 AD, January 1.5
+     final double T = (getJulianDay() - j2000) / 36525;
+
+     // Polynomial in degrees; the small coefficients are arc seconds / 3600
+     final double degrees = 23.439292
+                          - 46.815/3600 * T
+                          - 0.0006/3600 * T*T
+                          + 0.00181/3600 * T*T*T;
+
+     eclipObliquity = degrees * DEG_RAD;
+     return eclipObliquity;
+ }
+
+
+ //-------------------------------------------------------------------------
+ // Private data
+ //-------------------------------------------------------------------------
+
+ /**
+ * Current time in milliseconds since 1/1/1970 AD
+ * @see java.util.Date#getTime
+ */
+ private long time;
+
+ /* Observer's location and time zone offset. fLatitude is read by
+ * riseOrSet, which passes it directly to Math.tan and Math.sin (i.e. it
+ * is treated as radians). NOTE(review): the original comment here said
+ * these fields "aren't used yet"; the uses of fLongitude and fGmtOffset,
+ * if any, are not visible in this part of the file.
+ */
+ private double fLongitude = 0.0;
+ private double fLatitude = 0.0;
+ private long fGmtOffset = 0;
+
+ //
+ // The following fields are used to cache calculated results for improved
+ // performance. These values all depend on the current time setting
+ // of this object, so the clearCache method is provided.
+ //
+ // INVALID marks a cached value as "not computed yet". Note that
+ // Double.MIN_VALUE is the smallest positive double (about 4.9e-324),
+ // not a negative number.
+ //
+ static final private double INVALID = Double.MIN_VALUE;
+
+ private transient double julianDay = INVALID;
+ private transient double julianCentury = INVALID;
+ private transient double sunLongitude = INVALID;
+ private transient double meanAnomalySun = INVALID;
+ private transient double moonLongitude = INVALID;
+ private transient double moonEclipLong = INVALID;
+ //private transient double meanAnomalyMoon = INVALID;
+ private transient double eclipObliquity = INVALID;
+ private transient double siderealT0 = INVALID;
+ private transient double siderealTime = INVALID;
+
+ // Cached result of getMoonPosition(); null means "not computed yet"
+ private transient Equatorial moonPosition = null;
+
+ /**
+  * Mark every cached, time-dependent result as not computed: the double
+  * caches are reset to INVALID and the cached moon position to null.
+  */
+ private void clearCache() {
+     // Julian date values
+     julianDay = julianCentury = INVALID;
+
+     // Sun
+     sunLongitude = meanAnomalySun = INVALID;
+
+     // Moon
+     moonLongitude = moonEclipLong = INVALID;
+     moonPosition = null;
+
+     // Obliquity and sidereal time
+     eclipObliquity = INVALID;
+     siderealT0 = siderealTime = INVALID;
+ }
+
+ //private static void out(String s) {
+ // System.out.println(s);
+ //}
+
+ //private static String deg(double rad) {
+ // return Double.toString(rad * RAD_DEG);
+ //}
+
+ //private static String hours(long ms) {
+ // return Double.toString((double)ms / HOUR_MS) + " hours";
+ //}
+
+ /**
+  * Format a millisecond value as a date string after subtracting the
+  * default time zone's raw offset from it.
+  *
+  * @param localMillis a time in milliseconds
+  * @return Date.toString() of localMillis minus the default zone's raw offset
+  * @internal
+  */
+ public String local(long localMillis) {
+     final int rawOffset = TimeZone.getDefault().getRawOffset();
+     final Date shifted = new Date(localMillis - rawOffset);
+     return shifted.toString();
+ }
+
+
+ /**
+  * A position in the sky expressed relative to the ecliptic, the plane of
+  * the earth's orbit around the Sun. The latitude is the angular distance
+  * north or south of that plane; the longitude is measured along the plane
+  * from the "First Point of Aries", the Sun's position in the sky at the
+  * Vernal Equinox.
+  *
+  * Both fields are final, so an Ecliptic cannot be modified once it has
+  * been constructed and can be shared freely between callers.
+  *
+  * @see CalendarAstronomer.Equatorial
+  * @see CalendarAstronomer.Horizon
+  * @internal
+  */
+ public static final class Ecliptic {
+     /**
+      * The ecliptic latitude, in radians: the position north or south of
+      * the plane of the ecliptic, with positive angles representing north.
+      * @internal
+      */
+     public final double latitude;
+
+     /**
+      * The ecliptic longitude, in radians: the position along the ecliptic
+      * plane relative to the "First Point of Aries" (the Sun's position in
+      * the sky at the Vernal Equinox), with positive angles representing
+      * east.
+      *
+      * A bit of trivia: the first point of Aries is currently in the
+      * constellation Pisces, due to the precession of the earth's axis.
+      * @internal
+      */
+     public final double longitude;
+
+     /**
+      * Constructs an Ecliptic coordinate object.
+      *
+      * @param lat The ecliptic latitude, measured in radians.
+      * @param lon The ecliptic longitude, measured in radians.
+      * @internal
+      */
+     public Ecliptic(double lat, double lon) {
+         this.latitude = lat;
+         this.longitude = lon;
+     }
+
+     /**
+      * Return "longitude,latitude", with both angles converted to degrees.
+      * @internal
+      */
+     public String toString() {
+         return new StringBuilder()
+             .append(longitude*RAD_DEG)
+             .append(",")
+             .append(latitude*RAD_DEG)
+             .toString();
+     }
+ }
+
+ /**
+  * A position in the sky expressed relative to the plane of the earth's
+  * equator. The Right Ascension is the position east or west along the
+  * equator, measured from the sun's position at the vernal equinox; the
+  * Declination is the position north or south of the equatorial plane.
+  *
+  * Both fields are final, so an Equatorial cannot be modified once it has
+  * been constructed and can be shared freely between callers.
+  *
+  * @see CalendarAstronomer.Ecliptic
+  * @see CalendarAstronomer.Horizon
+  * @internal
+  */
+ public static final class Equatorial {
+     /**
+      * The right ascension, in radians: the position east or west along
+      * the equator relative to the sun's position at the vernal equinox,
+      * with positive angles representing East.
+      * @internal
+      */
+     public final double ascension;
+
+     /**
+      * The declination, in radians: the position north or south of the
+      * equatorial plane, with positive angles representing north.
+      * @internal
+      */
+     public final double declination;
+
+     /**
+      * Constructs an Equatorial coordinate object.
+      *
+      * @param asc The right ascension, measured in radians.
+      * @param dec The declination, measured in radians.
+      * @internal
+      */
+     public Equatorial(double asc, double dec) {
+         this.ascension = asc;
+         this.declination = dec;
+     }
+
+     /**
+      * Return "ascension,declination", with both angles converted to degrees.
+      * @internal
+      */
+     public String toString() {
+         return new StringBuilder()
+             .append(ascension*RAD_DEG)
+             .append(",")
+             .append(declination*RAD_DEG)
+             .toString();
+     }
+
+     /**
+      * Return "ascension,declination" with the right ascension formatted by
+      * radToHms (hours, minutes, seconds) and the declination formatted by
+      * radToDms (degrees, minutes, seconds).
+      * @internal
+      */
+     public String toHmsString() {
+         final String hms = radToHms(ascension);
+         final String dms = radToDms(declination);
+         return hms + "," + dms;
+     }
+ }
+
+ /**
+  * A position in the sky expressed relative to the local horizon.
+  * The Altitude is the object's elevation above the horizon, negative for
+  * objects below it. The Azimuth is the geographic direction of the object
+  * from the observer's position: 0 is north, and the angle increases
+  * clockwise from there.
+  *
+  * Both fields are final, so a Horizon cannot be modified once it has
+  * been constructed and can be shared freely between callers.
+  *
+  * @see CalendarAstronomer.Ecliptic
+  * @see CalendarAstronomer.Equatorial
+  * @internal
+  */
+ public static final class Horizon {
+     /**
+      * The object's altitude above the horizon, in radians.
+      * @internal
+      */
+     public final double altitude;
+
+     /**
+      * The object's direction, in radians clockwise from north.
+      * @internal
+      */
+     public final double azimuth;
+
+     /**
+      * Constructs a Horizon coordinate object.
+      *
+      * @param alt  The altitude, measured in radians above the horizon.
+      * @param azim The azimuth, measured in radians clockwise from north.
+      * @internal
+      */
+     public Horizon(double alt, double azim) {
+         this.altitude = alt;
+         this.azimuth = azim;
+     }
+
+     /**
+      * Return "altitude,azimuth", with both angles converted to degrees.
+      * @internal
+      */
+     public String toString() {
+         return new StringBuilder()
+             .append(altitude*RAD_DEG)
+             .append(",")
+             .append(azimuth*RAD_DEG)
+             .toString();
+     }
+ }
+
+ /**
+  * Format an angle given in radians as hours, minutes and seconds,
+  * e.g. "5h30m12s". Each component is truncated, not rounded.
+  *
+  * @param angle the angle in radians
+  * @return the formatted string
+  */
+ static private String radToHms(double angle) {
+     final double hours = angle*RAD_HOUR;
+
+     final int hrs = (int) hours;
+     final int min = (int)((hours - hrs) * 60);
+     final int sec = (int)((hours - hrs - min/60.0) * 3600);
+
+     return new StringBuilder()
+         .append(hrs).append("h")
+         .append(min).append("m")
+         .append(sec).append("s")
+         .toString();
+ }
+
+ /**
+  * Format an angle given in radians as degrees, arc minutes and arc
+  * seconds, e.g. 12°30'15". Each component is truncated, not rounded.
+  *
+  * A negative angle (for example a southern declination) is formatted as
+  * a single leading minus sign followed by the components of its absolute
+  * value. Formatting the signed value directly would put a minus sign on
+  * every component, and would drop the sign of the degrees for angles
+  * between -1 and 0 degrees.
+  *
+  * @param angle the angle in radians
+  * @return the formatted string
+  */
+ static private String radToDms(double angle) {
+     // Split the magnitude; the sign is re-attached once, in front
+     final double degrees = Math.abs(angle*RAD_DEG);
+
+     final int deg = (int) degrees;
+     final int min = (int)((degrees - deg) * 60);
+     final int sec = (int)((degrees - deg - min/60.0) * 3600);
+
+     final String sign = (angle < 0) ? "-" : "";
+
+     return sign + deg + "\u00b0" + min + "'" + sec + "\"";
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java b/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java
new file mode 100644
index 00000000000..c83562a9722
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CalendarCache.java
@@ -0,0 +1,127 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+/**
+ * A hash table from long keys to long values that uses open addressing
+ * with double hashing. A slot is considered unused when its value is
+ * {@link #EMPTY}, so EMPTY itself cannot be stored as a value, and
+ * entries cannot be removed.
+ *
+ * get and put are synchronized.
+ *
+ * @internal
+ */
+public class CalendarCache
+{
+    /**
+     * Value returned by get for a key that is not in the cache; also the
+     * marker for an unused slot.
+     * NOTE(review): this field is public and not final; it is left that way
+     * to keep the existing interface, but it must never be reassigned.
+     * @internal
+     */
+    static public long EMPTY = Long.MIN_VALUE;
+
+    // Table sizes used as the cache grows
+    static private final int primes[] = {  // 5, 17, 31, 47, // for testing
+        61, 127, 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521,
+        131071, 262139,
+    };
+
+    private int pIndex = 0;                        // index of the current size in primes
+    private int size = 0;                          // number of occupied slots
+    private int arraySize = primes[pIndex];        // current table length
+    private int threshold = (arraySize * 3) / 4;   // occupancy that triggers a rehash
+
+    // Allocated by makeArrays, which the constructor calls
+    private long[] keys;
+    private long[] values;
+
+    /**
+     * Creates an empty cache.
+     * @internal
+     */
+    public CalendarCache() {
+        makeArrays(arraySize);
+    }
+
+    /** Allocate empty tables of the given size and reset the bookkeeping. */
+    private void makeArrays(int newSize) {
+        keys = new long[newSize];
+        values = new long[newSize];
+
+        for (int i = 0; i < newSize; i++) {
+            values[i] = EMPTY;
+        }
+        arraySize = newSize;
+        threshold = (int)(arraySize * 0.75);
+        size = 0;
+    }
+
+    /**
+     * Returns the value stored for key, or EMPTY if there is none.
+     * @internal
+     */
+    public synchronized long get(long key) {
+        return values[findIndex(key)];
+    }
+
+    /**
+     * Stores value under key, replacing any value already stored for it.
+     * @internal
+     */
+    public synchronized void put(long key, long value)
+    {
+        if (size >= threshold) {
+            rehash();
+        }
+        int index = findIndex(key);
+
+        // Count a slot only when it becomes occupied. Overwriting the value
+        // of an existing key must not grow 'size', or repeated puts of the
+        // same key would force needless rehashes.
+        if (values[index] == EMPTY) {
+            size++;
+        }
+        keys[index] = key;
+        values[index] = value;
+    }
+
+    /**
+     * Returns the slot holding key or, if key is absent, the unused slot
+     * at which it should be stored.
+     */
+    private final int findIndex(long key) {
+        int index = hash(key);
+        int delta = 0;
+
+        while (values[index] != EMPTY && keys[index] != key)
+        {
+            if (delta == 0) {
+                delta = hash2(key);     // computed lazily, on the first collision
+            }
+            index = (index + delta) % arraySize;
+        }
+        return index;
+    }
+
+    /** Move every entry into a larger table. */
+    private void rehash()
+    {
+        int oldSize = arraySize;
+        long[] oldKeys = keys;
+        long[] oldValues = values;
+
+        // Take the next size from the table; once it is exhausted, roughly
+        // double. NOTE(review): sizes past the end of the table are not
+        // guaranteed to be prime, so a probe sequence may then not visit
+        // every slot.
+        int newSize;
+        if (pIndex < primes.length - 1) {
+            newSize = primes[++pIndex];
+        } else {
+            newSize = arraySize * 2 + 1;
+        }
+
+        makeArrays(newSize);
+        for (int i = 0; i < oldSize; i++) {
+            if (oldValues[i] != EMPTY) {
+                put(oldKeys[i], oldValues[i]);
+            }
+        }
+    }
+
+    /**
+     * Produce a uniformly-distributed hash value from an integer key.
+     * This is essentially a linear congruential random number generator
+     * that uses the key as its seed value. The result is a valid index,
+     * 0 <= h < arraySize, for negative keys too.
+     */
+    private final int hash(long key)
+    {
+        int h = (int)((key * 15821 + 1) % arraySize);
+        if (h < 0) {
+            h += arraySize;
+        }
+        return h;
+    }
+
+    /**
+     * Secondary hash: the probe step used after a collision. The result is
+     * always in the range 1 .. arraySize-2, so a step can never be a
+     * multiple of arraySize.
+     */
+    private final int hash2(long key) {
+        int modulus = arraySize - 2;
+        int r = (int)(key % modulus);
+        if (r < 0) {
+            // % yields a negative remainder for negative keys. Left as is,
+            // the step could equal arraySize (e.g. key -2 with 61 slots) and
+            // findIndex would probe the same slot forever.
+            r += modulus;
+        }
+        return modulus - r;
+    }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarData.java b/main/classes/core/src/com/ibm/icu/impl/CalendarData.java
new file mode 100644
index 00000000000..f217321a1b2
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CalendarData.java
@@ -0,0 +1,167 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.ArrayList;
+import java.util.MissingResourceException;
+
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.UResourceBundleIterator;
+
+/**
+ * This class abstracts access to calendar (Calendar and DateFormat) data.
+ *
+ * Every lookup is first made under "calendar/&lt;type&gt;/..." for the
+ * calendar type given to the constructor. If that resource is missing and
+ * the type is not gregorian, the same path is looked up under
+ * "calendar/gregorian/...".
+ * @internal ICU 3.0
+ */
+public class CalendarData {
+    /**
+     * Construct a CalendarData from the given locale.
+     * @param loc locale to use. The 'calendar' keyword will be ignored.
+     * @param type calendar type. NULL indicates the gregorian calendar.
+     * No default lookup is done.
+     */
+    public CalendarData(ULocale loc, String type) {
+        this((ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, loc), type);
+    }
+
+    /**
+     * Construct a CalendarData on top of an already loaded bundle.
+     * @param b the resource bundle to read from
+     * @param type calendar type. null, "" and "gregorian" all select the
+     * gregorian calendar, which has no fallback type.
+     */
+    public CalendarData(ICUResourceBundle b, String type) {
+        fBundle = b;
+        if((type == null) || (type.equals("")) || (type.equals("gregorian"))) {
+            fMainType = "gregorian";
+            fFallbackType = null;
+        } else {
+            fMainType = type;
+            fFallbackType = "gregorian";
+        }
+    }
+
+    /**
+     * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()!
+     *
+     * @param key Resource key to data
+     * @return the resource found under the main or the fallback calendar type
+     * @throws MissingResourceException if the resource is in neither place
+     * @internal
+     */
+    public ICUResourceBundle get(String key) {
+        return getWithTypeFallback(key);
+    }
+
+    /**
+     * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()!
+     * There is an implicit key of 'format'
+     * data is located in: "calendar/key/format/subKey"
+     * for example, calendar/dayNames/format/abbreviated
+     *
+     * @param key Resource key to data
+     * @param subKey Resource key to data
+     * @return the resource found under the main or the fallback calendar type
+     * @throws MissingResourceException if the resource is in neither place
+     * @internal
+     */
+    public ICUResourceBundle get(String key, String subKey) {
+        return getWithTypeFallback(key + "/format/" + subKey);
+    }
+
+    /**
+     * Load data for calendar. Note, this object owns the resources, do NOT call ures_close()!
+     * data is located in: "calendar/key/contextKey/subKey"
+     * for example, calendar/dayNames/stand-alone/narrow
+     *
+     * @param key Resource key to data
+     * @param contextKey Resource key to data
+     * @param subKey Resource key to data
+     * @return the resource found under the main or the fallback calendar type
+     * @throws MissingResourceException if the resource is in neither place
+     * @internal
+     */
+    public ICUResourceBundle get(String key, String contextKey, String subKey) {
+        return getWithTypeFallback(key + "/" + contextKey + "/" + subKey);
+    }
+
+    /** Returns the string array stored under key. */
+    public String[] getStringArray(String key) {
+        return get(key).getStringArray();
+    }
+
+    /** Returns the string array stored under key/format/subKey. */
+    public String[] getStringArray(String key, String subKey) {
+        return get(key, subKey).getStringArray();
+    }
+
+    /** Returns the string array stored under key/contextKey/subKey. */
+    public String[] getStringArray(String key, String contextKey, String subKey) {
+        return get(key, contextKey, subKey).getStringArray();
+    }
+
+    /** Returns the string array stored under eras/subkey. */
+    public String[] getEras(String subkey){
+        ICUResourceBundle bundle = get("eras/"+subkey);
+        return bundle.getStringArray();
+    }
+
+    /**
+     * Returns the patterns stored under "DateTimePatterns". An entry that
+     * is a plain string is the pattern itself; for an entry that is an
+     * array, the pattern is the array's first element.
+     */
+    public String[] getDateTimePatterns(){
+        return collectDateTimePatternItems(false);
+    }
+
+    /**
+     * Returns, for each entry under "DateTimePatterns", the second element
+     * of an array entry, or null for an entry that is a plain string.
+     */
+    public String[] getOverrides(){
+        return collectDateTimePatternItems(true);
+    }
+
+    /** Returns the locale of the underlying resource bundle. */
+    public ULocale getULocale() {
+        return fBundle.getULocale();
+    }
+
+    /**
+     * Look up "calendar/&lt;main type&gt;/subPath" and, if it is missing and a
+     * fallback type exists, "calendar/&lt;fallback type&gt;/subPath".
+     */
+    private ICUResourceBundle getWithTypeFallback(String subPath) {
+        try {
+            return fBundle.getWithFallback("calendar/" + fMainType + "/" + subPath);
+        } catch(MissingResourceException m) {
+            if(fFallbackType != null) {
+                return fBundle.getWithFallback("calendar/" + fFallbackType + "/" + subPath);
+            }
+            throw m;
+        }
+    }
+
+    /**
+     * Walk the "DateTimePatterns" entries, collecting either the pattern
+     * (wantOverride false) or the override (wantOverride true) of each
+     * string or array entry. Entries of any other resource type are skipped.
+     */
+    private String[] collectDateTimePatternItems(boolean wantOverride) {
+        ICUResourceBundle bundle = get("DateTimePatterns");
+        // The element type is required: with a raw ArrayList, toArray(T[])
+        // is typed as returning Object[], which cannot be returned as String[].
+        ArrayList<String> list = new ArrayList<String>();
+        UResourceBundleIterator iter = bundle.getIterator();
+        while (iter.hasNext()) {
+            UResourceBundle patResource = iter.next();
+            int resourceType = patResource.getType();
+            switch (resourceType) {
+                case UResourceBundle.STRING:
+                    list.add(wantOverride ? null : patResource.getString());
+                    break;
+                case UResourceBundle.ARRAY:
+                    String[] items = patResource.getStringArray();
+                    list.add(items[wantOverride ? 1 : 0]);
+                    break;
+                default:
+                    break;
+            }
+        }
+        return list.toArray(new String[list.size()]);
+    }
+
+    private ICUResourceBundle fBundle;
+    private String fMainType;
+    private String fFallbackType;
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java b/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
new file mode 100644
index 00000000000..c43675be412
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CalendarUtil.java
@@ -0,0 +1,100 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.MissingResourceException;
+
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+/**
+ * Calendar utilities.
+ *
+ * Date/time format service classes in com.ibm.icu.text packages
+ * sometimes need to access calendar internal APIs. But calendar
+ * classes are in com.ibm.icu.util package, so the package local
+ * cannot be used. This class is added in com.ibm.icu.impl
+ * package for sharing some calendar internal code for calendar
+ * and date format.
+ */
+ public class CalendarUtil {
+
+     /**
+      * Resolved calendar types, keyed by the locale's base name.
+      * The type arguments are required: through a raw ICUCache, get() yields Object,
+      * which cannot be assigned to the String result in getCalendarType.
+      */
+     private static final ICUCache<String, String> CALTYPE_CACHE = new SimpleCache<String, String>();
+
+     private static final String CALKEY = "calendar";
+     private static final String DEFCAL = "gregorian";
+
+     /**
+      * Returns a calendar type for the given locale.
+      * When the given locale has calendar keyword, the
+      * value of calendar keyword is returned. Otherwise,
+      * the default calendar type for the locale is returned:
+      * the first entry of the region's list in supplementalData's
+      * "calendarPreferenceData" (falling back to region "001"), or
+      * "gregorian" when that data cannot be read.
+      * Results resolved without an explicit keyword are cached by base locale name.
+      * @param loc The locale
+      * @return Calendar type string, such as "gregorian"
+      */
+     public static String getCalendarType(ULocale loc) {
+         // An explicit keyword wins; this path bypasses the cache entirely.
+         String calType = loc.getKeywordValue(CALKEY);
+         if (calType != null) {
+             return calType;
+         }
+
+         String baseLoc = loc.getBaseName();
+
+         // Check the cache
+         calType = CALTYPE_CACHE.get(baseLoc);
+         if (calType != null) {
+             return calType;
+         }
+
+         // Canonicalize, so grandfathered variant will be transformed to keywords
+         ULocale canonical = ULocale.createCanonical(loc.toString());
+         calType = canonical.getKeywordValue(CALKEY);
+
+         if (calType == null) {
+             // When calendar keyword is not available, use the locale's
+             // region to get the default calendar type
+             String region = canonical.getCountry();
+             if (region.length() == 0) {
+                 ULocale fullLoc = ULocale.addLikelySubtags(canonical);
+                 region = fullLoc.getCountry();
+             }
+
+             // Read supplementalData to get the default calendar type for
+             // the locale's region
+             try {
+                 UResourceBundle rb = UResourceBundle.getBundleInstance(
+                         ICUResourceBundle.ICU_BASE_NAME,
+                         "supplementalData",
+                         ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+                 UResourceBundle calPref = rb.get("calendarPreferenceData");
+                 UResourceBundle order = null;
+                 try {
+                     order = calPref.get(region);
+                 } catch (MissingResourceException mre) {
+                     // use "001" as fallback
+                     order = calPref.get("001");
+                 }
+                 // the first calendar type is the default for the region
+                 calType = order.getString(0);
+             } catch (MissingResourceException mre) {
+                 // Deliberately ignored: calType stays null and the
+                 // last-resort default below is used.
+             }
+
+             if (calType == null) {
+                 // Use "gregorian" as the last resort fallback.
+                 calType = DEFCAL;
+             }
+         }
+
+         // Cache the resolved value for the next time
+         CALTYPE_CACHE.put(baseLoc, calType);
+
+         return calType;
+     }
+ }
diff --git a/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/main/classes/core/src/com/ibm/icu/impl/CharTrie.java
new file mode 100644
index 00000000000..4d4fb5af09a
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CharTrie.java
@@ -0,0 +1,309 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Trie implementation which stores data in char, 16 bits.
+ * @author synwee
+ * @see com.ibm.icu.impl.Trie
+ * @since release 2.1, Jan 01 2002
+ */
+
+ // note that i need to handle the block calculations later, since chartrie
+ // in icu4c uses the same index array.
+public class CharTrie extends Trie
+{
+ // public constructors ---------------------------------------------
+
+ /**
+ * Creates a new Trie with the settings for the trie data.
+ * Unserialize the 32-bit-aligned input stream and use the data for the
+ * trie.
+ * @param inputStream file input stream to a ICU data file, containing
+ * the trie
+ * @param dataManipulate object which provides methods to parse the char
+ * data
+ * @throws IOException thrown when data reading fails
+ */
+ public CharTrie(InputStream inputStream,
+                 DataManipulate dataManipulate) throws IOException
+ {
+     super(inputStream, dataManipulate);
+
+     // The superclass has read the data; accept it only if it is char data.
+     if (isCharTrie()) {
+         return;
+     }
+     throw new IllegalArgumentException(
+                        "Data given does not belong to a char trie.");
+ }
+
+ /**
+ * Make a dummy CharTrie.
+ * A dummy trie is an empty runtime trie, used when a real data trie cannot
+ * be loaded.
+ *
+ * The trie always returns the initialValue,
+ * or the leadUnitValue for lead surrogate code points.
+ * The Latin-1 part is always set up to be linear.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param leadUnitValue the value for lead surrogate code _units_ that do not
+ * have associated supplementary data
+ * @param dataManipulate object which provides methods to parse the char data
+ */
+ @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
+ public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
+ super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
+
+ int dataLength, latin1Length, i, limit;
+ char block;
+
+ /* calculate the actual size of the dummy trie data */
+
+ /* max(Latin-1, block 0) */
+ dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
+ if(leadUnitValue!=initialValue) {
+ dataLength+=DATA_BLOCK_LENGTH;
+ }
+ m_data_=new char[dataLength];
+ m_dataLength_=dataLength;
+
+ m_initialValue_=(char)initialValue;
+
+ /* fill the index and data arrays */
+
+ /* indexes are preset to 0 (block 0) */
+
+ /* Latin-1 data */
+ for(i=0; i>INDEX_STAGE_2_SHIFT_);
+ i=0xd800>>INDEX_STAGE_1_SHIFT_;
+ limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
+ for(; i> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ + (ch & INDEX_STAGE_3_MASK_);
+ return m_data_[offset];
+ }
+
+ // handle U+D800..U+10FFFF
+ offset = getCodePointOffset(ch);
+
+ // return -1 if there is an error, in this case we return the default
+ // value: m_initialValue_
+ return (offset >= 0) ? m_data_[offset] : m_initialValue_;
+ }
+
+ /**
+ * Gets the value to the data which this lead surrogate character points
+ * to.
+ * Returned data may contain folding offset information for the next
+ * trailing surrogate character.
+ * This method does not guarantee correct results for trail surrogates.
+ * @param ch lead surrogate character
+ * @return data value
+ */
+ public final char getLeadValue(char ch)
+ {
+ return m_data_[getLeadOffset(ch)];
+ }
+
+ /**
+ * Get the value associated with the BMP code point.
+ * Lead surrogate code points are treated as normal code points, with
+ * unfolded values that may differ from getLeadValue() results.
+ * @param ch the input BMP code point
+ * @return trie data value associated with the BMP codepoint
+ */
+ public final char getBMPValue(char ch)
+ {
+ return m_data_[getBMPOffset(ch)];
+ }
+
+ /**
+  * Returns the data value for a pair of surrogates.
+  * The offset comes from getSurrogateOffset(lead, trail); when that offset is
+  * not positive the trie's initial value is returned instead.
+  * @param lead a lead surrogate
+  * @param trail a trail surrogate
+  * @return the data value at the surrogate offset, or the initial value
+  */
+ public final char getSurrogateValue(char lead, char trail)
+ {
+     int dataOffset = getSurrogateOffset(lead, trail);
+     return (dataOffset > 0) ? m_data_[dataOffset] : m_initialValue_;
+ }
+
+ /**
+  * Returns a value from a folding offset (taken from the value of a lead
+  * surrogate) and a trail surrogate.
+  * If the folding offset derived from leadvalue is not positive, the trie's
+  * initial value is returned.
+  * @param leadvalue value associated with the lead surrogate which contains
+  *        the folding offset
+  * @param trail surrogate
+  * @return trie data value associated with the trail character
+  * @throws NullPointerException if this trie has no DataManipulate
+  */
+ public final char getTrailValue(int leadvalue, char trail)
+ {
+     if (m_dataManipulate_ == null) {
+         throw new NullPointerException(
+                          "The field DataManipulate in this Trie is null");
+     }
+     int foldingOffset = m_dataManipulate_.getFoldingOffset(leadvalue);
+     if (foldingOffset <= 0) {
+         return m_initialValue_;
+     }
+     char maskedTrail = (char)(trail & SURROGATE_MASK_);
+     return m_data_[getRawOffset(foldingOffset, maskedTrail)];
+ }
+
+ /**
+ * Gets the latin 1 fast path value.
+ * Note this only works if latin 1 characters have their own linear
+ * array.
+ * @param ch latin 1 characters
+ * @return value associated with latin character
+ */
+ public final char getLatin1LinearValue(char ch)
+ {
+ return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
+ }
+
+ /**
+ * Checks if the argument Trie has the same data as this Trie
+ * @param other Trie to check
+ * @return true if the argument Trie has the same data as this Trie, false
+ * otherwise
+ */
+ ///CLOVER:OFF
+ public boolean equals(Object other)
+ {
+     // The superclass comparison runs first; then the argument must be a CharTrie.
+     if (!super.equals(other) || !(other instanceof CharTrie)) {
+         return false;
+     }
+     return m_initialValue_ == ((CharTrie)other).m_initialValue_;
+ }
+ ///CLOVER:ON
+
+ // protected methods -----------------------------------------------
+
+ /**
+ * Parses the input stream and stores its trie content into a index and
+ * data array
+ * @param inputStream data input stream containing trie data
+ * @exception IOException thrown when data reading fails
+ */
+ protected final void unserialize(InputStream inputStream)
+                                             throws IOException
+ {
+     DataInputStream reader = new DataInputStream(inputStream);
+     final int totalLength = m_dataOffset_ + m_dataLength_;
+     // Index and data are read as one run of chars into a single array.
+     m_index_ = new char[totalLength];
+     int pos = 0;
+     while (pos < totalLength) {
+         m_index_[pos] = reader.readChar();
+         pos++;
+     }
+     // The data array is the same array object as the index array.
+     m_data_ = m_index_;
+     m_initialValue_ = m_data_[m_dataOffset_];
+ }
+
+ /**
+ * Gets the offset to the data which the surrogate pair points to.
+ * @param lead lead surrogate
+ * @param trail trailing surrogate
+ * @return offset to data
+ */
+ protected final int getSurrogateOffset(char lead, char trail)
+ {
+ if (m_dataManipulate_ == null) {
+ throw new NullPointerException(
+ "The field DataManipulate in this Trie is null");
+ }
+
+ // get fold position for the next trail surrogate
+ int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
+
+ // get the real data from the folded lead/trail units
+ if (offset > 0) {
+ return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
+ }
+
+ // return -1 if there is an error, in this case we return the default
+ // value: m_initialValue_
+ return -1;
+ }
+
+ /**
+ * Gets the value at the argument index.
+ * For use internally in TrieIterator.
+ * @param index value at index will be retrieved
+ * @return 32 bit value
+ * @see com.ibm.icu.impl.TrieIterator
+ */
+ protected final int getValue(int index)
+ {
+ return m_data_[index];
+ }
+
+ /**
+ * Gets the default initial value
+ * @return 32 bit value
+ */
+ protected final int getInitialValue()
+ {
+ return m_initialValue_;
+ }
+
+ // private data members --------------------------------------------
+
+ /**
+ * Default value
+ */
+ private char m_initialValue_;
+ /**
+ * Array of char data
+ */
+ private char m_data_[];
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java b/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java
new file mode 100644
index 00000000000..3e0dcd0f97c
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CharacterIteratorWrapper.java
@@ -0,0 +1,148 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.text.CharacterIterator;
+
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * This class is a wrapper around CharacterIterator and implements the
+ * UCharacterIterator protocol
+ * @author ram
+ */
+
+public class CharacterIteratorWrapper extends UCharacterIterator {
+
+ private CharacterIterator iterator;
+
+
+ /**
+  * Wraps the given iterator. The iterator is stored as-is, not copied.
+  * @param iter the iterator to wrap; must not be null
+  * @throws IllegalArgumentException if iter is null
+  */
+ public CharacterIteratorWrapper(CharacterIterator iter){
+     if (iter != null) {
+         this.iterator = iter;
+         return;
+     }
+     throw new IllegalArgumentException();
+ }
+
+ /**
+ * @see UCharacterIterator#current()
+ */
+ public int current() {
+     // translate CharacterIterator's DONE into this class's DONE
+     int code = iterator.current();
+     return (code == CharacterIterator.DONE) ? DONE : code;
+ }
+
+ /**
+ * @see UCharacterIterator#getLength()
+ */
+ public int getLength() {
+ return (iterator.getEndIndex() - iterator.getBeginIndex());
+ }
+
+ /**
+ * @see UCharacterIterator#getIndex()
+ */
+ public int getIndex() {
+ return iterator.getIndex();
+ }
+
+ /**
+ * @see UCharacterIterator#next()
+ */
+ public int next() {
+     // read the current char, then advance; the char read before advancing is returned
+     int code = iterator.current();
+     iterator.next();
+     return (code == CharacterIterator.DONE) ? DONE : code;
+ }
+
+ /**
+ * @see UCharacterIterator#previous()
+ */
+ public int previous() {
+     // step back and translate CharacterIterator's DONE into this class's DONE
+     int code = iterator.previous();
+     return (code == CharacterIterator.DONE) ? DONE : code;
+ }
+
+ /**
+ * @see UCharacterIterator#setIndex(int)
+ */
+ public void setIndex(int index) {
+ try{
+ iterator.setIndex(index);
+ }catch(IllegalArgumentException e){
+ throw new IndexOutOfBoundsException();
+ }
+ }
+
+ /**
+ * @see UCharacterIterator#setToLimit()
+ */
+ public void setToLimit() {
+ iterator.setIndex(iterator.getEndIndex());
+ }
+
+ /**
+ * @see UCharacterIterator#getText(char[])
+ */
+ public int getText(char[] fillIn, int offset){
+     int length =iterator.getEndIndex() - iterator.getBeginIndex();
+     int currentIndex = iterator.getIndex();
+     if(offset < 0 || offset + length > fillIn.length){
+         throw new IndexOutOfBoundsException(Integer.toString(length));
+     }
+
+     // Copy by count rather than stopping at CharacterIterator.DONE: DONE is
+     // '\uFFFF', which is also a legal char in the text, so a sentinel loop
+     // would stop early at such a char while still reporting the full length.
+     char ch = iterator.first();
+     for (int copied = 0; copied < length; copied++) {
+         fillIn[offset + copied] = ch;
+         ch = iterator.next();
+     }
+     // restore the position the iterator had on entry
+     iterator.setIndex(currentIndex);
+
+     return length;
+ }
+
+ /**
+ * Creates a clone of this iterator. Clones the underlying character iterator.
+ * @see UCharacterIterator#clone()
+ */
+ public Object clone(){
+     CharacterIteratorWrapper copy;
+     try {
+         copy = (CharacterIteratorWrapper) super.clone();
+     } catch (CloneNotSupportedException e) {
+         return null; // only invoked if bad underlying character iterator
+     }
+     // give the copy its own clone of the wrapped iterator
+     copy.iterator = (CharacterIterator)this.iterator.clone();
+     return copy;
+ }
+
+
+ /**
+  * Moves the wrapped iterator's position by {@code delta} code units and
+  * returns the new index.
+  * The target is clamped to the wrapped iterator's [beginIndex, endIndex]
+  * range, matching the absolute indices used by getIndex(), setIndex() and
+  * setToLimit() in this class.
+  * @param delta number of code units to move; negative moves backward
+  * @return the index the iterator was moved to
+  */
+ public int moveIndex(int delta){
+     int begin = iterator.getBeginIndex();
+     int end = iterator.getEndIndex();
+     int idx = iterator.getIndex()+delta;
+
+     if(idx < begin) {
+         idx = begin;
+     } else if(idx > end) {
+         idx = end;
+     }
+     // CharacterIterator.setIndex returns the char at the new position, not
+     // the position, so its result must not be used as the return value.
+     iterator.setIndex(idx);
+     return idx;
+ }
+
+ /**
+ * @see UCharacterIterator#getCharacterIterator()
+ */
+ public CharacterIterator getCharacterIterator(){
+ return (CharacterIterator)iterator.clone();
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java b/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java
new file mode 100644
index 00000000000..c1af2ef9269
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/CurrencyData.java
@@ -0,0 +1,152 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Collections;
+import java.util.Map;
+
+import com.ibm.icu.text.CurrencyDisplayNames;
+import com.ibm.icu.util.ULocale;
+
+public class CurrencyData {
+ public static final CurrencyDisplayInfoProvider provider;
+
+ /**
+  * Source of CurrencyDisplayInfo objects. The enclosing class's static
+  * initializer installs one implementation in the {@code provider} field.
+  */
+ public static interface CurrencyDisplayInfoProvider {
+ // Returns display info for the locale. NOTE(review): withFallback presumably
+ // selects whether missing data falls back to defaults - confirm against implementations.
+ CurrencyDisplayInfo getInstance(ULocale locale, boolean withFallback);
+ // The built-in stand-in provider returns false here.
+ boolean hasData();
+ }
+
+ /**
+  * CurrencyDisplayNames extended with unit patterns, per-currency format
+  * info and currency spacing info.
+  */
+ public static abstract class CurrencyDisplayInfo extends CurrencyDisplayNames {
+ // NOTE(review): key/value types of the map are not visible here - confirm.
+ public abstract Map getUnitPatterns();
+ // Format info for one ISO code; the DefaultInfo implementation returns null.
+ public abstract CurrencyFormatInfo getFormatInfo(String isoCode);
+ public abstract CurrencySpacingInfo getSpacingInfo();
+ }
+
+ /**
+  * Immutable holder for a currency pattern and its two monetary separator characters.
+  */
+ public static final class CurrencyFormatInfo {
+     public final String currencyPattern;
+     public final char monetarySeparator;
+     public final char monetaryGroupingSeparator;
+
+     /** Stores the three values unchanged. */
+     public CurrencyFormatInfo(String currencyPattern, char monetarySeparator,
+             char monetaryGroupingSeparator) {
+         this.monetaryGroupingSeparator = monetaryGroupingSeparator;
+         this.monetarySeparator = monetarySeparator;
+         this.currencyPattern = currencyPattern;
+     }
+ }
+
+ /**
+  * Immutable holder for the six currency spacing strings: currency match,
+  * context match and insert text, once for "before" and once for "after".
+  */
+ public static final class CurrencySpacingInfo {
+     private static final String DEFAULT_CUR_MATCH = "[:letter:]";
+     private static final String DEFAULT_CTX_MATCH = "[:digit:]";
+     private static final String DEFAULT_INSERT = " ";
+
+     /** Instance using the same default match/insert strings for both sides. */
+     public static final CurrencySpacingInfo DEFAULT = new CurrencySpacingInfo(
+             DEFAULT_CUR_MATCH, DEFAULT_CTX_MATCH, DEFAULT_INSERT,
+             DEFAULT_CUR_MATCH, DEFAULT_CTX_MATCH, DEFAULT_INSERT);
+
+     public final String beforeCurrencyMatch;
+     public final String beforeContextMatch;
+     public final String beforeInsert;
+     public final String afterCurrencyMatch;
+     public final String afterContextMatch;
+     public final String afterInsert;
+
+     /** Stores the six values unchanged. */
+     public CurrencySpacingInfo(
+             String beforeCurrencyMatch, String beforeContextMatch, String beforeInsert,
+             String afterCurrencyMatch, String afterContextMatch, String afterInsert) {
+         // "after" triple
+         this.afterCurrencyMatch = afterCurrencyMatch;
+         this.afterContextMatch = afterContextMatch;
+         this.afterInsert = afterInsert;
+         // "before" triple
+         this.beforeCurrencyMatch = beforeCurrencyMatch;
+         this.beforeContextMatch = beforeContextMatch;
+         this.beforeInsert = beforeInsert;
+     }
+ }
+
+ // Selects the provider once, at class initialization: the ICU data-backed
+ // implementation when it can be loaded and instantiated, otherwise a
+ // stand-in that serves DefaultInfo instances and reports hasData() == false.
+ static {
+     CurrencyDisplayInfoProvider temp = null;
+     try {
+         // Wildcard type restored; "Class> clzz" is not valid Java.
+         Class<?> clzz = Class.forName("com.ibm.icu.impl.ICUCurrencyDisplayInfoProvider");
+         temp = (CurrencyDisplayInfoProvider) clzz.newInstance();
+     } catch (Throwable t) {
+         // Deliberate best effort: any failure to load or instantiate the
+         // real provider selects the stand-in below.
+         temp = new CurrencyDisplayInfoProvider() {
+             public CurrencyDisplayInfo getInstance(ULocale locale, boolean withFallback) {
+                 return DefaultInfo.getWithFallback(withFallback);
+             }
+
+             public boolean hasData() {
+                 return false;
+             }
+         };
+     }
+     provider = temp;
+ }
+
+ /**
+  * Data-free CurrencyDisplayInfo. With fallback enabled, name and symbol
+  * lookups echo the ISO code and the spacing info is the default instance;
+  * with fallback disabled those lookups return null.
+  */
+ public static class DefaultInfo extends CurrencyDisplayInfo {
+     private final boolean fallback;
+
+     private DefaultInfo(boolean fallback) {
+         this.fallback = fallback;
+     }
+
+     /** Returns one of the two shared instances, chosen by the flag. */
+     public static final CurrencyDisplayInfo getWithFallback(boolean fallback) {
+         if (fallback) {
+             return FALLBACK_INSTANCE;
+         }
+         return NO_FALLBACK_INSTANCE;
+     }
+
+     @Override
+     public String getName(String isoCode) {
+         if (!fallback) {
+             return null;
+         }
+         return isoCode;
+     }
+
+     @Override
+     public String getPluralName(String isoCode, String pluralType) {
+         if (!fallback) {
+             return null;
+         }
+         return isoCode;
+     }
+
+     @Override
+     public String getSymbol(String isoCode) {
+         if (!fallback) {
+             return null;
+         }
+         return isoCode;
+     }
+
+     @Override
+     public Map symbolMap() {
+         return Collections.emptyMap();
+     }
+
+     @Override
+     public Map nameMap() {
+         return Collections.emptyMap();
+     }
+
+     @Override
+     public ULocale getLocale() {
+         return ULocale.ROOT;
+     }
+
+     @Override
+     public Map getUnitPatterns() {
+         if (!fallback) {
+             return null;
+         }
+         return Collections.emptyMap();
+     }
+
+     @Override
+     public CurrencyFormatInfo getFormatInfo(String isoCode) {
+         // no format info in either mode
+         return null;
+     }
+
+     @Override
+     public CurrencySpacingInfo getSpacingInfo() {
+         if (!fallback) {
+             return null;
+         }
+         return CurrencySpacingInfo.DEFAULT;
+     }
+
+     private static final CurrencyDisplayInfo FALLBACK_INSTANCE = new DefaultInfo(true);
+     private static final CurrencyDisplayInfo NO_FALLBACK_INSTANCE = new DefaultInfo(false);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java b/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java
new file mode 100644
index 00000000000..18927a14b8c
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java
@@ -0,0 +1,257 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.math.BigInteger;
+import java.text.FieldPosition;
+import java.text.ParsePosition;
+import java.util.Arrays;
+import java.util.MissingResourceException;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.math.BigDecimal;
+import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+/*
+ * NumberFormat implementation dedicated/optimized for DateFormat,
+ * used by SimpleDateFormat implementation.
+ */
+public final class DateNumberFormat extends NumberFormat {
+
+ private static final long serialVersionUID = -6315692826916346953L;
+
+ private char[] digits;
+ private char zeroDigit; // For backwards compatibility
+ private char minusSign;
+ private boolean positiveOnly = false;
+
+ private transient char[] decimalBuf = new char[20]; // 20 digits is good enough to store Long.MAX_VALUE
+
+ private static SimpleCache CACHE = new SimpleCache();
+
+ private int maxIntDigits;
+ private int minIntDigits;
+
+ // Creates a formatter from an explicit digit string; initialize() reads its
+ // first ten chars as the digits 0-9 and uses nsName to look up the minus sign.
+ public DateNumberFormat(ULocale loc, String digitString, String nsName) {
+ initialize(loc,digitString,nsName);
+ }
+
+ /**
+  * Creates a formatter whose ten digits are the consecutive chars starting at
+  * {@code zeroDigit}.
+  */
+ public DateNumberFormat(ULocale loc, char zeroDigit, String nsName) {
+     char[] tenDigits = new char[10];
+     for (int i = 0; i < tenDigits.length; i++) {
+         tenDigits[i] = (char)(zeroDigit+i);
+     }
+     initialize(loc,new String(tenDigits),nsName);
+ }
+
+ /**
+  * Sets digits, zeroDigit and minusSign from the per-locale cache, filling the
+  * cache first on a miss. On a miss the minus sign is looked up for nsName,
+  * then for "latn", and finally defaults to "-".
+  */
+ private void initialize(ULocale loc,String digitString,String nsName) {
+     char[] elems = CACHE.get(loc);
+     if (elems == null) {
+         // Missed cache
+         ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, loc);
+         String minusString;
+         try {
+             minusString = rb.getStringWithFallback("NumberElements/"+nsName+"/symbols/minusSign");
+         } catch (MissingResourceException ex) {
+             minusString = "-";
+             if ( !nsName.equals("latn") ) {
+                 try {
+                     minusString = rb.getStringWithFallback("NumberElements/latn/symbols/minusSign");
+                 } catch (MissingResourceException ex1) {
+                     // keep the "-" default assigned above
+                 }
+             }
+         }
+         // slots 0-9: digits, slot 10: first char of the minus sign
+         elems = new char[11];
+         digitString.getChars(0, 10, elems, 0);
+         elems[10] = minusString.charAt(0);
+         CACHE.put(loc, elems);
+     }
+
+     // each instance gets its own copy of the digits
+     char[] ownDigits = new char[10];
+     System.arraycopy(elems, 0, ownDigits, 0, 10);
+     digits = ownDigits;
+     zeroDigit = ownDigits[0];
+
+     minusSign = elems[10];
+ }
+
+ /** Stores the value in this class's own field, without validation. */
+ public void setMaximumIntegerDigits(int newValue) {
+     this.maxIntDigits = newValue;
+ }
+
+ /** Returns the value last stored by setMaximumIntegerDigits. */
+ public int getMaximumIntegerDigits() {
+     return this.maxIntDigits;
+ }
+
+ /** Stores the value in this class's own field, without validation. */
+ public void setMinimumIntegerDigits(int newValue) {
+     this.minIntDigits = newValue;
+ }
+
+ /** Returns the value last stored by setMinimumIntegerDigits. */
+ public int getMinimumIntegerDigits() {
+     return this.minIntDigits;
+ }
+
+ /* For supporting SimpleDateFormat.parseInt */
+ public void setParsePositiveOnly(boolean isPositiveOnly) {
+     // when true, parse() stops at a leading minus sign instead of accepting it
+     this.positiveOnly = isPositiveOnly;
+ }
+
+ /** Returns the stored zero digit. */
+ public char getZeroDigit() {
+     return this.zeroDigit;
+ }
+
+ /**
+  * Sets the zero digit and rebuilds all ten digits as the consecutive chars
+  * starting at {@code zero}, allocating the digit array if it is absent.
+  */
+ public void setZeroDigit(char zero) {
+     zeroDigit = zero;
+     if (digits == null) {
+         digits = new char[10];
+     }
+     for (int offset = 0; offset < 10; offset++) {
+         digits[offset] = (char)(zero+offset);
+     }
+ }
+
+ /** Returns the internal digit array itself, not a copy. */
+ public char[] getDigits() {
+     return this.digits;
+ }
+
+ public StringBuffer format(double number, StringBuffer toAppendTo,
+ FieldPosition pos) {
+ throw new UnsupportedOperationException("StringBuffer format(double, StringBuffer, FieldPostion) is not implemented");
+ }
+
+ /**
+  * Appends the decimal form of {@code numberL} using this formatter's digits.
+  * A negative value is written as the minus sign followed by the digits of its
+  * negation. At most min(20, maximum integer digits) low-order digits are
+  * written, left-padded with the zero digit up to the minimum integer digits
+  * but never beyond that width.
+  * The value is narrowed to int before formatting, and the maximum integer
+  * digits must be at least 1, because the work buffer is indexed from
+  * {@code limit - 1}.
+  * @param numberL the number to format
+  * @param toAppendTo buffer the text is appended to
+  * @param pos begin index is set to 0; end index is set to the digit count
+  *        for INTEGER_FIELD and to 0 otherwise
+  * @return toAppendTo
+  */
+ public StringBuffer format(long numberL, StringBuffer toAppendTo,
+         FieldPosition pos) {
+
+     if (numberL < 0) {
+         // negative
+         toAppendTo.append(minusSign);
+         numberL = -numberL;
+     }
+
+     // Note: NumberFormat used by DateFormat only uses int numbers.
+     // Remainder operation on 32bit platform using long is significantly slower
+     // than int. So, this method casts long number into int.
+     int number = (int)numberL;
+
+     int limit = decimalBuf.length < maxIntDigits ? decimalBuf.length : maxIntDigits;
+     int index = limit - 1;
+     // fill the buffer right to left, one digit per step
+     while (true) {
+         decimalBuf[index] = digits[(number % 10)];
+         number /= 10;
+         if (index == 0 || number == 0) {
+             break;
+         }
+         index--;
+     }
+     int padding = minIntDigits - (limit - index);
+     // Stop padding at the start of the buffer: when minIntDigits exceeds the
+     // usable width, an unguarded loop would write to decimalBuf[-1].
+     while (padding > 0 && index > 0) {
+         decimalBuf[--index] = digits[0];
+         padding--;
+     }
+     int length = limit - index;
+     toAppendTo.append(decimalBuf, index, length);
+     pos.setBeginIndex(0);
+     if (pos.getField() == NumberFormat.INTEGER_FIELD) {
+         pos.setEndIndex(length);
+     } else {
+         pos.setEndIndex(0);
+     }
+     return toAppendTo;
+ }
+
+ public StringBuffer format(BigInteger number, StringBuffer toAppendTo,
+ FieldPosition pos) {
+ throw new UnsupportedOperationException("StringBuffer format(BigInteger, StringBuffer, FieldPostion) is not implemented");
+ }
+
+ public StringBuffer format(java.math.BigDecimal number, StringBuffer toAppendTo,
+ FieldPosition pos) {
+ throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented");
+ }
+
+ public StringBuffer format(BigDecimal number,
+ StringBuffer toAppendTo, FieldPosition pos) {
+ throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented");
+ }
+
+ /*
+ * Note: This method only parse integer numbers which can be represented by long
+ */
+ private static final long PARSE_THRESHOLD = 922337203685477579L; // (Long.MAX_VALUE / 10) - 1
+
+ /**
+  * Parses an integer starting at the parse position.
+  * A minus sign is recognized only as the very first char, and stops the
+  * parse when positive-only mode is set. Each following char is accepted as a
+  * digit if it lies within nine chars of this formatter's zero digit, if
+  * UCharacter.digit maps it to 0-9, or if it equals one of this formatter's
+  * digits. Parsing stops at the first other char, or once the accumulated
+  * value reaches PARSE_THRESHOLD.
+  * @return a Long, with the parse position advanced past the consumed chars,
+  *         or null (position unchanged) if no digit was read
+  */
+ public Number parse(String text, ParsePosition parsePosition) {
+     final int start = parsePosition.getIndex();
+     final int end = text.length();
+     int cursor = start;
+     long value = 0;
+     boolean haveDigits = false;
+     boolean isNegative = false;
+
+     while (cursor < end) {
+         char ch = text.charAt(cursor);
+         if (cursor == start && ch == minusSign) {
+             if (positiveOnly) {
+                 break;
+             }
+             isNegative = true;
+             cursor++;
+             continue;
+         }
+
+         // 1st attempt: distance from this formatter's zero digit
+         int digit = ch - digits[0];
+         if (digit < 0 || digit > 9) {
+             // 2nd attempt: Unicode digit value
+             digit = UCharacter.digit(ch);
+         }
+         if (digit < 0 || digit > 9) {
+             // 3rd attempt: search this formatter's digits; 10 means "not found"
+             digit = 0;
+             while (digit < 10 && ch != digits[digit]) {
+                 digit++;
+             }
+         }
+
+         if (digit < 0 || digit > 9 || value >= PARSE_THRESHOLD) {
+             break;
+         }
+         haveDigits = true;
+         value = value * 10 + digit;
+         cursor++;
+     }
+
+     if (!haveDigits) {
+         return null;
+     }
+     parsePosition.setIndex(cursor);
+     return new Long(isNegative ? -value : value);
+ }
+
+ /**
+  * Equal when the superclass comparison succeeds, the argument is a
+  * DateNumberFormat, and digit limits, minus sign, positive-only flag and
+  * digits all match.
+  */
+ public boolean equals(Object obj) {
+     if (obj == null) {
+         return false;
+     }
+     if (!super.equals(obj)) {
+         return false;
+     }
+     if (!(obj instanceof DateNumberFormat)) {
+         return false;
+     }
+     DateNumberFormat that = (DateNumberFormat)obj;
+     if (this.maxIntDigits != that.maxIntDigits
+             || this.minIntDigits != that.minIntDigits
+             || this.minusSign != that.minusSign
+             || this.positiveOnly != that.positiveOnly) {
+         return false;
+     }
+     return Arrays.equals(this.digits, that.digits);
+ }
+
+ private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
+     stream.defaultReadObject();
+     // re-allocate the work buffer; it is transient, so it is not restored
+     decimalBuf = new char[20];
+     // a stream without a digit array: rebuild the digits from the zero digit
+     if (digits == null) {
+         setZeroDigit(zeroDigit);
+     }
+ }
+}
+
+//eof
diff --git a/main/classes/core/src/com/ibm/icu/impl/Differ.java b/main/classes/core/src/com/ibm/icu/impl/Differ.java
new file mode 100644
index 00000000000..069ccd16966
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/Differ.java
@@ -0,0 +1,172 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2009, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+/** VERY Basic Diff program. Compares two sequences of objects fed into it, and
+ * lets you know where they are different.
+ * @author Mark Davis
+ * @version 1.0
+ */
+
+ final public class Differ<T> {
+ //    public static final String copyright =
+ //      "Copyright (C) 2010, International Business Machines Corporation and others. All Rights Reserved.";
+
+     /**
+      * @param stackSize The size of the largest difference you expect.
+      * @param matchCount The number of items that have to be the same to count as a match
+      */
+     @SuppressWarnings("unchecked")
+     public Differ(int stackSize, int matchCount) {
+         this.STACKSIZE = stackSize;
+         this.EQUALSIZE = matchCount;
+         // Generic arrays cannot be created directly; Object[] backs both buffers.
+         a = (T[]) new Object[stackSize+matchCount];
+         b = (T[]) new Object[stackSize+matchCount];
+     }
+
+     /** Adds one item to each sequence. */
+     public void add (T aStr, T bStr) {
+         addA(aStr);
+         addB(bStr);
+     }
+
+     /** Discards what the last checkMatch() consumed, then appends to sequence A. */
+     public void addA (T aStr) {
+         flush();
+         a[aCount++] = aStr;
+     }
+
+     /** Discards what the last checkMatch() consumed, then appends to sequence B. */
+     public void addB (T bStr) {
+         flush();
+         b[bCount++] = bStr;
+     }
+
+     public int getALine(int offset) {
+         return aLine + maxSame + offset;
+     }
+
+     /**
+      * Returns the A item at offset; a negative offset yields the last
+      * matching item, and an offset past the differing run yields the next one.
+      */
+     public T getA(int offset) {
+         if (offset < 0) return last;
+         if (offset > aTop-maxSame) return next;
+         return a[offset];
+     }
+
+     public int getACount() {
+         return aTop-maxSame;
+     }
+
+     public int getBCount() {
+         return bTop-maxSame;
+     }
+
+     public int getBLine(int offset) {
+         return bLine + maxSame + offset;
+     }
+
+     /**
+      * Returns the B item at offset; a negative offset yields the last
+      * matching item, and an offset past the differing run yields the next one.
+      */
+     public T getB(int offset) {
+         if (offset < 0) return last;
+         if (offset > bTop-maxSame) return next;
+         return b[offset];
+     }
+
+     /**
+      * Compares the buffered items and records how far the two sequences agree
+      * and where they resynchronize.
+      * @param finalPass true to treat everything buffered as the final
+      *        difference, without looking for a resynchronization point
+      */
+     public void checkMatch(boolean finalPass) {
+         // find the initial strings that are the same
+         int max = aCount;
+         if (max > bCount) max = bCount;
+         int i;
+         for (i = 0; i < max; ++i) {
+             if (!a[i].equals(b[i])) break;
+         }
+         // at this point, all items up to i are equal
+         maxSame = i;
+         aTop = bTop = maxSame;
+         if (maxSame > 0) last = a[maxSame-1];
+         next = null;
+
+         if (finalPass) {
+             aTop = aCount;
+             bTop = bCount;
+             next = null;
+             return;
+         }
+
+         // not enough trailing items yet to test for a match
+         if (aCount - maxSame < EQUALSIZE || bCount - maxSame < EQUALSIZE) return;
+
+         // now see if the last few a's occur anywhere in the b's, or vice versa
+         int match = find (a, aCount-EQUALSIZE, aCount, b, maxSame, bCount);
+         if (match != -1) {
+             aTop = aCount-EQUALSIZE;
+             bTop = match;
+             next = a[aTop];
+             return;
+         }
+         match = find (b, bCount-EQUALSIZE, bCount, a, maxSame, aCount);
+         if (match != -1) {
+             bTop = bCount-EQUALSIZE;
+             aTop = match;
+             next = b[bTop];
+             return;
+         }
+         if (aCount >= STACKSIZE || bCount >= STACKSIZE) {
+             // flush some of them
+             aCount = (aCount + maxSame) / 2;
+             bCount = (bCount + maxSame) / 2;
+             next = null;
+         }
+     }
+
+     /** Convenient utility
+      * finds a segment of the first array in the second array.
+      * @return -1 if not found, otherwise start position in b
+      */
+
+     public int find (T[] aArr, int aStart, int aEnd, T[] bArr, int bStart, int bEnd) {
+         int len = aEnd - aStart;
+         int bEndMinus = bEnd - len;
+         tryA:
+         for (int i = bStart; i <= bEndMinus; ++i) {
+             for (int j = 0; j < len; ++j) {
+                 if (!bArr[i + j].equals(aArr[aStart + j])) continue tryA;
+             }
+             return i; // we have a match!
+         }
+         return -1;
+     }
+
+     // ====================== PRIVATES ======================
+
+     /** Drops the first aTop / bTop items from each buffer and advances the line counters. */
+     private void flush() {
+         if (aTop != 0) {
+             int newCount = aCount-aTop;
+             System.arraycopy(a, aTop, a, 0, newCount);
+             aCount = newCount;
+             aLine += aTop;
+             aTop = 0;
+         }
+
+         if (bTop != 0) {
+             int newCount = bCount-bTop;
+             System.arraycopy(b, bTop, b, 0, newCount);
+             bCount = newCount;
+             bLine += bTop;
+             bTop = 0;
+         }
+     }
+
+     // assigned once in the constructor
+     private final int STACKSIZE;
+     private final int EQUALSIZE;
+
+     private T [] a;
+     private T [] b;
+     private T last = null;
+     private T next = null;
+     private int aCount = 0;
+     private int bCount = 0;
+     private int aLine = 1;
+     private int bLine = 1;
+     private int maxSame = 0, aTop = 0, bTop = 0;
+
+ }
diff --git a/main/classes/core/src/com/ibm/icu/impl/Grego.java b/main/classes/core/src/com/ibm/icu/impl/Grego.java
new file mode 100644
index 00000000000..e5fbaccee88
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/Grego.java
@@ -0,0 +1,213 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2003-2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ * Partial port from ICU4C's Grego class in i18n/gregoimp.h.
+ *
+ * Methods ported, or moved here from OlsonTimeZone, initially
+ * for work on Jitterbug 5470:
+ * tzdata2006n Brazil incorrect fall-back date 2009-mar-01
+ * Only the methods necessary for that work are provided - this is not a full
+ * port of ICU4C's Grego class (yet).
+ *
+ * These utilities are used by both OlsonTimeZone and SimpleTimeZone.
+ */
+
+package com.ibm.icu.impl;
+
+import com.ibm.icu.util.Calendar;
+
+/**
+ * A utility class providing proleptic Gregorian calendar functions
+ * used by time zone and calendar code. Do not instantiate.
+ *
+ * Note: Unlike GregorianCalendar, all computations performed by this
+ * class occur in the pure proleptic GregorianCalendar.
+ */
+public class Grego {
+
+ // Max/min milliseconds
+ public static final long MIN_MILLIS = -184303902528000000L;
+ public static final long MAX_MILLIS = 183882168921600000L;
+
+ public static final int MILLIS_PER_SECOND = 1000;
+ public static final int MILLIS_PER_MINUTE = 60*MILLIS_PER_SECOND;
+ public static final int MILLIS_PER_HOUR = 60*MILLIS_PER_MINUTE;
+ public static final int MILLIS_PER_DAY = 24*MILLIS_PER_HOUR;
+
+ // January 1, 1 CE Gregorian
+ private static final int JULIAN_1_CE = 1721426;
+
+ // January 1, 1970 CE Gregorian
+ private static final int JULIAN_1970_CE = 2440588;
+
+ private static final int[] MONTH_LENGTH = new int[] {
+ 31,28,31,30,31,30,31,31,30,31,30,31,
+ 31,29,31,30,31,30,31,31,30,31,30,31
+ };
+
+ private static final int[] DAYS_BEFORE = new int[] {
+ 0,31,59,90,120,151,181,212,243,273,304,334,
+ 0,31,60,91,121,152,182,213,244,274,305,335 };
+
+ /**
+ * Return true if the given year is a leap year.
+ * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+ * @return true if the year is a leap year
+ */
+ public static final boolean isLeapYear(int year) {
+ // year&0x3 == year%4
+ return ((year&0x3) == 0) && ((year%100 != 0) || (year%400 == 0));
+ }
+
+ /**
+ * Return the number of days in the given month.
+ * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+ * @param month 0-based month, with 0==Jan
+ * @return the number of days in the given month
+ */
+ public static final int monthLength(int year, int month) {
+ return MONTH_LENGTH[month + (isLeapYear(year) ? 12 : 0)];
+ }
+
+ /**
+ * Return the length of a previous month of the Gregorian calendar.
+ * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+ * @param month 0-based month, with 0==Jan
+ * @return the number of days in the month previous to the given month
+ */
+ public static final int previousMonthLength(int year, int month) {
+ return (month > 0) ? monthLength(year, month-1) : 31;
+ }
+
+ /**
+ * Convert a year, month, and day-of-month, given in the proleptic
+ * Gregorian calendar, to 1970 epoch days.
+ * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+ * @param month 0-based month, with 0==Jan
+ * @param dom 1-based day of month
+ * @return the day number, with day 0 == Jan 1 1970
+ */
+ public static long fieldsToDay(int year, int month, int dom) {
+ int y = year - 1;
+ long julian =
+ 365 * y + floorDivide(y, 4) + (JULIAN_1_CE - 3) + // Julian cal
+ floorDivide(y, 400) - floorDivide(y, 100) + 2 + // => Gregorian cal
+ DAYS_BEFORE[month + (isLeapYear(year) ? 12 : 0)] + dom; // => month/dom
+ return julian - JULIAN_1970_CE; // JD => epoch day
+ }
+
+ /**
+ * Return the day of week on the 1970-epoch day
+ * @param day the 1970-epoch day (integral value)
+ * @return the day of week
+ */
+ public static int dayOfWeek(long day) {
+ long[] remainder = new long[1];
+ floorDivide(day + Calendar.THURSDAY, 7, remainder);
+ int dayOfWeek = (int)remainder[0];
+ dayOfWeek = (dayOfWeek == 0) ? 7 : dayOfWeek;
+ return dayOfWeek;
+ }
+
+ public static int[] dayToFields(long day, int[] fields) {
+ if (fields == null || fields.length < 5) {
+ fields = new int[5];
+ }
+ // Convert from 1970 CE epoch to 1 CE epoch (Gregorian calendar)
+ day += JULIAN_1970_CE - JULIAN_1_CE;
+
+ long[] rem = new long[1];
+ long n400 = floorDivide(day, 146097, rem);
+ long n100 = floorDivide(rem[0], 36524, rem);
+ long n4 = floorDivide(rem[0], 1461, rem);
+ long n1 = floorDivide(rem[0], 365, rem);
+
+ int year = (int)(400 * n400 + 100 * n100 + 4 * n4 + n1);
+ int dayOfYear = (int)rem[0];
+ if (n100 == 4 || n1 == 4) {
+ dayOfYear = 365; // Dec 31 at end of 4- or 400-yr cycle
+ }
+ else {
+ ++year;
+ }
+
+ boolean isLeap = isLeapYear(year);
+ int correction = 0;
+ int march1 = isLeap ? 60 : 59; // zero-based DOY for March 1
+ if (dayOfYear >= march1) {
+ correction = isLeap ? 1 : 2;
+ }
+ int month = (12 * (dayOfYear + correction) + 6) / 367; // zero-based month
+ int dayOfMonth = dayOfYear - DAYS_BEFORE[isLeap ? month + 12 : month] + 1; // one-based DOM
+ int dayOfWeek = (int)((day + 2) % 7); // day 0 is Monday(2)
+ if (dayOfWeek < 1 /* Sunday */) {
+ dayOfWeek += 7;
+ }
+ dayOfYear++; // 1-based day of year
+
+ fields[0] = year;
+ fields[1] = month;
+ fields[2] = dayOfMonth;
+ fields[3] = dayOfWeek;
+ fields[4] = dayOfYear;
+
+ return fields;
+ }
+
+ /*
+ * Convert long time to date/time fields
+ *
+ * result[0] : year
+ * result[1] : month
+ * result[2] : dayOfMonth
+ * result[3] : dayOfWeek
+ * result[4] : dayOfYear
+ * result[5] : millisecond in day
+ */
+ public static int[] timeToFields(long time, int[] fields) {
+ if (fields == null || fields.length < 6) {
+ fields = new int[6];
+ }
+ long[] remainder = new long[1];
+ long day = floorDivide(time, 24*60*60*1000 /* milliseconds per day */, remainder);
+ dayToFields(day, fields);
+ fields[5] = (int)remainder[0];
+ return fields;
+ }
+
+ public static long floorDivide(long numerator, long denominator) {
+ // We do this computation in order to handle
+ // a numerator of Long.MIN_VALUE correctly
+ return (numerator >= 0) ?
+ numerator / denominator :
+ ((numerator + 1) / denominator) - 1;
+ }
+
+ private static long floorDivide(long numerator, long denominator, long[] remainder) {
+ if (numerator >= 0) {
+ remainder[0] = numerator % denominator;
+ return numerator / denominator;
+ }
+ long quotient = ((numerator + 1) / denominator) - 1;
+ remainder[0] = numerator - (quotient * denominator);
+ return quotient;
+ }
+
+ /*
+ * Returns the ordinal number for the specified day of week in the month.
+ * The valid return value is 1, 2, 3, 4 or -1.
+ */
+ public static int getDayOfWeekInMonth(int year, int month, int dayOfMonth) {
+ int weekInMonth = (dayOfMonth + 6)/7;
+ if (weekInMonth == 4) {
+ if (dayOfMonth + 7 > monthLength(year, month)) {
+ weekInMonth = -1;
+ }
+ } else if (weekInMonth == 5) {
+ weekInMonth = -1;
+ }
+ return weekInMonth;
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java
new file mode 100644
index 00000000000..634f418d254
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java
@@ -0,0 +1,171 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import com.ibm.icu.util.VersionInfo;
+
+ public final class ICUBinary
+ {
+     // public inner interface ------------------------------------------------
+
+     /**
+      * Special interface for data authentication
+      */
+     public static interface Authenticate
+     {
+         /**
+          * Method used in ICUBinary.readHeader() to provide data format
+          * authentication.
+          * @param version the 4 version bytes that follow the data format
+          *                identifier in the header
+          * @return true if dataformat is an acceptable version, false otherwise
+          */
+         public boolean isDataVersionAcceptable(byte version[]);
+     }
+
+     // public methods --------------------------------------------------------
+
+     /**
+      * ICU data header reader method.
+      * Takes an ICU generated big-endian input stream, parses the ICU standard
+      * file header and authenticates it.
+      *
+      * Header format, in stream order:
+      *   - Header size (char)
+      *   - Magic number 1 (byte)
+      *   - Magic number 2 (byte)
+      *   - Rest of the header size (char)
+      *   - Reserved word (char)
+      *   - Big endian indicator (byte)
+      *   - Character set family indicator (byte)
+      *   - Size of a char (byte) for c++ and c use
+      *   - Reserved byte (byte)
+      *   - Data format identifier (4 bytes), each ICU data has its own
+      *     identifier to distinguish them. [0] major [1] minor
+      *     [2] milli [3] micro
+      *   - Data version (4 bytes), the change version of the ICU data
+      *     [0] major [1] minor [2] milli [3] micro
+      *   - Unicode version (4 bytes) this ICU is based on.
+      *   - Padding up to the declared header size, which is skipped.
+      *
+      * NOTE(review): ICU4C's UDataInfo names the two 4-byte fields after the
+      * data format identifier "formatVersion" and "dataVersion"; the local
+      * names used here ("dataVersion", "unicodeVersion") appear to be shifted
+      * by one relative to that. Confirm before relying on the names.
+      *
+      * Example of use:
+      *
+      *   try {
+      *       FileInputStream input = new FileInputStream(filename);
+      *       byte[] version = ICUBinary.readHeader(input, dataFormatID, null);
+      *       System.out.println("Verified file header, this is a ICU data file");
+      *   } catch (IOException e) {
+      *       System.out.println("This is not a ICU data file");
+      *   }
+      *
+      * @param inputStream input stream that contains the ICU data header
+      * @param dataFormatIDExpected Data format expected. An array of 4 bytes
+      *                     information about the data format.
+      *                     E.g. data format ID 1.2.3.4. will became an array of
+      *                     {1, 2, 3, 4}
+      * @param authenticate user defined extra data authentication. This value
+      *                     can be null, if no extra authentication is needed.
+      * @return the last 4-byte version field of the header
+      * @exception IOException thrown if there is a read error, if the stream
+      *                        ends before the declared header size, or
+      *                        when header authentication fails.
+      */
+     public static final byte[] readHeader(InputStream inputStream,
+                                           byte dataFormatIDExpected[],
+                                           Authenticate authenticate)
+                                           throws IOException
+     {
+         DataInputStream input = new DataInputStream(inputStream);
+         char headersize = input.readChar();
+         int readcount = 2;
+         //reading the header format
+         byte magic1 = input.readByte();
+         readcount ++;
+         byte magic2 = input.readByte();
+         readcount ++;
+         if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+             throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+         }
+
+         input.readChar(); // reading size
+         readcount += 2;
+         input.readChar(); // reading reserved word
+         readcount += 2;
+         byte bigendian    = input.readByte();
+         readcount ++;
+         byte charset      = input.readByte();
+         readcount ++;
+         byte charsize     = input.readByte();
+         readcount ++;
+         input.readByte(); // reading reserved byte
+         readcount ++;
+
+         byte dataFormatID[] = new byte[4];
+         input.readFully(dataFormatID);
+         readcount += 4;
+         byte dataVersion[] = new byte[4];
+         input.readFully(dataVersion);
+         readcount += 4;
+         byte unicodeVersion[] = new byte[4];
+         input.readFully(unicodeVersion);
+         readcount += 4;
+         if (headersize < readcount) {
+             throw new IOException("Internal Error: Header size error");
+         }
+         // Consume the padding with readFully rather than skipBytes: skipBytes
+         // may skip fewer bytes than requested without reporting it, which
+         // would leave the stream positioned inside the header. readFully
+         // throws EOFException when the stream is truncated. headersize is a
+         // char, so the scratch buffer is at most 65535 bytes.
+         input.readFully(new byte[headersize - readcount]);
+
+         if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_
+             || charsize != CHAR_SIZE_
+             || !Arrays.equals(dataFormatIDExpected, dataFormatID)
+             || (authenticate != null
+                 && !authenticate.isDataVersionAcceptable(dataVersion))) {
+             throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+         }
+         return unicodeVersion;
+     }
+
+     /**
+      * Same as readHeader(), but returns a VersionInfo rather than a byte[].
+      * The VersionInfo is built from the 4 bytes readHeader() returns.
+      */
+     public static final VersionInfo readHeaderAndDataVersion(InputStream inputStream,
+                                                              byte dataFormatIDExpected[],
+                                                              Authenticate authenticate)
+                                                                 throws IOException {
+         byte[] dataVersion = readHeader(inputStream, dataFormatIDExpected, authenticate);
+         return VersionInfo.getInstance(dataVersion[0], dataVersion[1],
+                                        dataVersion[2], dataVersion[3]);
+     }
+
+     // private variables -------------------------------------------------
+
+     /**
+     * Magic numbers to authenticate the data file
+     */
+     private static final byte MAGIC1 = (byte)0xda;
+     private static final byte MAGIC2 = (byte)0x27;
+
+     /**
+     * File format authentication values
+     */
+     private static final byte BIG_ENDIAN_ = 1;
+     private static final byte CHAR_SET_ = 0;
+     private static final byte CHAR_SIZE_ = 2;
+
+     /**
+     * Error messages
+     */
+     private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
+                        "ICU data file error: Not an ICU data file";
+     private static final String HEADER_AUTHENTICATION_FAILED_ =
+         "ICU data file error: Header authentication failed, please check if you have a valid ICU data file";
+ }
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java b/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java
new file mode 100644
index 00000000000..f19da11900a
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUBinaryStream.java
@@ -0,0 +1,61 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: November 5 2002
+* Since: ICU 2.4
+**********************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A DataInputStream that implements random-access seeking. For this
+ * to work, the size of the data stream must be known in advance, or
+ * the data must be supplied as a raw byte[] array.
+ *
+ * Seeking doesn't work directly on all streams. If a given stream
+ * doesn't support seeking, extract the bytes into a byte[] array and
+ * use the byte[] constructor.
+ */
+class ICUBinaryStream extends DataInputStream {
+
+ /**
+ * Construct a stream from the given stream and size.
+ * @param stream the stream of data
+ * @param size the number of bytes that should be made available
+ * for seeking. Bytes beyond this may be read, but seeking will
+ * not work for offset >= size.
+ */
+ public ICUBinaryStream(InputStream stream, int size) {
+ super(stream);
+ mark(size);
+ }
+
+ /**
+ * Construct a stream from the given raw bytes.
+ */
+ public ICUBinaryStream(byte[] raw) {
+ this(new ByteArrayInputStream(raw), raw.length);
+ }
+
+ /**
+ * Seek to the given offset. Offset is from the position of the
+ * stream passed to the constructor, or from the start of the
+ * byte[] array.
+ */
+ public void seek(int offset) throws IOException {
+ reset();
+ int actual = skipBytes(offset);
+ if (actual != offset) {
+ throw new IllegalStateException("Skip(" + offset + ") only skipped " +
+ actual + " bytes");
+ }
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUCache.java b/main/classes/core/src/com/ibm/icu/impl/ICUCache.java
new file mode 100644
index 00000000000..5e8a08c2e43
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUCache.java
@@ -0,0 +1,21 @@
+/*
+ ***************************************************************************
+ * Copyright (c) 2007-2009 International Business Machines Corporation and *
+ * others. All rights reserved. *
+ ***************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+ /**
+  * Minimal cache contract: store, look up and clear key/value pairs.
+  *
+  * @param <K> the key type accepted by put
+  * @param <V> the value type stored and returned
+  */
+ public interface ICUCache<K, V> {
+     // Type of reference holding the Map instance
+     public static final int SOFT = 0;
+     public static final int WEAK = 1;
+
+     // NULL object, which may be used for a cache key
+     public static final Object NULL = new Object();
+
+     /** Removes every entry from the cache. */
+     public void clear();
+
+     /** Stores value under key. */
+     public void put(K key, V value);
+
+     /** Returns the value stored under key; takes Object rather than K. */
+     public V get(Object key);
+ }
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java b/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java
new file mode 100644
index 00000000000..b875286fffe
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUConfig.java
@@ -0,0 +1,77 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.AccessControlException;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.MissingResourceException;
+import java.util.Properties;
+
+/**
+ * ICUConfig is a class used for accessing ICU4J runtime configuration.
+ */
+public class ICUConfig {
+ public static final String CONFIG_PROPS_FILE = "/com/ibm/icu/ICUConfig.properties";
+ private static final Properties CONFIG_PROPS;
+
+ static {
+ CONFIG_PROPS = new Properties();
+ try {
+ InputStream is = ICUData.getStream(CONFIG_PROPS_FILE);
+ if (is != null) {
+ CONFIG_PROPS.load(is);
+ }
+ } catch (MissingResourceException mre) {
+ // If it does not exist, ignore.
+ } catch (IOException ioe) {
+ // Any IO errors, ignore
+ }
+ }
+
+ /**
+ * Get ICU configuration property value for the given name.
+ * @param name The configuration property name
+ * @return The configuration property value, or null if it does not exist.
+ */
+ public static String get(String name) {
+ return get(name, null);
+ }
+
+ /**
+ * Get ICU configuration property value for the given name.
+ * @param name The configuration property name
+ * @param def The default value
+ * @return The configuration property value. If the property does not
+ * exist, def
is returned.
+ */
+ public static String get(String name, String def) {
+ String val = null;
+ final String fname = name;
+ if (System.getSecurityManager() != null) {
+ try {
+ val = AccessController.doPrivileged(new PrivilegedAction() {
+ public String run() {
+ return System.getProperty(fname);
+ }
+ });
+ } catch (AccessControlException e) {
+ // ignore
+ // TODO log this message
+ }
+ } else {
+ val = System.getProperty(name);
+ }
+
+ if (val == null) {
+ val = CONFIG_PROPS.getProperty(name, def);
+ }
+ return val;
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUData.java b/main/classes/core/src/com/ibm/icu/impl/ICUData.java
new file mode 100644
index 00000000000..b47b278db85
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUData.java
@@ -0,0 +1,113 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ * Created on Feb 4, 2004
+ *
+ */
+package com.ibm.icu.impl;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.MissingResourceException;
+
+/**
+ * Provides access to ICU data files as InputStreams. Implements security checking.
+ */
+public final class ICUData {
+ /*
+ * Return a URL to the ICU resource names resourceName. The
+ * resource name should either be an absolute path, or a path relative to
+ * com.ibm.icu.impl (e.g., most likely it is 'data/foo'). If required
+ * is true, throw an MissingResourceException instead of returning a null result.
+ */
+ public static boolean exists(final String resourceName) {
+ URL i = null;
+ if (System.getSecurityManager() != null) {
+ i = AccessController.doPrivileged(new PrivilegedAction() {
+ public URL run() {
+ return ICUData.class.getResource(resourceName);
+ }
+ });
+ } else {
+ i = ICUData.class.getResource(resourceName);
+ }
+ return i != null;
+ }
+
+ private static InputStream getStream(final Class> root, final String resourceName, boolean required) {
+ InputStream i = null;
+
+ if (System.getSecurityManager() != null) {
+ i = AccessController.doPrivileged(new PrivilegedAction() {
+ public InputStream run() {
+ return root.getResourceAsStream(resourceName);
+ }
+ });
+ } else {
+ i = root.getResourceAsStream(resourceName);
+ }
+
+ if (i == null && required) {
+ throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName);
+ }
+ return i;
+ }
+
+ private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
+ InputStream i = null;
+ if (System.getSecurityManager() != null) {
+ i = AccessController.doPrivileged(new PrivilegedAction() {
+ public InputStream run() {
+ return loader.getResourceAsStream(resourceName);
+ }
+ });
+ } else {
+ i = loader.getResourceAsStream(resourceName);
+ }
+ if (i == null && required) {
+ throw new MissingResourceException("could not locate data", loader.toString(), resourceName);
+ }
+ return i;
+ }
+
+ public static InputStream getStream(ClassLoader loader, String resourceName){
+ return getStream(loader,resourceName, false);
+ }
+
+ public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
+ return getStream(loader, resourceName, true);
+ }
+
+ /*
+ * Convenience override that calls getStream(ICUData.class, resourceName, false);
+ */
+ public static InputStream getStream(String resourceName) {
+ return getStream(ICUData.class, resourceName, false);
+ }
+
+ /*
+ * Convenience method that calls getStream(ICUData.class, resourceName, true).
+ */
+ public static InputStream getRequiredStream(String resourceName) {
+ return getStream(ICUData.class, resourceName, true);
+ }
+
+ /*
+ * Convenience override that calls getStream(root, resourceName, false);
+ */
+ public static InputStream getStream(Class> root, String resourceName) {
+ return getStream(root, resourceName, false);
+ }
+
+ /*
+ * Convenience method that calls getStream(root, resourceName, true).
+ */
+ public static InputStream getRequiredStream(Class> root, String resourceName) {
+ return getStream(root, resourceName, true);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java b/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java
new file mode 100644
index 00000000000..5c1f02ac44b
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUDataVersion.java
@@ -0,0 +1,89 @@
+/*
+*******************************************************************************
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.util.MissingResourceException;
+
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.VersionInfo;
+
+ public final class ICUDataVersion {
+     private static final String U_ICU_VERSION_BUNDLE = "icuver";
+     private static final String U_ICU_STD_BUNDLE = "icustd";
+
+     private static final String U_ICU_DATA_KEY = "DataVersion";
+
+     /**
+      * This function loads up icuver and compares the data version to the wired-in ICU_DATA_VERSION.
+      * If icuver shows something less than ICU_DATA_VERSION, or no data version can be loaded at all,
+      * it returns TRUE, else FALSE.
+      *
+      * @param dataVersionFillin unused. VersionInfo instances cannot be filled in by the callee and
+      *                          Java passes references by value, so nothing can be returned through
+      *                          this parameter; call getDataVersion() to obtain the version. The
+      *                          parameter is kept so existing callers keep compiling.
+      * @return TRUE if ICU_DATA_VERSION is newer than icuver, else FALSE
+      */
+     public static boolean isDataOlder(VersionInfo dataVersionFillin) {
+         boolean result = true;
+
+         VersionInfo dataVersion = getDataVersion();
+
+         if (dataVersion!= null) {
+             // Test the sign only: compareTo is specified to return a negative,
+             // zero or positive value, not exactly -1, so comparing against -1
+             // can misreport older data as current.
+             if (dataVersion.compareTo(VersionInfo.ICU_DATA_VERSION) >= 0) {
+                 result = false;
+             }
+         }
+
+         return result;
+     }
+
+     /**
+      * This function tests whether "icustd" is available in the data. If the data has been repackaged or modified, "icustd"
+      * (standard ICU) will be missing, and the function will alert the caller that the data is not standard.
+      *
+      * @return TRUE if data has been modified, else FALSE
+      */
+     public static boolean isDataModified() {
+         return !hasICUSTDBundle();
+     }
+
+     /**
+      * This function retrieves the data version from icuver and returns a VersionInfo object with that version information.
+      *
+      * @return Current icu data version, or null if the icuver bundle or its DataVersion entry is missing
+      */
+     public static VersionInfo getDataVersion() {
+         UResourceBundle icudatares = null;
+         try {
+             icudatares = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME, ICUDataVersion.U_ICU_VERSION_BUNDLE, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
+             icudatares = icudatares.get(ICUDataVersion.U_ICU_DATA_KEY);
+         } catch (MissingResourceException ex) {
+             return null;
+         }
+
+         return  VersionInfo.getInstance(icudatares.getString());
+     }
+
+     /*
+      * Returns true when the "icustd" bundle can be loaded.
+      * NOTE(review): this uses the one-argument getBundleInstance, whereas
+      * getDataVersion() passes ICU_BASE_NAME and the ICU data class loader;
+      * confirm that "icustd" is really resolved against the ICU data here.
+      */
+     private static boolean hasICUSTDBundle() {
+         try {
+             UResourceBundle.getBundleInstance(ICUDataVersion.U_ICU_STD_BUNDLE);
+         } catch (MissingResourceException ex) {
+             return false;
+         }
+
+         return true;
+     }
+
+ }
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java b/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java
new file mode 100644
index 00000000000..0076ee75c32
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUDebug.java
@@ -0,0 +1,129 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.util.VersionInfo;
+
+public final class ICUDebug {
+ // Raw value of the "ICUDebug" system property. Stays null when the property
+ // is not set or when reading it raises a SecurityException.
+ private static String params;
+ static {
+ try {
+ params = System.getProperty("ICUDebug");
+ }
+ catch (SecurityException e) {
+ // Deliberately ignored: params stays null, which leaves debugging off.
+ }
+ }
+ // The two initializers below read params, so they must stay textually after
+ // the static block above (static initializers run in source order).
+ // debug is true whenever the property was readable and set, even to "".
+ private static boolean debug = params != null;
+ // help is true when debugging is on and the value is empty or contains "help".
+ private static boolean help = debug && (params.equals("") || params.indexOf("help") != -1);
+
+ static {
+ // Echo the property value once, during class initialization.
+ if (debug) {
+ System.out.println("\nICUDebug=" + params);
+ }
+ }
+
+ // Value of the "java.version" system property, or "0" when it is not set.
+ public static final String javaVersionString = System.getProperty("java.version", "0");
+ // Both fields below are assigned by a static initializer further down in this class.
+ public static final boolean isJDK14OrHigher;
+ public static final VersionInfo javaVersion;
+
+ /**
+  * Builds a VersionInfo from the first (up to four) runs of ASCII digits
+  * in s; any non-digit characters act as separators. Fields that are not
+  * present stay 0. A field that grows beyond 255 is reset to 0 and parsing
+  * stops there.
+  */
+ public static VersionInfo getInstanceLenient(String s) {
+     final int[] parts = new int[4];
+     int slot = 0;
+     boolean inNumber = false;
+     for (int pos = 0; pos < s.length(); pos++) {
+         final char ch = s.charAt(pos);
+         final boolean isDigit = ch >= '0' && ch <= '9';
+         if (isDigit && inNumber) {
+             // Continue accumulating the current field.
+             parts[slot] = parts[slot] * 10 + (ch - '0');
+             if (parts[slot] > 255) {
+                 // VersionInfo does not support numbers
+                 // greater than 255.  In such case, we
+                 // ignore the number and the rest
+                 parts[slot] = 0;
+                 break;
+             }
+         } else if (isDigit) {
+             // First digit of a new field.
+             inNumber = true;
+             parts[slot] = ch - '0';
+         } else if (inNumber) {
+             // A separator ends the current field.
+             if (slot == 3) {
+                 // up to 4 numbers
+                 break;
+             }
+             inNumber = false;
+             slot++;
+         }
+     }
+
+     return VersionInfo.getInstance(parts[0], parts[1], parts[2], parts[3]);
+ }
+
+ static {
+ // getInstanceLenient extracts up to four numeric fields from the raw
+ // java.version string.
+ javaVersion = getInstanceLenient(javaVersionString);
+
+ VersionInfo java14Version = VersionInfo.getInstance("1.4.0");
+
+ // True when the parsed runtime version compares at or above 1.4.0.
+ isJDK14OrHigher = javaVersion.compareTo(java14Version) >= 0;
+ }
+
+ /**
+ * Returns the debug flag, which is true when the "ICUDebug" system
+ * property was readable and set when this class was initialized.
+ */
+ public static boolean enabled() {
+ return debug;
+ }
+
+ /**
+  * Returns true when debugging is on and arg occurs anywhere in the
+  * "ICUDebug" property value (plain substring match). In help mode the
+  * result is also printed to System.out.
+  */
+ public static boolean enabled(String arg) {
+     if (!debug) {
+         return false;
+     }
+     final boolean found = params.indexOf(arg) != -1;
+     if (help) {
+         System.out.println("\nICUDebug.enabled(" + arg + ") = " + found);
+     }
+     return found;
+ }
+
+ /**
+  * Looks up arg in the "ICUDebug" property value.
+  * Returns "false" when debugging is off or arg does not occur; the text
+  * between "arg=" and the next comma (or the end of the value) when arg is
+  * directly followed by '='; and "true" otherwise. In help mode the result
+  * is also printed to System.out.
+  */
+ public static String value(String arg) {
+     if (!debug) {
+         return "false";
+     }
+     String answer = "false";
+     final int found = params.indexOf(arg);
+     if (found != -1) {
+         final int after = found + arg.length();
+         if (after < params.length() && params.charAt(after) == '=') {
+             final int start = after + 1;
+             final int comma = params.indexOf(",", start);
+             answer = (comma == -1)
+                 ? params.substring(start)
+                 : params.substring(start, comma);
+         } else {
+             answer = "true";
+         }
+     }
+
+     if (help) {
+         System.out.println("\nICUDebug.value(" + arg + ") = " + answer);
+     }
+     return answer;
+ }
+
+// static public void main(String[] args) {
+// // test
+// String[] tests = {
+// "1.3.0",
+// "1.3.0_02",
+// "1.3.1ea",
+// "1.4.1b43",
+// "___41___5",
+// "x1.4.51xx89ea.7f",
+// "1.6_2009",
+// "10-100-1000-10000",
+// "beta",
+// "0",
+// };
+// for (int i = 0; i < tests.length; ++i) {
+// System.out.println(tests[i] + " => " + getInstanceLenient(tests[i]));
+// }
+// }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java b/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java
new file mode 100644
index 00000000000..22bbbdbb641
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICULocaleService.java
@@ -0,0 +1,615 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import com.ibm.icu.util.ULocale;
+
+public class ICULocaleService extends ICUService {
+ private ULocale fallbackLocale;
+ private String fallbackLocaleName;
+
+ /**
+ * Construct an ICULocaleService without a name. No superclass constructor
+ * is called explicitly, so the implicit no-argument ICUService constructor runs.
+ */
+ public ICULocaleService() {
+ }
+
+ /**
+ * Construct an ICULocaleService with a name (useful for debugging).
+ *
+ * @param name the name, passed unchanged to the ICUService constructor
+ */
+ public ICULocaleService(String name) {
+ super(name);
+ }
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(ULocale, int, ULocale[]) with KIND_ANY for kind and null for
+ * actualReturn.
+ *
+ * @param locale the locale to look up
+ * @return whatever get(locale, LocaleKey.KIND_ANY, null) returns
+ */
+ public Object get(ULocale locale) {
+ return get(locale, LocaleKey.KIND_ANY, null);
+ }
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(ULocale, int, ULocale[]) with a null actualReturn.
+ *
+ * @param locale the locale to look up
+ * @param kind the kind code to look up, or LocaleKey.KIND_ANY
+ * @return whatever get(locale, kind, null) returns
+ */
+ public Object get(ULocale locale, int kind) {
+ return get(locale, kind, null);
+ }
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * get(ULocale, int, ULocale[]) with KIND_ANY for kind.
+ *
+ * @param locale the locale to look up
+ * @param actualReturn passed through unchanged; may be null
+ * @return whatever get(locale, LocaleKey.KIND_ANY, actualReturn) returns
+ */
+ public Object get(ULocale locale, ULocale[] actualReturn) {
+ return get(locale, LocaleKey.KIND_ANY, actualReturn);
+ }
+
+    /**
+     * Looks up the service object for a locale and kind, optionally
+     * reporting which id actually matched. A key is built with
+     * createKey(locale, kind). When actualReturn is null the lookup is a
+     * plain getKey(key). Otherwise getKey(key, String[]) is used and, if it
+     * produced a result, the reported id -- with everything up to and
+     * including the first "/" removed -- is wrapped in a ULocale and stored
+     * in actualReturn[0].
+     *
+     * @param locale the locale to look up
+     * @param kind the kind code to look up, or LocaleKey.KIND_ANY
+     * @param actualReturn if non-null, element 0 receives the matched locale
+     *        when a result is found; it is left untouched otherwise
+     * @return the object returned by getKey, which may be null
+     */
+    public Object get(ULocale locale, int kind, ULocale[] actualReturn) {
+        final Key lookupKey = createKey(locale, kind);
+        if (actualReturn != null) {
+            final String[] matched = new String[1];
+            final Object service = getKey(lookupKey, matched);
+            if (service != null) {
+                String matchedID = matched[0];
+                final int slash = matchedID.indexOf('/');
+                if (slash >= 0) {
+                    // drop the kind prefix, keeping only the locale part
+                    matchedID = matchedID.substring(slash + 1);
+                }
+                actualReturn[0] = new ULocale(matchedID);
+            }
+            return service;
+        }
+        return getKey(lookupKey);
+    }
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * registerObject(Object, ULocale, int kind, boolean visible)
+ * passing KIND_ANY for the kind, and true for the visibility.
+ *
+ * @param obj the object to register
+ * @param locale the locale to register it under
+ * @return the Factory returned by the four-argument registerObject
+ */
+ public Factory registerObject(Object obj, ULocale locale) {
+ return registerObject(obj, locale, LocaleKey.KIND_ANY, true);
+ }
+
+ /**
+ * Convenience override for callers using locales. This calls
+ * registerObject(Object, ULocale, int kind, boolean visible)
+ * passing KIND_ANY for the kind.
+ *
+ * @param obj the object to register
+ * @param locale the locale to register it under
+ * @param visible passed through as the visibility flag
+ * @return the Factory returned by the four-argument registerObject
+ */
+ public Factory registerObject(Object obj, ULocale locale, boolean visible) {
+ return registerObject(obj, locale, LocaleKey.KIND_ANY, visible);
+ }
+
+ /**
+ * Convenience function for callers using locales. This calls
+ * registerObject(Object, ULocale, int kind, boolean visible)
+ * passing true for the visibility.
+ *
+ * @param obj the object to register
+ * @param locale the locale to register it under
+ * @param kind the kind code to register it under
+ * @return the Factory returned by the four-argument registerObject
+ */
+ public Factory registerObject(Object obj, ULocale locale, int kind) {
+ return registerObject(obj, locale, kind, true);
+ }
+
+    /**
+     * Wraps obj in a new SimpleLocaleKeyFactory for the given locale, kind
+     * and visibility, and hands that factory to registerFactory.
+     *
+     * @param obj the object to register
+     * @param locale the locale to register it under
+     * @param kind the kind code to register it under
+     * @param visible the visibility flag given to the factory
+     * @return the value returned by registerFactory
+     */
+    public Factory registerObject(Object obj, ULocale locale, int kind, boolean visible) {
+        final Factory keyFactory = new SimpleLocaleKeyFactory(obj, locale, kind, visible);
+        return registerFactory(keyFactory);
+    }
+
+    /**
+     * Convenience method for callers using locales. This returns the standard
+     * Locale list, built from the Set of visible ids.
+     *
+     * @return a new array holding one Locale per visible id, in the set's
+     *         iteration order
+     */
+    public Locale[] getAvailableLocales() {
+        // TODO make this wrap getAvailableULocales later
+        // The element type must be spelled out: iterating a raw Set with a
+        // String loop variable does not compile.
+        Set<String> visIDs = getVisibleIDs();
+        Locale[] locales = new Locale[visIDs.size()];
+        int n = 0;
+        for (String id : visIDs) {
+            locales[n++] = LocaleUtility.getLocaleFromName(id);
+        }
+        return locales;
+    }
+
+    /**
+     * Convenience method for callers using locales. This returns the standard
+     * ULocale list, built from the Set of visible ids.
+     *
+     * @return a new array holding one ULocale per visible id, in the set's
+     *         iteration order
+     */
+    public ULocale[] getAvailableULocales() {
+        // The element type must be spelled out: iterating a raw Set with a
+        // String loop variable does not compile.
+        Set<String> visIDs = getVisibleIDs();
+        ULocale[] locales = new ULocale[visIDs.size()];
+        int n = 0;
+        for (String id : visIDs) {
+            locales[n++] = new ULocale(id);
+        }
+        return locales;
+    }
+
+    /**
+     * A subclass of Key that implements a locale fallback mechanism.
+     * The first locale to search for is the locale provided by the
+     * client, and the fallback locale to search for is the current
+     * default locale. If a prefix is present, the currentDescriptor
+     * includes it before the locale proper, separated by "/". This
+     * is the default key instantiated by ICULocaleService.
+     *
+     * Canonicalization adjusts the locale string so that the
+     * section before the first underscore is in lower case, and the rest
+     * is in upper case, with no trailing underscores.
+     */
+    public static class LocaleKey extends ICUService.Key {
+        private int kind;
+        private int varstart;
+        private String primaryID;
+        private String fallbackID;
+        private String currentID;
+
+        public static final int KIND_ANY = -1;
+
+        /**
+         * Creates a key of kind KIND_ANY; see the three-argument overload.
+         */
+        public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID) {
+            return createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY);
+        }
+
+        /**
+         * Creates a key from a requested id, canonicalizing it with
+         * ULocale.getName. The fallback id is taken as already canonical.
+         * Returns null when primaryID is null.
+         */
+        public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID, int kind) {
+            if (primaryID == null) {
+                return null;
+            }
+            final String canonical = ULocale.getName(primaryID);
+            return new LocaleKey(primaryID, canonical, canonicalFallbackID, kind);
+        }
+
+        /**
+         * Creates a key from a ULocale, using locale.getName() as both the
+         * requested and the canonical id. Returns null when locale is null.
+         */
+        public static LocaleKey createWithCanonical(ULocale locale, String canonicalFallbackID, int kind) {
+            if (locale == null) {
+                return null;
+            }
+            final String localeName = locale.getName();
+            return new LocaleKey(localeName, localeName, canonicalFallbackID, kind);
+        }
+
+        /**
+         * PrimaryID is the user's requested locale string,
+         * canonicalPrimaryID is this string in canonical form,
+         * fallbackID is the current default locale's string in
+         * canonical form.
+         */
+        protected LocaleKey(String primaryID, String canonicalPrimaryID, String canonicalFallbackID, int kind) {
+            super(primaryID);
+            this.kind = kind;
+
+            final boolean isRoot =
+                    canonicalPrimaryID == null || canonicalPrimaryID.equalsIgnoreCase("root");
+            if (isRoot) {
+                // varstart keeps its default of 0 here, so currentID below becomes "".
+                this.primaryID = "";
+                this.fallbackID = null;
+            } else {
+                final int keywordStart = canonicalPrimaryID.indexOf('@');
+                final boolean rootWithKeywords =
+                        keywordStart == 4 && canonicalPrimaryID.regionMatches(true, 0, "root", 0, 4);
+                if (rootWithKeywords) {
+                    // "root@..." : keep only the keyword part; root has no fallback
+                    this.primaryID = canonicalPrimaryID.substring(4);
+                    this.varstart = 0;
+                    this.fallbackID = null;
+                } else {
+                    this.primaryID = canonicalPrimaryID;
+                    this.varstart = keywordStart;
+
+                    final boolean noDistinctFallback =
+                            canonicalFallbackID == null || canonicalPrimaryID.equals(canonicalFallbackID);
+                    this.fallbackID = noDistinctFallback ? "" : canonicalFallbackID;
+                }
+            }
+
+            // The current id is the primary id without its keyword part.
+            if (this.varstart == -1) {
+                this.currentID = this.primaryID;
+            } else {
+                this.currentID = this.primaryID.substring(0, this.varstart);
+            }
+        }
+
+        /**
+         * Return the prefix associated with the kind, or null if the kind is KIND_ANY.
+         */
+        public String prefix() {
+            if (kind == KIND_ANY) {
+                return null;
+            }
+            return Integer.toString(kind());
+        }
+
+        /**
+         * Return the kind code associated with this key.
+         */
+        public int kind() {
+            return kind;
+        }
+
+        /**
+         * Return the (canonical) original ID.
+         */
+        public String canonicalID() {
+            return primaryID;
+        }
+
+        /**
+         * Return the (canonical) current ID, or null if no current id.
+         */
+        public String currentID() {
+            return currentID;
+        }
+
+        /**
+         * Return the (canonical) current descriptor, or null if no current id.
+         * Includes the keywords, whereas the ID does not include keywords.
+         * The form is the prefix (omitted for KIND_ANY), then "/", then the
+         * current id, then the keyword part of the primary id if there is one.
+         */
+        public String currentDescriptor() {
+            final String id = currentID();
+            if (id == null) {
+                return null;
+            }
+            final StringBuilder descriptor = new StringBuilder(); // default capacity 16 is usually good enough
+            if (kind != KIND_ANY) {
+                descriptor.append(prefix());
+            }
+            descriptor.append('/').append(id);
+            if (varstart != -1) {
+                descriptor.append(primaryID.substring(varstart));
+            }
+            return descriptor.toString();
+        }
+
+        /**
+         * Convenience method to return the locale corresponding to the (canonical) original ID.
+         */
+        public ULocale canonicalLocale() {
+            return new ULocale(primaryID);
+        }
+
+        /**
+         * Convenience method to return the ulocale corresponding to the (canonical) currentID,
+         * with the keyword part of the primary id re-attached when there is one.
+         */
+        public ULocale currentLocale() {
+            final String localeName =
+                    varstart == -1 ? currentID : currentID + primaryID.substring(varstart);
+            return new ULocale(localeName);
+        }
+
+        /**
+         * If the key has a fallback, modify the key and return true,
+         * otherwise return false.
+         *
+         * First falls back through the primary ID, then through
+         * the fallbackID. The final fallback is "" (root)
+         * unless the primary id was "" (root), in which case
+         * there is no fallback.
+         */
+        public boolean fallback() {
+            final int lastUnderscore = currentID.lastIndexOf('_');
+            if (lastUnderscore >= 0) {
+                // Cut at the last underscore, also swallowing any run of
+                // underscores directly before it (handle zh__PINYIN).
+                int end = lastUnderscore;
+                while (end > 0 && currentID.charAt(end - 1) == '_') {
+                    --end;
+                }
+                currentID = currentID.substring(0, end);
+                return true;
+            }
+
+            if (fallbackID == null) {
+                currentID = null;
+                return false;
+            }
+
+            // Switch to the fallback id; afterwards only root ("") remains,
+            // or nothing at all if the fallback itself was root.
+            currentID = fallbackID;
+            fallbackID = fallbackID.length() == 0 ? null : "";
+            return true;
+        }
+
+        /**
+         * If a key created from id would eventually fallback to match the
+         * canonical ID of this key, return true.
+         */
+        public boolean isFallbackOf(String id) {
+            return LocaleUtility.isFallbackOf(canonicalID(), id);
+        }
+    }
+
+    /**
+     * A subclass of Factory that uses LocaleKeys. If 'visible' the
+     * factory reports its IDs.
+     *
+     * NOTE(review): the raw Map / Set types in the method signatures below are
+     * kept exactly as found so that overrides and the Factory interface keep
+     * matching; they look like they have lost their type arguments -- confirm
+     * against the rest of the code base before tightening them.
+     */
+    public static abstract class LocaleKeyFactory implements Factory {
+        protected final String name;
+        protected final boolean visible;
+
+        public static final boolean VISIBLE = true;
+        public static final boolean INVISIBLE = false;
+
+        /**
+         * Constructor used by subclasses. The factory name is left null.
+         */
+        protected LocaleKeyFactory(boolean visible) {
+            this.visible = visible;
+            this.name = null;
+        }
+
+        /**
+         * Constructor used by subclasses.
+         */
+        protected LocaleKeyFactory(boolean visible, String name) {
+            this.visible = visible;
+            this.name = name;
+        }
+
+        /**
+         * Implement superclass abstract method. This checks the currentID of
+         * the key against the supported IDs, and passes the current locale and
+         * kind off to handleCreate (which subclasses must implement).
+         * Returns null when the key is not handled.
+         */
+        public Object create(Key key, ICUService service) {
+            if (!handlesKey(key)) {
+                return null;
+            }
+            LocaleKey lkey = (LocaleKey) key;
+            int kind = lkey.kind();
+            ULocale uloc = lkey.currentLocale();
+            return handleCreate(uloc, kind, service);
+        }
+
+        /**
+         * Return true if key is non-null and its current id is one of the
+         * supported ids.
+         */
+        protected boolean handlesKey(Key key) {
+            if (key == null) {
+                return false;
+            }
+            String id = key.currentID();
+            Set<String> supported = getSupportedIDs();
+            return supported.contains(id);
+        }
+
+        /**
+         * Override of superclass method. A visible factory maps every
+         * supported id to itself in result; an invisible one removes every
+         * supported id from result.
+         */
+        public void updateVisibleIDs(Map result) {
+            // The element type must be spelled out: iterating a raw Set with
+            // a String loop variable does not compile.
+            Set<String> supported = getSupportedIDs();
+            if (visible) {
+                for (String id : supported) {
+                    result.put(id, this);
+                }
+            } else {
+                for (String id : supported) {
+                    result.remove(id);
+                }
+            }
+        }
+
+        /**
+         * Return a localized name for the locale represented by id.
+         * When locale is null the id itself is returned.
+         */
+        public String getDisplayName(String id, ULocale locale) {
+            // assume if the user called this on us, we must have handled some fallback of this id
+            if (locale == null) {
+                return id;
+            }
+            ULocale loc = new ULocale(id);
+            return loc.getDisplayName(locale);
+        }
+
+        ///CLOVER:OFF
+        /**
+         * Utility method used by create(Key, ICUService). Subclasses can
+         * implement this instead of create. This default returns null.
+         */
+        protected Object handleCreate(ULocale loc, int kind, ICUService service) {
+            return null;
+        }
+        ///CLOVER:ON
+
+        /**
+         * Return true if this id is one the factory supports (visible or
+         * otherwise).
+         */
+        protected boolean isSupportedID(String id) {
+            return getSupportedIDs().contains(id);
+        }
+
+        /**
+         * Return the set of ids that this factory supports (visible or
+         * otherwise). This can be called often and might need to be
+         * cached if it is expensive to create. This default returns an
+         * empty set.
+         */
+        protected Set getSupportedIDs() {
+            return Collections.emptySet();
+        }
+
+        /**
+         * For debugging.
+         */
+        public String toString() {
+            StringBuilder buf = new StringBuilder(super.toString());
+            if (name != null) {
+                buf.append(", name: ");
+                buf.append(name);
+            }
+            buf.append(", visible: ");
+            buf.append(visible);
+            return buf.toString();
+        }
+    }
+
+    /**
+     * A LocaleKeyFactory that just returns a single object for a kind/locale.
+     * The locale is remembered by its base name.
+     */
+    public static class SimpleLocaleKeyFactory extends LocaleKeyFactory {
+        private final Object obj;
+        private final String id;
+        private final int kind;
+
+        // TODO: remove when we no longer need this
+        public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible) {
+            this(obj, locale, kind, visible, null);
+        }
+
+        /**
+         * Creates a factory serving obj for locale.getBaseName() and kind.
+         */
+        public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible, String name) {
+            super(visible, name);
+
+            this.obj = obj;
+            this.id = locale.getBaseName();
+            this.kind = kind;
+        }
+
+        /**
+         * Returns the service object if kind/locale match. Service is not used.
+         * The kind matches when this factory was registered with KIND_ANY or
+         * with the key's kind; the locale matches when the key's current id
+         * equals this factory's id. Returns null otherwise.
+         */
+        public Object create(Key key, ICUService service) {
+            final LocaleKey localeKey = (LocaleKey) key;
+            final boolean kindMatches =
+                    kind == LocaleKey.KIND_ANY || kind == localeKey.kind();
+            if (kindMatches && id.equals(localeKey.currentID())) {
+                return obj;
+            }
+            return null;
+        }
+
+        /**
+         * True only for the single id this factory was created with.
+         */
+        protected boolean isSupportedID(String idToCheck) {
+            return this.id.equals(idToCheck);
+        }
+
+        /**
+         * Adds this factory under its id when visible, removes the id otherwise.
+         */
+        public void updateVisibleIDs(Map result) {
+            if (!visible) {
+                result.remove(id);
+            } else {
+                result.put(id, this);
+            }
+        }
+
+        /**
+         * For debugging: the superclass description plus id and kind.
+         */
+        public String toString() {
+            return new StringBuilder(super.toString())
+                    .append(", id: ")
+                    .append(id)
+                    .append(", kind: ")
+                    .append(kind)
+                    .toString();
+        }
+    }
+
+    /**
+     * A LocaleKeyFactory that creates a service based on the ICU locale data.
+     * This is a base class for most ICU factories. Subclasses instantiate it
+     * with a constructor that takes a bundle name, which determines the supported
+     * IDs. Subclasses then override handleCreate to create the actual service
+     * object. The default implementation returns a resource bundle.
+     */
+    public static class ICUResourceBundleFactory extends LocaleKeyFactory {
+        protected final String bundleName;
+
+        /**
+         * Convenience constructor that uses the main ICU bundle name.
+         */
+        public ICUResourceBundleFactory() {
+            this(ICUResourceBundle.ICU_BASE_NAME);
+        }
+
+        /**
+         * A service factory based on ICU resource data in resources
+         * with the given name. The factory is created visible.
+         */
+        public ICUResourceBundleFactory(String bundleName) {
+            super(true);
+
+            this.bundleName = bundleName;
+        }
+
+        /**
+         * Return the supported IDs. This is the set of all locale names for the bundleName.
+         */
+        protected Set getSupportedIDs() {
+            return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader());
+        }
+
+        /**
+         * Override of superclass method. Maps every available locale name of
+         * the bundle to this factory in result.
+         */
+        public void updateVisibleIDs(Map result) {
+            // The element type must be spelled out: iterating a raw Set with
+            // a String loop variable does not compile.
+            Set<String> visibleIDs = ICUResourceBundle.getAvailableLocaleNameSet(bundleName, loader()); // only visible ids
+            for (String id : visibleIDs) {
+                result.put(id, this);
+            }
+        }
+
+        /**
+         * Create the service. The default implementation returns the resource bundle
+         * for the locale, ignoring kind, and service.
+         */
+        protected Object handleCreate(ULocale loc, int kind, ICUService service) {
+            return ICUResourceBundle.getBundleInstance(bundleName, loc, loader());
+        }
+
+        /**
+         * Return the class loader of this object's runtime class, or the
+         * fallback class loader from Utility when that is null.
+         */
+        protected ClassLoader loader() {
+            ClassLoader cl = getClass().getClassLoader();
+            if (cl == null) {
+                cl = Utility.getFallbackClassLoader();
+            }
+            return cl;
+        }
+
+        /**
+         * For debugging: the superclass description plus the bundle name.
+         */
+        public String toString() {
+            return super.toString() + ", bundle: " + bundleName;
+        }
+    }
+
+ /**
+ * Return the name of the current fallback locale. If it has changed since this was
+ * last accessed, the service cache is cleared.
+ *
+ * @return the base name recorded for the most recently observed default ULocale
+ */
+ public String validateFallbackLocale() {
+ ULocale loc = ULocale.getDefault();
+ // Identity comparison: any ULocale instance other than the remembered one counts as a change.
+ if (loc != fallbackLocale) {
+ synchronized (this) {
+ // Re-test under the lock, presumably in case another thread already did the refresh.
+ if (loc != fallbackLocale) {
+ fallbackLocale = loc;
+ fallbackLocaleName = loc.getBaseName();
+ clearServiceCache();
+ }
+ }
+ }
+ // NOTE(review): fallbackLocale and fallbackLocaleName are plain (non-volatile) fields and are
+ // read here outside the synchronized block -- confirm this is safe for concurrent callers.
+ return fallbackLocaleName;
+ }
+
+ /**
+ * Create a LocaleKey of kind KIND_ANY for id, using the name returned by
+ * validateFallbackLocale() as the fallback id.
+ *
+ * @param id the requested locale id
+ * @return the key from LocaleKey.createWithCanonicalFallback
+ */
+ public Key createKey(String id) {
+ return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale());
+ }
+
+ /**
+ * Create a LocaleKey of the given kind for id, using the name returned by
+ * validateFallbackLocale() as the fallback id.
+ *
+ * @param id the requested locale id
+ * @param kind the kind code for the key
+ * @return the key from LocaleKey.createWithCanonicalFallback
+ */
+ public Key createKey(String id, int kind) {
+ return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale(), kind);
+ }
+
+ /**
+ * Create a LocaleKey of the given kind for a ULocale, using the name returned by
+ * validateFallbackLocale() as the fallback id.
+ *
+ * @param l the requested locale
+ * @param kind the kind code for the key
+ * @return the key from LocaleKey.createWithCanonical
+ */
+ public Key createKey(ULocale l, int kind) {
+ return LocaleKey.createWithCanonical(l, validateFallbackLocale(), kind);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICULogger.java b/main/classes/core/src/com/ibm/icu/impl/ICULogger.java
new file mode 100644
index 00000000000..4ccd8067421
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICULogger.java
@@ -0,0 +1,190 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.logging.ConsoleHandler;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Extends the Java Logger class adding methods to turn logging on and off.
+ * Classes where logging is wanted contain a static ICULogger object.
+ *
+ * The system property "icu4j.debug.logging" controls what
+ * {@link #getICULogger(String)} hands out:
+ * <ul>
+ * <li>not set (or not readable): no logger is created and null is returned,
+ *     which keeps resource usage down when logging is not desired;</li>
+ * <li>set to "all": a logger is created with logging turned on;</li>
+ * <li>set to any other value (for example "on"): a logger is created with
+ *     logging turned off, and individual loggers need to be turned on
+ *     manually (e.g. TimeZone.TimeZoneLogger.turnLoggingOn()).</li>
+ * </ul>
+ *
+ * Usage in code:
+ * <pre>
+ * public class Class {
+ *     // Create logger object (usually with the class name)
+ *     public static ICULogger ClassLogger = ICULogger.getICULogger(Class.class.getName());
+ *
+ *     // Method that will use logger.
+ *     public boolean hasSomething(Object obj) {
+ *         if (obj == null) {
+ *             // Log that obj is null.
+ *             // Note: Good to check for null and if logging is turned on to minimize
+ *             // resource usage when logging is not needed.
+ *             if (ClassLogger != null &amp;&amp; ClassLogger.isLoggingOn()) {
+ *                 ClassLogger.warning("obj is null so false was returned by default.");
+ *             }
+ *             return false;
+ *         }
+ *
+ *         ...
+ *
+ *     }
+ * }
+ * </pre>
+ *
+ * Turning on logging (using the default settings):
+ * <pre>
+ * java -Dicu4j.debug.logging=all program
+ * </pre>
+ */
+
+public class ICULogger extends Logger {
+    private enum LOGGER_STATUS { ON, OFF, NULL }
+    private static final String GLOBAL_FLAG_TURN_ON_LOGGING = "all";
+    private static final String SYSTEM_PROP_LOGGER = "icu4j.debug.logging";
+
+    // null until the first call to setStatus
+    private LOGGER_STATUS currentStatus;
+
+    /**
+     * ICULogger constructor that calls the parent constructor with the desired parameters.
+     */
+    private ICULogger(String name, String resourceBundleName) {
+        super(name, resourceBundleName);
+    }
+
+    /**
+     * Set the status to either on or off. Switching to ON sets the level of
+     * the logger to INFO; switching to OFF sets it to OFF. Nothing happens
+     * when the status is unchanged.
+     */
+    private void setStatus(LOGGER_STATUS newStatus) {
+        if (currentStatus == newStatus) {
+            return;
+        }
+
+        /* Default to level INFO. This is applied on every switch to ON,
+         * including the very first one, when currentStatus is still null. */
+        if (newStatus == LOGGER_STATUS.ON) {
+            this.setLevel(Level.INFO);
+        }
+
+        currentStatus = newStatus;
+
+        if (currentStatus == LOGGER_STATUS.OFF) {
+            this.setLevel(Level.OFF);
+        }
+    }
+
+    /**
+     * Check the system property SYSTEM_PROP_LOGGER.
+     *
+     * @return ON if the property equals "all", OFF if it has any other
+     *         value, and NULL if it is not set or reading it raised a
+     *         SecurityException
+     */
+    private static LOGGER_STATUS checkGlobalLoggingFlag() {
+        try {
+            String prop = System.getProperty(SYSTEM_PROP_LOGGER);
+
+            if (prop != null) {
+                if (prop.equals(GLOBAL_FLAG_TURN_ON_LOGGING)) {
+                    return LOGGER_STATUS.ON;
+                }
+                return LOGGER_STATUS.OFF;
+            }
+        } catch (SecurityException e) {
+            // Ignore the security exception and fall-through
+        }
+
+        return LOGGER_STATUS.NULL;
+    }
+
+    /**
+     * Instantiates a new ICULogger object with logging turned off by default
+     * unless the system property "icu4j.debug.logging" is set to "all".
+     * No resource bundle is used for message localization.
+     *
+     * @param name to be use by the logger (usually is the class name)
+     * @return a new ICULogger object, or null if the system property is not set
+     * @draft ICU 4.4
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static ICULogger getICULogger(String name) {
+        return getICULogger(name, null);
+    }
+
+    /**
+     * Instantiates a new ICULogger object with logging turned off by default
+     * unless the system property "icu4j.debug.logging" is set to "all".
+     * Each logger created here gets its own ConsoleHandler.
+     *
+     * @param name to be use by the logger (usually is the class name)
+     * @param resourceBundleName name to localize messages (can be null)
+     * @return a new ICULogger object, or null if the system property is not set
+     * @draft ICU 4.4
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static ICULogger getICULogger(String name, String resourceBundleName) {
+        LOGGER_STATUS flag = checkGlobalLoggingFlag();
+        if (flag != LOGGER_STATUS.NULL) {
+            ICULogger logger = new ICULogger(name, resourceBundleName);
+
+            /* Add a default handler to logger*/
+            logger.addHandler(new ConsoleHandler());
+
+            /* Turn off logging by default unless SYSTEM_PROP_LOGGER property is set to "all" */
+            if (flag == LOGGER_STATUS.ON) {
+                logger.turnOnLogging();
+            } else {
+                logger.turnOffLogging();
+            }
+
+            return logger;
+        }
+        return null;
+    }
+
+    /**
+     * Determined if logging is turned on or off. The return value is true if logging is on.
+     *
+     * @return whether logging is turned on or off.
+     * @draft ICU 4.4
+     * @provisional This API might change or be removed in a future release.
+     */
+    public boolean isLoggingOn() {
+        return currentStatus == LOGGER_STATUS.ON;
+    }
+
+    /**
+     * Turn logging on.
+     *
+     * @draft ICU 4.4
+     * @provisional This API might change or be removed in a future release.
+     */
+    public void turnOnLogging() {
+        setStatus(LOGGER_STATUS.ON);
+    }
+
+    /**
+     * Turn logging off.
+     *
+     * @draft ICU 4.4
+     * @provisional This API might change or be removed in a future release.
+     */
+    public void turnOffLogging() {
+        setStatus(LOGGER_STATUS.OFF);
+    }
+
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java b/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java
new file mode 100644
index 00000000000..90c9206c102
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUNotifier.java
@@ -0,0 +1,169 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.ArrayList;
+import java.util.EventListener;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Abstract implementation of a notification facility. Clients add
+ * EventListeners with addListener and remove them with removeListener.
+ * Notifiers call notifyChanged when they wish to notify listeners.
+ * This queues the listener list on the notification thread, which
+ * eventually dequeues the list and calls notifyListener on each
+ * listener in the list.
+ *
+ * Subclasses override acceptsListener and notifyListener
+ * to add type-safe notification. AcceptsListener should return
+ * true if the listener is of the appropriate type; ICUNotifier
+ * itself will ensure the listener is non-null and that the
+ * identical listener is not already registered with the Notifier.
+ * NotifyListener should cast the listener to the appropriate
+ * type and call the appropriate method on the listener.
+ */
+public abstract class ICUNotifier {
+    private final Object notifyLock = new Object();
+    private NotifyThread notifyThread;
+    // null whenever no listener is registered
+    private List<EventListener> listeners;
+
+    /**
+     * Add a listener to be notified when notifyChanged is called.
+     * The listener must not be null. AcceptsListener must return
+     * true for the listener. Attempts to concurrently
+     * register the identical listener more than once will be
+     * silently ignored.
+     *
+     * @param l the listener to add
+     * @throws NullPointerException if l is null
+     * @throws IllegalStateException if acceptsListener(l) returns false
+     */
+    public void addListener(EventListener l) {
+        if (l == null) {
+            throw new NullPointerException();
+        }
+
+        if (acceptsListener(l)) {
+            synchronized (notifyLock) {
+                if (listeners == null) {
+                    listeners = new ArrayList<EventListener>();
+                } else {
+                    // identity equality check
+                    for (EventListener ll : listeners) {
+                        if (ll == l) {
+                            return;
+                        }
+                    }
+                }
+
+                listeners.add(l);
+            }
+        } else {
+            throw new IllegalStateException("Listener invalid for this notifier.");
+        }
+    }
+
+    /**
+     * Stop notifying this listener. The listener must
+     * not be null. Attempts to remove a listener that is
+     * not registered will be silently ignored.
+     *
+     * @param l the listener to remove
+     * @throws NullPointerException if l is null
+     */
+    public void removeListener(EventListener l) {
+        if (l == null) {
+            throw new NullPointerException();
+        }
+        synchronized (notifyLock) {
+            if (listeners != null) {
+                // identity equality check
+                Iterator<EventListener> iter = listeners.iterator();
+                while (iter.hasNext()) {
+                    if (iter.next() == l) {
+                        iter.remove();
+                        if (listeners.size() == 0) {
+                            listeners = null;
+                        }
+                        return;
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Queue a notification on the notification thread for the current
+     * listeners. When the thread unqueues the notification, notifyListener
+     * is called on each listener from the notification thread. The
+     * notification thread is created (as a daemon) on first use. Nothing
+     * happens when no listener is registered.
+     */
+    public void notifyChanged() {
+        if (listeners != null) {
+            synchronized (notifyLock) {
+                if (listeners != null) {
+                    if (notifyThread == null) {
+                        notifyThread = new NotifyThread(this);
+                        notifyThread.setDaemon(true);
+                        notifyThread.start();
+                    }
+                    // queue a snapshot, so later add/remove calls do not affect this notification
+                    notifyThread.queue(listeners.toArray(new EventListener[listeners.size()]));
+                }
+            }
+        }
+    }
+
+    /**
+     * The notification thread.
+     */
+    private static class NotifyThread extends Thread {
+        private final ICUNotifier notifier;
+        private final List<EventListener[]> queue = new ArrayList<EventListener[]>();
+
+        NotifyThread(ICUNotifier notifier) {
+            this.notifier = notifier;
+        }
+
+        /**
+         * Queue the notification on the thread.
+         */
+        public void queue(EventListener[] list) {
+            synchronized (this) {
+                queue.add(list);
+                notify();
+            }
+        }
+
+        /**
+         * Wait for a notification to be queued, then notify all
+         * listeners listed in the notification.
+         */
+        public void run() {
+            EventListener[] list;
+            while (true) {
+                try {
+                    synchronized (this) {
+                        while (queue.isEmpty()) {
+                            wait();
+                        }
+                        list = queue.remove(0);
+                    }
+
+                    for (int i = 0; i < list.length; ++i) {
+                        notifier.notifyListener(list[i]);
+                    }
+                }
+                catch (InterruptedException ignored) {
+                    // deliberately ignored: the loop simply goes back to waiting for work
+                }
+            }
+        }
+    }
+
+    /**
+     * Subclasses implement this to return true if the listener is
+     * of the appropriate type.
+     */
+    protected abstract boolean acceptsListener(EventListener l);
+
+    /**
+     * Subclasses implement this to notify the listener.
+     */
+    protected abstract void notifyListener(EventListener l);
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java b/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java
new file mode 100644
index 00000000000..ec150708983
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICURWLock.java
@@ -0,0 +1,297 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2006, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+// See Allan Holub's 1999 column in JavaWorld, and Doug Lea's code for RWLocks with writer preference.
+
+
+/**
+ * A simple Reader/Writer lock. This assumes that there will
+ * be little writing contention. It also doesn't allow
+ * active readers to acquire and release a write lock, or
+ * deal with priority inversion issues.
+ *
+ * Access to the lock should be enclosed in a try/finally block
+ * in order to ensure that the lock is always released in case of
+ * exceptions:
+ * <pre>
+ * try {
+ *     lock.acquireRead();
+ *     // use service protected by the lock
+ * }
+ * finally {
+ *     lock.releaseRead();
+ * }
+ * </pre>
+ *
+ * The lock provides utility methods getStats, resetStats and clearStats
+ * to return statistics on the use of the lock.
+ *
+ * NOTE(review): the state check in getRead()/getWrite() and the wait() that
+ * follows it in acquireRead()/acquireWrite() are not done under one monitor,
+ * so a notification issued in between looks like it could be missed --
+ * confirm whether callers can be affected.
+ */
+public class ICURWLock {
+    private final Object writeLock = new Object(); // monitor that waiting writers block on
+    private final Object readLock = new Object(); // monitor that waiting readers block on
+    private int wwc; // waiting writers
+    private int rc; // active readers, -1 if there's an active writer
+    private int wrc; // waiting readers
+
+    private Stats stats = new Stats(); // maybe don't init to start...
+
+    /**
+     * Internal class used to gather statistics on the RWLock.
+     */
+    public final static class Stats {
+        /**
+         * Number of times read access granted (read count).
+         */
+        public int _rc;
+
+        /**
+         * Number of times concurrent read access granted (multiple read count).
+         */
+        public int _mrc;
+
+        /**
+         * Number of times blocked for read (waiting reader count).
+         */
+        public int _wrc; // wait for read
+
+        /**
+         * Number of times write access granted (writer count).
+         */
+        public int _wc;
+
+        /**
+         * Number of times blocked for write (waiting writer count).
+         */
+        public int _wwc;
+
+        private Stats() {
+        }
+
+        private Stats(int rc, int mrc, int wrc, int wc, int wwc) {
+            this._rc = rc;
+            this._mrc = mrc;
+            this._wrc = wrc;
+            this._wc = wc;
+            this._wwc = wwc;
+        }
+
+        private Stats(Stats rhs) {
+            this(rhs._rc, rhs._mrc, rhs._wrc, rhs._wc, rhs._wwc);
+        }
+
+        /**
+         * Return a string listing all the stats.
+         */
+        public String toString() {
+            return " rc: " + _rc +
+                " mrc: " + _mrc +
+                " wrc: " + _wrc +
+                " wc: " + _wc +
+                " wwc: " + _wwc;
+        }
+    }
+
+    /**
+     * Reset the stats. Returns existing stats, if any.
+     */
+    public synchronized Stats resetStats() {
+        Stats result = stats;
+        stats = new Stats();
+        return result;
+    }
+
+    /**
+     * Clear the stats (stop collecting stats). Returns existing stats, if any.
+     */
+    public synchronized Stats clearStats() {
+        Stats result = stats;
+        stats = null;
+        return result;
+    }
+
+    /**
+     * Return a snapshot of the current stats. This does not reset the stats.
+     * Returns null when stats are not being collected.
+     */
+    public synchronized Stats getStats() {
+        return stats == null ? null : new Stats(stats);
+    }
+
+    // utilities
+
+    // Record a granted read: one more active reader, plus stats.
+    private synchronized boolean gotRead() {
+        ++rc;
+        if (stats != null) {
+            ++stats._rc;
+            if (rc > 1) ++stats._mrc;
+        }
+        return true;
+    }
+
+    // First attempt at a read: granted unless a writer is active or waiting;
+    // otherwise the caller is counted as a waiting reader.
+    private synchronized boolean getRead() {
+        if (rc >= 0 && wwc == 0) {
+            return gotRead();
+        }
+        ++wrc;
+        return false;
+    }
+
+    // Later attempt by a reader that was woken up; on success it stops
+    // counting as a waiting reader.
+    private synchronized boolean retryRead() {
+        if (stats != null) ++stats._wrc;
+        if (rc >= 0 && wwc == 0) {
+            --wrc;
+            return gotRead();
+        }
+        return false;
+    }
+
+    // Drop one active reader; true when that was the last reader and a
+    // writer is waiting, i.e. a writer needs to be notified.
+    private synchronized boolean finishRead() {
+        if (rc > 0) {
+            return (0 == --rc && wwc > 0);
+        }
+        throw new IllegalStateException("no current reader to release");
+    }
+
+    // Record a granted write: rc == -1 marks the active writer.
+    private synchronized boolean gotWrite() {
+        rc = -1;
+        if (stats != null) {
+            ++stats._wc;
+        }
+        return true;
+    }
+
+    // First attempt at a write: granted only when there is no active reader
+    // or writer; otherwise the caller is counted as a waiting writer.
+    private synchronized boolean getWrite() {
+        if (rc == 0) {
+            return gotWrite();
+        }
+        ++wwc;
+        return false;
+    }
+
+    // Later attempt by a writer that was woken up; on success it stops
+    // counting as a waiting writer.
+    private synchronized boolean retryWrite() {
+        if (stats != null) ++stats._wwc;
+        if (rc == 0) {
+            --wwc;
+            return gotWrite();
+        }
+        return false;
+    }
+
+    private static final int NOTIFY_NONE = 0;
+    private static final int NOTIFY_WRITERS = 1;
+    private static final int NOTIFY_READERS = 2;
+
+    // End the active write and report who should be woken: waiting writers
+    // take precedence over waiting readers.
+    private synchronized int finishWrite() {
+        if (rc < 0) {
+            rc = 0;
+            if (wwc > 0) {
+                return NOTIFY_WRITERS;
+            } else if (wrc > 0) {
+                return NOTIFY_READERS;
+            } else {
+                return NOTIFY_NONE;
+            }
+        }
+        throw new IllegalStateException("no current writer to release");
+    }
+
+    /**
+     * Acquire a read lock, blocking until a read lock is
+     * available. Multiple readers can concurrently hold the read
+     * lock.
+     *
+     * If there's a writer, or a waiting writer, increment the
+     * waiting reader count and block on the reader monitor. Otherwise
+     * increment the active reader count and return. Caller must call
+     * releaseRead when done (for example, in a finally block).
+     *
+     * An interrupt received while blocked does not abort the wait; the
+     * thread's interrupt status is restored before this method returns.
+     */
+    public void acquireRead() {
+        if (getRead()) {
+            return;
+        }
+        boolean interrupted = false;
+        try {
+            for (;;) {
+                try {
+                    synchronized (readLock) {
+                        readLock.wait();
+                    }
+                    if (retryRead()) {
+                        return;
+                    }
+                }
+                catch (InterruptedException e) {
+                    // keep waiting, but remember the interrupt for the caller
+                    interrupted = true;
+                }
+            }
+        } finally {
+            if (interrupted) {
+                Thread.currentThread().interrupt();
+            }
+        }
+    }
+
+    /**
+     * Release a read lock and return. An error will be thrown
+     * if a read lock is not currently held.
+     *
+     * If this is the last active reader and a writer is waiting, notify
+     * one waiting writer. Call when finished with work
+     * controlled by acquireRead.
+     */
+    public void releaseRead() {
+        if (finishRead()) {
+            synchronized (writeLock) {
+                writeLock.notify();
+            }
+        }
+    }
+
+    /**
+     * Acquire the write lock, blocking until the write lock is
+     * available. Only one writer can acquire the write lock, and
+     * when held, no readers can acquire the read lock.
+     *
+     * If there are no active readers and no active writer, mark as
+     * having an active writer and return. Otherwise, increment the
+     * waiting writer count and block on the writer monitor. Caller
+     * must call releaseWrite when done (for example, in a finally
+     * block).
+     *
+     * An interrupt received while blocked does not abort the wait; the
+     * thread's interrupt status is restored before this method returns.
+     */
+    public void acquireWrite() {
+        if (getWrite()) {
+            return;
+        }
+        boolean interrupted = false;
+        try {
+            for (;;) {
+                try {
+                    synchronized (writeLock) {
+                        writeLock.wait();
+                    }
+                    if (retryWrite()) {
+                        return;
+                    }
+                }
+                catch (InterruptedException e) {
+                    // keep waiting, but remember the interrupt for the caller
+                    interrupted = true;
+                }
+            }
+        } finally {
+            if (interrupted) {
+                Thread.currentThread().interrupt();
+            }
+        }
+    }
+
+    /**
+     * Release the write lock and return. An error will be thrown
+     * if the write lock is not currently held.
+     *
+     * If there are waiting writers, notify one of them. Otherwise,
+     * if there are waiting readers, notify all of them. Call when
+     * finished with work controlled by acquireWrite.
+     */
+    public void releaseWrite() {
+        switch (finishWrite()) {
+        case NOTIFY_WRITERS:
+            synchronized (writeLock) {
+                writeLock.notify();
+            }
+            break;
+        case NOTIFY_READERS:
+            synchronized (readLock) {
+                readLock.notifyAll();
+            }
+            break;
+        case NOTIFY_NONE:
+            break;
+        }
+    }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java
new file mode 100644
index 00000000000..5cc12fecb0e
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java
@@ -0,0 +1,1473 @@
+/*
+ * *****************************************************************************
+ * Copyright (C) 2005-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ * *****************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.lang.ref.SoftReference;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.concurrent.ConcurrentHashMap;
+
+import com.ibm.icu.impl.URLHandler.URLVisitor;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.UResourceBundleIterator;
+import com.ibm.icu.util.VersionInfo;
+
+public class ICUResourceBundle extends UResourceBundle {
+ /**
+ * The data path to be used with getBundleInstance API
+ * (prefix of every base name defined below).
+ */
+ protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
+ /**
+ * The data directory name, "data/icudt" followed by
+ * VersionInfo.ICU_DATA_VERSION_PATH; it is appended to ICU_DATA_PATH
+ * to form ICU_BASE_NAME.
+ */
+ public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
+
+ /**
+ * The base name of ICU data to be used with getBundleInstance API
+ * (ICU_DATA_PATH followed by ICU_BUNDLE).
+ */
+ public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
+
+ /**
+ * The base name of collation data to be used with getBundleInstance API
+ */
+ public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
+
+ /**
+ * The base name of rbbi data to be used with getData API
+ * (only the "/brkitr" suffix, without ICU_BASE_NAME).
+ */
+ public static final String ICU_BRKITR_NAME = "/brkitr";
+
+ /**
+ * The base name of rbbi data to be used with getBundleInstance API
+ */
+ public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME;
+
+ /**
+ * The base name of rbnf data to be used with getBundleInstance API
+ */
+ public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
+
+ /**
+ * The base name of transliterator data to be used with getBundleInstance API
+ */
+ public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
+
+ // Base names of the "lang", "curr", "region" and "zone" sub-trees of ICU_BASE_NAME.
+ public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
+ public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
+ public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
+ public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
+
+ /**
+ * The actual path of the resource; returned by getResPath().
+ */
+ protected String resPath;
+
+ /**
+  * The class loader constant to be used with getBundleInstance API.
+  * It is the loader of ICUData, or the fallback loader from Utility when
+  * ICUData reports no class loader.
+  */
+ public static final ClassLoader ICU_DATA_CLASS_LOADER;
+ static {
+     ClassLoader dataLoader = ICUData.class.getClassLoader();
+     ICU_DATA_CLASS_LOADER = (dataLoader != null)
+             ? dataLoader
+             : Utility.getFallbackClassLoader();
+ }
+
+ /**
+ * The name of the resource containing the installed locales
+ */
+ protected static final String INSTALLED_LOCALES = "InstalledLocales";
+
+ // Loading status values reported by getLoadingStatus(): the resource came from a
+ // fallback bundle, the root bundle, the default locale, or the requested locale itself.
+ public static final int FROM_FALLBACK = 1, FROM_ROOT = 2, FROM_DEFAULT = 3, FROM_LOCALE = 4;
+
+ // Current loading status; -1 until one of the setLoadingStatus methods has been called.
+ private int loadingStatus = -1;
+
+ /**
+ * Sets the loading status of this resource.
+ *
+ * @param newStatus the status to store; callers in this class pass one of
+ * FROM_FALLBACK, FROM_ROOT, FROM_DEFAULT or FROM_LOCALE (the value is not validated)
+ */
+ public void setLoadingStatus(int newStatus) {
+ loadingStatus = newStatus;
+ }
+ /**
+ * Returns the loading status of a particular resource.
+ *
+ * @return FROM_FALLBACK if the resource is fetched from fallback bundle
+ * FROM_ROOT if the resource is fetched from root bundle.
+ * FROM_DEFAULT if the resource is fetched from the default locale.
+ * FROM_LOCALE if the resource is fetched from the requested locale.
+ * -1 if no status has been set yet.
+ */
+ public int getLoadingStatus() {
+ return loadingStatus;
+ }
+
+ /**
+  * Derives the loading status from this bundle's own locale ID and stores it:
+  * FROM_ROOT when this bundle is "root", FROM_LOCALE when its locale ID equals
+  * the requested one, FROM_FALLBACK otherwise.
+  *
+  * @param requestedLocale the locale ID that was originally asked for
+  */
+ public void setLoadingStatus(String requestedLocale){
+     final String ownLocale = getLocaleID();
+     final int status = ownLocale.equals("root")
+             ? FROM_ROOT
+             : (ownLocale.equals(requestedLocale) ? FROM_LOCALE : FROM_FALLBACK);
+     setLoadingStatus(status);
+ }
+
+ /**
+ * Returns the respath of this bundle, i.e. the current value of the
+ * resPath field.
+ * @return the respath of the bundle
+ */
+ public String getResPath(){
+ return resPath;
+ }
+
+ /**
+  * Returns a functionally equivalent locale, considering keywords as well, for the specified keyword.
+  *
+  * The search walks from the base name of locID up through the parent bundles, in up to three passes:
+  * first it locates the nearest "default" entry under resName; then it locates the nearest bundle
+  * holding the requested (or defaulted) keyword value; if that fails and a different default value
+  * is known, it repeats the search for the default value.
+  *
+  * @param baseName resource specifier
+  * @param loader class loader used to obtain the list of installed locales when isAvailable
+  *               is non-null
+  * @param resName top level resource to consider (such as "collations")
+  * @param keyword a particular keyword to consider (such as "collation" )
+  * @param locID The requested locale
+  * @param isAvailable If non-null, 1-element array of fillin parameter that indicates whether the
+  * requested locale was available. The locale is defined as 'available' if it physically
+  * exists within the specified tree and included in 'InstalledLocales'.
+  * @param omitDefault if true, omit keyword and value if default.
+  * 'de_DE\@collation=standard' -> 'de_DE'
+  * @return the locale
+  * @throws MissingResourceException if neither the requested nor the default keyword value
+  *         can be found in any bundle of the chain
+  * @internal ICU 3.0
+  */
+ public static final ULocale getFunctionalEquivalent(String baseName, ClassLoader loader,
+         String resName, String keyword, ULocale locID,
+         boolean isAvailable[], boolean omitDefault) {
+     String kwVal = locID.getKeywordValue(keyword);
+     String baseLoc = locID.getBaseName();
+     String defStr = null;
+     ULocale parent = new ULocale(baseLoc);
+     ULocale defLoc = null; // locale where default (found) resource is
+     boolean lookForDefault = false; // true if kwVal needs to be set
+     ULocale fullBase = null; // base locale of found (target) resource
+     int defDepth = 0; // depth of 'default' marker
+     int resDepth = 0; // depth of found resource;
+
+     if ((kwVal == null) || (kwVal.length() == 0)
+             || kwVal.equals(DEFAULT_TAG)) {
+         kwVal = ""; // default tag is treated as no keyword
+         lookForDefault = true;
+     }
+
+     // Check top level locale first
+     ICUResourceBundle r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent);
+     if (isAvailable != null) {
+         isAvailable[0] = false;
+         ULocale[] availableULocales = getAvailEntry(baseName, loader).getULocaleList();
+         for (int i = 0; i < availableULocales.length; i++) {
+             if (parent.equals(availableULocales[i])) {
+                 isAvailable[0] = true;
+                 break;
+             }
+         }
+     }
+     // determine in which locale (if any) the currently relevant 'default' is
+     do {
+         try {
+             ICUResourceBundle irb = (ICUResourceBundle) r.get(resName);
+             defStr = irb.getString(DEFAULT_TAG);
+             if (lookForDefault) {
+                 kwVal = defStr;
+                 lookForDefault = false;
+             }
+             defLoc = r.getULocale();
+         } catch (MissingResourceException t) {
+             // Ignore error and continue search.
+         }
+         if (defLoc == null) {
+             r = (ICUResourceBundle) r.getParent();
+             defDepth++;
+         }
+     } while ((r != null) && (defLoc == null));
+
+     // Now, search for the named resource
+     parent = new ULocale(baseLoc);
+     r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent);
+     // determine in which locale (if any) the named resource is located
+     do {
+         try {
+             ICUResourceBundle irb = (ICUResourceBundle)r.get(resName);
+             /* UResourceBundle urb = */irb.get(kwVal);
+             fullBase = irb.getULocale();
+             // If the get() completed, we have the full base locale
+             // If we fell back to an ancestor of the old 'default',
+             // we need to re calculate the "default" keyword.
+             if ((fullBase != null) && ((resDepth) > defDepth)) {
+                 defStr = irb.getString(DEFAULT_TAG);
+                 defLoc = r.getULocale();
+                 defDepth = resDepth;
+             }
+         } catch (MissingResourceException t) {
+             // Ignore error,
+         }
+         if (fullBase == null) {
+             r = (ICUResourceBundle) r.getParent();
+             resDepth++;
+         }
+     } while ((r != null) && (fullBase == null));
+
+     if (fullBase == null && // Could not find resource 'kwVal'
+             (defStr != null) && // default was defined
+             !defStr.equals(kwVal)) { // kwVal is not default
+         // couldn't find requested resource. Fall back to default.
+         kwVal = defStr; // Fall back to default.
+         parent = new ULocale(baseLoc);
+         r = (ICUResourceBundle) UResourceBundle.getBundleInstance(baseName, parent);
+         resDepth = 0;
+         // determine in which locale (if any) the named resource is located
+         do {
+             try {
+                 ICUResourceBundle irb = (ICUResourceBundle)r.get(resName);
+                 UResourceBundle urb = irb.get(kwVal);
+
+                 // if we didn't fail before this..
+                 fullBase = r.getULocale();
+
+                 // If the fetched item (urb) is in a different locale than our outer locale (r/fullBase)
+                 // then we are in a 'fallback' situation. treat as a missing resource situation.
+                 if(!fullBase.toString().equals(urb.getLocale().toString())) {
+                     fullBase = null; // fallback condition. Loop and try again.
+                 }
+
+                 // If we fell back to an ancestor of the old 'default',
+                 // we need to re calculate the "default" keyword.
+                 if ((fullBase != null) && ((resDepth) > defDepth)) {
+                     defStr = irb.getString(DEFAULT_TAG);
+                     defLoc = r.getULocale();
+                     defDepth = resDepth;
+                 }
+             } catch (MissingResourceException t) {
+                 // Ignore error, continue search.
+             }
+             if (fullBase == null) {
+                 r = (ICUResourceBundle) r.getParent();
+                 resDepth++;
+             }
+         } while ((r != null) && (fullBase == null));
+     }
+
+     if (fullBase == null) {
+         throw new MissingResourceException(
+                 "Could not find locale containing requested or default keyword.",
+                 baseName, keyword + "=" + kwVal);
+     }
+
+     // defStr stays null when no bundle in the chain defines a default; in that case the
+     // keyword value cannot be "the default" and must be kept in the result.
+     if (omitDefault
+             && defStr != null
+             && defStr.equals(kwVal) // if default was requested and
+             && resDepth <= defDepth) { // default was set in same locale or child
+         return fullBase; // Keyword value is default - no keyword needed in locale
+     } else {
+         return new ULocale(fullBase.toString() + "@" + keyword + "=" + kwVal);
+     }
+ }
+
+ /**
+ * Given a tree path and keyword, return a string enumeration of all possible values for that keyword.
+ * @param baseName resource specifier
+ * @param keyword a particular keyword to consider, must match a top level resource name
+ * within the tree. (i.e. "collations")
+ * @internal ICU 3.0
+ */
+ public static final String[] getKeywordValues(String baseName, String keyword) {
+ Set keywords = new HashSet();
+ ULocale locales[] = createULocaleList(baseName, ICU_DATA_CLASS_LOADER);
+ int i;
+
+ for (i = 0; i < locales.length; i++) {
+ try {
+ UResourceBundle b = UResourceBundle.getBundleInstance(baseName, locales[i]);
+ // downcast to ICUResourceBundle?
+ ICUResourceBundle irb = (ICUResourceBundle) (b.getObject(keyword));
+ Enumeration e = irb.getKeys();
+ while (e.hasMoreElements()) {
+ String s = e.nextElement();
+ if (!DEFAULT_TAG.equals(s)) {
+ // don't add 'default' items
+ keywords.add(s);
+ }
+ }
+ } catch (Throwable t) {
+ //System.err.println("Error in - " + new Integer(i).toString()
+ // + " - " + t.toString());
+ // ignore the err - just skip that resource
+ }
+ }
+ return keywords.toArray(new String[0]);
+ }
+
+ /**
+  * Fetches a resource by path, falling back through the parent bundles when
+  * this bundle does not contain it. Path segments are separated by '/'.
+  * For example, given a resource of the form
+  * de__PHONEBOOK{ collations{ default{ "phonebook"} } }
+  * the value of the "default" key can be reached with:
+  * <pre>
+  * UResourceBundle bundle = UResourceBundle.getBundleInstance("de__PHONEBOOK");
+  * ICUResourceBundle result = null;
+  * if(bundle instanceof ICUResourceBundle){
+  *     result = ((ICUResourceBundle) bundle).getWithFallback("collations/default");
+  * }
+  * </pre>
+  *
+  * @param path The path to the required resource key
+  * @return resource represented by the key
+  * @exception MissingResourceException If a resource was not found.
+  */
+ public ICUResourceBundle getWithFallback(String path) throws MissingResourceException {
+     // resolve the path, descending into sub levels and trying parents as needed
+     final ICUResourceBundle found = findResourceWithFallback(path, this, null);
+     if (found != null) {
+         return found;
+     }
+     String message = "Can't find resource for bundle "
+             + this.getClass().getName() + ", key " + getType();
+     throw new MissingResourceException(message, path, getKey());
+ }
+
+ /**
+ * Returns the resource at the given index of this bundle, as produced by
+ * handleGet(index, null, this).
+ *
+ * @param index position of the wanted resource
+ * @return the handleGet result cast to ICUResourceBundle
+ */
+ public ICUResourceBundle at(int index) {
+ return (ICUResourceBundle) handleGet(index, null, this);
+ }
+
+ public ICUResourceBundle at(String key) {
+ // don't ever presume the key is an int in disguise, like ResourceArray does.
+ if (this instanceof ICUResourceBundleImpl.ResourceTable) {
+ return (ICUResourceBundle) handleGet(key, null, this);
+ }
+ return null;
+ }
+
+ /**
+ * Delegates to the superclass findTopLevel(int) and narrows the result
+ * to ICUResourceBundle.
+ */
+ @Override
+ public ICUResourceBundle findTopLevel(int index) {
+ return (ICUResourceBundle) super.findTopLevel(index);
+ }
+
+ /**
+ * Delegates to the superclass findTopLevel(String) and narrows the result
+ * to ICUResourceBundle.
+ */
+ @Override
+ public ICUResourceBundle findTopLevel(String aKey) {
+ return (ICUResourceBundle) super.findTopLevel(aKey);
+ }
+
+ /**
+ * Like getWithFallback, but returns null if the resource is not found instead of
+ * throwing an exception. The lookup starts at this bundle and is performed by
+ * findResourceWithFallback.
+ * @param path the path to the resource
+ * @return the resource, or null
+ */
+ public ICUResourceBundle findWithFallback(String path) {
+ return findResourceWithFallback(path, this, null);
+ }
+
+ // will throw type mismatch exception if the resource is not a string
+ public String getStringWithFallback(String path) throws MissingResourceException {
+ return getWithFallback(path).getString();
+ }
+
+ /**
+ * Return a set of the locale names supported by a collection of resource
+ * bundles.
+ *
+ * @param bundlePrefix the prefix of the resource bundles to use.
+ * @param loader the class loader passed on to the available-locale cache entry
+ * @return the locale name set held by the cache entry for bundlePrefix
+ */
+ public static Set getAvailableLocaleNameSet(String bundlePrefix, ClassLoader loader) {
+ return getAvailEntry(bundlePrefix, loader).getLocaleNameSet();
+ }
+
+ /**
+ * Return a set of all the locale names supported by a collection of
+ * resource bundles, using ICU_BASE_NAME and ICU_DATA_CLASS_LOADER.
+ *
+ * @return the full locale name set of the base ICU data
+ */
+ public static Set getFullLocaleNameSet() {
+ return getFullLocaleNameSet(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER);
+ }
+
+ /**
+ * Return a set of all the locale names supported by a collection of
+ * resource bundles.
+ *
+ * @param bundlePrefix the prefix of the resource bundles to use.
+ * @param loader the class loader passed on to the available-locale cache entry
+ * @return the full locale name set held by the cache entry for bundlePrefix
+ */
+ public static Set getFullLocaleNameSet(String bundlePrefix, ClassLoader loader) {
+ return getAvailEntry(bundlePrefix, loader).getFullLocaleNameSet();
+ }
+
+ /**
+ * Return a set of the locale names supported by a collection of resource
+ * bundles, using ICU_BASE_NAME and ICU_DATA_CLASS_LOADER.
+ *
+ * @return the locale name set of the base ICU data
+ */
+ public static Set getAvailableLocaleNameSet() {
+ return getAvailableLocaleNameSet(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER);
+ }
+
+ /**
+ * Get the set of ULocales installed in the specified bundles.
+ * @param baseName the bundle prefix identifying the resource tree
+ * @param loader the class loader passed on to the available-locale cache entry
+ * @return the list of available locales; this is the array held by the cache
+ * entry, not a copy
+ */
+ public static final ULocale[] getAvailableULocales(String baseName, ClassLoader loader) {
+ return getAvailEntry(baseName, loader).getULocaleList();
+ }
+
+ /**
+ * Get the set of ULocales installed in the base bundle
+ * (ICU_BASE_NAME loaded through ICU_DATA_CLASS_LOADER).
+ * @return the list of available locales
+ */
+ public static final ULocale[] getAvailableULocales() {
+ return getAvailableULocales(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER);
+ }
+
+ /**
+ * Get the set of Locales installed in the specified bundles.
+ * @param baseName the bundle prefix identifying the resource tree
+ * @param loader the class loader passed on to the available-locale cache entry
+ * @return the list of available locales; this is the array held by the cache
+ * entry, not a copy
+ */
+ public static final Locale[] getAvailableLocales(String baseName, ClassLoader loader) {
+ return getAvailEntry(baseName, loader).getLocaleList();
+ }
+
+ /**
+ * Get the set of Locales installed in the base bundle
+ * (ICU_BASE_NAME loaded through ICU_DATA_CLASS_LOADER).
+ * @return the list of available locales
+ */
+ public static final Locale[] getAvailableLocales() {
+ return getAvailEntry(ICU_BASE_NAME, ICU_DATA_CLASS_LOADER).getLocaleList();
+ }
+
+ /**
+  * Convert a list of ULocales to a list of Locales. Every ULocale is converted with
+  * toLocale(); when several ULocales convert to equal Locales, only the first one is
+  * kept. This means that the two lists will not necessarily match one-to-one, and
+  * that the returned list might be shorter than the input list. The order of first
+  * occurrence is preserved.
+  *
+  * @param ulocales a list of ULocales to convert to a list of Locales.
+  * @return the list of converted ULocales, without duplicates
+  */
+ public static final Locale[] getLocaleList(ULocale[] ulocales) {
+     List<Locale> list = new ArrayList<Locale>(ulocales.length);
+     Set<Locale> uniqueSet = new HashSet<Locale>();
+     for (int i = 0; i < ulocales.length; i++) {
+         Locale loc = ulocales[i].toLocale();
+         // add() returns false for a Locale that was already seen
+         if (uniqueSet.add(loc)) {
+             list.add(loc);
+         }
+     }
+     return list.toArray(new Locale[list.size()]);
+ }
+
+ /**
+ * Returns the locale of this resource bundle. This method can be used after
+ * a call to getBundle() to determine whether the resource bundle returned
+ * really corresponds to the requested locale or is a fallback.
+ * The value is this bundle's ULocale converted with toLocale().
+ *
+ * @return the locale of this resource bundle
+ */
+ public Locale getLocale() {
+ return getULocale().toLocale();
+ }
+
+
+ // ========== privates ==========
+ // Name of the index bundle that lists the installed locales
+ private static final String ICU_RESOURCE_INDEX = "res_index";
+
+ // Key under which a resource table names its default value
+ private static final String DEFAULT_TAG = "default";
+
+ // The name of text file generated by ICU4J build script including all locale names
+ // (canonical, alias and root)
+ private static final String FULL_LOCALE_NAMES_LIST = "fullLocaleNames.lst";
+
+ // Flag for enabling/disabling debugging code
+ private static final boolean DEBUG = ICUDebug.enabled("localedata");
+
+ // Cache for getAvailableLocales: bundle prefix -> AvailEntry, softly referenced so the
+ // whole map can be reclaimed by the garbage collector
+ private static SoftReference<Map<String, AvailEntry>> GET_AVAILABLE_CACHE;
+ /**
+  * Builds the array of installed ULocales from the "InstalledLocales" table of
+  * the res_index bundle found under baseName. The key "root" is mapped to
+  * ULocale.ROOT.
+  */
+ private static final ULocale[] createULocaleList(String baseName,
+         ClassLoader root) {
+     // the canned list is a subset of all the available .res files, the idea
+     // is we don't export them
+     // all. gotta be a better way to do this, since to add a locale you have
+     // to update this list,
+     // and it's embedded in our binary resources.
+     ICUResourceBundle index = (ICUResourceBundle) UResourceBundle.instantiateBundle(baseName, ICU_RESOURCE_INDEX, root, true);
+     ICUResourceBundle installed = (ICUResourceBundle) index.get(INSTALLED_LOCALES);
+
+     ULocale[] result = new ULocale[installed.getSize()];
+     UResourceBundleIterator it = installed.getIterator();
+     it.reset();
+     for (int pos = 0; it.hasNext(); pos++) {
+         String name = it.next().getKey();
+         result[pos] = name.equals("root") ? ULocale.ROOT : new ULocale(name);
+     }
+     return result;
+ }
+
+ /**
+  * Builds the Locale[] counterpart of the cached ULocale list for baseName.
+  */
+ private static final Locale[] createLocaleList(String baseName, ClassLoader loader) {
+     return getLocaleList(getAvailEntry(baseName, loader).getULocaleList());
+ }
+
+ /**
+  * Builds the array of installed locale names from the "InstalledLocales" table
+  * of the res_index bundle found under baseName. The key "root" is replaced by
+  * the string form of ULocale.ROOT.
+  */
+ private static final String[] createLocaleNameArray(String baseName,
+         ClassLoader root) {
+     ICUResourceBundle index = (ICUResourceBundle) UResourceBundle.instantiateBundle(baseName, ICU_RESOURCE_INDEX, root, true);
+     ICUResourceBundle installed = (ICUResourceBundle) index.get(INSTALLED_LOCALES);
+
+     String[] names = new String[installed.getSize()];
+     UResourceBundleIterator it = installed.getIterator();
+     it.reset();
+     for (int pos = 0; it.hasNext(); pos++) {
+         String name = it.next().getKey();
+         names[pos] = name.equals("root") ? ULocale.ROOT.toString() : name;
+     }
+     return names;
+ }
+
+ /**
+  * Collects the names of all locale resources available under baseName.
+  *
+  * Unless the configuration property
+  * com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan is "true", the
+  * resources under the base URL are scanned for ".res" files first. If that scan is
+  * skipped, finds nothing usable, or fails with an IOException, the prebuilt
+  * fullLocaleNames.lst file is read instead. The whole lookup runs as a privileged action.
+  *
+  * @param baseName bundle prefix, with or without a trailing '/'
+  * @param root class loader used to locate the resources
+  * @return the locale names found, or null if neither source was available
+  */
+ private static final List<String> createFullLocaleNameArray(
+         final String baseName, final ClassLoader root) {
+
+     List<String> list = java.security.AccessController
+         .doPrivileged(new java.security.PrivilegedAction<List<String>>() {
+             public List<String> run() {
+                 // WebSphere class loader will return null for a raw
+                 // directory name without trailing slash
+                 String bn = baseName.endsWith("/")
+                     ? baseName
+                     : baseName + "/";
+
+                 List<String> resList = null;
+
+                 String skipScan = ICUConfig.get("com.ibm.icu.impl.ICUResourceBundle.skipRuntimeLocaleResourceScan", "false");
+                 if (!skipScan.equalsIgnoreCase("true")) {
+                     // scan available locale resources under the base url first
+                     try {
+                         Enumeration<URL> urls = root.getResources(bn);
+                         while (urls.hasMoreElements()) {
+                             URL url = urls.nextElement();
+                             URLHandler handler = URLHandler.get(url);
+                             if (handler != null) {
+                                 final List<String> lst = new ArrayList<String>();
+                                 URLVisitor v = new URLVisitor() {
+                                     public void visit(String s) {
+                                         //TODO: This is ugly hack. We have to figure out how
+                                         // we can distinguish locale data from others
+                                         if (s.endsWith(".res")) {
+                                             String locstr = s.substring(0, s.length() - 4);
+                                             if (locstr.contains("_") && !locstr.equals("res_index")) {
+                                                 // locale data with country/script contain "_",
+                                                 // except for res_index.res
+                                                 lst.add(locstr);
+                                             } else if (locstr.length() == 2 || locstr.length() == 3) {
+                                                 // all 2-letter or 3-letter entries are all locale
+                                                 // data at least for now
+                                                 lst.add(locstr);
+                                             } else if (locstr.equalsIgnoreCase("root")) {
+                                                 // root locale is a special case
+                                                 lst.add(ULocale.ROOT.toString());
+                                             }
+                                         }
+                                     }
+                                 };
+                                 handler.guide(v, false);
+
+                                 if (resList == null) {
+                                     resList = new ArrayList<String>(lst);
+                                 } else {
+                                     resList.addAll(lst);
+                                 }
+                             } else {
+                                 if (DEBUG) System.out.println("handler for " + url + " is null");
+                             }
+                         }
+                     } catch (IOException e) {
+                         if (DEBUG) System.out.println("ouch: " + e.getMessage());
+                         resList = null;
+                     }
+                 }
+
+                 if (resList == null) {
+                     // look for prebuilt full locale names list next
+                     try {
+                         InputStream s = root.getResourceAsStream(bn + FULL_LOCALE_NAMES_LIST);
+                         if (s != null) {
+                             try {
+                                 resList = new ArrayList<String>();
+                                 BufferedReader br = new BufferedReader(new InputStreamReader(s, "ASCII"));
+                                 String line;
+                                 while ((line = br.readLine()) != null) {
+                                     if (line.length() != 0 && !line.startsWith("#")) {
+                                         if (line.equalsIgnoreCase("root")) {
+                                             resList.add(ULocale.ROOT.toString());
+                                         } else {
+                                             resList.add(line);
+                                         }
+                                     }
+                                 }
+                             } finally {
+                                 // release the stream whether or not reading succeeded
+                                 s.close();
+                             }
+                         }
+                     } catch (IOException e) {
+                         // swallow it
+                     }
+                 }
+
+                 return resList;
+             }
+         });
+
+     return list;
+ }
+
+ /**
+  * Builds the unmodifiable set of all locale names for baseName. When no full
+  * name list can be produced, the installed locale name set is used instead,
+  * with the root locale ID added if it is missing.
+  */
+ private static Set createFullLocaleNameSet(String baseName, ClassLoader loader) {
+     List names = createFullLocaleNameArray(baseName, loader);
+     if (names != null) {
+         return Collections.unmodifiableSet(new HashSet(names));
+     }
+
+     if (DEBUG) System.out.println("createFullLocaleNameArray returned null");
+     // Use locale name set as the last resort fallback
+     Set fallback = createLocaleNameSet(baseName, loader);
+     String rootID = ULocale.ROOT.toString();
+     if (fallback.contains(rootID)) {
+         return fallback;
+     }
+     // We need to add the root locale in the set
+     Set withRoot = new HashSet(fallback);
+     withRoot.add(rootID);
+     return Collections.unmodifiableSet(withRoot);
+ }
+
+ /**
+  * Builds the unmodifiable set of installed locale names for baseName. If the
+  * index bundle cannot be found, an empty set is returned.
+  */
+ private static Set createLocaleNameSet(String baseName, ClassLoader loader) {
+     try {
+         String[] names = createLocaleNameArray(baseName, loader);
+         return Collections.unmodifiableSet(new HashSet(Arrays.asList(names)));
+     } catch (MissingResourceException e) {
+         if (DEBUG) {
+             System.out.println("couldn't find index for bundleName: " + baseName);
+             Thread.dumpStack();
+         }
+         return Collections.emptySet();
+     }
+ }
+
+ /**
+  * Holder for one bundle prefix. The ULocale[] and Locale[] lists and the two
+  * locale name sets are each created the first time they are requested and
+  * then kept for later calls.
+  */
+ private static final class AvailEntry {
+     private String prefix;
+     private ClassLoader loader;
+     private ULocale[] ulocales;
+     private Locale[] locales;
+     private Set nameSet;
+     private Set fullNameSet;
+
+     AvailEntry(String prefix, ClassLoader loader) {
+         this.prefix = prefix;
+         this.loader = loader;
+     }
+
+     /** Installed locales as ULocales. */
+     ULocale[] getULocaleList() {
+         if (ulocales != null) {
+             return ulocales;
+         }
+         ulocales = createULocaleList(prefix, loader);
+         return ulocales;
+     }
+
+     /** Installed locales as java.util.Locales. */
+     Locale[] getLocaleList() {
+         if (locales != null) {
+             return locales;
+         }
+         locales = createLocaleList(prefix, loader);
+         return locales;
+     }
+
+     /** Names of the installed locales. */
+     Set getLocaleNameSet() {
+         if (nameSet != null) {
+             return nameSet;
+         }
+         nameSet = createLocaleNameSet(prefix, loader);
+         return nameSet;
+     }
+
+     /** Names of all locales found for the prefix. */
+     Set getFullLocaleNameSet() {
+         if (fullNameSet != null) {
+             return fullNameSet;
+         }
+         fullNameSet = createFullLocaleNameSet(prefix, loader);
+         return fullNameSet;
+     }
+ }
+
+ /**
+  * Stores the locale information in a cache accessed by key (bundle prefix).
+  * The cached objects are AvailEntries. The cache is held by a SoftReference
+  * so it can be GC'd.
+  *
+  * @param key bundle prefix identifying the entry
+  * @param loader class loader handed to a newly created entry; it is not part of
+  *               the cache key, so an existing entry for the same prefix is
+  *               returned whatever loader is passed
+  * @return the cached or newly created entry, never null
+  */
+ private static AvailEntry getAvailEntry(String key, ClassLoader loader) {
+     AvailEntry ae = null;
+     Map<String, AvailEntry> lcache = null;
+     if (GET_AVAILABLE_CACHE != null) {
+         lcache = GET_AVAILABLE_CACHE.get();
+         if (lcache != null) {
+             ae = lcache.get(key);
+         }
+     }
+
+     if (ae == null) {
+         ae = new AvailEntry(key, loader);
+         if (lcache == null) {
+             // no cache yet, or the previous map was reclaimed: start a new one
+             lcache = new HashMap<String, AvailEntry>();
+             lcache.put(key, ae);
+             GET_AVAILABLE_CACHE = new SoftReference<Map<String, AvailEntry>>(lcache);
+         } else {
+             lcache.put(key, ae);
+         }
+     }
+
+     return ae;
+ }
+
+ /**
+ * Looks up the resource at the given '/'-separated path, starting at actualBundle
+ * and moving to its parent bundles until the path resolves or no parent is left.
+ *
+ * @param path the path of the wanted resource; a path without '/' is a single key
+ * @param actualBundle the bundle where the search starts
+ * @param requested the bundle passed to handleGet as the requesting bundle and whose
+ * locale ID is used for the loading status; if null, actualBundle is used
+ * @return the resource found, with its loading status set, or null if the path
+ * could not be resolved in any bundle of the chain
+ */
+ protected static final ICUResourceBundle findResourceWithFallback(String path,
+ UResourceBundle actualBundle, UResourceBundle requested) {
+ ICUResourceBundle sub = null;
+ if (requested == null) {
+ requested = actualBundle;
+ }
+ while (actualBundle != null) {
+ ICUResourceBundle current = (ICUResourceBundle) actualBundle;
+ if (path.indexOf('/') == -1) { // skip the tokenizer
+ sub = (ICUResourceBundle) current.handleGet(path, null, requested);
+ if (sub != null) {
+ current = sub;
+ break;
+ }
+ } else {
+ // descend one path segment at a time; a missing segment leaves sub == null
+ StringTokenizer st = new StringTokenizer(path, "/");
+ while (st.hasMoreTokens()) {
+ String subKey = st.nextToken();
+ sub = (ICUResourceBundle) current.handleGet(subKey, null, requested);
+ if (sub == null) {
+ break;
+ }
+ current = sub;
+ }
+ if (sub != null) {
+ //we found it
+ break;
+ }
+ }
+ // not found here: when this bundle has a non-empty resPath, prefix the path with it
+ // before the lookup is retried on the parent
+ if (((ICUResourceBundle)actualBundle).resPath.length() != 0) {
+ path = ((ICUResourceBundle)actualBundle).resPath + "/" + path;
+ }
+ // if not try the parent bundle
+ actualBundle = ((ICUResourceBundle) actualBundle).getParent();
+
+ }
+ if(sub != null){
+ // record whether the result came from root, the requested locale or a fallback
+ sub.setLoadingStatus(((ICUResourceBundle)requested).getLocaleID());
+ }
+ return sub;
+ }
+ /**
+  * Two ICUResourceBundles are equal when they are the same object or when
+  * both their base names and their locale IDs are equal.
+  */
+ public boolean equals(Object other) {
+     if (this == other) {
+         return true;
+     }
+     if (!(other instanceof ICUResourceBundle)) {
+         return false;
+     }
+     ICUResourceBundle that = (ICUResourceBundle) other;
+     return getBaseName().equals(that.getBaseName())
+             && getLocaleID().equals(that.getLocaleID());
+ }
+ /**
+  * Returns the bundle produced by instantiateBundle for the given arguments.
+  * This method is for super class's instantiateBundle method.
+  *
+  * @throws MissingResourceException if no bundle could be instantiated
+  */
+ public static UResourceBundle getBundleInstance(String baseName, String localeID,
+         ClassLoader root, boolean disableFallback){
+     UResourceBundle bundle = instantiateBundle(baseName, localeID, root, disableFallback);
+     if (bundle != null) {
+         return bundle;
+     }
+     throw new MissingResourceException("Could not find the bundle "+ baseName+"/"+ localeID+".res","","");
+ }
+ /**
+  * Recursively builds the bundle for the given locale together with its chain of
+  * parent bundles; over-rides the super class method.
+  *
+  * A bundle already present in the cache is returned as is. Otherwise the bundle is
+  * created; if it does not exist and fallback is allowed, the lookup falls back to the
+  * locale ID with its last '_' segment removed, then to the default locale, then to root.
+  * A bundle that does exist gets its parent set from its "%%Parent" resource if present,
+  * else from the truncated locale ID, else from root.
+  *
+  * @param baseName the bundle base name; a name containing '.' selects the Java style
+  *                 resource naming, in which the root locale name is the empty string
+  * @param localeID the requested locale ID; keywords after '@' are ignored
+  * @param root the class loader used to load the data
+  * @param disableFallback if true, no locale fallback is performed when the bundle is missing
+  * @return the bundle, or null if none could be found
+  */
+ protected synchronized static UResourceBundle instantiateBundle(String baseName, String localeID,
+         ClassLoader root, boolean disableFallback){
+     ULocale defaultLocale = ULocale.getDefault();
+     String localeName = localeID;
+     if(localeName.indexOf('@')>=0){
+         localeName = ULocale.getBaseName(localeID);
+     }
+     String fullName = getFullName(baseName, localeName);
+     ICUResourceBundle b = (ICUResourceBundle)loadFromCache(root, fullName, defaultLocale);
+
+     // here we assume that java type resource bundle organization
+     // is required then the base name contains '.' else
+     // the resource organization is of ICU type
+     // so clients can instantiate resources of the type
+     // com.mycompany.data.MyLocaleElements_en.res and
+     // com.mycompany.data.MyLocaleElements.res
+     //
+     final String rootLocale = (baseName.indexOf('.')==-1) ? "root" : "";
+     final String defaultID = defaultLocale.toString();
+
+     if(localeName.equals("")){
+         localeName = rootLocale;
+     }
+     if(DEBUG) System.out.println("Creating "+fullName+ " currently b is "+b);
+     if (b == null) {
+         b = ICUResourceBundle.createBundle(baseName, localeName, root);
+
+         if(DEBUG)System.out.println("The bundle created is: "+b+" and disableFallback="+disableFallback+" and bundle.getNoFallback="+(b!=null && b.getNoFallback()));
+         if(disableFallback || (b!=null && b.getNoFallback())){
+             // no fallback because the caller said so or because the bundle says so
+             return addToCache(root, fullName, defaultLocale, b);
+         }
+
+         // fallback to locale ID parent
+         if(b == null){
+             int i = localeName.lastIndexOf('_');
+             if (i != -1) {
+                 String temp = localeName.substring(0, i);
+                 b = (ICUResourceBundle)instantiateBundle(baseName, temp, root, disableFallback);
+                 // compare the string forms: temp is a String, not a ULocale
+                 if(b!=null && b.getULocale().toString().equals(temp)){
+                     b.setLoadingStatus(ICUResourceBundle.FROM_FALLBACK);
+                 }
+             }else{
+                 if(defaultID.indexOf(localeName)==-1){
+                     b = (ICUResourceBundle)instantiateBundle(baseName, defaultID, root, disableFallback);
+                     if(b!=null){
+                         b.setLoadingStatus(ICUResourceBundle.FROM_DEFAULT);
+                     }
+                 }else if(rootLocale.length()!=0){
+                     b = ICUResourceBundle.createBundle(baseName, rootLocale, root);
+                     if(b!=null){
+                         b.setLoadingStatus(ICUResourceBundle.FROM_ROOT);
+                     }
+                 }
+             }
+         }else{
+             UResourceBundle parent = null;
+             localeName = b.getLocaleID();
+             int i = localeName.lastIndexOf('_');
+
+             b = (ICUResourceBundle)addToCache(root, fullName, defaultLocale, b);
+
+             if (b.getTableResource("%%Parent") != RES_BOGUS) {
+                 String parentLocaleName = b.getString("%%Parent");
+                 parent = instantiateBundle(baseName, parentLocaleName, root, disableFallback);
+             } else if (i != -1) {
+                 parent = instantiateBundle(baseName, localeName.substring(0, i), root, disableFallback);
+             } else if (!localeName.equals(rootLocale)){
+                 parent = instantiateBundle(baseName, rootLocale, root, true);
+             }
+
+             // never make a bundle its own parent
+             if (!b.equals(parent)){
+                 b.setParent(parent);
+             }
+         }
+     }
+     return b;
+ }
+ /**
+  * Fetches the resource stored under aKey from this bundle or, failing that,
+  * recursively from its parent bundles, and records on the result how it was
+  * loaded relative to the requested bundle's locale.
+  *
+  * @throws MissingResourceException if neither this bundle nor a parent has the key
+  */
+ UResourceBundle get(String aKey, HashMap table, UResourceBundle requested) {
+     ICUResourceBundle found = (ICUResourceBundle)handleGet(aKey, table, requested);
+     if (found == null) {
+         ICUResourceBundle up = (ICUResourceBundle)getParent();
+         if (up != null) {
+             //call the get method to recursively fetch the resource
+             found = (ICUResourceBundle)up.get(aKey, table, requested);
+         }
+         if (found == null) {
+             String fullName = getFullName(getBaseName(), getLocaleID());
+             throw new MissingResourceException(
+                     "Can't find resource for bundle " + fullName + ", key "
+                     + aKey, this.getClass().getName(), aKey);
+         }
+     }
+     found.setLoadingStatus(((ICUResourceBundle)requested).getLocaleID());
+     return found;
+ }
+
+ private static final String ICU_RESOURCE_SUFFIX = ".res";
+ /**
+ * Gets the full name of the resource with suffix.
+ */
+ public static String getFullName(String baseName, String localeName){
+ if(baseName==null || baseName.length()==0){
+ if(localeName.length()==0){
+ return localeName=ULocale.getDefault().toString();
+ }
+ return localeName+ICU_RESOURCE_SUFFIX;
+ }else{
+ if(baseName.indexOf('.')==-1){
+ if(baseName.charAt(baseName.length()-1)!= '/'){
+ return baseName+"/"+localeName+ICU_RESOURCE_SUFFIX;
+ }else{
+ return baseName+localeName+ICU_RESOURCE_SUFFIX;
+ }
+ }else{
+ baseName = baseName.replace('.','/');
+ if(localeName.length()==0){
+ return baseName+ICU_RESOURCE_SUFFIX;
+ }else{
+ return baseName+"_"+localeName+ICU_RESOURCE_SUFFIX;
+ }
+ }
+ }
+ }
+
+ /** Locale ID of this bundle; returned by getLocaleID(). */
+ protected String localeID;
+ /** Base name of this bundle; returned by getBaseName(). */
+ protected String baseName;
+ /** Locale of this bundle; returned by getULocale(). */
+ protected ULocale ulocale;
+ /** Class loader associated with this bundle. */
+ protected ClassLoader loader;
+
+ /**
+ * Access to the bits and bytes of the resource bundle.
+ * Hides low-level details.
+ */
+ protected ICUResourceBundleReader reader;
+ /** Data member where the subclasses store the key. */
+ protected String key;
+ /** Data member where the subclasses store the offset within resource data. */
+ protected int resource;
+
+ /**
+ * A resource word value that means "no resource".
+ * Note: 0xffffffff == -1
+ * This has the same value as UResourceBundle.NONE, but they are semantically
+ * different and should be used appropriately according to context:
+ * NONE means "no type".
+ * (The type of RES_BOGUS is RES_RESERVED=15 which was defined in ICU4C ures.h.)
+ */
+ public static final int RES_BOGUS = 0xffffffff;
+
+ /**
+ * Resource type constant for aliases;
+ * internally stores a string which identifies the actual resource
+ * storing the data (can be in a different resource bundle).
+ * Resolved internally before delivering the actual resource through the API.
+ */
+ public static final int ALIAS = 3;
+
+ /** Resource type constant for tables with 32-bit count, key offsets and values. */
+ public static final int TABLE32 = 4;
+
+ /**
+ * Resource type constant for tables with 16-bit count, key offsets and values.
+ * All values are STRING_V2 strings.
+ */
+ public static final int TABLE16 = 5;
+
+ /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */
+ public static final int STRING_V2 = 6;
+
+ /**
+ * Resource type constant for arrays with 16-bit count and values.
+ * All values are STRING_V2 strings.
+ */
+ public static final int ARRAY16 = 9;
+
+ private static final ConcurrentHashMap<String, ICUResourceBundle> cache = // key built in createBundle: loader hash + baseName + localeID
+     new ConcurrentHashMap<String, ICUResourceBundle>();
+ private static final ICUResourceBundle NULL_BUNDLE = // sentinel stored in the cache by createBundle when no reader could be opened
+ new ICUResourceBundle(null, null, null, 0, null) {
+ public int hashCode() {
+ return 0;
+ }
+ public boolean equals(Object rhs) {
+ return this == rhs; // identity only; createBundle recognizes the sentinel with ==
+ }
+ };
+
+ /**
+  * Returns the bundle for the given base name and locale ID, loading and caching it on first use.
+  * @param baseName The name for the bundle.
+  * @param localeID The locale identification.
+  * @param root The ClassLoader object root.
+  * @return the cached or newly loaded bundle, or null if no reader could be opened for it
+  */
+ public static ICUResourceBundle createBundle(String baseName, String localeID,
+                                              ClassLoader root) {
+     // Cache key: hex hashCode of the class loader followed by base name and locale ID.
+     final String cacheKey = Integer.toHexString(root.hashCode()) + baseName + localeID;
+     ICUResourceBundle cached = cache.get(cacheKey);
+     if (cached == null) {
+         final String fullName = getFullName(baseName, localeID);
+         final ICUResourceBundleReader rbReader = ICUResourceBundleReader.getReader(fullName, root);
+         // could not open the .res file: remember that as NULL_BUNDLE
+         cached = (rbReader != null)
+                 ? getBundle(rbReader, baseName, localeID, root) : NULL_BUNDLE;
+         cache.put(cacheKey, cached);
+     }
+     if (cached == NULL_BUNDLE) {
+         return null;
+     }
+     return cached;
+ }
+
+ protected String getLocaleID() { // locale ID string stored in this bundle
+     return this.localeID;
+ }
+
+ protected String getBaseName() { // base name stored in this bundle
+     return this.baseName;
+ }
+
+ public ULocale getULocale() { // ULocale stored in this bundle
+     return this.ulocale;
+ }
+
+ public UResourceBundle getParent() { // narrows the parent field (declared outside this chunk) to UResourceBundle
+     return (UResourceBundle) this.parent;
+ }
+
+ protected void setParent(ResourceBundle parent) { // replaces the parent bundle; no validation is performed
+ this.parent = parent;
+ }
+
+ public String getKey() { // key under which this resource was created (see the key field)
+     return this.key;
+ }
+
+ private static final int[] gPublicTypes = new int[] { // indexed by RES_GET_TYPE(resource); yields the type constant returned by getType()
+ STRING, // 0
+ BINARY, // 1
+ TABLE, // 2
+ ALIAS, // 3
+
+ TABLE, /* TABLE32 */ // 4
+ TABLE, /* TABLE16 */ // 5
+ STRING, /* STRING_V2 */ // 6
+ INT, // 7
+
+ ARRAY, // 8
+ ARRAY, /* ARRAY16 */ // 9
+ NONE, // 10
+ NONE, // 11
+
+ NONE, // 12
+ NONE, // 13
+ INT_VECTOR, // 14
+ NONE // 15 (the type of RES_BOGUS, per the RES_BOGUS comment)
+ };
+
+ public int getType() { // maps this resource's internal type through gPublicTypes
+     return gPublicTypes[ICUResourceBundleReader.RES_GET_TYPE(this.resource)];
+ }
+
+ /**
+  * Get the noFallback flag specified in the loaded bundle.
+  * @return The noFallback flag, as reported by this bundle's reader.
+  */
+ private boolean getNoFallback() {
+     return this.reader.getNoFallback();
+ }
+
+ private static ICUResourceBundle getBundle(ICUResourceBundleReader reader,
+ String baseName, String localeID,
+ ClassLoader loader) {
+ ICUResourceBundleImpl bundle;
+ int rootRes = reader.getRootResource();
+ if(gPublicTypes[ICUResourceBundleReader.RES_GET_TYPE(rootRes)] == TABLE) {
+ bundle = new ICUResourceBundleImpl.ResourceTable(reader, null, "", rootRes, null);
+ } else {
+ throw new IllegalStateException("Invalid format error");
+ }
+ bundle.baseName = baseName;
+ bundle.localeID = localeID;
+ bundle.ulocale = new ULocale(localeID);
+ bundle.loader = loader;
+ if(bundle.reader.getUsesPoolBundle()) {
+ bundle.reader.setPoolBundleKeys(
+ ((ICUResourceBundleImpl)getBundleInstance(baseName, "pool", loader, true)).reader);
+ }
+ UResourceBundle alias = bundle.handleGetImpl("%%ALIAS", null, bundle, null, null); // handleGet will cache the bundle with no parent set
+ if(alias != null) {
+ return (ICUResourceBundle)UResourceBundle.getBundleInstance(baseName, alias.getString());
+ } else {
+ return bundle;
+ }
+ }
+ // constructor for inner classes; context fields are inherited from the container when one is given
+ protected ICUResourceBundle(ICUResourceBundleReader reader, String key, String resPath, int resource, ICUResourceBundle container) {
+     this.resource = resource;
+     this.resPath = resPath;
+     this.key = key;
+     this.reader = reader;
+     if (container == null) {
+         return; // no container (e.g. the root table built in getBundle): context fields are not set here
+     }
+     this.baseName = container.baseName;
+     this.localeID = container.localeID;
+     this.ulocale = container.ulocale;
+     this.loader = container.loader;
+     this.parent = container.parent;
+ }
+
+ private String getAliasValue(int res) {
+ String result = reader.getAlias(res);
+ return result != null ? result : "";
+ }
+ private static final char RES_PATH_SEP_CHAR = '/';
+ private static final String RES_PATH_SEP_STR = "/";
+ private static final String ICUDATA = "ICUDATA";
+ private static final char HYPHEN = '-';
+ private static final String LOCALE = "LOCALE";
+
+ protected ICUResourceBundle findResource(String key, // Resolves the alias stored in _resource to the resource it names; key is only used in the exception thrown on failure
+ String resPath, // used as the key path when the alias string does not contain one
+ int _resource, // resource word of the alias; its string is read with getAliasValue
+ HashMap table, // alias strings already visited in this resolution (created here when null); used to detect cycles
+ UResourceBundle requested) { // bundle the lookup started from; supplies the locale for "/LOCALE/..." aliases
+ ClassLoader loaderToUse = loader;
+ String locale = null, keyPath = null;
+ String bundleName;
+ String rpath = getAliasValue(_resource);
+ if (table == null) {
+ table = new HashMap();
+ }
+ if (table.get(rpath) != null) { // this alias string was already visited in the current chain
+ throw new IllegalArgumentException(
+ "Circular references in the resource bundles");
+ }
+ table.put(rpath, "");
+ if (rpath.indexOf(RES_PATH_SEP_CHAR) == 0) { // form "/bundleName/locale" or "/bundleName/locale/keyPath"
+ int i = rpath.indexOf(RES_PATH_SEP_CHAR, 1); // NOTE(review): with no second '/', i is -1 and the substring(1, i) below throws -- confirm such aliases cannot occur
+ int j = rpath.indexOf(RES_PATH_SEP_CHAR, i + 1);
+ bundleName = rpath.substring(1, i);
+ if (j < 0) {
+ locale = rpath.substring(i + 1);
+ // if key path is not available,
+ // use the given key path
+ keyPath = resPath;
+ } else {
+ locale = rpath.substring(i + 1, j);
+ keyPath = rpath.substring(j + 1, rpath.length());
+ }
+ //there is a path included
+ if (bundleName.equals(ICUDATA)) { // "/ICUDATA/...": the ICU base bundle, loaded with the ICU data class loader
+ bundleName = ICU_BASE_NAME;
+ loaderToUse = ICU_DATA_CLASS_LOADER;
+ }else if(bundleName.indexOf(ICUDATA)>-1){ // "/ICUDATA-xxx/...": sub-path xxx under the ICU base name
+ int idx = bundleName.indexOf(HYPHEN);
+ if(idx>-1){
+ bundleName = ICU_BASE_NAME+RES_PATH_SEP_STR+bundleName.substring(idx+1,bundleName.length());
+ loaderToUse = ICU_DATA_CLASS_LOADER;
+ }
+ }
+ } else {
+ //no path start with locale
+ int i = rpath.indexOf(RES_PATH_SEP_CHAR);
+ if (i != -1) { // form "locale/keyPath"
+ locale = rpath.substring(0, i);
+ keyPath = rpath.substring(i + 1);
+ } else { // form "locale"
+ locale = rpath;
+ // if key path is not available,
+ // use the given key path
+ keyPath = resPath;
+ }
+ bundleName = baseName;
+ }
+ ICUResourceBundle bundle = null;
+ ICUResourceBundle sub = null;
+ if(bundleName.equals(LOCALE)){ // "/LOCALE/keyPath": look the key path up in the requested bundle's own locale
+ bundleName = baseName;
+ keyPath = rpath.substring(LOCALE.length() + 2/* prepending and appending / */, rpath.length());
+ locale = ((ICUResourceBundle)requested).getLocaleID();
+
+ // Get the top bundle of the requested bundle
+ bundle = (ICUResourceBundle)getBundleInstance(bundleName, locale, loaderToUse, false);
+ if (bundle != null) {
+ sub = ICUResourceBundle.findResourceWithFallback(keyPath, bundle, null);
+ // TODO
+ // The resPath of the resolved bundle should reflect the resource path
+ // requested by caller. However, overwriting resPath here will affect cached
+ // resource instance. The resPath is exposed by ICUResourceBundle#getResPath,
+ // but there are no call sites in ICU (and ICUResourceBundle is an implementation
+ // class). We may create a safe clone to overwrite the resPath field, but
+ // it has no benefit at least for now. -Yoshito
+ //if (sub != null) {
+ // sub.resPath = resPath;
+ //}
+ }
+ }else{
+ if (locale == null) { // NOTE(review): locale is assigned on every path above, so this branch looks unreachable -- confirm
+ // {dlf} must use requestor's class loader to get resources from same jar
+ bundle = (ICUResourceBundle) getBundleInstance(bundleName, "",
+ loaderToUse, false);
+ } else {
+ bundle = (ICUResourceBundle) getBundleInstance(bundleName, locale,
+ loaderToUse, false);
+ }
+
+ StringTokenizer st = new StringTokenizer(keyPath, "/");
+ ICUResourceBundle current = bundle;
+ while (st.hasMoreTokens()) { // descend one key-path segment at a time; table is passed along so nested aliases are checked for cycles
+ String subKey = st.nextToken();
+ sub = (ICUResourceBundle)current.get(subKey, table, requested);
+ if (sub == null) {
+ break;
+ }
+ current = sub;
+ }
+ // TODO
+ // See the comments above.
+ //if (sub != null) {
+ // sub.resPath = resPath;
+ //}
+ }
+ if (sub == null) {
+ throw new MissingResourceException(localeID, baseName, key); // NOTE(review): the first constructor argument is the detail message; localeID is passed there
+ }
+ return sub;
+ }
+
+ // Resource bundle lookup cache, which may be used by subclasses
+ // which have nested resources; handleGet stores entries under both String keys and Integer indexes
+ protected ICUCache<Object, UResourceBundle> lookup;
+ private static final int MAX_INITIAL_LOOKUP_SIZE = 64; // used as the lower bound of the initial size in createLookupCache
+
+ protected void createLookupCache() { // creates the lookup cache; size argument is twice getSize(), but at least MAX_INITIAL_LOOKUP_SIZE
+ lookup = new SimpleCache(ICUCache.WEAK, Math.max(getSize()*2, MAX_INITIAL_LOOKUP_SIZE));
+ }
+
+ protected UResourceBundle handleGet(String resKey, HashMap table, UResourceBundle requested) {
+ UResourceBundle res = null;
+ if (lookup != null) {
+ res = lookup.get(resKey);
+ }
+ if (res == null) {
+ int[] index = new int[1];
+ boolean[] alias = new boolean[1];
+ res = handleGetImpl(resKey, table, requested, index, alias);
+ if (res != null && lookup != null && !alias[0]) {
+ // We do not want to cache a result from alias entry
+ lookup.put(resKey, res);
+ lookup.put(Integer.valueOf(index[0]), res);
+ }
+ }
+ return res;
+ }
+
+ protected UResourceBundle handleGet(int index, HashMap table, UResourceBundle requested) {
+ UResourceBundle res = null;
+ Integer indexKey = null;
+ if (lookup != null) {
+ indexKey = Integer.valueOf(index);
+ res = lookup.get(indexKey);
+ }
+ if (res == null) {
+ boolean[] alias = new boolean[1];
+ res = handleGetImpl(index, table, requested, alias);
+ if (res != null && lookup != null && !alias[0]) {
+ // We do not want to cache a result from alias entry
+ lookup.put(res.getKey(), res);
+ lookup.put(indexKey, res);
+ }
+ }
+ return res;
+ }
+
+ // Hook for subclasses that support key-based resource access; this base implementation finds nothing and returns null.
+ protected UResourceBundle handleGetImpl(String resKey, HashMap table, UResourceBundle requested,
+ int[] index, boolean[] isAlias) { // index and isAlias are out-parameters written by the overriding implementations
+ return null;
+ }
+
+ // Hook for subclasses that support index-based resource access; this base implementation finds nothing and returns null.
+ protected UResourceBundle handleGetImpl(int index, HashMap table, UResourceBundle requested,
+ boolean[] isAlias) { // isAlias is an out-parameter written by the overriding implementations
+ return null;
+ }
+
+
+ // TODO Below is a set of workarounds created for org.unicode.cldr.icu.ICU2LDMLWriter
+ /*
+ * Calling getKeys() on a table that has aliases can throw a NullPointerException if parent is not set,
+ * see trac bug: 6514
+ * -Brian Rower - IBM - Sept. 2008
+ */
+
+ /**
+ * Returns the resource handle for the given key within the calling resource table.
+ * This base implementation always returns RES_BOGUS; ICUResourceBundleImpl.ResourceTable overrides it.
+ * @internal
+ * @deprecated This API is ICU internal only and a workaround see ticket #6514.
+ * @author Brian Rower
+ */
+ protected int getTableResource(String resKey) {
+ return RES_BOGUS;
+ }
+ protected int getTableResource(int index) { // index-based variant; this base implementation always returns RES_BOGUS
+ return RES_BOGUS;
+ }
+
+ /**
+ * Determines if the object at the specified index of the calling resource table
+ * is an alias. If it is, returns true.
+ *
+ * @param index The index of the resource to check
+ * @return True if the resource at 'index' is an alias, false otherwise.
+ *
+ * @internal
+ * @deprecated This API is ICU internal only and part of a work around see ticket #6514
+ * @author Brian Rower
+ */
+ public boolean isAlias(int index)
+ {
+ //TODO this is part of a workaround for ticket #6514
+ //if index is out of the resource, return false (presumably getTableResource then yields RES_BOGUS, whose type is not ALIAS).
+ return ICUResourceBundleReader.RES_GET_TYPE(getTableResource(index)) == ALIAS;
+ }
+
+ /**
+ * Determines if this resource itself is an alias, judged by the type of its own resource word.
+ * @internal
+ * @deprecated This API is ICU internal only and part of a workaround see ticket #6514.
+ * @author Brian Rower
+ */
+ public boolean isAlias()
+ {
+ //TODO this is part of a workaround for ticket #6514
+ return ICUResourceBundleReader.RES_GET_TYPE(resource) == ALIAS;
+ }
+
+ /**
+ * Determines if the object with the specified key
+ * is an alias. If it is, returns true.
+ * @param k The key of the resource to check
+ * @return True if the resource with 'key' is an alias, false otherwise.
+ *
+ * @internal
+ * @deprecated This API is ICU internal only and part of a workaround see ticket #6514.
+ * @author Brian Rower
+ */
+ public boolean isAlias(String k)
+ {
+ //TODO this is part of a workaround for ticket #6514
+ //this only applies to tables (the base getTableResource returns RES_BOGUS, so the result is false elsewhere)
+ return ICUResourceBundleReader.RES_GET_TYPE(getTableResource(k)) == ALIAS;
+ }
+
+ /**
+ * This method can be used to retrieve the underlying alias path (aka where the alias points to).
+ * This method was written to allow conversion from ICU back to LDML format.
+ *
+ * @param index The index, within this table, of the alias entry whose path is wanted.
+ * @return The alias path, or an empty string if the reader has no alias string for that entry.
+ * @author Brian Rower
+ * @internal
+ * @deprecated This API is ICU internal only.
+ * @author Brian Rower
+ */
+ public String getAliasPath(int index)
+ {
+ return getAliasValue(getTableResource(index));
+ }
+
+ /**
+ * Returns the alias path read from this resource's own resource word (empty string if the reader has none).
+ * @internal
+ * @deprecated This API is ICU internal only
+ * @author Brian Rower
+ */
+ public String getAliasPath()
+ {
+ //TODO cannot allow alias path to end up in public API
+ return getAliasValue(resource);
+ }
+
+ /**
+ * Returns the alias path of the entry with key k in this table (empty string if the reader has none).
+ * @internal
+ * @deprecated This API is ICU internal only
+ * @author Brian Rower
+ */
+ public String getAliasPath(String k)
+ {
+ //TODO cannot allow alias path to end up in public API
+ return getAliasValue(getTableResource(k));
+ }
+
+ /*
+ * Helper method for getKeysSafe: the key of the item at index. This base implementation returns null; ICUResourceBundleImpl.ResourceTable overrides it.
+ */
+ protected String getKey(int index) {
+ return null;
+ }
+
+ /**
+  * Returns an Enumeration of the keys belonging to this table or array.
+  * Unlike getKeys(), this method does not follow alias paths, so the underlying aliases
+  * are exposed. For all general purposes of the ICU resource bundle please use getKeys().
+  *
+  * @return Keys in this table or array.
+  * @internal
+  * @deprecated This API is ICU internal only and a workaround see ticket #6514.
+  * @author Brian Rower
+  */
+ public Enumeration getKeysSafe()
+ {
+     //TODO this is part of a workaround for ticket #6514
+     //TODO we should use Iterator or List as the return type
+     // instead of Enumeration
+
+     if (ICUResourceBundleReader.URES_IS_TABLE(resource)) {
+         // Collect the keys item by item through getKey(int),
+         // in item order.
+         final List<String> keys = new ArrayList<String>();
+         final int count = getSize();
+         for (int i = 0; i < count; i++) {
+             final String itemKey = getKey(i);
+             keys.add(itemKey);
+         }
+         return Collections.enumeration(keys);
+     }
+
+     //the safeness only applies to tables, so use the other method if it's not a table
+     return getKeys();
+ }
+
+ // This is the worker function for the public getKeys().
+ // TODO: Now that UResourceBundle uses handleKeySet(), this function is obsolete.
+ // It is also not inherited from ResourceBundle, and it is not implemented
+ // by ResourceBundleWrapper despite its documentation requiring all subclasses to
+ // implement it.
+ // Consider deprecating UResourceBundle.handleGetKeys(), and consider making it always return null.
+ protected Enumeration handleGetKeys() {
+ return Collections.enumeration(handleKeySet()); // enumeration view over whatever handleKeySet() returns
+ }
+
+ protected boolean isTopLevelResource() { // top level means an empty resPath (getBundle creates the root table with "")
+     return 0 == this.resPath.length();
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java
new file mode 100644
index 00000000000..5d2ed741454
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleImpl.java
@@ -0,0 +1,213 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.UResourceBundleIterator;
+import com.ibm.icu.util.UResourceTypeMismatchException;
+
+class ICUResourceBundleImpl extends ICUResourceBundle {
+ protected ICUResourceBundleImpl(ICUResourceBundleReader reader, String key, String resPath, int resource, // passes everything through to ICUResourceBundle
+ ICUResourceBundleImpl container) { // container is narrowed to ICUResourceBundleImpl here
+ super(reader, key, resPath, resource, container);
+ }
+ protected final ICUResourceBundle createBundleObject(String _key,
+                                                      int _resource,
+                                                      HashMap table,
+                                                      UResourceBundle requested,
+                                                      boolean[] isAlias) {
+     // Wraps the resource word _resource in the bundle class matching its type;
+     // an alias is resolved through findResource instead of being wrapped.
+     final int type = ICUResourceBundleReader.RES_GET_TYPE(_resource);
+     final String childPath = resPath + "/" + _key;
+     if (isAlias != null) {
+         isAlias[0] = (type == ALIAS); // tells the caller whether the item is an alias
+     }
+     switch (type) {
+     case ALIAS:
+         return findResource(_key, childPath, _resource, table, requested);
+     case STRING:
+     case STRING_V2:
+         return new ICUResourceBundleImpl.ResourceString(reader, _key, childPath, _resource, this);
+     case INT:
+         return new ICUResourceBundleImpl.ResourceInt(reader, _key, childPath, _resource, this);
+     case INT_VECTOR:
+         return new ICUResourceBundleImpl.ResourceIntVector(reader, _key, childPath, _resource, this);
+     case BINARY:
+         return new ICUResourceBundleImpl.ResourceBinary(reader, _key, childPath, _resource, this);
+     case ARRAY:
+     case ARRAY16:
+         return new ICUResourceBundleImpl.ResourceArray(reader, _key, childPath, _resource, this);
+     case TABLE:
+     case TABLE16:
+     case TABLE32:
+         return new ICUResourceBundleImpl.ResourceTable(reader, _key, childPath, _resource, this);
+     default:
+         throw new IllegalStateException("The resource type is unknown");
+     }
+ }
+
+ // Scalar values ------------------------------------------------------- ***
+
+ private static final class ResourceBinary extends ICUResourceBundleImpl { // bundle created for BINARY resources
+     ResourceBinary(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                    ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+     }
+     public byte[] getBinary(byte[] ba) { // ba is handed to the reader unchanged
+         return this.reader.getBinary(this.resource, ba);
+     }
+     public ByteBuffer getBinary() {
+         return this.reader.getBinary(this.resource);
+     }
+ }
+ private static final class ResourceInt extends ICUResourceBundleImpl { // bundle created for INT resources
+     ResourceInt(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                 ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+     }
+     public int getUInt() { // decoded from the resource word itself, no reader access
+         return ICUResourceBundleReader.RES_GET_UINT(this.resource);
+     }
+     public int getInt() {
+         return ICUResourceBundleReader.RES_GET_INT(this.resource);
+     }
+ }
+ private static final class ResourceString extends ICUResourceBundleImpl { // bundle created for STRING and STRING_V2 resources
+     private String value; // read from the reader once, in the constructor
+     ResourceString(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                    ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+         this.value = reader.getString(resource);
+     }
+     public String getString() {
+         return this.value;
+     }
+ }
+ private static final class ResourceIntVector extends ICUResourceBundleImpl { // bundle created for INT_VECTOR resources
+     private int[] value; // read from the reader once, in the constructor
+     ResourceIntVector(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                       ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+         this.value = reader.getIntVector(resource);
+     }
+     public int[] getIntVector() { // returns the stored array itself, not a copy
+         return this.value;
+     }
+ }
+
+ // Container values ---------------------------------------------------- ***
+
+ private static class ResourceContainer extends ICUResourceBundleImpl { // shared base of ResourceArray and ResourceTable
+     protected ICUResourceBundleReader.Container value; // assigned by the subclass constructors
+
+     ResourceContainer(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                       ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+     }
+     public int getSize() {
+         return this.value.getSize();
+     }
+     protected int getContainerResource(int index) {
+         return this.value.getContainerResource(index);
+     }
+     protected UResourceBundle createBundleObject(int index, String resKey, HashMap table,
+                                                  UResourceBundle requested, boolean[] isAlias) {
+         final int itemRes = getContainerResource(index);
+         if (itemRes != RES_BOGUS) {
+             return createBundleObject(resKey, itemRes, table, requested, isAlias);
+         }
+         throw new IndexOutOfBoundsException(); // RES_BOGUS: there is no item at this index
+     }
+ }
+ private static class ResourceArray extends ResourceContainer { // bundle created for ARRAY and ARRAY16 resources
+     protected String[] handleGetStringArray() { // one string per item, in iterator order
+         String[] strings = new String[value.getSize()];
+         UResourceBundleIterator iter = getIterator();
+         int i = 0;
+         while (iter.hasNext()) {
+             strings[i++] = iter.next().getString();
+         }
+         return strings;
+     }
+     public String[] getStringArray() {
+         return handleGetStringArray();
+     }
+     protected UResourceBundle handleGetImpl(String indexStr, HashMap table,
+                                             UResourceBundle requested,
+                                             int[] index, boolean[] isAlias) {
+         int i = indexStr.length() > 0 ? Integer.parseInt(indexStr) : -1; // "" maps to -1; non-numeric text throws NumberFormatException
+         if(index != null) {
+             index[0] = i;
+         }
+         if (i < 0) {
+             throw new UResourceTypeMismatchException("Could not get the correct value for index: "+ indexStr); // report the key text, not the int[] out-parameter
+         }
+         return createBundleObject(i, indexStr, table, requested, isAlias);
+     }
+     protected UResourceBundle handleGetImpl(int index, HashMap table,
+                                             UResourceBundle requested, boolean[] isAlias) {
+         return createBundleObject(index, Integer.toString(index), table, requested, isAlias); // the decimal index serves as the item's key
+     }
+     ResourceArray(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                   ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+         value = reader.getArray(resource);
+         createLookupCache(); // Use bundle cache to access array entries
+     }
+ }
+ static class ResourceTable extends ResourceContainer { // bundle created for TABLE, TABLE16 and TABLE32 resources
+     ResourceTable(ICUResourceBundleReader reader, String key, String resPath, int resource,
+                   ICUResourceBundleImpl container) {
+         super(reader, key, resPath, resource, container);
+         this.value = reader.getTable(resource);
+         createLookupCache(); // Use bundle cache to access table entries
+     }
+     protected String getKey(int index) {
+         return ((ICUResourceBundleReader.Table) this.value).getKey(index);
+     }
+     protected Set handleKeySet() {
+         final ICUResourceBundleReader.Table entries = (ICUResourceBundleReader.Table) value;
+         final TreeSet<String> sortedKeys = new TreeSet<String>();
+         for (int pos = 0; pos < entries.getSize(); pos++) {
+             sortedKeys.add(entries.getKey(pos));
+         }
+         return sortedKeys;
+     }
+     protected int getTableResource(int index) {
+         return getContainerResource(index);
+     }
+     protected int getTableResource(String resKey) {
+         return ((ICUResourceBundleReader.Table) this.value).getTableResource(resKey);
+     }
+     protected UResourceBundle handleGetImpl(String resKey, HashMap table,
+                                             UResourceBundle requested,
+                                             int[] index, boolean[] isAlias) {
+         final int pos = ((ICUResourceBundleReader.Table) value).findTableItem(resKey);
+         if (index != null) {
+             index[0] = pos; // reported even when negative, i.e. when the key was not found
+         }
+         if (pos >= 0) {
+             return createBundleObject(pos, resKey, table, requested, isAlias);
+         }
+         return null;
+     }
+     protected UResourceBundle handleGetImpl(int index, HashMap table,
+                                             UResourceBundle requested, boolean[] isAlias) {
+         final String keyAtIndex = ((ICUResourceBundleReader.Table) value).getKey(index);
+         if (keyAtIndex != null) {
+             return createBundleObject(index, keyAtIndex, table, requested, isAlias);
+         }
+         throw new IndexOutOfBoundsException(); // a null key means there is no item at this index
+     }
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java
new file mode 100644
index 00000000000..8883afcb336
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java
@@ -0,0 +1,862 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2004-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.util.UResourceBundle;
+import com.ibm.icu.util.VersionInfo;
+
+/**
+ * This class reads the *.res resource bundle format
+ *
+ * (For the latest version of the file format documentation see
+ * ICU4C's source/common/uresdata.h file.)
+ *
+ * File format for .res resource bundle files (formatVersion=2, ICU 4.4)
+ *
+ * New in formatVersion 2 compared with 1.3: -------------
+ *
+ * Three new resource types -- String-v2, Table16 and Array16 -- have their
+ * values stored in a new array of 16-bit units between the table key strings
+ * and the start of the other resources.
+ *
+ * genrb eliminates duplicates among Unicode string-v2 values.
+ * Multiple Unicode strings may use the same offset and string data,
+ * or a short string may point to the suffix of a longer string. ("Suffix sharing")
+ * For example, one string "abc" may be reused for another string "bc" by pointing
+ * to the second character. (Short strings-v2 are NUL-terminated
+ * and not preceded by an explicit length value.)
+ *
+ * It is allowed for all resource types to share values.
+ * The swapper code (ures_swap()) has been modified so that it swaps each item
+ * exactly once.
+ *
+ * A resource bundle may use a special pool bundle. Some or all of the table key strings
+ * of the using-bundle are omitted, and the key string offsets for such key strings refer
+ * to offsets in the pool bundle.
+ * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values
+ * must match.
+ * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle
+ * is or uses a pool bundle.
+ *
+ * Table key strings must be compared in ASCII order, even if they are not
+ * stored in ASCII.
+ *
+ * New in formatVersion 1.3 compared with 1.2: -------------
+ *
+ * genrb eliminates duplicates among key strings.
+ * Multiple table items may share one key string, or one item may point
+ * to the suffix of another's key string. ("Suffix sharing")
+ * For example, one key "abc" may be reused for another key "bc" by pointing
+ * to the second character. (Key strings are NUL-terminated.)
+ *
+ * -------------
+ *
+ * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
+ * with nested, hierarchical data structures.
+ * It physically contains the following:
+ *
+ * Resource root; -- 32-bit Resource item, root item for this bundle's tree;
+ * currently, the root item must be a table or table32 resource item
+ * int32_t indexes[indexes[0]]; -- array of indexes for friendly
+ * reading and swapping; see the URES_INDEX_* constants in this class
+ * new in formatVersion 1.1 (ICU 2.8)
+ * char keys[]; -- characters for key strings
+ * (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
+ * (minus the space for root and indexes[]),
+ * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
+ * padded to multiple of 4 bytes for 4-alignment of the following data
+ * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units
+ * (new in formatVersion 2/ICU 4.4)
+ * data is indexed by the offset values in 16-bit resource types,
+ * with offset 0 pointing to the beginning of this array;
+ * there is a 0 at offset 0, for empty resources;
+ * padded to multiple of 4 bytes for 4-alignment of the following data
+ * data; -- data directly and indirectly indexed by the root item;
+ * the structure is determined by walking the tree
+ *
+ * Each resource bundle item has a 32-bit Resource handle (see the Resource typedef in ICU4C's uresdata.h)
+ * which contains the item type number in its upper 4 bits (31..28) and either
+ * an offset or a direct value in its lower 28 bits (27..0).
+ * The order of items is undefined and only determined by walking the tree.
+ * Leaves of the tree may be stored first or last or anywhere in between,
+ * and it is in theory possible to have unreferenced holes in the file.
+ *
+ * 16-bit-unit values:
+ * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special
+ * array of 16-bit units. Each resource value is a sequence of 16-bit units,
+ * with no per-resource padding to a 4-byte boundary.
+ * 16-bit container types (Table16 and Array16) contain Resource16 values
+ * which are offsets to String-v2 resources in the same 16-bit-units array.
+ *
+ * Direct values:
+ * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
+ * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for
+ * _any_ resource type indicates an empty value.
+ * - Integer values are 28-bit values stored in the Resource handle itself;
+ * the interpretation of unsigned vs. signed integers is up to the application.
+ *
+ * All other types and values use 28-bit offsets to point to the item's data.
+ * The offset is an index to the first 32-bit word of the value, relative to the
+ * start of the resource data (i.e., the root item handle is at offset 0).
+ * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
+ * All resource item values are 4-aligned.
+ *
+ * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array,
+ * indexing 16-bit units in that array.
+ *
+ * The structures (memory layouts) for the values for each item type are listed
+ * in the table below.
+ *
+ * Nested, hierarchical structures: -------------
+ *
+ * Table items contain key-value pairs where the keys are offsets to char * key strings.
+ * The values of these pairs are either Resource handles or
+ * offsets into the 16-bit-units array, depending on the table type.
+ *
+ * Array items are simple vectors of Resource handles,
+ * or of offsets into the 16-bit-units array, depending on the array type.
+ *
+ * Table key string offsets: -------
+ *
+ * Key string offsets are relative to the start of the resource data (of the root handle),
+ * i.e., the first string has an offset of 4+sizeof(indexes).
+ * (After the 4-byte root handle and after the indexes array.)
+ *
+ * If the resource bundle uses a pool bundle, then some key strings are stored
+ * in the pool bundle rather than in the local bundle itself.
+ * - In a Table or Table16, the 16-bit key string offset is local if it is
+ * less than indexes[URES_INDEX_KEYS_TOP]<<2.
+ * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into
+ * the pool bundle key strings.
+ * - In a Table32, the 32-bit key string offset is local if it is non-negative.
+ * Otherwise, reset bit 31 to get the pool key string offset.
+ *
+ * Unlike the local offset, the pool key offset is relative to
+ * the start of the key strings, not to the start of the bundle.
+ *
+ * An alias item is special (and new in ICU 2.4): --------------
+ *
+ * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
+ * another resource bundle's item according to the path in the string.
+ * This is used to share items across bundles that are in different lookup/fallback
+ * chains (e.g., large collation data among zh_TW and zh_HK).
+ * This saves space (for large items) and maintenance effort (less duplication of data).
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Resource types:
+ *
+ * Most resources have their values stored at four-byte offsets from the start
+ * of the resource data. These values are at least 4-aligned.
+ * Some resource values are stored directly in the offset field of the Resource itself.
+ * See UResType in unicode/ures.h for enumeration constants for Resource types.
+ *
+ * Some resources have their values stored as sequences of 16-bit units,
+ * at 2-byte offsets from the start of a contiguous 16-bit-unit array between
+ * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4)
+ * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources.
+ * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2
+ * resources, with the offsets relative to the start of the 16-bit-units array.
+ *
+ * Type Name Memory layout of values
+ * (in parentheses: scalar, non-offset values)
+ *
+ * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding)
+ * or (empty string ("") if offset==0)
+ * 1 Binary: int32_t length, uint8_t[length], (padding)
+ * - the start of the bytes is 16-aligned -
+ * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
+ * 3 Alias: (physically same value layout as string, new in ICU 2.4)
+ * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count]
+ * (new in formatVersion 1.1/ICU 2.8)
+ * 5 Table16: uint16_t count, uint16_t keyStringOffsets[count], Resource16[count]
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ * 6 Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar:
+ * - if first is not a trail surrogate, then the length is implicit
+ * and u_strlen() needs to be called
+ * - if first<0xdfef then length=first&0x3ff (and skip first)
+ * - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar
+ * - if first==0xdfff then length=((second UChar)<<16) | third UChar
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ * 7 Integer: (28-bit offset is integer value)
+ * 8 Array: int32_t count, Resource[count]
+ * 9 Array16: uint16_t count, Resource16[count]
+ * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4)
+ * 14 Integer Vector: int32_t length, int32_t[length]
+ * 15 Reserved: This value denotes special purpose resources and is for internal use.
+ *
+ * Note that there are 3 types with data vector values:
+ * - Vectors of 8-bit bytes stored as type Binary.
+ * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2
+ * (no value restrictions, all values 0..ffff allowed!).
+ * - Vectors of 32-bit words stored as type Integer Vector.
+ */
+public final class ICUResourceBundleReader implements ICUBinary.Authenticate {
+ /**
+ * File format version that this class understands.
+ * "ResB"
+ */
+ private static final byte DATA_FORMAT_ID[] = {(byte)0x52, (byte)0x65,
+ (byte)0x73, (byte)0x42};
+
+ /* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
+ private static final int URES_INDEX_LENGTH = 0; /* contains URES_INDEX_TOP==the length of indexes[];
+ * formatVersion==1: all bits contain the length of indexes[]
+ * but the length is much less than 0xff;
+ * formatVersion>1:
+ * only bits 7..0 contain the length of indexes[],
+ * bits 31..8 are reserved and set to 0 */
+ private static final int URES_INDEX_KEYS_TOP = 1; /* contains the top of the key strings, */
+ /* same as the bottom of resources or UTF-16 strings, rounded up */
+ //ivate static final int URES_INDEX_RESOURCES_TOP = 2; /* contains the top of all resources */
+ private static final int URES_INDEX_BUNDLE_TOP = 3; /* contains the top of the bundle, */
+ /* in case it were ever different from [2] */
+ //ivate static final int URES_INDEX_MAX_TABLE_LENGTH = 4; /* max. length of any table */
+ private static final int URES_INDEX_ATTRIBUTES = 5; /* attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
+ private static final int URES_INDEX_16BIT_TOP = 6; /* top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16),
+ * rounded up (new in formatVersion 2.0, ICU 4.4) */
+ private static final int URES_INDEX_POOL_CHECKSUM = 7; /* checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */
+ //ivate static final int URES_INDEX_TOP = 8;
+
+ /*
+ * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
+ * New in formatVersion 1.2 (ICU 3.6).
+ *
+ * If set, then this resource bundle is a standalone bundle.
+ * If not set, then the bundle participates in locale fallback, eventually
+ * all the way to the root bundle.
+ * If indexes[] is missing or too short, then the attribute cannot be determined
+ * reliably. Dependency checking should ignore such bundles, and loading should
+ * use fallbacks.
+ */
+ private static final int URES_ATT_NO_FALLBACK = 1;
+
+ /*
+ * Attributes for bundles that are, or use, a pool bundle.
+ * A pool bundle provides key strings that are shared among several other bundles
+ * to reduce their total size.
+ * New in formatVersion 2 (ICU 4.4).
+ */
+ private static final int URES_ATT_IS_POOL_BUNDLE = 2;
+ private static final int URES_ATT_USES_POOL_BUNDLE = 4;
+
+ private static final boolean DEBUG = false;
+
+ private byte[] /* formatVersion, */ dataVersion;
+
+ // See the ResourceData struct in ICU4C/source/common/uresdata.h.
+ private String s16BitUnits;
+ private byte[] poolBundleKeys;
+ private String poolBundleKeysAsString;
+ private int rootRes;
+ private int localKeyLimit;
+ private boolean noFallback; /* see URES_ATT_NO_FALLBACK */
+ private boolean isPoolBundle;
+ private boolean usesPoolBundle;
+
+ // Fields specific to the Java port.
+ private int[] indexes;
+ private byte[] keyStrings;
+ private String keyStringsAsString; // null except if isPoolBundle
+ private byte[] resourceBytes;
+ private int resourceBottom; // File offset where the mixed-type resources start.
+
+ /**
+  * Reads the header and the data of a .res bundle from the given stream.
+  * The stream is closed before this constructor returns, both on success
+  * and when reading fails.
+  *
+  * @param stream       the raw bundle data; read through a BufferedInputStream
+  * @param resolvedName the resource name, used only in the error message
+  * @throws RuntimeException if an IOException occurs while reading or closing;
+  *         the IOException is attached as the cause
+  */
+ private ICUResourceBundleReader(InputStream stream, String resolvedName){
+     BufferedInputStream bs = new BufferedInputStream(stream);
+     boolean completed = false;
+     try{
+         if(DEBUG) System.out.println("The InputStream class is: " + stream.getClass().getName());
+         if(DEBUG) System.out.println("The BufferedInputStream class is: " + bs.getClass().getName());
+         if(DEBUG) System.out.println("The bytes avialable in stream before reading the header: " + bs.available());
+
+         dataVersion = ICUBinary.readHeader(bs,DATA_FORMAT_ID,this);
+
+         if(DEBUG) System.out.println("The bytes available in stream after reading the header: " + bs.available());
+
+         readData(bs);
+         // A close failure on the success path is still reported, as before.
+         stream.close();
+         completed = true;
+     }catch(IOException ex){
+         // Keep the original message but chain the cause so the stack trace is not lost.
+         throw new RuntimeException("Data file "+ resolvedName+ " is corrupt - " + ex.getMessage(), ex);
+     }finally{
+         if(!completed) {
+             // Reading failed (checked or unchecked): do not leak the stream.
+             try {
+                 stream.close();
+             } catch(IOException ignored) {
+                 // The primary failure is already propagating; a secondary
+                 // close failure would only mask it.
+             }
+         }
+     }
+ }
+ /**
+  * Opens the named resource via ICUData and parses it.
+  *
+  * @return a reader for the resource, or null when ICUData.getStream()
+  *         returns no stream for resolvedName
+  */
+ static ICUResourceBundleReader getReader(String resolvedName, ClassLoader root) {
+     InputStream in = ICUData.getStream(root,resolvedName);
+     if(in != null) {
+         return new ICUResourceBundleReader(in, resolvedName);
+     }
+     return null;
+ }
+
+ /**
+  * Adopts the key strings of a pool bundle so that pool-relative key
+  * offsets in this bundle can be resolved.
+  *
+  * @throws IllegalStateException if the given reader is not a pool bundle,
+  *         or if its pool checksum differs from this bundle's
+  */
+ void setPoolBundleKeys(ICUResourceBundleReader poolBundleReader) {
+     if(poolBundleReader.isPoolBundle) {
+         int poolChecksum = poolBundleReader.indexes[URES_INDEX_POOL_CHECKSUM];
+         int ownChecksum = indexes[URES_INDEX_POOL_CHECKSUM];
+         if(poolChecksum == ownChecksum) {
+             poolBundleKeys = poolBundleReader.keyStrings;
+             poolBundleKeysAsString = poolBundleReader.keyStringsAsString;
+             return;
+         }
+         throw new IllegalStateException("pool.res has a different checksum than this bundle");
+     }
+     throw new IllegalStateException("pool.res is not a pool bundle");
+ }
+
+ // See res_init() in ICU4C/source/common/uresdata.c.
+ /**
+  * Parses the bundle body that follows the ICU data header: the root
+  * resource handle, the indexes[] array, the local key strings, the
+  * 16-bit-units array and the block of mixed-type resources.
+  * See res_init() in ICU4C/source/common/uresdata.c.
+  *
+  * @param stream positioned immediately after the data header
+  * @throws IOException if the stream ends early or indexes[] is too short
+  *         to contain the bundle top
+  */
+ private void readData(InputStream stream) throws IOException {
+     DataInputStream ds = new DataInputStream(stream);
+
+     if(DEBUG) System.out.println("The DataInputStream class is: " + ds.getClass().getName());
+     if(DEBUG) System.out.println("The available bytes in the stream before reading the data: "+ds.available());
+
+     rootRes = ds.readInt();
+
+     // read the variable-length indexes[] array
+     int indexes0 = ds.readInt();
+     int indexLength = indexes0 & 0xff;
+     if(indexLength <= URES_INDEX_BUNDLE_TOP) {
+         // indexes[URES_INDEX_BUNDLE_TOP] is read unconditionally below;
+         // report a short array as corrupt data instead of an index error.
+         throw new IOException("not enough indexes: " + indexLength);
+     }
+     indexes = new int[indexLength];
+     indexes[URES_INDEX_LENGTH] = indexes0;
+     for(int i=1; i<indexLength; i++) {
+         indexes[i] = ds.readInt();
+     }
+     // So far the root handle plus indexLength ints have been consumed.
+     resourceBottom = (1 + indexLength) << 2;
+
+     if(indexLength > URES_INDEX_ATTRIBUTES) {
+         // determine if this resource bundle falls back to a parent bundle
+         // along normal locale ID fallback
+         int att = indexes[URES_INDEX_ATTRIBUTES];
+         noFallback = (att & URES_ATT_NO_FALLBACK) != 0;
+         isPoolBundle = (att & URES_ATT_IS_POOL_BUNDLE) != 0;
+         usesPoolBundle = (att & URES_ATT_USES_POOL_BUNDLE) != 0;
+     }
+
+     int length = indexes[URES_INDEX_BUNDLE_TOP]*4;
+     if(DEBUG) System.out.println("The number of bytes in the bundle: "+length);
+
+     // Read the local key strings.
+     // The keyStrings include NUL characters corresponding to the bytes
+     // up to the end of the indexes.
+     if(indexes[URES_INDEX_KEYS_TOP] > (1 + indexLength)) {
+         int keysBottom = (1 + indexLength) << 2;
+         int keysTop = indexes[URES_INDEX_KEYS_TOP] << 2;
+         resourceBottom = keysTop;
+         if(isPoolBundle) {
+             // Shift the key strings down:
+             // Pool bundle key strings are used with a 0-based index,
+             // unlike regular bundles' key strings for which indexes
+             // are based on the start of the bundle data.
+             keysTop -= keysBottom;
+             keysBottom = 0;
+         } else {
+             localKeyLimit = keysTop;
+         }
+         keyStrings = new byte[keysTop];
+         ds.readFully(keyStrings, keysBottom, keysTop - keysBottom);
+         if(isPoolBundle) {
+             // Overwrite trailing padding bytes so that the conversion works.
+             while(keysBottom < keysTop && keyStrings[keysTop - 1] == (byte)0xaa) {
+                 keyStrings[--keysTop] = 0;
+             }
+             keyStringsAsString = new String(keyStrings, "US-ASCII");
+         }
+     }
+
+     // Read the array of 16-bit units.
+     // We are not using
+     //   new String(keys, "UTF-16BE")
+     // because the 16-bit units may not be well-formed Unicode.
+     if( indexLength > URES_INDEX_16BIT_TOP &&
+         indexes[URES_INDEX_16BIT_TOP] > indexes[URES_INDEX_KEYS_TOP]
+     ) {
+         int num16BitUnits = (indexes[URES_INDEX_16BIT_TOP] -
+                              indexes[URES_INDEX_KEYS_TOP]) * 2;
+         char[] c16BitUnits = new char[num16BitUnits];
+         // Note: Calling readFully() to read data into byte[] and copy
+         // the data to char[] is faster than calling readChar() one by one
+         // for large data
+         byte[] c16BitUnitsBytes = new byte[num16BitUnits * 2];
+         ds.readFully(c16BitUnitsBytes);
+         for (int i = 0; i < num16BitUnits; i++) {
+             // Big-endian pair; the cast to char discards the sign extension of the high byte.
+             c16BitUnits[i] = (char)((c16BitUnitsBytes[i*2] << 8) | (c16BitUnitsBytes[i*2 + 1] & 0xFF));
+         }
+         s16BitUnits = new String(c16BitUnits);
+         resourceBottom = indexes[URES_INDEX_16BIT_TOP] << 2;
+     } else {
+         // A single zero unit, so that offset 0 still reads as an empty resource.
+         s16BitUnits = "\0";
+     }
+
+     // Read the block of bytes for the mixed-type resources.
+     resourceBytes = new byte[length - resourceBottom];
+     ds.readFully(resourceBytes);
+ }
+
+ VersionInfo getVersion(){ // Wraps the four dataVersion bytes (as returned by ICUBinary.readHeader in the constructor) in a VersionInfo.
+ return VersionInfo.getInstance(dataVersion[0],dataVersion[1],dataVersion[2],dataVersion[3]);
+ }
+ public boolean isDataVersionAcceptable(byte version[]){
+ // while ICU4C can read formatVersion 1.0 and up,
+ // ICU4J requires 1.1 as a minimum
+ // formatVersion = version;
+ return ((version[0] == 1 && version[1] >= 1) || version[0] == 2);
+ }
+
+ int getRootResource() { // The root Resource handle: the first int that readData() reads from the bundle body.
+ return rootRes;
+ }
+ boolean getNoFallback() { // True when the URES_ATT_NO_FALLBACK bit was set in indexes[URES_INDEX_ATTRIBUTES].
+ return noFallback;
+ }
+ boolean getUsesPoolBundle() { // True when the URES_ATT_USES_POOL_BUNDLE bit was set in indexes[URES_INDEX_ATTRIBUTES].
+ return usesPoolBundle;
+ }
+
+ static int RES_GET_TYPE(int res) { // Resource type number: the upper 4 bits (31..28) of the handle, via unsigned shift.
+ return res >>> 28;
+ }
+ private static int RES_GET_OFFSET(int res) { // Offset (or direct value): the lower 28 bits of the handle.
+ return res & 0x0fffffff;
+ }
+ private int getResourceByteOffset(int offset) { // Converts a 32-bit-word offset into an index into resourceBytes, which starts at resourceBottom.
+ return (offset << 2) - resourceBottom;
+ }
+ /* get signed and unsigned integer values directly from the Resource handle */
+ static int RES_GET_INT(int res) { // Signed 28-bit value: shift the type bits out, then sign-extend with an arithmetic shift back.
+ return (res << 4) >> 4;
+ }
+ static int RES_GET_UINT(int res) { // Unsigned 28-bit value: the lower 28 bits of the handle.
+ return res & 0x0fffffff;
+ }
+ static boolean URES_IS_TABLE(int type) { // True for any of the three table resource types.
+ return type==UResourceBundle.TABLE || type==ICUResourceBundle.TABLE16 || type==ICUResourceBundle.TABLE32;
+ }
+
+ private static byte[] emptyBytes = new byte[0];
+ private static ByteBuffer emptyByteBuffer = ByteBuffer.allocate(0).asReadOnlyBuffer();
+ private static char[] emptyChars = new char[0];
+ private static int[] emptyInts = new int[0];
+ private static String emptyString = "";
+
+ private char getChar(int offset) { // Big-endian 16-bit unit at resourceBytes[offset..offset+1]; the char cast drops the high byte's sign extension.
+ return (char)((resourceBytes[offset] << 8) | (resourceBytes[offset + 1] & 0xff));
+ }
+ /**
+  * Reads count consecutive big-endian 16-bit units from resourceBytes,
+  * starting at the given byte offset.
+  */
+ private char[] getChars(int offset, int count) {
+     char[] result = new char[count];
+     int pos = offset;
+     for(int i = 0; i < count; ++i) {
+         result[i] = getChar(pos);
+         pos += 2;
+     }
+     return result;
+ }
+ private int getInt(int offset) { // Big-endian 32-bit value at resourceBytes[offset..offset+3].
+ return (resourceBytes[offset] << 24) |
+ ((resourceBytes[offset+1] & 0xff) << 16) |
+ ((resourceBytes[offset+2] & 0xff) << 8) |
+ ((resourceBytes[offset+3] & 0xff));
+ }
+ /**
+  * Reads count consecutive big-endian 32-bit values from resourceBytes,
+  * starting at the given byte offset.
+  */
+ private int[] getInts(int offset, int count) {
+     int[] result = new int[count];
+     int pos = offset;
+     for(int i = 0; i < count; ++i) {
+         result[i] = getInt(pos);
+         pos += 4;
+     }
+     return result;
+ }
+ /**
+  * Reads the key offsets of a Table16: a count unit at the given index of
+  * the 16-bit-units string, followed by that many key offset units.
+  */
+ private char[] getTable16KeyOffsets(int offset) {
+     int count = s16BitUnits.charAt(offset);
+     if(count <= 0) {
+         return emptyChars;
+     }
+     int first = offset + 1;
+     return s16BitUnits.substring(first, first + count).toCharArray();
+ }
+ /**
+  * Reads the key offsets of a Table: a 16-bit count at the given byte
+  * offset in resourceBytes, followed by that many 16-bit key offsets.
+  */
+ private char[] getTableKeyOffsets(int offset) {
+     int count = getChar(offset);
+     return count > 0 ? getChars(offset + 2, count) : emptyChars;
+ }
+ /**
+  * Reads the key offsets of a Table32: a 32-bit count at the given byte
+  * offset in resourceBytes, followed by that many 32-bit key offsets.
+  */
+ private int[] getTable32KeyOffsets(int offset) {
+     int count = getInt(offset);
+     return count > 0 ? getInts(offset + 4, count) : emptyInts;
+ }
+
+ /** Refers to ASCII key string bytes, for key string matching. */
+ private static final class ByteSequence {
+ private byte[] bytes;
+ private int offset;
+ public ByteSequence(byte[] bytes, int offset) {
+ this.bytes = bytes;
+ this.offset = offset;
+ }
+ public byte charAt(int index) {
+ return bytes[offset + index];
+ }
+ }
+ /**
+  * Builds a String from the NUL-terminated key that starts at keyOffset
+  * in the local keyStrings bytes; each byte is cast to one char.
+  */
+ private String makeKeyStringFromBytes(int keyOffset) {
+     StringBuilder key = new StringBuilder();
+     for(int pos = keyOffset; keyStrings[pos] != 0; ++pos) {
+         key.append((char)keyStrings[pos]);
+     }
+     return key.toString();
+ }
+ /**
+  * Returns the NUL-terminated key that starts at keyOffset in the pool
+  * bundle's key string (the terminator is not included).
+  */
+ private String makeKeyStringFromString(int keyOffset) {
+     // A missing terminator yields -1 here, which makes substring() throw
+     // StringIndexOutOfBoundsException, as running off the end with charAt() would.
+     int terminator = poolBundleKeysAsString.indexOf('\0', keyOffset);
+     return poolBundleKeysAsString.substring(keyOffset, terminator);
+ }
+ /**
+  * Resolves a 16-bit key offset to its key bytes: offsets below
+  * localKeyLimit are local; others are pool offsets after subtracting localKeyLimit.
+  */
+ private ByteSequence RES_GET_KEY16(char keyOffset) {
+     boolean inPool = keyOffset >= localKeyLimit;
+     return inPool
+         ? new ByteSequence(poolBundleKeys, keyOffset - localKeyLimit)
+         : new ByteSequence(keyStrings, keyOffset);
+ }
+ /**
+  * Returns the key String for a 16-bit key offset, reading either the
+  * local key bytes or the pool bundle's key string.
+  */
+ private String getKey16String(int keyOffset) {
+     if(keyOffset >= localKeyLimit) {
+         return makeKeyStringFromString(keyOffset - localKeyLimit);
+     }
+     return makeKeyStringFromBytes(keyOffset);
+ }
+ /**
+  * Resolves a 32-bit key offset to its key bytes: non-negative offsets
+  * are local; negative ones are pool offsets once bit 31 is cleared.
+  */
+ private ByteSequence RES_GET_KEY32(int keyOffset) {
+     if(keyOffset < 0) {
+         return new ByteSequence(poolBundleKeys, keyOffset & 0x7fffffff);
+     }
+     return new ByteSequence(keyStrings, keyOffset);
+ }
+ /**
+  * Returns the key String for a 32-bit key offset, reading either the
+  * local key bytes or the pool bundle's key string.
+  */
+ private String getKey32String(int keyOffset) {
+     if(keyOffset < 0) {
+         return makeKeyStringFromString(keyOffset & 0x7fffffff);
+     }
+     return makeKeyStringFromBytes(keyOffset);
+ }
+ // Compare the length-specified input key with the
+ // NUL-terminated tableKey.
+ private static int compareKeys(CharSequence key, ByteSequence tableKey) {
+ int i;
+ for(i = 0; i < key.length(); ++i) {
+ int c2 = tableKey.charAt(i);
+ if(c2 == 0) {
+ return 1; // key > tableKey because key is longer.
+ }
+ int diff = (int)key.charAt(i) - c2;
+ if(diff != 0) {
+ return diff;
+ }
+ }
+ return -(int)tableKey.charAt(i);
+ }
+ private int compareKeys(CharSequence key, char keyOffset) { // Compares key with the table key addressed by a 16-bit key offset.
+ return compareKeys(key, RES_GET_KEY16(keyOffset));
+ }
+ private int compareKeys32(CharSequence key, int keyOffset) { // Compares key with the table key addressed by a 32-bit key offset.
+ return compareKeys(key, RES_GET_KEY32(keyOffset));
+ }
+
+ /**
+  * Returns the string value of a String (type 0) or String-v2 resource.
+  *
+  * @return the string, the empty string for an empty resource,
+  *         or null if res is neither of the two string types
+  */
+ String getString(int res) {
+     int offset=RES_GET_OFFSET(res);
+     if(RES_GET_TYPE(res)!=ICUResourceBundle.STRING_V2) {
+         if(res!=offset) {
+             return null;  // some type bits are set, so this is not a type-0 string
+         }
+         if(res==0) {
+             return emptyString;
+         }
+         int byteOffset=getResourceByteOffset(offset);
+         return new String(getChars(byteOffset+4, getInt(byteOffset)));
+     }
+
+     // String-v2: stored in the 16-bit-units array.
+     int first = s16BitUnits.charAt(offset);
+     if((first&0xfffffc00)!=0xdc00) {  // C: if(!U16_IS_TRAIL(first)) {
+         // Implicit length: the string runs up to the terminating NUL.
+         if(first==0) {
+             return emptyString;
+         }
+         int end=offset+1;
+         while(s16BitUnits.charAt(end)!=0) {
+             ++end;
+         }
+         return s16BitUnits.substring(offset, end);
+     }
+
+     // Explicit length, encoded in one to three leading units.
+     int length;
+     if(first<0xdfef) {
+         length=first&0x3ff;
+         offset+=1;
+     } else if(first<0xdfff) {
+         length=((first-0xdfef)<<16)|s16BitUnits.charAt(offset+1);
+         offset+=2;
+     } else {
+         length=((int)s16BitUnits.charAt(offset+1)<<16)|s16BitUnits.charAt(offset+2);
+         offset+=3;
+     }
+     return s16BitUnits.substring(offset, offset+length);
+ }
+
+ /**
+  * Returns the path string of an alias resource.
+  *
+  * @return the alias string, the empty string for offset 0,
+  *         or null if res is not an alias resource
+  */
+ String getAlias(int res) {
+     if(RES_GET_TYPE(res)!=ICUResourceBundle.ALIAS) {
+         return null;
+     }
+     int offset=RES_GET_OFFSET(res);
+     if(offset==0) {
+         return emptyString;
+     }
+     int byteOffset=getResourceByteOffset(offset);
+     int length=getInt(byteOffset);
+     return new String(getChars(byteOffset+4, length));
+ }
+
+ /**
+  * Copies the bytes of a binary resource into an array.
+  *
+  * @param ba an array to reuse; it is used only if it is non-null and its
+  *           length equals the resource length, otherwise a new array is allocated
+  * @return the filled array, the shared empty array for offset 0,
+  *         or null if res is not a binary resource
+  */
+ byte[] getBinary(int res, byte[] ba) {
+     if(RES_GET_TYPE(res)!=UResourceBundle.BINARY) {
+         return null;
+     }
+     int offset=RES_GET_OFFSET(res);
+     if(offset==0) {
+         return emptyBytes;
+     }
+     int byteOffset=getResourceByteOffset(offset);
+     int length=getInt(byteOffset);
+     byte[] target=(ba!=null && ba.length==length) ? ba : new byte[length];
+     System.arraycopy(resourceBytes, byteOffset+4, target, 0, length);
+     return target;
+ }
+
+ /**
+  * Returns a read-only ByteBuffer over the bytes of a binary resource.
+  *
+  * @return the buffer, an empty read-only buffer for offset 0,
+  *         or null if res is not a binary resource
+  */
+ ByteBuffer getBinary(int res) {
+     if(RES_GET_TYPE(res)!=UResourceBundle.BINARY) {
+         return null;
+     }
+     int offset=RES_GET_OFFSET(res);
+     if(offset==0) {
+         // Hand out a duplicate rather than the shared emptyByteBuffer itself,
+         // in case it matters whether the buffer's mark is defined or undefined.
+         return emptyByteBuffer.duplicate();
+     }
+     int byteOffset=getResourceByteOffset(offset);
+     int length=getInt(byteOffset);
+     ByteBuffer window=ByteBuffer.wrap(resourceBytes, byteOffset+4, length);
+     return window.slice().asReadOnlyBuffer();
+ }
+
+ /**
+  * Returns the values of an integer vector resource.
+  *
+  * @return a new array with the values, the shared empty array for
+  *         offset 0, or null if res is not an integer vector
+  */
+ int[] getIntVector(int res) {
+     if(RES_GET_TYPE(res)!=UResourceBundle.INT_VECTOR) {
+         return null;
+     }
+     int offset=RES_GET_OFFSET(res);
+     if(offset==0) {
+         return emptyInts;
+     }
+     int byteOffset=getResourceByteOffset(offset);
+     int length=getInt(byteOffset);
+     return getInts(byteOffset+4, length);
+ }
+
+ /**
+  * Creates the container object for an Array or Array16 resource.
+  *
+  * @return an empty Container for offset 0, an Array or Array16 otherwise,
+  *         or null if res is not an array resource
+  */
+ Container getArray(int res) {
+     int type=RES_GET_TYPE(res);
+     int offset=RES_GET_OFFSET(res);
+     boolean is32=(type==UResourceBundle.ARRAY);
+     boolean is16=(type==ICUResourceBundle.ARRAY16);
+     if(!is32 && !is16) {
+         return null;
+     }
+     if(offset==0) {
+         return new Container(this);
+     }
+     if(is32) {
+         return new Array(this, offset);
+     }
+     return new Array16(this, offset);
+ }
+
+ /**
+  * Creates the table object for a Table, Table16 or Table32 resource.
+  *
+  * @return an empty Table for offset 0, the matching Table subclass
+  *         otherwise, or null if res is not a table resource
+  */
+ Table getTable(int res) {
+     int type=RES_GET_TYPE(res);
+     int offset=RES_GET_OFFSET(res);
+     if(!URES_IS_TABLE(type)) {
+         return null;
+     }
+     if(offset==0) {
+         return new Table(this);
+     }
+     if(type==UResourceBundle.TABLE) {
+         return new Table1632(this, offset);
+     }
+     if(type==ICUResourceBundle.TABLE16) {
+         return new Table16(this, offset);
+     }
+     return new Table32(this, offset);
+ }
+
+ // Container value classes --------------------------------------------- ***
+
+ static class Container {
+ protected ICUResourceBundleReader reader;
+ protected int size;
+ protected int itemsOffset;
+
+ int getSize() {
+ return size;
+ }
+ int getContainerResource(int index) {
+ return ICUResourceBundle.RES_BOGUS;
+ }
+ protected int getContainer16Resource(int index) {
+ if (index < 0 || size <= index) {
+ return ICUResourceBundle.RES_BOGUS;
+ }
+ return (ICUResourceBundle.STRING_V2 << 28) |
+ reader.s16BitUnits.charAt(itemsOffset + index);
+ }
+ protected int getContainer32Resource(int index) {
+ if (index < 0 || size <= index) {
+ return ICUResourceBundle.RES_BOGUS;
+ }
+ return reader.getInt(itemsOffset + 4 * index);
+ }
+ Container(ICUResourceBundleReader reader) {
+ this.reader = reader;
+ }
+ }
+ private static final class Array extends Container {
+ int getContainerResource(int index) {
+ return getContainer32Resource(index);
+ }
+ Array(ICUResourceBundleReader reader, int offset) {
+ super(reader);
+ offset = reader.getResourceByteOffset(offset);
+ size = reader.getInt(offset);
+ itemsOffset = offset + 4;
+ }
+ }
+ private static final class Array16 extends Container { // Array16 resource, read from the 16-bit-units string.
+ int getContainerResource(int index) {
+ return getContainer16Resource(index);
+ }
+ Array16(ICUResourceBundleReader reader, int offset) {
+ super(reader);
+ size = reader.s16BitUnits.charAt(offset); // the unit at offset is the item count
+ itemsOffset = offset + 1; // items follow the count unit
+ }
+ }
+ /**
+  * Base class for table values: a container whose items are addressed by key.
+  * Subclasses fill in either keyOffsets (16-bit key offsets) or
+  * key32Offsets (32-bit key offsets). An instance of this class itself has
+  * size 0, so every lookup fails.
+  */
+ static class Table extends Container {
+     protected char[] keyOffsets;
+     protected int[] key32Offsets;
+
+     /**
+      * Returns the key of the item at index, or null if index is out of range.
+      */
+     String getKey(int index) {
+         if (index < 0 || size <= index) {
+             return null;
+         }
+         return keyOffsets != null ?
+                 reader.getKey16String(keyOffsets[index]) :
+                 reader.getKey32String(key32Offsets[index]);
+     }
+     private static final int URESDATA_ITEM_NOT_FOUND = -1;
+     /**
+      * Binary-searches the table keys for the given key. This relies on the
+      * keys being stored in ascending order as defined by compareKeys().
+      *
+      * @return the index of the matching item, or a negative value if there is none
+      */
+     int findTableItem(CharSequence key) {
+         int mid, start, limit;
+         int result;
+
+         /* do a binary search for the key */
+         start=0;
+         limit=size;
+         while(start<limit) {
+             mid = (start+limit)>>>1;  // overflow-safe midpoint
+             if (keyOffsets != null) {
+                 result = reader.compareKeys(key, keyOffsets[mid]);
+             } else {
+                 result = reader.compareKeys32(key, key32Offsets[mid]);
+             }
+             if (result < 0) {
+                 limit = mid;
+             } else if (result > 0) {
+                 start = mid + 1;
+             } else {
+                 /* We found it! */
+                 return mid;
+             }
+         }
+         return URESDATA_ITEM_NOT_FOUND;  /* not found or table is empty. */
+     }
+     /**
+      * Returns the resource handle stored under resKey. A missing key is
+      * passed on as a negative index, for which the Container accessors
+      * in this file return RES_BOGUS.
+      */
+     int getTableResource(String resKey) {
+         return getContainerResource(findTableItem(resKey));
+     }
+     Table(ICUResourceBundleReader reader) {
+         super(reader);
+     }
+ }
+ private static final class Table1632 extends Table {
+ int getContainerResource(int index) {
+ return getContainer32Resource(index);
+ }
+ Table1632(ICUResourceBundleReader reader, int offset) {
+ super(reader);
+ offset = reader.getResourceByteOffset(offset);
+ keyOffsets = reader.getTableKeyOffsets(offset);
+ size = keyOffsets.length;
+ itemsOffset = offset + 2 * ((size + 2) & ~1); // Skip padding for 4-alignment.
+ }
+ }
+ private static final class Table16 extends Table { // Table16 resource, read from the 16-bit-units string.
+ int getContainerResource(int index) {
+ return getContainer16Resource(index);
+ }
+ Table16(ICUResourceBundleReader reader, int offset) {
+ super(reader);
+ keyOffsets = reader.getTable16KeyOffsets(offset);
+ size = keyOffsets.length;
+ itemsOffset = offset + 1 + size; // skip the count unit and the size key offset units
+ }
+ }
+ private static final class Table32 extends Table {
+ int getContainerResource(int index) {
+ return getContainer32Resource(index);
+ }
+ Table32(ICUResourceBundleReader reader, int offset) {
+ super(reader);
+ offset = reader.getResourceByteOffset(offset);
+ key32Offsets = reader.getTable32KeyOffsets(offset);
+ size = key32Offsets.length;
+ itemsOffset = offset + 4 * (1 + size);
+ }
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java b/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java
new file mode 100644
index 00000000000..561124ec295
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUResourceTableAccess.java
@@ -0,0 +1,103 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+/**
+ * Static utility functions for probing resource tables, used by ULocale and
+ * LocaleDisplayNames.
+ */
+public class ICUResourceTableAccess {
+ /**
+ * Loads the bundle for the base name of {@code locale} under {@code path} and
+ * delegates to {@link #getTableString(ICUResourceBundle, String, String, String)}
+ * with a null subtable name.
+ */
+ public static String getTableString(String path, ULocale locale, String tableName,
+         String itemName) {
+     String baseName = locale.getBaseName();
+     ICUResourceBundle bundle =
+             (ICUResourceBundle) UResourceBundle.getBundleInstance(path, baseName);
+     String noSubtable = null;
+     return getTableString(bundle, tableName, noSubtable, itemName);
+ }
+
+ /**
+ * Utility to fetch locale display data from resource bundle tables. Uses fallback
+ * through the "Fallback" resource if available.
+ *
+ * @param bundle the bundle in which to start the search
+ * @param tableName name of the table to search, e.g. "Countries" or "Languages"
+ * @param subtableName name of a subtable within the table, or null; the value
+ * "currency" is special-cased to return element 1 of the item's entry in the
+ * "Currencies" table
+ * @param item the key to look up; also the value returned when nothing is found
+ * @return the string found, or {@code item} if there is no match or a lookup
+ * throws an exception
+ */
+ public static String getTableString(ICUResourceBundle bundle, String tableName,
+         String subtableName, String item) {
+     try {
+         for (;;) {
+             // special case currency
+             if ("currency".equals(subtableName)) {
+                 ICUResourceBundle table = bundle.getWithFallback("Currencies");
+                 table = table.getWithFallback(item);
+                 return table.getString(1);
+             } else {
+                 ICUResourceBundle table = lookup(bundle, tableName);
+                 if (table == null) {
+                     return item;
+                 }
+                 ICUResourceBundle stable = table;
+                 if (subtableName != null) {
+                     stable = lookup(table, subtableName);
+                 }
+                 if (stable != null) {
+                     ICUResourceBundle sbundle = lookup(stable, item);
+                     if (sbundle != null) {
+                         return sbundle.getString(); // possible real exception
+                     }
+                 }
+
+                 // if we get here, stable was null, or sbundle was null
+                 if (subtableName == null) {
+                     // may be a deprecated code
+                     String currentName = null;
+                     if (tableName.equals("Countries")) {
+                         currentName = LocaleIDs.getCurrentCountryID(item);
+                     } else if (tableName.equals("Languages")) {
+                         currentName = LocaleIDs.getCurrentLanguageID(item);
+                     }
+                     // Only look up a replacement code when there is one. For any other
+                     // table currentName stays null, and an exception raised by a lookup
+                     // with a null name would be swallowed by the catch below, skipping
+                     // the "Fallback" step entirely.
+                     if (currentName != null) {
+                         ICUResourceBundle sbundle = lookup(table, currentName);
+                         if (sbundle != null) {
+                             return sbundle.getString(); // possible real exception
+                         }
+                     }
+                 }
+
+                 // still can't figure it out? try the fallback mechanism
+                 ICUResourceBundle fbundle = lookup(table, "Fallback");
+                 if (fbundle == null) {
+                     return item;
+                 }
+
+                 String fallbackLocale = fbundle.getString(); // again, possible exception
+                 if (fallbackLocale.length() == 0) {
+                     fallbackLocale = "root";
+                 }
+
+                 // Stop if the fallback points back at the locale we are already in.
+                 if (fallbackLocale.equals(table.getULocale().getName())) {
+                     return item;
+                 }
+
+                 bundle = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+                         bundle.getBaseName(), fallbackLocale);
+             }
+         }
+     } catch (Exception e) {
+         // If something is seriously wrong, we might call getString on a resource that is
+         // not a string. That will throw an exception, which we catch and ignore here.
+     }
+
+     return item;
+ }
+
+ // Utility to make the call sites in the above code cleaner: forwards to
+ // ICUResourceBundle.findResourceWithFallback with a null third argument.
+ // Callers above treat a null return as "resource not found".
+ private static ICUResourceBundle lookup(ICUResourceBundle bundle, String resName) {
+ return ICUResourceBundle.findResourceWithFallback(resName, bundle, null);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ICUService.java b/main/classes/core/src/com/ibm/icu/impl/ICUService.java
new file mode 100644
index 00000000000..cbd8c676ed5
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ICUService.java
@@ -0,0 +1,985 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2001-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.lang.ref.SoftReference;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.EventListener;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import com.ibm.icu.util.ULocale;
+
+/**
+ * A Service provides access to service objects that implement a
+ * particular service, e.g. transliterators. Users provide a String
+ * id (for example, a locale string) to the service, and get back an
+ * object for that id. Service objects can be any kind of object.
+ * The service object is cached and returned for later queries, so
+ * generally it should not be mutable, or the caller should clone the
+ * object before modifying it.
+ *
+ * Services 'canonicalize' the query id and use the canonical id to
+ * query for the service. The service also defines a mechanism to
+ * 'fallback' the id multiple times. Clients can optionally request
+ * the actual id that was matched by a query when they use an id to
+ * retrieve a service object.
+ *
+ * Service objects are instantiated by Factory objects registered with
+ * the service. The service queries each Factory in turn, from most recently
+ * registered to earliest registered, until one returns a service object.
+ * If none responds with a service object, a fallback id is generated,
+ * and the process repeats until a service object is returned or until
+ * the id has no further fallbacks.
+ *
+ * Factories can be dynamically registered and unregistered with the
+ * service. When registered, a Factory is installed at the head of
+ * the factory list, and so gets 'first crack' at any keys or fallback
+ * keys. When unregistered, it is removed from the service and can no
+ * longer be located through it. Service objects generated by this
+ * factory and held by the client are unaffected.
+ *
+ * ICUService uses Keys to query factories and perform
+ * fallback. The Key defines the canonical form of the id, and
+ * implements the fallback strategy. Custom Keys can be defined that
+ * parse complex IDs into components that Factories can more easily
+ * use. The Key can cache the results of this parsing to save
+ * repeated effort. ICUService provides convenience APIs that
+ * take Strings and generate default Keys for use in querying.
+ *
+ * ICUService provides API to get the list of ids publicly
+ * supported by the service (although queries aren't restricted to
+ * this list). This list contains only 'simple' IDs, and not fully
+ * unique ids. Factories are associated with each simple ID and
+ * the responsible factory can also return a human-readable localized
+ * version of the simple ID, for use in user interfaces. ICUService
+ * can also provide a sorted collection of the all the localized visible
+ * ids.
+ *
+ * ICUService implements ICUNotifier, so that clients can register
+ * to receive notification when factories are added or removed from
+ * the service. ICUService provides a default EventListener subinterface,
+ * ServiceListener, which can be registered with the service. When
+ * the service changes, the ServiceListener's serviceChanged method
+ * is called, with the service as the only argument.
+ *
+ * The ICUService API is both rich and generic, and it is expected
+ * that most implementations will statically 'wrap' ICUService to
+ * present a more appropriate API-- for example, to declare the type
+ * of the objects returned from get, to limit the factories that can
+ * be registered with the service, or to define their own listener
+ * interface with a custom callback method. They might also customize
+ * ICUService by overriding it, for example, to customize the Key and
+ * fallback strategy. ICULocaleService is a customized service that
+ * uses Locale names as ids and uses Keys that implement the standard
+ * resource bundle fallback strategy.
+ */
+public class ICUService extends ICUNotifier {
+ /**
+ * Name used for debugging.
+ */
+ protected final String name;
+
+ /**
+ * Constructor.
+ */
+ public ICUService() {
+ name = "";
+ }
+
+ private static final boolean DEBUG = ICUDebug.enabled("service");
+ /**
+ * Construct with a name (useful for debugging).
+ */
+ public ICUService(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Access to factories is protected by a read-write lock. This is
+ * to allow multiple threads to read concurrently, but keep
+ * changes to the factory list atomic with respect to all readers.
+ */
+ private final ICURWLock factoryLock = new ICURWLock();
+
+ /**
+ * All the factories registered with this service. New factories are inserted at
+ * index 0, so the list runs from most recently registered to earliest registered.
+ */
+ private final List<Factory> factories = new ArrayList<Factory>();
+
+ /**
+ * Record the default number of factories for this service.
+ * Can be set by markDefault.
+ */
+ private int defaultSize = 0;
+
+ /**
+ * Keys are used to communicate with factories to generate an
+ * instance of the service. Keys define how ids are
+ * canonicalized, provide both a current id and a current
+ * descriptor to use in querying the cache and factories, and
+ * determine the fallback strategy.
+ *
+ * Keys provide both a currentDescriptor and a currentID.
+ * The descriptor contains an optional prefix, followed by '/'
+ * and the currentID. Factories that handle complex keys,
+ * for example number format factories that generate multiple
+ * kinds of formatters for the same locale, use the descriptor
+ * to provide a fully unique identifier for the service object,
+ * while using the currentID (in this case, the locale string),
+ * as the visible IDs that can be localized.
+ *
+ * <p>The default implementation of Key has no fallbacks and
+ * has no custom descriptors.
+ */
+ public static class Key {
+ private final String id;
+
+ /**
+ * Construct a key from an id.
+ */
+ public Key(String id) {
+ this.id = id;
+ }
+
+ /**
+ * Return the original ID used to construct this key.
+ */
+ public final String id() {
+ return id;
+ }
+
+ /**
+ * Return the canonical version of the original ID. This implementation
+ * returns the original ID unchanged.
+ */
+ public String canonicalID() {
+ return id;
+ }
+
+ /**
+ * Return the (canonical) current ID. This implementation
+ * returns the canonical ID.
+ */
+ public String currentID() {
+ return canonicalID();
+ }
+
+ /**
+ * Return the current descriptor. This implementation returns
+ * "/" followed by the current ID, i.e. a descriptor with an empty
+ * prefix. The current descriptor is used to fully
+ * identify an instance of the service in the cache. A
+ * factory may handle all descriptors for an ID, or just a
+ * particular descriptor. The factory can either parse the
+ * descriptor or use custom API on the key in order to
+ * instantiate the service.
+ */
+ public String currentDescriptor() {
+ return "/" + currentID();
+ }
+
+ /**
+ * If the key has a fallback, modify the key and return true,
+ * otherwise return false. The current ID will change if there
+ * is a fallback. No currentIDs should be repeated, and fallback
+ * must eventually return false. This implementation has no fallbacks
+ * and always returns false.
+ */
+ public boolean fallback() {
+ return false;
+ }
+
+ /**
+ * If a key created from id would eventually fallback to match the
+ * canonical ID of this key, return true. This implementation returns
+ * true only when idToCheck equals the canonical ID.
+ */
+ public boolean isFallbackOf(String idToCheck) {
+ return canonicalID().equals(idToCheck);
+ }
+ }
+
+ /**
+ * Factories generate the service objects maintained by the
+ * service. A factory generates a service object from a key,
+ * updates id->factory mappings, and returns the display name for
+ * a supported id.
+ */
+ public static interface Factory {
+
+ /**
+ * Create a service object from the key, if this factory
+ * supports the key. Otherwise, return null.
+ *
+ * If the factory supports the key, then it can call
+ * the service's getKey(Key, String[], Factory) method
+ * passing itself as the factory to get the object that
+ * the service would have created prior to the factory's
+ * registration with the service. This can change the
+ * key, so any information required from the key should
+ * be extracted before making such a callback.
+ */
+ public Object create(Key key, ICUService service);
+
+ /**
+ * Update the result IDs (not descriptors) to reflect the IDs
+ * this factory handles. This function and getDisplayName are
+ * used to support ICUService.getDisplayNames. Basically, the
+ * factory has to determine which IDs it will permit to be
+ * available, and of those, which it will provide localized
+ * display names for. In most cases this reflects the IDs that
+ * the factory directly supports.
+ */
+ public void updateVisibleIDs(Map result);
+
+ /**
+ * Return the display name for this id in the provided locale.
+ * The id is a visible id, not a descriptor. If the id is
+ * not visible or not defined by the factory, return null.
+ * If locale is null, return id unchanged.
+ */
+ public String getDisplayName(String id, ULocale locale);
+ }
+
+ /**
+ * A default implementation of factory. This provides default
+ * implementations for subclasses, and implements a singleton
+ * factory that matches a single id and returns a single
+ * (possibly deferred-initialized) instance. This implements
+ * updateVisibleIDs to add a mapping from its ID to itself
+ * if visible is true, or to remove any existing mapping
+ * for its ID if visible is false.
+ */
+ public static class SimpleFactory implements Factory {
+ protected Object instance;
+ protected String id;
+ protected boolean visible;
+
+ /**
+ * Convenience constructor that calls SimpleFactory(Object, String, boolean)
+ * with visible true.
+ */
+ public SimpleFactory(Object instance, String id) {
+ this(instance, id, true);
+ }
+
+ /**
+ * Construct a simple factory that maps a single id to a single
+ * service instance. If visible is true, the id will be visible.
+ * Neither the instance nor the id can be null.
+ */
+ public SimpleFactory(Object instance, String id, boolean visible) {
+ if (instance == null || id == null) {
+ throw new IllegalArgumentException("Instance or id is null");
+ }
+ this.instance = instance;
+ this.id = id;
+ this.visible = visible;
+ }
+
+ /**
+ * Return the service instance if the factory's id is equal to
+ * the key's currentID. Service is ignored.
+ */
+ public Object create(Key key, ICUService service) {
+ if (id.equals(key.currentID())) {
+ return instance;
+ }
+ return null;
+ }
+
+ /**
+ * If visible, adds a mapping from id -> this to the result,
+ * otherwise removes id from result.
+ */
+ public void updateVisibleIDs(Map result) {
+ if (visible) {
+ result.put(id, this);
+ } else {
+ result.remove(id);
+ }
+ }
+
+ /**
+ * If this.id equals id, returns id regardless of locale,
+ * otherwise returns null. (This default implementation has
+ * no localized id information.)
+ */
+ public String getDisplayName(String identifier, ULocale locale) {
+ return (visible && id.equals(identifier)) ? identifier : null;
+ }
+
+ /**
+ * For debugging.
+ */
+ public String toString() {
+ StringBuilder buf = new StringBuilder(super.toString());
+ buf.append(", id: ");
+ buf.append(id);
+ buf.append(", visible: ");
+ buf.append(visible);
+ return buf.toString();
+ }
+ }
+
+ /**
+ * Convenience override for get(String, String[]). This uses
+ * createKey to create a key for the provided descriptor.
+ */
+ public Object get(String descriptor) {
+ return getKey(createKey(descriptor), null);
+ }
+
+ /**
+ * Convenience override for get(Key, String[]). This uses
+ * createKey to create a key from the provided descriptor.
+ */
+ public Object get(String descriptor, String[] actualReturn) {
+ if (descriptor == null) {
+ throw new NullPointerException("descriptor must not be null");
+ }
+ return getKey(createKey(descriptor), actualReturn);
+ }
+
+ /**
+ * Convenience override for getKey(Key, String[]) that passes null
+ * for actualReturn.
+ */
+ public Object getKey(Key key) {
+ return getKey(key, null);
+ }
+
+ /**
+ * Given a key, return a service object, and, if actualReturn
+ * is not null, the descriptor with which it was found in the
+ * first element of actualReturn. If no service object matches
+ * this key, return the result of handleDefault (null unless a
+ * subclass overrides it).
+ *
+ * This queries the cache using the key's descriptor, and if no
+ * object in the cache matches it, tries the key on each
+ * registered factory, in order. If none generates a service
+ * object for the key, repeats the process with each fallback of
+ * the key, until either one returns a service object, or the key
+ * has no fallback.
+ *
+ * If key is null, or no factories are registered, the result of
+ * handleDefault is returned directly.
+ */
+ public Object getKey(Key key, String[] actualReturn) {
+ return getKey(key, actualReturn, null);
+ }
+
+ // debugging
+ // Map hardRef;
+
+ /**
+ * Core lookup used by the other get/getKey overloads.
+ *
+ * If no factories are registered, or key is null, or nothing matches, the result
+ * of handleDefault is returned. Otherwise the cache is consulted with the key's
+ * current descriptor, then each factory is tried in list order, and the process
+ * repeats for every fallback of the key.
+ *
+ * @param key the key to resolve; may be modified by its fallback() method
+ * @param actualReturn if not null, element 0 receives the descriptor that matched,
+ * with a leading "/" stripped
+ * @param factory if not null, only factories that follow this one in the factory
+ * list are tried, and the result is not written to the cache
+ * @return the service object, or the result of handleDefault
+ * @throws IllegalStateException if factory is not null and is not registered
+ */
+ public Object getKey(Key key, String[] actualReturn, Factory factory) {
+     if (factories.size() == 0) {
+         return handleDefault(key, actualReturn);
+     }
+
+     CacheEntry result = null;
+     if (key != null) {
+         // Trace only after the null check: a null key is a supported input and
+         // must not fail just because debugging is enabled.
+         if (DEBUG) System.out.println("Service: " + name + " key: " + key.canonicalID());
+
+         try {
+             // The factory list can't be modified until we're done,
+             // otherwise we might update the cache with an invalid result.
+             // The cache has to stay in synch with the factory list.
+             factoryLock.acquireRead();
+
+             Map<String, CacheEntry> cache = null;
+             SoftReference<Map<String, CacheEntry>> cref = cacheref; // copy so we don't need to sync on this
+             if (cref != null) {
+                 if (DEBUG) System.out.println("Service " + name + " ref exists");
+                 cache = cref.get();
+             }
+             if (cache == null) {
+                 if (DEBUG) System.out.println("Service " + name + " cache was empty");
+                 // synchronized since additions and queries on the cache must be atomic
+                 // they can be interleaved, though
+                 cache = Collections.synchronizedMap(new HashMap<String, CacheEntry>());
+                 cref = new SoftReference<Map<String, CacheEntry>>(cache);
+             }
+
+             String currentDescriptor = null;
+             ArrayList<String> cacheDescriptorList = null;
+             boolean putInCache = false;
+
+             int NDebug = 0;
+
+             int startIndex = 0;
+             int limit = factories.size();
+             boolean cacheResult = true;
+             if (factory != null) {
+                 // Start with the factory that follows the caller's factory in the list.
+                 for (int i = 0; i < limit; ++i) {
+                     if (factory == factories.get(i)) {
+                         startIndex = i + 1;
+                         break;
+                     }
+                 }
+                 if (startIndex == 0) {
+                     throw new IllegalStateException("Factory " + factory + " not registered with service: " + this);
+                 }
+                 // A result computed from a partial factory list must not be cached.
+                 cacheResult = false;
+             }
+
+             outer:
+             do {
+                 currentDescriptor = key.currentDescriptor();
+                 if (DEBUG) System.out.println(name + "[" + NDebug++ + "] looking for: " + currentDescriptor);
+                 result = cache.get(currentDescriptor);
+                 if (result != null) {
+                     if (DEBUG) System.out.println(name + " found with descriptor: " + currentDescriptor);
+                     break outer;
+                 } else {
+                     if (DEBUG) System.out.println("did not find: " + currentDescriptor + " in cache");
+                 }
+
+                 // first test of cache failed, so we'll have to update
+                 // the cache if we eventually succeed-- that is, if we're
+                 // going to update the cache at all.
+                 putInCache = cacheResult;
+
+                 int index = startIndex;
+                 while (index < limit) {
+                     Factory f = factories.get(index++);
+                     if (DEBUG) System.out.println("trying factory[" + (index-1) + "] " + f.toString());
+                     Object service = f.create(key, this);
+                     if (service != null) {
+                         result = new CacheEntry(currentDescriptor, service);
+                         if (DEBUG) System.out.println(name + " factory supported: " + currentDescriptor + ", caching");
+                         break outer;
+                     } else {
+                         if (DEBUG) System.out.println("factory did not support: " + currentDescriptor);
+                     }
+                 }
+
+                 // prepare to load the cache with all additional ids that
+                 // will resolve to result, assuming we'll succeed. We
+                 // don't want to keep querying on an id that's going to
+                 // fallback to the one that succeeded, we want to hit the
+                 // cache the first time next goaround.
+                 if (cacheDescriptorList == null) {
+                     cacheDescriptorList = new ArrayList<String>(5);
+                 }
+                 cacheDescriptorList.add(currentDescriptor);
+
+             } while (key.fallback());
+
+             if (result != null) {
+                 if (putInCache) {
+                     if (DEBUG) System.out.println("caching '" + result.actualDescriptor + "'");
+                     cache.put(result.actualDescriptor, result);
+                     if (cacheDescriptorList != null) {
+                         for (String desc : cacheDescriptorList) {
+                             if (DEBUG) System.out.println(name + " adding descriptor: '" + desc + "' for actual: '" + result.actualDescriptor + "'");
+
+                             cache.put(desc, result);
+                         }
+                     }
+                     // Atomic update. We held the read lock all this time
+                     // so we know our cache is consistent with the factory list.
+                     // We might stomp over a cache that some other thread
+                     // rebuilt, but that's the breaks. They're both good.
+                     cacheref = cref;
+                 }
+
+                 if (actualReturn != null) {
+                     // strip null prefix
+                     if (result.actualDescriptor.indexOf("/") == 0) {
+                         actualReturn[0] = result.actualDescriptor.substring(1);
+                     } else {
+                         actualReturn[0] = result.actualDescriptor;
+                     }
+                 }
+
+                 if (DEBUG) System.out.println("found in service: " + name);
+
+                 return result.service;
+             }
+         }
+         finally {
+             factoryLock.releaseRead();
+         }
+     }
+
+     if (DEBUG) System.out.println("not found in service: " + name);
+
+     return handleDefault(key, actualReturn);
+ }
+ // Softly-held cache from descriptor to the entry it resolved to; set to null by
+ // clearCaches and clearServiceCache.
+ private SoftReference<Map<String, CacheEntry>> cacheref;
+
+ // Record the actual id for this service in the cache, so we can return it
+ // even if we succeed later with a different id. Immutable pair of the
+ // descriptor that matched and the service object created for it.
+ private static final class CacheEntry {
+ final String actualDescriptor;
+ final Object service;
+ CacheEntry(String actualDescriptor, Object service) {
+ this.actualDescriptor = actualDescriptor;
+ this.service = service;
+ }
+ }
+
+
+ /**
+ * Default handler for this service if no factory in the list
+ * handled the key.
+ */
+ protected Object handleDefault(Key key, String[] actualIDReturn) {
+ return null;
+ }
+
+ /**
+ * Convenience override for getVisibleIDs(String) that passes null
+ * as the fallback, thus returning all visible IDs.
+ */
+ public Set getVisibleIDs() {
+ return getVisibleIDs(null);
+ }
+
+ /**
+ * Return a snapshot of the visible IDs for this service. This
+ * set will not change as Factories are added or removed, but the
+ * supported ids will, so there is no guarantee that all and only
+ * the ids in the returned set are visible and supported by the
+ * service in subsequent calls.
+ *
+ * matchID is passed to createKey to create a key. If the
+ * key is not null, it is used to filter out ids that don't have
+ * the key as a fallback.
+ *
+ * @param matchID id used to build the filtering key, or null for no filtering
+ * @return the key set of the visible-id map when there is no filter key,
+ * otherwise a new set holding only the ids accepted by the key's isFallbackOf
+ */
+ public Set<String> getVisibleIDs(String matchID) {
+     Set<String> result = getVisibleIDMap().keySet();
+
+     Key fallbackKey = createKey(matchID);
+
+     if (fallbackKey != null) {
+         Set<String> temp = new HashSet<String>(result.size());
+         for (String id : result) {
+             if (fallbackKey.isFallbackOf(id)) {
+                 temp.add(id);
+             }
+         }
+         result = temp;
+     }
+     return result;
+ }
+
+ /**
+ * Return a map from visible ids to factories. The map is unmodifiable and is
+ * held through a soft reference; it is rebuilt from the factory list when the
+ * reference has been cleared or reset.
+ */
+ private Map<String, Factory> getVisibleIDMap() {
+     Map<String, Factory> idcache = null;
+     SoftReference<Map<String, Factory>> ref = idref;
+     if (ref != null) {
+         idcache = ref.get();
+     }
+     while (idcache == null) {
+         synchronized (this) { // or idref-only lock?
+             if (ref == idref || idref == null) {
+                 // no other thread updated idref before we got the lock, so
+                 // grab the factory list and update it ourselves
+                 try {
+                     factoryLock.acquireRead();
+                     idcache = new HashMap<String, Factory>();
+                     // Walk from the end of the list to the front, so that entries
+                     // written by factories nearer the front overwrite earlier ones.
+                     ListIterator<Factory> lIter = factories.listIterator(factories.size());
+                     while (lIter.hasPrevious()) {
+                         Factory f = lIter.previous();
+                         f.updateVisibleIDs(idcache);
+                     }
+                     idcache = Collections.unmodifiableMap(idcache);
+                     idref = new SoftReference<Map<String, Factory>>(idcache);
+                 }
+                 finally {
+                     factoryLock.releaseRead();
+                 }
+             } else {
+                 // another thread updated idref, but gc may have stepped
+                 // in and undone its work, leaving idcache null. If so,
+                 // retry.
+                 ref = idref;
+                 idcache = ref.get();
+             }
+         }
+     }
+
+     return idcache;
+ }
+ // Softly-held, unmodifiable map from visible id to factory; set to null by clearCaches.
+ private SoftReference<Map<String, Factory>> idref;
+
+ /**
+ * Convenience override for getDisplayName(String, ULocale) that
+ * uses the current default locale.
+ */
+ public String getDisplayName(String id) {
+ return getDisplayName(id, ULocale.getDefault());
+ }
+
+ /**
+ * Given a visible id, return the display name in the requested locale.
+ * If there is no directly supported id corresponding to this id, the
+ * fallbacks of the key created from the id are tried in turn. Returns
+ * null if neither the id nor any fallback is visible, or if createKey
+ * yields no key for the id (as the default createKey does for null).
+ */
+ public String getDisplayName(String id, ULocale locale) {
+     Map<String, Factory> m = getVisibleIDMap();
+     Factory f = m.get(id);
+     if (f != null) {
+         return f.getDisplayName(id, locale);
+     }
+
+     // createKey may return null (it does for a null id); without this guard
+     // the fallback loop would throw a NullPointerException.
+     Key key = createKey(id);
+     if (key == null) {
+         return null;
+     }
+     while (key.fallback()) {
+         f = m.get(key.currentID());
+         if (f != null) {
+             return f.getDisplayName(id, locale);
+         }
+     }
+
+     return null;
+ }
+
+ /**
+ * Convenience override of getDisplayNames(ULocale, Comparator, String) that
+ * uses the current default Locale as the locale, null as
+ * the comparator, and null for the matchID.
+ */
+ public SortedMap getDisplayNames() {
+ ULocale locale = ULocale.getDefault();
+ return getDisplayNames(locale, null, null);
+ }
+
+ /**
+ * Convenience override of getDisplayNames(ULocale, Comparator, String) that
+ * uses null for the comparator, and null for the matchID.
+ */
+ public SortedMap getDisplayNames(ULocale locale) {
+ return getDisplayNames(locale, null, null);
+ }
+
+ /**
+ * Convenience override of getDisplayNames(ULocale, Comparator, String) that
+ * uses null for the matchID, thus returning all display names.
+ */
+ public SortedMap getDisplayNames(ULocale locale, Comparator com) {
+ return getDisplayNames(locale, com, null);
+ }
+
+ /**
+ * Convenience override of getDisplayNames(ULocale, Comparator, String) that
+ * uses null for the comparator.
+ */
+ public SortedMap getDisplayNames(ULocale locale, String matchID) {
+ return getDisplayNames(locale, null, matchID);
+ }
+
+ /**
+ * Return a snapshot of the mapping from display names to visible
+ * IDs for this service. This set will not change as factories
+ * are added or removed, but the supported ids will, so there is
+ * no guarantee that all and only the ids in the returned map will
+ * be visible and supported by the service in subsequent calls,
+ * nor is there any guarantee that the current display names match
+ * those in the set. The display names are sorted based on the
+ * comparator provided.
+ *
+ * @param locale the locale in which to produce display names
+ * @param com comparator used to sort the display names, or null for natural order
+ * @param matchID if createKey yields a key for it, only ids accepted by that
+ * key's isFallbackOf are kept; null keeps everything
+ * @return the cached unmodifiable map when there is no filter key, otherwise a
+ * new filtered map
+ */
+ public SortedMap<String, String> getDisplayNames(ULocale locale, Comparator com, String matchID) {
+     SortedMap<String, String> dncache = null;
+     LocaleRef ref = dnref;
+
+     if (ref != null) {
+         dncache = ref.get(locale, com);
+     }
+
+     while (dncache == null) {
+         synchronized (this) {
+             if (ref == dnref || dnref == null) {
+                 // Nobody replaced the cache since we looked: build it for this
+                 // locale/comparator pair from the visible-id map.
+                 dncache = new TreeMap<String, String>(com); // sorted
+
+                 Map<String, Factory> m = getVisibleIDMap();
+                 for (Entry<String, Factory> e : m.entrySet()) {
+                     String id = e.getKey();
+                     Factory f = e.getValue();
+                     dncache.put(f.getDisplayName(id, locale), id);
+                 }
+
+                 dncache = Collections.unmodifiableSortedMap(dncache);
+                 dnref = new LocaleRef(dncache, locale, com);
+             } else {
+                 // Another thread installed a cache; use it if it matches our
+                 // locale and comparator, otherwise loop and rebuild.
+                 ref = dnref;
+                 dncache = ref.get(locale, com);
+             }
+         }
+     }
+
+     Key matchKey = createKey(matchID);
+     if (matchKey == null) {
+         return dncache;
+     }
+
+     // Filter a private copy; the cached map itself is unmodifiable.
+     SortedMap<String, String> result = new TreeMap<String, String>(dncache);
+     Iterator<Entry<String, String>> iter = result.entrySet().iterator();
+     while (iter.hasNext()) {
+         Entry<String, String> e = iter.next();
+         if (!matchKey.isFallbackOf(e.getValue())) {
+             iter.remove();
+         }
+     }
+     return result;
+ }
+
+ // we define a class so we get atomic simultaneous access to the
+ // locale, comparator, and corresponding map.
+ private static class LocaleRef {
+     private final ULocale locale;
+     private SoftReference<SortedMap<String, String>> ref;
+     private Comparator com;
+
+     // Holds the display-name map softly, tagged with the locale and comparator
+     // it was built for.
+     LocaleRef(SortedMap<String, String> dnCache, ULocale locale, Comparator com) {
+         this.locale = locale;
+         this.com = com;
+         this.ref = new SoftReference<SortedMap<String, String>>(dnCache);
+     }
+
+     // Returns the cached map only if it is still reachable and was built for an
+     // equal locale and the same (or an equal) comparator; otherwise null.
+     SortedMap<String, String> get(ULocale loc, Comparator comp) {
+         SortedMap<String, String> m = ref.get();
+         if (m != null &&
+             this.locale.equals(loc) &&
+             (this.com == comp || (this.com != null && this.com.equals(comp)))) {
+
+             return m;
+         }
+         return null;
+     }
+ }
+ private LocaleRef dnref;
+
+ /**
+ * Return a snapshot of the currently registered factories. There
+ * is no guarantee that the list will still match the current
+ * factory list of the service subsequent to this call.
+ */
+ public final List factories() {
+ try {
+ factoryLock.acquireRead();
+ return new ArrayList(factories);
+ }
+ finally{
+ factoryLock.releaseRead();
+ }
+ }
+
+ /**
+ * A convenience override of registerObject(Object, String, boolean)
+ * that defaults visible to true.
+ */
+ public Factory registerObject(Object obj, String id) {
+ return registerObject(obj, id, true);
+ }
+
+ /**
+ * Register an object with the provided id. The id will be
+ * canonicalized. The canonicalized ID will be returned by
+ * getVisibleIDs if visible is true.
+ */
+ public Factory registerObject(Object obj, String id, boolean visible) {
+ String canonicalID = createKey(id).canonicalID();
+ return registerFactory(new SimpleFactory(obj, canonicalID, visible));
+ }
+
+ /**
+ * Register a Factory. The factory is inserted at the head of the
+ * factory list while holding the factory write lock, the caches are
+ * cleared, and listeners are notified after the lock is released.
+ * This implementation accepts all factories and always returns the
+ * factory that was passed in. Throws NullPointerException if the
+ * factory is null.
+ */
+ public final Factory registerFactory(Factory factory) {
+ if (factory == null) {
+ throw new NullPointerException();
+ }
+ try {
+ factoryLock.acquireWrite();
+ factories.add(0, factory);
+ clearCaches();
+ }
+ finally {
+ factoryLock.releaseWrite();
+ }
+ notifyChanged();
+ return factory;
+ }
+
+ /**
+ * Unregister a factory. The first matching registered factory will
+ * be removed from the list. Returns true if a matching factory was
+ * removed.
+ */
+ public final boolean unregisterFactory(Factory factory) {
+ if (factory == null) {
+ throw new NullPointerException();
+ }
+
+ boolean result = false;
+ try {
+ factoryLock.acquireWrite();
+ if (factories.remove(factory)) {
+ result = true;
+ clearCaches();
+ }
+ }
+ finally {
+ factoryLock.releaseWrite();
+ }
+
+ if (result) {
+ notifyChanged();
+ }
+ return result;
+ }
+
+ /**
+ * Reset the service to the default factories. The factory
+ * lock is acquired and then reInitializeFactories is called.
+ */
+ public final void reset() {
+ try {
+ factoryLock.acquireWrite();
+ reInitializeFactories();
+ clearCaches();
+ }
+ finally {
+ factoryLock.releaseWrite();
+ }
+ notifyChanged();
+ }
+
+ /**
+ * Reinitialize the factory list to its default state. By default
+ * this clears the list. Subclasses can override to provide other
+ * default initialization of the factory list. Subclasses must
+ * not call this method directly, as it must only be called while
+ * holding write access to the factory list.
+ */
+ protected void reInitializeFactories() {
+ factories.clear();
+ }
+
+ /**
+ * Return true if the service is in its default state. This
+ * implementation returns true if the number of registered factories
+ * equals the default size, which is 0 unless markDefault has been called.
+ */
+ public boolean isDefault() {
+ return factories.size() == defaultSize;
+ }
+
+ /**
+ * Set the default size to the current number of registered factories.
+ * Used by subclasses to customize the behavior of isDefault.
+ */
+ protected void markDefault() {
+ defaultSize = factories.size();
+ }
+
+ /**
+ * Create a key from an id. This creates a Key instance.
+ * Subclasses can override to define more useful keys appropriate
+ * to the factories they accept. If id is null, returns null.
+ */
+ public Key createKey(String id) {
+ return id == null ? null : new Key(id);
+ }
+
+ /**
+ * Clear caches maintained by this service. Subclasses can
+ * override if they implement additional caches that need to be cleared
+ * when the service changes. Subclasses should generally not call
+ * this method directly, as it must only be called while
+ * synchronized on this.
+ * NOTE(review): the callers visible in this class invoke it while
+ * holding the factory write lock rather than a monitor on this --
+ * confirm which guarantee is intended.
+ */
+ protected void clearCaches() {
+ // we don't synchronize on these because methods that use them
+ // copy before use, and check for changes if they modify the
+ // caches.
+ cacheref = null;
+ idref = null;
+ dnref = null;
+ }
+
+ /**
+ * Clears only the service cache.
+ * This can be called by subclasses when a change affects the service
+ * cache but not the id caches, e.g., when the default locale changes
+ * the resolution of ids changes, but not the visible ids themselves.
+ */
+ protected void clearServiceCache() {
+ cacheref = null;
+ }
+
+ /**
+ * ServiceListener is the listener that ICUService provides by default.
+ * ICUService will notify this listener when factories are added to
+ * or removed from the service. Subclasses can provide
+ * different listener interfaces that extend EventListener, and modify
+ * acceptsListener and notifyListener as appropriate.
+ */
+ public static interface ServiceListener extends EventListener {
+ public void serviceChanged(ICUService service);
+ }
+
+ /**
+ * Return true if the listener is accepted; by default this
+ * requires a ServiceListener. Subclasses can override to accept
+ * different listeners.
+ */
+ protected boolean acceptsListener(EventListener l) {
+ return l instanceof ServiceListener;
+ }
+
+ /**
+ * Notify the listener, which by default is a ServiceListener.
+ * Subclasses can override to use a different listener.
+ */
+ protected void notifyListener(EventListener l) {
+ ((ServiceListener)l).serviceChanged(this);
+ }
+
+ /**
+ * Return a string describing the statistics for this service.
+ * This also resets the statistics. Used for debugging purposes.
+ */
+ public String stats() {
+ ICURWLock.Stats stats = factoryLock.resetStats();
+ if (stats != null) {
+ return stats.toString();
+ }
+ return "no stats";
+ }
+
+ /**
+ * Return the name of this service. This will be the empty string if none was assigned.
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Returns the result of super.toString, appending the name in curly braces.
+ */
+ public String toString() {
+ return super.toString() + "{" + name + "}";
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java b/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java
new file mode 100644
index 00000000000..106a2337527
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/IDNA2003.java
@@ -0,0 +1,436 @@
+/*
+*******************************************************************************
+* Copyright (C) 2003-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.StringPrep;
+import com.ibm.icu.text.StringPrepParseException;
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ * IDNA2003 implementation code, moved out of com.ibm.icu.text.IDNA.java
+ * while extending that class to support IDNA2008/UTS #46 as well.
+ * @author Ram Viswanadha
+ */
+public final class IDNA2003 {
+ /* IDNA ACE Prefix is "xn--" */
+ private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
+ //private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length;
+
+ private static final int MAX_LABEL_LENGTH = 63;
+ private static final int HYPHEN = 0x002D;
+ private static final int CAPITAL_A = 0x0041;
+ private static final int CAPITAL_Z = 0x005A;
+ private static final int LOWER_CASE_DELTA = 0x0020;
+ private static final int FULL_STOP = 0x002E;
+ private static final int MAX_DOMAIN_NAME_LENGTH = 255;
+
+ // The NamePrep profile object
+ private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
+
+ private static boolean startsWithPrefix(StringBuffer src){
+ boolean startsWithPrefix = true;
+
+ if(src.length() < ACE_PREFIX.length){
+ return false;
+ }
+ for(int i=0; i0x007A){
+ return false;
+ }
+ //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
+ if( (ch==0x002D) ||
+ (0x0030 <= ch && ch <= 0x0039) ||
+ (0x0041 <= ch && ch <= 0x005A) ||
+ (0x0061 <= ch && ch <= 0x007A)
+ ){
+ return true;
+ }
+ return false;
+ }
+
+ /**
+  * Tells whether a code point is one of the label separators defined by
+  * the IDNA RFC: U+002E, U+3002, U+FF0E or U+FF61.
+  *
+  * @param ch The code point to be tested
+  * @return true if ch is a label separator, false otherwise
+  * @stable ICU 2.8
+  */
+ private static boolean isLabelSeparator(int ch){
+     return ch == 0x002e
+         || ch == 0x3002
+         || ch == 0xFF0E
+         || ch == 0xFF61;
+ }
+
+ public static StringBuffer convertToASCII(UCharacterIterator src, int options)
+ throws StringPrepParseException{
+
+ boolean[] caseFlags = null;
+
+ // the source contains all ascii codepoints
+ boolean srcIsASCII = true;
+ // assume the source contains all LDH codepoints
+ boolean srcIsLDH = true;
+
+ //get the options
+ boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0);
+ int ch;
+ // step 1
+ while((ch = src.next())!= UCharacterIterator.DONE){
+ if(ch> 0x7f){
+ srcIsASCII = false;
+ }
+ }
+ int failPos = -1;
+ src.setToStart();
+ StringBuffer processOut = null;
+ // step 2 is performed only if the source contains non ASCII
+ if(!srcIsASCII){
+ // step 2
+ processOut = namePrep.prepare(src, options);
+ }else{
+ processOut = new StringBuffer(src.getText());
+ }
+ int poLen = processOut.length();
+
+ if(poLen==0){
+ throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
+ }
+ StringBuffer dest = new StringBuffer();
+
+ // reset the variable to verify if output of prepare is ASCII or not
+ srcIsASCII = true;
+
+ // step 3 & 4
+ for(int j=0;j 0x7F){
+ srcIsASCII = false;
+ }else if(isLDHChar(ch)==false){
+ // here we do not assemble surrogates
+ // since we know that LDH code points
+ // are in the ASCII range only
+ srcIsLDH = false;
+ failPos = j;
+ }
+ }
+
+ if(useSTD3ASCIIRules == true){
+ // verify 3a and 3b
+ if( srcIsLDH == false /* source contains some non-LDH characters */
+ || processOut.charAt(0) == HYPHEN
+ || processOut.charAt(processOut.length()-1) == HYPHEN){
+
+ /* populate the parseError struct */
+ if(srcIsLDH==false){
+ throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
+ StringPrepParseException.STD3_ASCII_RULES_ERROR,
+ processOut.toString(),
+ (failPos>0) ? (failPos-1) : failPos);
+ }else if(processOut.charAt(0) == HYPHEN){
+ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
+ StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
+
+ }else{
+ throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
+ StringPrepParseException.STD3_ASCII_RULES_ERROR,
+ processOut.toString(),
+ (poLen>0) ? poLen-1 : poLen);
+
+ }
+ }
+ }
+ if(srcIsASCII){
+ dest = processOut;
+ }else{
+ // step 5 : verify the sequence does not begin with ACE prefix
+ if(!startsWithPrefix(processOut)){
+
+ //step 6: encode the sequence with punycode
+ caseFlags = new boolean[poLen];
+
+ StringBuilder punyout = Punycode.encode(processOut,caseFlags);
+
+ // convert all codepoints to lower case ASCII
+ StringBuffer lowerOut = toASCIILower(punyout);
+
+ //Step 7: prepend the ACE prefix
+ dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
+ //Step 6: copy the contents in b2 into dest
+ dest.append(lowerOut);
+ }else{
+
+ throw new StringPrepParseException("The input does not start with the ACE Prefix.",
+ StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
+ }
+ }
+ if(dest.length() > MAX_LABEL_LENGTH){
+ throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
+ StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
+ }
+ return dest;
+ }
+
+ /**
+  * Converts a whole domain name to ASCII by applying convertToASCII to each
+  * label. Every label separator found in the input is written to the result
+  * as U+002E. An empty label at the very end of the input (that is, a
+  * trailing separator) is not converted.
+  *
+  * @param src     the domain name
+  * @param options option bits handed to convertToASCII for every label
+  * @return the converted name
+  * @throws StringPrepParseException if a label cannot be converted or the
+  *         result is longer than 255 characters
+  */
+ public static StringBuffer convertIDNToASCII(String src,int options)
+     throws StringPrepParseException{
+
+     final char[] chars = src.toCharArray();
+     final int limit = chars.length;
+     final StringBuffer out = new StringBuffer();
+     int labelStart = 0;
+     while(true){
+         final int labelEnd = getSeparatorIndex(chars, labelStart, limit);
+         final String label = new String(chars, labelStart, labelEnd - labelStart);
+         final boolean atEnd = (labelEnd == limit);
+         // an empty label at the end of the input only marks the root; skip it
+         if(label.length() != 0 || !atEnd){
+             UCharacterIterator iter = UCharacterIterator.getInstance(label);
+             out.append(convertToASCII(iter, options));
+         }
+         if(atEnd){
+             break;
+         }
+         // continue after the separator and emit a normalized full stop
+         labelStart = labelEnd + 1;
+         out.append((char)FULL_STOP);
+     }
+     if(out.length() > MAX_DOMAIN_NAME_LENGTH){
+         throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
+     }
+     return out;
+ }
+
+ /**
+  * Applies the IDNA2003 ToUnicode operation to a single label.
+  * The label is decoded only when, after NamePrep (needed only for non-ASCII
+  * input), it starts with the ACE prefix, the remainder is valid Punycode,
+  * and re-encoding the decoded text with convertToASCII gives back the
+  * prepared label (compared ignoring ASCII case). In every other case the
+  * original text of the iterator is returned.
+  *
+  * TODO: The RFC states that ToUnicode never fails and that the original
+  * input is returned if any step fails. A StringPrepParseException thrown by
+  * the convertToASCII call in step 6 is not caught here.
+  *
+  * @param src     iterator over the label
+  * @param options option bits passed to NamePrep and to convertToASCII
+  * @return the decoded label, or a copy of the source text
+  * @throws StringPrepParseException if the convertToASCII verification step throws
+  */
+ public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
+     throws StringPrepParseException{
+
+     // no case flags are requested from the Punycode decoder
+     final boolean[] caseFlags = null;
+
+     // step 1: find out if all the codepoints in src are ASCII
+     final int startIndex = src.getIndex();
+     boolean allASCII = true;
+     int cp;
+     while((cp = src.next()) != UCharacterIterator.DONE){
+         if(cp > 0x7F){
+             allASCII = false;
+         }
+     }
+
+     // step 2: process the string (only needed for non-ASCII input)
+     StringBuffer prepared;
+     if(allASCII){
+         prepared = new StringBuffer(src.getText());
+     }else{
+         try{
+             src.setIndex(startIndex);
+             prepared = namePrep.prepare(src, options);
+         }catch(StringPrepParseException ex){
+             return new StringBuffer(src.getText());
+         }
+     }
+
+     //step 3: verify ACE Prefix
+     if(!startsWithPrefix(prepared)){
+         return new StringBuffer(src.getText());
+     }
+
+     //step 4: Remove the ACE Prefix
+     final String encoded = prepared.substring(ACE_PREFIX.length, prepared.length());
+
+     //step 5: Decode using punycode
+     final StringBuffer decoded;
+     try{
+         decoded = new StringBuffer(Punycode.decode(encoded, caseFlags));
+     }catch(StringPrepParseException e){
+         return new StringBuffer(src.getText());
+     }
+
+     //step 6: Apply toASCII
+     final StringBuffer reencoded = convertToASCII(UCharacterIterator.getInstance(decoded), options);
+
+     //step 7: verify; a mismatch means the original text is returned
+     if(compareCaseInsensitiveASCII(prepared, reencoded) != 0){
+         return new StringBuffer(src.getText());
+     }
+
+     //step 8: return output of step 5
+     return decoded;
+ }
+
+ /**
+  * Converts a whole domain name to Unicode by applying convertToUnicode to
+  * each label. Unlike convertIDNToASCII, the label separators are copied to
+  * the result unchanged.
+  *
+  * @param src     the domain name
+  * @param options option bits handed to convertToUnicode for every label
+  * @return the converted name
+  * @throws StringPrepParseException if an empty label is followed by a
+  *         separator, a label conversion throws, or the result is longer
+  *         than 255 characters
+  */
+ public static StringBuffer convertIDNToUnicode(String src, int options)
+     throws StringPrepParseException{
+
+     final char[] chars = src.toCharArray();
+     final int limit = chars.length;
+     final StringBuffer out = new StringBuffer();
+     int labelStart = 0;
+     while(true){
+         final int labelEnd = getSeparatorIndex(chars, labelStart, limit);
+         final String label = new String(chars, labelStart, labelEnd - labelStart);
+         final boolean atEnd = (labelEnd == limit);
+         if(!atEnd && label.length() == 0){
+             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
+         }
+         UCharacterIterator iter = UCharacterIterator.getInstance(label);
+         out.append(convertToUnicode(iter, options));
+         if(atEnd){
+             break;
+         }
+         // Unlike the ToASCII operation we don't normalize the label separators
+         out.append(chars[labelEnd]);
+         // continue with the text after the separator
+         labelStart = labelEnd + 1;
+     }
+     if(out.length() > MAX_DOMAIN_NAME_LENGTH){
+         throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
+     }
+     return out;
+ }
+
+ /**
+  * Compares two domain names by converting both with convertIDNToASCII and
+  * comparing the results with compareCaseInsensitiveASCII.
+  *
+  * @param s1      first domain name
+  * @param s2      second domain name
+  * @param options option bits used for both conversions
+  * @return the result of compareCaseInsensitiveASCII on the converted names
+  * @throws StringPrepParseException if either name cannot be converted
+  */
+ public static int compare(String s1, String s2, int options) throws StringPrepParseException{
+     // s1 is converted first, then s2
+     return compareCaseInsensitiveASCII(convertIDNToASCII(s1, options),
+                                        convertIDNToASCII(s2, options));
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java b/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java
new file mode 100644
index 00000000000..204a26477fb
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/IllegalIcuArgumentException.java
@@ -0,0 +1,32 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, Google, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+/**
+ * @author markdavis
+ *
+ */
+public class IllegalIcuArgumentException extends IllegalArgumentException {
+ private static final long serialVersionUID = 3789261542830211225L;
+
+ public IllegalIcuArgumentException(String errorMessage) {
+ super(errorMessage);
+ }
+
+ public IllegalIcuArgumentException(Throwable cause) {
+ super(cause);
+ }
+
+ public IllegalIcuArgumentException(String errorMessage, Throwable cause) {
+ super(errorMessage, cause);
+ }
+
+ public synchronized IllegalIcuArgumentException initCause(Throwable cause) {
+ return (IllegalIcuArgumentException) super.initCause(cause);
+ }
+
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java b/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java
new file mode 100644
index 00000000000..41bd01e424b
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/ImplicitCEGenerator.java
@@ -0,0 +1,433 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2004-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+/**
+ * For generation of Implicit CEs
+ * @author Mark Davis
+ *
+ * Cleaned up so that changes can be made more easily.
+ * Old values:
+# First Implicit: E26A792D
+# Last Implicit: E3DC70C0
+# First CJK: E0030300
+# Last CJK: E0A9DD00
+# First CJK_A: E0A9DF00
+# Last CJK_A: E0DE3100
+@internal
+ */
+public class ImplicitCEGenerator {
+
+ /**
+ * constants
+ */
+ static final boolean DEBUG = false;
+
+ static final long topByte = 0xFF000000L;
+ static final long bottomByte = 0xFFL;
+ static final long fourBytes = 0xFFFFFFFFL;
+
+ static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
+
+// public static final int CJK_BASE = 0x4E00;
+// public static final int CJK_LIMIT = 0x9FFF+1;
+// public static final int CJK_COMPAT_USED_BASE = 0xFA0E;
+// public static final int CJK_COMPAT_USED_LIMIT = 0xFA2F+1;
+// public static final int CJK_A_BASE = 0x3400;
+// public static final int CJK_A_LIMIT = 0x4DBF+1;
+// public static final int CJK_B_BASE = 0x20000;
+// public static final int CJK_B_LIMIT = 0x2A6DF+1;
+
+ public static final int
+ // 4E00;;Lo;0;L;;;;;N;;;;;
+ // 9FCB;;Lo;0;L;;;;;N;;;;;
+ CJK_BASE = 0x4E00,
+ CJK_LIMIT = 0x9FCB+1,
+
+ CJK_COMPAT_USED_BASE = 0xFA0E,
+ CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
+
+ //3400;;Lo;0;L;;;;;N;;;;;
+ //4DB5;;Lo;0;L;;;;;N;;;;;
+
+ CJK_A_BASE = 0x3400,
+ CJK_A_LIMIT = 0x4DB5+1,
+
+ //20000;;Lo;0;L;;;;;N;;;;;
+ //2A6D6;;Lo;0;L;;;;;N;;;;;
+
+ CJK_B_BASE = 0x20000,
+ CJK_B_LIMIT = 0x2A6D6+1,
+
+ //2A700;;Lo;0;L;;;;;N;;;;;
+ //2B734;;Lo;0;L;;;;;N;;;;;
+
+ CJK_C_BASE = 0x2A700,
+ CJK_C_LIMIT = 0x2B734+1,
+
+ //2B740;;Lo;0;L;;;;;N;;;;;
+ //2B81D;;Lo;0;L;;;;;N;;;;;
+
+ CJK_D_BASE = 0x2B740,
+ CJK_D_LIMIT = 0x2B81D+1
+
+ // when adding to this list, look for all occurrences (in project) of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
+ ;
+
+// private void throwError(String title, int cp) {
+// throw new IllegalArgumentException(title + "\t" + Utility.hex(cp, 6) + "\t" +
+// Utility.hex(getImplicitFromRaw(cp) & fourBytes));
+// }
+//
+// private void throwError(String title, long ce) {
+// throw new IllegalArgumentException(title + "\t" + Utility.hex(ce & fourBytes));
+// }
+//
+// private void show(int i) {
+// if (i >= 0 && i <= MAX_INPUT) {
+// System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicitFromRaw(i) & fourBytes));
+// }
+// }
+
+ /**
+ * Precomputed by constructor
+ */
+ int final3Multiplier;
+ int final4Multiplier;
+ int final3Count;
+ int final4Count;
+ int medialCount;
+ int min3Primary;
+ int min4Primary;
+ int max4Primary;
+ int minTrail;
+ int maxTrail;
+ int max3Trail;
+ int max4Trail;
+ int min4Boundary;
+
+ /**
+ * @return final4Multiplier - 1, the number of unused values left between
+ * consecutive final bytes of the 4-byte form
+ */
+ public int getGap4() {
+ return final4Multiplier - 1;
+ }
+
+ /**
+ * @return final3Multiplier - 1, the number of unused values left between
+ * consecutive final bytes of the 3-byte form
+ */
+ public int getGap3() {
+ return final3Multiplier - 1;
+ }
+
+ // old comment
+ // we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values
+ // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
+ // we shift so that HAN all has the same first primary, for compression.
+ // for the 4 byte case, we make the gap as large as we can fit.
+
+ /**
+ * Supply parameters for generating implicit CEs.
+ * Delegates to the six-argument constructor with trail bytes 0x04..0xFE,
+ * a 3-byte gap of 1 and one 3-byte primary.
+ * @param minPrimary The minimum primary value.
+ * @param maxPrimary The maximum primary value.
+ */
+ public ImplicitCEGenerator(int minPrimary, int maxPrimary) {
+ // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
+ this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1);
+ }
+
+ /**
+  * Set up to generate implicits.
+  * @param minPrimary The minimum primary value.
+  * @param maxPrimary The maximum primary value.
+  * @param minTrail final byte
+  * @param maxTrail final byte
+  * @param gap3 the gap we leave for tailoring for 3-byte forms; must not be negative
+  * @param primaries3count number of 3-byte primarys we can use (normally 1);
+  *        at least one primary must remain for the 4-byte forms
+  * @throws IllegalArgumentException if a parameter is out of range or the
+  *         requested layout cannot hold MAX_INPUT values
+  */
+ public ImplicitCEGenerator(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) {
+     // some simple parameter checks
+     if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) {
+         throw new IllegalArgumentException("bad lead bytes");
+     }
+     if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) {
+         throw new IllegalArgumentException("bad trail bytes");
+     }
+     // a negative gap would make final3Multiplier zero or negative
+     if (gap3 < 0) {
+         throw new IllegalArgumentException("bad gap3");
+     }
+     // the 3-byte primaries must leave at least one primary for the 4-byte
+     // forms; otherwise primaries4count below is zero or negative
+     int primariesAvailable = maxPrimary - minPrimary + 1;
+     if (primaries3count < 1 || primaries3count >= primariesAvailable) {
+         throw new IllegalArgumentException("bad three-byte primaries");
+     }
+
+     this.minTrail = minTrail;
+     this.maxTrail = maxTrail;
+
+     min3Primary = minPrimary;
+     max4Primary = maxPrimary;
+     // compute constants for use later.
+     // number of values we can use in trailing bytes
+     // leave room for empty values between AND above, e.g. if gap = 2
+     // range 3..7 => +3 -4 -5 -6 -7: so 1 value
+     // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
+     // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
+     final3Multiplier = gap3 + 1;
+     final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
+     max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
+
+     // medials can use full range
+     medialCount = (maxTrail - minTrail + 1);
+     // find out how many values fit in each form
+     int threeByteCount = medialCount * final3Count;
+     // now determine where the 3/4 boundary is.
+     // we use 3 bytes below the boundary, and 4 above
+     int primaries4count = primariesAvailable - primaries3count;
+
+     int min3ByteCoverage = primaries3count * threeByteCount;
+     min4Primary = minPrimary + primaries3count;
+     min4Boundary = min3ByteCoverage;
+     // Now expand out the multiplier for the 4 bytes, and redo.
+
+     int totalNeeded = MAX_INPUT - min4Boundary;
+     int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
+     if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
+
+     int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
+     if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
+
+     int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
+     if (DEBUG) System.out.println("expandedGap: " + gap4);
+     if (gap4 < 1) throw new IllegalArgumentException("must have larger gap4s");
+
+     final4Multiplier = gap4 + 1;
+     final4Count = neededPerFinalByte;
+     max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
+
+     // sanity check: the 4-byte forms must be able to hold MAX_INPUT values
+     if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) {
+         throw new IllegalArgumentException("internal error");
+     }
+     if (DEBUG) {
+         System.out.println("final4Count: " + final4Count);
+         for (int counter = 0; counter < final4Count; ++counter) {
+             int value = minTrail + (1 + counter)*final4Multiplier;
+             System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
+         }
+     }
+ }
+
+ /**
+  * Computes 1 + (a-1)/b using int division. For positive a and b this is
+  * a divided by b, rounded up. Note that the formula yields 1 for a == 0.
+  *
+  * @param a the dividend
+  * @param b the divisor
+  * @return 1 + (a-1)/b
+  */
+ public static int divideAndRoundUp(int a, int b) {
+     final int wholeGroupsBelow = (a - 1) / b;
+     return wholeGroupsBelow + 1;
+ }
+
+ /**
+  * Converts an implicit CE back into the raw integer it was generated from.
+  * The four bytes of the weight are checked against the lead and trail byte
+  * ranges and multipliers of this generator before being recombined.
+  * @param implicit The implicit value passed.
+  * @return the raw value, or -1 if illegal format
+  */
+ public int getRawFromImplicit(int implicit) {
+     // split the weight into its four bytes, most significant first
+     final int lead = (implicit >>> 24) & 0xFF;
+     int second = (implicit >>> 16) & 0xFF;
+     int third = (implicit >>> 8) & 0xFF;
+     int fourth = implicit & 0xFF;
+
+     // simple parameter checks on the first two bytes
+     final boolean leadOk = lead >= min3Primary && lead <= max4Primary;
+     final boolean secondOk = second >= minTrail && second <= maxTrail;
+     if (!(leadOk && secondOk)) {
+         return -1;
+     }
+     // normal offsets
+     second -= minTrail;
+
+     // take care of the final values, and compose
+     final int raw;
+     if (lead < min4Primary) {
+         // 3-byte form: the last byte must be zero
+         if (third < minTrail || third > max3Trail || fourth != 0) {
+             return -1;
+         }
+         third -= minTrail;
+         if (third % final3Multiplier != 0) {
+             return -1;
+         }
+         third /= final3Multiplier;
+         raw = (((lead - min3Primary) * medialCount) + second) * final3Count + third;
+     } else {
+         // 4-byte form
+         if (third < minTrail || third > maxTrail
+                 || fourth < minTrail || fourth > max4Trail) {
+             return -1;
+         }
+         third -= minTrail;
+         fourth -= minTrail;
+         if (fourth % final4Multiplier != 0) {
+             return -1;
+         }
+         fourth /= final4Multiplier;
+         raw = ((((lead - min4Primary) * medialCount) + second) * medialCount + third)
+                 * final4Count + fourth + min4Boundary;
+     }
+     // final check
+     return (raw < 0 || raw > MAX_INPUT) ? -1 : raw;
+ }
+
+ /**
+  * Generate the implicit CE, from raw integer.
+  * Left shifted to put the first byte at the top of an int. Raw values
+  * below min4Boundary get a 3-byte form (lowest byte zero); all others get
+  * a 4-byte form.
+  * @param cp raw value, 0..MAX_INPUT
+  * @return Primary implicit weight
+  * @throws IllegalArgumentException if cp is outside 0..MAX_INPUT or the
+  *         computed lead byte falls outside the primaries of its form
+  */
+ public int getImplicitFromRaw(int cp) {
+     if (cp < 0 || cp > MAX_INPUT) {
+         throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
+     }
+     final int above3Byte = cp - min4Boundary;
+     if (above3Byte >= 0) {
+         // 4-byte form: peel off the final index, then the two medial indexes
+         int rest = above3Byte / final4Count;
+         final int finalIndex = above3Byte % final4Count;
+         final int thirdIndex = rest % medialCount;
+         rest /= medialCount;
+         final int secondIndex = rest % medialCount;
+         final int leadIndex = rest / medialCount;
+
+         final int b3 = minTrail + finalIndex * final4Multiplier; // spread out, leaving gap at start
+         final int b2 = minTrail + thirdIndex;                    // offset
+         final int b1 = minTrail + secondIndex;                   // offset
+         final int b0 = min4Primary + leadIndex;                  // offset
+
+         if (b0 > max4Primary) {
+             throw new IllegalArgumentException("4-byte out of range: " +
+                     Utility.hex(cp) + ", " + Utility.hex(b0));
+         }
+         return (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
+     }
+
+     // 3-byte form
+     final int rest = cp / final3Count;
+     final int finalIndex = cp % final3Count;
+     final int secondIndex = rest % medialCount;
+     final int leadIndex = rest / medialCount;
+
+     final int b2 = minTrail + finalIndex * final3Multiplier; // spread out, leaving gap at start
+     final int b1 = minTrail + secondIndex;                   // offset
+     final int b0 = min3Primary + leadIndex;                  // offset
+
+     if (b0 >= min4Primary) {
+         throw new IllegalArgumentException("4-byte out of range: " +
+                 Utility.hex(cp) + ", " + Utility.hex(b0));
+     }
+     return (b0 << 24) + (b1 << 16) + (b2 << 8);
+ }
+
+ /**
+  * Gets an Implicit from a code point. Internally this swaps the CJK
+  * ranges with swapCJK, adds 1 so that the first raw value is always
+  * empty, and then converts the raw value to an implicit.
+  * @param cp The code point to convert to implicit.
+  * @return Primary implicit weight
+  */
+ public int getImplicitFromCodePoint(int cp) {
+     if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
+
+     // Produce Raw value
+     // note, we add 1 so that the first value is always empty!!
+     final int raw = ImplicitCEGenerator.swapCJK(cp) + 1;
+
+     if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(raw));
+
+     return getImplicitFromRaw(raw);
+ }
+
+ /** Offset added by swapCJK to every code point outside the handled CJK ranges. */
+ static final int NON_CJK_OFFSET = 0x110000;
+
+ /**
+  * Function used to:
+  * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
+  * b) bump any non-CJK characters by 110000 (NON_CJK_OFFSET).
+  * The relevant blocks are:
+  * A: 4E00..9FFF; CJK Unified Ideographs
+  *    F900..FAFF; CJK Compatibility Ideographs
+  * B: 3400..4DBF; CJK Unified Ideographs Extension A
+  *    20000..XX; CJK Unified Ideographs Extension B (and others later on)
+  * As long as
+  *   no new B characters are allocated between 4E00 and FAFF, and
+  *   no new A characters are outside of this range,
+  * (very high probability) this simple code will work.
+  * The reordered blocks are:
+  *   Block1 is CJK
+  *   Block2 is CJK_COMPAT_USED
+  *   Block3 is CJK_A
+  * (all contiguous)
+  * Any other CJK (the CJK_B, CJK_C and CJK_D ranges) gets its normal code point
+  * Any non-CJK gets +110000
+  * When we reorder Block1, we make sure that it is at the very start,
+  * so that it will use a 3-byte form.
+  * Warning: we only pick up the compatibility characters that are
+  * NOT decomposed, so that block is smaller!
+  *
+  * @param i the code point
+  * @return the swapped value
+  */
+ public static int swapCJK(int i) {
+
+     if (i >= CJK_BASE) {
+         if (i < CJK_LIMIT) return i - CJK_BASE;
+
+         if (i < CJK_COMPAT_USED_BASE) return i + NON_CJK_OFFSET;
+
+         if (i < CJK_COMPAT_USED_LIMIT) return i - CJK_COMPAT_USED_BASE
+                 + (CJK_LIMIT - CJK_BASE);
+         if (i < CJK_B_BASE) return i + NON_CJK_OFFSET;
+
+         if (i < CJK_B_LIMIT) return i; // non-BMP-CJK
+
+         if (i < CJK_C_BASE) return i + NON_CJK_OFFSET;
+
+         if (i < CJK_C_LIMIT) return i; // non-BMP-CJK
+
+         if (i < CJK_D_BASE) return i + NON_CJK_OFFSET;
+
+         if (i < CJK_D_LIMIT) return i; // non-BMP-CJK
+
+         return i + NON_CJK_OFFSET; // non-CJK
+     }
+     if (i < CJK_A_BASE) return i + NON_CJK_OFFSET;
+
+     if (i < CJK_A_LIMIT) return i - CJK_A_BASE
+             + (CJK_LIMIT - CJK_BASE)
+             + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
+     return i + NON_CJK_OFFSET; // non-CJK
+ }
+
+
+ /**
+ * @return Minimal trail value, as passed to the constructor
+ */
+ public int getMinTrail() {
+ return minTrail;
+ }
+
+ /**
+ * @return Maximal trail value, as passed to the constructor
+ */
+ public int getMaxTrail() {
+ return maxTrail;
+ }
+
+ /**
+  * Converts a raw value, as produced by getRawFromCodePoint, back into a
+  * code point by undoing the +1 and the swapCJK reordering.
+  * Values at or above CJK_B_BASE are mapped back without checking that they
+  * fall into one of the ranges swapCJK actually produces.
+  *
+  * @param i the raw value
+  * @return the code point, or -1 if i is not positive or lies between the
+  *         compacted CJK blocks and CJK_B_BASE
+  */
+ public int getCodePointFromRaw(int i) {
+     // raw values start at 1; 0 is the value that is always left empty
+     if (i <= 0) {
+         return -1;
+     }
+     i--;
+
+     if (i >= NON_CJK_OFFSET) {
+         return i - NON_CJK_OFFSET;
+     }
+     if (i >= CJK_B_BASE) {
+         return i;
+     }
+
+     // rest of CJKs, compacted: CJK, then CJK_COMPAT_USED, then CJK_A
+     final int cjkCount = CJK_LIMIT - CJK_BASE;
+     final int compatCount = CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE;
+     final int cjkACount = CJK_A_LIMIT - CJK_A_BASE;
+     if (i < cjkCount) {
+         return i + CJK_BASE;
+     }
+     if (i < cjkCount + compatCount) {
+         return i + CJK_COMPAT_USED_BASE - cjkCount;
+     }
+     // bound by the size of the CJK_A block, not by CJK_A_LIMIT itself
+     if (i < cjkCount + compatCount + cjkACount) {
+         return i + CJK_A_BASE - cjkCount - compatCount;
+     }
+     return -1;
+ }
+
+ /**
+ * @param i the code point
+ * @return swapCJK(i) + 1, the raw value for the code point
+ */
+ public int getRawFromCodePoint(int i) {
+ return swapCJK(i)+1;
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/IntTrie.java b/main/classes/core/src/com/ibm/icu/impl/IntTrie.java
new file mode 100644
index 00000000000..edcf19e7364
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/IntTrie.java
@@ -0,0 +1,333 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Trie implementation which stores data in int, 32 bits.
+ * @author synwee
+ * @see com.ibm.icu.impl.Trie
+ * @since release 2.1, Jan 01 2002
+ */
+public class IntTrie extends Trie
+{
+ // public constructors ---------------------------------------------
+
+ /**
+ * Creates a new Trie with the settings for the trie data.
+ * Unserialize the 32-bit-aligned input stream and use the data for the
+ * trie.
+ * @param inputStream file input stream to a ICU data file, containing
+ * the trie
+ * @param dataManipulate object which provides methods to parse the char
+ * data
+ * @throws IOException thrown when data reading fails
+ * @throws IllegalArgumentException if isIntTrie() is false after the
+ * superclass constructor has run
+ */
+ public IntTrie(InputStream inputStream, DataManipulate dataManipulate)
+ throws IOException
+ {
+ super(inputStream, dataManipulate);
+ // reject data that the superclass does not report as an int trie
+ if (!isIntTrie()) {
+ throw new IllegalArgumentException(
+ "Data given does not belong to a int trie.");
+ }
+ }
+
+ /**
+ * Make a dummy IntTrie.
+ * A dummy trie is an empty runtime trie, used when a real data trie cannot
+ * be loaded.
+ *
+ * The trie always returns the initialValue,
+ * or the leadUnitValue for lead surrogate code points.
+ * The Latin-1 part is always set up to be linear.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param leadUnitValue the value for lead surrogate code _units_ that do not
+ * have associated supplementary data
+ * @param dataManipulate object which provides methods to parse the char data
+ */
+ @SuppressWarnings("all") // No way to ignore dead code warning specifically - see eclipse bug#282770
+ public IntTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
+ super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
+
+ int dataLength, latin1Length, i, limit;
+ char block;
+
+ /* calculate the actual size of the dummy trie data */
+
+ /* max(Latin-1, block 0) */
+ dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
+ if(leadUnitValue!=initialValue) {
+ dataLength+=DATA_BLOCK_LENGTH;
+ }
+ m_data_=new int[dataLength];
+ m_dataLength_=dataLength;
+
+ m_initialValue_=initialValue;
+
+ /* fill the index and data arrays */
+
+ /* indexes are preset to 0 (block 0) */
+
+ /* Latin-1 data */
+ for(i=0; i>INDEX_STAGE_2_SHIFT_);
+ i=0xd800>>INDEX_STAGE_1_SHIFT_;
+ limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
+ for(; i> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ + (ch & INDEX_STAGE_3_MASK_);
+ return m_data_[offset];
+ }
+
+ // handle U+D800..U+10FFFF
+ offset = getCodePointOffset(ch);
+ return (offset >= 0) ? m_data_[offset] : m_initialValue_;
+ }
+
+ /**
+ * Gets the value to the data which this lead surrogate character points
+ * to.
+ * Returned data may contain folding offset information for the next
+ * trailing surrogate character.
+ * This method does not guarantee correct results for trail surrogates.
+ * @param ch lead surrogate character
+ * @return data value
+ */
+ public final int getLeadValue(char ch)
+ {
+ // the data index is computed by getLeadOffset
+ return m_data_[getLeadOffset(ch)];
+ }
+
+ /**
+ * Get the value associated with the BMP code point.
+ * Lead surrogate code points are treated as normal code points, with
+ * unfolded values that may differ from getLeadValue() results.
+ * @param ch the input BMP code point
+ * @return trie data value associated with the BMP codepoint
+ */
+ public final int getBMPValue(char ch)
+ {
+ // the data index is computed by getBMPOffset
+ return m_data_[getBMPOffset(ch)];
+ }
+
+ /**
+  * Get the value associated with a pair of surrogates.
+  * @param lead a lead surrogate
+  * @param trail a trail surrogate
+  * @return the data value for the pair, or the initial value when
+  *         getSurrogateOffset yields no positive offset
+  * @throws IllegalArgumentException if lead is not a lead surrogate or
+  *         trail is not a trail surrogate
+  */
+ public final int getSurrogateValue(char lead, char trail)
+ {
+     final boolean isPair = UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail);
+     if (!isPair) {
+         throw new IllegalArgumentException(
+             "Argument characters do not form a supplementary character");
+     }
+     // get fold position for the next trail surrogate
+     final int dataIndex = getSurrogateOffset(lead, trail);
+
+     // use the real data from the folded lead/trail units when there is a
+     // positive offset, otherwise fall back to m_initialValue_
+     return (dataIndex > 0) ? m_data_[dataIndex] : m_initialValue_;
+ }
+
+ /**
+  * Get a value from a folding offset (from the value of a lead surrogate)
+  * and a trail surrogate.
+  * @param leadvalue the value of a lead surrogate that contains the
+  *        folding offset
+  * @param trail surrogate
+  * @return trie data value associated with the trail character, or the
+  *         initial value if the folding offset is not positive
+  * @throws NullPointerException if this trie has no DataManipulate
+  */
+ public final int getTrailValue(int leadvalue, char trail)
+ {
+     if (m_dataManipulate_ == null) {
+         throw new NullPointerException(
+             "The field DataManipulate in this Trie is null");
+     }
+     final int foldingOffset = m_dataManipulate_.getFoldingOffset(leadvalue);
+     if (foldingOffset <= 0) {
+         return m_initialValue_;
+     }
+     final char maskedTrail = (char)(trail & SURROGATE_MASK_);
+     return m_data_[getRawOffset(foldingOffset, maskedTrail)];
+ }
+
+ /**
+ * Gets the latin 1 fast path value.
+ * Note this only works if latin 1 characters have their own linear
+ * array.
+ * @param ch latin 1 characters
+ * @return value associated with latin character
+ */
+ public final int getLatin1LinearValue(char ch)
+ {
+ // reads the data array at index INDEX_STAGE_3_MASK_ + 1 + ch
+ return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
+ }
+
+ /**
+  * Checks if the argument Trie has the same data as this Trie.
+  * Both the superclass comparison and the IntTrie-specific comparison of
+  * the initial value and the data array must succeed.
+  * @param other Trie to check
+  * @return true if the argument Trie has the same data as this Trie, false
+  *         otherwise
+  */
+ ///CLOVER:OFF
+ public boolean equals(Object other)
+ {
+     if (!super.equals(other) || !(other instanceof IntTrie)) {
+         return false;
+     }
+     final IntTrie that = (IntTrie)other;
+     return m_initialValue_ == that.m_initialValue_
+         && Arrays.equals(m_data_, that.m_data_);
+ }
+ ///CLOVER:ON
+
+ // protected methods -----------------------------------------------
+
+ /**
+ * Parses the input stream and stores its trie content into a index and
+ * data array
+ * @param inputStream data input stream containing trie data
+ * @exception IOException thrown when data reading fails
+ */
+ protected final void unserialize(InputStream inputStream)
+ throws IOException
+ {
+ super.unserialize(inputStream);
+ // one used for initial value
+ m_data_ = new int[m_dataLength_];
+ DataInputStream input = new DataInputStream(inputStream);
+ for (int i = 0; i < m_dataLength_; i ++) {
+ m_data_[i] = input.readInt();
+ }
+ m_initialValue_ = m_data_[0];
+ }
+
+ /**
+  * Gets the offset to the data which the surrogate pair points to.
+  * @param lead lead surrogate
+  * @param trail trailing surrogate
+  * @return offset to data, or -1 if the lead value carries no positive
+  *         folding offset (callers then use m_initialValue_)
+  * @throws NullPointerException if this trie has no DataManipulate
+  */
+ protected final int getSurrogateOffset(char lead, char trail)
+ {
+     if (m_dataManipulate_ == null) {
+         throw new NullPointerException(
+             "The field DataManipulate in this Trie is null");
+     }
+     // get fold position for the next trail surrogate
+     final int foldingOffset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
+
+     // a positive offset leads to the real data of the folded lead/trail
+     // units; anything else is reported as -1
+     return (foldingOffset > 0)
+         ? getRawOffset(foldingOffset, (char)(trail & SURROGATE_MASK_))
+         : -1;
+ }
+
+ /**
+ * Gets the value at the argument index.
+ * For use internally in TrieIterator
+ * @param index value at index will be retrieved
+ * @return 32 bit value
+ * @see com.ibm.icu.impl.TrieIterator
+ */
+ protected final int getValue(int index)
+ {
+ // direct read of the data array; index is not range-checked here
+ return m_data_[index];
+ }
+
+ /**
+ * Gets the default initial value
+ * @return 32 bit value stored in m_initialValue_
+ */
+ protected final int getInitialValue()
+ {
+ return m_initialValue_;
+ }
+
+ // package private methods -----------------------------------------
+
+ /**
+  * Internal constructor for builder use. The arrays are stored as given,
+  * not copied.
+  * @param index the index array to be slotted into this trie
+  * @param data the data array to be slotted into this trie
+  * @param initialvalue the initial value for this trie
+  * @param options trie options to use
+  * @param datamanipulate folding implementation
+  */
+ IntTrie(char[] index, int[] data, int initialvalue, int options,
+         DataManipulate datamanipulate)
+ {
+     super(index, options, datamanipulate);
+     m_initialValue_ = initialvalue;
+     m_data_ = data;
+     m_dataLength_ = data.length;
+ }
+
+ // private data members --------------------------------------------
+
+ /**
+ * Default value
+ */
+ private int m_initialValue_;
+ /**
+ * Array of int data
+ */
+ private int m_data_[];
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java b/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java
new file mode 100644
index 00000000000..ad5a77485dd
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/IntTrieBuilder.java
@@ -0,0 +1,792 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2010, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Builder class to manipulate and generate a trie.
+ * This is useful for ICU data in primitive types.
+ * Provides a compact way to store information that is indexed by Unicode
+ * values, such as character properties, types, keyboard values, etc. This is
+ * very useful when you have a block of Unicode data that contains significant
+ * values while the rest of the Unicode data is unused in the application or
+ * when you have a lot of redundancy, such as where all 21,000 Han ideographs
+ * have the same value. However, lookup is much faster than a hash table.
+ * A trie of any primitive data type serves two purposes:
+ *
+ * Fast access of the indexed values.
+ * Smaller memory footprint.
+ *
+ * This is a direct port from the ICU4C version
+ * @author Syn Wee Quek
+ */
+public class IntTrieBuilder extends TrieBuilder
+{
+ // public constructor ----------------------------------------------
+
+ /**
+ * Copy constructor.
+ * A fresh data array is allocated and filled from the argument's data, so
+ * the new builder does not share its data array with the argument.
+ * @param table builder to copy from
+ */
+ public IntTrieBuilder(IntTrieBuilder table)
+ {
+ super(table);
+ // NOTE(review): m_dataCapacity_ and m_dataLength_ are read here without
+ // being assigned in this class; presumably super(table) copies them
+ // from the argument - confirm in TrieBuilder
+ m_data_ = new int[m_dataCapacity_];
+ System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
+ m_initialValue_ = table.m_initialValue_;
+ m_leadUnitValue_ = table.m_leadUnitValue_;
+ }
+
+ /**
+ * Constructs a build table.
+ * The first data block is always preallocated; if latin1linear is set, the
+ * blocks for U+0000..U+00ff are preallocated right after it. All
+ * preallocated blocks are filled with initialvalue.
+ * @param aliasdata array to be used as the data array (it is used directly,
+ * not copied), or null to allocate a new array of maxdatalength
+ * ints. NOTE(review): the length of a supplied array is not
+ * checked against maxdatalength here.
+ * @param maxdatalength maximum data length allowed in table
+ * @param initialvalue initial data value
+ * @param leadunitvalue value stored for this builder as the lead unit
+ * value; it is used when the table is folded
+ * @param latin1linear is latin 1 to be linear
+ * @throws IllegalArgumentException if maxdatalength is smaller than one
+ * data block, or smaller than 1024 when latin1linear is set
+ */
+ public IntTrieBuilder(int aliasdata[], int maxdatalength,
+ int initialvalue, int leadunitvalue,
+ boolean latin1linear)
+ {
+ super();
+ if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
+ && maxdatalength < 1024)) {
+ throw new IllegalArgumentException(
+ "Argument maxdatalength is too small");
+ }
+
+ if (aliasdata != null) {
+ m_data_ = aliasdata;
+ }
+ else {
+ m_data_ = new int[maxdatalength];
+ }
+
+ // preallocate and reset the first data block (block index 0);
+ // j tracks the end of the preallocated data
+ int j = DATA_BLOCK_LENGTH;
+
+ if (latin1linear) {
+ // preallocate and reset the first block (number 0) and Latin-1
+ // (U+0000..U+00ff) after that; it was made sure above that
+ // maxDataLength >= 1024
+ // set indexes to point to consecutive data blocks
+ int i = 0;
+ do {
+ // do this at least for trie->index[0] even if that block is
+ // only partly used for Latin-1
+ m_index_[i ++] = j;
+ j += DATA_BLOCK_LENGTH;
+ } while (i < (256 >> SHIFT_));
+ }
+
+ m_dataLength_ = j;
+ // reset the initially allocated blocks to the initial value
+ Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
+ m_initialValue_ = initialvalue;
+ m_leadUnitValue_ = leadunitvalue;
+ m_dataCapacity_ = maxdatalength;
+ m_isLatin1Linear_ = latin1linear;
+ m_isCompacted_ = false;
+ }
+
+ // public methods -------------------------------------------------------
+
+ /*public final void print()
+ {
+ int i = 0;
+ int oldvalue = m_index_[i];
+ int count = 0;
+ System.out.println("index length " + m_indexLength_
+ + " --------------------------");
+ while (i < m_indexLength_) {
+ if (m_index_[i] != oldvalue) {
+ System.out.println("index has " + count + " counts of "
+ + Integer.toHexString(oldvalue));
+ count = 0;
+ oldvalue = m_index_[i];
+ }
+ count ++;
+ i ++;
+ }
+ System.out.println("index has " + count + " counts of "
+ + Integer.toHexString(oldvalue));
+ i = 0;
+ oldvalue = m_data_[i];
+ count = 0;
+ System.out.println("data length " + m_dataLength_
+ + " --------------------------");
+ while (i < m_dataLength_) {
+ if (m_data_[i] != oldvalue) {
+ if ((oldvalue & 0xf1000000) == 0xf1000000) {
+ int temp = oldvalue & 0xffffff;
+ temp += 0x320;
+ oldvalue = 0xf1000000 | temp;
+ }
+ if ((oldvalue & 0xf2000000) == 0xf2000000) {
+ int temp = oldvalue & 0xffffff;
+ temp += 0x14a;
+ oldvalue = 0xf2000000 | temp;
+ }
+ System.out.println("data has " + count + " counts of "
+ + Integer.toHexString(oldvalue));
+ count = 0;
+ oldvalue = m_data_[i];
+ }
+ count ++;
+ i ++;
+ }
+ if ((oldvalue & 0xf1000000) == 0xf1000000) {
+ int temp = oldvalue & 0xffffff;
+ temp += 0x320;
+ oldvalue = 0xf1000000 | temp;
+ }
+ if ((oldvalue & 0xf2000000) == 0xf2000000) {
+ int temp = oldvalue & 0xffffff;
+ temp += 0x14a;
+ oldvalue = 0xf2000000 | temp;
+ }
+ System.out.println("data has " + count + " counts of "
+ + Integer.toHexString(oldvalue));
+ }
+ */
+ /**
+ * Looks up the 32 bit value currently stored for a code point.
+ * @param ch codepoint which data is to be retrieved
+ * @return the 32 bit data, or 0 if the table has already been compacted
+ * or ch is not a valid code point
+ */
+ public int getValue(int ch)
+ {
+ // lookups need an uncompacted table and a valid code point
+ boolean validCodePoint = ch >= 0 && ch <= UCharacter.MAX_VALUE;
+ if (m_isCompacted_ || !validCodePoint) {
+ return 0;
+ }
+
+ // index entries can be negative (repeat blocks), so use the magnitude
+ int blockStart = Math.abs(m_index_[ch >> SHIFT_]);
+ int offsetInBlock = ch & MASK_;
+ return m_data_[blockStart + offsetInBlock];
+ }
+
+ /**
+ * Looks up the 32 bit value currently stored for a code point and reports
+ * whether the code point is still mapped to block zero.
+ * @param ch code point for which data is to be retrieved.
+ * @param inBlockZero Output parameter, may be null. inBlockZero[0] is set
+ * to true if the index entry for ch is zero, or if no lookup
+ * is possible; otherwise it is set to false.
+ * @return the 32 bit data value, or 0 if the table has already been
+ * compacted or ch is not a valid code point
+ */
+ public int getValue(int ch, boolean [] inBlockZero)
+ {
+ boolean wantsFlag = inBlockZero != null;
+ boolean validCodePoint = ch >= 0 && ch <= UCharacter.MAX_VALUE;
+
+ // no lookup possible: report block zero and the value 0
+ if (m_isCompacted_ || !validCodePoint) {
+ if (wantsFlag) {
+ inBlockZero[0] = true;
+ }
+ return 0;
+ }
+
+ int indexEntry = m_index_[ch >> SHIFT_];
+ if (wantsFlag) {
+ inBlockZero[0] = (indexEntry == 0);
+ }
+ // index entries can be negative (repeat blocks), so use the magnitude
+ int blockStart = Math.abs(indexEntry);
+ return m_data_[blockStart + (ch & MASK_)];
+ }
+
+
+ /**
+ * Stores a 32 bit value for a single code point.
+ * @param ch codepoint which data is to be set
+ * @param value to set
+ * @return true if the value was stored; false if the table has been
+ * compacted, ch is not a valid code point, or no data block
+ * could be obtained for ch
+ */
+ public boolean setValue(int ch, int value)
+ {
+ boolean validCodePoint = ch >= 0 && ch <= UCharacter.MAX_VALUE;
+ if (!m_isCompacted_ && validCodePoint) {
+ // get (or allocate) the writable block that holds ch
+ int blockStart = getDataBlock(ch);
+ if (blockStart >= 0) {
+ m_data_[blockStart + (ch & MASK_)] = value;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Serializes the build table with 32 bit data.
+ * On the first call the table is compacted and folded; after that it is
+ * marked as compacted.
+ * @param datamanipulate builder raw fold method implementation
+ * @param triedatamanipulate result trie fold method
+ * @return a new trie
+ * @throws IllegalArgumentException if datamanipulate is null
+ * @throws ArrayIndexOutOfBoundsException if the compacted data array is
+ * too long to be serialized
+ */
+ public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
+ Trie.DataManipulate triedatamanipulate)
+ {
+ if (datamanipulate == null) {
+ throw new IllegalArgumentException("Parameters can not be null");
+ }
+ // fold and compact if necessary, also checks that indexLength is
+ // within limits
+ if (!m_isCompacted_) {
+ // compact once without overlap to improve folding
+ compact(false);
+ // fold the supplementary part of the index array
+ fold(datamanipulate);
+ // compact again with overlap for minimum data array length
+ compact(true);
+ m_isCompacted_ = true;
+ }
+ // is dataLength within limits? the limit is an upper bound, so the
+ // failure here means the data is too long, not too short
+ if (m_dataLength_ >= MAX_DATA_LENGTH_) {
+ throw new ArrayIndexOutOfBoundsException("Data length too large");
+ }
+
+ char index[] = new char[m_indexLength_];
+ int data[] = new int[m_dataLength_];
+ // write the index (stage 1) array and the 32-bit data (stage 2) array
+ // write 16-bit index values shifted right by INDEX_SHIFT_
+ for (int i = 0; i < m_indexLength_; i ++) {
+ index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
+ }
+ // write 32-bit data values
+ System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
+
+ int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
+ options |= OPTIONS_DATA_IS_32_BIT_;
+ if (m_isLatin1Linear_) {
+ options |= OPTIONS_LATIN1_IS_LINEAR_;
+ }
+ return new IntTrie(index, data, m_initialValue_, options,
+ triedatamanipulate);
+ }
+
+
+ /**
+ * Serializes the build table to an output stream.
+ *
+ * Compacts the build-time trie after all values are set, and then
+ * writes the serialized form onto an output stream.
+ *
+ * After this, this build-time Trie can only be serialized again and/or closed;
+ * no further values can be added.
+ *
+ * This function is the rough equivalent of utrie_serialize() in ICU4C.
+ *
+ * @param os the output stream to which the serialized trie will be written.
+ * If null, the function still returns the size of the serialized Trie.
+ * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting
+ * serialized form can then be used to create a CharTrie.
+ * @param datamanipulate builder raw fold method implementation
+ * @return the number of bytes written to the output stream.
+ */
+ public int serialize(OutputStream os, boolean reduceTo16Bits,
+ TrieBuilder.DataManipulate datamanipulate) throws IOException {
+ if (datamanipulate == null) {
+ throw new IllegalArgumentException("Parameters can not be null");
+ }
+
+ // fold and compact if necessary, also checks that indexLength is
+ // within limits
+ if (!m_isCompacted_) {
+ // compact once without overlap to improve folding
+ compact(false);
+ // fold the supplementary part of the index array
+ fold(datamanipulate);
+ // compact again with overlap for minimum data array length
+ compact(true);
+ m_isCompacted_ = true;
+ }
+
+ // is dataLength within limits?
+ int length;
+ if (reduceTo16Bits) {
+ length = m_dataLength_ + m_indexLength_;
+ } else {
+ length = m_dataLength_;
+ }
+ if (length >= MAX_DATA_LENGTH_) {
+ throw new ArrayIndexOutOfBoundsException("Data length too small");
+ }
+
+ // struct UTrieHeader {
+ // int32_t signature;
+ // int32_t options (a bit field)
+ // int32_t indexLength
+ // int32_t dataLength
+ length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
+ if(reduceTo16Bits) {
+ length+=2*m_dataLength_;
+ } else {
+ length+=4*m_dataLength_;
+ }
+
+ if (os == null) {
+ // No output stream. Just return the length of the serialized Trie, in bytes.
+ return length;
+ }
+
+ DataOutputStream dos = new DataOutputStream(os);
+ dos.writeInt(Trie.HEADER_SIGNATURE_);
+
+ int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<>> Trie.INDEX_STAGE_2_SHIFT_;
+ dos.writeChar(v);
+ }
+
+ /* write 16-bit data values */
+ for(int i=0; i>> Trie.INDEX_STAGE_2_SHIFT_;
+ dos.writeChar(v);
+ }
+
+ /* write 32-bit data values */
+ for(int i=0; i UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
+ || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
+ return false;
+ }
+
+ if (start == limit) {
+ return true; // nothing to do
+ }
+
+ if ((start & MASK_) != 0) {
+ // set partial block at [start..following block boundary[
+ int block = getDataBlock(start);
+ if (block < 0) {
+ return false;
+ }
+
+ int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
+ if (nextStart <= limit) {
+ fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
+ value, overwrite);
+ start = nextStart;
+ }
+ else {
+ fillBlock(block, start & MASK_, limit & MASK_,
+ value, overwrite);
+ return true;
+ }
+ }
+
+ // number of positions in the last, partial block
+ int rest = limit & MASK_;
+
+ // round down limit to a block boundary
+ limit &= ~MASK_;
+
+ // iterate over all-value blocks
+ int repeatBlock = 0;
+ if (value == m_initialValue_) {
+ // repeatBlock = 0; assigned above
+ }
+ else {
+ repeatBlock = -1;
+ }
+ while (start < limit) {
+ // get index value
+ int block = m_index_[start >> SHIFT_];
+ if (block > 0) {
+ // already allocated, fill in value
+ fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
+ }
+ else if (m_data_[-block] != value && (block == 0 || overwrite)) {
+ // set the repeatBlock instead of the current block 0 or range
+ // block
+ if (repeatBlock >= 0) {
+ m_index_[start >> SHIFT_] = -repeatBlock;
+ }
+ else {
+ // create and set and fill the repeatBlock
+ repeatBlock = getDataBlock(start);
+ if (repeatBlock < 0) {
+ return false;
+ }
+
+ // set the negative block number to indicate that it is a
+ // repeat block
+ m_index_[start >> SHIFT_] = -repeatBlock;
+ fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
+ }
+ }
+
+ start += DATA_BLOCK_LENGTH;
+ }
+
+ if (rest > 0) {
+ // set partial block at [last block boundary..limit[
+ int block = getDataBlock(start);
+ if (block < 0) {
+ return false;
+ }
+ fillBlock(block, 0, rest, value, overwrite);
+ }
+
+ return true;
+ }
+
+ // protected data member ------------------------------------------------
+
+ protected int m_data_[];
+ protected int m_initialValue_;
+
+ // private data member ------------------------------------------------
+
+ private int m_leadUnitValue_;
+
+ // private methods ------------------------------------------------------
+
+ /**
+ * Reserves the next data block at the current end of the used data.
+ * @return start offset of the reserved block, or -1 if the block would
+ * not fit within the data capacity
+ */
+ private int allocDataBlock()
+ {
+ int blockStart = m_dataLength_;
+ int blockEnd = blockStart + DATA_BLOCK_LENGTH;
+ if (blockEnd <= m_dataCapacity_) {
+ // the block fits, the used data now ends after it
+ m_dataLength_ = blockEnd;
+ return blockStart;
+ }
+ // out of memory in the data array
+ return -1;
+ }
+
+ /**
+ * Gets the writable data block for a code point, allocating a new block
+ * if the index entry for the code point is not a positive block offset.
+ * A newly allocated block starts out as a copy of the block the index
+ * entry referred to before.
+ * No error checking for illegal arguments.
+ * @param ch codepoint to look for
+ * @return start offset of the block in the data array, or -1 if no new
+ * data block is available (out of memory in data array)
+ */
+ private int getDataBlock(int ch)
+ {
+ ch >>= SHIFT_;
+ int indexValue = m_index_[ch];
+ if (indexValue > 0) {
+ return indexValue;
+ }
+
+ // allocate a new data block
+ int newBlock = allocDataBlock();
+ if (newBlock < 0) {
+ // out of memory in the data array
+ return -1;
+ }
+ m_index_[ch] = newBlock;
+
+ // copy-on-write for a block from a setRange()
+ // System.arraycopy counts array elements, not bytes, so exactly one
+ // block is copied; a larger count would run past the new block and
+ // could run past the end of the data array
+ System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
+ DATA_BLOCK_LENGTH);
+ return newBlock;
+ }
+
+ /**
+ * Compact a folded build-time trie.
+ * The compaction
+ * - removes blocks that are identical with earlier ones
+ * - overlaps adjacent blocks as much as possible (if overlap == true)
+ * - moves blocks in steps of the data granularity
+ * - moves and overlaps blocks that overlap with multiple values in the overlap region
+ *
+ * It does not
+ * - try to move and overlap blocks that are not already adjacent
+ *
+ * The data array is rewritten in place, and the index entries and the
+ * data length are updated to match. Does nothing if the table is already
+ * marked as compacted.
+ * @param overlap true to search for identical blocks in steps of the data
+ * granularity and to overlap the start of a block with the end
+ * of the data kept so far; false to search in whole-block
+ * steps and never overlap
+ */
+ private void compact(boolean overlap)
+ {
+ if (m_isCompacted_) {
+ return; // nothing left to do
+ }
+
+ // compaction
+ // initialize the index map with "block is used/unused" flags
+ findUnusedBlocks();
+
+ // if Latin-1 is preallocated and linear, then do not compact Latin-1
+ // data
+ int overlapStart = DATA_BLOCK_LENGTH;
+ if (m_isLatin1Linear_ && SHIFT_ <= 8) {
+ overlapStart += 256;
+ }
+
+ int newStart = DATA_BLOCK_LENGTH;
+ int i;
+ for (int start = newStart; start < m_dataLength_;) {
+ // start: index of first entry of current block
+ // newStart: index where the current block is to be moved
+ // (right after current end of already-compacted data)
+ // skip blocks that are not used
+ if (m_map_[start >>> SHIFT_] < 0) {
+ // advance start to the next block
+ start += DATA_BLOCK_LENGTH;
+ // leave newStart with the previous block!
+ continue;
+ }
+ // search for an identical block
+ if (start >= overlapStart) {
+ i = findSameDataBlock(m_data_, newStart, start,
+ overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
+ if (i >= 0) {
+ // found an identical block, set the other block's index
+ // value for the current block
+ m_map_[start >>> SHIFT_] = i;
+ // advance start to the next block
+ start += DATA_BLOCK_LENGTH;
+ // leave newStart with the previous block!
+ continue;
+ }
+ }
+ // see if the beginning of this block can be overlapped with the
+ // end of the previous block
+ if(overlap && start>=overlapStart) {
+ /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
+ for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
+ i>0 && !equal_int(m_data_, newStart-i, start, i);
+ i-=DATA_GRANULARITY_) {}
+ } else {
+ i=0;
+ }
+ // i is now the number of leading entries of this block that are
+ // already present at the end of the compacted data (0 if none)
+ if (i > 0) {
+ // some overlap
+ m_map_[start >>> SHIFT_] = newStart - i;
+ // move the non-overlapping indexes to their new positions
+ start += i;
+ for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
+ m_data_[newStart ++] = m_data_[start ++];
+ }
+ }
+ else if (newStart < start) {
+ // no overlap, just move the indexes to their new positions
+ m_map_[start >>> SHIFT_] = newStart;
+ for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
+ m_data_[newStart ++] = m_data_[start ++];
+ }
+ }
+ else { // no overlap && newStart==start
+ m_map_[start >>> SHIFT_] = start;
+ newStart += DATA_BLOCK_LENGTH;
+ start = newStart;
+ }
+ }
+ // now adjust the index (stage 1) table
+ // index entries can be negative (repeat blocks), so the magnitude is
+ // used to find the old block; the new entries come from the map
+ for (i = 0; i < m_indexLength_; ++ i) {
+ m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
+ }
+ m_dataLength_ = newStart;
+ }
+
+ /**
+ * Searches the start of the data array for a block-length run that is
+ * equal to the block at otherBlock.
+ * @param data array to search in
+ * @param dataLength length of the leading part of data to search; only
+ * runs that lie completely inside this part are considered
+ * @param otherBlock start offset of the block to be matched
+ * @param step distance between the start offsets that are tried
+ * @return start offset of the first matching run, or -1 if there is none
+ */
+ private static final int findSameDataBlock(int data[], int dataLength,
+ int otherBlock, int step)
+ {
+ // last start offset whose whole block still lies inside dataLength
+ int lastStart = dataLength - DATA_BLOCK_LENGTH;
+
+ int candidate = 0;
+ while (candidate <= lastStart) {
+ if (equal_int(data, candidate, otherBlock, DATA_BLOCK_LENGTH)) {
+ return candidate;
+ }
+ candidate += step;
+ }
+ return -1;
+ }
+
+ /**
+ * Fold the normalization data for supplementary code points into
+ * a compact area on top of the BMP-part of the trie index,
+ * with the lead surrogates indexing this compact area.
+ *
+ * Duplicate the index values for lead surrogates:
+ * From inside the BMP area, where some may be overridden with folded values,
+ * to just after the BMP area, where they can be retrieved for
+ * code point lookups.
+ *
+ * The index array is modified in place and the index length is updated.
+ * @param manipulate fold implementation
+ * @throws IllegalStateException if no data block can be allocated for the
+ * lead unit value
+ * @throws ArrayIndexOutOfBoundsException if a folded value cannot be
+ * stored (data table overflow) or the folded index becomes
+ * too long (index table overflow)
+ */
+ private final void fold(DataManipulate manipulate)
+ {
+ int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
+ int index[] = m_index_;
+ // copy the lead surrogate indexes into a temporary array
+ System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
+ SURROGATE_BLOCK_COUNT_);
+
+ // set all values for lead surrogate code *units* to leadUnitValue
+ // so that by default runtime lookups will find no data for associated
+ // supplementary code points, unless there is data for such code points
+ // which will result in a non-zero folding value below that is set for
+ // the respective lead units
+ // the above saved the indexes for surrogate code *points*
+ // fill the indexes with simplified code from utrie_setRange32()
+ int block = 0;
+ if (m_leadUnitValue_ == m_initialValue_) {
+ // leadUnitValue == initialValue, use all-initial-value block
+ // block = 0; if block here left empty
+ }
+ else {
+ // create and fill the repeatBlock
+ block = allocDataBlock();
+ if (block < 0) {
+ // data table overflow
+ throw new IllegalStateException("Internal error: Out of memory space");
+ }
+ fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
+ // negative block number to indicate that it is a repeat block
+ block = -block;
+ }
+ for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
+ m_index_[c] = block;
+ }
+
+ // Fold significant index values into the area just after the BMP
+ // indexes.
+ // In case the first lead surrogate has significant data,
+ // its index block must be used first (in which case the folding is a
+ // no-op).
+ // Later all folded index blocks are moved up one to insert the copied
+ // lead surrogate indexes.
+ int indexLength = BMP_INDEX_LENGTH_;
+ // search for any index (stage 1) entries for supplementary code points
+ for (int c = 0x10000; c < 0x110000;) {
+ if (index[c >> SHIFT_] != 0) {
+ // there is data, treat the full block for a lead surrogate
+ // (0x400 code points share one lead surrogate)
+ c &= ~0x3ff;
+ // is there an identical index block?
+ block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
+
+ // get a folded value for [c..c+0x400[ and,
+ // if different from the value for the lead surrogate code
+ // point, set it for the lead surrogate code unit
+
+ int value = manipulate.getFoldedValue(c,
+ block + SURROGATE_BLOCK_COUNT_);
+ if (value != getValue(UTF16.getLeadSurrogate(c))) {
+ if (!setValue(UTF16.getLeadSurrogate(c), value)) {
+ // data table overflow
+ throw new ArrayIndexOutOfBoundsException(
+ "Data table overflow");
+ }
+ // if we did not find an identical index block...
+ if (block == indexLength) {
+ // move the actual index (stage 1) entries from the
+ // supplementary position to the new one
+ System.arraycopy(index, c >> SHIFT_, index, indexLength,
+ SURROGATE_BLOCK_COUNT_);
+ indexLength += SURROGATE_BLOCK_COUNT_;
+ }
+ }
+ c += 0x400;
+ }
+ else {
+ c += DATA_BLOCK_LENGTH;
+ }
+ }
+
+ // index array overflow?
+ // This is to guarantee that a folding offset is of the form
+ // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
+ // If the index is too large, then n>=1024 and more than 10 bits are
+ // necessary.
+ // In fact, it can only ever become n==1024 with completely unfoldable
+ // data and the additional block of duplicated values for lead
+ // surrogates.
+ if (indexLength >= MAX_INDEX_LENGTH_) {
+ throw new ArrayIndexOutOfBoundsException("Index table overflow");
+ }
+ // make space for the lead surrogate index block and insert it between
+ // the BMP indexes and the folded ones
+ System.arraycopy(index, BMP_INDEX_LENGTH_, index,
+ BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
+ indexLength - BMP_INDEX_LENGTH_);
+ System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
+ SURROGATE_BLOCK_COUNT_);
+ indexLength += SURROGATE_BLOCK_COUNT_;
+ m_indexLength_ = indexLength;
+ }
+
+ /**
+ * Writes a value into part of one data block.
+ * @param block start offset of the block in the data array
+ * @param start first position inside the block to write (inclusive)
+ * @param limit end position inside the block (exclusive)
+ * @param value the value to write
+ * @param overwrite if true every position in the range is written; if
+ * false only positions that still hold the initial value are
+ * written
+ * @internal
+ */
+ private void fillBlock(int block, int start, int limit, int value,
+ boolean overwrite)
+ {
+ int end = block + limit;
+ for (int pos = block + start; pos < end; ++ pos) {
+ // without overwrite, keep anything that is not the initial value
+ if (overwrite || m_data_[pos] == m_initialValue_) {
+ m_data_[pos] = value;
+ }
+ }
+ }
+}
+
diff --git a/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java b/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java
new file mode 100644
index 00000000000..6f7c2a93c9b
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java
@@ -0,0 +1,21 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+/**
+ * Checked exception that carries an optional detail message.
+ * NOTE(review): going by the name, it presumably signals input that is not
+ * in an expected format - confirm against the code that throws it.
+ */
+public class InvalidFormatException extends Exception {
+
+ // Generated by serialver from JDK 1.4.1_01
+ static final long serialVersionUID = 8883328905089345791L;
+
+ /** Constructs the exception without a detail message. */
+ public InvalidFormatException(){}
+
+ /**
+ * Constructs the exception with a detail message.
+ * @param message the detail message
+ */
+ public InvalidFormatException(String message){
+ super(message);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java b/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java
new file mode 100644
index 00000000000..478048c75e3
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/IterableComparator.java
@@ -0,0 +1,61 @@
+/*
+ ************************************************************************************
+ * Copyright (C) 2007-2010, Google Inc, International Business Machines Corporation *
+ * and others. All Rights Reserved. *
+ ************************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Comparator;
+import java.util.Iterator;
+
+/**
+ * Compares two Iterables element by element, in iteration order.
+ * The first pair of elements that compares as different decides the result.
+ * If one Iterable runs out of elements first, it is ordered before the other
+ * when shorterFirst is true and after it otherwise. A null Iterable is
+ * ordered relative to a non-null one in the same way as a shorter one, and
+ * two null Iterables are equal.
+ * Elements are compared with the supplied Comparator, or through Comparable
+ * when no Comparator is supplied.
+ * @param <T> the element type of the Iterables
+ */
+public class IterableComparator<T> implements Comparator<Iterable<T>> {
+ private final Comparator<T> comparator;
+ private final int shorterFirst; // = 1 for shorter first, -1 otherwise
+
+ /** Compares elements through Comparable, shorter first. */
+ public IterableComparator() {
+ this(null, true);
+ }
+
+ /**
+ * Compares elements with the given comparator, shorter first.
+ * @param comparator element comparator, or null to use Comparable
+ */
+ public IterableComparator(Comparator<T> comparator) {
+ this(comparator, true);
+ }
+
+ /**
+ * @param comparator element comparator, or null to use Comparable
+ * @param shorterFirst true to order an Iterable that runs out of
+ * elements first before the other one, false to order it after
+ */
+ public IterableComparator(Comparator<T> comparator, boolean shorterFirst) {
+ this.comparator = comparator;
+ this.shorterFirst = shorterFirst ? 1 : -1;
+ }
+
+ /**
+ * @return a negative value, zero, or a positive value as a is ordered
+ * before, equal to, or after b
+ * @throws ClassCastException if there is no comparator and the elements
+ * are not mutually Comparable
+ */
+ public int compare(Iterable<T> a, Iterable<T> b) {
+ if (a == null) {
+ return b == null ? 0 : -shorterFirst;
+ } else if (b == null) {
+ return shorterFirst;
+ }
+ Iterator<T> ai = a.iterator();
+ Iterator<T> bi = b.iterator();
+ while (true) {
+ if (!ai.hasNext()) {
+ return bi.hasNext() ? -shorterFirst : 0;
+ }
+ if (!bi.hasNext()) {
+ return shorterFirst;
+ }
+ T aItem = ai.next();
+ T bItem = bi.next();
+ @SuppressWarnings("unchecked")
+ int result = comparator != null ? comparator.compare(aItem, bItem) : ((Comparable<T>)aItem).compareTo(bItem);
+ if (result != 0) {
+ return result;
+ }
+ }
+ }
+
+ /**
+ * Compares two Iterables through Comparable elements, shorter first.
+ */
+ @SuppressWarnings("unchecked")
+ public static <T> int compareIterables(Iterable<T> a, Iterable<T> b) {
+ return NOCOMPARATOR.compare(a, b);
+ }
+
+ @SuppressWarnings("rawtypes")
+ private static final IterableComparator NOCOMPARATOR = new IterableComparator();
+}
\ No newline at end of file
diff --git a/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java b/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java
new file mode 100644
index 00000000000..3edeca7a061
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/JavaTimeZone.java
@@ -0,0 +1,209 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008-2011, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.util.Date;
+import java.util.TreeSet;
+
+import com.ibm.icu.util.TimeZone;
+
+/**
+ * JavaTimeZone inherits com.ibm.icu.util.TimeZone and wraps java.util.TimeZone.
+ * We used to have JDKTimeZone which wrapped Java TimeZone and used it as primary
+ * TimeZone implementation until ICU4J 3.4.1. This class works exactly like
+ * JDKTimeZone and allows ICU users who use ICU4J and JDK date/time/calendar
+ * services in mix to maintain only JDK timezone rules.
+ *
+ * This TimeZone subclass is returned by the TimeZone factory method getTimeZone(String)
+ * when the default timezone type in TimeZone class is TimeZone.TIMEZONE_JDK.
+ */
+public class JavaTimeZone extends TimeZone {
+
+ private static final long serialVersionUID = 6977448185543929364L;
+
+ // IDs known to the JDK, collected once when the class is loaded
+ private static final TreeSet<String> AVAILABLESET;
+
+ private java.util.TimeZone javatz;
+ // scratch calendar bound to javatz; access is synchronized on it.
+ // It is transient and rebuilt in readObject() and clone().
+ private transient java.util.Calendar javacal;
+
+ static {
+ AVAILABLESET = new TreeSet<String>();
+ String[] availableIds = java.util.TimeZone.getAvailableIDs();
+ for (String availableId : availableIds) {
+ AVAILABLESET.add(availableId);
+ }
+ }
+
+ /**
+ * Constructs a JavaTimeZone with the default Java TimeZone
+ */
+ public JavaTimeZone() {
+ javatz = java.util.TimeZone.getDefault();
+ setID(javatz.getID());
+ javacal = new java.util.GregorianCalendar(javatz);
+ }
+
+ /**
+ * Constructs a JavaTimeZone with the given timezone ID.
+ * The ID is tried as a JDK ID first, then through ICU's canonical ID
+ * mapping, then as a custom ID; if none of these works, the unknown zone
+ * with a zero offset is used.
+ * @param id A timezone ID, either a system ID or a custom ID.
+ */
+ public JavaTimeZone(String id) {
+ if (AVAILABLESET.contains(id)) {
+ javatz = java.util.TimeZone.getTimeZone(id);
+ }
+ if (javatz == null) {
+ // Use ICU's canonical ID mapping
+ boolean[] isSystemID = new boolean[1];
+ String canonicalID = TimeZone.getCanonicalID(id, isSystemID);
+ if (isSystemID[0] && AVAILABLESET.contains(canonicalID)) {
+ javatz = java.util.TimeZone.getTimeZone(canonicalID);
+ }
+ }
+
+ if (javatz == null){
+ int[] fields = new int[4];
+ if (ZoneMeta.parseCustomID(id, fields)) {
+ // JDK does not support offset seconds.
+ // If custom ID, we create java.util.SimpleTimeZone here.
+ id = ZoneMeta.formatCustomID(fields[1], fields[2], fields[3], fields[0] < 0);
+ int offset = fields[0] * ((fields[1] * 60 + fields[2]) * 60 + fields[3]) * 1000;
+ javatz = new java.util.SimpleTimeZone(offset, id);
+ }
+ }
+ if (javatz == null) {
+ // Final fallback
+ id = TimeZone.UNKNOWN_ZONE_ID;
+ javatz = new java.util.SimpleTimeZone(0, id);
+ }
+ setID(id);
+ javacal = new java.util.GregorianCalendar(javatz);
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#getOffset(int, int, int, int, int, int)
+ */
+ public int getOffset(int era, int year, int month, int day, int dayOfWeek, int milliseconds) {
+ return javatz.getOffset(era, year, month, day, dayOfWeek, milliseconds);
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#getOffset(long, boolean, int[])
+ */
+ public void getOffset(long date, boolean local, int[] offsets) {
+ synchronized (javacal) {
+ if (local) {
+ int fields[] = new int[6];
+ Grego.timeToFields(date, fields);
+ int hour, min, sec, mil;
+ // fields[5] is split into hour/minute/second/millisecond
+ int tmp = fields[5];
+ mil = tmp % 1000;
+ tmp /= 1000;
+ sec = tmp % 60;
+ tmp /= 60;
+ min = tmp % 60;
+ hour = tmp / 60;
+ javacal.clear();
+ javacal.set(fields[0], fields[1], fields[2], hour, min, sec);
+ javacal.set(java.util.Calendar.MILLISECOND, mil);
+
+ int doy1, hour1, min1, sec1, mil1;
+ doy1 = javacal.get(java.util.Calendar.DAY_OF_YEAR);
+ hour1 = javacal.get(java.util.Calendar.HOUR_OF_DAY);
+ min1 = javacal.get(java.util.Calendar.MINUTE);
+ sec1 = javacal.get(java.util.Calendar.SECOND);
+ mil1 = javacal.get(java.util.Calendar.MILLISECOND);
+
+ if (fields[4] != doy1 || hour != hour1 || min != min1 || sec != sec1 || mil != mil1) {
+ // Calendar field(s) were changed due to the adjustment for non-existing time
+ // Note: This code does not support non-existing local time at year boundary properly.
+ // But, it should work fine for real timezones.
+ int dayDelta = Math.abs(doy1 - fields[4]) > 1 ? 1 : doy1 - fields[4];
+ int delta = ((((dayDelta * 24) + hour1 - hour) * 60 + min1 - min) * 60 + sec1 - sec) * 1000 + mil1 - mil;
+
+ // In this case, we use the offsets before the transition
+ javacal.setTimeInMillis(javacal.getTimeInMillis() - delta - 1);
+ }
+ } else {
+ javacal.setTimeInMillis(date);
+ }
+ offsets[0] = javacal.get(java.util.Calendar.ZONE_OFFSET);
+ offsets[1] = javacal.get(java.util.Calendar.DST_OFFSET);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#getRawOffset()
+ */
+ public int getRawOffset() {
+ return javatz.getRawOffset();
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#inDaylightTime(java.util.Date)
+ */
+ public boolean inDaylightTime(Date date) {
+ return javatz.inDaylightTime(date);
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#setRawOffset(int)
+ */
+ public void setRawOffset(int offsetMillis) {
+ javatz.setRawOffset(offsetMillis);
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#useDaylightTime()
+ */
+ public boolean useDaylightTime() {
+ return javatz.useDaylightTime();
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#getDSTSavings()
+ */
+ public int getDSTSavings() {
+ // java.util.TimeZone has had getDSTSavings() since JDK 1.4, so the
+ // wrapped zone can be asked directly instead of through reflection
+ return javatz.getDSTSavings();
+ }
+
+ /**
+ * Returns the wrapped java.util.TimeZone itself, not a copy.
+ */
+ public java.util.TimeZone unwrap() {
+ return javatz;
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#clone()
+ */
+ public Object clone() {
+ JavaTimeZone other = (JavaTimeZone)super.clone();
+ other.javatz = (java.util.TimeZone)javatz.clone();
+ // the clone needs its own scratch calendar bound to its own zone;
+ // otherwise it would keep using the calendar of the original
+ other.javacal = new java.util.GregorianCalendar(other.javatz);
+ return other;
+ }
+
+ /* (non-Javadoc)
+ * @see com.ibm.icu.util.TimeZone#hashCode()
+ */
+ public int hashCode() {
+ return super.hashCode() + javatz.hashCode();
+ }
+
+ // javacal is transient, so it is rebuilt from the deserialized zone
+ private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
+ s.defaultReadObject();
+ javacal = new java.util.GregorianCalendar(javatz);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java
new file mode 100644
index 00000000000..0ee5cb65024
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java
@@ -0,0 +1,332 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2010, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Iterator;
+import java.util.Locale;
+
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.LocaleDisplayNames;
+import com.ibm.icu.text.MessageFormat;
+import com.ibm.icu.util.ULocale;
+import com.ibm.icu.util.UResourceBundle;
+
+public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
+ private final ULocale locale;
+ private final DialectHandling dialectHandling;
+ private final DataTable langData;
+ private final DataTable regionData;
+ private final Appender appender;
+ private final MessageFormat format;
+
+ private static final Cache cache = new Cache();
+
+ public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {
+ synchronized (cache) {
+ return cache.get(locale, dialectHandling);
+ }
+ }
+
+ public LocaleDisplayNamesImpl(ULocale locale, DialectHandling dialectHandling) {
+ this.dialectHandling = dialectHandling;
+ this.langData = LangDataTables.impl.get(locale);
+ this.regionData = RegionDataTables.impl.get(locale);
+ this.locale = ULocale.ROOT.equals(langData.getLocale()) ? regionData.getLocale() :
+ langData.getLocale();
+
+ // Note, by going through DataTable, this uses table lookup rather than straight lookup.
+ // That should get us the same data, I think. This way we don't have to explicitly
+ // load the bundle again. Using direct lookup didn't seem to make an appreciable
+ // difference in performance.
+ String sep = langData.get("localeDisplayPattern", "separator");
+ if ("separator".equals(sep)) {
+ sep = ", ";
+ }
+ this.appender = new Appender(sep);
+
+ String pattern = langData.get("localeDisplayPattern", "pattern");
+ if ("pattern".equals(pattern)) {
+ pattern = "{0} ({1})";
+ }
+ this.format = new MessageFormat(pattern);
+ }
+
+ @Override
+ public ULocale getLocale() {
+ return locale;
+ }
+
+ @Override
+ public DialectHandling getDialectHandling() {
+ return dialectHandling;
+ }
+
+ @Override
+ public String localeDisplayName(ULocale locale) {
+ return localeDisplayNameInternal(locale);
+ }
+
+ @Override
+ public String localeDisplayName(Locale locale) {
+ return localeDisplayNameInternal(ULocale.forLocale(locale));
+ }
+
+ @Override
+ public String localeDisplayName(String localeId) {
+ return localeDisplayNameInternal(new ULocale(localeId));
+ }
+
+ private String localeDisplayNameInternal(ULocale locale) {
+ // lang
+ // lang (script, country, variant, keyword=value, ...)
+ // script, country, variant, keyword=value, ...
+
+ String resultName = null;
+
+ String lang = locale.getLanguage();
+
+ // Empty basename indicates root locale (keywords are ignored for this).
+ // Our data uses 'root' to access display names for the root locale in the
+ // "Languages" table.
+ if (locale.getBaseName().length() == 0) {
+ lang = "root";
+ }
+ String script = locale.getScript();
+ String country = locale.getCountry();
+ String variant = locale.getVariant();
+
+ boolean hasScript = script.length() > 0;
+ boolean hasCountry = country.length() > 0;
+ boolean hasVariant = variant.length() > 0;
+
+ // dialect mode: try lang_script_country, lang_script, then lang_country; a hit consumes those subtags
+ if (dialectHandling == DialectHandling.DIALECT_NAMES) {
+ do { // loop construct is so we can break early out of search
+ if (hasScript && hasCountry) {
+ String langScriptCountry = lang + '_' + script + '_' + country;
+ String result = localeIdName(langScriptCountry);
+ if (!result.equals(langScriptCountry)) {
+ resultName = result;
+ hasScript = false;
+ hasCountry = false;
+ break;
+ }
+ }
+ if (hasScript) {
+ String langScript = lang + '_' + script;
+ String result = localeIdName(langScript);
+ if (!result.equals(langScript)) {
+ resultName = result;
+ hasScript = false;
+ break;
+ }
+ }
+ if (hasCountry) {
+ String langCountry = lang + '_' + country;
+ String result = localeIdName(langCountry);
+ if (!result.equals(langCountry)) {
+ resultName = result;
+ hasCountry = false;
+ break;
+ }
+ }
+ } while (false);
+ }
+
+ if (resultName == null) {
+ resultName = localeIdName(lang);
+ }
+
+ StringBuilder buf = new StringBuilder();
+ if (hasScript) {
+ // first element, don't need appender
+ buf.append(scriptDisplayName(script));
+ }
+ if (hasCountry) {
+ appender.append(regionDisplayName(country), buf);
+ }
+ if (hasVariant) {
+ appender.append(variantDisplayName(variant), buf);
+ }
+
+ Iterator<String> keys = locale.getKeywords();
+ if (keys != null) {
+ while (keys.hasNext()) {
+ String key = keys.next();
+ String value = locale.getKeywordValue(key);
+ appender.append(keyDisplayName(key), buf)
+ .append("=")
+ .append(keyValueDisplayName(key, value));
+ }
+ }
+
+ String resultRemainder = null;
+ if (buf.length() > 0) {
+ resultRemainder = buf.toString();
+ }
+
+ if (resultRemainder != null) {
+ return format.format(new Object[] {resultName, resultRemainder});
+ }
+
+ return resultName;
+ }
+
+ private String localeIdName(String localeId) {
+ return langData.get("Languages", localeId);
+ }
+
+ @Override
+ public String languageDisplayName(String lang) {
+ // Special case to eliminate non-languages, which pollute our data.
+ if (lang.equals("root") || lang.indexOf('_') != -1) {
+ return lang;
+ }
+ return langData.get("Languages", lang);
+ }
+
+ @Override
+ public String scriptDisplayName(String script) {
+ return langData.get("Scripts", script);
+ }
+
+ @Override
+ public String scriptDisplayName(int scriptCode) {
+ return scriptDisplayName(UScript.getShortName(scriptCode));
+ }
+
+ @Override
+ public String regionDisplayName(String region) {
+ return regionData.get("Countries", region);
+ }
+
+ @Override
+ public String variantDisplayName(String variant) {
+ return langData.get("Variants", variant);
+ }
+
+ @Override
+ public String keyDisplayName(String key) {
+ return langData.get("Keys", key);
+ }
+
+ @Override
+ public String keyValueDisplayName(String key, String value) {
+ return langData.get("Types", key, value);
+ }
+
+ public static class DataTable {
+ ULocale getLocale() {
+ return ULocale.ROOT;
+ }
+
+ String get(String tableName, String code) {
+ return get(tableName, null, code);
+ }
+
+ String get(String tableName, String subTableName, String code) {
+ return code;
+ }
+ }
+
+ static class ICUDataTable extends DataTable {
+ private final ICUResourceBundle bundle;
+
+ public ICUDataTable(String path, ULocale locale) {
+ this.bundle = (ICUResourceBundle) UResourceBundle.getBundleInstance(
+ path, locale.getBaseName());
+ }
+
+ public ULocale getLocale() {
+ return bundle.getULocale();
+ }
+
+ public String get(String tableName, String subTableName, String code) {
+ return ICUResourceTableAccess.getTableString(bundle, tableName, subTableName,
+ code);
+ }
+ }
+
+ static abstract class DataTables {
+ public abstract DataTable get(ULocale locale);
+ public static DataTables load(String className) {
+ try {
+ return (DataTables) Class.forName(className).newInstance();
+ } catch (Throwable t) {
+ final DataTable NO_OP = new DataTable();
+ return new DataTables() {
+ public DataTable get(ULocale locale) {
+ return NO_OP;
+ }
+ };
+ }
+ }
+ }
+
+ static abstract class ICUDataTables extends DataTables {
+ private final String path;
+
+ protected ICUDataTables(String path) {
+ this.path = path;
+ }
+
+ @Override
+ public DataTable get(ULocale locale) {
+ return new ICUDataTable(path, locale);
+ }
+ }
+
+ static class LangDataTables {
+ static final DataTables impl = DataTables.load("com.ibm.icu.impl.ICULangDataTables");
+ }
+
+ static class RegionDataTables {
+ static final DataTables impl = DataTables.load("com.ibm.icu.impl.ICURegionDataTables");
+ }
+
+ public static enum DataTableType {
+ LANG, REGION;
+ }
+
+ public static boolean haveData(DataTableType type) {
+ switch (type) {
+ case LANG: return LangDataTables.impl instanceof ICUDataTables;
+ case REGION: return RegionDataTables.impl instanceof ICUDataTables;
+ default:
+ throw new IllegalArgumentException("unknown type: " + type);
+ }
+ }
+
+ static class Appender {
+ private final String sep;
+
+ Appender(String sep) {
+ this.sep = sep;
+ }
+ StringBuilder append(String s, StringBuilder b) {
+ if (b.length() > 0) {
+ b.append(sep);
+ }
+ b.append(s);
+ return b;
+ }
+ }
+
+ private static class Cache {
+ private ULocale locale;
+ private DialectHandling dialectHandling;
+ private LocaleDisplayNames cache;
+ public LocaleDisplayNames get(ULocale locale, DialectHandling dialectHandling) {
+ if (!(dialectHandling == this.dialectHandling && locale.equals(this.locale))) {
+ this.locale = locale;
+ this.dialectHandling = dialectHandling;
+ this.cache = new LocaleDisplayNamesImpl(locale, dialectHandling);
+ }
+ return cache;
+ }
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java b/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java
new file mode 100644
index 00000000000..6a4d392fff8
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/LocaleIDParser.java
@@ -0,0 +1,741 @@
+/*
+******************************************************************************
+* Copyright (C) 2003-2009, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import com.ibm.icu.impl.locale.AsciiUtil;
+
+/**
+ * Utility class to parse and normalize locale ids (including POSIX style)
+ */
+public final class LocaleIDParser {
+ private char[] id;
+ private int index;
+ private char[] buffer;
+ private int blen;
+ // um, don't handle POSIX ids unless we request it. why not? well... because.
+ private boolean canonicalize;
+ private boolean hadCountry;
+
+ // used when canonicalizing
+ Map<String, String> keywords;
+ String baseName;
+
+ /**
+ * Parsing constants.
+ */
+ private static final char KEYWORD_SEPARATOR = '@';
+ private static final char HYPHEN = '-';
+ private static final char KEYWORD_ASSIGN = '=';
+ private static final char COMMA = ',';
+ private static final char ITEM_SEPARATOR = ';';
+ private static final char DOT = '.';
+ private static final char UNDERSCORE = '_';
+
+ public LocaleIDParser(String localeID) {
+ this(localeID, false);
+ }
+
+ public LocaleIDParser(String localeID, boolean canonicalize) {
+ id = localeID.toCharArray();
+ index = 0;
+ buffer = new char[id.length + 5];
+ blen = 0;
+ this.canonicalize = canonicalize;
+ }
+
+ private void reset() {
+ index = blen = 0;
+ }
+
+ // utilities for working on text in the buffer
+
+ /**
+ * Append c to the buffer.
+ */
+ private void append(char c) {
+ try {
+ buffer[blen] = c;
+ }
+ catch (IndexOutOfBoundsException e) {
+ if (buffer.length > 512) {
+ // something is seriously wrong, let this go
+ throw e;
+ }
+ char[] nbuffer = new char[buffer.length * 2];
+ System.arraycopy(buffer, 0, nbuffer, 0, buffer.length);
+ nbuffer[blen] = c;
+ buffer = nbuffer;
+ }
+ ++blen;
+ }
+
+ private void addSeparator() {
+ append(UNDERSCORE);
+ }
+
+ /**
+ * Returns the text in the buffer from start to blen as a String.
+ */
+ private String getString(int start) {
+ if (start == blen) {
+ return "";
+ }
+ return new String(buffer, start, blen-start);
+ }
+
+ /**
+ * Set the length of the buffer to pos, then append the string.
+ */
+ private void set(int pos, String s) {
+ this.blen = pos; // no safety
+ append(s);
+ }
+
+ /**
+ * Append the string to the buffer.
+ */
+ private void append(String s) {
+ for (int i = 0; i < s.length(); ++i) {
+ append(s.charAt(i));
+ }
+ }
+
+ // utilities for parsing text out of the id
+
+ /**
+ * Character to indicate no more text is available in the id.
+ */
+ private static final char DONE = '\uffff';
+
+ /**
+ * Returns the character at index in the id, and advances index. The returned character
+ * is DONE if index was already at the end of the id. The index is advanced regardless
+ * so that decrementing the index will always 'unget' the last character returned.
+ */
+ private char next() {
+ if (index == id.length) {
+ index++;
+ return DONE;
+ }
+
+ return id[index++];
+ }
+
+ /**
+ * Advance index until the next terminator or id separator, and leave it there.
+ */
+ private void skipUntilTerminatorOrIDSeparator() {
+ while (!isTerminatorOrIDSeparator(next())) {
+ }
+ --index;
+ }
+
+ /**
+ * Returns true if the character at index in the id is a terminator.
+ */
+ private boolean atTerminator() {
+ return index >= id.length || isTerminator(id[index]);
+ }
+
+ /*
+ * Returns true if the character is an id separator (underscore or hyphen).
+ */
+ /* private boolean isIDSeparator(char c) {
+ return c == UNDERSCORE || c == HYPHEN;
+ }*/
+
+ /**
+ * Returns true if the character is a terminator (keyword separator, dot, or DONE).
+ * Dot is a terminator because of the POSIX form, where dot precedes the codepage.
+ */
+ private boolean isTerminator(char c) {
+ // always terminate at DOT, even if not handling POSIX. It's an error...
+ return c == KEYWORD_SEPARATOR || c == DONE || c == DOT;
+ }
+
+ /**
+ * Returns true if the character is a terminator or id separator.
+ */
+ private boolean isTerminatorOrIDSeparator(char c) {
+ return c == KEYWORD_SEPARATOR || c == UNDERSCORE || c == HYPHEN ||
+ c == DONE || c == DOT;
+ }
+
+ /**
+ * Returns true if the start of the id has an experimental or private language
+ * prefix; the pattern '[ixIX][-_].' shows the syntax checked.
+ */
+ private boolean haveExperimentalLanguagePrefix() {
+ if (id.length > 2) {
+ char c = id[1];
+ if (c == HYPHEN || c == UNDERSCORE) {
+ c = id[0];
+ return c == 'x' || c == 'X' || c == 'i' || c == 'I';
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if a value separator occurs at or after index.
+ */
+ private boolean haveKeywordAssign() {
+ // assume it is safe to start from index
+ for (int i = index; i < id.length; ++i) {
+ if (id[i] == KEYWORD_ASSIGN) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Advance index past language, and accumulate normalized language code in buffer.
+ * Index must be at 0 when this is called. Index is left at a terminator or id
+ * separator. Returns the start of the language code in the buffer.
+ */
+ private int parseLanguage() {
+ if (haveExperimentalLanguagePrefix()) {
+ append(Character.toLowerCase(id[0]));
+ append(HYPHEN);
+ index = 2;
+ }
+
+ char c;
+ while(!isTerminatorOrIDSeparator(c = next())) {
+ append(Character.toLowerCase(c));
+ }
+ --index; // unget
+
+ if (blen == 3) {
+ String lang = LocaleIDs.threeToTwoLetterLanguage(getString(0));
+ if (lang != null) {
+ set(0, lang);
+ }
+ }
+
+ return 0;
+ }
+
+ /**
+ * Advance index past language. Index must be at 0 when this is called. Index
+ * is left at a terminator or id separator.
+ */
+ private void skipLanguage() {
+ if (haveExperimentalLanguagePrefix()) {
+ index = 2;
+ }
+ skipUntilTerminatorOrIDSeparator();
+ }
+
+ /**
+ * Advance index past script, and accumulate normalized script in buffer.
+ * Index must be immediately after the language (at the id separator, if any).
+ * If the item at this position is not a script (is not four characters
+ * long) leave index and buffer unchanged. Otherwise index is left at
+ * a terminator or id separator. Returns the start of the script code
+ * in the buffer (this may be equal to the buffer length, if there is no
+ * script).
+ */
+ private int parseScript() {
+ if (!atTerminator()) {
+ int oldIndex = index; // save original index
+ ++index; // step over the id separator
+
+ int oldBlen = blen; // capture before appending the separator; if this is not a script everything is undone
+ char c;
+ while(!isTerminatorOrIDSeparator(c = next())) {
+ if (blen == oldBlen) { // first pass: emit separator, then title-case the first letter
+ addSeparator();
+ append(Character.toUpperCase(c));
+ } else {
+ append(Character.toLowerCase(c));
+ }
+ }
+ --index; // unget
+
+ /* If it's not exactly 4 characters long, then it's not a script. */
+ if (index - oldIndex != 5) { // +1 to account for separator
+ index = oldIndex;
+ blen = oldBlen;
+ } else {
+ oldBlen++; // index past the separator (an underscore), for clients who want to extract just the script
+ }
+
+ return oldBlen;
+ }
+ return blen;
+ }
+
+ /**
+ * Advance index past script.
+ * Index must be immediately after the language, i.e. at the IDSeparator.
+ * If the item at this position is not a script (is not four characters
+ * long) leave index. Otherwise index is left at a terminator or
+ * id separator.
+ */
+ private void skipScript() {
+ if (!atTerminator()) {
+ int oldIndex = index;
+ ++index; // step over the id separator
+
+ skipUntilTerminatorOrIDSeparator();
+ if (index - oldIndex != 5) { // +1 to account for separator
+ index = oldIndex;
+ }
+ }
+ }
+
+ /**
+ * Advance index past country, and accumulate normalized country in buffer.
+ * Index must be immediately after the script (if there is one, else language),
+ * i.e. at the IDSeparator. Return the start of the country code in the buffer.
+ */
+ private int parseCountry() {
+ if (!atTerminator()) {
+ int oldIndex = index;
+ ++index; // step over the id separator
+
+ int oldBlen = blen;
+ char c;
+ while (!isTerminatorOrIDSeparator(c = next())) {
+ if (oldBlen == blen) { // first char: add the separator (underscore)
+ hadCountry = true; // we have a country, let variant parsing know
+ addSeparator();
+ ++oldBlen; // increment past the separator
+ }
+ append(Character.toUpperCase(c));
+ }
+ --index; // unget
+
+ int charsAppended = blen - oldBlen;
+
+ if (charsAppended == 0) {
+ // Do nothing.
+ }
+ else if (charsAppended < 2 || charsAppended > 3) {
+ // It's not a country, so return index and blen to
+ // their previous values.
+ index = oldIndex;
+ --oldBlen;
+ blen = oldBlen;
+ hadCountry = false;
+ }
+ else if (charsAppended == 3) {
+ String region = LocaleIDs.threeToTwoLetterRegion(getString(oldBlen));
+ if (region != null) {
+ set(oldBlen, region);
+ }
+ }
+
+ return oldBlen;
+ }
+
+ return blen;
+ }
+
+ /**
+ * Advance index past country.
+ * Index must be immediately after the script (if there is one, else language),
+ * i.e. at the IDSeparator, which is always stepped over.
+ */
+ private void skipCountry() {
+ if (!atTerminator()) {
+ ++index;
+ /*
+ * Save the index point after the separator, since the format
+ * requires two separators if the country is not present.
+ */
+ int oldIndex = index;
+
+ skipUntilTerminatorOrIDSeparator();
+ int charsSkipped = index - oldIndex;
+ if (charsSkipped < 2 || charsSkipped > 3) {
+ index = oldIndex;
+ }
+ }
+ }
+
+ /**
+ * Advance index past variant, and accumulate normalized variant in buffer. This ignores
+ * the codepage information from POSIX ids. Index must be immediately after the country
+ * or script. Index is left at the keyword separator or at the end of the text. Return
+ * the start of the variant code in the buffer.
+ *
+ * In standard form, we can have the following forms:
+ * ll__VVVV
+ * ll_CC_VVVV
+ * ll_Ssss_VVVV
+ * ll_Ssss_CC_VVVV
+ *
+ * This also handles POSIX ids, which can have the following forms (pppp is code page id):
+ * ll_CC.pppp --> ll_CC
+ * ll_CC.pppp@VVVV --> ll_CC_VVVV
+ * ll_CC@VVVV --> ll_CC_VVVV
+ *
+ * We identify this use of '@' in POSIX ids by looking for an '=' following
+ * the '@'. If there is one, we consider '@' to start a keyword list, instead of
+ * being part of a POSIX id.
+ *
+ * Note: since it was decided that we want an option to not handle POSIX ids, this
+ * becomes a bit more complex.
+ */
+ private int parseVariant() {
+ int oldBlen = blen;
+
+ boolean start = true;
+ boolean needSeparator = true;
+ boolean skipping = false;
+ char c;
+ while ((c = next()) != DONE) {
+ if (c == DOT) {
+ start = false;
+ skipping = true;
+ } else if (c == KEYWORD_SEPARATOR) {
+ if (haveKeywordAssign()) {
+ break;
+ }
+ skipping = false;
+ start = false;
+ needSeparator = true; // add another underscore if we have more text
+ } else if (start) {
+ start = false;
+ } else if (!skipping) {
+ if (needSeparator) {
+ boolean incOldBlen = blen == oldBlen; // need to skip separators
+ needSeparator = false;
+ if (incOldBlen && !hadCountry) { // no country, we'll need two
+ addSeparator();
+ ++oldBlen; // for sure
+ }
+ addSeparator();
+ if (incOldBlen) { // only for the first separator
+ ++oldBlen;
+ }
+ }
+ c = Character.toUpperCase(c);
+ if (c == HYPHEN || c == COMMA) {
+ c = UNDERSCORE;
+ }
+ append(c);
+ }
+ }
+ --index; // unget
+
+ return oldBlen;
+ }
+
+ // no need for skipvariant, to get the keywords we'll just scan directly for
+ // the keyword separator
+
+ /**
+ * Returns the normalized language id, or the empty string.
+ */
+ public String getLanguage() {
+ reset();
+ return getString(parseLanguage());
+ }
+
+ /**
+ * Returns the normalized script id, or the empty string.
+ */
+ public String getScript() {
+ reset();
+ skipLanguage();
+ return getString(parseScript());
+ }
+
+ /**
+ * return the normalized country id, or the empty string.
+ */
+ public String getCountry() {
+ reset();
+ skipLanguage();
+ skipScript();
+ return getString(parseCountry());
+ }
+
+ /**
+ * Returns the normalized variant id, or the empty string.
+ */
+ public String getVariant() {
+ reset();
+ skipLanguage();
+ skipScript();
+ skipCountry();
+ return getString(parseVariant());
+ }
+
+ /**
+ * Returns the language, script, country, and variant as separate strings.
+ */
+ public String[] getLanguageScriptCountryVariant() {
+ reset();
+ return new String[] {
+ getString(parseLanguage()),
+ getString(parseScript()),
+ getString(parseCountry()),
+ getString(parseVariant())
+ };
+ }
+
+ public void setBaseName(String baseName) {
+ this.baseName = baseName;
+ }
+
+ public void parseBaseName() {
+ if (baseName != null) {
+ set(0, baseName);
+ } else {
+ reset();
+ parseLanguage();
+ parseScript();
+ parseCountry();
+ parseVariant();
+
+ // catch unwanted trailing underscore after country if there was no variant
+ if (blen > 1 && buffer[blen-1] == UNDERSCORE) {
+ --blen;
+ }
+ }
+ }
+
+ /**
+ * Returns the normalized base form of the locale id. The base
+ * form does not include keywords.
+ */
+ public String getBaseName() {
+ if (baseName != null) {
+ return baseName;
+ }
+ parseBaseName();
+ return getString(0);
+ }
+
+ /**
+ * Returns the normalized full form of the locale id. The full
+ * form includes keywords if they are present.
+ */
+ public String getName() {
+ parseBaseName();
+ parseKeywords();
+ return getString(0);
+ }
+
+ // keyword utilities
+
+ /**
+ * If we have keywords, advance index to the start of the keywords and return true,
+ * otherwise return false.
+ */
+ private boolean setToKeywordStart() {
+ for (int i = index; i < id.length; ++i) {
+ if (id[i] == KEYWORD_SEPARATOR) {
+ if (canonicalize) {
+ for (int j = ++i; j < id.length; ++j) { // increment i past separator for return
+ if (id[j] == KEYWORD_ASSIGN) {
+ index = i;
+ return true;
+ }
+ }
+ } else {
+ if (++i < id.length) {
+ index = i;
+ return true;
+ }
+ }
+ break;
+ }
+ }
+ return false;
+ }
+
+ private static boolean isDoneOrKeywordAssign(char c) {
+ return c == DONE || c == KEYWORD_ASSIGN;
+ }
+
+ private static boolean isDoneOrItemSeparator(char c) {
+ return c == DONE || c == ITEM_SEPARATOR;
+ }
+
+ private String getKeyword() {
+ int start = index;
+ while (!isDoneOrKeywordAssign(next())) {
+ }
+ --index;
+ return AsciiUtil.toLowerString(new String(id, start, index-start).trim());
+ }
+
+ private String getValue() {
+ int start = index;
+ while (!isDoneOrItemSeparator(next())) {
+ }
+ --index;
+ return new String(id, start, index-start).trim(); // leave case alone
+ }
+
+ private Comparator<String> getKeyComparator() {
+ final Comparator<String> comp = new Comparator<String>() {
+ public int compare(String lhs, String rhs) {
+ return lhs.compareTo(rhs); // natural String order for keyword names
+ }
+ };
+ return comp;
+ }
+
+ /**
+ * Returns a map of the keywords and values, or an empty map (never null) if there are none.
+ */
+ public Map<String, String> getKeywordMap() {
+ if (keywords == null) {
+ TreeMap<String, String> m = null;
+ if (setToKeywordStart()) {
+ // trim spaces from both keywords and values; only keywords are converted to lower case.
+ do {
+ String key = getKeyword();
+ if (key.length() == 0) {
+ break;
+ }
+ char c = next();
+ if (c != KEYWORD_ASSIGN) {
+ // throw new IllegalArgumentException("key '" + key + "' missing a value.");
+ if (c == DONE) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ String value = getValue();
+ if (value.length() == 0) {
+ // throw new IllegalArgumentException("key '" + key + "' missing a value.");
+ continue;
+ }
+ if (m == null) {
+ m = new TreeMap<String, String>(getKeyComparator());
+ } else if (m.containsKey(key)) {
+ // throw new IllegalArgumentException("key '" + key + "' already has a value.");
+ continue;
+ }
+ m.put(key, value);
+ } while (next() == ITEM_SEPARATOR);
+ }
+ keywords = m != null ? m : Collections.<String, String>emptyMap();
+ }
+
+ return keywords;
+ }
+
+
+ /**
+ * Append the keywords to the buffer as '@key=value;key=value' and return the start of the keyword text (just past the '@') in the buffer.
+ */
+ private int parseKeywords() {
+ int oldBlen = blen;
+ Map<String, String> m = getKeywordMap();
+ if (!m.isEmpty()) {
+ boolean first = true;
+ for (Map.Entry<String, String> e : m.entrySet()) {
+ append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR);
+ first = false;
+ append(e.getKey());
+ append(KEYWORD_ASSIGN);
+ append(e.getValue());
+ }
+ if (blen != oldBlen) {
+ ++oldBlen;
+ }
+ }
+ return oldBlen;
+ }
+
+ /**
+ * Returns an iterator over the keyword names, or null if we have an empty map.
+ */
+ public Iterator<String> getKeywords() {
+ Map<String, String> m = getKeywordMap();
+ return m.isEmpty() ? null : m.keySet().iterator();
+ }
+
+ /**
+ * Returns the value for the named keyword (the name is trimmed and lowercased
+ * before lookup), or null if the keyword is not present.
+ */
+ public String getKeywordValue(String keywordName) {
+ Map<String, String> m = getKeywordMap();
+ return m.isEmpty() ? null : m.get(AsciiUtil.toLowerString(keywordName.trim()));
+ }
+
+ /**
+ * Set the keyword value only if it is not already set to something else.
+ */
+ public void defaultKeywordValue(String keywordName, String value) {
+ setKeywordValue(keywordName, value, false);
+ }
+
+ /**
+ * Set the value for the named keyword, or unset it if value is null. If
+ * keywordName itself is null, unset all keywords. A non-null keywordName or
+ * value must not be empty after trimming (IllegalArgumentException otherwise).
+ */
+ public void setKeywordValue(String keywordName, String value) {
+ setKeywordValue(keywordName, value, true);
+ }
+
+ /**
+ * Set the value for the named keyword, or unset it if value is null. If
+ * keywordName itself is null, unset all keywords. A non-null keywordName or
+ * value must not be empty after trimming. If reset is true, ignore any previous
+ * value for the keyword, otherwise do not change an existing keyword (including
+ * removal of one or all keywords).
+ */
+ private void setKeywordValue(String keywordName, String value, boolean reset) {
+ if (keywordName == null) {
+ if (reset) {
+ // force new map, ignore value
+ keywords = Collections.<String, String>emptyMap();
+ }
+ } else {
+ keywordName = AsciiUtil.toLowerString(keywordName.trim());
+ if (keywordName.length() == 0) {
+ throw new IllegalArgumentException("keyword must not be empty");
+ }
+ if (value != null) {
+ value = value.trim();
+ if (value.length() == 0) {
+ throw new IllegalArgumentException("value must not be empty");
+ }
+ }
+ Map<String, String> m = getKeywordMap();
+ if (m.isEmpty()) { // it is EMPTY_MAP
+ if (value != null) {
+ // force new map
+ keywords = new TreeMap<String, String>(getKeyComparator());
+ keywords.put(keywordName, value.trim());
+ }
+ } else {
+ if (reset || !m.containsKey(keywordName)) {
+ if (value != null) {
+ m.put(keywordName, value);
+ } else {
+ m.remove(keywordName);
+ if (m.isEmpty()) {
+ // force new map
+ keywords = Collections.<String, String>emptyMap();
+ }
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java b/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java
new file mode 100644
index 00000000000..02f633e2294
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/LocaleIDs.java
@@ -0,0 +1,536 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.MissingResourceException;
+
+import com.ibm.icu.util.ULocale;
+
+
+/**
+ * Utilities for mapping between old and new language, country, and other
+ * locale ID related names.
+ */
+public class LocaleIDs {
+
+ /**
+ * Returns a list of all 2-letter country codes defined in ISO 3166.
+ * Can be used to create Locales.
+ * @stable ICU 3.0
+ */
+ public static String[] getISOCountries() {
+ initCountryTables();
+ return _countries.clone();
+ }
+
+ /**
+ * Returns a list of all 2-letter language codes defined in ISO 639.
+ * Can be used to create Locales.
+ * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
+ * The list this function returns includes both the new and the old codes for the
+ * languages whose codes have changed.]
+ * @stable ICU 3.0
+ */
+ public static String[] getISOLanguages() {
+ initLanguageTables();
+ return _languages.clone();
+ }
+
+ /**
+ * Returns a three-letter abbreviation for the provided country. If the provided
+ * country is empty, returns the empty string. Otherwise, returns
+ * an uppercase ISO 3166 3-letter country code.
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter country abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public static String getISO3Country(String country){
+ initCountryTables();
+
+ int offset = findIndex(_countries, country);
+ if(offset>=0){
+ return _countries3[offset];
+ }else{
+ offset = findIndex(_obsoleteCountries, country);
+ if(offset>=0){
+ return _obsoleteCountries3[offset];
+ }
+ }
+ return "";
+ }
+ /**
+ * Returns a three-letter abbreviation for the language. If language is
+ * empty, returns the empty string. Otherwise, returns
+ * a lowercase ISO 639-2/T language code.
+ * The ISO 639-2 language codes can be found on-line at
+ * ftp://dkuug.dk/i18n/iso-639-2.txt
+ * @exception MissingResourceException Throws MissingResourceException if the
+ * three-letter language abbreviation is not available for this locale.
+ * @stable ICU 3.0
+ */
+ public static String getISO3Language(String language) {
+ initLanguageTables();
+
+ int offset = findIndex(_languages, language);
+ if(offset>=0){
+ return _languages3[offset];
+ } else {
+ offset = findIndex(_obsoleteLanguages, language);
+ if (offset >= 0) {
+ return _obsoleteLanguages3[offset];
+ }
+ }
+ return "";
+ }
+
+ public static String threeToTwoLetterLanguage(String lang) {
+ initLanguageTables();
+
+ /* convert 3 character code to 2 character code if possible *CWB*/
+ int offset = findIndex(_languages3, lang);
+ if (offset >= 0) {
+ return _languages[offset];
+ }
+
+ offset = findIndex(_obsoleteLanguages3, lang);
+ if (offset >= 0) {
+ return _obsoleteLanguages[offset];
+ }
+
+ return null;
+ }
+
+ public static String threeToTwoLetterRegion(String region) {
+ initCountryTables();
+
+ /* convert 3 character code to 2 character code if possible *CWB*/
+ int offset = findIndex(_countries3, region);
+ if (offset >= 0) {
+ return _countries[offset];
+ }
+
+ offset = findIndex(_obsoleteCountries3, region);
+ if (offset >= 0) {
+ return _obsoleteCountries[offset];
+ }
+
+ return null;
+ }
+
+ /**
+ * linear search of the string array. the arrays are unfortunately ordered by the
+ * two-letter target code, not the three-letter search code, which seems backwards.
+ */
+ private static int findIndex(String[] array, String target){
+ for (int i = 0; i < array.length; i++) {
+ if (target.equals(array[i])) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Tables used in normalizing portions of the id.
+ */
+ /* tables updated per http://lcweb.loc.gov/standards/iso639-2/
+ to include the revisions up to 2001/7/27 *CWB*/
+ /* The 3 character codes are the terminology codes like RFC 3066.
+ This is compatible with prior ICU codes */
+ /* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in
+ the table but now at the end of the table because
+ 3 character codes are duplicates. This avoids bad searches
+ going from 3 to 2 character codes.*/
+ /* The range qaa-qtz is reserved for local use. */
+
+ private static String[] _languages;
+ private static String[] _replacementLanguages;
+ private static String[] _obsoleteLanguages;
+ private static String[] _languages3;
+ private static String[] _obsoleteLanguages3;
+
+ // Avoid initializing languages tables unless we have to.
+ private static void initLanguageTables() {
+ if (_languages == null) {
+
+ /* This list MUST be in sorted order, and MUST contain the two-letter codes
+ if one exists otherwise use the three letter code */
+ String[] tempLanguages = {
+ "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
+ "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa",
+ "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
+ "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
+ "bai", "bal", "ban", "bas", "bat", "be", "bej",
+ "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
+ "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
+ "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
+ "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
+ "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
+ "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
+ "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
+ "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
+ "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
+ "enm", "eo", "es", "et", "eu", "ewo", "fa",
+ "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon",
+ "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay",
+ "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn",
+ "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv",
+ "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him",
+ "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
+ "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
+ "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
+ "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
+ "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi",
+ "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
+ "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks",
+ "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
+ "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
+ "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
+ "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
+ "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min",
+ "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
+ "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
+ "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
+ "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
+ "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub",
+ "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
+ "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
+ "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
+ "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
+ "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
+ "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
+ "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem",
+ "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
+ "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
+ "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
+ "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
+ "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter",
+ "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
+ "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr",
+ "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
+ "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
+ "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
+ "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
+ "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd",
+ "zu", "zun",
+ };
+
+ String[] tempReplacementLanguages = {
+ "id", "he", "yi", "jv", "sr", "nb",/* replacement language codes */
+ };
+
+ String[] tempObsoleteLanguages = {
+ "in", "iw", "ji", "jw", "sh", "no", /* obsolete language codes */
+ };
+
+ /* This list MUST contain a three-letter code for every two-letter code in the
+ list above, and they MUST ne in the same order (i.e., the same language must
+ be in the same place in both lists)! */
+ String[] tempLanguages3 = {
+ /*"aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
+ "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
+ /*"afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", */
+ "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa",
+ /*"ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
+ "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
+ /*"ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
+ "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
+ /*"bai", "bal", "ban", "bas", "bat", "be", "bej", */
+ "bai", "bal", "ban", "bas", "bat", "bel", "bej",
+ /*"bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
+ "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
+ /*"bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
+ "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
+ /*"btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
+ "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
+ /*"ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
+ "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
+ /*"chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
+ "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
+ /*"cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
+ "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
+ /*"cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
+ "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
+ /*"dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
+ "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
+ /*"dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
+ "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
+ /*"enm", "eo", "es", "et", "eu", "ewo", "fa", */
+ "enm", "epo", "spa", "est", "eus", "ewo", "fas",
+ /*"fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", */
+ "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon",
+ /*"fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", */
+ "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay",
+ /*"gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
+ "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
+ /*"goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", */
+ "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv",
+ /*"gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
+ "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
+ /*"hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
+ "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
+ /*"ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
+ "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
+ /*"ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
+ "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
+ /*"iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
+ "iku", "jpn", "jbo", "jpr", "jrb", "jaw", "kat", "kaa", "kab",
+ /*"kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", */
+ "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi",
+ /*"kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
+ "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
+ /*"ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", */
+ "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas",
+ /*"ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
+ "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
+ /*"lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
+ "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
+ /*"loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
+ "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
+ /*"lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
+ "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
+ /*"mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", */
+ "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
+ /*"mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
+ "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
+ /*"mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
+ "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
+ /*"mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
+ "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
+ /*"nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
+ "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
+ /*"niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", */
+ "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
+ /*"nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
+ "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
+ /*"om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
+ "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
+ /*"pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
+ "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
+ /*"pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
+ "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
+ /*"raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
+ "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
+ /*"ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
+ "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
+ /*"sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", */
+ "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem",
+ /*"sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
+ "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
+ /*"sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
+ "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
+ /*"sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
+ "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
+ /*"srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
+ "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
+ /*"sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", */
+ "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
+ /*"tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
+ "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
+ /*"tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", */
+ "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
+ /*"ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
+ "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
+ /*"ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
+ "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
+ /*"uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
+ "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
+ /*"wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
+ "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
+ /*"yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd", */
+ "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
+ /*"zu", "zun", */
+ "zul", "zun",
+ };
+
+ String[] tempObsoleteLanguages3 = {
+ /* "in", "iw", "ji", "jw", "sh", */
+ "ind", "heb", "yid", "jaw", "srp",
+ };
+
+ synchronized (ULocale.class) {
+ if (_languages == null) {
+ _languages = tempLanguages;
+ _replacementLanguages = tempReplacementLanguages;
+ _obsoleteLanguages = tempObsoleteLanguages;
+ _languages3 = tempLanguages3;
+ _obsoleteLanguages3 = tempObsoleteLanguages3;
+ }
+ }
+ }
+ }
+
+ private static String[] _countries;
+ private static String[] _deprecatedCountries;
+ private static String[] _replacementCountries;
+ private static String[] _obsoleteCountries;
+ private static String[] _countries3;
+ private static String[] _obsoleteCountries3;
+
+ // Avoid initializing country tables unless we have to.
+ private static void initCountryTables() {
+ if (_countries == null) {
+ /* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
+ http://www.evertype.com/standards/iso3166/iso3166-1-en.html
+ added new codes keeping the old ones for compatibility
+ updated to include 1999/12/03 revisions *CWB*/
+
+ /* RO(ROM) is now RO(ROU) according to
+ http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
+ */
+
+ /* This list MUST be in sorted order, and MUST contain only two-letter codes! */
+ String[] tempCountries = {
+ "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
+ "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
+ "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
+ "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
+ "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
+ "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
+ "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
+ "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
+ "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
+ "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
+ "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
+ "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
+ "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
+ "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
+ "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
+ "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
+ "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
+ "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
+ "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
+ "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
+ "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
+ "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
+ "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
+ "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
+ "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
+ "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
+ "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
+ "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
+ "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
+ "WS", "YE", "YT", "ZA", "ZM", "ZW",
+ };
+
+ /* this table is used for 3 letter codes */
+ String[] tempObsoleteCountries = {
+ "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
+ };
+
+ String[] tempDeprecatedCountries = {
+ "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" /* deprecated country list */
+ };
+ String[] tempReplacementCountries = {
+ /* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
+ "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", /* replacement country codes */
+ };
+
+ /* This list MUST contain a three-letter code for every two-letter code in
+ the above list, and they MUST be listed in the same order! */
+ String[] tempCountries3 = {
+ /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
+ "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
+ /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
+ "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
+ /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
+ "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
+ /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
+ "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
+ /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
+ "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
+ /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
+ "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
+ /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
+ "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
+ /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
+ "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
+ /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
+ "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
+ /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
+ "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
+ /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
+ "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
+ /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
+ "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
+ /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
+ "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
+ /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
+ "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
+ /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
+ "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
+ /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
+ "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
+ /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
+ "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
+ /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
+ "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
+ /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
+ "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
+ /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
+ "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
+ /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
+ "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
+ /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
+ "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
+ /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
+ "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
+ /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
+ "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
+ /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
+ "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
+ /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
+ "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
+ /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
+ "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
+ /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
+ "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
+ /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
+ "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
+ /* "WS", "YE", "YT", "ZA", "ZM", "ZW" */
+ "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
+ };
+
+ String[] tempObsoleteCountries3 = {
+ /*"FX", "CS", "RO", "TP", "YU", "ZR", */
+ "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
+ };
+
+ synchronized (ULocale.class) {
+ if (_countries == null) {
+ _countries = tempCountries;
+ _deprecatedCountries = tempDeprecatedCountries;
+ _replacementCountries = tempReplacementCountries;
+ _obsoleteCountries = tempObsoleteCountries;
+ _countries3 = tempCountries3;
+ _obsoleteCountries3 = tempObsoleteCountries3;
+ }
+ }
+ }
+ }
+
+ public static String getCurrentCountryID(String oldID){
+ initCountryTables();
+ int offset = findIndex(_deprecatedCountries, oldID);
+ if (offset >= 0) {
+ return _replacementCountries[offset];
+ }
+ return oldID;
+ }
+
+ public static String getCurrentLanguageID(String oldID){
+ initLanguageTables();
+ int offset = findIndex(_obsoleteLanguages, oldID);
+ if (offset >= 0) {
+ return _replacementLanguages[offset];
+ }
+ return oldID;
+ }
+
+
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java b/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java
new file mode 100644
index 00000000000..06337386474
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/LocaleUtility.java
@@ -0,0 +1,132 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ ******************************************************************************
+ *
+ ******************************************************************************
+ */
+
+package com.ibm.icu.impl;
+
+import java.util.Locale;
+
+/**
+ * A class to hold utility functions missing from java.util.Locale.
+ */
+public class LocaleUtility {
+
+ /**
+ * A helper function to convert a string of the form
+ * aa_BB_CC to a locale object. Why isn't this in Locale?
+ */
+ public static Locale getLocaleFromName(String name) {
+ String language = "";
+ String country = "";
+ String variant = "";
+
+ int i1 = name.indexOf('_');
+ if (i1 < 0) {
+ language = name;
+ } else {
+ language = name.substring(0, i1);
+ ++i1;
+ int i2 = name.indexOf('_', i1);
+ if (i2 < 0) {
+ country = name.substring(i1);
+ } else {
+ country = name.substring(i1, i2);
+ variant = name.substring(i2+1);
+ }
+ }
+
+ return new Locale(language, country, variant);
+ }
+
+ /**
+ * Compare two locale strings of the form aa_BB_CC, and
+ * return true if parent is a 'strict' fallback of child, that is,
+ * if child =~ "^parent(_.+)*" (roughly).
+ */
+ public static boolean isFallbackOf(String parent, String child) {
+ if (!child.startsWith(parent)) {
+ return false;
+ }
+ int i = parent.length();
+ return (i == child.length() ||
+ child.charAt(i) == '_');
+ }
+
+ /**
+ * Compare two locales, and return true if the parent is a
+ * 'strict' fallback of the child (parent string is a fallback
+ * of child string).
+ */
+ public static boolean isFallbackOf(Locale parent, Locale child) {
+ return isFallbackOf(parent.toString(), child.toString());
+ }
+
+
+ /*
+ * Convenience method that calls canonicalLocaleString(String) with
+ * locale.toString();
+ */
+ /*public static String canonicalLocaleString(Locale locale) {
+ return canonicalLocaleString(locale.toString());
+ }*/
+
+ /*
+ * You'd think that Locale canonicalizes, since it munges the
+ * renamed languages, but it doesn't quite. It forces the region
+ * to be upper case but doesn't do anything about the language or
+ * variant. Our canonical form is 'lower_UPPER_UPPER'.
+ */
+ /*public static String canonicalLocaleString(String id) {
+ if (id != null) {
+ int x = id.indexOf("_");
+ if (x == -1) {
+ id = id.toLowerCase(Locale.ENGLISH);
+ } else {
+ StringBuffer buf = new StringBuffer();
+ buf.append(id.substring(0, x).toLowerCase(Locale.ENGLISH));
+ buf.append(id.substring(x).toUpperCase(Locale.ENGLISH));
+
+ int len = buf.length();
+ int n = len;
+ while (--n >= 0 && buf.charAt(n) == '_') {
+ }
+ if (++n != len) {
+ buf.delete(n, len);
+ }
+ id = buf.toString();
+ }
+ }
+ return id;
+ }*/
+
+ /**
+ * Fallback from the given locale name by removing the rightmost _-delimited
+ * element. If there is none, return the root locale ("", "", ""). If this
+ * is the root locale, return null. NOTE: The string "root" is not
+ * recognized; do not use it.
+ *
+ * @return a new Locale that is a fallback from the given locale, or null.
+ */
+ public static Locale fallback(Locale loc) {
+
+ // Split the locale into parts and remove the rightmost part
+ String[] parts = new String[]
+ { loc.getLanguage(), loc.getCountry(), loc.getVariant() };
+ int i;
+ for (i=2; i>=0; --i) {
+ if (parts[i].length() != 0) {
+ parts[i] = "";
+ break;
+ }
+ }
+ if (i<0) {
+ return null; // All parts were empty
+ }
+ return new Locale(parts[0], parts[1], parts[2]);
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java b/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java
new file mode 100644
index 00000000000..01ade38fa4d
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/MultiComparator.java
@@ -0,0 +1,36 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.util.Comparator;
+
+ /**
+  * A comparator that consults a fixed sequence of comparators in order and
+  * reports the first one that distinguishes its two arguments.
+  *
+  * @param <T> the type of objects compared
+  */
+ public class MultiComparator<T> implements Comparator<T> {
+     private final Comparator<T>[] comparators;
+
+     /**
+      * @param comparators the comparators to consult, highest priority first.
+      *        The array is used as passed, without copying.
+      */
+     public MultiComparator(Comparator<T>... comparators) {
+         this.comparators = comparators;
+     }
+
+     /**
+      * Lexicographic compare. Returns the first difference.
+      *
+      * @return zero if equal. Otherwise +/- (i+1)
+      * where i is the index of the first comparator finding a difference
+      * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+      */
+     public int compare(T arg0, T arg1) {
+         for (int i = 0; i < comparators.length; ++i) {
+             int result = comparators[i].compare(arg0, arg1);
+             if (result == 0) {
+                 continue;
+             }
+             // Only the sign of the delegate's answer is kept; the magnitude
+             // encodes which comparator decided.
+             return result > 0 ? i + 1 : -(i + 1);
+         }
+         return 0;
+     }
+ }
\ No newline at end of file
diff --git a/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java b/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
new file mode 100644
index 00000000000..920e515c80c
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
@@ -0,0 +1,374 @@
+/*
+*******************************************************************************
+* Copyright (C) 2009-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.Normalizer2;
+
+public final class Norm2AllModes {
+ // Public API dispatch via Normalizer2 subclasses -------------------------- ***
+
+ // Normalizer2 implementation for the old UNORM_NONE.
+ public static final class NoopNormalizer2 extends Normalizer2 {
+     /**
+      * Replaces the contents of dest with an unmodified copy of src.
+      * @throws IllegalArgumentException if dest and src are the same object
+      */
+     @Override
+     public StringBuilder normalize(CharSequence src, StringBuilder dest) {
+         if(dest==src) {
+             throw new IllegalArgumentException();
+         }
+         dest.setLength(0);
+         return dest.append(src);
+     }
+
+     /**
+      * Appends src, unmodified, to dest.
+      * @throws IllegalArgumentException if dest and src are the same object
+      * @throws RuntimeException wrapping any IOException thrown by dest
+      */
+     @Override
+     public Appendable normalize(CharSequence src, Appendable dest) {
+         if(dest==src) {
+             throw new IllegalArgumentException();
+         }
+         try {
+             return dest.append(src);
+         } catch(IOException e) {
+             throw new RuntimeException(e);  // Avoid declaring "throws IOException".
+         }
+     }
+
+     /**
+      * Appends second, unmodified, to first.
+      * @throws IllegalArgumentException if first and second are the same object
+      */
+     @Override
+     public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
+         if(first==second) {
+             throw new IllegalArgumentException();
+         }
+         return first.append(second);
+     }
+
+     /**
+      * Appends second, unmodified, to first.
+      * @throws IllegalArgumentException if first and second are the same object
+      */
+     @Override
+     public StringBuilder append(StringBuilder first, CharSequence second) {
+         if(first==second) {
+             throw new IllegalArgumentException();
+         }
+         return first.append(second);
+     }
+
+     /** No code point has a decomposition under this normalizer. */
+     @Override
+     public String getDecomposition(int c) {
+         return null;
+     }
+
+     // Every string counts as normalized, and every code point is a boundary and inert.
+     @Override
+     public boolean isNormalized(CharSequence s) {
+         return true;
+     }
+     @Override
+     public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
+         return Normalizer.YES;
+     }
+     @Override
+     public int spanQuickCheckYes(CharSequence s) {
+         return s.length();
+     }
+     @Override
+     public boolean hasBoundaryBefore(int c) {
+         return true;
+     }
+     @Override
+     public boolean hasBoundaryAfter(int c) {
+         return true;
+     }
+     @Override
+     public boolean isInert(int c) {
+         return true;
+     }
+ }
+
+ // Intermediate class:
+ // Has Normalizer2Impl and does boilerplate argument checking and setup.
+ public static abstract class Normalizer2WithImpl extends Normalizer2 {
+     /** The normalization data and algorithms that this object delegates to. */
+     public final Normalizer2Impl impl;
+
+     public Normalizer2WithImpl(Normalizer2Impl ni) {
+         impl=ni;
+     }
+
+     // normalize
+
+     /**
+      * Clears dest and writes the normalized form of src into it.
+      * @throws IllegalArgumentException if dest and src are the same object
+      */
+     @Override
+     public StringBuilder normalize(CharSequence src, StringBuilder dest) {
+         if(dest==src) {
+             throw new IllegalArgumentException();
+         }
+         dest.setLength(0);
+         Normalizer2Impl.ReorderingBuffer target=
+             new Normalizer2Impl.ReorderingBuffer(impl, dest, src.length());
+         normalize(src, target);
+         return dest;
+     }
+
+     /**
+      * Writes the normalized form of src to dest, flushing the intermediate buffer at the end.
+      * @throws IllegalArgumentException if dest and src are the same object
+      */
+     @Override
+     public Appendable normalize(CharSequence src, Appendable dest) {
+         if(dest==src) {
+             throw new IllegalArgumentException();
+         }
+         Normalizer2Impl.ReorderingBuffer target=
+             new Normalizer2Impl.ReorderingBuffer(impl, dest, src.length());
+         normalize(src, target);
+         target.flush();
+         return dest;
+     }
+
+     /** Mode-specific normalization of all of src into buffer. */
+     protected abstract void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer);
+
+     // normalize and append
+
+     @Override
+     public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
+         return normalizeSecondAndAppend(first, second, true);
+     }
+
+     @Override
+     public StringBuilder append(StringBuilder first, CharSequence second) {
+         return normalizeSecondAndAppend(first, second, false);
+     }
+
+     /**
+      * Shared implementation of normalizeSecondAndAppend() (doNormalize=true)
+      * and append() (doNormalize=false).
+      * @throws IllegalArgumentException if first and second are the same object
+      */
+     public StringBuilder normalizeSecondAndAppend(
+             StringBuilder first, CharSequence second, boolean doNormalize) {
+         if(first==second) {
+             throw new IllegalArgumentException();
+         }
+         final int combinedLength=first.length()+second.length();
+         Normalizer2Impl.ReorderingBuffer target=
+             new Normalizer2Impl.ReorderingBuffer(impl, first, combinedLength);
+         normalizeAndAppend(second, doNormalize, target);
+         return first;
+     }
+
+     /** Mode-specific append of src to the text already in buffer. */
+     protected abstract void normalizeAndAppend(
+             CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer);
+
+     @Override
+     public String getDecomposition(int c) {
+         return impl.getDecomposition(c);
+     }
+
+     // quick checks
+
+     /** A string is normalized if the quick-check-yes span covers all of it. */
+     @Override
+     public boolean isNormalized(CharSequence s) {
+         final int length=s.length();
+         return length==spanQuickCheckYes(s);
+     }
+
+     /** Default quick check: YES or NO according to isNormalized(), never MAYBE. */
+     @Override
+     public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
+         if(isNormalized(s)) {
+             return Normalizer.YES;
+         } else {
+             return Normalizer.NO;
+         }
+     }
+
+     /** Default per-code-point quick check value; subclasses override it. */
+     public int getQuickCheck(int c) {
+         return 1;
+     }
+ }
+
+ public static final class DecomposeNormalizer2 extends Normalizer2WithImpl {
+     public DecomposeNormalizer2(Normalizer2Impl ni) {
+         super(ni);
+     }
+
+     /** Decomposes all of src into buffer. */
+     @Override
+     protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) {
+         final int srcLength=src.length();
+         impl.decompose(src, 0, srcLength, buffer);
+     }
+
+     @Override
+     protected void normalizeAndAppend(
+             CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
+         impl.decomposeAndAppend(src, doNormalize, buffer);
+     }
+
+     /** Runs decompose() without a buffer, which is its quick-check mode. */
+     @Override
+     public int spanQuickCheckYes(CharSequence s) {
+         final int length=s.length();
+         return impl.decompose(s, 0, length, null);
+     }
+
+     /** @return 1 if c is "decomposition yes", otherwise 0 */
+     @Override
+     public int getQuickCheck(int c) {
+         final int norm16=impl.getNorm16(c);
+         if(impl.isDecompYes(norm16)) {
+             return 1;
+         }
+         return 0;
+     }
+
+     @Override
+     public boolean hasBoundaryBefore(int c) {
+         return impl.hasDecompBoundary(c, true);
+     }
+
+     @Override
+     public boolean hasBoundaryAfter(int c) {
+         return impl.hasDecompBoundary(c, false);
+     }
+
+     @Override
+     public boolean isInert(int c) {
+         return impl.isDecompInert(c);
+     }
+ }
+
+ public static final class ComposeNormalizer2 extends Normalizer2WithImpl {
+     /** Passed through to every Normalizer2Impl composition call; set from the constructor's fcc flag. */
+     private final boolean onlyContiguous;
+
+     public ComposeNormalizer2(Normalizer2Impl ni, boolean fcc) {
+         super(ni);
+         onlyContiguous=fcc;
+     }
+
+     /** Composes all of src into buffer. */
+     @Override
+     protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) {
+         final int srcLength=src.length();
+         impl.compose(src, 0, srcLength, onlyContiguous, true, buffer);
+     }
+
+     @Override
+     protected void normalizeAndAppend(
+             CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
+         impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer);
+     }
+
+     /** Runs compose() in its isNormalized mode (doCompose=false) with a scratch buffer. */
+     @Override
+     public boolean isNormalized(CharSequence s) {
+         final int length=s.length();
+         // 5: small destCapacity for substring normalization
+         Normalizer2Impl.ReorderingBuffer scratch=
+             new Normalizer2Impl.ReorderingBuffer(impl, new StringBuilder(), 5);
+         return impl.compose(s, 0, length, onlyContiguous, false, scratch);
+     }
+
+     /**
+      * Decodes the composeQuickCheck() result: bit 0 signals "maybe",
+      * the remaining bits hold the length of the quick-check-yes span.
+      */
+     @Override
+     public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
+         final int spanLengthAndMaybe=
+             impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, false);
+         final boolean maybe=(spanLengthAndMaybe&1)!=0;
+         if(maybe) {
+             return Normalizer.MAYBE;
+         }
+         final int spanLength=spanLengthAndMaybe>>>1;
+         return spanLength==s.length() ? Normalizer.YES : Normalizer.NO;
+     }
+
+     @Override
+     public int spanQuickCheckYes(CharSequence s) {
+         final int spanLengthAndMaybe=
+             impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, true);
+         return spanLengthAndMaybe>>>1;  // drop the "maybe" bit
+     }
+
+     @Override
+     public int getQuickCheck(int c) {
+         final int norm16=impl.getNorm16(c);
+         return impl.getCompQuickCheck(norm16);
+     }
+
+     @Override
+     public boolean hasBoundaryBefore(int c) {
+         return impl.hasCompBoundaryBefore(c);
+     }
+
+     @Override
+     public boolean hasBoundaryAfter(int c) {
+         return impl.hasCompBoundaryAfter(c, onlyContiguous, false);
+     }
+
+     /** Same query as hasBoundaryAfter(), but with the last argument set to true. */
+     @Override
+     public boolean isInert(int c) {
+         return impl.hasCompBoundaryAfter(c, onlyContiguous, true);
+     }
+ }
+
+ public static final class FCDNormalizer2 extends Normalizer2WithImpl {
+     public FCDNormalizer2(Normalizer2Impl ni) {
+         super(ni);
+     }
+
+     /** Brings all of src into FCD form, writing the result to buffer. */
+     @Override
+     protected void normalize(CharSequence src, Normalizer2Impl.ReorderingBuffer buffer) {
+         final int srcLength=src.length();
+         impl.makeFCD(src, 0, srcLength, buffer);
+     }
+
+     @Override
+     protected void normalizeAndAppend(
+             CharSequence src, boolean doNormalize, Normalizer2Impl.ReorderingBuffer buffer) {
+         impl.makeFCDAndAppend(src, doNormalize, buffer);
+     }
+
+     /** Runs makeFCD() without a buffer, which only checks the text. */
+     @Override
+     public int spanQuickCheckYes(CharSequence s) {
+         final int length=s.length();
+         return impl.makeFCD(s, 0, length, null);
+     }
+
+     /** @return 1 if c is "decomposition yes", otherwise 0 */
+     @Override
+     public int getQuickCheck(int c) {
+         final int norm16=impl.getNorm16(c);
+         if(impl.isDecompYes(norm16)) {
+             return 1;
+         }
+         return 0;
+     }
+
+     @Override
+     public boolean hasBoundaryBefore(int c) {
+         return impl.hasFCDBoundaryBefore(c);
+     }
+
+     @Override
+     public boolean hasBoundaryAfter(int c) {
+         return impl.hasFCDBoundaryAfter(c);
+     }
+
+     @Override
+     public boolean isInert(int c) {
+         return impl.isFCDInert(c);
+     }
+ }
+
+ // instance cache ---------------------------------------------------------- ***
+
+ /** The shared data object behind all of the normalizers below. */
+ public final Normalizer2Impl impl;
+ /** Composing normalizer created with fcc=false. */
+ public final ComposeNormalizer2 comp;
+ /** Decomposing normalizer. */
+ public final DecomposeNormalizer2 decomp;
+ /** FCD normalizer. */
+ public final FCDNormalizer2 fcd;
+ /** Composing normalizer created with fcc=true. */
+ public final ComposeNormalizer2 fcc;
+
+ /** Wraps one Normalizer2Impl in one normalizer object per mode. */
+ private Norm2AllModes(Normalizer2Impl ni) {
+     impl=ni;
+     decomp=new DecomposeNormalizer2(ni);
+     comp=new ComposeNormalizer2(ni, false);
+     fcc=new ComposeNormalizer2(ni, true);
+     fcd=new FCDNormalizer2(ni);
+ }
+
+ /**
+  * Unwraps a singleton holder: returns its instance, or rethrows the
+  * exception that was recorded when loading its data failed.
+  */
+ private static Norm2AllModes getInstanceFromSingleton(Norm2AllModesSingleton singleton) {
+     final RuntimeException loadFailure=singleton.exception;
+     if(loadFailure==null) {
+         return singleton.allModes;
+     }
+     throw loadFailure;
+ }
+ /**
+  * Returns the shared instance backed by the "nfc" data.
+  * Rethrows the RuntimeException recorded by the singleton if loading that data failed.
+  */
+ public static Norm2AllModes getNFCInstance() {
+ return getInstanceFromSingleton(NFCSingleton.INSTANCE);
+ }
+ /**
+  * Returns the shared instance backed by the "nfkc" data.
+  * Rethrows the RuntimeException recorded by the singleton if loading that data failed.
+  */
+ public static Norm2AllModes getNFKCInstance() {
+ return getInstanceFromSingleton(NFKCSingleton.INSTANCE);
+ }
+ /**
+  * Returns the shared instance backed by the "nfkc_cf" data.
+  * Rethrows the RuntimeException recorded by the singleton if loading that data failed.
+  */
+ public static Norm2AllModes getNFKC_CFInstance() {
+ return getInstanceFromSingleton(NFKC_CFSingleton.INSTANCE);
+ }
+ // For use in properties APIs.
+ public static Normalizer2WithImpl getN2WithImpl(int index) {
+     // index: 0=NFD, 1=NFKD, 2=NFC, 3=NFKC; any other value yields null.
+     if(index<0 || index>3) {
+         return null;
+     }
+     // Even indexes use the NFC data, odd indexes the NFKC data.
+     final Norm2AllModes allModes=(index&1)==0 ? getNFCInstance() : getNFKCInstance();
+     // Indexes 0..1 select the decomposing normalizer, 2..3 the composing one.
+     if(index<2) {
+         return allModes.decomp;
+     } else {
+         return allModes.comp;
+     }
+ }
+ /**
+  * Returns the instance for the given data name.
+  * Without a data stream, the names "nfc", "nfkc" and "nfkc_cf" are served by
+  * the built-in singletons; everything else goes through the instance cache.
+  */
+ public static Norm2AllModes getInstance(InputStream data, String name) {
+     if(data!=null) {
+         return cache.getInstance(name, data);
+     }
+     final Norm2AllModesSingleton singleton;
+     if(name.equals("nfc")) {
+         singleton=NFCSingleton.INSTANCE;
+     } else if(name.equals("nfkc")) {
+         singleton=NFKCSingleton.INSTANCE;
+     } else if(name.equals("nfkc_cf")) {
+         singleton=NFKC_CFSingleton.INSTANCE;
+     } else {
+         // Not a built-in name: fall back to the cache, still without a stream.
+         return cache.getInstance(name, data);
+     }
+     return getInstanceFromSingleton(singleton);
+ }
+ // Instance cache used by getInstance() for everything that is not served by
+ // the nfc/nfkc/nfkc_cf singletons. The key is the data name; the extra
+ // argument is the optional data stream.
+ // NOTE(review): the type arguments follow createInstance()'s signature and
+ // assume CacheBase is declared as <key, value, data> -- confirm against CacheBase.
+ private static final CacheBase<String, Norm2AllModes, InputStream> cache =
+     new SoftCache<String, Norm2AllModes, InputStream>() {
+         @Override
+         protected Norm2AllModes createInstance(String key, InputStream data) {
+             Normalizer2Impl impl;
+             if(data==null) {
+                 // No stream supplied: load <key>.nrm from the ICU data bundle.
+                 impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm");
+             } else {
+                 impl=new Normalizer2Impl().load(data);
+             }
+             return new Norm2AllModes(impl);
+         }
+     };
+
+ // Shared instance of the normalizer that copies text through unchanged.
+ public static final NoopNormalizer2 NOOP_NORMALIZER2=new NoopNormalizer2();
+ /**
+  * Returns the FCD normalizer of the NFC instance, after making sure
+  * that the FCD trie for its data has been built.
+  * @return FCD normalizer
+  */
+ public static Normalizer2 getFCDNormalizer2() {
+     final Norm2AllModes nfc=getNFCInstance();
+     // Called only for its side effect: builds and caches the FCD trie.
+     nfc.impl.getFCDTrie();
+     return nfc.fcd;
+ }
+
+ /**
+  * Holds either a loaded Norm2AllModes or the RuntimeException that loading
+  * it raised, so that the failure can be rethrown on every later access.
+  */
+ private static final class Norm2AllModesSingleton {
+     private Norm2AllModes allModes;
+     private RuntimeException exception;
+
+     private Norm2AllModesSingleton(String name) {
+         final String path=ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm";
+         try {
+             allModes=new Norm2AllModes(new Normalizer2Impl().load(path));
+         } catch(RuntimeException e) {
+             // Remember the failure instead of propagating it out of the constructor.
+             exception=e;
+         }
+     }
+ }
+ // One holder class per built-in data file. Each INSTANCE is a static
+ // initializer of its own nested class, so the data is loaded only when that
+ // holder class is first initialized by the JVM.
+ private static final class NFCSingleton {
+ private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfc");
+ }
+ private static final class NFKCSingleton {
+ private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfkc");
+ }
+ private static final class NFKC_CFSingleton {
+ private static final Norm2AllModesSingleton INSTANCE=new Norm2AllModesSingleton("nfkc_cf");
+ }
+}
diff --git a/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
new file mode 100644
index 00000000000..529fea2e84d
--- /dev/null
+++ b/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
@@ -0,0 +1,2053 @@
+/*
+*******************************************************************************
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*/
+package com.ibm.icu.impl;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.VersionInfo;
+
+public final class Normalizer2Impl {
+ public static final class Hangul {
+ /* Korean Hangul and Jamo constants */
+ public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */
+ public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */
+ public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */
+
+ public static final int HANGUL_BASE=0xac00;
+
+ public static final int JAMO_L_COUNT=19;
+ public static final int JAMO_V_COUNT=21;
+ public static final int JAMO_T_COUNT=28;
+
+ public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT;
+ public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT;
+
+ public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT;
+
+ public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
+ public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
+
+ public static boolean isHangul(int c) {
+ return HANGUL_BASE<=c && c
+ * If dest is a StringBuilder, then the buffer writes directly to it.
+ * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
+ * until no further changes are necessary and whole segments are appended.
+ * append() methods that take combining-class values always write to the StringBuilder.
+ * Other append() methods flush and append to the Appendable.
+ */
+ public static final class ReorderingBuffer implements Appendable {
+ public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) {
+     impl=ni;
+     app=dest;
+     appIsStringBuilder=dest instanceof StringBuilder;
+     if(!appIsStringBuilder) {
+         // Arbitrary Appendable: collect text in a private StringBuilder.
+         str=new StringBuilder();
+         reorderStart=0;
+         lastCC=0;
+         return;
+     }
+
+     // dest is a StringBuilder: write directly into it.
+     // In Java, the constructor subsumes public void init(int destCapacity) {
+     str=(StringBuilder)dest;
+     str.ensureCapacity(destCapacity);
+     reorderStart=0;  // must be set before previousCC() is called below
+     if(str.length()==0) {
+         lastCC=0;
+         return;
+     }
+
+     // Existing text: pick up the combining class of its last code point.
+     setIterator();
+     lastCC=previousCC();
+     // Set reorderStart after the last code point with cc<=1 if there is one.
+     if(lastCC>1) {
+         while(previousCC()>1) {}
+     }
+     reorderStart=codePointLimit;
+ }
+
+ /** @return true if the buffer's StringBuilder holds no text */
+ public boolean isEmpty() {
+     return str.length()==0;
+ }
+
+ /** @return the number of chars in the buffer's StringBuilder */
+ public int length() {
+     return str.length();
+ }
+
+ /** @return the combining class recorded for the most recently appended text */
+ public int getLastCC() {
+     return lastCC;
+ }
+
+ /** @return the StringBuilder that this buffer writes to */
+ public StringBuilder getStringBuilder() {
+     return str;
+ }
+
+ /** Compares the whole buffer contents with s[start..limit[. */
+ public boolean equals(CharSequence s, int start, int limit) {
+     final int bufferLength=str.length();
+     return UTF16Plus.equal(str, 0, bufferLength, s, start, limit);
+ }
+
+ // For Hangul composition, replacing the Leading consonant Jamo with the syllable.
+ public void setLastChar(char c) {
+     final int lastIndex=str.length()-1;
+     str.setCharAt(lastIndex, c);
+ }
+
+ /**
+  * Appends code point c with combining class cc, keeping canonical order.
+  * A code point whose non-zero cc is lower than the last cc is inserted
+  * further back instead of being appended.
+  */
+ public void append(int c, int cc) {
+     final boolean outOfOrder=cc!=0 && cc<lastCC;
+     if(outOfOrder) {
+         insert(c, cc);
+         return;
+     }
+     str.appendCodePoint(c);
+     lastCC=cc;
+     if(cc<=1) {
+         // Nothing before this point can be reordered any more.
+         reorderStart=str.length();
+     }
+ }
+ // s must be in NFD, otherwise change the implementation.
+ public void append(CharSequence s, int start, int limit,
+ int leadCC, int trailCC) {
+ if(start==limit) {
+ return;
+ }
+ if(lastCC<=leadCC || leadCC==0) {
+ if(trailCC<=1) {
+ reorderStart=str.length()+(limit-start);
+ } else if(leadCC<=1) {
+ reorderStart=str.length()+1; // Ok if not a code point boundary.
+ }
+ str.append(s, start, limit);
+ lastCC=trailCC;
+ } else {
+ int c=Character.codePointAt(s, start);
+ start+=Character.charCount(c);
+ insert(c, leadCC); // insert first code point
+ while(startcc;) {}
+ // insert c at codePointLimit, after the character with prevCC<=cc
+ if(c<=0xffff) {
+ str.insert(codePointLimit, (char)c);
+ if(cc<=1) {
+ reorderStart=codePointLimit+1;
+ }
+ } else {
+ str.insert(codePointLimit, Character.toChars(c));
+ if(cc<=1) {
+ reorderStart=codePointLimit+2;
+ }
+ }
+ }
+
+ // Normalization data object passed to the constructor.
+ private final Normalizer2Impl impl;
+ // The destination passed to the constructor.
+ private final Appendable app;
+ // The destination itself if it is a StringBuilder, otherwise a private
+ // StringBuilder created by the constructor.
+ private final StringBuilder str;
+ // True if app is a StringBuilder, in which case str==app.
+ private final boolean appIsStringBuilder;
+ // Index in str after the last code point with cc<=1.
+ private int reorderStart;
+ // Combining class recorded by the most recent append.
+ private int lastCC;
+
+ // private backward iterator
+ // Positions the backward iterator at the end of str.
+ private void setIterator() { codePointStart=str.length(); }
+ private void skipPrevious() { // Requires 0=codePointStart) {
+ return 0;
+ }
+ int c=str.codePointBefore(codePointStart);
+ codePointStart-=Character.charCount(c);
+ if(c(nextOffset-offset)) {
+ throw new IOException("Normalizer2 data: not enough bytes for normTrie");
+ }
+ ds.skipBytes((nextOffset-offset)-trieLength); // skip padding after trie bytes
+
+ // Read the composition and mapping data.
+ offset=nextOffset;
+ nextOffset=inIndexes[IX_RESERVED2_OFFSET];
+ int numChars=(nextOffset-offset)/2;
+ char[] chars;
+ if(numChars!=0) {
+ chars=new char[numChars];
+ for(int i=0; i trieIterator=normTrie.iterator();
+ Trie2.Range range;
+ while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+ /* add the start code point to the USet */
+ set.add(range.startCodePoint);
+ }
+
+ /* add Hangul LV syllables and LV+1 because of skippables */
+ for(int c=Hangul.HANGUL_BASE; c trieIterator=canonIterData.iterator(segmentStarterMapper);
+ Trie2.Range range;
+ while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+ /* add the start code point to the USet */
+ set.add(range.startCodePoint);
+ }
+ }
+ // Value mapper for iterating over canonIterData: reduces each trie value to
+ // just its CANON_NOT_SEGMENT_STARTER bit, so that values differing only in
+ // other bits map to the same result.
+ private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
+ public int map(int in) {
+ return in&CANON_NOT_SEGMENT_STARTER;
+ }
+ };
+
+ // low-level properties ------------------------------------------------ ***
+
+ /** Returns the trie holding the norm16 value of each code point. */
+ public Trie2_16 getNormTrie() { return normTrie; }
+ /**
+  * Builds and returns the FCD trie based on the data used in this instance.
+  * This is required before any of {@link #getFCD16(int)} or
+  * {@link #getFCD16FromSingleLead(char)} are called,
+  * or else they crash.
+  * This method is called automatically by Normalizer2.getInstance(..., Mode.FCD).
+  * The trie is built on the first call and cached; later calls return the cached trie.
+  * @return The FCD trie for this instance's data.
+  */
+ public synchronized Trie2_16 getFCDTrie() {
+     if(fcdTrie!=null) {
+         return fcdTrie;
+     }
+     Trie2Writable newFCDTrie=new Trie2Writable(0, 0);
+     Iterator<Trie2.Range> trieIterator=normTrie.iterator();
+     Trie2.Range range;
+     // Walk the code point ranges; stop at the first lead-surrogate range.
+     while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+         // Set the FCD value for a range of same-norm16 characters.
+         if(range.value!=0) {
+             setFCD16FromNorm16(range.startCodePoint, range.endCodePoint, range.value, newFCDTrie);
+         }
+     }
+     for(char lead=0xd800; lead<0xdc00; ++lead) {
+         // Collect (OR together) the FCD values for a range of supplementary characters,
+         // for their lead surrogate code unit.
+         int oredValue=newFCDTrie.get(lead);
+         trieIterator=normTrie.iteratorForLeadSurrogate(lead);
+         while(trieIterator.hasNext()) {
+             oredValue|=trieIterator.next().value;
+         }
+         if(oredValue!=0) {
+             // Set a "bad" value for makeFCD() to break the quick check loop
+             // and look up the value for the supplementary code point.
+             // If there is any lccc, then set the worst-case lccc of 1.
+             // The ORed-together value's tccc is already the worst case.
+             if(oredValue>0xff) {
+                 oredValue=0x100|(oredValue&0xff);
+             }
+             newFCDTrie.setForLeadSurrogateCodeUnit(lead, oredValue);
+         }
+     }
+     // Freeze the writable trie and cache the result for later calls.
+     return fcdTrie=newFCDTrie.toTrie2_16();
+ }
+
+ /**
+ * Builds the canonical-iterator data for this instance.
+ * This is required before any of {@link #isCanonSegmentStarter(int)} or
+ * {@link #getCanonStartSet(int, UnicodeSet)} are called,
+ * or else they crash.
+ * @return this
+ */
+ public synchronized Normalizer2Impl ensureCanonIterData() {
+ if(canonIterData==null) {
+ Trie2Writable newData=new Trie2Writable(0, 0);
+ canonStartSets=new ArrayList();
+ Iterator trieIterator=normTrie.iterator();
+ Trie2.Range range;
+ while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+ final int norm16=range.value;
+ if(norm16==0 || (minYesNo<=norm16 && norm16=minMaybeYes) {
+ // not a segment starter if it occurs in a decomposition or has cc!=0
+ newValue|=CANON_NOT_SEGMENT_STARTER;
+ if(norm16=minNoNo) {
+ while((norm16_2+=Character.charCount(c2))=MIN_NORMAL_MAYBE_YES) {
+ return norm16&0xff;
+ }
+ if(norm16=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0;
+ }
+
+ /**
+ * Returns the FCD data for code point c.
+ * {@link #getFCDTrie()} must have been called before this method,
+ * or else this method will crash.
+ * @param c A Unicode code point.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ public int getFCD16(int c) { return fcdTrie.get(c); }
+ /**
+ * Returns the FCD data for the single-or-lead code unit c.
+ * {@link #getFCDTrie()} must have been called before this method,
+ * or else this method will crash.
+ * @param c A UTF-16 code unit: a single (non-surrogate) unit or a lead surrogate.
+ * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+ */
+ public int getFCD16FromSingleLead(char c) { return fcdTrie.getFromU16SingleLead(c); }
+
+ // Computes the FCD value (lccc in bits 15..8, tccc in bits 7..0) that
+ // corresponds to norm16 and stores it in newFCDTrie for the code points
+ // start..end. Ranges whose FCD value is zero are left unset.
+ private void setFCD16FromNorm16(int start, int end, int norm16, Trie2Writable newFCDTrie) {
+ // Only loops for 1:1 algorithmic mappings.
+ for(;;) {
+ if(norm16>=MIN_NORMAL_MAYBE_YES) {
+ // The low byte is used as both lccc and tccc.
+ norm16&=0xff;
+ norm16|=norm16<<8;
+ } else if(norm16<=minYesNo || minMaybeYes<=norm16) {
+ // no decomposition or Hangul syllable, all zeros
+ break;
+ } else if(limitNoNo<=norm16) {
+ // norm16 encodes a code point delta to the mapping target.
+ int delta=norm16-(minMaybeYes-MAX_DELTA-1);
+ if(start==end) {
+ // Single code point: continue the loop with the mapped code point's norm16.
+ // Note that start now refers to the mapped code point.
+ start+=delta;
+ norm16=getNorm16(start);
+ } else {
+ // the same delta leads from different original characters to different mappings
+ do {
+ int c=start+delta;
+ setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie);
+ } while(++start<=end);
+ break;
+ }
+ } else {
+ // c decomposes, get everything from the variable-length extra data
+ int firstUnit=extraData.charAt(norm16);
+ if((firstUnit&MAPPING_LENGTH_MASK)==0) {
+ // A character that is deleted (maps to an empty string) must
+ // get the worst-case lccc and tccc values because arbitrary
+ // characters on both sides will become adjacent.
+ norm16=0x1ff;
+ } else {
+ if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
+ norm16=extraData.charAt(norm16+1)&0xff00; // lccc
+ } else {
+ norm16=0;
+ }
+ norm16|=firstUnit>>8; // tccc
+ }
+ }
+ // norm16 now holds the FCD value; the loop only repeats via the
+ // single-code-point delta case above, which skips this store.
+ newFCDTrie.setRange(start, end, norm16, true);
+ break;
+ }
+ }
+
+ /**
+ * Get the decomposition for one code point.
+ * @param c code point
+ * @return c's decomposition, if it has one; returns null if it does not have a decomposition
+ */
+ public String getDecomposition(int c) {
+ int decomp=-1;
+ int norm16;
+ for(;;) {
+ if(c{@link #ensureCanonIterData()} must have been called before this method,
+ * or else this method will crash.
+ * @param c A Unicode code point.
+ * @return true if c starts a canonical-iterator string segment.
+ */
+ public boolean isCanonSegmentStarter(int c) {
+ // NOTE(review): a non-negative value is treated as "segment starter";
+ // presumably CANON_NOT_SEGMENT_STARTER is the sign bit -- confirm at its declaration.
+ return canonIterData.get(c)>=0;
+ }
+ /**
+  * Collects the characters whose decomposition starts with c.
+  * If there are any, the set is cleared first and then filled with them;
+  * otherwise the set is left untouched.
+  * {@link #ensureCanonIterData()} must have been called before this method,
+  * or else this method will crash.
+  * @param c A Unicode code point.
+  * @param set A UnicodeSet to receive the characters whose decompositions
+  *        start with c, if there are any.
+  * @return true if there are characters whose decomposition starts with c.
+  */
+ public boolean getCanonStartSet(int c, UnicodeSet set) {
+     // Ignore the segment-starter flag; only the start-set data matters here.
+     final int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER;
+     if(canonValue==0) {
+         return false;
+     }
+     set.clear();
+
+     // The value is either an index into canonStartSets or a single code point.
+     final int value=canonValue&CANON_VALUE_MASK;
+     final boolean hasSet=(canonValue&CANON_HAS_SET)!=0;
+     if(hasSet) {
+         set.addAll(canonStartSets.get(value));
+     } else if(value!=0) {
+         set.add(value);
+     }
+
+     final boolean hasCompositions=(canonValue&CANON_HAS_COMPOSITIONS)!=0;
+     if(!hasCompositions) {
+         return true;
+     }
+     final int norm16=getNorm16(c);
+     if(norm16==JAMO_L) {
+         // A Jamo L starts one contiguous block of JAMO_VT_COUNT Hangul syllables.
+         final int firstSyllable=
+             Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT;
+         set.add(firstSyllable, firstSyllable+Hangul.JAMO_VT_COUNT-1);
+     } else {
+         addComposites(getCompositionsList(norm16), set);
+     }
+     return true;
+ }
+
+ public static final int MIN_CCC_LCCC_CP=0x300;
+
+ // Special norm16 values and thresholds.
+ public static final int MIN_YES_YES_WITH_CC=0xff01;
+ public static final int JAMO_VT=0xff00;
+ // norm16 values at or above this carry a combining class in their low byte.
+ public static final int MIN_NORMAL_MAYBE_YES=0xfe00;
+ public static final int JAMO_L=1;
+ public static final int MAX_DELTA=0x40;
+
+ // The IX_ constants are positions in the int index array read from the data.
+
+ // Byte offsets from the start of the data, after the generic header.
+ public static final int IX_NORM_TRIE_OFFSET=0;
+ public static final int IX_EXTRA_DATA_OFFSET=1;
+ public static final int IX_RESERVED2_OFFSET=2;
+ public static final int IX_TOTAL_SIZE=7;
+
+ // Code point thresholds for quick check codes.
+ public static final int IX_MIN_DECOMP_NO_CP=8;
+ public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
+
+ // Norm16 value thresholds for quick check combinations and types of extra data.
+ public static final int IX_MIN_YES_NO=10;
+ public static final int IX_MIN_NO_NO=11;
+ public static final int IX_LIMIT_NO_NO=12;
+ public static final int IX_MIN_MAYBE_YES=13;
+
+ public static final int IX_COUNT=16;
+
+ // Bits in the first unit of a mapping in the extra data: flags plus the mapping length.
+ public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
+ public static final int MAPPING_PLUS_COMPOSITION_LIST=0x40;
+ public static final int MAPPING_NO_COMP_BOUNDARY_AFTER=0x20;
+ public static final int MAPPING_LENGTH_MASK=0x1f;
+
+ // NOTE(review): the COMP_ constants presumably describe the layout of
+ // composition list entries -- confirm where the lists are read.
+ public static final int COMP_1_LAST_TUPLE=0x8000;
+ public static final int COMP_1_TRIPLE=1;
+ public static final int COMP_1_TRAIL_LIMIT=0x3400;
+ public static final int COMP_1_TRAIL_MASK=0x7ffe;
+ public static final int COMP_1_TRAIL_SHIFT=9; // 10-1 for the "triple" bit
+ public static final int COMP_2_TRAIL_SHIFT=6;
+ public static final int COMP_2_TRAIL_MASK=0xffc0;
+
+ // higher-level functionality ------------------------------------------ ***
+
+ // Dual functionality:
+ // buffer!=NULL: normalize
+ // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
+ public int decompose(CharSequence s, int src, int limit,
+ ReorderingBuffer buffer) {
+ int minNoCP=minDecompNoCP;
+
+ int prevSrc;
+ int c=0;
+ int norm16=0;
+
+ // only for quick check
+ int prevBoundary=src;
+ int prevCC=0;
+
+ for(;;) {
+ // count code units below the minimum or with irrelevant data for the quick check
+ for(prevSrc=src; src!=limit;) {
+ if( (c=s.charAt(src))=limit) {
+ break;
+ }
+ c=Character.codePointAt(s, src);
+ cc=getCC(getNorm16(c));
+ };
+ buffer.append(s, 0, src, firstCC, prevCC);
+ buffer.append(s, src, limit);
+ }
+ // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
+ // doCompose: normalize
+ // !doCompose: isNormalized (buffer must be empty and initialized)
+ public boolean compose(CharSequence s, int src, int limit,
+ boolean onlyContiguous,
+ boolean doCompose,
+ ReorderingBuffer buffer) {
+ int minNoMaybeCP=minCompNoMaybeCP;
+
+ /*
+ * prevBoundary points to the last character before the current one
+ * that has a composition boundary before it with ccc==0 and quick check "yes".
+ * Keeping track of prevBoundary saves us looking for a composition boundary
+ * when we find a "no" or "maybe".
+ *
+ * When we back out from prevSrc back to prevBoundary,
+ * then we also remove those same characters (which had been simply copied
+ * or canonically-order-inserted) from the ReorderingBuffer.
+ * Therefore, at all times, the [prevBoundary..prevSrc[ source units
+ * must correspond 1:1 to destination units at the end of the destination buffer.
+ */
+ int prevBoundary=src;
+ int prevSrc;
+ int c=0;
+ int norm16=0;
+
+ // only for isNormalized
+ int prevCC=0;
+
+ for(;;) {
+ // count code units below the minimum or with irrelevant data for the quick check
+ for(prevSrc=src; src!=limit;) {
+ if( (c=s.charAt(src))=minNoNo.
+ * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
+ * or has ccc!=0.
+ * Check for Jamo V/T, then for regular characters.
+ * c is not a Hangul syllable or Jamo L because those have "yes" properties.
+ */
+ if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
+ char prev=s.charAt(prevSrc-1);
+ boolean needToDecompose=false;
+ if(c=MIN_YES_YES_WITH_CC) {
+ int cc=norm16&0xff; // cc!=0
+ if( onlyContiguous && // FCC
+ (doCompose ? buffer.getLastCC() : prevCC)==0 &&
+ prevBoundarycc
+ ) {
+ // Fails FCD test, need to decompose and contiguously recompose.
+ if(!doCompose) {
+ return false;
+ }
+ } else if(doCompose) {
+ buffer.append(c, cc);
+ continue;
+ } else if(prevCC<=cc) {
+ prevCC=cc;
+ continue;
+ } else {
+ return false;
+ }
+ } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
+ return false;
+ }
+
+ /*
+ * Find appropriate boundaries around this character,
+ * decompose the source text from between the boundaries,
+ * and recompose it.
+ *
+ * We may need to remove the last few characters from the ReorderingBuffer
+ * to account for source text that was copied or appended
+ * but needs to take part in the recomposition.
+ */
+
+ /*
+ * Find the last composition boundary in [prevBoundary..src[.
+ * It is either the decomposition of the current character (at prevSrc),
+ * or prevBoundary.
+ */
+ if(hasCompBoundaryBefore(c, norm16)) {
+ prevBoundary=prevSrc;
+ } else if(doCompose) {
+ buffer.removeSuffix(prevSrc-prevBoundary);
+ }
+
+ // Find the next composition boundary in [src..limit[ -
+ // modifies src to point to the next starter.
+ src=findNextCompBoundary(s, src, limit);
+
+ // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
+ int recomposeStartIndex=buffer.length();
+ decomposeShort(s, prevBoundary, src, buffer);
+ recompose(buffer, recomposeStartIndex, onlyContiguous);
+ if(!doCompose) {
+ if(!buffer.equals(s, prevBoundary, src)) {
+ return false;
+ }
+ buffer.remove();
+ prevCC=0;
+ }
+
+ // Move to the next starter. We never need to look back before this point again.
+ prevBoundary=src;
+ }
+ return true;
+ }
+ /**
+ * Very similar to compose(): Make the same changes in both places if relevant.
+ * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
+ * !doSpan: quickCheck
+ * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
+ * bit 0: set if "maybe"; otherwise, if the span length<s.length()
+ * then the quick check result is "no"
+ */
+ public int composeQuickCheck(CharSequence s, int src, int limit,
+ boolean onlyContiguous, boolean doSpan) {
+ int qcResult=0;
+ int minNoMaybeCP=minCompNoMaybeCP;
+
+ /*
+ * prevBoundary points to the last character before the current one
+ * that has a composition boundary before it with ccc==0 and quick check "yes".
+ */
+ int prevBoundary=src;
+ int prevSrc;
+ int c=0;
+ int norm16=0;
+ int prevCC=0;
+
+ for(;;) {
+ // count code units below the minimum or with irrelevant data for the quick check
+ for(prevSrc=src;;) {
+ if(src==limit) {
+ return (src<<1)|qcResult; // "yes" or "maybe"
+ }
+ if( (c=s.charAt(src))=minNoNo.
+ * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
+ * or has ccc!=0.
+ */
+ if(isMaybeOrNonZeroCC(norm16)) {
+ int cc=getCCFromYesOrMaybe(norm16);
+ if( onlyContiguous && // FCC
+ cc!=0 &&
+ prevCC==0 &&
+ prevBoundarycc
+ ) {
+ // Fails FCD test.
+ } else if(prevCC<=cc || cc==0) {
+ prevCC=cc;
+ if(norm16appendZeroCC() because we track
+ // the lead and trail combining classes here, rather than leaving it to
+ // the ReorderingBuffer.
+ // The exception is the call to decomposeShort() which uses the buffer
+ // in the normal way.
+
+ // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
+ // Similar to the prevBoundary in the compose() implementation.
+ int prevBoundary=src;
+ int prevSrc;
+ int c=0;
+ int prevFCD16=0;
+ int fcd16=0;
+
+ for(;;) {
+ // count code units with lccc==0
+ for(prevSrc=src; src!=limit;) {
+ if((c=s.charAt(src))1) {
+ --prevBoundary;
+ }
+ } else {
+ int p=src-1;
+ if( Character.isLowSurrogate(s.charAt(p)) && prevSrc1) {
+ prevBoundary=p;
+ }
+ }
+ if(buffer!=null) {
+ // The last lccc==0 character is excluded from the
+ // flush-and-append call in case it needs to be modified.
+ buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
+ buffer.append(s, prevBoundary, src);
+ }
+ // The start of the current character (c).
+ prevSrc=src;
+ } else if(src==limit) {
+ break;
+ }
+
+ src+=Character.charCount(c);
+ // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
+ // Check for proper order, and decompose locally if necessary.
+ if((prevFCD16&0xff)<=(fcd16>>8)) {
+ // proper order: prev tccc <= current lccc
+ if((fcd16&0xff)<=1) {
+ prevBoundary=src;
+ }
+ if(buffer!=null) {
+ buffer.appendZeroCC(c);
+ }
+ prevFCD16=fcd16;
+ continue;
+ } else if(buffer==null) {
+ return prevBoundary; // quick check "no"
+ } else {
+ /*
+ * Back out the part of the source that we copied or appended
+ * already but is now going to be decomposed.
+ * prevSrc is set to after what was copied/appended.
+ */
+ buffer.removeSuffix(prevSrc-prevBoundary);
+ /*
+ * Find the part of the source that needs to be decomposed,
+ * up to the next safe boundary.
+ */
+ src=findNextFCDBoundary(s, src, limit);
+ /*
+ * The source text does not fulfill the conditions for FCD.
+ * Decompose and reorder a limited piece of the text.
+ */
+ decomposeShort(s, prevBoundary, src, buffer);
+ prevBoundary=src;
+ prevFCD16=0;
+ }
+ }
+ return src;
+ }
+ /**
+  * Appends s to buffer, first repairing the seam between the text already in
+  * the buffer and the start of s when needed.
+  *
+  * If the buffer is not empty and findNextFCDBoundary() reports that the first
+  * boundary in s is not at index 0, then the buffer text from
+  * findPreviousFCDBoundary() to its end and the prefix of s up to that first
+  * boundary are joined in a temporary StringBuilder. That suffix is removed from
+  * the buffer and the joined piece is passed through makeFCD() into the buffer.
+  *
+  * @param s         the text to append
+  * @param doMakeFCD if true, the rest of s goes through makeFCD();
+  *                  otherwise it is appended with buffer.append()
+  * @param buffer    the destination buffer
+  */
+ public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
+     final int limit=s.length();
+     // With an empty destination there is no seam, so the rest of s starts at 0.
+     final int srcStart=buffer.isEmpty() ? 0 : findNextFCDBoundary(s, 0, limit);
+     if(srcStart!=0) {
+         int destBoundary=findPreviousFCDBoundary(buffer.getStringBuilder(), buffer.length());
+         int tailLength=buffer.length()-destBoundary;
+         StringBuilder seam=new StringBuilder(tailLength+srcStart+16);
+         seam.append(buffer.getStringBuilder(), destBoundary, buffer.length());
+         // Take the tail back out of the buffer before the joined piece is re-added.
+         buffer.removeSuffix(tailLength);
+         seam.append(s, 0, srcStart);
+         makeFCD(seam, 0, seam.length(), buffer);
+     }
+     if(!doMakeFCD) {
+         buffer.append(s, srcStart, limit);
+         return;
+     }
+     makeFCD(s, srcStart, limit, buffer);
+ }
+
+ // Note: hasDecompBoundary() could be implemented as aliases to
+ // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
+ // at the cost of building the FCD trie for a decomposition normalizer.
+ public boolean hasDecompBoundary(int c, boolean before) {
+ for(;;) {
+ if(cMIN_NORMAL_MAYBE_YES) {
+ return false; // ccc!=0
+ } else if(isDecompNoAlgorithmic(norm16)) {
+ c=mapAlgorithmic(c, norm16);
+ } else {
+ // c decomposes, get everything from the variable-length extra data
+ int firstUnit=extraData.charAt(norm16++);
+ if((firstUnit&MAPPING_LENGTH_MASK)==0) {
+ return false;
+ }
+ if(!before) {
+ // decomp after-boundary: same as hasFCDBoundaryAfter(),
+ // fcd16<=1 || trailCC==0
+ if(firstUnit>0x1ff) {
+ return false; // trailCC>1
+ }
+ if(firstUnit<=0xff) {
+ return true; // trailCC==0
+ }
+ // if(trailCC==1) test leadCC==0, same as checking for before-boundary
+ }
+ // true if leadCC==0 (hasFCDBoundaryBefore())
+ return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16)&0xff00)==0;
+ }
+ }
+ }
+ public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
+
+ public boolean hasCompBoundaryBefore(int c) {
+ return c= (testInert ? minNoNo : minMaybeYes)) {
+ return false;
+ } else if(isDecompNoAlgorithmic(norm16)) {
+ c=mapAlgorithmic(c, norm16);
+ } else {
+ // c decomposes, get everything from the variable-length extra data.
+ // If testInert, then c must be a yesNo character which has lccc=0,
+ // otherwise it could be a noNo.
+ int firstUnit=extraData.charAt(norm16);
+ // true if
+ // c is not deleted, and
+ // it and its decomposition do not combine forward, and it has a starter, and
+ // if FCC then trailCC<=1
+ return
+ (firstUnit&MAPPING_LENGTH_MASK)!=0 &&
+ (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 &&
+ (!onlyContiguous || firstUnit<=0x1ff);
+ }
+ }
+ }
+
+ public boolean hasFCDBoundaryBefore(int c) { return c=minMaybeYes; }
+ // True only when norm16 is 0.
+ private static boolean isInert(int norm16) {
+     return 0==norm16;
+ }
+ // static UBool isJamoL(uint16_t norm16) const { return norm16==1; }
+ // True only when norm16 equals the JAMO_VT constant.
+ private static boolean isJamoVT(int norm16) {
+     return JAMO_VT==norm16;
+ }
+ // True only when norm16 equals this instance's minYesNo value.
+ private boolean isHangul(int norm16) {
+     return minYesNo==norm16;
+ }
+ private boolean isCompYesAndZeroCC(int norm16) { return norm16=MIN_YES_YES_WITH_CC || norm16=limitNoNo; }
+
+ // For use with isCompYes().
+ // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
+ // static uint8_t getCCFromYes(uint16_t norm16) {
+ // return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
+ // }
+ // Reads the extraData unit at index norm16. If its MAPPING_HAS_CCC_LCCC_WORD bit is set,
+ // returns the low 8 bits of the unit at norm16+1; otherwise returns 0.
+ private int getCCFromNoNo(int norm16) {
+     boolean hasCccLcccWord=(extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0;
+     return hasCccLcccWord ? extraData.charAt(norm16+1)&0xff : 0;
+ }
+ // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
+ // Returns 0 when the character's norm16 is at most minYesNo, otherwise the
+ // upper byte (>>8) of the extraData unit at index norm16.
+ int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) {
+     // A span of exactly one code unit is read directly; anything else goes through
+     // Character.codePointAt().
+     int cp=(cpLimit-1)==cpStart ? s.charAt(cpStart) : Character.codePointAt(s, cpStart);
+     int norm16=getNorm16(cp);
+     if(norm16>minYesNo) {
+         return extraData.charAt(norm16)>>8;  // tccc from yesNo
+     }
+     return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
+ }
+
+ // Requires algorithmic-NoNo.
+ // Returns c plus norm16, less the offset (minMaybeYes-MAX_DELTA-1).
+ private int mapAlgorithmic(int c, int norm16) {
+     int offset=minMaybeYes-MAX_DELTA-1;
+     return c+norm16-offset;
+ }
+
+ // Requires minYesNo